2008-08-26 19:12:23 +00:00
// This file is part of Eigen, a lightweight C++ template library
2009-05-22 20:25:33 +02:00
// for linear algebra.
2008-08-26 19:12:23 +00:00
//
2015-07-29 10:22:25 +02:00
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
2009-02-02 13:22:19 +00:00
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
2009-01-09 00:55:53 +00:00
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
2010-02-27 17:25:07 +01:00
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
2010-06-21 11:59:37 +02:00
// Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org>
2013-08-25 18:00:28 +09:00
// Copyright (C) 2013 Pavel Holoborodko <pavel@holoborodko.com>
2008-08-26 19:12:23 +00:00
//
2012-07-13 14:42:47 -04:00
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
2008-08-26 19:12:23 +00:00
2010-02-28 09:10:41 -05:00
/*****************************************************************************
* * * Platform checks for aligned malloc functions * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2008-08-26 19:12:23 +00:00
# ifndef EIGEN_MEMORY_H
# define EIGEN_MEMORY_H
2013-02-25 19:17:13 +01:00
#ifndef EIGEN_MALLOC_ALREADY_ALIGNED

// The user did not force a value: try to detect automatically whether the
// system malloc already returns 16-byte aligned pointers.

// glibc: on 64-bit systems malloc returns 16-byte-aligned pointers, see
//   http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This holds at least since glibc 2.8.
// 64-bit is detected through __LP64__. According to
//   http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports", so within
// the context of glibc it is quite safe to equate 64-bit with LP64.
// The shortcut is disabled when __SANITIZE_ADDRESS__ is defined or when the
// default alignment is not 16 bytes.
#if defined(__GLIBC__) && ((__GLIBC__ >= 2 && __GLIBC_MINOR__ >= 8) || __GLIBC__ > 2) \
 && defined(__LP64__) && !defined(__SANITIZE_ADDRESS__) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
#endif

// FreeBSD 6 seems to have a 16-byte aligned malloc, see
//   http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
// FreeBSD 7 seems to have a 16-byte aligned malloc except on ARM and MIPS, see
//   http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif

// Final verdict: macOS and 64-bit Windows (both only for a 16-byte default
// alignment), or one of the two platform checks above.
#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16))     \
 || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16))   \
 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED                      \
 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif

#endif
2021-09-10 19:12:26 +00:00
# include "../InternalHeaderCheck.h"
2012-04-15 11:06:28 +01:00
namespace Eigen {
2010-10-25 10:15:22 -04:00
namespace internal {
2017-01-24 13:55:18 -08:00
/** \internal Reports an allocation failure.
  * With EIGEN_EXCEPTIONS defined this throws std::bad_alloc directly. Otherwise it
  * requests the largest representable amount of memory from the allocator
  * (presumably so that the allocator itself reports the failure).
  */
EIGEN_DEVICE_FUNC
inline void throw_std_bad_alloc()
{
#if defined(EIGEN_EXCEPTIONS)
  throw std::bad_alloc();
#else
  // Deliberately not const: the requested size must stay a runtime value.
  std::size_t huge = static_cast<std::size_t>(-1);
  #if defined(EIGEN_HIPCC)
  // hipcc treats calls to "::operator new" as opaque function calls (no inlining),
  // so the generic branch below triggers the hipcc warning
  //   "no overloaded function has restriction specifiers that are compatible with the ambient context"
  // This function carries EIGEN_DEVICE_FUNC and hipcc seems to expect the same on
  // "operator new"; the older array-new form is therefore kept for hipcc.
  new int[huge];
  #else
  void* not_used = ::operator new(huge);
  EIGEN_UNUSED_VARIABLE(not_used);
  #endif
#endif
}
2010-02-28 09:10:41 -05:00
/*****************************************************************************
* * * Implementation of handmade aligned functions * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2010-02-28 14:32:57 +01:00
/* ----- Hand made implementations of aligned malloc/free and realloc ----- */
2022-10-22 22:51:31 +00:00
/** \internal Like malloc, but the returned pointer is guaranteed to be aligned to `alignment`.
* Fast , but wastes ` alignment ` additional bytes of memory . Does not throw any exception .
2009-01-09 14:56:44 +00:00
*/
2018-10-01 14:28:37 +00:00
EIGEN_DEVICE_FUNC inline void * handmade_aligned_malloc ( std : : size_t size , std : : size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES )
2009-01-09 14:56:44 +00:00
{
2022-11-18 22:35:31 +00:00
eigen_assert ( alignment > = sizeof ( void * ) & & alignment < = 128 & & ( alignment & ( alignment - 1 ) ) = = 0 & & " Alignment must be at least sizeof(void*), less than or equal to 128, and a power of 2 " ) ;
2022-10-22 22:51:31 +00:00
void * original = std : : malloc ( size + alignment ) ;
2010-02-28 14:32:57 +01:00
if ( original = = 0 ) return 0 ;
2022-11-18 22:35:31 +00:00
uint8_t offset = static_cast < uint8_t > ( alignment - ( reinterpret_cast < std : : size_t > ( original ) & ( alignment - 1 ) ) ) ;
void * aligned = static_cast < void * > ( static_cast < uint8_t * > ( original ) + offset ) ;
* ( static_cast < uint8_t * > ( aligned ) - 1 ) = offset ;
2009-01-09 14:56:44 +00:00
return aligned ;
}
2010-10-25 10:15:22 -04:00
/** \internal Frees memory allocated with handmade_aligned_malloc */
2018-10-01 14:28:37 +00:00
EIGEN_DEVICE_FUNC inline void handmade_aligned_free ( void * ptr )
2009-01-09 14:56:44 +00:00
{
2018-10-01 14:28:37 +00:00
if ( ptr ) {
2022-11-18 22:35:31 +00:00
uint8_t offset = static_cast < uint8_t > ( * ( static_cast < uint8_t * > ( ptr ) - 1 ) ) ;
void * original = static_cast < void * > ( static_cast < uint8_t * > ( ptr ) - offset ) ;
2022-10-22 22:51:31 +00:00
std : : free ( original ) ;
2018-10-01 14:28:37 +00:00
}
2009-01-09 14:56:44 +00:00
}
2010-03-05 09:44:21 +01:00
/** \internal
* \ brief Reallocates aligned memory .
2016-02-05 21:24:35 +01:00
* Since we know that our handmade version is based on std : : malloc
2010-02-28 14:32:57 +01:00
* we can use std : : realloc to implement efficient reallocation .
*/
2022-11-18 22:35:31 +00:00
EIGEN_DEVICE_FUNC inline void * handmade_aligned_realloc ( void * ptr , std : : size_t new_size , std : : size_t old_size , std : : size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES )
2010-02-27 17:25:07 +01:00
{
2022-11-18 22:35:31 +00:00
if ( ptr = = 0 ) return handmade_aligned_malloc ( new_size , alignment ) ;
uint8_t old_offset = * ( static_cast < uint8_t * > ( ptr ) - 1 ) ;
void * old_original = static_cast < uint8_t * > ( ptr ) - old_offset ;
void * original = std : : realloc ( old_original , new_size + alignment ) ;
2010-02-28 14:32:57 +01:00
if ( original = = 0 ) return 0 ;
2022-11-18 22:35:31 +00:00
if ( original = = old_original ) return ptr ;
uint8_t offset = static_cast < uint8_t > ( alignment - ( reinterpret_cast < std : : size_t > ( original ) & ( alignment - 1 ) ) ) ;
void * aligned = static_cast < void * > ( static_cast < uint8_t * > ( original ) + offset ) ;
if ( offset ! = old_offset ) {
const void * src = static_cast < const void * > ( static_cast < uint8_t * > ( original ) + old_offset ) ;
std : : size_t count = ( std : : min ) ( new_size , old_size ) ;
std : : memmove ( aligned , src , count ) ;
2022-10-22 22:51:31 +00:00
}
2022-11-18 22:35:31 +00:00
* ( static_cast < uint8_t * > ( aligned ) - 1 ) = offset ;
return aligned ;
2010-02-27 17:25:07 +01:00
}
2010-02-28 09:10:41 -05:00
/*****************************************************************************
* * * Implementation of portable aligned versions of malloc / free / realloc * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2010-02-27 17:25:07 +01:00
2011-03-06 20:59:25 -05:00
# ifdef EIGEN_NO_MALLOC
2015-02-10 14:29:47 -08:00
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed ( )
2011-03-06 20:59:25 -05:00
{
eigen_assert ( false & & " heap allocation is forbidden (EIGEN_NO_MALLOC is defined) " ) ;
}
# elif defined EIGEN_RUNTIME_NO_MALLOC
2015-02-10 14:29:47 -08:00
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl ( bool update , bool new_value = false )
2011-03-06 20:59:25 -05:00
{
static bool value = true ;
if ( update = = 1 )
value = new_value ;
return value ;
}
2015-02-10 14:29:47 -08:00
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed ( ) { return is_malloc_allowed_impl ( false ) ; }
EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed ( bool new_value ) { return is_malloc_allowed_impl ( true , new_value ) ; }
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed ( )
2011-03-06 20:59:25 -05:00
{
eigen_assert ( is_malloc_allowed ( ) & & " heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false) " ) ;
}
2017-01-24 13:55:18 -08:00
# else
2015-02-10 14:29:47 -08:00
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed ( )
2011-03-06 20:59:25 -05:00
{ }
# endif
2014-01-29 11:43:05 -08:00
/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
2011-10-17 08:44:44 -04:00
* On allocation error , the returned pointer is null , and std : : bad_alloc is thrown .
2008-12-16 15:17:29 +00:00
*/
2017-01-23 22:02:53 +01:00
EIGEN_DEVICE_FUNC inline void * aligned_malloc ( std : : size_t size )
2008-08-26 19:12:23 +00:00
{
2011-03-06 20:59:25 -05:00
check_that_malloc_is_allowed ( ) ;
2009-01-08 15:20:21 +00:00
2009-11-07 09:07:23 +01:00
void * result ;
2016-02-05 21:24:35 +01:00
# if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
2018-07-13 11:03:04 -07:00
2018-11-19 18:13:59 +00:00
EIGEN_USING_STD ( malloc )
result = malloc ( size ) ;
2018-07-13 11:03:04 -07:00
2016-02-05 21:38:16 +01:00
# if EIGEN_DEFAULT_ALIGN_BYTES==16
2018-08-01 21:34:47 -04:00
eigen_assert ( ( size < 16 | | ( std : : size_t ( result ) % 16 ) = = 0 ) & & " System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator. " ) ;
2016-02-05 21:38:16 +01:00
# endif
2009-01-08 15:20:21 +00:00
# else
2010-10-25 10:15:22 -04:00
result = handmade_aligned_malloc ( size ) ;
2009-01-08 15:20:21 +00:00
# endif
2009-11-07 09:07:23 +01:00
2011-10-17 08:44:44 -04:00
if ( ! result & & size )
throw_std_bad_alloc ( ) ;
2009-01-08 15:20:21 +00:00
return result ;
}
2010-10-25 10:15:22 -04:00
/** \internal Frees memory allocated with aligned_malloc. */
2015-02-10 14:29:47 -08:00
EIGEN_DEVICE_FUNC inline void aligned_free ( void * ptr )
2009-01-08 15:20:21 +00:00
{
2016-02-05 21:24:35 +01:00
# if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
2018-06-06 10:12:58 -04:00
2018-11-19 18:13:59 +00:00
EIGEN_USING_STD ( free )
free ( ptr ) ;
2018-07-13 11:03:04 -07:00
2009-01-09 14:56:44 +00:00
# else
2010-10-25 10:15:22 -04:00
handmade_aligned_free ( ptr ) ;
2009-01-08 15:20:21 +00:00
# endif
}
2010-02-28 14:32:57 +01:00
/**
2016-02-05 21:24:35 +01:00
* \ internal
* \ brief Reallocates an aligned block of memory .
* \ throws std : : bad_alloc on allocation failure
*/
2022-10-22 22:51:31 +00:00
EIGEN_DEVICE_FUNC inline void * aligned_realloc ( void * ptr , std : : size_t new_size , std : : size_t old_size )
2010-02-27 17:25:07 +01:00
{
2022-07-19 20:59:07 +00:00
if ( ptr = = 0 ) return aligned_malloc ( new_size ) ;
2010-02-27 17:25:07 +01:00
void * result ;
2016-02-05 21:24:35 +01:00
# if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
2022-11-18 22:35:31 +00:00
EIGEN_UNUSED_VARIABLE ( old_size )
2010-02-28 14:32:57 +01:00
result = std : : realloc ( ptr , new_size ) ;
2010-02-27 17:25:07 +01:00
# else
2022-11-18 22:35:31 +00:00
result = handmade_aligned_realloc ( ptr , new_size , old_size ) ;
2010-02-27 17:25:07 +01:00
# endif
2011-10-17 08:44:44 -04:00
if ( ! result & & new_size )
throw_std_bad_alloc ( ) ;
2022-09-05 09:11:08 +02:00
# ifdef EIGEN_RUNTIME_NO_MALLOC
if ( result ! = ptr )
check_that_malloc_is_allowed ( ) ;
# endif
2010-02-27 17:25:07 +01:00
return result ;
}
2010-02-28 09:10:41 -05:00
/*****************************************************************************
* * * Implementation of conditionally aligned functions * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2010-02-28 14:32:57 +01:00
/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
2011-10-17 08:44:44 -04:00
* On allocation error , the returned pointer is null , and a std : : bad_alloc is thrown .
2010-02-28 14:32:57 +01:00
*/
2017-01-23 22:02:53 +01:00
template < bool Align > EIGEN_DEVICE_FUNC inline void * conditional_aligned_malloc ( std : : size_t size )
2010-02-28 14:32:57 +01:00
{
2010-10-25 10:15:22 -04:00
return aligned_malloc ( size ) ;
2010-02-28 14:32:57 +01:00
}
2017-01-23 22:02:53 +01:00
template < > EIGEN_DEVICE_FUNC inline void * conditional_aligned_malloc < false > ( std : : size_t size )
2010-02-28 14:32:57 +01:00
{
2011-03-06 20:59:25 -05:00
check_that_malloc_is_allowed ( ) ;
2010-02-28 14:32:57 +01:00
2018-11-19 18:13:59 +00:00
EIGEN_USING_STD ( malloc )
void * result = malloc ( size ) ;
2018-07-13 11:03:04 -07:00
2011-10-17 08:44:44 -04:00
if ( ! result & & size )
throw_std_bad_alloc ( ) ;
2010-02-28 14:32:57 +01:00
return result ;
}
2010-10-25 10:15:22 -04:00
/** \internal Frees memory allocated with conditional_aligned_malloc */
2015-02-10 14:29:47 -08:00
template < bool Align > EIGEN_DEVICE_FUNC inline void conditional_aligned_free ( void * ptr )
2010-02-28 14:32:57 +01:00
{
2010-10-25 10:15:22 -04:00
aligned_free ( ptr ) ;
2010-02-28 14:32:57 +01:00
}
2015-02-10 14:29:47 -08:00
template < > EIGEN_DEVICE_FUNC inline void conditional_aligned_free < false > ( void * ptr )
2010-02-28 14:32:57 +01:00
{
2018-11-19 18:13:59 +00:00
EIGEN_USING_STD ( free )
free ( ptr ) ;
2010-02-28 14:32:57 +01:00
}
2022-10-22 22:51:31 +00:00
/** \internal Resizes a block obtained from conditional_aligned_malloc<Align>.
  * The generic version (used for Align==true) forwards to aligned_realloc.
  * \param ptr      block to resize
  * \param new_size requested size in bytes
  * \param old_size current size in bytes
  */
template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
{
  return aligned_realloc(ptr, new_size, old_size);
}

/** \internal Align==false: plain std::realloc; the old size is not needed.
  * As in aligned_realloc and conditional_aligned_malloc<false>, a failed request
  * for a non-zero \a new_size is reported through throw_std_bad_alloc() instead
  * of handing a null pointer back to the caller.
  */
template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
{
  void* result = std::realloc(ptr, new_size);
  if (!result && new_size)
    throw_std_bad_alloc();
  return result;
}
2010-02-28 09:10:41 -05:00
/*****************************************************************************
* * * Construction / destruction of array elements * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2010-02-28 14:32:57 +01:00
/** \internal Destructs the elements of an array.
2009-01-08 15:20:21 +00:00
* The \ a size parameters tells on how many objects to call the destructor of T .
*/
2017-01-23 22:02:53 +01:00
template < typename T > EIGEN_DEVICE_FUNC inline void destruct_elements_of_array ( T * ptr , std : : size_t size )
2009-01-08 15:20:21 +00:00
{
// always destruct an array starting from the end.
2010-07-14 22:49:34 +02:00
if ( ptr )
while ( size ) ptr [ - - size ] . ~ T ( ) ;
2009-01-08 15:20:21 +00:00
}
2014-07-06 06:58:13 +02:00
/** \internal Constructs the elements of an array.
* The \ a size parameter tells on how many objects to call the constructor of T .
*/
2022-09-08 19:39:36 +00:00
template < typename T > EIGEN_DEVICE_FUNC inline T * default_construct_elements_of_array ( T * ptr , std : : size_t size )
2014-07-06 06:58:13 +02:00
{
2022-01-19 21:34:10 +00:00
std : : size_t i = 0 ;
2014-07-22 13:16:44 +02:00
EIGEN_TRY
{
2014-07-06 06:58:13 +02:00
for ( i = 0 ; i < size ; + + i ) : : new ( ptr + i ) T ;
2014-07-22 13:16:44 +02:00
}
EIGEN_CATCH ( . . . )
{
destruct_elements_of_array ( ptr , i ) ;
EIGEN_THROW ;
}
2022-03-01 21:35:22 +00:00
return ptr ;
2014-07-06 06:58:13 +02:00
}
2022-09-08 19:39:36 +00:00
/** \internal Copy-constructs \a size objects in the raw storage at \a ptr, taking
  * element i of \a src as the source of element i.
  * If a constructor throws, the elements built so far are destroyed and the
  * exception is propagated.
  * \returns \a ptr
  */
template<typename T> EIGEN_DEVICE_FUNC inline T* copy_construct_elements_of_array(T *ptr, const T* src, std::size_t size)
{
  // Number of fully constructed elements; needed by the handler below.
  std::size_t built = 0;
  EIGEN_TRY
  {
    while (built < size)
    {
      ::new (ptr + built) T(src[built]);
      ++built;
    }
  }
  EIGEN_CATCH(...)
  {
    destruct_elements_of_array(ptr, built);
    EIGEN_THROW;
  }
  return ptr;
}
/** \internal Move-constructs \a size objects in the raw storage at \a ptr, taking
  * element i of \a src as the source of element i.
  * If a constructor throws, the elements built so far are destroyed and the
  * exception is propagated.
  * \returns \a ptr
  */
template<typename T> EIGEN_DEVICE_FUNC inline T* move_construct_elements_of_array(T *ptr, T* src, std::size_t size)
{
  // Number of fully constructed elements; needed by the handler below.
  std::size_t built = 0;
  EIGEN_TRY
  {
    while (built < size)
    {
      ::new (ptr + built) T(std::move(src[built]));
      ++built;
    }
  }
  EIGEN_CATCH(...)
  {
    destruct_elements_of_array(ptr, built);
    EIGEN_THROW;
  }
  return ptr;
}
2010-02-28 09:10:41 -05:00
/*****************************************************************************
* * * Implementation of aligned new / delete - like functions * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2010-02-28 14:32:57 +01:00
2011-10-16 16:12:19 -04:00
template < typename T >
2017-01-23 22:02:53 +01:00
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow ( std : : size_t size )
2011-10-16 16:12:19 -04:00
{
2017-01-23 22:02:53 +01:00
if ( size > std : : size_t ( - 1 ) / sizeof ( T ) )
2011-10-17 08:44:44 -04:00
throw_std_bad_alloc ( ) ;
2011-10-16 16:12:19 -04:00
}
2010-02-28 14:32:57 +01:00
/** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
2011-10-17 08:44:44 -04:00
* On allocation error , the returned pointer is undefined , but a std : : bad_alloc is thrown .
2010-02-28 14:32:57 +01:00
* The default constructor of T is called .
*/
2017-01-23 22:02:53 +01:00
template < typename T > EIGEN_DEVICE_FUNC inline T * aligned_new ( std : : size_t size )
2010-02-28 14:32:57 +01:00
{
2011-10-16 16:12:19 -04:00
check_size_for_overflow < T > ( size ) ;
2022-09-08 19:39:36 +00:00
T * result = static_cast < T * > ( aligned_malloc ( sizeof ( T ) * size ) ) ;
2014-07-22 13:16:44 +02:00
EIGEN_TRY
{
2022-09-08 19:39:36 +00:00
return default_construct_elements_of_array ( result , size ) ;
2014-07-22 13:16:44 +02:00
}
EIGEN_CATCH ( . . . )
{
aligned_free ( result ) ;
EIGEN_THROW ;
}
2016-09-14 09:56:11 -07:00
return result ;
2010-02-28 14:32:57 +01:00
}
2017-01-23 22:02:53 +01:00
/** \internal Same as aligned_new, except that the storage comes from
  * conditional_aligned_malloc<Align> and is released with
  * conditional_aligned_free<Align> if a constructor throws.
  */
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size)
{
  check_size_for_overflow<T>(size);
  T* const storage = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  EIGEN_TRY
  {
    default_construct_elements_of_array(storage, size);
  }
  EIGEN_CATCH(...)
  {
    conditional_aligned_free<Align>(storage);
    EIGEN_THROW;
  }
  return storage;
}
2010-10-25 10:15:22 -04:00
/** \internal Deletes objects constructed with aligned_new
2009-01-08 15:20:21 +00:00
* The \ a size parameters tells on how many objects to call the destructor of T .
*/
2017-01-23 22:02:53 +01:00
template < typename T > EIGEN_DEVICE_FUNC inline void aligned_delete ( T * ptr , std : : size_t size )
2008-08-26 19:12:23 +00:00
{
2010-10-25 10:15:22 -04:00
destruct_elements_of_array < T > ( ptr , size ) ;
2019-03-02 17:42:16 +00:00
Eigen : : internal : : aligned_free ( ptr ) ;
2009-01-08 15:20:21 +00:00
}
2010-10-25 10:15:22 -04:00
/** \internal Deletes objects constructed with conditional_aligned_new
2009-01-08 15:20:21 +00:00
* The \ a size parameters tells on how many objects to call the destructor of T .
*/
2017-01-23 22:02:53 +01:00
template < typename T , bool Align > EIGEN_DEVICE_FUNC inline void conditional_aligned_delete ( T * ptr , std : : size_t size )
2009-01-08 15:20:21 +00:00
{
2010-10-25 10:15:22 -04:00
destruct_elements_of_array < T > ( ptr , size ) ;
conditional_aligned_free < Align > ( ptr ) ;
2008-08-26 19:12:23 +00:00
}
2017-01-23 22:02:53 +01:00
/** \internal Resizes an array of constructed objects from \a old_size to \a new_size elements.
  *
  * A new buffer is allocated with conditional_aligned_malloc<Align>; the first
  * min(old_size, new_size) elements are move-constructed from \a pts, any extra
  * elements are default-constructed, and the old array is then destroyed and
  * released with conditional_aligned_delete<T,Align>.
  *
  * \param pts      current array (holding \a old_size constructed elements)
  * \param new_size requested number of elements
  * \param old_size current number of elements
  * \returns pointer to the new array
  *
  * If an exception escapes, every element already constructed in the new buffer is
  * destroyed before the buffer is released, and the exception is propagated.
  * Elements of \a pts that were already moved from stay in their moved-from state.
  */
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size)
{
  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);

  // If elements need to be explicitly initialized, we cannot simply realloc
  // (or memcpy) the memory block - each element needs to be reconstructed.
  // Otherwise, objects that contain internal pointers like mpfr or
  // AnnoyingScalar can be pointing to the wrong thing.
  T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*new_size));
  // Number of live objects in `result`; lets the handler destroy exactly those.
  std::size_t constructed = 0;
  EIGEN_TRY
  {
    // Move-construct initial elements.
    std::size_t copy_size = (std::min)(old_size, new_size);
    move_construct_elements_of_array(result, pts, copy_size);
    constructed = copy_size;
    // Default-construct remaining elements.
    if (new_size > old_size) {
      default_construct_elements_of_array(result + copy_size, new_size - old_size);
      constructed = new_size;
    }
    // Delete old elements.
    conditional_aligned_delete<T, Align>(pts, old_size);
  }
  EIGEN_CATCH(...)
  {
    // The construct helpers only clean up their own partial work, so the
    // elements completed before the failure must be destroyed here.
    destruct_elements_of_array(result, constructed);
    conditional_aligned_free<Align>(result);
    EIGEN_THROW;
  }

  return result;
}
2011-01-26 17:56:49 +01:00
2017-01-23 22:02:53 +01:00
/** \internal Allocates storage for \a size objects of type T with
  * conditional_aligned_malloc<Align>. The objects are default-constructed only
  * if NumTraits<T>::RequireInitialization is set.
  * \returns null when \a size is zero, the storage otherwise
  */
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size)
{
  if (size == 0)
    return 0; // short-cut. Also fixes Bug 884

  check_size_for_overflow<T>(size);
  T* const storage = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));

  // Types that need no initialization get the raw storage as is.
  if (!NumTraits<T>::RequireInitialization)
    return storage;

  EIGEN_TRY
  {
    default_construct_elements_of_array(storage, size);
  }
  EIGEN_CATCH(...)
  {
    conditional_aligned_free<Align>(storage);
    EIGEN_THROW;
  }
  return storage;
}
2022-10-22 22:51:31 +00:00
/** \internal Resizes an array from \a old_size to \a new_size elements.
  * Types with NumTraits<T>::RequireInitialization are handled element by element
  * through conditional_aligned_realloc_new; all other types are resized at the
  * byte level with conditional_aligned_realloc<Align>.
  */
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
{
  if (NumTraits<T>::RequireInitialization)
    return conditional_aligned_realloc_new<T, Align>(pts, new_size, old_size);

  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);

  const std::size_t new_bytes = sizeof(T)*new_size;
  const std::size_t old_bytes = sizeof(T)*old_size;
  void* const resized = conditional_aligned_realloc<Align>(static_cast<void*>(pts), new_bytes, old_bytes);
  return static_cast<T*>(resized);
}
2017-01-23 22:02:53 +01:00
/** \internal Counterpart of conditional_aligned_new_auto.
  * Destructors are run only if NumTraits<T>::RequireInitialization is set; the
  * storage is released with conditional_aligned_free<Align> in all cases.
  */
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size)
{
  if (NumTraits<T>::RequireInitialization)
  {
    destruct_elements_of_array<T>(ptr, size);
  }
  conditional_aligned_free<Align>(ptr);
}
2010-02-28 09:10:41 -05:00
/****************************************************************************/
2010-02-28 14:32:57 +01:00
2015-08-06 17:52:01 +02:00
/** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a Alignment.
2009-12-16 08:53:14 -05:00
*
2015-08-06 17:52:01 +02:00
* \ tparam Alignment requested alignment in Bytes .
2010-01-02 12:38:16 -05:00
* \ param array the address of the start of the array
* \ param size the size of the array
*
2015-08-06 17:52:01 +02:00
* \ note If no element of the array is well aligned or the requested alignment is not a multiple of a scalar ,
* the size of the array is returned . For example with SSE , the requested alignment is typically 16 - bytes . If
2010-01-02 12:38:16 -05:00
* packet size for the given scalar type is 1 , then everything is considered well - aligned .
*
2015-08-06 17:52:01 +02:00
* \ note Otherwise , if the Alignment is larger that the scalar size , we rely on the assumptions that sizeof ( Scalar ) is a
* power of 2. On the other hand , we do not assume that the array address is a multiple of sizeof ( Scalar ) , as that fails for
2010-01-02 12:38:16 -05:00
* example with Scalar = double on certain 32 - bit platforms , see bug # 79.
*
2010-10-25 10:15:22 -04:00
* There is also the variant first_aligned ( const MatrixBase & ) defined in DenseCoeffsBase . h .
2015-08-06 17:52:01 +02:00
* \ sa first_default_aligned ( )
2009-12-16 08:53:14 -05:00
*/
2015-08-06 17:52:01 +02:00
template < int Alignment , typename Scalar , typename Index >
2016-01-11 17:26:56 -08:00
EIGEN_DEVICE_FUNC inline Index first_aligned ( const Scalar * array , Index size )
2008-08-26 19:12:23 +00:00
{
2016-01-27 15:37:03 -08:00
const Index ScalarSize = sizeof ( Scalar ) ;
const Index AlignmentSize = Alignment / ScalarSize ;
const Index AlignmentMask = AlignmentSize - 1 ;
2010-02-25 21:01:52 -05:00
2015-08-06 17:52:01 +02:00
if ( AlignmentSize < = 1 )
2010-01-02 12:38:16 -05:00
{
2015-08-06 17:52:01 +02:00
// Either the requested alignment if smaller than a scalar, or it exactly match a 1 scalar
// so that all elements of the array have the same alignment.
2010-01-02 12:38:16 -05:00
return 0 ;
}
2016-05-26 10:52:12 +02:00
else if ( ( UIntPtr ( array ) & ( sizeof ( Scalar ) - 1 ) ) | | ( Alignment % ScalarSize ) ! = 0 )
2010-01-02 12:38:16 -05:00
{
2015-08-06 17:52:01 +02:00
// The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
2010-01-02 12:38:16 -05:00
// Consequently, no element of the array is well aligned.
return size ;
}
else
{
2016-05-26 10:52:12 +02:00
Index first = ( AlignmentSize - ( Index ( ( UIntPtr ( array ) / sizeof ( Scalar ) ) ) & AlignmentMask ) ) & AlignmentMask ;
2016-01-11 17:26:56 -08:00
return ( first < size ) ? first : size ;
2010-01-02 12:38:16 -05:00
}
2008-08-26 19:12:23 +00:00
}
2015-08-06 17:52:01 +02:00
/** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet requirement.
* \ sa first_aligned ( Scalar * , Index ) and first_default_aligned ( DenseBase < Derived > ) */
template < typename Scalar , typename Index >
2016-01-11 17:26:56 -08:00
EIGEN_DEVICE_FUNC inline Index first_default_aligned ( const Scalar * array , Index size )
2015-08-06 17:52:01 +02:00
{
2015-08-07 10:44:01 +02:00
typedef typename packet_traits < Scalar > : : type DefaultPacketType ;
return first_aligned < unpacket_traits < DefaultPacketType > : : alignment > ( array , size ) ;
2015-08-06 17:52:01 +02:00
}
2012-10-30 16:27:52 +01:00
/** \internal Returns the smallest integer multiple of \a base that is greater than or equal to \a size.
  * Computed by rounding size/base up and multiplying back by \a base.
  */
template<typename Index>
inline Index first_multiple(Index size, Index base)
{
  const Index block_count = (size + base - 1) / base;
  return block_count * base;
}
2011-06-09 19:04:06 +02:00
2019-11-05 17:17:58 -08:00
// std::copy is much slower than memcpy, hence smart_copy: it uses memcpy for
// trivial types, i.e. types that do not require an initialization ctor
// (NumTraits<T>::RequireInitialization is false), and std::copy for all others.
template<typename T, bool UseMemcpy> struct smart_copy_helper;

// Copies the range [start,end) to target, dispatching on NumTraits<T>::RequireInitialization.
template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
{
  typedef smart_copy_helper<T, !NumTraits<T>::RequireInitialization> Impl;
  Impl::run(start, end, target);
}

// Trivial types: raw byte copy.
template<typename T> struct smart_copy_helper<T,true> {
  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
  {
    const IntPtr byte_count = IntPtr(end) - IntPtr(start);
    // An empty range may legitimately come with null pointers.
    if (byte_count == 0) return;
    eigen_internal_assert(start!=0 && end!=0 && target!=0);
    EIGEN_USING_STD(memcpy)
    memcpy(target, start, byte_count);
  }
};

// Other types: element-wise assignment.
template<typename T> struct smart_copy_helper<T,false> {
  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
  {
    std::copy(start, end, target);
  }
};
2017-01-24 13:55:18 -08:00
// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
2013-11-05 15:40:58 +01:00
template < typename T , bool UseMemmove > struct smart_memmove_helper ;
template < typename T > void smart_memmove ( const T * start , const T * end , T * target )
{
2014-10-09 23:35:26 +02:00
smart_memmove_helper < T , ! NumTraits < T > : : RequireInitialization > : : run ( start , end , target ) ;
2013-11-05 15:40:58 +01:00
}
template < typename T > struct smart_memmove_helper < T , true > {
2014-10-09 23:35:26 +02:00
static inline void run ( const T * start , const T * end , T * target )
2016-02-19 22:58:52 +01:00
{
2016-05-26 10:52:12 +02:00
IntPtr size = IntPtr ( end ) - IntPtr ( start ) ;
2016-02-19 22:58:52 +01:00
if ( size = = 0 ) return ;
eigen_internal_assert ( start ! = 0 & & end ! = 0 & & target ! = 0 ) ;
std : : memmove ( target , start , size ) ;
}
2013-11-05 15:40:58 +01:00
} ;
template < typename T > struct smart_memmove_helper < T , false > {
2014-10-09 23:35:26 +02:00
static inline void run ( const T * start , const T * end , T * target )
2017-01-24 13:55:18 -08:00
{
2016-12-01 21:25:58 +01:00
if ( UIntPtr ( target ) < UIntPtr ( start ) )
2014-10-09 23:35:26 +02:00
{
std : : copy ( start , end , target ) ;
2013-11-05 15:40:58 +01:00
}
2017-01-24 13:55:18 -08:00
else
2014-10-09 23:35:26 +02:00
{
std : : ptrdiff_t count = ( std : : ptrdiff_t ( end ) - std : : ptrdiff_t ( start ) ) / sizeof ( T ) ;
2017-01-24 13:55:18 -08:00
std : : copy_backward ( start , end , target + count ) ;
2014-10-09 23:35:26 +02:00
}
}
2013-11-05 15:40:58 +01:00
} ;
2013-08-25 18:00:28 +09:00
2021-04-21 15:45:31 -07:00
/** \internal
  * Move-assigns the elements of [start,end) to the range starting at \a target
  * by forwarding to the std::move algorithm.
  * \returns the pointer returned by std::move
  */
template<typename T>
EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
{
  T* const result = std::move(start, end, target);
  return result;
}
2011-06-09 19:04:06 +02:00
2010-02-28 09:10:41 -05:00
/*****************************************************************************
* * * Implementation of runtime stack allocation ( falling back to malloc ) * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2011-03-19 01:06:50 +01:00
// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function
2018-07-17 14:16:48 -04:00
#if !defined(EIGEN_ALLOCA) && !defined(EIGEN_GPU_COMPILE_PHASE)
  #if EIGEN_OS_LINUX || EIGEN_OS_MAC || defined(alloca)
    #define EIGEN_ALLOCA alloca
  #elif EIGEN_COMP_MSVC
    #define EIGEN_ALLOCA _alloca
  #endif
#endif

// With clang -Oz -mthumb, alloca changes the stack pointer in a way that is
// not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because
// the compiler still emits bad code because stack allocation checks use "<=".
// TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772
// is fixed.
#if defined(__clang__) && defined(__thumb__)
  #undef EIGEN_ALLOCA
#endif
2011-03-19 10:27:47 +01:00
// This helper class constructs the allocated memory, and takes care of destructing and freeing the handled data
// at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions.
2014-10-09 23:34:05 +02:00
template < typename T > class aligned_stack_memory_handler : noncopyable
2011-03-19 01:06:50 +01:00
{
public :
2011-03-19 10:27:47 +01:00
/* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
* Note that \ a ptr can be 0 regardless of the other parameters .
* This constructor takes care of constructing / initializing the elements of the buffer if required by the scalar type T ( see NumTraits < T > : : RequireInitialization ) .
* In this case , the buffer elements will also be destructed when this handler will be destructed .
* Finally , if \ a dealloc is true , then the pointer \ a ptr is freed .
* */
2018-06-11 18:33:24 +02:00
EIGEN_DEVICE_FUNC
2017-01-23 22:02:53 +01:00
aligned_stack_memory_handler ( T * ptr , std : : size_t size , bool dealloc )
2011-03-19 10:27:47 +01:00
: m_ptr ( ptr ) , m_size ( size ) , m_deallocate ( dealloc )
2011-03-19 01:06:50 +01:00
{
2011-04-21 09:00:55 +02:00
if ( NumTraits < T > : : RequireInitialization & & m_ptr )
2022-09-08 19:39:36 +00:00
Eigen : : internal : : default_construct_elements_of_array ( m_ptr , size ) ;
2011-03-19 10:27:47 +01:00
}
2018-06-11 18:33:24 +02:00
EIGEN_DEVICE_FUNC
2011-03-19 10:27:47 +01:00
~ aligned_stack_memory_handler ( )
{
2011-04-21 09:00:55 +02:00
if ( NumTraits < T > : : RequireInitialization & & m_ptr )
2011-03-19 10:27:47 +01:00
Eigen : : internal : : destruct_elements_of_array < T > ( m_ptr , m_size ) ;
if ( m_deallocate )
2011-03-19 01:06:50 +01:00
Eigen : : internal : : aligned_free ( m_ptr ) ;
}
protected :
T * m_ptr ;
2017-01-23 22:02:53 +01:00
std : : size_t m_size ;
2011-03-19 10:27:47 +01:00
bool m_deallocate ;
2011-03-19 01:06:50 +01:00
} ;
2018-07-09 15:41:14 +02:00
# ifdef EIGEN_ALLOCA
template < typename Xpr , int NbEvaluations ,
bool MapExternalBuffer = nested_eval < Xpr , NbEvaluations > : : Evaluate & & Xpr : : MaxSizeAtCompileTime = = Dynamic
>
struct local_nested_eval_wrapper
{
2022-04-04 17:33:33 +00:00
static constexpr bool NeedExternalBuffer = false ;
2018-07-09 15:41:14 +02:00
typedef typename Xpr : : Scalar Scalar ;
typedef typename nested_eval < Xpr , NbEvaluations > : : type ObjectType ;
ObjectType object ;
EIGEN_DEVICE_FUNC
local_nested_eval_wrapper ( const Xpr & xpr , Scalar * ptr ) : object ( xpr )
{
EIGEN_UNUSED_VARIABLE ( ptr ) ;
eigen_internal_assert ( ptr = = 0 ) ;
}
} ;
/** \internal
  * Specialization for MapExternalBuffer == true: the expression is evaluated into a buffer
  * viewed through a Map. When the caller passes a null \a ptr the buffer is obtained from
  * aligned_malloc and released by the destructor; otherwise the caller-provided buffer is used
  * and left untouched at destruction, apart from destructing its elements when the scalar type
  * requires initialization.
  */
template<typename Xpr, int NbEvaluations>
struct local_nested_eval_wrapper<Xpr,NbEvaluations,true>
{
  static constexpr bool NeedExternalBuffer = true;

  using Scalar      = typename Xpr::Scalar;
  using PlainObject = typename plain_object_eval<Xpr>::type;
  using ObjectType  = Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES>;

  ObjectType object;

  EIGEN_DEVICE_FUNC
  local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
    : object(ptr!=0 ? ptr
                    : reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())),
             xpr.rows(), xpr.cols()),
      m_deallocate(ptr==0)
  {
    // Construct the scalars in place before assigning the expression to them.
    if(NumTraits<Scalar>::RequireInitialization && object.data())
    {
      Eigen::internal::default_construct_elements_of_array(object.data(), object.size());
    }
    object = xpr;
  }

  EIGEN_DEVICE_FUNC
  ~local_nested_eval_wrapper()
  {
    if(NumTraits<Scalar>::RequireInitialization && object.data())
    {
      Eigen::internal::destruct_elements_of_array(object.data(), object.size());
    }
    if(m_deallocate)
    {
      Eigen::internal::aligned_free(object.data());
    }
  }

private:
  bool m_deallocate; // true when the buffer was allocated by the constructor
};
# endif // EIGEN_ALLOCA
2014-10-09 23:34:05 +02:00
/** \internal
  * Minimal owner of a heap array allocated with new[]: the array is created in the
  * constructor and released with delete[] in the destructor.
  */
template<typename T> class scoped_array : noncopyable
{
    T* m_ptr;

  public:
    explicit scoped_array(std::ptrdiff_t size)
      : m_ptr(new T[size])
    {}

    ~scoped_array() { delete[] m_ptr; }

    // Unchecked element access.
    T& operator[](std::ptrdiff_t i) { return *(m_ptr + i); }
    const T& operator[](std::ptrdiff_t i) const { return *(m_ptr + i); }

    // Access to the underlying pointer; the non-const overload returns a reference to it.
    T* &ptr() { return m_ptr; }
    const T* ptr() const { return m_ptr; }

    operator const T*() const { return m_ptr; }
};
/** \internal Exchanges the arrays held by \a a and \a b by swapping their pointers. */
template<typename T> void swap(scoped_array<T> &a, scoped_array<T> &b)
{
  T* const held_by_a = a.ptr();
  a.ptr() = b.ptr();
  b.ptr() = held_by_a;
}
2017-01-24 13:55:18 -08:00
2012-04-15 11:06:28 +01:00
} // end namespace internal
2011-03-19 01:06:50 +01:00
2008-08-26 19:12:23 +00:00
/** \internal
2018-07-13 11:03:04 -07:00
*
2018-07-09 15:41:14 +02:00
* The macro ei_declare_aligned_stack_constructed_variable ( TYPE , NAME , SIZE , BUFFER ) declares , allocates ,
  * and constructs an aligned buffer named NAME of SIZE elements of type TYPE on the stack
* if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT , and if stack allocation is supported by the platform
* ( currently , this is Linux , OSX and Visual Studio only ) . Otherwise the memory is allocated on the heap .
2011-03-19 01:06:50 +01:00
* The allocated buffer is automatically deleted when exiting the scope of this declaration .
2011-06-09 19:04:06 +02:00
* If BUFFER is non null , then the declared variable is simply an alias for BUFFER , and no allocation / deletion occurs .
2011-03-19 01:06:50 +01:00
* Here is an example :
2008-08-26 19:12:23 +00:00
* \ code
2011-03-19 01:06:50 +01:00
* {
* ei_declare_aligned_stack_constructed_variable ( float , data , size , 0 ) ;
* // use data[0] to data[size-1]
* }
2008-08-26 19:12:23 +00:00
* \ endcode
2011-03-19 01:06:50 +01:00
  * The underlying stack allocation function can be controlled with the EIGEN_ALLOCA preprocessor token.
2018-07-13 11:03:04 -07:00
*
2018-07-09 15:41:14 +02:00
  * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogous to
* \ code
  *   typename internal::nested_eval<XPR_T,N>::type NAME(XPR);
* \ endcode
* with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown .
  * This is accomplished through alloca if the latter is supported and if the required number of bytes
* is below EIGEN_STACK_ALLOCATION_LIMIT .
2008-08-26 19:12:23 +00:00
*/
2011-03-19 01:06:50 +01:00
// Two implementations of the stack-allocation macros follow:
//  - with EIGEN_ALLOCA: small buffers come from the stack, larger ones from aligned_malloc;
//  - without EIGEN_ALLOCA: every buffer comes from aligned_malloc.
// The macro arguments SIZE, BUFFER and XPR are parenthesized wherever they appear inside a larger
// expression, so that compound arguments (e.g. "n+1") expand correctly.
// Note that SIZE, BUFFER and XPR are expanded several times: avoid arguments with side effects.
#ifdef EIGEN_ALLOCA

  #if EIGEN_DEFAULT_ALIGN_BYTES>0
    // We always manually re-align the result of EIGEN_ALLOCA.
    // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((Eigen::internal::UIntPtr(EIGEN_ALLOCA((SIZE)+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
  #else
    #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
  #endif

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
               : reinterpret_cast<TYPE*>( \
                      (sizeof(TYPE)*(SIZE)<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*(SIZE)) \
                    : Eigen::internal::aligned_malloc(sizeof(TYPE)*(SIZE)) ); \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT)

  #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \
    Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
      ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*(XPR).size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \
        ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*(XPR).size() ) : 0 ) ) ; \
    typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object)

#else

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*(SIZE))); \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)

  #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR)

#endif
2009-01-06 03:16:50 +00:00
2009-02-04 16:53:03 +00:00
2010-02-28 09:10:41 -05:00
/*****************************************************************************
* * * Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [ _IF ] * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2019-02-20 13:52:11 +01:00
// The EIGEN_MAKE_ALIGNED_OPERATOR_NEW* macros are meant to be expanded inside a class body.
// Three configurations are handled below; each of them defines the same set of four macros:
//  - C++17 over-aligned new is available: all macros expand to nothing;
//  - alignment is enabled (and we are not compiling HIP device code): the macros declare class-level
//    operator new/delete overloads forwarding to conditional_aligned_malloc/conditional_aligned_free;
//  - otherwise: the macros expand to nothing.
#if EIGEN_HAS_CXX17_OVERALIGN

// C++17 -> no need to bother about alignment anymore :)

#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)

#else

// HIP does not support new/delete on device.
#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      EIGEN_DEVICE_FUNC \
      void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
        EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
        EIGEN_CATCH (...) { return 0; } \
      }
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
      EIGEN_DEVICE_FUNC \
      void *operator new(std::size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      EIGEN_DEVICE_FUNC \
      void *operator new[](std::size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      EIGEN_DEVICE_FUNC \
      void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      /* in-place new and delete. since (at least afaik) there is no actual   */ \
      /* memory allocated we can safely let the default implementation handle */ \
      /* this particular case. */ \
      EIGEN_DEVICE_FUNC \
      static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
      EIGEN_DEVICE_FUNC \
      static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
      /* nothrow-new (returns zero instead of std::bad_alloc) */ \
      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      EIGEN_DEVICE_FUNC \
      void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
      } \
      typedef void eigen_aligned_operator_new_marker_type;
#else
  // No alignment requested (or HIP device pass): both macros expand to nothing.
  // The NOTHROW variant is defined here too, so that the same macros exist in every configuration.
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
#endif

#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool( \
        ((Size)!=Eigen::Dynamic) && \
        (((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES  )==0)) || \
         ((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) || \
         ((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0))   )))

#endif
2010-02-28 09:10:41 -05:00
/****************************************************************************/
2009-02-04 16:53:03 +00:00
2009-01-09 00:55:53 +00:00
/** \class aligned_allocator
2010-07-06 13:10:08 +01:00
* \ ingroup Core_Module
2009-01-09 00:55:53 +00:00
*
2021-11-13 13:11:06 +02:00
* \ brief STL compatible allocator to use with types requiring a non - standard alignment .
2017-09-20 10:22:00 +02:00
*
* The memory is aligned as for dynamically aligned matrix / array types such as MatrixXd .
* By default , it will thus provide at least 16 bytes alignment and more in following cases :
* - 32 bytes alignment if AVX is enabled .
* - 64 bytes alignment if AVX512 is enabled .
*
2018-03-11 10:01:44 -04:00
* This can be controlled using the \ c EIGEN_MAX_ALIGN_BYTES macro as documented
2017-09-20 10:22:00 +02:00
* \ link TopicPreprocessorDirectivesPerformance there \ endlink .
2009-01-09 00:55:53 +00:00
*
* Example :
* \ code
2009-01-10 02:50:09 +00:00
* // Matrix4f requires 16 bytes alignment:
2017-01-24 13:55:18 -08:00
* std : : map < int , Matrix4f , std : : less < int > ,
2010-11-12 12:06:24 +00:00
* aligned_allocator < std : : pair < const int , Matrix4f > > > my_map_mat4 ;
2009-01-09 00:55:53 +00:00
* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
2009-01-10 02:50:09 +00:00
* std : : map < int , Vector3f > my_map_vec3 ;
2009-01-09 00:55:53 +00:00
* \ endcode
*
2015-12-30 16:45:44 +01:00
* \ sa \ blank \ ref TopicStlContainers .
2009-01-09 00:55:53 +00:00
*/
template < class T >
2014-03-19 13:28:50 +01:00
class aligned_allocator : public std : : allocator < T >
2009-01-09 00:55:53 +00:00
{
public :
2017-01-23 22:02:53 +01:00
typedef std : : size_t size_type ;
2014-03-19 13:28:50 +01:00
typedef std : : ptrdiff_t difference_type ;
typedef T * pointer ;
typedef const T * const_pointer ;
typedef T & reference ;
typedef const T & const_reference ;
typedef T value_type ;
template < class U >
struct rebind
{
typedef aligned_allocator < U > other ;
} ;
2009-01-09 00:55:53 +00:00
2014-03-19 13:28:50 +01:00
aligned_allocator ( ) : std : : allocator < T > ( ) { }
2014-01-10 11:02:11 +01:00
2014-03-19 13:28:50 +01:00
aligned_allocator ( const aligned_allocator & other ) : std : : allocator < T > ( other ) { }
2009-01-09 00:55:53 +00:00
2014-03-19 13:28:50 +01:00
template < class U >
aligned_allocator ( const aligned_allocator < U > & other ) : std : : allocator < T > ( other ) { }
2014-01-10 11:02:11 +01:00
2014-03-19 13:28:50 +01:00
~ aligned_allocator ( ) { }
2009-11-07 09:07:23 +01:00
2018-10-07 15:00:05 +02:00
# if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
// In gcc std::allocator::max_size() is bugged making gcc triggers a warning:
// eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
size_type max_size ( ) const {
return ( std : : numeric_limits < std : : ptrdiff_t > : : max ) ( ) / sizeof ( T ) ;
}
# endif
2014-03-19 13:28:50 +01:00
pointer allocate ( size_type num , const void * /*hint*/ = 0 )
{
internal : : check_size_for_overflow < T > ( num ) ;
2018-10-07 15:00:05 +02:00
return static_cast < pointer > ( internal : : aligned_malloc ( num * sizeof ( T ) ) ) ;
2014-03-19 13:28:50 +01:00
}
2009-11-07 09:07:23 +01:00
2014-03-19 13:28:50 +01:00
void deallocate ( pointer p , size_type /*num*/ )
{
internal : : aligned_free ( p ) ;
}
2009-01-09 00:55:53 +00:00
} ;
2010-06-21 11:59:37 +02:00
//---------- Cache sizes ----------
2012-02-02 12:05:02 +01:00
// EIGEN_CPUID(abcd,func,id) executes the x86 CPUID instruction with EAX=func and ECX=id and stores
// the resulting EAX, EBX, ECX, EDX into abcd[0], abcd[1], abcd[2], abcd[3] respectively.
// The macro is left undefined when EIGEN_NO_CPUID is defined or when none of the
// compiler/architecture combinations below matches; code using it must test #ifdef EIGEN_CPUID.
# if !defined(EIGEN_NO_CPUID)
2014-11-04 21:58:52 +01:00
# if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
# if defined(__PIC__) && EIGEN_ARCH_i386
2012-02-02 12:05:02 +01:00
// Case for x86 with PIC
// EBX is exchanged with a scratch register around the cpuid instruction, so the EBX result
// ends up in that scratch register (presumably because EBX is reserved under PIC -- confirm).
# define EIGEN_CPUID(abcd,func,id) \
2013-03-19 14:11:59 +01:00
__asm__ __volatile__ ( " xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1 " : " =a " ( abcd [ 0 ] ) , " =&r " ( abcd [ 1 ] ) , " =c " ( abcd [ 2 ] ) , " =d " ( abcd [ 3 ] ) : " a " ( func ) , " c " ( id ) ) ;
2014-11-04 21:58:52 +01:00
# elif defined(__PIC__) && EIGEN_ARCH_x86_64
2013-03-19 14:11:59 +01:00
// Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
// However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
2013-03-19 14:00:42 +01:00
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ( " xchg{q} \t {%%}rbx, %q1; cpuid; xchg{q} \t {%%}rbx, %q1 " : " =a " ( abcd [ 0 ] ) , " =&r " ( abcd [ 1 ] ) , " =c " ( abcd [ 2 ] ) , " =d " ( abcd [ 3 ] ) : " 0 " ( func ) , " 2 " ( id ) ) ;
2012-02-02 12:05:02 +01:00
# else
// Case for x86_64 or x86 w/o PIC
// Here the four registers are bound directly through the a/b/c/d constraints.
# define EIGEN_CPUID(abcd,func,id) \
2013-03-19 14:00:42 +01:00
__asm__ __volatile__ ( " cpuid " : " =a " ( abcd [ 0 ] ) , " =b " ( abcd [ 1 ] ) , " =c " ( abcd [ 2 ] ) , " =d " ( abcd [ 3 ] ) : " 0 " ( func ) , " 2 " ( id ) ) ;
2012-02-02 12:05:02 +01:00
# endif
2014-11-04 21:58:52 +01:00
# elif EIGEN_COMP_MSVC
2021-12-01 00:48:34 +00:00
# if EIGEN_ARCH_i386_OR_x86_64
2012-02-02 12:05:02 +01:00
// MSVC on x86/x86_64: forward to the __cpuidex intrinsic.
# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
# endif
2010-07-06 11:02:01 +02:00
# endif
2010-06-24 10:05:24 +02:00
# endif
2010-06-21 11:59:37 +02:00
2010-10-25 10:15:22 -04:00
namespace internal {
2010-06-24 09:45:17 +02:00
# ifdef EIGEN_CPUID
2010-10-25 10:15:22 -04:00
2014-05-01 23:03:54 +02:00
/** \internal
  * Checks whether the registers returned by CPUID leaf 0 spell the given vendor string.
  * The three words of \a vendor are compared against abcd[1], abcd[3] and abcd[2], in that order.
  * \returns true if all three words match
  */
inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
{
  if(abcd[1] != vendor[0]) return false;
  if(abcd[3] != vendor[1]) return false;
  return abcd[2] == vendor[2];
}
2010-10-25 10:15:22 -04:00
inline void queryCacheSizes_intel_direct ( int & l1 , int & l2 , int & l3 )
2010-06-23 17:14:06 +02:00
{
int abcd [ 4 ] ;
l1 = l2 = l3 = 0 ;
int cache_id = 0 ;
int cache_type = 0 ;
do {
2010-06-26 23:15:06 +02:00
abcd [ 0 ] = abcd [ 1 ] = abcd [ 2 ] = abcd [ 3 ] = 0 ;
2010-06-23 17:14:06 +02:00
EIGEN_CPUID ( abcd , 0x4 , cache_id ) ;
cache_type = ( abcd [ 0 ] & 0x0F ) > > 0 ;
if ( cache_type = = 1 | | cache_type = = 3 ) // data or unified cache
{
int cache_level = ( abcd [ 0 ] & 0xE0 ) > > 5 ; // A[7:5]
int ways = ( abcd [ 1 ] & 0xFFC00000 ) > > 22 ; // B[31:22]
int partitions = ( abcd [ 1 ] & 0x003FF000 ) > > 12 ; // B[21:12]
int line_size = ( abcd [ 1 ] & 0x00000FFF ) > > 0 ; // B[11:0]
int sets = ( abcd [ 2 ] ) ; // C[31:0]
2010-06-24 09:29:43 +02:00
2010-06-23 17:14:06 +02:00
int cache_size = ( ways + 1 ) * ( partitions + 1 ) * ( line_size + 1 ) * ( sets + 1 ) ;
2010-06-24 09:29:43 +02:00
2010-06-23 17:14:06 +02:00
switch ( cache_level )
{
case 1 : l1 = cache_size ; break ;
case 2 : l2 = cache_size ; break ;
case 3 : l3 = cache_size ; break ;
default : break ;
}
}
cache_id + + ;
2010-06-27 00:17:38 +02:00
} while ( cache_type > 0 & & cache_id < 16 ) ;
}
2010-10-25 10:15:22 -04:00
/** \internal
  * Queries the cache sizes through the descriptor bytes returned by CPUID leaf 2.
  *
  * The four result registers are viewed as a 16-byte array; the bytes from offset 2 up to
  * offset 15 are each looked up in the descriptor table below. Table entries are in KB and
  * are converted to bytes at the end. Later matching descriptors overwrite earlier ones.
  * Outputs for which no descriptor matches are 0.
  *
  * NOTE(review): the scan skips the first two bytes of EAX and does not test whether a register
  * holds valid descriptors -- confirm against the CPUID leaf 2 documentation that this is intended.
  * NOTE(review): descriptors 0x15 and 0x30 are commented as *code* L1 caches but are stored in l1,
  * which the callers treat as the L1 data cache size -- confirm.
  */
inline void queryCacheSizes_intel_codes ( int & l1 , int & l2 , int & l3 )
2010-06-27 00:17:38 +02:00
{
int abcd [ 4 ] ;
abcd [ 0 ] = abcd [ 1 ] = abcd [ 2 ] = abcd [ 3 ] = 0 ;
l1 = l2 = l3 = 0 ;
EIGEN_CPUID ( abcd , 0x00000002 , 0 ) ;
// Byte-wise view of the registers, starting at offset 2; 14 bytes remain up to the end of abcd.
unsigned char * bytes = reinterpret_cast < unsigned char * > ( abcd ) + 2 ;
// Set when descriptor 0x49 is seen while l2 is still 0 (see the 0x49 case and the fix-up after the loop).
bool check_for_p2_core2 = false ;
for ( int i = 0 ; i < 14 ; + + i )
{
switch ( bytes [ i ] )
{
case 0x0A : l1 = 8 ; break ; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines
case 0x0C : l1 = 16 ; break ; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines
case 0x0E : l1 = 24 ; break ; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines
case 0x10 : l1 = 16 ; break ; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
case 0x15 : l1 = 16 ; break ; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
case 0x2C : l1 = 32 ; break ; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines
case 0x30 : l1 = 32 ; break ; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines
case 0x60 : l1 = 16 ; break ; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
case 0x66 : l1 = 8 ; break ; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
case 0x67 : l1 = 16 ; break ; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
case 0x68 : l1 = 32 ; break ; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
case 0x1A : l2 = 96 ; break ; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
case 0x22 : l3 = 512 ; break ; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
case 0x23 : l3 = 1024 ; break ; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
case 0x25 : l3 = 2048 ; break ; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
case 0x29 : l3 = 4096 ; break ; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
case 0x39 : l2 = 128 ; break ; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
case 0x3A : l2 = 192 ; break ; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
case 0x3B : l2 = 128 ; break ; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
case 0x3C : l2 = 256 ; break ; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
case 0x3D : l2 = 384 ; break ; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
case 0x3E : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
case 0x40 : l2 = 0 ; break ; // no integrated L2 cache (P6 core) or L3 cache (P4 core)
case 0x41 : l2 = 128 ; break ; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
case 0x42 : l2 = 256 ; break ; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
case 0x43 : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
case 0x44 : l2 = 1024 ; break ; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
case 0x45 : l2 = 2048 ; break ; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
case 0x46 : l3 = 4096 ; break ; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
case 0x47 : l3 = 8192 ; break ; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
case 0x48 : l2 = 3072 ; break ; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
case 0x49 : if ( l2 ! = 0 ) l3 = 4096 ; else { check_for_p2_core2 = true ; l3 = l2 = 4096 ; } break ; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
case 0x4A : l3 = 6144 ; break ; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
case 0x4B : l3 = 8192 ; break ; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
case 0x4C : l3 = 12288 ; break ; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
case 0x4D : l3 = 16384 ; break ; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
case 0x4E : l2 = 6144 ; break ; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
case 0x78 : l2 = 1024 ; break ; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
case 0x79 : l2 = 128 ; break ; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
case 0x7A : l2 = 256 ; break ; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
case 0x7B : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
case 0x7C : l2 = 1024 ; break ; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
case 0x7D : l2 = 2048 ; break ; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
case 0x7E : l2 = 256 ; break ; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
case 0x7F : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
case 0x80 : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
case 0x81 : l2 = 128 ; break ; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
case 0x82 : l2 = 256 ; break ; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
case 0x83 : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
case 0x84 : l2 = 1024 ; break ; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
case 0x85 : l2 = 2048 ; break ; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
case 0x86 : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
case 0x87 : l2 = 1024 ; break ; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
case 0x88 : l3 = 2048 ; break ; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
case 0x89 : l3 = 4096 ; break ; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
case 0x8A : l3 = 8192 ; break ; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
case 0x8D : l3 = 3072 ; break ; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
default : break ;
}
}
// Descriptor 0x49 was provisionally stored as both L2 and L3; if no other descriptor made
// the two values differ, keep it as L2 only and report no L3.
if ( check_for_p2_core2 & & l2 = = l3 )
l3 = 0 ;
// Convert from KB to bytes.
l1 * = 1024 ;
l2 * = 1024 ;
l3 * = 1024 ;
}
2010-10-25 10:15:22 -04:00
inline void queryCacheSizes_intel ( int & l1 , int & l2 , int & l3 , int max_std_funcs )
2010-06-27 00:17:38 +02:00
{
if ( max_std_funcs > = 4 )
2010-10-25 10:15:22 -04:00
queryCacheSizes_intel_direct ( l1 , l2 , l3 ) ;
2021-01-05 12:51:10 -08:00
else if ( max_std_funcs > = 2 )
2010-10-25 10:15:22 -04:00
queryCacheSizes_intel_codes ( l1 , l2 , l3 ) ;
2021-01-05 12:51:10 -08:00
else
l1 = l2 = l3 = 0 ;
2010-06-23 17:14:06 +02:00
}
2010-10-25 10:15:22 -04:00
inline void queryCacheSizes_amd ( int & l1 , int & l2 , int & l3 )
2010-06-23 17:14:06 +02:00
{
int abcd [ 4 ] ;
2010-06-26 23:15:06 +02:00
abcd [ 0 ] = abcd [ 1 ] = abcd [ 2 ] = abcd [ 3 ] = 0 ;
2021-01-05 12:51:10 -08:00
// First query the max supported function.
EIGEN_CPUID ( abcd , 0x80000000 , 0 ) ;
2021-01-20 08:34:00 -08:00
if ( static_cast < numext : : uint32_t > ( abcd [ 0 ] ) > = static_cast < numext : : uint32_t > ( 0x80000006 ) )
2021-01-05 12:51:10 -08:00
{
EIGEN_CPUID ( abcd , 0x80000005 , 0 ) ;
l1 = ( abcd [ 2 ] > > 24 ) * 1024 ; // C[31:24] = L1 size in KB
abcd [ 0 ] = abcd [ 1 ] = abcd [ 2 ] = abcd [ 3 ] = 0 ;
EIGEN_CPUID ( abcd , 0x80000006 , 0 ) ;
l2 = ( abcd [ 2 ] > > 16 ) * 1024 ; // C[31;16] = l2 cache size in KB
l3 = ( ( abcd [ 3 ] & 0xFFFC000 ) > > 18 ) * 512 * 1024 ; // D[31;18] = l3 cache size in 512KB
}
else
{
l1 = l2 = l3 = 0 ;
}
2010-06-23 17:14:06 +02:00
}
2010-06-24 09:45:17 +02:00
# endif
2010-06-23 17:14:06 +02:00
2010-06-21 11:59:37 +02:00
/** \internal
2010-06-23 16:34:51 +02:00
* Queries and returns the cache sizes in Bytes of the L1 , L2 , and L3 data caches respectively */
2010-10-25 10:15:22 -04:00
inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
  #ifdef EIGEN_CPUID
  // Vendor strings as returned by CPUID leaf 0, split into three 32-bit words.
  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
  const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"

  // identify the CPU vendor
  int abcd[4];
  EIGEN_CPUID(abcd,0x0,0);
  int max_std_funcs = abcd[0];

  const bool is_intel = cpuid_is_vendor(abcd,GenuineIntel);
  const bool is_amd   = cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_);

  if(is_amd && !is_intel)
  {
    queryCacheSizes_amd(l1,l2,l3);
  }
  else
  {
    // Intel, and by default any other vendor: let's use Intel's API
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  }

  // here is the list of other vendors:
  //   ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
  //   ||cpuid_is_vendor(abcd,"CyrixInstead")
  //   ||cpuid_is_vendor(abcd,"CentaurHauls")
  //   ||cpuid_is_vendor(abcd,"GenuineTMx86")
  //   ||cpuid_is_vendor(abcd,"TransmetaCPU")
  //   ||cpuid_is_vendor(abcd,"RiseRiseRise")
  //   ||cpuid_is_vendor(abcd,"Geode by NSC")
  //   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
  //   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
  //   ||cpuid_is_vendor(abcd,"NexGenDriven")
  #else
  // No CPUID support: report all sizes as unknown.
  l1 = l2 = l3 = -1;
  #endif
}
2010-06-23 16:34:51 +02:00
/** \internal
* \ returns the size in Bytes of the L1 data cache */
2010-10-25 10:15:22 -04:00
inline int queryL1CacheSize ( )
2010-06-23 16:34:51 +02:00
{
int l1 ( - 1 ) , l2 , l3 ;
2010-10-25 10:15:22 -04:00
queryCacheSizes ( l1 , l2 , l3 ) ;
2010-06-23 16:34:51 +02:00
return l1 ;
}
2010-06-21 11:59:37 +02:00
/** \internal
* \ returns the size in Bytes of the L2 or L3 cache if this later is present */
2010-10-25 10:15:22 -04:00
inline int queryTopLevelCacheSize ( )
2010-06-21 11:59:37 +02:00
{
2010-06-23 16:34:51 +02:00
int l1 , l2 ( - 1 ) , l3 ( - 1 ) ;
2010-10-25 10:15:22 -04:00
queryCacheSizes ( l1 , l2 , l3 ) ;
2011-07-21 11:19:36 +02:00
return ( std : : max ) ( l2 , l3 ) ;
2010-06-21 11:59:37 +02:00
}
2022-03-08 20:43:22 +00:00
/** \internal
* This wraps C + + 20 ' s std : : construct_at , using placement new instead if it is not available .
*/
# if EIGEN_COMP_CXXVER >= 20
using std : : construct_at ;
# else
template < class T , class . . . Args >
2022-03-09 16:47:53 +00:00
EIGEN_DEVICE_FUNC T * construct_at ( T * p , Args & & . . . args )
2022-03-08 20:43:22 +00:00
{
return : : new ( const_cast < void * > ( static_cast < const volatile void * > ( p ) ) )
T ( std : : forward < Args > ( args ) . . . ) ;
}
# endif
/** \internal
* This wraps C + + 17 ' s std : : destroy_at . If it ' s not available it calls the destructor .
* The wrapper is not a full replacement for C + + 20 ' s std : : destroy_at as it cannot
* be applied to std : : array .
*/
# if EIGEN_COMP_CXXVER >= 17
using std : : destroy_at ;
# else
template < class T >
2022-03-09 16:47:53 +00:00
EIGEN_DEVICE_FUNC void destroy_at ( T * p )
2022-03-08 20:43:22 +00:00
{
p - > ~ T ( ) ;
}
# endif
2010-10-25 10:15:22 -04:00
} // end namespace internal
2012-04-15 11:06:28 +01:00
} // end namespace Eigen
2008-08-26 19:12:23 +00:00
# endif // EIGEN_MEMORY_H