// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
// Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.

/*****************************************************************************
*** Platform checks for aligned malloc functions                           ***
*****************************************************************************/

# ifndef EIGEN_MEMORY_H
# define EIGEN_MEMORY_H

// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
//   http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
// This leaves the question of how to detect 64-bit. According to this document,
//   http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
// EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED is 1 when building against glibc >= 2.8 on an
// LP64 target, and 0 otherwise.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
 && defined(__LP64__)
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
#endif

// FreeBSD 6 seems to have 16-byte aligned malloc
//   See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
//   See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
// EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED is 1 on FreeBSD except when targeting ARM or MIPS,
// and 0 everywhere else.
#if defined(__FreeBSD__) && !defined(__arm__) && !defined(__mips__)
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif
2010-02-28 10:10:53 -05:00
// EIGEN_MALLOC_ALREADY_ALIGNED is 1 when plain malloc is treated as returning 16-byte
// aligned memory: on Apple platforms, on 64-bit Windows, or when one of the glibc/FreeBSD
// checks (EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED, EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED) is 1.
#if defined(__APPLE__) \
 || defined(_WIN64) \
 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif
2010-03-05 09:44:21 +01:00
// EIGEN_HAS_POSIX_MEMALIGN is 1 when one of the feature-selection conditions holds
// (QNX, _GNU_SOURCE, or _XOPEN_SOURCE >= 600) and _POSIX_ADVISORY_INFO is defined
// to a positive value; it is 0 otherwise.
#if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) \
 && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
  #define EIGEN_HAS_POSIX_MEMALIGN 1
#else
  #define EIGEN_HAS_POSIX_MEMALIGN 0
#endif
// EIGEN_HAS_MM_MALLOC is 1 exactly when EIGEN_VECTORIZE_SSE is defined, and 0 otherwise.
#if !defined(EIGEN_VECTORIZE_SSE)
  #define EIGEN_HAS_MM_MALLOC 0
#else
  #define EIGEN_HAS_MM_MALLOC 1
#endif
2010-10-25 10:15:22 -04:00
namespace internal {

/*****************************************************************************
*** Implementation of handmade aligned functions                           ***
*****************************************************************************/

/* ----- Hand made implementations of aligned malloc/free and realloc ----- */
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
  * Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
  *
  * The address obtained from std::malloc is written into the pointer-sized slot located
  * immediately before the returned address, which is how handmade_aligned_free() and
  * handmade_aligned_realloc() recover it.
  *
  * \returns the aligned pointer, or 0 if std::malloc fails or if \a size + 16 does not
  *          fit into a size_t.
  */
inline void* handmade_aligned_malloc(size_t size)
{
  // Refuse requests for which size + 16 would wrap around to a small value.
  if (size > size_t(-1) - 16) return 0;

  void* original = std::malloc(size + 16);
  if (original == 0) return 0;

  // Round the address down to a multiple of 16, then move 16 bytes forward: the result
  // is 16-byte aligned and lies 1 to 16 bytes past 'original'.
  // NOTE(review): storing the bookkeeping pointer below assumes that gap is at least
  // sizeof(void*), i.e. that std::malloc returns suitably aligned memory -- confirm for
  // exotic platforms.
  void* aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}

/** \internal Frees memory allocated with handmade_aligned_malloc.
  * A null \a ptr is ignored.
  */
inline void handmade_aligned_free(void* ptr)
{
  // The pointer originally returned by std::malloc sits just before the aligned block.
  if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
}

/** \internal
  * \brief Reallocates aligned memory.
  * Since we know that our handmade version is based on std::malloc
  * we can use std::realloc to implement efficient reallocation.
  *
  * std::realloc may move the underlying block to an address whose distance to the next
  * 16-byte boundary differs from the old one. In that case the payload is moved so that
  * it starts at the new aligned address again.
  *
  * The third (unnamed) parameter is ignored; it only keeps the signature parallel to
  * the other realloc helpers.
  *
  * \returns the new aligned pointer, or 0 on failure (the old block is then left as is).
  */
inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
{
  if (ptr == 0) return handmade_aligned_malloc(size);
  if (size > size_t(-1) - 16) return 0;

  void* original = *(reinterpret_cast<void**>(ptr) - 1);
  // Distance between the raw block and the payload before reallocation.
  const std::ptrdiff_t previous_offset = static_cast<char*>(ptr) - static_cast<char*>(original);

  original = std::realloc(original, size + 16);
  if (original == 0) return 0;

  void* aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
  // std::realloc preserved the bytes relative to the raw block, so the payload currently
  // starts at original + previous_offset; shift it if the aligned address differs.
  void* previous_aligned = static_cast<char*>(original) + previous_offset;
  if (aligned != previous_aligned)
    std::memmove(aligned, previous_aligned, size);

  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}

/*****************************************************************************
*** Implementation of generic aligned realloc (when no realloc can be used)***
*****************************************************************************/

// Forward declarations: generic_aligned_realloc() below calls these two functions
// before their definitions appear later in this file.
void * aligned_malloc ( size_t size ) ;
void aligned_free ( void * ptr ) ;
2010-02-28 09:10:41 -05:00
2010-03-05 09:44:21 +01:00
/** \internal
2010-02-28 14:32:57 +01:00
* \ brief Reallocates aligned memory .
* Allows reallocation with aligned ptr types . This implementation will
2010-03-05 09:44:21 +01:00
* always create a new memory chunk and copy the old data .
2010-02-28 14:32:57 +01:00
*/
2010-10-25 10:15:22 -04:00
inline void * generic_aligned_realloc ( void * ptr , size_t size , size_t old_size )
2010-02-27 17:25:07 +01:00
{
2010-02-28 14:32:57 +01:00
if ( ptr = = 0 )
2010-10-25 10:15:22 -04:00
return aligned_malloc ( size ) ;
2010-02-28 14:32:57 +01:00
2010-02-27 18:57:07 -05:00
if ( size = = 0 )
2010-02-27 17:25:07 +01:00
{
2010-10-25 10:15:22 -04:00
aligned_free ( ptr ) ;
2010-02-27 18:57:07 -05:00
return 0 ;
2010-02-27 17:25:07 +01:00
}
2010-10-25 10:15:22 -04:00
void * newptr = aligned_malloc ( size ) ;
2010-03-05 09:44:21 +01:00
if ( newptr = = 0 )
{
2010-10-07 18:09:15 +02:00
# ifdef EIGEN_HAS_ERRNO
2010-02-28 14:32:57 +01:00
errno = ENOMEM ; // according to the standard
2010-10-07 18:09:15 +02:00
# endif
2010-02-28 14:32:57 +01:00
return 0 ;
2010-02-27 17:25:07 +01:00
}
2010-03-05 09:44:21 +01:00
if ( ptr ! = 0 )
2010-02-27 17:25:07 +01:00
{
std : : memcpy ( newptr , ptr , std : : min ( size , old_size ) ) ;
2010-10-25 10:15:22 -04:00
aligned_free ( ptr ) ;
2010-02-27 17:25:07 +01:00
}
return newptr ;
}

/*****************************************************************************
*** Implementation of portable aligned versions of malloc/free/realloc     ***
*****************************************************************************/

// check_that_malloc_is_allowed() is called by the allocation functions of this file.
//  - EIGEN_NO_MALLOC defined:         it always triggers an eigen_assert failure.
//  - EIGEN_RUNTIME_NO_MALLOC defined: it asserts on a flag that can be read with
//                                     is_malloc_allowed() and changed with set_is_malloc_allowed().
//  - otherwise:                       it does nothing.
#ifdef EIGEN_NO_MALLOC
inline void check_that_malloc_is_allowed()
{
  eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
}
#elif defined EIGEN_RUNTIME_NO_MALLOC
/** \internal Holds the "malloc allowed" flag in a function-local static (initially true).
  * When \a update is true the flag is first set to \a new_value.
  * \returns the value of the flag after the optional update.
  */
inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
{
  static bool value = true;
  if (update)
    value = new_value;
  return value;
}
/** \internal \returns the current value of the "malloc allowed" flag. */
inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
/** \internal Sets the "malloc allowed" flag to \a new_value and returns it. */
inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
inline void check_that_malloc_is_allowed()
{
  eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
#else
inline void check_that_malloc_is_allowed()
{}
#endif
2010-02-28 14:32:57 +01:00
/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment.
2009-04-06 13:33:42 +00:00
* On allocation error , the returned pointer is null , and if exceptions are enabled then a std : : bad_alloc is thrown .
2008-12-16 15:17:29 +00:00
*/
2010-10-25 10:15:22 -04:00
inline void * aligned_malloc ( size_t size )
2008-08-26 19:12:23 +00:00
{
2011-03-06 20:59:25 -05:00
check_that_malloc_is_allowed ( ) ;
2009-01-08 15:20:21 +00:00
2009-11-07 09:07:23 +01:00
void * result ;
2010-03-06 09:05:15 -05:00
# if !EIGEN_ALIGN
2010-02-28 09:10:41 -05:00
result = std : : malloc ( size ) ;
2009-05-15 16:05:45 +00:00
# elif EIGEN_MALLOC_ALREADY_ALIGNED
2010-02-28 09:10:41 -05:00
result = std : : malloc ( size ) ;
2009-05-15 16:05:45 +00:00
# elif EIGEN_HAS_POSIX_MEMALIGN
if ( posix_memalign ( & result , 16 , size ) ) result = 0 ;
# elif EIGEN_HAS_MM_MALLOC
result = _mm_malloc ( size , 16 ) ;
# elif (defined _MSC_VER)
result = _aligned_malloc ( size , 16 ) ;
2009-01-08 15:20:21 +00:00
# else
2010-10-25 10:15:22 -04:00
result = handmade_aligned_malloc ( size ) ;
2009-01-08 15:20:21 +00:00
# endif
2009-11-07 09:07:23 +01:00
2009-01-08 15:20:21 +00:00
# ifdef EIGEN_EXCEPTIONS
2009-02-21 16:35:57 +00:00
if ( result = = 0 )
2009-01-08 15:20:21 +00:00
throw std : : bad_alloc ( ) ;
# endif
return result ;
}
2010-10-25 10:15:22 -04:00
/** \internal Frees memory allocated with aligned_malloc.
  * The preprocessor branches mirror those of aligned_malloc so that each allocation
  * strategy is paired with its matching deallocation function.
  */
inline void aligned_free(void* ptr)
{
  #if !EIGEN_ALIGN
    std::free(ptr);
  #elif EIGEN_MALLOC_ALREADY_ALIGNED
    std::free(ptr);
  #elif EIGEN_HAS_POSIX_MEMALIGN
    std::free(ptr);
  #elif EIGEN_HAS_MM_MALLOC
    _mm_free(ptr);
  #elif defined(_MSC_VER)
    _aligned_free(ptr);
  #else
    handmade_aligned_free(ptr);
  #endif
}
2010-02-28 14:32:57 +01:00
/**
2010-03-05 09:44:21 +01:00
* \ internal
2010-02-28 14:32:57 +01:00
* \ brief Reallocates an aligned block of memory .
* \ throws std : : bad_alloc if EIGEN_EXCEPTIONS are defined .
* */
2010-10-25 10:15:22 -04:00
inline void * aligned_realloc ( void * ptr , size_t new_size , size_t old_size )
2010-02-27 17:25:07 +01:00
{
2010-06-08 15:52:00 +02:00
EIGEN_UNUSED_VARIABLE ( old_size ) ;
2010-02-27 17:25:07 +01:00
void * result ;
2010-03-06 09:05:15 -05:00
# if !EIGEN_ALIGN
2010-02-28 14:32:57 +01:00
result = std : : realloc ( ptr , new_size ) ;
2010-02-27 17:25:07 +01:00
# elif EIGEN_MALLOC_ALREADY_ALIGNED
2010-02-28 14:32:57 +01:00
result = std : : realloc ( ptr , new_size ) ;
2010-02-27 17:25:07 +01:00
# elif EIGEN_HAS_POSIX_MEMALIGN
2010-10-25 10:15:22 -04:00
result = generic_aligned_realloc ( ptr , new_size , old_size ) ;
2010-02-27 17:25:07 +01:00
# elif EIGEN_HAS_MM_MALLOC
2010-02-28 14:32:57 +01:00
// The defined(_mm_free) is just here to verify that this MSVC version
// implements _mm_malloc/_mm_free based on the corresponding _aligned_
// functions. This may not always be the case and we just try to be safe.
2010-02-28 09:10:41 -05:00
# if defined(_MSC_VER) && defined(_mm_free)
result = _aligned_realloc ( ptr , new_size , 16 ) ;
# else
2010-10-25 10:15:22 -04:00
result = generic_aligned_realloc ( ptr , new_size , old_size ) ;
2010-02-28 09:10:41 -05:00
# endif
2010-02-27 17:25:07 +01:00
# elif defined(_MSC_VER)
result = _aligned_realloc ( ptr , new_size , 16 ) ;
# else
2010-10-25 10:15:22 -04:00
result = handmade_aligned_realloc ( ptr , new_size , old_size ) ;
2010-02-27 17:25:07 +01:00
# endif
# ifdef EIGEN_EXCEPTIONS
if ( result = = 0 & & new_size ! = 0 )
throw std : : bad_alloc ( ) ;
# endif
return result ;
}

/*****************************************************************************
*** Implementation of conditionally aligned functions                      ***
*****************************************************************************/

/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
* On allocation error , the returned pointer is null , and if exceptions are enabled then a std : : bad_alloc is thrown .
*/
2010-10-25 10:15:22 -04:00
template < bool Align > inline void * conditional_aligned_malloc ( size_t size )
2010-02-28 14:32:57 +01:00
{
2010-10-25 10:15:22 -04:00
return aligned_malloc ( size ) ;
2010-02-28 14:32:57 +01:00
}
2010-10-25 10:15:22 -04:00
template < > inline void * conditional_aligned_malloc < false > ( size_t size )
2010-02-28 14:32:57 +01:00
{
2011-03-06 20:59:25 -05:00
check_that_malloc_is_allowed ( ) ;
2010-02-28 14:32:57 +01:00
void * result = std : : malloc ( size ) ;
# ifdef EIGEN_EXCEPTIONS
if ( ! result ) throw std : : bad_alloc ( ) ;
# endif
return result ;
}
2010-10-25 10:15:22 -04:00
/** \internal Frees memory allocated with conditional_aligned_malloc */
template < bool Align > inline void conditional_aligned_free ( void * ptr )
2010-02-28 14:32:57 +01:00
{
2010-10-25 10:15:22 -04:00
aligned_free ( ptr ) ;
2010-02-28 14:32:57 +01:00
}
2010-10-25 10:15:22 -04:00
template < > inline void conditional_aligned_free < false > ( void * ptr )
2010-02-28 14:32:57 +01:00
{
std : : free ( ptr ) ;
}
2010-10-25 10:15:22 -04:00
template < bool Align > inline void * conditional_aligned_realloc ( void * ptr , size_t new_size , size_t old_size )
2010-02-27 17:25:07 +01:00
{
2010-10-25 10:15:22 -04:00
return aligned_realloc ( ptr , new_size , old_size ) ;
2010-02-27 17:25:07 +01:00
}
2010-10-25 10:15:22 -04:00
template < > inline void * conditional_aligned_realloc < false > ( void * ptr , size_t new_size , size_t )
2010-02-27 17:25:07 +01:00
{
return std : : realloc ( ptr , new_size ) ;
}

/*****************************************************************************
*** Construction/destruction of array elements                             ***
*****************************************************************************/

/** \internal Constructs the elements of an array.
  * The \a size parameter tells on how many objects to call the constructor of T.
  * Elements are default-constructed in place, in increasing index order.
  * \returns \a ptr
  */
template<typename T> inline T* construct_elements_of_array(T* ptr, size_t size)
{
  for (size_t i = 0; i < size; ++i) ::new (ptr + i) T;
  return ptr;
}
2010-02-28 14:32:57 +01:00
/** \internal Destructs the elements of an array.
  * The \a size parameter tells on how many objects to call the destructor of T.
  * A null \a ptr is ignored.
  */
template<typename T> inline void destruct_elements_of_array(T* ptr, size_t size)
{
  // always destruct an array starting from the end.
  if (ptr)
    while (size) ptr[--size].~T();
}

/*****************************************************************************
*** Implementation of aligned new/delete-like functions                    ***
*****************************************************************************/

/** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment.
* On allocation error , the returned pointer is undefined , but if exceptions are enabled then a std : : bad_alloc is thrown .
* The default constructor of T is called .
*/
2010-10-25 10:15:22 -04:00
template < typename T > inline T * aligned_new ( size_t size )
2010-02-28 14:32:57 +01:00
{
2010-10-25 10:15:22 -04:00
T * result = reinterpret_cast < T * > ( aligned_malloc ( sizeof ( T ) * size ) ) ;
return construct_elements_of_array ( result , size ) ;
2010-02-28 14:32:57 +01:00
}
2010-10-25 10:15:22 -04:00
template < typename T , bool Align > inline T * conditional_aligned_new ( size_t size )
2010-02-28 14:32:57 +01:00
{
2010-10-25 10:15:22 -04:00
T * result = reinterpret_cast < T * > ( conditional_aligned_malloc < Align > ( sizeof ( T ) * size ) ) ;
return construct_elements_of_array ( result , size ) ;
2010-02-28 14:32:57 +01:00
}
2010-10-25 10:15:22 -04:00
/** \internal Deletes objects constructed with aligned_new
2009-01-08 15:20:21 +00:00
* The \ a size parameters tells on how many objects to call the destructor of T .
*/
2010-10-25 10:15:22 -04:00
template < typename T > inline void aligned_delete ( T * ptr , size_t size )
2008-08-26 19:12:23 +00:00
{
2010-10-25 10:15:22 -04:00
destruct_elements_of_array < T > ( ptr , size ) ;
aligned_free ( ptr ) ;
2009-01-08 15:20:21 +00:00
}
2010-10-25 10:15:22 -04:00
/** \internal Deletes objects constructed with conditional_aligned_new
2009-01-08 15:20:21 +00:00
* The \ a size parameters tells on how many objects to call the destructor of T .
*/
2010-10-25 10:15:22 -04:00
template < typename T , bool Align > inline void conditional_aligned_delete ( T * ptr , size_t size )
2009-01-08 15:20:21 +00:00
{
2010-10-25 10:15:22 -04:00
destruct_elements_of_array < T > ( ptr , size ) ;
conditional_aligned_free < Align > ( ptr ) ;
2008-08-26 19:12:23 +00:00
}
2010-10-25 10:15:22 -04:00
template < typename T , bool Align > inline T * conditional_aligned_realloc_new ( T * pts , size_t new_size , size_t old_size )
2010-02-28 14:32:57 +01:00
{
2011-02-07 19:52:16 +01:00
if ( new_size < old_size )
destruct_elements_of_array ( pts + new_size , old_size - new_size ) ;
2010-10-25 10:15:22 -04:00
T * result = reinterpret_cast < T * > ( conditional_aligned_realloc < Align > ( reinterpret_cast < void * > ( pts ) , sizeof ( T ) * new_size , sizeof ( T ) * old_size ) ) ;
2011-02-07 19:52:16 +01:00
if ( new_size > old_size )
2010-10-25 10:15:22 -04:00
construct_elements_of_array ( result + old_size , new_size - old_size ) ;
2010-02-28 14:32:57 +01:00
return result ;
}
2011-01-26 17:56:49 +01:00
template < typename T , bool Align > inline T * conditional_aligned_new_auto ( size_t size )
{
T * result = reinterpret_cast < T * > ( conditional_aligned_malloc < Align > ( sizeof ( T ) * size ) ) ;
if ( NumTraits < T > : : RequireInitialization )
construct_elements_of_array ( result , size ) ;
return result ;
}
template < typename T , bool Align > inline T * conditional_aligned_realloc_new_auto ( T * pts , size_t new_size , size_t old_size )
{
2011-02-07 19:52:16 +01:00
if ( NumTraits < T > : : RequireInitialization & & ( new_size < old_size ) )
destruct_elements_of_array ( pts + new_size , old_size - new_size ) ;
2011-01-26 17:56:49 +01:00
T * result = reinterpret_cast < T * > ( conditional_aligned_realloc < Align > ( reinterpret_cast < void * > ( pts ) , sizeof ( T ) * new_size , sizeof ( T ) * old_size ) ) ;
2011-02-07 19:52:16 +01:00
if ( NumTraits < T > : : RequireInitialization & & ( new_size > old_size ) )
2011-01-26 17:56:49 +01:00
construct_elements_of_array ( result + old_size , new_size - old_size ) ;
return result ;
}
template < typename T , bool Align > inline void conditional_aligned_delete_auto ( T * ptr , size_t size )
{
if ( NumTraits < T > : : RequireInitialization )
destruct_elements_of_array < T > ( ptr , size ) ;
conditional_aligned_free < Align > ( ptr ) ;
}

/****************************************************************************/

/** \internal Returns the index of the first element of the array that is well aligned for vectorization.
2009-12-16 08:53:14 -05:00
*
2010-01-02 12:38:16 -05:00
* \ param array the address of the start of the array
* \ param size the size of the array
*
* \ note If no element of the array is well aligned , the size of the array is returned . Typically ,
* for example with SSE , " well aligned " means 16 - byte - aligned . If vectorization is disabled or if the
* packet size for the given scalar type is 1 , then everything is considered well - aligned .
*
* \ note If the scalar type is vectorizable , we rely on the following assumptions : sizeof ( Scalar ) is a
* power of 2 , the packet size in bytes is also a power of 2 , and is a multiple of sizeof ( Scalar ) . On the
* other hand , we do not assume that the array address is a multiple of sizeof ( Scalar ) , as that fails for
* example with Scalar = double on certain 32 - bit platforms , see bug # 79.
*
2010-10-25 10:15:22 -04:00
* There is also the variant first_aligned ( const MatrixBase & ) defined in DenseCoeffsBase . h .
2009-12-16 08:53:14 -05:00
*/
2010-05-30 16:00:58 -04:00
template < typename Scalar , typename Index >
2010-10-25 10:15:22 -04:00
inline static Index first_aligned ( const Scalar * array , Index size )
2008-08-26 19:12:23 +00:00
{
2010-10-25 10:15:22 -04:00
typedef typename packet_traits < Scalar > : : type Packet ;
enum { PacketSize = packet_traits < Scalar > : : size ,
2010-01-02 12:38:16 -05:00
PacketAlignedMask = PacketSize - 1
} ;
2010-02-25 21:01:52 -05:00
2010-01-02 12:38:16 -05:00
if ( PacketSize = = 1 )
{
// Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
2010-02-27 17:25:07 +01:00
// of the array have the same alignment.
2010-01-02 12:38:16 -05:00
return 0 ;
}
else if ( size_t ( array ) & ( sizeof ( Scalar ) - 1 ) )
{
// There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar.
// Consequently, no element of the array is well aligned.
return size ;
}
else
{
2010-05-30 16:00:58 -04:00
return std : : min < Index > ( ( PacketSize - ( Index ( ( size_t ( array ) / sizeof ( Scalar ) ) ) & PacketAlignedMask ) )
2010-01-02 12:38:16 -05:00
& PacketAlignedMask , size ) ;
}
2008-08-26 19:12:23 +00:00
}
2010-10-25 10:15:22 -04:00
} // end namespace internal

/*****************************************************************************
*** Implementation of runtime stack allocation (falling back to malloc)    ***
*****************************************************************************/

// You can override Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function.
// Choose a default for EIGEN_ALLOCA unless it is already defined: alloca on Linux,
// _alloca with MSVC. On any other platform the macro is left undefined.
#if !defined(EIGEN_ALLOCA) && defined(__linux__)
  #define EIGEN_ALLOCA alloca
#elif !defined(EIGEN_ALLOCA) && defined(_MSC_VER)
  #define EIGEN_ALLOCA _alloca
#endif
namespace internal {
template < typename T > class stack_memory_destructor
{
public :
stack_memory_destructor ( T * ptr , size_t size ) : m_ptr ( ptr ) , m_size ( size ) { }
~ stack_memory_destructor ( )
{
Eigen : : internal : : destruct_elements_of_array < T > ( m_ptr , m_size ) ;
# ifdef EIGEN_ALLOCA
if ( sizeof ( T ) * m_size > EIGEN_STACK_ALLOCATION_LIMIT )
# endif
Eigen : : internal : : aligned_free ( m_ptr ) ;
}
protected :
T * m_ptr ;
size_t m_size ;
} ;
}
2008-08-26 19:12:23 +00:00
/** \internal
  * Declares, allocates and constructs a buffer named NAME of SIZE elements of type TYPE on the stack
  * if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
  * (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap
  * with aligned_malloc.
  * The allocated buffer is automatically deleted when exiting the scope of this declaration.
  * If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
  * Here is an example:
  * \code
  * {
  *   ei_declare_aligned_stack_constructed_variable(float,data,size,0);
  *   // use data[0] to data[size-1]
  * }
  * \endcode
  * The underlying stack allocation function can be controlled with the EIGEN_ALLOCA preprocessor token.
  *
  * \note SIZE and BUFFER are evaluated several times; pass expressions without side effects.
  * \note NOTE(review): nothing in this macro adjusts the alignment of the memory returned by
  *       EIGEN_ALLOCA; whether it is 16-byte aligned depends on the platform -- confirm.
  */
#ifdef EIGEN_ALLOCA
  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
               : reinterpret_cast<TYPE*>( \
                      (sizeof(TYPE)*(SIZE)<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALLOCA(sizeof(TYPE)*(SIZE)) \
                    : Eigen::internal::aligned_malloc(sizeof(TYPE)*(SIZE)) );  \
    if((BUFFER)==0) Eigen::internal::construct_elements_of_array(NAME,SIZE); \
    Eigen::internal::stack_memory_destructor<TYPE> EIGEN_CAT(stack_memory_destructor,__LINE__)((BUFFER)==0 ? NAME : 0,SIZE)
#else
  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*(SIZE)));    \
    if((BUFFER)==0) Eigen::internal::construct_elements_of_array(NAME,SIZE); \
    Eigen::internal::stack_memory_destructor<TYPE> EIGEN_CAT(stack_memory_destructor,__LINE__)((BUFFER)==0 ? NAME : 0,SIZE)
#endif

/*****************************************************************************
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF]                ***
*****************************************************************************/

// EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) is meant to be placed inside a class body.
// When EIGEN_ALIGN is enabled it declares class-level operator new/delete overloads that
// forward to Eigen::internal::conditional_aligned_malloc/free<NeedsToAlign>, plus
// pass-through placement forms and nothrow forms. When EIGEN_ALIGN is 0 it expands to nothing.
//
// EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW is a helper providing the nothrow operator new;
// with EIGEN_EXCEPTIONS it converts any exception from the allocation into a null return.
#if EIGEN_ALIGN
  #ifdef EIGEN_EXCEPTIONS
    #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void* operator new(size_t size, const std::nothrow_t&) throw() { \
        try { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
        catch (...) { return 0; } \
        return 0; \
      }
  #else
    #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void* operator new(size_t size, const std::nothrow_t&) throw() { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      }
  #endif

  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
      void *operator new(size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      void *operator new[](size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      void operator delete(void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      void operator delete[](void * ptr) throw() { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      /* in-place new and delete. since (at least afaik) there is no actual   */ \
      /* memory allocated we can safely let the default implementation handle */ \
      /* this particular case. */ \
      static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
      void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \
      /* nothrow-new (returns zero instead of std::bad_alloc) */ \
      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void operator delete(void *ptr, const std::nothrow_t&) throw() { \
        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
      } \
      typedef void eigen_aligned_operator_new_marker_type;
#else
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
#endif
2009-01-08 15:20:21 +00:00
2009-01-08 15:37:13 +00:00
// Unconditionally request the aligned operator new/delete overloads.
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
// Request the aligned overloads only for fixed-size types (Size != Eigen::Dynamic) whose
// total size in bytes, sizeof(Scalar)*Size, is a multiple of 16.
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%16==0))

/****************************************************************************/

/** \class aligned_allocator
2010-07-06 13:10:08 +01:00
* \ ingroup Core_Module
2009-01-09 00:55:53 +00:00
*
2010-02-28 14:32:57 +01:00
* \ brief STL compatible allocator to use with with 16 byte aligned types
2009-01-09 00:55:53 +00:00
*
* Example :
* \ code
2009-01-10 02:50:09 +00:00
* // Matrix4f requires 16 bytes alignment:
2010-11-12 12:06:24 +00:00
* std : : map < int , Matrix4f , std : : less < int > ,
* aligned_allocator < std : : pair < const int , Matrix4f > > > my_map_mat4 ;
2009-01-09 00:55:53 +00:00
* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
2009-01-10 02:50:09 +00:00
* std : : map < int , Vector3f > my_map_vec3 ;
2009-01-09 00:55:53 +00:00
* \ endcode
*
2010-11-12 12:06:24 +00:00
* \ sa \ ref TopicStlContainers .
2009-01-09 00:55:53 +00:00
*/
template < class T >
class aligned_allocator
{
public :
2010-02-12 08:58:29 -05:00
typedef size_t size_type ;
2010-02-10 13:24:47 +01:00
typedef std : : ptrdiff_t difference_type ;
2009-01-09 00:55:53 +00:00
typedef T * pointer ;
typedef const T * const_pointer ;
typedef T & reference ;
typedef const T & const_reference ;
typedef T value_type ;
template < class U >
struct rebind
{
typedef aligned_allocator < U > other ;
} ;
2009-11-07 09:07:23 +01:00
pointer address ( reference value ) const
2009-01-09 00:55:53 +00:00
{
return & value ;
}
2009-11-07 09:07:23 +01:00
const_pointer address ( const_reference value ) const
2009-01-09 00:55:53 +00:00
{
return & value ;
}
2009-11-07 09:07:23 +01:00
aligned_allocator ( ) throw ( )
2009-01-09 00:55:53 +00:00
{
}
2009-11-07 09:07:23 +01:00
aligned_allocator ( const aligned_allocator & ) throw ( )
2009-01-09 00:55:53 +00:00
{
}
template < class U >
2009-11-07 09:07:23 +01:00
aligned_allocator ( const aligned_allocator < U > & ) throw ( )
2009-01-09 00:55:53 +00:00
{
}
2009-11-07 09:07:23 +01:00
~ aligned_allocator ( ) throw ( )
2009-01-09 00:55:53 +00:00
{
}
2009-11-07 09:07:23 +01:00
size_type max_size ( ) const throw ( )
2009-01-09 00:55:53 +00:00
{
return std : : numeric_limits < size_type > : : max ( ) ;
}
pointer allocate ( size_type num , const_pointer * hint = 0 )
{
static_cast < void > ( hint ) ; // suppress unused variable warning
2010-10-25 10:15:22 -04:00
return static_cast < pointer > ( internal : : aligned_malloc ( num * sizeof ( T ) ) ) ;
2009-01-09 00:55:53 +00:00
}
2009-11-07 09:07:23 +01:00
void construct ( pointer p , const T & value )
2009-01-09 00:55:53 +00:00
{
: : new ( p ) T ( value ) ;
}
2009-11-07 09:07:23 +01:00
void destroy ( pointer p )
2009-01-09 00:55:53 +00:00
{
p - > ~ T ( ) ;
}
2009-11-07 09:07:23 +01:00
void deallocate ( pointer p , size_type /*num*/ )
2009-01-09 00:55:53 +00:00
{
2010-10-25 10:15:22 -04:00
internal : : aligned_free ( p ) ;
2009-01-09 00:55:53 +00:00
}
2009-11-07 09:07:23 +01:00
2010-01-04 23:21:04 +01:00
bool operator ! = ( const aligned_allocator < T > & ) const
2009-04-09 21:22:02 +00:00
{ return false ; }
2009-11-07 09:07:23 +01:00
2010-01-04 23:21:04 +01:00
bool operator = = ( const aligned_allocator < T > & ) const
2009-04-09 21:22:02 +00:00
{ return true ; }
2009-01-09 00:55:53 +00:00
} ;
2010-06-21 11:59:37 +02:00
//---------- Cache sizes ----------
2010-12-28 13:46:39 +01:00
// EIGEN_CPUID(abcd,func,id) runs the CPUID instruction with EAX=func and ECX=id and stores
// the resulting EAX, EBX, ECX, EDX into abcd[0], abcd[1], abcd[2], abcd[3] respectively.
// The macro is left undefined when none of the compiler/target combinations below matches;
// the code using it is guarded by #ifdef EIGEN_CPUID.
# if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
2011-01-05 02:43:43 +01:00
# if defined(__PIC__) && defined(__i386__)
// Case for x86 with PIC
// EBX is exchanged with ESI around cpuid and the EBX result is read back through ESI ("=S"),
// so EBX holds its previous value again once the sequence is over.
// NOTE(review): presumably needed because EBX is reserved in 32-bit PIC code - confirm.
2010-06-24 09:29:43 +02:00
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ( " xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi " : " =a " ( abcd [ 0 ] ) , " =S " ( abcd [ 1 ] ) , " =c " ( abcd [ 2 ] ) , " =d " ( abcd [ 3 ] ) : " a " ( func ) , " c " ( id ) ) ;
2010-12-28 13:46:39 +01:00
# else
2011-01-05 02:43:43 +01:00
// Case for x86_64 or x86 w/o PIC
// Plain cpuid: the four result registers are bound directly to abcd[0..3].
2010-06-23 16:34:51 +02:00
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ( " cpuid " : " =a " ( abcd [ 0 ] ) , " =b " ( abcd [ 1 ] ) , " =c " ( abcd [ 2 ] ) , " =d " ( abcd [ 3 ] ) : " a " ( func ) , " c " ( id ) ) ;
2010-07-06 11:02:01 +02:00
# endif
2010-06-24 09:55:53 +02:00
# elif defined(_MSC_VER)
2011-01-17 11:17:45 -05:00
// Case for MSVC: only versions with _MSC_VER > 1500 get a definition, based on __cpuidex.
# if (_MSC_VER > 1500)
2010-06-24 09:55:53 +02:00
# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
2010-07-06 11:02:01 +02:00
# endif
2010-06-24 10:05:24 +02:00
# endif
2010-06-21 11:59:37 +02:00
2010-10-25 10:15:22 -04:00
namespace internal {
2010-06-24 09:45:17 +02:00
# ifdef EIGEN_CPUID
2010-10-25 10:15:22 -04:00
/** \internal
  * \returns true if the vendor identification held in \a abcd equals \a vendor.
  *
  * \param abcd   the values of EAX, EBX, ECX, EDX as stored by EIGEN_CPUID(abcd,0x0,0)
  * \param vendor the 12 character vendor string to compare with, e.g. "GenuineIntel"
  *
  * The 12 characters are spread over abcd[1], abcd[3] and abcd[2] (in that order), four
  * characters per register with the first character in the lowest byte. The comparison is
  * done byte by byte, so \a vendor is never reinterpreted as an array of int (no aliasing
  * or alignment assumption) and reading stops at the first mismatch.
  */
inline bool cpuid_is_vendor(int abcd[4], const char* vendor)
{
  // registers in the order in which they hold the vendor string
  const int regs[3] = { abcd[1], abcd[3], abcd[2] };
  for(int i=0; i<12; ++i)
  {
    const unsigned int reg_byte = (static_cast<unsigned int>(regs[i/4]) >> (8*(i%4))) & 0xFFu;
    if(reg_byte != static_cast<unsigned char>(vendor[i]))
      return false;
  }
  return true;
}
2010-10-25 10:15:22 -04:00
inline void queryCacheSizes_intel_direct ( int & l1 , int & l2 , int & l3 )
2010-06-23 17:14:06 +02:00
{
int abcd [ 4 ] ;
l1 = l2 = l3 = 0 ;
int cache_id = 0 ;
int cache_type = 0 ;
do {
2010-06-26 23:15:06 +02:00
abcd [ 0 ] = abcd [ 1 ] = abcd [ 2 ] = abcd [ 3 ] = 0 ;
2010-06-23 17:14:06 +02:00
EIGEN_CPUID ( abcd , 0x4 , cache_id ) ;
cache_type = ( abcd [ 0 ] & 0x0F ) > > 0 ;
if ( cache_type = = 1 | | cache_type = = 3 ) // data or unified cache
{
int cache_level = ( abcd [ 0 ] & 0xE0 ) > > 5 ; // A[7:5]
int ways = ( abcd [ 1 ] & 0xFFC00000 ) > > 22 ; // B[31:22]
int partitions = ( abcd [ 1 ] & 0x003FF000 ) > > 12 ; // B[21:12]
int line_size = ( abcd [ 1 ] & 0x00000FFF ) > > 0 ; // B[11:0]
int sets = ( abcd [ 2 ] ) ; // C[31:0]
2010-06-24 09:29:43 +02:00
2010-06-23 17:14:06 +02:00
int cache_size = ( ways + 1 ) * ( partitions + 1 ) * ( line_size + 1 ) * ( sets + 1 ) ;
2010-06-24 09:29:43 +02:00
2010-06-23 17:14:06 +02:00
switch ( cache_level )
{
case 1 : l1 = cache_size ; break ;
case 2 : l2 = cache_size ; break ;
case 3 : l3 = cache_size ; break ;
default : break ;
}
}
cache_id + + ;
2010-06-27 00:17:38 +02:00
} while ( cache_type > 0 & & cache_id < 16 ) ;
}
2010-10-25 10:15:22 -04:00
inline void queryCacheSizes_intel_codes ( int & l1 , int & l2 , int & l3 )
2010-06-27 00:17:38 +02:00
{
int abcd [ 4 ] ;
abcd [ 0 ] = abcd [ 1 ] = abcd [ 2 ] = abcd [ 3 ] = 0 ;
l1 = l2 = l3 = 0 ;
EIGEN_CPUID ( abcd , 0x00000002 , 0 ) ;
unsigned char * bytes = reinterpret_cast < unsigned char * > ( abcd ) + 2 ;
bool check_for_p2_core2 = false ;
for ( int i = 0 ; i < 14 ; + + i )
{
switch ( bytes [ i ] )
{
case 0x0A : l1 = 8 ; break ; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines
case 0x0C : l1 = 16 ; break ; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines
case 0x0E : l1 = 24 ; break ; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines
case 0x10 : l1 = 16 ; break ; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
case 0x15 : l1 = 16 ; break ; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
case 0x2C : l1 = 32 ; break ; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines
case 0x30 : l1 = 32 ; break ; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines
case 0x60 : l1 = 16 ; break ; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
case 0x66 : l1 = 8 ; break ; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
case 0x67 : l1 = 16 ; break ; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
case 0x68 : l1 = 32 ; break ; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
case 0x1A : l2 = 96 ; break ; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
case 0x22 : l3 = 512 ; break ; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
case 0x23 : l3 = 1024 ; break ; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
case 0x25 : l3 = 2048 ; break ; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
case 0x29 : l3 = 4096 ; break ; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
case 0x39 : l2 = 128 ; break ; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
case 0x3A : l2 = 192 ; break ; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
case 0x3B : l2 = 128 ; break ; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
case 0x3C : l2 = 256 ; break ; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
case 0x3D : l2 = 384 ; break ; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
case 0x3E : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
case 0x40 : l2 = 0 ; break ; // no integrated L2 cache (P6 core) or L3 cache (P4 core)
case 0x41 : l2 = 128 ; break ; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
case 0x42 : l2 = 256 ; break ; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
case 0x43 : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
case 0x44 : l2 = 1024 ; break ; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
case 0x45 : l2 = 2048 ; break ; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
case 0x46 : l3 = 4096 ; break ; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
case 0x47 : l3 = 8192 ; break ; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
case 0x48 : l2 = 3072 ; break ; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
case 0x49 : if ( l2 ! = 0 ) l3 = 4096 ; else { check_for_p2_core2 = true ; l3 = l2 = 4096 ; } break ; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
case 0x4A : l3 = 6144 ; break ; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
case 0x4B : l3 = 8192 ; break ; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
case 0x4C : l3 = 12288 ; break ; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
case 0x4D : l3 = 16384 ; break ; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
case 0x4E : l2 = 6144 ; break ; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
case 0x78 : l2 = 1024 ; break ; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
case 0x79 : l2 = 128 ; break ; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
case 0x7A : l2 = 256 ; break ; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
case 0x7B : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
case 0x7C : l2 = 1024 ; break ; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
case 0x7D : l2 = 2048 ; break ; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
case 0x7E : l2 = 256 ; break ; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
case 0x7F : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
case 0x80 : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
case 0x81 : l2 = 128 ; break ; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
case 0x82 : l2 = 256 ; break ; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
case 0x83 : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
case 0x84 : l2 = 1024 ; break ; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
case 0x85 : l2 = 2048 ; break ; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
case 0x86 : l2 = 512 ; break ; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
case 0x87 : l2 = 1024 ; break ; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
case 0x88 : l3 = 2048 ; break ; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
case 0x89 : l3 = 4096 ; break ; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
case 0x8A : l3 = 8192 ; break ; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
case 0x8D : l3 = 3072 ; break ; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)
default : break ;
}
}
if ( check_for_p2_core2 & & l2 = = l3 )
l3 = 0 ;
l1 * = 1024 ;
l2 * = 1024 ;
l3 * = 1024 ;
}
2010-10-25 10:15:22 -04:00
inline void queryCacheSizes_intel ( int & l1 , int & l2 , int & l3 , int max_std_funcs )
2010-06-27 00:17:38 +02:00
{
if ( max_std_funcs > = 4 )
2010-10-25 10:15:22 -04:00
queryCacheSizes_intel_direct ( l1 , l2 , l3 ) ;
2010-06-27 00:17:38 +02:00
else
2010-10-25 10:15:22 -04:00
queryCacheSizes_intel_codes ( l1 , l2 , l3 ) ;
2010-06-23 17:14:06 +02:00
}
2010-10-25 10:15:22 -04:00
inline void queryCacheSizes_amd ( int & l1 , int & l2 , int & l3 )
2010-06-23 17:14:06 +02:00
{
int abcd [ 4 ] ;
2010-06-26 23:15:06 +02:00
abcd [ 0 ] = abcd [ 1 ] = abcd [ 2 ] = abcd [ 3 ] = 0 ;
2010-06-23 17:14:06 +02:00
EIGEN_CPUID ( abcd , 0x80000005 , 0 ) ;
l1 = ( abcd [ 2 ] > > 24 ) * 1024 ; // C[31:24] = L1 size in KB
2010-06-26 23:15:06 +02:00
abcd [ 0 ] = abcd [ 1 ] = abcd [ 2 ] = abcd [ 3 ] = 0 ;
2010-06-23 17:14:06 +02:00
EIGEN_CPUID ( abcd , 0x80000006 , 0 ) ;
l2 = ( abcd [ 2 ] > > 16 ) * 1024 ; // C[31;16] = l2 cache size in KB
l3 = ( ( abcd [ 3 ] & 0xFFFC000 ) > > 18 ) * 512 * 1024 ; // D[31;18] = l3 cache size in 512KB
}
2010-06-24 09:45:17 +02:00
# endif
2010-06-23 17:14:06 +02:00
2010-06-21 11:59:37 +02:00
/** \internal
  * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively.
  *
  * When EIGEN_CPUID is available, the CPU vendor is identified through CPUID function 0 and
  * the vendor specific query is called; vendors other than Intel and AMD are handled like
  * Intel. When EIGEN_CPUID is not available, the three outputs are set to -1.
  */
inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
  #ifdef EIGEN_CPUID
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;

  // identify the CPU vendor
  EIGEN_CPUID(abcd,0x0,0);
  // function 0 returns the highest supported standard function in EAX (abcd[0]);
  // abcd[1], abcd[3] and abcd[2] hold the vendor string
  int max_std_funcs = abcd[0];
  if(cpuid_is_vendor(abcd,"GenuineIntel"))
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  else if(cpuid_is_vendor(abcd,"AuthenticAMD") || cpuid_is_vendor(abcd,"AMDisbetter!"))
    queryCacheSizes_amd(l1,l2,l3);
  else
    // by default let's use Intel's API
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);

  // here is the list of other vendors:
  //   ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
  //   ||cpuid_is_vendor(abcd,"CyrixInstead")
  //   ||cpuid_is_vendor(abcd,"CentaurHauls")
  //   ||cpuid_is_vendor(abcd,"GenuineTMx86")
  //   ||cpuid_is_vendor(abcd,"TransmetaCPU")
  //   ||cpuid_is_vendor(abcd,"RiseRiseRise")
  //   ||cpuid_is_vendor(abcd,"Geode by NSC")
  //   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
  //   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
  //   ||cpuid_is_vendor(abcd,"NexGenDriven")
  #else
  l1 = l2 = l3 = -1;
  #endif
}
2010-06-23 16:34:51 +02:00
/** \internal
* \ returns the size in Bytes of the L1 data cache */
2010-10-25 10:15:22 -04:00
inline int queryL1CacheSize ( )
2010-06-23 16:34:51 +02:00
{
int l1 ( - 1 ) , l2 , l3 ;
2010-10-25 10:15:22 -04:00
queryCacheSizes ( l1 , l2 , l3 ) ;
2010-06-23 16:34:51 +02:00
return l1 ;
}
2010-06-21 11:59:37 +02:00
/** \internal
* \ returns the size in Bytes of the L2 or L3 cache if this later is present */
2010-10-25 10:15:22 -04:00
inline int queryTopLevelCacheSize ( )
2010-06-21 11:59:37 +02:00
{
2010-06-23 16:34:51 +02:00
int l1 , l2 ( - 1 ) , l3 ( - 1 ) ;
2010-10-25 10:15:22 -04:00
queryCacheSizes ( l1 , l2 , l3 ) ;
2010-06-21 11:59:37 +02:00
return std : : max ( l2 , l3 ) ;
}
2010-10-25 10:15:22 -04:00
} // end namespace internal
2008-08-26 19:12:23 +00:00
# endif // EIGEN_MEMORY_H