diff options
Diffstat (limited to 'Eigen/src/Core/util/Memory.h')
-rw-r--r-- | Eigen/src/Core/util/Memory.h | 268 |
1 files changed, 41 insertions, 227 deletions
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 875318cdb..c634d7ea0 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -63,28 +63,14 @@ namespace Eigen { namespace internal { -EIGEN_DEVICE_FUNC +EIGEN_DEVICE_FUNC inline void throw_std_bad_alloc() { #ifdef EIGEN_EXCEPTIONS throw std::bad_alloc(); #else std::size_t huge = static_cast<std::size_t>(-1); - #if defined(EIGEN_HIPCC) - // - // calls to "::operator new" are to be treated as opaque function calls (i.e no inlining), - // and as a consequence the code in the #else block triggers the hipcc warning : - // "no overloaded function has restriction specifiers that are compatible with the ambient context" - // - // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects - // the same on "operator new" - // Reverting code back to the old version in this #if block for the hipcc compiler - // new int[huge]; - #else - void* unused = ::operator new(huge); - EIGEN_UNUSED_VARIABLE(unused); - #endif #endif } @@ -97,26 +83,19 @@ inline void throw_std_bad_alloc() /** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned. * Fast, but wastes 16 additional bytes of memory. Does not throw any exception. */ -EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) +inline void* handmade_aligned_malloc(std::size_t size) { - eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2"); - - EIGEN_USING_STD(malloc) - void *original = malloc(size+alignment); - + void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES); if (original == 0) return 0; - void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment); + void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES); *(reinterpret_cast<void**>(aligned) - 1) = original; return aligned; } /** \internal Frees memory allocated with handmade_aligned_malloc */ -EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr) +inline void handmade_aligned_free(void *ptr) { - if (ptr) { - EIGEN_USING_STD(free) - free(*(reinterpret_cast<void**>(ptr) - 1)); - } + if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1)); } /** \internal @@ -135,7 +114,7 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = void *previous_aligned = static_cast<char *>(original)+previous_offset; if(aligned!=previous_aligned) std::memmove(aligned, previous_aligned, size); - + *(reinterpret_cast<void**>(aligned) - 1) = original; return aligned; } @@ -163,7 +142,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() { eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)"); } -#else +#else EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {} #endif @@ -177,12 +156,9 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) void *result; #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED - - EIGEN_USING_STD(malloc) - result = malloc(size); - + result = std::malloc(size); #if EIGEN_DEFAULT_ALIGN_BYTES==16 - eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator."); + eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator."); #endif #else result = handmade_aligned_malloc(size); @@ -198,10 +174,7 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) { #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED - - EIGEN_USING_STD(free) - free(ptr); - + std::free(ptr); #else handmade_aligned_free(ptr); #endif @@ -214,7 +187,7 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) */ inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size) { - EIGEN_UNUSED_VARIABLE(old_size) + EIGEN_UNUSED_VARIABLE(old_size); void *result; #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED @@ -245,9 +218,7 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std: { check_that_malloc_is_allowed(); - EIGEN_USING_STD(malloc) - void *result = malloc(size); - + void *result = std::malloc(size); if(!result && size) throw_std_bad_alloc(); return result; @@ -261,8 +232,7 @@ template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr) { - EIGEN_USING_STD(free) - free(ptr); + std::free(ptr); } template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) @@ -361,7 +331,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size) { destruct_elements_of_array<T>(ptr, size); - Eigen::internal::aligned_free(ptr); + aligned_free(ptr); } /** \internal Deletes objects constructed with conditional_aligned_new @@ -501,8 +471,8 @@ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index } /** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size - */ -template<typename Index> + */ +template<typename Index> inline Index first_multiple(Index size, Index base) { return ((size+base-1)/base)*base; @@ -523,7 +493,6 @@ template<typename T> struct smart_copy_helper<T,true> { IntPtr size = IntPtr(end)-IntPtr(start); if(size==0) return; eigen_internal_assert(start!=0 && end!=0 && target!=0); - EIGEN_USING_STD(memcpy) memcpy(target, start, size); } }; @@ -533,7 +502,7 @@ template<typename T> struct smart_copy_helper<T,false> { { std::copy(start, end, target); } }; -// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. +// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. template<typename T, bool UseMemmove> struct smart_memmove_helper; template<typename T> void smart_memmove(const T* start, const T* end, T* target) @@ -553,30 +522,19 @@ template<typename T> struct smart_memmove_helper<T,true> { template<typename T> struct smart_memmove_helper<T,false> { static inline void run(const T* start, const T* end, T* target) - { + { if (UIntPtr(target) < UIntPtr(start)) { std::copy(start, end, target); } - else + else { std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T); - std::copy_backward(start, end, target + count); + std::copy_backward(start, end, target + count); } } }; -#if EIGEN_HAS_RVALUE_REFERENCES -template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) -{ - return std::move(start, end, target); -} -#else -template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) -{ - return std::copy(start, end, target); -} -#endif /***************************************************************************** *** Implementation of runtime stack allocation (falling back to malloc) *** @@ -584,7 +542,7 @@ template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target // you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA // to the appropriate stack allocation function -#if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE +#ifndef EIGEN_ALLOCA #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca) #define EIGEN_ALLOCA alloca #elif EIGEN_COMP_MSVC @@ -592,15 +550,6 @@ template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target #endif #endif -// With clang -Oz -mthumb, alloca changes the stack pointer in a way that is -// not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because -// the compiler still emits bad code because stack allocation checks use "<=". -// TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772 -// is fixed. -#if defined(__clang__) && defined(__thumb__) - #undef EIGEN_ALLOCA -#endif - // This helper class construct the allocated memory, and takes care of destructing and freeing the handled data // at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions. template<typename T> class aligned_stack_memory_handler : noncopyable @@ -612,14 +561,12 @@ template<typename T> class aligned_stack_memory_handler : noncopyable * In this case, the buffer elements will also be destructed when this handler will be destructed. * Finally, if \a dealloc is true, then the pointer \a ptr is freed. **/ - EIGEN_DEVICE_FUNC aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc) : m_ptr(ptr), m_size(size), m_deallocate(dealloc) { if(NumTraits<T>::RequireInitialization && m_ptr) Eigen::internal::construct_elements_of_array(m_ptr, size); } - EIGEN_DEVICE_FUNC ~aligned_stack_memory_handler() { if(NumTraits<T>::RequireInitialization && m_ptr) @@ -633,60 +580,6 @@ template<typename T> class aligned_stack_memory_handler : noncopyable bool m_deallocate; }; -#ifdef EIGEN_ALLOCA - -template<typename Xpr, int NbEvaluations, - bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic - > -struct local_nested_eval_wrapper -{ - static const bool NeedExternalBuffer = false; - typedef typename Xpr::Scalar Scalar; - typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType; - ObjectType object; - - EIGEN_DEVICE_FUNC - local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr) - { - EIGEN_UNUSED_VARIABLE(ptr); - eigen_internal_assert(ptr==0); - } -}; - -template<typename Xpr, int NbEvaluations> -struct local_nested_eval_wrapper<Xpr,NbEvaluations,true> -{ - static const bool NeedExternalBuffer = true; - typedef typename Xpr::Scalar Scalar; - typedef typename plain_object_eval<Xpr>::type PlainObject; - typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType; - ObjectType object; - - EIGEN_DEVICE_FUNC - local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) - : object(ptr==0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()), - m_deallocate(ptr==0) - { - if(NumTraits<Scalar>::RequireInitialization && object.data()) - Eigen::internal::construct_elements_of_array(object.data(), object.size()); - object = xpr; - } - - EIGEN_DEVICE_FUNC - ~local_nested_eval_wrapper() - { - if(NumTraits<Scalar>::RequireInitialization && object.data()) - Eigen::internal::destruct_elements_of_array(object.data(), object.size()); - if(m_deallocate) - Eigen::internal::aligned_free(object.data()); - } - -private: - bool m_deallocate; -}; - -#endif // EIGEN_ALLOCA - template<typename T> class scoped_array : noncopyable { T* m_ptr; @@ -710,15 +603,13 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) { std::swap(a.ptr(),b.ptr()); } - + } // end namespace internal /** \internal - * - * The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates, - * and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack - * if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform - * (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap. + * Declares, allocates and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack + * if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform + * (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap. * The allocated buffer is automatically deleted when exiting the scope of this declaration. * If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs. * Here is an example: @@ -729,17 +620,9 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) * } * \endcode * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token. - * - * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogue to - * \code - * typename internal::nested_eval<XPRT_T,N>::type NAME(XPR); - * \endcode - * with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown. - * This is accomplished through alloca if this later is supported and if the required number of bytes - * is below EIGEN_STACK_ALLOCATION_LIMIT. */ #ifdef EIGEN_ALLOCA - + #if EIGEN_DEFAULT_ALIGN_BYTES>0 // We always manually re-align the result of EIGEN_ALLOCA. // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. @@ -756,23 +639,13 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) - - #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \ - Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \ - ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \ - ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \ - typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object) - #else #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \ TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true) - - -#define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR) - + #endif @@ -780,56 +653,32 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) *** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] *** *****************************************************************************/ -#if EIGEN_HAS_CXX17_OVERALIGN - -// C++17 -> no need to bother about alignment anymore :) - -#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) -#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) -#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW -#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) - -#else - -// HIP does not support new/delete on device. -#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE) +#if EIGEN_MAX_ALIGN_BYTES!=0 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ - EIGEN_DEVICE_FUNC \ void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \ EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \ EIGEN_CATCH (...) { return 0; } \ } #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \ - EIGEN_DEVICE_FUNC \ void *operator new(std::size_t size) { \ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \ } \ - EIGEN_DEVICE_FUNC \ void *operator new[](std::size_t size) { \ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \ } \ - EIGEN_DEVICE_FUNC \ void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ - EIGEN_DEVICE_FUNC \ void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ - EIGEN_DEVICE_FUNC \ void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ - EIGEN_DEVICE_FUNC \ void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ /* in-place new and delete. since (at least afaik) there is no actual */ \ /* memory allocated we can safely let the default implementation handle */ \ /* this particular case. */ \ - EIGEN_DEVICE_FUNC \ static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \ - EIGEN_DEVICE_FUNC \ static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \ - EIGEN_DEVICE_FUNC \ void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \ - EIGEN_DEVICE_FUNC \ void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \ /* nothrow-new (returns zero instead of std::bad_alloc) */ \ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ - EIGEN_DEVICE_FUNC \ void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \ } \ @@ -839,34 +688,20 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) #endif #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true) -#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \ - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool( \ - ((Size)!=Eigen::Dynamic) && \ - (((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES )==0)) || \ - ((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) || \ - ((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0)) ))) - -#endif +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \ + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0))) /****************************************************************************/ /** \class aligned_allocator * \ingroup Core_Module * -* \brief STL compatible allocator to use with types requiring a non standrad alignment. -* -* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd. -* By default, it will thus provide at least 16 bytes alignment and more in following cases: -* - 32 bytes alignment if AVX is enabled. -* - 64 bytes alignment if AVX512 is enabled. -* -* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented -* \link TopicPreprocessorDirectivesPerformance there \endlink. +* \brief STL compatible allocator to use with with 16 byte aligned types * * Example: * \code * // Matrix4f requires 16 bytes alignment: -* std::map< int, Matrix4f, std::less<int>, +* std::map< int, Matrix4f, std::less<int>, * aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4; * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator: * std::map< int, Vector3f > my_map_vec3; @@ -901,15 +736,6 @@ public: ~aligned_allocator() {} - #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0) - // In gcc std::allocator::max_size() is bugged making gcc triggers a warning: - // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807 - // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544 - size_type max_size() const { - return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T); - } - #endif - pointer allocate(size_type num, const void* /*hint*/ = 0) { internal::check_size_for_overflow<T>(num); @@ -1072,32 +898,20 @@ inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs) { if(max_std_funcs>=4) queryCacheSizes_intel_direct(l1,l2,l3); - else if(max_std_funcs>=2) - queryCacheSizes_intel_codes(l1,l2,l3); else - l1 = l2 = l3 = 0; + queryCacheSizes_intel_codes(l1,l2,l3); } inline void queryCacheSizes_amd(int& l1, int& l2, int& l3) { int abcd[4]; abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; - - // First query the max supported function. - EIGEN_CPUID(abcd,0x80000000,0); - if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006)) - { - EIGEN_CPUID(abcd,0x80000005,0); - l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB - abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; - EIGEN_CPUID(abcd,0x80000006,0); - l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB - l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB - } - else - { - l1 = l2 = l3 = 0; - } + EIGEN_CPUID(abcd,0x80000005,0); + l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB + abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; + EIGEN_CPUID(abcd,0x80000006,0); + l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB + l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB } #endif @@ -1113,7 +927,7 @@ inline void queryCacheSizes(int& l1, int& l2, int& l3) // identify the CPU vendor EIGEN_CPUID(abcd,0x0,0); - int max_std_funcs = abcd[0]; + int max_std_funcs = abcd[1]; if(cpuid_is_vendor(abcd,GenuineIntel)) queryCacheSizes_intel(l1,l2,l3,max_std_funcs); else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_)) |