diff options
Diffstat (limited to 'Eigen/src/Core/util/Memory.h')
-rw-r--r-- | Eigen/src/Core/util/Memory.h | 268 |
1 files changed, 227 insertions, 41 deletions
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index c634d7ea0..875318cdb 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -63,14 +63,28 @@ namespace Eigen { namespace internal { -EIGEN_DEVICE_FUNC +EIGEN_DEVICE_FUNC inline void throw_std_bad_alloc() { #ifdef EIGEN_EXCEPTIONS throw std::bad_alloc(); #else std::size_t huge = static_cast<std::size_t>(-1); + #if defined(EIGEN_HIPCC) + // + // calls to "::operator new" are to be treated as opaque function calls (i.e no inlining), + // and as a consequence the code in the #else block triggers the hipcc warning : + // "no overloaded function has restriction specifiers that are compatible with the ambient context" + // + // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects + // the same on "operator new" + // Reverting code back to the old version in this #if block for the hipcc compiler + // new int[huge]; + #else + void* unused = ::operator new(huge); + EIGEN_UNUSED_VARIABLE(unused); + #endif #endif } @@ -83,19 +97,26 @@ inline void throw_std_bad_alloc() /** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned. * Fast, but wastes 16 additional bytes of memory. Does not throw any exception. */ -inline void* handmade_aligned_malloc(std::size_t size) +EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) { - void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES); + eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2"); + + EIGEN_USING_STD(malloc) + void *original = malloc(size+alignment); + if (original == 0) return 0; - void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES); + void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment); *(reinterpret_cast<void**>(aligned) - 1) = original; return aligned; } /** \internal Frees memory allocated with handmade_aligned_malloc */ -inline void handmade_aligned_free(void *ptr) +EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr) { - if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1)); + if (ptr) { + EIGEN_USING_STD(free) + free(*(reinterpret_cast<void**>(ptr) - 1)); + } } /** \internal @@ -114,7 +135,7 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = void *previous_aligned = static_cast<char *>(original)+previous_offset; if(aligned!=previous_aligned) std::memmove(aligned, previous_aligned, size); - + *(reinterpret_cast<void**>(aligned) - 1) = original; return aligned; } @@ -142,7 +163,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() { eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)"); } -#else +#else EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {} #endif @@ -156,9 +177,12 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) void *result; #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED - result = std::malloc(size); + + EIGEN_USING_STD(malloc) + result = malloc(size); + #if EIGEN_DEFAULT_ALIGN_BYTES==16 - eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator."); + eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator."); #endif #else result = handmade_aligned_malloc(size); @@ -174,7 +198,10 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) { #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED - std::free(ptr); + + EIGEN_USING_STD(free) + free(ptr); + #else handmade_aligned_free(ptr); #endif @@ -187,7 +214,7 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) */ inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size) { - EIGEN_UNUSED_VARIABLE(old_size); + EIGEN_UNUSED_VARIABLE(old_size) void *result; #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED @@ -218,7 +245,9 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std: { check_that_malloc_is_allowed(); - void *result = std::malloc(size); + EIGEN_USING_STD(malloc) + void *result = malloc(size); + if(!result && size) throw_std_bad_alloc(); return result; @@ -232,7 +261,8 @@ template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr) { - std::free(ptr); + EIGEN_USING_STD(free) + free(ptr); } template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) @@ -331,7 +361,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size) { destruct_elements_of_array<T>(ptr, size); - aligned_free(ptr); + Eigen::internal::aligned_free(ptr); } /** \internal Deletes objects constructed with conditional_aligned_new @@ -471,8 +501,8 @@ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index } /** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size - */ -template<typename Index> + */ +template<typename Index> inline Index first_multiple(Index size, Index base) { return ((size+base-1)/base)*base; @@ -493,6 +523,7 @@ template<typename T> struct smart_copy_helper<T,true> { IntPtr size = IntPtr(end)-IntPtr(start); if(size==0) return; eigen_internal_assert(start!=0 && end!=0 && target!=0); + EIGEN_USING_STD(memcpy) memcpy(target, start, size); } }; @@ -502,7 +533,7 @@ template<typename T> struct smart_copy_helper<T,false> { { std::copy(start, end, target); } }; -// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. +// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. template<typename T, bool UseMemmove> struct smart_memmove_helper; template<typename T> void smart_memmove(const T* start, const T* end, T* target) @@ -522,19 +553,30 @@ template<typename T> struct smart_memmove_helper<T,true> { template<typename T> struct smart_memmove_helper<T,false> { static inline void run(const T* start, const T* end, T* target) - { + { if (UIntPtr(target) < UIntPtr(start)) { std::copy(start, end, target); } - else + else { std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T); - std::copy_backward(start, end, target + count); + std::copy_backward(start, end, target + count); } } }; +#if EIGEN_HAS_RVALUE_REFERENCES +template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) +{ + return std::move(start, end, target); +} +#else +template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) +{ + return std::copy(start, end, target); +} +#endif /***************************************************************************** *** Implementation of runtime stack allocation (falling back to malloc) *** @@ -542,7 +584,7 @@ template<typename T> struct smart_memmove_helper<T,false> { // you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA // to the appropriate stack allocation function -#ifndef EIGEN_ALLOCA +#if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca) #define EIGEN_ALLOCA alloca #elif EIGEN_COMP_MSVC @@ -550,6 +592,15 @@ template<typename T> struct smart_memmove_helper<T,false> { #endif #endif +// With clang -Oz -mthumb, alloca changes the stack pointer in a way that is +// not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because +// the compiler still emits bad code because stack allocation checks use "<=". +// TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772 +// is fixed. +#if defined(__clang__) && defined(__thumb__) + #undef EIGEN_ALLOCA +#endif + // This helper class construct the allocated memory, and takes care of destructing and freeing the handled data // at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions. template<typename T> class aligned_stack_memory_handler : noncopyable @@ -561,12 +612,14 @@ template<typename T> class aligned_stack_memory_handler : noncopyable * In this case, the buffer elements will also be destructed when this handler will be destructed. * Finally, if \a dealloc is true, then the pointer \a ptr is freed. **/ + EIGEN_DEVICE_FUNC aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc) : m_ptr(ptr), m_size(size), m_deallocate(dealloc) { if(NumTraits<T>::RequireInitialization && m_ptr) Eigen::internal::construct_elements_of_array(m_ptr, size); } + EIGEN_DEVICE_FUNC ~aligned_stack_memory_handler() { if(NumTraits<T>::RequireInitialization && m_ptr) @@ -580,6 +633,60 @@ template<typename T> class aligned_stack_memory_handler : noncopyable bool m_deallocate; }; +#ifdef EIGEN_ALLOCA + +template<typename Xpr, int NbEvaluations, + bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic + > +struct local_nested_eval_wrapper +{ + static const bool NeedExternalBuffer = false; + typedef typename Xpr::Scalar Scalar; + typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType; + ObjectType object; + + EIGEN_DEVICE_FUNC + local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr) + { + EIGEN_UNUSED_VARIABLE(ptr); + eigen_internal_assert(ptr==0); + } +}; + +template<typename Xpr, int NbEvaluations> +struct local_nested_eval_wrapper<Xpr,NbEvaluations,true> +{ + static const bool NeedExternalBuffer = true; + typedef typename Xpr::Scalar Scalar; + typedef typename plain_object_eval<Xpr>::type PlainObject; + typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType; + ObjectType object; + + EIGEN_DEVICE_FUNC + local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) + : object(ptr==0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()), + m_deallocate(ptr==0) + { + if(NumTraits<Scalar>::RequireInitialization && object.data()) + Eigen::internal::construct_elements_of_array(object.data(), object.size()); + object = xpr; + } + + EIGEN_DEVICE_FUNC + ~local_nested_eval_wrapper() + { + if(NumTraits<Scalar>::RequireInitialization && object.data()) + Eigen::internal::destruct_elements_of_array(object.data(), object.size()); + if(m_deallocate) + Eigen::internal::aligned_free(object.data()); + } + +private: + bool m_deallocate; +}; + +#endif // EIGEN_ALLOCA + template<typename T> class scoped_array : noncopyable { T* m_ptr; @@ -603,13 +710,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) { std::swap(a.ptr(),b.ptr()); } - + } // end namespace internal /** \internal - * Declares, allocates and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack - * if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform - * (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap. + * + * The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates, + * and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack + * if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform + * (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap. * The allocated buffer is automatically deleted when exiting the scope of this declaration. * If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs. * Here is an example: @@ -620,9 +729,17 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) * } * \endcode * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token. + * + * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogue to + * \code + * typename internal::nested_eval<XPRT_T,N>::type NAME(XPR); + * \endcode + * with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown. + * This is accomplished through alloca if this later is supported and if the required number of bytes + * is below EIGEN_STACK_ALLOCATION_LIMIT. */ #ifdef EIGEN_ALLOCA - + #if EIGEN_DEFAULT_ALIGN_BYTES>0 // We always manually re-align the result of EIGEN_ALLOCA. // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. @@ -639,13 +756,23 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) + + #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \ + Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \ + ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \ + ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \ + typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object) + #else #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \ TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true) - + + +#define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR) + #endif @@ -653,32 +780,56 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) *** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] *** *****************************************************************************/ -#if EIGEN_MAX_ALIGN_BYTES!=0 +#if EIGEN_HAS_CXX17_OVERALIGN + +// C++17 -> no need to bother about alignment anymore :) + +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) + +#else + +// HIP does not support new/delete on device. +#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE) #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ + EIGEN_DEVICE_FUNC \ void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \ EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \ EIGEN_CATCH (...) { return 0; } \ } #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \ + EIGEN_DEVICE_FUNC \ void *operator new(std::size_t size) { \ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \ } \ + EIGEN_DEVICE_FUNC \ void *operator new[](std::size_t size) { \ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \ } \ + EIGEN_DEVICE_FUNC \ void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ + EIGEN_DEVICE_FUNC \ void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ + EIGEN_DEVICE_FUNC \ void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ + EIGEN_DEVICE_FUNC \ void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ /* in-place new and delete. since (at least afaik) there is no actual */ \ /* memory allocated we can safely let the default implementation handle */ \ /* this particular case. */ \ + EIGEN_DEVICE_FUNC \ static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \ + EIGEN_DEVICE_FUNC \ static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \ + EIGEN_DEVICE_FUNC \ void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \ + EIGEN_DEVICE_FUNC \ void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \ /* nothrow-new (returns zero instead of std::bad_alloc) */ \ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ + EIGEN_DEVICE_FUNC \ void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \ } \ @@ -688,20 +839,34 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) #endif #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true) -#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \ - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0))) +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \ + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool( \ + ((Size)!=Eigen::Dynamic) && \ + (((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES )==0)) || \ + ((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) || \ + ((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0)) ))) + +#endif /****************************************************************************/ /** \class aligned_allocator * \ingroup Core_Module * -* \brief STL compatible allocator to use with with 16 byte aligned types +* \brief STL compatible allocator to use with types requiring a non standrad alignment. +* +* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd. +* By default, it will thus provide at least 16 bytes alignment and more in following cases: +* - 32 bytes alignment if AVX is enabled. +* - 64 bytes alignment if AVX512 is enabled. +* +* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented +* \link TopicPreprocessorDirectivesPerformance there \endlink. * * Example: * \code * // Matrix4f requires 16 bytes alignment: -* std::map< int, Matrix4f, std::less<int>, +* std::map< int, Matrix4f, std::less<int>, * aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4; * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator: * std::map< int, Vector3f > my_map_vec3; @@ -736,6 +901,15 @@ public: ~aligned_allocator() {} + #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0) + // In gcc std::allocator::max_size() is bugged making gcc triggers a warning: + // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807 + // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544 + size_type max_size() const { + return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T); + } + #endif + pointer allocate(size_type num, const void* /*hint*/ = 0) { internal::check_size_for_overflow<T>(num); @@ -898,20 +1072,32 @@ inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs) { if(max_std_funcs>=4) queryCacheSizes_intel_direct(l1,l2,l3); - else + else if(max_std_funcs>=2) queryCacheSizes_intel_codes(l1,l2,l3); + else + l1 = l2 = l3 = 0; } inline void queryCacheSizes_amd(int& l1, int& l2, int& l3) { int abcd[4]; abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; - EIGEN_CPUID(abcd,0x80000005,0); - l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB - abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; - EIGEN_CPUID(abcd,0x80000006,0); - l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB - l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB + + // First query the max supported function. + EIGEN_CPUID(abcd,0x80000000,0); + if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006)) + { + EIGEN_CPUID(abcd,0x80000005,0); + l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB + abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; + EIGEN_CPUID(abcd,0x80000006,0); + l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB + l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB + } + else + { + l1 = l2 = l3 = 0; + } } #endif @@ -927,7 +1113,7 @@ inline void queryCacheSizes(int& l1, int& l2, int& l3) // identify the CPU vendor EIGEN_CPUID(abcd,0x0,0); - int max_std_funcs = abcd[1]; + int max_std_funcs = abcd[0]; if(cpuid_is_vendor(abcd,GenuineIntel)) queryCacheSizes_intel(l1,l2,l3,max_std_funcs); else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_)) |