Diffstat (limited to 'Eigen/src/Core/util/Memory.h')
-rw-r--r--    Eigen/src/Core/util/Memory.h    268
1 file changed, 227 insertions, 41 deletions
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index c634d7ea0..875318cdb 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -63,14 +63,28 @@ namespace Eigen {
namespace internal {
-EIGEN_DEVICE_FUNC
+EIGEN_DEVICE_FUNC
inline void throw_std_bad_alloc()
{
#ifdef EIGEN_EXCEPTIONS
throw std::bad_alloc();
#else
std::size_t huge = static_cast<std::size_t>(-1);
+ #if defined(EIGEN_HIPCC)
+ //
+ // calls to "::operator new" are to be treated as opaque function calls (i.e. no inlining),
+ // and as a consequence the code in the #else block triggers the hipcc warning:
+ // "no overloaded function has restriction specifiers that are compatible with the ambient context"
+ //
+ // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects
+ // the same on "operator new"
+ // Reverting code back to the old version in this #if block for the hipcc compiler
+ //
new int[huge];
+ #else
+ void* unused = ::operator new(huge);
+ EIGEN_UNUSED_VARIABLE(unused);
+ #endif
#endif
}
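// --- Illustrative sketch (editorial note, not part of the patch) ---
// When EIGEN_EXCEPTIONS is not defined, throw_std_bad_alloc() deliberately requests an
// impossibly large allocation so that the failing allocation terminates the program
// instead of being silently ignored; the EIGEN_HIPCC branch keeps the old `new int[huge]`
// form only because hipcc warns about the un-annotated `::operator new` call. A minimal
// standalone sketch of the same idea, assuming the code is built with exceptions disabled:
#include <cstddef>
#include <new>

inline void abort_via_failed_allocation()
{
  std::size_t huge = static_cast<std::size_t>(-1);   // ~SIZE_MAX bytes, bound to fail
  void* unused = ::operator new(huge);               // with -fno-exceptions this aborts
  (void)unused;
}
// --- end of sketch ---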
@@ -83,19 +97,26 @@ inline void throw_std_bad_alloc()
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
* Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
*/
-inline void* handmade_aligned_malloc(std::size_t size)
+EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
- void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES);
+ eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2");
+
+ EIGEN_USING_STD(malloc)
+ void *original = malloc(size+alignment);
+
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
/** \internal Frees memory allocated with handmade_aligned_malloc */
-inline void handmade_aligned_free(void *ptr)
+EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
{
- if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
+ if (ptr) {
+ EIGEN_USING_STD(free)
+ free(*(reinterpret_cast<void**>(ptr) - 1));
+ }
}
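// --- Illustrative sketch (editorial note, not part of the patch) ---
// handmade_aligned_malloc over-allocates by `alignment` bytes, rounds the raw pointer up
// to the next multiple of `alignment`, and stores the raw pointer in the word just below
// the returned address so that handmade_aligned_free can recover it later:
//
//   raw ... [padding] [void* raw] [aligned payload .................]
//                                  ^ returned, alignment-byte aligned
//
// A minimal standalone version of the same scheme (names are illustrative only):
#include <cstddef>
#include <cstdlib>

inline void* toy_aligned_malloc(std::size_t size, std::size_t alignment)
{
  void* raw = std::malloc(size + alignment);             // room for padding and the stored pointer
  if (raw == 0) return 0;
  std::size_t addr = reinterpret_cast<std::size_t>(raw);
  void* aligned = reinterpret_cast<void*>((addr & ~(alignment - 1)) + alignment);
  *(reinterpret_cast<void**>(aligned) - 1) = raw;        // stash the original pointer
  return aligned;
}

inline void toy_aligned_free(void* ptr)
{
  if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));  // retrieve and free the original pointer
}
// --- end of sketch ---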
/** \internal
@@ -114,7 +135,7 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
void *previous_aligned = static_cast<char *>(original)+previous_offset;
if(aligned!=previous_aligned)
std::memmove(aligned, previous_aligned, size);
-
+
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -142,7 +163,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
-#else
+#else
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{}
#endif
@@ -156,9 +177,12 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
void *result;
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
- result = std::malloc(size);
+
+ EIGEN_USING_STD(malloc)
+ result = malloc(size);
+
#if EIGEN_DEFAULT_ALIGN_BYTES==16
- eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator.");
+ eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator.");
#endif
#else
result = handmade_aligned_malloc(size);
@@ -174,7 +198,10 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
- std::free(ptr);
+
+ EIGEN_USING_STD(free)
+ free(ptr);
+
#else
handmade_aligned_free(ptr);
#endif
@@ -187,7 +214,7 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
*/
inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
{
- EIGEN_UNUSED_VARIABLE(old_size);
+ EIGEN_UNUSED_VARIABLE(old_size)
void *result;
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
@@ -218,7 +245,9 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std:
{
check_that_malloc_is_allowed();
- void *result = std::malloc(size);
+ EIGEN_USING_STD(malloc)
+ void *result = malloc(size);
+
if(!result && size)
throw_std_bad_alloc();
return result;
@@ -232,7 +261,8 @@ template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void
template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
{
- std::free(ptr);
+ EIGEN_USING_STD(free)
+ free(ptr);
}
template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
@@ -331,7 +361,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
{
destruct_elements_of_array<T>(ptr, size);
- aligned_free(ptr);
+ Eigen::internal::aligned_free(ptr);
}
/** \internal Deletes objects constructed with conditional_aligned_new
@@ -471,8 +501,8 @@ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index
}
/** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
- */
-template<typename Index>
+ */
+template<typename Index>
inline Index first_multiple(Index size, Index base)
{
return ((size+base-1)/base)*base;
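// --- Illustrative note (editorial, not part of the patch) ---
// first_multiple rounds `size` up to the next multiple of `base` with pure integer
// arithmetic: ((size + base - 1) / base) * base. For example,
// first_multiple(13, 4) == 16, first_multiple(16, 4) == 16, and first_multiple(0, 4) == 0.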
@@ -493,6 +523,7 @@ template<typename T> struct smart_copy_helper<T,true> {
IntPtr size = IntPtr(end)-IntPtr(start);
if(size==0) return;
eigen_internal_assert(start!=0 && end!=0 && target!=0);
+ EIGEN_USING_STD(memcpy)
memcpy(target, start, size);
}
};
@@ -502,7 +533,7 @@ template<typename T> struct smart_copy_helper<T,false> {
{ std::copy(start, end, target); }
};
-// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
+// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
template<typename T, bool UseMemmove> struct smart_memmove_helper;
template<typename T> void smart_memmove(const T* start, const T* end, T* target)
@@ -522,19 +553,30 @@ template<typename T> struct smart_memmove_helper<T,true> {
template<typename T> struct smart_memmove_helper<T,false> {
static inline void run(const T* start, const T* end, T* target)
- {
+ {
if (UIntPtr(target) < UIntPtr(start))
{
std::copy(start, end, target);
}
- else
+ else
{
std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
- std::copy_backward(start, end, target + count);
+ std::copy_backward(start, end, target + count);
}
}
};
+#if EIGEN_HAS_RVALUE_REFERENCES
+template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
+{
+ return std::move(start, end, target);
+}
+#else
+template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
+{
+ return std::copy(start, end, target);
+}
+#endif
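// --- Illustrative sketch (editorial note, not part of the patch) ---
// smart_move forwards to the three-argument std::move(first, last, d_first) from
// <algorithm> when rvalue references are available, so element buffers are moved rather
// than copied; without C++11 it degrades to std::copy. A hedged usage sketch with
// standard types (smart_move itself is an Eigen-internal helper):
#include <algorithm>
#include <string>
#include <vector>

inline void move_three_strings()
{
  std::vector<std::string> src(3, std::string(64, 'x'));
  std::vector<std::string> dst(3);
  // Same effect as Eigen::internal::smart_move(src.data(), src.data() + 3, dst.data())
  // on a C++11 compiler: each string's heap buffer is transferred, not duplicated.
  std::move(src.begin(), src.end(), dst.begin());
}
// --- end of sketch ---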
/*****************************************************************************
*** Implementation of runtime stack allocation (falling back to malloc) ***
@@ -542,7 +584,7 @@ template<typename T> struct smart_memmove_helper<T,false> {
// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function
-#ifndef EIGEN_ALLOCA
+#if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE
#if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
#define EIGEN_ALLOCA alloca
#elif EIGEN_COMP_MSVC
@@ -550,6 +592,15 @@ template<typename T> struct smart_memmove_helper<T,false> {
#endif
#endif
+// With clang -Oz -mthumb, alloca changes the stack pointer in a way that is
+// not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because
+// the compiler still emits bad code, as the stack allocation checks use "<=".
+// TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772
+// is fixed.
+#if defined(__clang__) && defined(__thumb__)
+ #undef EIGEN_ALLOCA
+#endif
+
// This helper class construct the allocated memory, and takes care of destructing and freeing the handled data
// at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions.
template<typename T> class aligned_stack_memory_handler : noncopyable
@@ -561,12 +612,14 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
* In this case, the buffer elements will also be destructed when this handler will be destructed.
* Finally, if \a dealloc is true, then the pointer \a ptr is freed.
**/
+ EIGEN_DEVICE_FUNC
aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
: m_ptr(ptr), m_size(size), m_deallocate(dealloc)
{
if(NumTraits<T>::RequireInitialization && m_ptr)
Eigen::internal::construct_elements_of_array(m_ptr, size);
}
+ EIGEN_DEVICE_FUNC
~aligned_stack_memory_handler()
{
if(NumTraits<T>::RequireInitialization && m_ptr)
@@ -580,6 +633,60 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
bool m_deallocate;
};
+#ifdef EIGEN_ALLOCA
+
+template<typename Xpr, int NbEvaluations,
+ bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic
+ >
+struct local_nested_eval_wrapper
+{
+ static const bool NeedExternalBuffer = false;
+ typedef typename Xpr::Scalar Scalar;
+ typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType;
+ ObjectType object;
+
+ EIGEN_DEVICE_FUNC
+ local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr)
+ {
+ EIGEN_UNUSED_VARIABLE(ptr);
+ eigen_internal_assert(ptr==0);
+ }
+};
+
+template<typename Xpr, int NbEvaluations>
+struct local_nested_eval_wrapper<Xpr,NbEvaluations,true>
+{
+ static const bool NeedExternalBuffer = true;
+ typedef typename Xpr::Scalar Scalar;
+ typedef typename plain_object_eval<Xpr>::type PlainObject;
+ typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
+ ObjectType object;
+
+ EIGEN_DEVICE_FUNC
+ local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
+ : object(ptr==0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()),
+ m_deallocate(ptr==0)
+ {
+ if(NumTraits<Scalar>::RequireInitialization && object.data())
+ Eigen::internal::construct_elements_of_array(object.data(), object.size());
+ object = xpr;
+ }
+
+ EIGEN_DEVICE_FUNC
+ ~local_nested_eval_wrapper()
+ {
+ if(NumTraits<Scalar>::RequireInitialization && object.data())
+ Eigen::internal::destruct_elements_of_array(object.data(), object.size());
+ if(m_deallocate)
+ Eigen::internal::aligned_free(object.data());
+ }
+
+private:
+ bool m_deallocate;
+};
+
+#endif // EIGEN_ALLOCA
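// --- Illustrative note (editorial, not part of the patch) ---
// local_nested_eval_wrapper has two modes, selected by MapExternalBuffer:
//  * the primary template keeps an ordinary nested_eval<Xpr,NbEvaluations>::type member
//    and requires ptr==0 (no external buffer is involved);
//  * the MapExternalBuffer==true specialization evaluates the expression into an aligned
//    Map: it uses the caller-provided buffer when ptr!=0 (typically stack memory from
//    EIGEN_ALIGNED_ALLOCA) and falls back to aligned_malloc when ptr==0, with
//    m_deallocate recording whether the destructor must free that heap buffer.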
+
template<typename T> class scoped_array : noncopyable
{
T* m_ptr;
@@ -603,13 +710,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
{
std::swap(a.ptr(),b.ptr());
}
-
+
} // end namespace internal
/** \internal
- * Declares, allocates and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
- * if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
- * (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap.
+ *
+ * The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates,
+ * and constructs an aligned buffer named NAME of SIZE elements of type TYPE on the stack
+ * if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
+ * (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap.
* The allocated buffer is automatically deleted when exiting the scope of this declaration.
* If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
* Here is an example:
@@ -620,9 +729,17 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
* }
* \endcode
* The underlying stack allocation function can be controlled with the EIGEN_ALLOCA preprocessor token.
+ *
+ * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogous to
+ * \code
+ * typename internal::nested_eval<XPR_T,N>::type NAME(XPR);
+ * \endcode
+ * with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown.
+ * This is accomplished through alloca if the latter is supported and if the required number of bytes
+ * is below EIGEN_STACK_ALLOCATION_LIMIT.
*/
#ifdef EIGEN_ALLOCA
-
+
#if EIGEN_DEFAULT_ALIGN_BYTES>0
// We always manually re-align the result of EIGEN_ALLOCA.
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
@@ -639,13 +756,23 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
: Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
+
+ #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \
+ Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
+ ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \
+ ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \
+ typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object)
+
#else
#define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
-
+
+
+#define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR)
+
#endif
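// --- Illustrative sketch (editorial note, not part of the patch) ---
// A hedged usage sketch of the new macro; the function below is hypothetical and only
// shows how a call site inside Eigen could look:
#include <Eigen/Core>

template<typename Derived>
void consume(const Eigen::MatrixBase<Derived>& xpr)
{
  // Roughly equivalent to
  //   typename Eigen::internal::nested_eval<Derived,2>::type tmp(xpr.derived());
  // except that, when a temporary is actually needed and its byte size is below
  // EIGEN_STACK_ALLOCATION_LIMIT, it lives in aligned stack memory obtained through
  // EIGEN_ALIGNED_ALLOCA instead of the heap.
  ei_declare_local_nested_eval(Derived, xpr.derived(), 2, tmp);
  EIGEN_UNUSED_VARIABLE(tmp);  // ... use tmp as a plain, evaluated, aligned object ...
}
// --- end of sketch ---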
@@ -653,32 +780,56 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
*****************************************************************************/
-#if EIGEN_MAX_ALIGN_BYTES!=0
+#if EIGEN_HAS_CXX17_OVERALIGN
+
+// C++17 -> no need to bother about alignment anymore :)
+
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)
+
+#else
+
+// HIP does not support new/delete on device.
+#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+ EIGEN_DEVICE_FUNC \
void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
EIGEN_CATCH (...) { return 0; } \
}
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
+ EIGEN_DEVICE_FUNC \
void *operator new(std::size_t size) { \
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
} \
+ EIGEN_DEVICE_FUNC \
void *operator new[](std::size_t size) { \
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
} \
+ EIGEN_DEVICE_FUNC \
void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ EIGEN_DEVICE_FUNC \
void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ EIGEN_DEVICE_FUNC \
void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ EIGEN_DEVICE_FUNC \
void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
/* in-place new and delete. since (at least afaik) there is no actual */ \
/* memory allocated we can safely let the default implementation handle */ \
/* this particular case. */ \
+ EIGEN_DEVICE_FUNC \
static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
+ EIGEN_DEVICE_FUNC \
static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
+ EIGEN_DEVICE_FUNC \
void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
+ EIGEN_DEVICE_FUNC \
void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
/* nothrow-new (returns zero instead of std::bad_alloc) */ \
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+ EIGEN_DEVICE_FUNC \
void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
} \
@@ -688,20 +839,34 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
#endif
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
-#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
- EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool( \
+ ((Size)!=Eigen::Dynamic) && \
+ (((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES )==0)) || \
+ ((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) || \
+ ((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0)) )))
+
+#endif
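// --- Illustrative note (editorial, not part of the patch) ---
// The relaxed condition no longer requires a fixed-size object to be a multiple of the
// *maximum* alignment. For example, with EIGEN_MAX_ALIGN_BYTES==64 (AVX512) and a
// 16-byte type (Scalar=float, Size=4): the old test required 16 % 64 == 0 and failed,
// whereas the new test accepts it through the
// (EIGEN_MAX_ALIGN_BYTES>=64 && 16 % (EIGEN_MAX_ALIGN_BYTES/4) == 0) clause, so such
// types still get an aligned operator new.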
/****************************************************************************/
/** \class aligned_allocator
* \ingroup Core_Module
*
-* \brief STL compatible allocator to use with with 16 byte aligned types
+* \brief STL compatible allocator to use with types requiring a non-standard alignment.
+*
+* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
+* By default, it will thus provide at least 16-byte alignment, and more in the following cases:
+* - 32-byte alignment if AVX is enabled.
+* - 64-byte alignment if AVX512 is enabled.
+*
+* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
+* \link TopicPreprocessorDirectivesPerformance there \endlink.
*
* Example:
* \code
* // Matrix4f requires 16 bytes alignment:
-* std::map< int, Matrix4f, std::less<int>,
+* std::map< int, Matrix4f, std::less<int>,
* aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
* std::map< int, Vector3f > my_map_vec3;
@@ -736,6 +901,15 @@ public:
~aligned_allocator() {}
+ #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
+ // In gcc, std::allocator::max_size() is buggy, making gcc emit a warning:
+ // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
+ // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
+ size_type max_size() const {
+ return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T);
+ }
+ #endif
+
pointer allocate(size_type num, const void* /*hint*/ = 0)
{
internal::check_size_for_overflow<T>(num);
@@ -898,20 +1072,32 @@ inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
{
if(max_std_funcs>=4)
queryCacheSizes_intel_direct(l1,l2,l3);
- else
+ else if(max_std_funcs>=2)
queryCacheSizes_intel_codes(l1,l2,l3);
+ else
+ l1 = l2 = l3 = 0;
}
inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
{
int abcd[4];
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
- EIGEN_CPUID(abcd,0x80000005,0);
- l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
- abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
- EIGEN_CPUID(abcd,0x80000006,0);
- l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
- l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
+
+ // First query the max supported function.
+ EIGEN_CPUID(abcd,0x80000000,0);
+ if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006))
+ {
+ EIGEN_CPUID(abcd,0x80000005,0);
+ l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ EIGEN_CPUID(abcd,0x80000006,0);
+ l2 = (abcd[2] >> 16) * 1024; // C[31:16] = l2 cache size in KB
+ l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31:18] = l3 cache size in 512KB
+ }
+ else
+ {
+ l1 = l2 = l3 = 0;
+ }
}
#endif
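// --- Illustrative note (editorial, not part of the patch) ---
// CPUID convention used above: leaf 0x80000000 reports the highest supported *extended*
// leaf in EAX (abcd[0]), so the 0x80000005/0x80000006 cache queries are now guarded by
// that value instead of being issued unconditionally. Likewise, leaf 0x0 reports the
// highest *standard* leaf in EAX while EBX/EDX/ECX hold the vendor string, which is why
// the hunk below switches max_std_funcs from abcd[1] (EBX, part of the vendor id) to
// abcd[0] (EAX).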
@@ -927,7 +1113,7 @@ inline void queryCacheSizes(int& l1, int& l2, int& l3)
// identify the CPU vendor
EIGEN_CPUID(abcd,0x0,0);
- int max_std_funcs = abcd[1];
+ int max_std_funcs = abcd[0];
if(cpuid_is_vendor(abcd,GenuineIntel))
queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))