aboutsummaryrefslogtreecommitdiff
path: root/Eigen/src/Core/util/Memory.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/util/Memory.h')
-rw-r--r--Eigen/src/Core/util/Memory.h96
1 files changed, 60 insertions, 36 deletions
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 758913712..c4011245a 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -19,6 +19,10 @@
#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H
+#ifndef EIGEN_MALLOC_ALREADY_ALIGNED
+
+// Try to determine automatically if malloc is already aligned.
+
// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
@@ -27,7 +31,7 @@
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
- && defined(__LP64__)
+ && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ )
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
@@ -52,10 +56,19 @@
#define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif
-#if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) \
- && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
- #define EIGEN_HAS_POSIX_MEMALIGN 1
-#else
+#endif
+
+// See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
+// It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
+// Currently, let's include it only on unix systems:
+#if defined(__unix__) || defined(__unix)
+ #include <unistd.h>
+ #if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
+ #define EIGEN_HAS_POSIX_MEMALIGN 1
+ #endif
+#endif
+
+#ifndef EIGEN_HAS_POSIX_MEMALIGN
#define EIGEN_HAS_POSIX_MEMALIGN 0
#endif
@@ -88,11 +101,11 @@ inline void throw_std_bad_alloc()
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
* Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
*/
-inline void* handmade_aligned_malloc(size_t size)
+inline void* handmade_aligned_malloc(std::size_t size)
{
void *original = std::malloc(size+16);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -108,13 +121,18 @@ inline void handmade_aligned_free(void *ptr)
* Since we know that our handmade version is based on std::realloc
* we can use std::realloc to implement efficient reallocation.
*/
-inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
+inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
{
if (ptr == 0) return handmade_aligned_malloc(size);
void *original = *(reinterpret_cast<void**>(ptr) - 1);
+ std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
original = std::realloc(original,size+16);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
+ void *previous_aligned = static_cast<char *>(original)+previous_offset;
+ if(aligned!=previous_aligned)
+ std::memmove(aligned, previous_aligned, size);
+
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -123,7 +141,7 @@ inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
*** Implementation of generic aligned realloc (when no realloc can be used)***
*****************************************************************************/
-void* aligned_malloc(size_t size);
+void* aligned_malloc(std::size_t size);
void aligned_free(void *ptr);
/** \internal
@@ -183,7 +201,7 @@ inline void check_that_malloc_is_allowed()
{
eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
-#else
+#else
inline void check_that_malloc_is_allowed()
{}
#endif
@@ -204,7 +222,7 @@ inline void* aligned_malloc(size_t size)
if(posix_memalign(&result, 16, size)) result = 0;
#elif EIGEN_HAS_MM_MALLOC
result = _mm_malloc(size, 16);
- #elif (defined _MSC_VER)
+ #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
result = _aligned_malloc(size, 16);
#else
result = handmade_aligned_malloc(size);
@@ -227,7 +245,7 @@ inline void aligned_free(void *ptr)
std::free(ptr);
#elif EIGEN_HAS_MM_MALLOC
_mm_free(ptr);
- #elif defined(_MSC_VER)
+ #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
_aligned_free(ptr);
#else
handmade_aligned_free(ptr);
@@ -254,12 +272,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
// The defined(_mm_free) is just here to verify that this MSVC version
// implements _mm_malloc/_mm_free based on the corresponding _aligned_
// functions. This may not always be the case and we just try to be safe.
- #if defined(_MSC_VER) && defined(_mm_free)
+ #if defined(_MSC_VER) && (!defined(_WIN32_WCE)) && defined(_mm_free)
result = _aligned_realloc(ptr,new_size,16);
#else
result = generic_aligned_realloc(ptr,new_size,old_size);
#endif
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
result = _aligned_realloc(ptr,new_size,16);
#else
result = handmade_aligned_realloc(ptr,new_size,old_size);
@@ -446,7 +464,6 @@ template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *
template<typename Scalar, typename Index>
static inline Index first_aligned(const Scalar* array, Index size)
{
- typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size,
PacketAlignedMask = PacketSize-1
};
@@ -470,6 +487,13 @@ static inline Index first_aligned(const Scalar* array, Index size)
}
}
+/** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
+ */
+template<typename Index>
+inline static Index first_multiple(Index size, Index base)
+{
+ return ((size+base-1)/base)*base;
+}
// std::copy is much slower than memcpy, so let's introduce a smart_copy which
// use memcpy on trivial types, i.e., on types that does not require an initialization ctor.
@@ -554,7 +578,7 @@ template<typename T> class aligned_stack_memory_handler
*/
#ifdef EIGEN_ALLOCA
- #ifdef __arm__
+ #if defined(__arm__) || defined(_WIN32)
#define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+16)) & ~(size_t(15))) + 16)
#else
#define EIGEN_ALIGNED_ALLOCA EIGEN_ALLOCA
@@ -574,7 +598,7 @@ template<typename T> class aligned_stack_memory_handler
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
-
+
#endif
@@ -610,7 +634,9 @@ template<typename T> class aligned_stack_memory_handler
/* memory allocated we can safely let the default implementation handle */ \
/* this particular case. */ \
static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
+ static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \
void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \
+ void operator delete[](void * memory, void *ptr) throw() { return ::operator delete[](memory,ptr); } \
/* nothrow-new (returns zero instead of std::bad_alloc) */ \
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
void operator delete(void *ptr, const std::nothrow_t&) throw() { \
@@ -635,7 +661,7 @@ template<typename T> class aligned_stack_memory_handler
* Example:
* \code
* // Matrix4f requires 16 bytes alignment:
-* std::map< int, Matrix4f, std::less<int>,
+* std::map< int, Matrix4f, std::less<int>,
* aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
* std::map< int, Vector3f > my_map_vec3;
@@ -705,16 +731,6 @@ public:
::new( p ) T( value );
}
- // Support for c++11
- // Not yet supported in google3
-#if 0 //(__cplusplus >= 201103L)
- template<typename... Args>
- void construct(pointer p, Args&&... args)
- {
- ::new(p) T(std::forward<Args>(args)...);
- }
-#endif
-
void destroy( pointer p )
{
p->~T();
@@ -739,14 +755,19 @@ public:
# if defined(__PIC__) && defined(__i386__)
// Case for x86 with PIC
# define EIGEN_CPUID(abcd,func,id) \
- __asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+ __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+# elif defined(__PIC__) && defined(__x86_64__)
+ // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
+ // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
# else
// Case for x86_64 or x86 w/o PIC
# define EIGEN_CPUID(abcd,func,id) \
- __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
+ __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
# endif
# elif defined(_MSC_VER)
-# if (_MSC_VER > 1500)
+# if (_MSC_VER > 1500) && ( defined(_M_IX86) || defined(_M_X64) )
# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
# endif
# endif
@@ -756,9 +777,9 @@ namespace internal {
#ifdef EIGEN_CPUID
-inline bool cpuid_is_vendor(int abcd[4], const char* vendor)
+inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
{
- return abcd[1]==(reinterpret_cast<const int*>(vendor))[0] && abcd[3]==(reinterpret_cast<const int*>(vendor))[1] && abcd[2]==(reinterpret_cast<const int*>(vendor))[2];
+ return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
}
inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
@@ -900,13 +921,16 @@ inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
#ifdef EIGEN_CPUID
int abcd[4];
+ const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
+ const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
+ const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"
// identify the CPU vendor
EIGEN_CPUID(abcd,0x0,0);
int max_std_funcs = abcd[1];
- if(cpuid_is_vendor(abcd,"GenuineIntel"))
+ if(cpuid_is_vendor(abcd,GenuineIntel))
queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
- else if(cpuid_is_vendor(abcd,"AuthenticAMD") || cpuid_is_vendor(abcd,"AMDisbetter!"))
+ else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
queryCacheSizes_amd(l1,l2,l3);
else
// by default let's use Intel's API