Diffstat (limited to 'Eigen/src/Core/util')
-rw-r--r--  Eigen/src/Core/util/BlasUtil.h             |  10
-rw-r--r--  Eigen/src/Core/util/Constants.h            |  15
-rw-r--r--  Eigen/src/Core/util/ForwardDeclarations.h  |   7
-rw-r--r--  Eigen/src/Core/util/MKL_support.h          |  19
-rw-r--r--  Eigen/src/Core/util/Macros.h               |  42
-rw-r--r--  Eigen/src/Core/util/Memory.h               |  96
-rw-r--r--  Eigen/src/Core/util/Meta.h                 |  20
-rw-r--r--  Eigen/src/Core/util/StaticAssert.h         |   3
-rw-r--r--  Eigen/src/Core/util/XprHelper.h            |  28
9 files changed, 169 insertions, 71 deletions
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 91496651c..a28f16fa0 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -42,7 +42,7 @@ template<bool Conjugate> struct conj_if;
template<> struct conj_if<true> {
template<typename T>
- inline T operator()(const T& x) { return conj(x); }
+ inline T operator()(const T& x) { return numext::conj(x); }
template<typename T>
inline T pconj(const T& x) { return internal::pconj(x); }
};
@@ -67,7 +67,7 @@ template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::
{ return c + pmul(x,y); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
- { return Scalar(real(x)*real(y) + imag(x)*imag(y), imag(x)*real(y) - real(x)*imag(y)); }
+ { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); }
};
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
@@ -77,7 +77,7 @@ template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::
{ return c + pmul(x,y); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
- { return Scalar(real(x)*real(y) + imag(x)*imag(y), real(x)*imag(y) - imag(x)*real(y)); }
+ { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
};
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
@@ -87,7 +87,7 @@ template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::
{ return c + pmul(x,y); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
- { return Scalar(real(x)*real(y) - imag(x)*imag(y), - real(x)*imag(y) - imag(x)*real(y)); }
+ { return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y), - numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
};
template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
@@ -113,7 +113,7 @@ template<typename From,typename To> struct get_factor {
};
template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
- static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return real(x); }
+ static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); }
};
// Lightweight helper class to access matrix coefficients.
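
Aside (not part of the patch): the BlasUtil.h hunks above only qualify real/imag/conj with numext:: instead of relying on argument-dependent lookup; the expanded complex products themselves are unchanged. A minimal standalone check, using only std::complex, that the ConjLhs=true, ConjRhs=false expansion really is conj(x)*y:

    #include <cassert>
    #include <complex>

    int main()
    {
      typedef std::complex<double> C;
      C x(1.0, 2.0), y(3.0, -4.0);
      // Same expansion as conj_helper<C, C, true, false>::pmul above:
      C manual(std::real(x)*std::real(y) + std::imag(x)*std::imag(y),
               std::real(x)*std::imag(y) - std::imag(x)*std::real(y));
      assert(manual == std::conj(x) * y);   // exact for these small integer values
      return 0;
    }
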
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index 3fd45e84f..14b9624e1 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -13,13 +13,18 @@
namespace Eigen {
-/** This value means that a quantity is not known at compile-time, and that instead the value is
+/** This value means that a positive quantity (e.g., a size) is not known at compile-time, and that instead the value is
* stored in some runtime variable.
*
* Changing the value of Dynamic breaks the ABI, as Dynamic is often used as a template parameter for Matrix.
*/
const int Dynamic = -1;
+/** This value means that a signed quantity (e.g., a signed index) is not known at compile-time, and that instead its value
+ * has to be specified at runtime.
+ */
+const int DynamicIndex = 0xffffff;
+
/** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>().
* The value Infinity there means the L-infinity norm.
*/
@@ -227,7 +232,9 @@ enum {
* scalar loops to handle the unaligned boundaries */
SliceVectorizedTraversal,
/** \internal Special case to properly handle incompatible scalar types or other defecting cases*/
- InvalidTraversal
+ InvalidTraversal,
+ /** \internal Evaluate all entries at once */
+ AllAtOnceTraversal
};
/** \internal \ingroup enums
@@ -257,9 +264,9 @@ enum {
ColMajor = 0,
/** Storage order is row major (see \ref TopicStorageOrders). */
RowMajor = 0x1, // it is only a coincidence that this is equal to RowMajorBit -- don't rely on that
- /** \internal Align the matrix itself if it is vectorizable fixed-size */
+ /** Align the matrix itself if it is vectorizable fixed-size */
AutoAlign = 0,
- /** \internal Don't require alignment for the matrix itself (the array of coefficients, if dynamically allocated, may still be requested to be aligned) */ // FIXME --- clarify the situation
+ /** Don't require alignment for the matrix itself (the array of coefficients, if dynamically allocated, may still be requested to be aligned) */ // FIXME --- clarify the situation
DontAlign = 0x2
};
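
Aside (not part of the patch): Dynamic and the new DynamicIndex are both compile-time sentinels meaning "decided at runtime"; Dynamic is used for sizes, DynamicIndex for signed quantities such as diagonal offsets. A minimal usage sketch for Dynamic:

    #include <Eigen/Core>

    int main()
    {
      // 3 rows fixed at compile time, column count supplied at runtime:
      Eigen::Matrix<float, 3, Eigen::Dynamic> m(3, 7);
      m.setZero();
      return 0;
    }
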
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index bcdfe3914..d6a814586 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -78,8 +78,7 @@ template<typename ExpressionType> class NestByValue;
template<typename ExpressionType> class ForceAlignedAccess;
template<typename ExpressionType> class SwapWrapper;
-template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
- bool HasDirectAccess = internal::has_direct_access<XprType>::ret> class Block;
+template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block;
template<typename MatrixType, int Size=Dynamic> class VectorBlock;
template<typename MatrixType> class Transpose;
@@ -154,7 +153,6 @@ template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRh
template<typename Scalar> struct scalar_sum_op;
template<typename Scalar> struct scalar_difference_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op;
-template<typename Scalar> struct scalar_quotient_op;
template<typename Scalar> struct scalar_opposite_op;
template<typename Scalar> struct scalar_conjugate_op;
template<typename Scalar> struct scalar_real_op;
@@ -185,6 +183,7 @@ template<typename Scalar> struct scalar_identity_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
} // end namespace internal
@@ -271,6 +270,8 @@ template<typename Derived> struct MatrixExponentialReturnValue;
template<typename Derived> class MatrixFunctionReturnValue;
template<typename Derived> class MatrixSquareRootReturnValue;
template<typename Derived> class MatrixLogarithmReturnValue;
+template<typename Derived> class MatrixPowerReturnValue;
+template<typename Derived, typename Lhs, typename Rhs> class MatrixPowerProduct;
namespace internal {
template <typename Scalar>
diff --git a/Eigen/src/Core/util/MKL_support.h b/Eigen/src/Core/util/MKL_support.h
index 1e6e355d6..8acca9c8c 100644
--- a/Eigen/src/Core/util/MKL_support.h
+++ b/Eigen/src/Core/util/MKL_support.h
@@ -54,8 +54,25 @@
#endif
#if defined EIGEN_USE_MKL
+# include <mkl.h>
+/*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/
+# ifndef INTEL_MKL_VERSION
+# undef EIGEN_USE_MKL /* INTEL_MKL_VERSION is not even defined on older versions */
+# elif INTEL_MKL_VERSION < 100305 /* the intel-mkl-103-release-notes say this was when the lapacke.h interface was added*/
+# undef EIGEN_USE_MKL
+# endif
+# ifndef EIGEN_USE_MKL
+ /*If the MKL version is too old, undef everything*/
+# undef EIGEN_USE_MKL_ALL
+# undef EIGEN_USE_BLAS
+# undef EIGEN_USE_LAPACKE
+# undef EIGEN_USE_MKL_VML
+# undef EIGEN_USE_LAPACKE_STRICT
+# undef EIGEN_USE_LAPACKE
+# endif
+#endif
-#include <mkl.h>
+#if defined EIGEN_USE_MKL
#include <mkl_lapacke.h>
#define EIGEN_MKL_VML_THRESHOLD 128
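
Aside (not part of the patch): with the version guard above, requesting MKL against a release older than 10.3 (which lacks lapacke.h) silently falls back to Eigen's built-in code paths instead of failing on the #include. A sketch of the opt-in from user code, assuming a sufficiently recent MKL is installed and on the include path:

    // Define before including any Eigen header; EIGEN_USE_MKL_ALL turns on
    // EIGEN_USE_MKL, EIGEN_USE_BLAS, EIGEN_USE_LAPACKE and EIGEN_USE_MKL_VML.
    #define EIGEN_USE_MKL_ALL
    #include <Eigen/Dense>
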
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index d973a6837..3a010ec6a 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -12,8 +12,8 @@
#define EIGEN_MACROS_H
#define EIGEN_WORLD_VERSION 3
-#define EIGEN_MAJOR_VERSION 1
-#define EIGEN_MINOR_VERSION 1
+#define EIGEN_MAJOR_VERSION 2
+#define EIGEN_MINOR_VERSION 2
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -115,12 +115,6 @@
#define EIGEN_MAKESTRING2(a) #a
#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
-#if EIGEN_GNUC_AT_LEAST(4,1) && !defined(__clang__) && !defined(__INTEL_COMPILER)
-#define EIGEN_FLATTEN_ATTRIB __attribute__((flatten))
-#else
-#define EIGEN_FLATTEN_ATTRIB
-#endif
-
// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
// but GCC is still doing fine with just inline.
@@ -151,6 +145,12 @@
#define EIGEN_DONT_INLINE
#endif
+#if (defined __GNUC__)
+#define EIGEN_PERMISSIVE_EXPR __extension__
+#else
+#define EIGEN_PERMISSIVE_EXPR
+#endif
+
// this macro allows to get rid of linking errors about multiply defined functions.
// - static is not very good because it prevents definitions from different object files to be merged.
// So static causes the resulting linked executable to be bloated with multiple copies of the same function.
@@ -238,12 +238,19 @@
#endif
// Suppresses 'unused variable' warnings.
-#define EIGEN_UNUSED_VARIABLE(var) (void)var;
+namespace Eigen {
+ namespace internal {
+ template<typename T> void ignore_unused_variable(const T&) {}
+ }
+}
+#define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var);
-#if !defined(EIGEN_ASM_COMMENT) && (defined __GNUC__)
-#define EIGEN_ASM_COMMENT(X) asm("#" X)
-#else
-#define EIGEN_ASM_COMMENT(X)
+#if !defined(EIGEN_ASM_COMMENT)
+ #if (defined __GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
+ #define EIGEN_ASM_COMMENT(X) asm("#" X)
+ #else
+ #define EIGEN_ASM_COMMENT(X)
+ #endif
#endif
/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
@@ -282,7 +289,8 @@
#endif
#ifndef EIGEN_STACK_ALLOCATION_LIMIT
-#define EIGEN_STACK_ALLOCATION_LIMIT 20000
+// 131072 == 128 KB
+#define EIGEN_STACK_ALLOCATION_LIMIT 131072
#endif
#ifndef EIGEN_DEFAULT_IO_FORMAT
@@ -301,6 +309,12 @@
#if defined(_MSC_VER) && (!defined(__INTEL_COMPILER))
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =;
+#elif defined(__clang__) // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
+#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ using Base::operator =; \
+ EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \
+ template <typename OtherDerived> \
+ EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; }
#else
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =; \
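
Aside (not part of the patch): the EIGEN_UNUSED_VARIABLE rewrite replaces the (void)var; cast with a call to a no-op template function, which is a genuine use of the variable as far as the compiler is concerned. A standalone sketch of the same idiom (the demo namespace and macro name are illustrative, not Eigen's):

    namespace demo {
      // Taking the argument by const reference "uses" it without generating code.
      template<typename T> void ignore_unused_variable(const T&) {}
    }
    #define DEMO_UNUSED_VARIABLE(var) demo::ignore_unused_variable(var);

    int main()
    {
      int checked_but_unused = 42;
      DEMO_UNUSED_VARIABLE(checked_but_unused)   // no "unused variable" warning
      return 0;
    }
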
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 758913712..c4011245a 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -19,6 +19,10 @@
#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H
+#ifndef EIGEN_MALLOC_ALREADY_ALIGNED
+
+// Try to determine automatically if malloc is already aligned.
+
// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
@@ -27,7 +31,7 @@
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
- && defined(__LP64__)
+ && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ )
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
@@ -52,10 +56,19 @@
#define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif
-#if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) \
- && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
- #define EIGEN_HAS_POSIX_MEMALIGN 1
-#else
+#endif
+
+// See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554)
+// It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first.
+// Currently, let's include it only on unix systems:
+#if defined(__unix__) || defined(__unix)
+ #include <unistd.h>
+ #if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
+ #define EIGEN_HAS_POSIX_MEMALIGN 1
+ #endif
+#endif
+
+#ifndef EIGEN_HAS_POSIX_MEMALIGN
#define EIGEN_HAS_POSIX_MEMALIGN 0
#endif
@@ -88,11 +101,11 @@ inline void throw_std_bad_alloc()
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
* Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
*/
-inline void* handmade_aligned_malloc(size_t size)
+inline void* handmade_aligned_malloc(std::size_t size)
{
void *original = std::malloc(size+16);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -108,13 +121,18 @@ inline void handmade_aligned_free(void *ptr)
* Since we know that our handmade version is based on std::realloc
* we can use std::realloc to implement efficient reallocation.
*/
-inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
+inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
{
if (ptr == 0) return handmade_aligned_malloc(size);
void *original = *(reinterpret_cast<void**>(ptr) - 1);
+ std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
original = std::realloc(original,size+16);
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
+ void *previous_aligned = static_cast<char *>(original)+previous_offset;
+ if(aligned!=previous_aligned)
+ std::memmove(aligned, previous_aligned, size);
+
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -123,7 +141,7 @@ inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
*** Implementation of generic aligned realloc (when no realloc can be used)***
*****************************************************************************/
-void* aligned_malloc(size_t size);
+void* aligned_malloc(std::size_t size);
void aligned_free(void *ptr);
/** \internal
@@ -183,7 +201,7 @@ inline void check_that_malloc_is_allowed()
{
eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
-#else
+#else
inline void check_that_malloc_is_allowed()
{}
#endif
@@ -204,7 +222,7 @@ inline void* aligned_malloc(size_t size)
if(posix_memalign(&result, 16, size)) result = 0;
#elif EIGEN_HAS_MM_MALLOC
result = _mm_malloc(size, 16);
- #elif (defined _MSC_VER)
+ #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
result = _aligned_malloc(size, 16);
#else
result = handmade_aligned_malloc(size);
@@ -227,7 +245,7 @@ inline void aligned_free(void *ptr)
std::free(ptr);
#elif EIGEN_HAS_MM_MALLOC
_mm_free(ptr);
- #elif defined(_MSC_VER)
+ #elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
_aligned_free(ptr);
#else
handmade_aligned_free(ptr);
@@ -254,12 +272,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
// The defined(_mm_free) is just here to verify that this MSVC version
// implements _mm_malloc/_mm_free based on the corresponding _aligned_
// functions. This may not always be the case and we just try to be safe.
- #if defined(_MSC_VER) && defined(_mm_free)
+ #if defined(_MSC_VER) && (!defined(_WIN32_WCE)) && defined(_mm_free)
result = _aligned_realloc(ptr,new_size,16);
#else
result = generic_aligned_realloc(ptr,new_size,old_size);
#endif
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
result = _aligned_realloc(ptr,new_size,16);
#else
result = handmade_aligned_realloc(ptr,new_size,old_size);
@@ -446,7 +464,6 @@ template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *
template<typename Scalar, typename Index>
static inline Index first_aligned(const Scalar* array, Index size)
{
- typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size,
PacketAlignedMask = PacketSize-1
};
@@ -470,6 +487,13 @@ static inline Index first_aligned(const Scalar* array, Index size)
}
}
+/** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
+ */
+template<typename Index>
+inline static Index first_multiple(Index size, Index base)
+{
+ return ((size+base-1)/base)*base;
+}
// std::copy is much slower than memcpy, so let's introduce a smart_copy which
// use memcpy on trivial types, i.e., on types that does not require an initialization ctor.
@@ -554,7 +578,7 @@ template<typename T> class aligned_stack_memory_handler
*/
#ifdef EIGEN_ALLOCA
- #ifdef __arm__
+ #if defined(__arm__) || defined(_WIN32)
#define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+16)) & ~(size_t(15))) + 16)
#else
#define EIGEN_ALIGNED_ALLOCA EIGEN_ALLOCA
@@ -574,7 +598,7 @@ template<typename T> class aligned_stack_memory_handler
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
-
+
#endif
@@ -610,7 +634,9 @@ template<typename T> class aligned_stack_memory_handler
/* memory allocated we can safely let the default implementation handle */ \
/* this particular case. */ \
static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
+ static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \
void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \
+ void operator delete[](void * memory, void *ptr) throw() { return ::operator delete[](memory,ptr); } \
/* nothrow-new (returns zero instead of std::bad_alloc) */ \
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
void operator delete(void *ptr, const std::nothrow_t&) throw() { \
@@ -635,7 +661,7 @@ template<typename T> class aligned_stack_memory_handler
* Example:
* \code
* // Matrix4f requires 16 bytes alignment:
-* std::map< int, Matrix4f, std::less<int>,
+* std::map< int, Matrix4f, std::less<int>,
* aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
* std::map< int, Vector3f > my_map_vec3;
@@ -705,16 +731,6 @@ public:
::new( p ) T( value );
}
- // Support for c++11
- // Not yet supported in google3
-#if 0 //(__cplusplus >= 201103L)
- template<typename... Args>
- void construct(pointer p, Args&&... args)
- {
- ::new(p) T(std::forward<Args>(args)...);
- }
-#endif
-
void destroy( pointer p )
{
p->~T();
@@ -739,14 +755,19 @@ public:
# if defined(__PIC__) && defined(__i386__)
// Case for x86 with PIC
# define EIGEN_CPUID(abcd,func,id) \
- __asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+ __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
+# elif defined(__PIC__) && defined(__x86_64__)
+ // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
+ // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
+# define EIGEN_CPUID(abcd,func,id) \
+ __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
# else
// Case for x86_64 or x86 w/o PIC
# define EIGEN_CPUID(abcd,func,id) \
- __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
+ __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
# endif
# elif defined(_MSC_VER)
-# if (_MSC_VER > 1500)
+# if (_MSC_VER > 1500) && ( defined(_M_IX86) || defined(_M_X64) )
# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
# endif
# endif
@@ -756,9 +777,9 @@ namespace internal {
#ifdef EIGEN_CPUID
-inline bool cpuid_is_vendor(int abcd[4], const char* vendor)
+inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
{
- return abcd[1]==(reinterpret_cast<const int*>(vendor))[0] && abcd[3]==(reinterpret_cast<const int*>(vendor))[1] && abcd[2]==(reinterpret_cast<const int*>(vendor))[2];
+ return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
}
inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
@@ -900,13 +921,16 @@ inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
#ifdef EIGEN_CPUID
int abcd[4];
+ const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
+ const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
+ const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"
// identify the CPU vendor
EIGEN_CPUID(abcd,0x0,0);
int max_std_funcs = abcd[1];
- if(cpuid_is_vendor(abcd,"GenuineIntel"))
+ if(cpuid_is_vendor(abcd,GenuineIntel))
queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
- else if(cpuid_is_vendor(abcd,"AuthenticAMD") || cpuid_is_vendor(abcd,"AMDisbetter!"))
+ else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
queryCacheSizes_amd(l1,l2,l3);
else
// by default let's use Intel's API
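
Aside (not part of the patch): handmade_aligned_malloc/realloc above rely on the classic over-allocate-and-round trick, now spelled with std::size_t. A standalone sketch of the pointer arithmetic:

    #include <cassert>
    #include <cstdlib>

    int main()
    {
      std::size_t size = 100;
      void *original = std::malloc(size + 16);        // 16 spare bytes for alignment
      if (original == 0) return 1;
      void *aligned  = reinterpret_cast<void*>(
          (reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
      *(reinterpret_cast<void**>(aligned) - 1) = original;  // stash the real block
      assert(reinterpret_cast<std::size_t>(aligned) % 16 == 0);
      std::free(*(reinterpret_cast<void**>(aligned) - 1));  // free via stored pointer
      return 0;
    }

And the integer vendor constants introduced for cpuid_is_vendor are just the 12-character CPUID vendor string packed into three little-endian 32-bit words, which avoids the strict-aliasing cast through const char*. A sketch showing the correspondence (little-endian host assumed):

    #include <cassert>
    #include <cstring>

    int main()
    {
      const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
      char buf[13];
      std::memcpy(buf + 0, &GenuineIntel[0], 4);
      std::memcpy(buf + 4, &GenuineIntel[1], 4);
      std::memcpy(buf + 8, &GenuineIntel[2], 4);
      buf[12] = '\0';
      assert(std::strcmp(buf, "GenuineIntel") == 0);
      return 0;
    }
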
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index a5f31164d..71d587108 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -186,23 +186,35 @@ template<int Y, int InfX, int SupX>
class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };
/** \internal determines whether the product of two numeric types is allowed and what the return type is */
-template<typename T, typename U> struct scalar_product_traits;
+template<typename T, typename U> struct scalar_product_traits
+{
+ enum { Defined = 0 };
+};
template<typename T> struct scalar_product_traits<T,T>
{
- //enum { Cost = NumTraits<T>::MulCost };
+ enum {
+ // Cost = NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef T ReturnType;
};
template<typename T> struct scalar_product_traits<T,std::complex<T> >
{
- //enum { Cost = 2*NumTraits<T>::MulCost };
+ enum {
+ // Cost = 2*NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef std::complex<T> ReturnType;
};
template<typename T> struct scalar_product_traits<std::complex<T>, T>
{
- //enum { Cost = 2*NumTraits<T>::MulCost };
+ enum {
+ // Cost = 2*NumTraits<T>::MulCost,
+ Defined = 1
+ };
typedef std::complex<T> ReturnType;
};
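
Aside (not part of the patch): the new Defined flag lets callers ask at compile time whether a mixed scalar product is supported at all, instead of only which type it would return. A trimmed re-statement for illustration (demo_product_traits is not Eigen's name; compile as C++11 for static_assert):

    #include <complex>

    template<typename T, typename U> struct demo_product_traits { enum { Defined = 0 }; };
    template<typename T> struct demo_product_traits<T,T>
    { enum { Defined = 1 }; typedef T ReturnType; };
    template<typename T> struct demo_product_traits<T,std::complex<T> >
    { enum { Defined = 1 }; typedef std::complex<T> ReturnType; };

    int main()
    {
      // double * complex<double> is a declared mixed product...
      static_assert(demo_product_traits<double, std::complex<double> >::Defined == 1, "defined");
      // ...whereas double * float is not declared here, so Defined stays 0.
      static_assert(demo_product_traits<double, float>::Defined == 0, "undefined");
      return 0;
    }
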
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h
index b46a75b37..8872c5b64 100644
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -89,7 +89,8 @@
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED,
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
- THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH
+ THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
+ OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG
};
};
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 2a65c7cbf..3c4773054 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -65,12 +65,34 @@ template<typename T> class variable_if_dynamic<T, Dynamic>
void setValue(T value) { m_value = value; }
};
+/** \internal like variable_if_dynamic but for DynamicIndex
+ */
+template<typename T, int Value> class variable_if_dynamicindex
+{
+ public:
+ EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex)
+ explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); assert(v == T(Value)); }
+ static T value() { return T(Value); }
+ void setValue(T) {}
+};
+
+template<typename T> class variable_if_dynamicindex<T, DynamicIndex>
+{
+ T m_value;
+ variable_if_dynamicindex() { assert(false); }
+ public:
+ explicit variable_if_dynamicindex(T value) : m_value(value) {}
+ T value() const { return m_value; }
+ void setValue(T value) { m_value = value; }
+};
+
template<typename T> struct functor_traits
{
enum
{
Cost = 10,
- PacketAccess = false
+ PacketAccess = false,
+ IsRepeatable = false
};
};
@@ -301,9 +323,9 @@ template<typename T, int n=1, typename PlainObject = typename eval<T>::type> str
// it's important that this value can still be squared without integer overflowing.
DynamicAsInteger = 10000,
ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost,
- ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? DynamicAsInteger : ScalarReadCost,
+ ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost),
CoeffReadCost = traits<T>::CoeffReadCost,
- CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? DynamicAsInteger : CoeffReadCost,
+ CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost),
NAsInteger = n == Dynamic ? int(DynamicAsInteger) : n,
CostEvalAsInteger = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger,
CostNoEvalAsInteger = NAsInteger * CoeffReadCostAsInteger
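
Aside (not part of the patch): the int() casts above only keep the enum arithmetic in a well-defined signed range; the cost comparison itself is unchanged. A worked example of that comparison under assumed costs:

    #include <iostream>

    int main()
    {
      const int n = 2;                 // number of times the sub-expression is read
      const int ScalarReadCost = 1;    // cost of reading one plain stored scalar
      const int CoeffReadCost  = 5;    // cost of re-evaluating one coefficient
      int CostEval   = (n + 1) * ScalarReadCost + CoeffReadCost; // 8: evaluate once, then read
      int CostNoEval = n * CoeffReadCost;                        // 10: recompute every time
      // 8 < 10, so caching the result in a temporary is the cheaper option here.
      std::cout << (CostEval < CostNoEval ? "evaluate into a temporary" : "nest by reference") << "\n";
      return 0;
    }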