author     Carlos Hernandez <chernand@google.com>    2014-08-05 17:53:32 -0700
committer  Carlos Hernandez <chernand@google.com>    2014-08-05 17:54:05 -0700
commit     7faaa9f3f0df9d23790277834d426c3d992ac3ba (patch)
tree       b788ae3b96daf9f5a79d8ec434e1e9edd56b3a72 /Eigen/src/Core/products
parent     810535bb0c575a003b32076e5352ab8fd3f23a1c (diff)
download   eigen-7faaa9f3f0df9d23790277834d426c3d992ac3ba.tar.gz
Update Eigen to the latest stable release, 3.2.2
./Eigen/src/Core/util/NonMPL2.h is left untouched, so that usage of non-MPL2 code remains disabled. Change-Id: I86fc9257b3c30d0ca15b268d4ef07bf038bba7ca
Diffstat (limited to 'Eigen/src/Core/products')
-rw-r--r--  Eigen/src/Core/products/CoeffBasedProduct.h                2
-rw-r--r--  Eigen/src/Core/products/GeneralBlockPanelKernel.h        302
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrix.h              7
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h  112
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixVector.h             54
-rw-r--r--  Eigen/src/Core/products/GeneralMatrixVector_MKL.h          6
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixMatrix.h         66
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h     10
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixVector.h         33
-rw-r--r--  Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h      4
-rw-r--r--  Eigen/src/Core/products/SelfadjointProduct.h              20
-rw-r--r--  Eigen/src/Core/products/SelfadjointRank2Update.h          18
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixMatrix.h          40
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h      24
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixVector.h          32
-rw-r--r--  Eigen/src/Core/products/TriangularMatrixVector_MKL.h      16
-rw-r--r--  Eigen/src/Core/products/TriangularSolverMatrix.h          18
-rw-r--r--  Eigen/src/Core/products/TriangularSolverMatrix_MKL.h       4
18 files changed, 471 insertions, 297 deletions
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
index 403d25fa9..c06a0df1c 100644
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -150,7 +150,7 @@ class CoeffBasedProduct
{
// we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
// We still allow to mix T and complex<T>.
- EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
+ EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 5eb03c98c..bcdca5b0d 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -69,8 +69,8 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdi
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
-template<typename LhsScalar, typename RhsScalar, int KcFactor>
-void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
+template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
+void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
{
EIGEN_UNUSED_VARIABLE(n);
// Explanations:
@@ -91,13 +91,13 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
};
manage_caching_sizes(GetAction, &l1, &l2);
- k = std::min<std::ptrdiff_t>(k, l1/kdiv);
- std::ptrdiff_t _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
+ k = std::min<SizeType>(k, l1/kdiv);
+ SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
if(_m<m) m = _m & mr_mask;
}
-template<typename LhsScalar, typename RhsScalar>
-inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
+template<typename LhsScalar, typename RhsScalar, typename SizeType>
+inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
{
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
}
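
The hunk above generalizes the blocking-size helper from a hard-coded std::ptrdiff_t to an arbitrary integer SizeType, so callers can pass whichever index type their kernel uses. A minimal usage sketch under that assumption (the sizes are illustrative upper bounds; the helper shrinks them to fit the detected L1/L2 caches):

#include <Eigen/Core>
#include <cstddef>

void blocking_sketch()
{
  std::ptrdiff_t k = 1024, m = 1024, n = 1024;
  Eigen::internal::computeProductBlockingSizes<float,float>(k, m, n);

  // With this patch, any other integer type is accepted as well:
  int ki = 1024, mi = 1024, ni = 1024;
  Eigen::internal::computeProductBlockingSizes<float,float>(ki, mi, ni);
}
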
@@ -527,9 +527,16 @@ struct gebp_kernel
ResPacketSize = Traits::ResPacketSize
};
- EIGEN_DONT_INLINE EIGEN_FLATTEN_ATTRIB
+ EIGEN_DONT_INLINE
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
- Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
+ Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB=0);
+};
+
+template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
+EIGEN_DONT_INLINE
+void gebp_kernel<LhsScalar,RhsScalar,Index,mr,nr,ConjugateLhs,ConjugateRhs>
+ ::operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
+ Index strideA, Index strideB, Index offsetA, Index offsetB, RhsScalar* unpackedB)
{
Traits traits;
@@ -1089,7 +1096,7 @@ EIGEN_ASM_COMMENT("mybegin4");
}
}
}
-};
+
#undef CJMADD
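
This declaration/definition split for gebp_kernel::operator() is the refactoring that repeats for every heavy kernel in the patch: the class keeps only the declaration, and the body moves to a single out-of-line definition carrying EIGEN_DONT_INLINE. A standalone sketch of the pattern (Worker is a hypothetical name, not Eigen API):

template<typename T>
struct Worker
{
  // Declaration only: the class definition stays small.
  void operator()(T* dst, const T* src, int n);
};

// The non-inlined body lives out of line.
template<typename T>
void Worker<T>::operator()(T* dst, const T* src, int n)
{
  for(int i = 0; i < n; ++i)
    dst[i] += src[i];
}
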
@@ -1110,80 +1117,85 @@ EIGEN_ASM_COMMENT("mybegin4");
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
struct gemm_pack_lhs
{
- EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, StorageOrder, Conjugate, PanelMode>
+ ::operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride, Index offset)
+{
+ typedef typename packet_traits<Scalar>::type Packet;
+ enum { PacketSize = packet_traits<Scalar>::size };
+
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
+ EIGEN_UNUSED_VARIABLE(stride)
+ EIGEN_UNUSED_VARIABLE(offset)
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
+ Index count = 0;
+ Index peeled_mc = (rows/Pack1)*Pack1;
+ for(Index i=0; i<peeled_mc; i+=Pack1)
{
- typedef typename packet_traits<Scalar>::type Packet;
- enum { PacketSize = packet_traits<Scalar>::size };
-
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
- Index count = 0;
- Index peeled_mc = (rows/Pack1)*Pack1;
- for(Index i=0; i<peeled_mc; i+=Pack1)
- {
- if(PanelMode) count += Pack1 * offset;
+ if(PanelMode) count += Pack1 * offset;
- if(StorageOrder==ColMajor)
+ if(StorageOrder==ColMajor)
+ {
+ for(Index k=0; k<depth; k++)
{
- for(Index k=0; k<depth; k++)
- {
- Packet A, B, C, D;
- if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
- if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
- if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
- if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
- if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
- if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
- if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
- if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
- }
+ Packet A, B, C, D;
+ if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
+ if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
+ if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
+ if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
+ if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
+ if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
+ if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
+ if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
}
- else
+ }
+ else
+ {
+ for(Index k=0; k<depth; k++)
{
- for(Index k=0; k<depth; k++)
+ // TODO add a vectorized transpose here
+ Index w=0;
+ for(; w<Pack1-3; w+=4)
{
- // TODO add a vectorized transpose here
- Index w=0;
- for(; w<Pack1-3; w+=4)
- {
- Scalar a(cj(lhs(i+w+0, k))),
- b(cj(lhs(i+w+1, k))),
- c(cj(lhs(i+w+2, k))),
- d(cj(lhs(i+w+3, k)));
- blockA[count++] = a;
- blockA[count++] = b;
- blockA[count++] = c;
- blockA[count++] = d;
- }
- if(Pack1%4)
- for(;w<Pack1;++w)
- blockA[count++] = cj(lhs(i+w, k));
+ Scalar a(cj(lhs(i+w+0, k))),
+ b(cj(lhs(i+w+1, k))),
+ c(cj(lhs(i+w+2, k))),
+ d(cj(lhs(i+w+3, k)));
+ blockA[count++] = a;
+ blockA[count++] = b;
+ blockA[count++] = c;
+ blockA[count++] = d;
}
+ if(Pack1%4)
+ for(;w<Pack1;++w)
+ blockA[count++] = cj(lhs(i+w, k));
}
- if(PanelMode) count += Pack1 * (stride-offset-depth);
- }
- if(rows-peeled_mc>=Pack2)
- {
- if(PanelMode) count += Pack2*offset;
- for(Index k=0; k<depth; k++)
- for(Index w=0; w<Pack2; w++)
- blockA[count++] = cj(lhs(peeled_mc+w, k));
- if(PanelMode) count += Pack2 * (stride-offset-depth);
- peeled_mc += Pack2;
- }
- for(Index i=peeled_mc; i<rows; i++)
- {
- if(PanelMode) count += offset;
- for(Index k=0; k<depth; k++)
- blockA[count++] = cj(lhs(i, k));
- if(PanelMode) count += (stride-offset-depth);
}
+ if(PanelMode) count += Pack1 * (stride-offset-depth);
}
-};
+ if(rows-peeled_mc>=Pack2)
+ {
+ if(PanelMode) count += Pack2*offset;
+ for(Index k=0; k<depth; k++)
+ for(Index w=0; w<Pack2; w++)
+ blockA[count++] = cj(lhs(peeled_mc+w, k));
+ if(PanelMode) count += Pack2 * (stride-offset-depth);
+ peeled_mc += Pack2;
+ }
+ for(Index i=peeled_mc; i<rows; i++)
+ {
+ if(PanelMode) count += offset;
+ for(Index k=0; k<depth; k++)
+ blockA[count++] = cj(lhs(i, k));
+ if(PanelMode) count += (stride-offset-depth);
+ }
+}
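
Worth spelling out the PanelMode bookkeeping used by the packing routines above: each panel of width w reserves w*stride scalars in the packed buffer, of which only w*depth are written; w*offset are skipped before them and w*(stride-offset-depth) after. A hypothetical helper (not Eigen API) that makes the count arithmetic explicit:

// Total advance of `count` for one panel in gemm_pack_lhs / gemm_pack_rhs.
inline long packed_panel_span(long w, long depth, long stride, long offset,
                              bool panelMode)
{
  // PanelMode: skip, write, skip -- w*stride scalars in total.
  return panelMode ? w * offset + w * depth + w * (stride - offset - depth)
                   : w * depth;
}
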
// copy a complete panel of the rhs
// this version is optimized for column major matrices
@@ -1197,92 +1209,102 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };
- EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
+ EIGEN_UNUSED_VARIABLE(stride)
+ EIGEN_UNUSED_VARIABLE(offset)
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols = (cols/nr) * nr;
+ Index count = 0;
+ for(Index j2=0; j2<packet_cols; j2+=nr)
{
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- Index packet_cols = (cols/nr) * nr;
- Index count = 0;
- for(Index j2=0; j2<packet_cols; j2+=nr)
+ // skip what we have before
+ if(PanelMode) count += nr * offset;
+ const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+ const Scalar* b1 = &rhs[(j2+1)*rhsStride];
+ const Scalar* b2 = &rhs[(j2+2)*rhsStride];
+ const Scalar* b3 = &rhs[(j2+3)*rhsStride];
+ for(Index k=0; k<depth; k++)
{
- // skip what we have before
- if(PanelMode) count += nr * offset;
- const Scalar* b0 = &rhs[(j2+0)*rhsStride];
- const Scalar* b1 = &rhs[(j2+1)*rhsStride];
- const Scalar* b2 = &rhs[(j2+2)*rhsStride];
- const Scalar* b3 = &rhs[(j2+3)*rhsStride];
- for(Index k=0; k<depth; k++)
- {
- blockB[count+0] = cj(b0[k]);
- blockB[count+1] = cj(b1[k]);
- if(nr==4) blockB[count+2] = cj(b2[k]);
- if(nr==4) blockB[count+3] = cj(b3[k]);
- count += nr;
- }
- // skip what we have after
- if(PanelMode) count += nr * (stride-offset-depth);
+ blockB[count+0] = cj(b0[k]);
+ blockB[count+1] = cj(b1[k]);
+ if(nr==4) blockB[count+2] = cj(b2[k]);
+ if(nr==4) blockB[count+3] = cj(b3[k]);
+ count += nr;
}
+ // skip what we have after
+ if(PanelMode) count += nr * (stride-offset-depth);
+ }
- // copy the remaining columns one at a time (nr==1)
- for(Index j2=packet_cols; j2<cols; ++j2)
+ // copy the remaining columns one at a time (nr==1)
+ for(Index j2=packet_cols; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ const Scalar* b0 = &rhs[(j2+0)*rhsStride];
+ for(Index k=0; k<depth; k++)
{
- if(PanelMode) count += offset;
- const Scalar* b0 = &rhs[(j2+0)*rhsStride];
- for(Index k=0; k<depth; k++)
- {
- blockB[count] = cj(b0[k]);
- count += 1;
- }
- if(PanelMode) count += (stride-offset-depth);
+ blockB[count] = cj(b0[k]);
+ count += 1;
}
+ if(PanelMode) count += (stride-offset-depth);
}
-};
+}
// this version is optimized for row major matrices
template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
{
enum { PacketSize = packet_traits<Scalar>::size };
- EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
- Index stride=0, Index offset=0)
+ EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
+};
+
+template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
+ ::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
+{
+ EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
+ EIGEN_UNUSED_VARIABLE(stride)
+ EIGEN_UNUSED_VARIABLE(offset)
+ eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
+ conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
+ Index packet_cols = (cols/nr) * nr;
+ Index count = 0;
+ for(Index j2=0; j2<packet_cols; j2+=nr)
{
- EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
- eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
- conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
- Index packet_cols = (cols/nr) * nr;
- Index count = 0;
- for(Index j2=0; j2<packet_cols; j2+=nr)
+ // skip what we have before
+ if(PanelMode) count += nr * offset;
+ for(Index k=0; k<depth; k++)
{
- // skip what we have before
- if(PanelMode) count += nr * offset;
- for(Index k=0; k<depth; k++)
- {
- const Scalar* b0 = &rhs[k*rhsStride + j2];
- blockB[count+0] = cj(b0[0]);
- blockB[count+1] = cj(b0[1]);
- if(nr==4) blockB[count+2] = cj(b0[2]);
- if(nr==4) blockB[count+3] = cj(b0[3]);
- count += nr;
- }
- // skip what we have after
- if(PanelMode) count += nr * (stride-offset-depth);
+ const Scalar* b0 = &rhs[k*rhsStride + j2];
+ blockB[count+0] = cj(b0[0]);
+ blockB[count+1] = cj(b0[1]);
+ if(nr==4) blockB[count+2] = cj(b0[2]);
+ if(nr==4) blockB[count+3] = cj(b0[3]);
+ count += nr;
}
- // copy the remaining columns one at a time (nr==1)
- for(Index j2=packet_cols; j2<cols; ++j2)
+ // skip what we have after
+ if(PanelMode) count += nr * (stride-offset-depth);
+ }
+ // copy the remaining columns one at a time (nr==1)
+ for(Index j2=packet_cols; j2<cols; ++j2)
+ {
+ if(PanelMode) count += offset;
+ const Scalar* b0 = &rhs[j2];
+ for(Index k=0; k<depth; k++)
{
- if(PanelMode) count += offset;
- const Scalar* b0 = &rhs[j2];
- for(Index k=0; k<depth; k++)
- {
- blockB[count] = cj(b0[k*rhsStride]);
- count += 1;
- }
- if(PanelMode) count += stride-offset-depth;
+ blockB[count] = cj(b0[k*rhsStride]);
+ count += 1;
}
+ if(PanelMode) count += stride-offset-depth;
}
-};
+}
} // end namespace internal
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index 73a465ec5..3f5ffcf51 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -50,6 +50,7 @@ template<
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
{
+
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
@@ -169,7 +170,6 @@ static void run(Index rows, Index cols, Index depth,
// vertical panel which is, in practice, a very low number.
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
-
// For each mc x kc block of the lhs's vertical panel...
// (==GEPP_VAR1)
for(Index i2=0; i2<rows; i2+=mc)
@@ -183,7 +183,6 @@ static void run(Index rows, Index cols, Index depth,
// Everything is packed, we can now call the block * panel kernel:
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
-
}
}
}
@@ -204,7 +203,7 @@ struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
struct gemm_functor
{
- gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha,
+ gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha,
BlockingType& blocking)
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
{}
@@ -395,7 +394,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
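
A second pattern starts here and recurs through the rest of the patch: scalar parameters change from pass-by-value (Scalar alpha) to pass-by-const-reference (const Scalar& alpha). For builtin scalars this is neutral, but it saves a copy per call when Scalar is an expensive user-defined type. A minimal illustration (BigScalar is hypothetical):

struct BigScalar { /* e.g. an arbitrary-precision value, costly to copy */ };

void by_value(BigScalar alpha);        // copies the scalar at every call
void by_cref(const BigScalar& alpha);  // no copy; still binds to temporaries
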
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
index 432d3a9dc..5c3763909 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h
@@ -12,6 +12,9 @@
namespace Eigen {
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
+struct selfadjoint_rank1_update;
+
namespace internal {
/**********************************************************************
@@ -39,7 +42,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
- const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
+ const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
{
general_matrix_matrix_triangular_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
@@ -55,7 +58,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
+ const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
{
const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
@@ -133,7 +136,7 @@ struct tribb_kernel
enum {
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
};
- void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, ResScalar alpha, RhsScalar* workspace)
+ void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace)
{
gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
@@ -180,31 +183,92 @@ struct tribb_kernel
// high level API
+template<typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct>
+struct general_product_to_triangular_selector;
+
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
+{
+ static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::Index Index;
+
+ typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+ typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+ typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+
+ typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+ typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+ typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+ enum {
+ StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
+ UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,
+ UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1
+ };
+
+ internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs;
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(),
+ (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data()));
+ if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;
+
+ internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs;
+ ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(),
+ (UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data()));
+ if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+
+
+ selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
+ LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
+ RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex>
+ ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha);
+ }
+};
+
+template<typename MatrixType, typename ProductType, int UpLo>
+struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
+{
+ static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
+ {
+ typedef typename MatrixType::Index Index;
+
+ typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
+ typedef internal::blas_traits<Lhs> LhsBlasTraits;
+ typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
+ typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
+ typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
+
+ typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
+ typedef internal::blas_traits<Rhs> RhsBlasTraits;
+ typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
+ typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
+ typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
+
+ typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
+
+ internal::general_matrix_matrix_triangular_product<Index,
+ typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+ typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+ MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
+ ::run(mat.cols(), actualLhs.cols(),
+ &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
+ mat.data(), mat.outerStride(), actualAlpha);
+ }
+};
+
template<typename MatrixType, unsigned int UpLo>
template<typename ProductDerived, typename _Lhs, typename _Rhs>
TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
{
- typedef typename internal::remove_all<typename ProductDerived::LhsNested>::type Lhs;
- typedef internal::blas_traits<Lhs> LhsBlasTraits;
- typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
- typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
- typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
-
- typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;
- typedef internal::blas_traits<Rhs> RhsBlasTraits;
- typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
- typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
- typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
-
- typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
-
- internal::general_matrix_matrix_triangular_product<Index,
- typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
- typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
- MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
- ::run(m_matrix.cols(), actualLhs.cols(),
- &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
- const_cast<Scalar*>(m_matrix.data()), m_matrix.outerStride(), actualAlpha);
+ general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha);
return *this;
}
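
The new selector splits triangular-product assignment into two paths: outer products (column vector times row vector) now go through selfadjoint_rank1_update, everything else through the blocked general_matrix_matrix_triangular_product as before. A user-level sketch of both paths (assuming Eigen 3.2 with this patch applied):

#include <Eigen/Dense>

void triangular_update_sketch()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Zero(4, 4);
  Eigen::VectorXd u = Eigen::VectorXd::Random(4);
  Eigen::VectorXd v = Eigen::VectorXd::Random(4);

  // Outer product: dispatched to the new rank-1 path.
  m.triangularView<Eigen::Lower>() += 2.0 * u * v.transpose();

  // General product: still handled by the blocked triangular GEMM kernel.
  Eigen::MatrixXd a = Eigen::MatrixXd::Random(4, 6);
  Eigen::MatrixXd b = Eigen::MatrixXd::Random(6, 4);
  m.triangularView<Eigen::Lower>() += a * b;
}
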
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index ba1f73957..09387703e 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -52,12 +52,17 @@ EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
- ResScalar* res, Index
- #ifdef EIGEN_INTERNAL_DEBUGGING
- resIncr
- #endif
- , RhsScalar alpha)
+ ResScalar* res, Index resIncr, RhsScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsScalar* lhs, Index lhsStride,
+ const RhsScalar* rhs, Index rhsIncr,
+ ResScalar* res, Index resIncr, RhsScalar alpha)
{
+ EIGEN_UNUSED_VARIABLE(resIncr)
eigen_internal_assert(resIncr==1);
#ifdef _EIGEN_ACCUMULATE_PACKETS
#error _EIGEN_ACCUMULATE_PACKETS has already been defined
@@ -74,21 +79,21 @@ EIGEN_DONT_INLINE static void run(
conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj;
conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj;
if(ConjugateRhs)
- alpha = conj(alpha);
+ alpha = numext::conj(alpha);
enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned };
const Index columnsAtOnce = 4;
const Index peels = 2;
const Index LhsPacketAlignedMask = LhsPacketSize-1;
const Index ResPacketAlignedMask = ResPacketSize-1;
- const Index PeelAlignedMask = ResPacketSize*peels-1;
+// const Index PeelAlignedMask = ResPacketSize*peels-1;
const Index size = rows;
// How many coeffs of the result do we have to skip to be aligned.
// Here we assume data are at least aligned on the base scalar type.
Index alignedStart = internal::first_aligned(res,size);
Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0;
- const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
+ const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
Index alignmentPattern = alignmentStep==0 ? AllAligned
@@ -177,6 +182,8 @@ EIGEN_DONT_INLINE static void run(
_EIGEN_ACCUMULATE_PACKETS(d,du,d);
break;
case FirstAligned:
+ {
+ Index j = alignedStart;
if(peels>1)
{
LhsPacket A00, A01, A02, A03, A10, A11, A12, A13;
@@ -186,7 +193,7 @@ EIGEN_DONT_INLINE static void run(
A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
- for (Index j = alignedStart; j<peeledSize; j+=peels*ResPacketSize)
+ for (; j<peeledSize; j+=peels*ResPacketSize)
{
A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
A12 = pload<LhsPacket>(&lhs2[j-2+LhsPacketSize]); palign<2>(A02,A12);
@@ -210,9 +217,10 @@ EIGEN_DONT_INLINE static void run(
pstore(&res[j+ResPacketSize],T1);
}
}
- for (Index j = peeledSize; j<alignedSize; j+=ResPacketSize)
+ for (; j<alignedSize; j+=ResPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,du,du);
break;
+ }
default:
for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize)
_EIGEN_ACCUMULATE_PACKETS(du,du,du);
@@ -250,7 +258,7 @@ EIGEN_DONT_INLINE static void run(
// process aligned result's coeffs
if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
- pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
+ pstore(&res[i], pcj.pmadd(pload<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
else
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
pstore(&res[i], pcj.pmadd(ploadu<LhsPacket>(&lhs0[i]), ptmp0, pload<ResPacket>(&res[i])));
@@ -271,7 +279,6 @@ EIGEN_DONT_INLINE static void run(
} while(Vectorizable);
#undef _EIGEN_ACCUMULATE_PACKETS
}
-};
/* Optimized row-major matrix * vector product:
* This algorithm processes 4 rows at onces that allows to both reduce
@@ -309,6 +316,15 @@ EIGEN_DONT_INLINE static void run(
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index resIncr,
+ ResScalar alpha);
+};
+
+template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
+ Index rows, Index cols,
+ const LhsScalar* lhs, Index lhsStride,
+ const RhsScalar* rhs, Index rhsIncr,
+ ResScalar* res, Index resIncr,
ResScalar alpha)
{
EIGEN_UNUSED_VARIABLE(rhsIncr);
@@ -332,7 +348,7 @@ EIGEN_DONT_INLINE static void run(
const Index peels = 2;
const Index RhsPacketAlignedMask = RhsPacketSize-1;
const Index LhsPacketAlignedMask = LhsPacketSize-1;
- const Index PeelAlignedMask = RhsPacketSize*peels-1;
+// const Index PeelAlignedMask = RhsPacketSize*peels-1;
const Index depth = cols;
// How many coeffs of the result do we have to skip to be aligned.
@@ -340,7 +356,7 @@ EIGEN_DONT_INLINE static void run(
// if that's not the case then vectorization is discarded, see below.
Index alignedStart = internal::first_aligned(rhs, depth);
Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0;
- const Index peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
+ const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1;
const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0;
Index alignmentPattern = alignmentStep==0 ? AllAligned
@@ -430,10 +446,12 @@ EIGEN_DONT_INLINE static void run(
_EIGEN_ACCUMULATE_PACKETS(d,du,d);
break;
case FirstAligned:
+ {
+ Index j = alignedStart;
if (peels>1)
{
/* Here we proccess 4 rows with with two peeled iterations to hide
- * tghe overhead of unaligned loads. Moreover unaligned loads are handled
+ * the overhead of unaligned loads. Moreover unaligned loads are handled
* using special shift/move operations between the two aligned packets
* overlaping the desired unaligned packet. This is *much* more efficient
* than basic unaligned loads.
@@ -443,7 +461,7 @@ EIGEN_DONT_INLINE static void run(
A02 = pload<LhsPacket>(&lhs2[alignedStart-2]);
A03 = pload<LhsPacket>(&lhs3[alignedStart-3]);
- for (Index j = alignedStart; j<peeledSize; j+=peels*RhsPacketSize)
+ for (; j<peeledSize; j+=peels*RhsPacketSize)
{
RhsPacket b = pload<RhsPacket>(&rhs[j]);
A11 = pload<LhsPacket>(&lhs1[j-1+LhsPacketSize]); palign<1>(A01,A11);
@@ -465,9 +483,10 @@ EIGEN_DONT_INLINE static void run(
ptmp3 = pcj.pmadd(A13, b, ptmp3);
}
}
- for (Index j = peeledSize; j<alignedSize; j+=RhsPacketSize)
+ for (; j<alignedSize; j+=RhsPacketSize)
_EIGEN_ACCUMULATE_PACKETS(d,du,du);
break;
+ }
default:
for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize)
_EIGEN_ACCUMULATE_PACKETS(du,du,du);
@@ -539,7 +558,6 @@ EIGEN_DONT_INLINE static void run(
#undef _EIGEN_ACCUMULATE_PACKETS
}
-};
} // end namespace internal
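
Besides the out-of-line refactoring, this file receives an actual bug fix: the peeled-loop bound changes from a mask-rounded value to

  peeledSize = alignedSize - peels*PacketSize - PacketSize + 1

which guarantees j + peels*PacketSize + PacketSize - 1 <= alignedSize - 1 for every peeled iteration, i.e. the peels*PacketSize coefficients consumed per step plus one packet of look-ahead (the pload at j-1+LhsPacketSize and friends) stay inside the aligned range; the follow-up loop then resumes from the same j. Worked numbers under the assumption PacketSize = 4, peels = 2, alignedStart = 0, alignedSize = 20: peeledSize = 20 - 8 - 4 + 1 = 9, so the peeled loop runs for j = 0 and j = 8 (reading up to index 19), and j = 16 is left to the packet-at-a-time loop.
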
diff --git a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
index e9de6af3e..1cb9fe6b5 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h
@@ -53,7 +53,7 @@ struct general_matrix_vector_product_gemv :
#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
@@ -70,7 +70,7 @@ static EIGEN_DONT_INLINE void run( \
}; \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
@@ -92,7 +92,7 @@ struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,Conjugat
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
\
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index rows, Index cols, \
const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsIncr, \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
index 48209636e..99cf9e0ae 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@@ -30,9 +30,9 @@ struct symm_pack_lhs
for(Index k=i; k<i+BlockRows; k++)
{
for(Index w=0; w<h; w++)
- blockA[count++] = conj(lhs(k, i+w)); // transposed
+ blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
- blockA[count++] = real(lhs(k,k)); // real (diagonal)
+ blockA[count++] = numext::real(lhs(k,k)); // real (diagonal)
for(Index w=h+1; w<BlockRows; w++)
blockA[count++] = lhs(i+w, k); // normal
@@ -41,7 +41,7 @@ struct symm_pack_lhs
// transposed copy
for(Index k=i+BlockRows; k<cols; k++)
for(Index w=0; w<BlockRows; w++)
- blockA[count++] = conj(lhs(k, i+w)); // transposed
+ blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
}
void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
{
@@ -65,10 +65,10 @@ struct symm_pack_lhs
for(Index k=0; k<i; k++)
blockA[count++] = lhs(i, k); // normal
- blockA[count++] = real(lhs(i, i)); // real (diagonal)
+ blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
for(Index k=i+1; k<cols; k++)
- blockA[count++] = conj(lhs(k, i)); // transposed
+ blockA[count++] = numext::conj(lhs(k, i)); // transposed
}
}
};
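
The conj/real -> numext::conj/numext::real renaming seen in this file repeats across the remaining hunks: Eigen 3.2 hosts these scalar helpers in the numext namespace instead of internal::. A minimal sketch of what they evaluate to (standard Eigen 3.2 behavior):

#include <Eigen/Core>
#include <complex>

void numext_sketch()
{
  std::complex<float> z(1.f, 2.f);
  float r = Eigen::numext::real(z);               // 1.f
  std::complex<float> c = Eigen::numext::conj(z); // (1.f, -2.f)
  double d = Eigen::numext::conj(3.0);            // identity on real scalars
}
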
@@ -107,12 +107,12 @@ struct symm_pack_rhs
// transpose
for(Index k=k2; k<j2; k++)
{
- blockB[count+0] = conj(rhs(j2+0,k));
- blockB[count+1] = conj(rhs(j2+1,k));
+ blockB[count+0] = numext::conj(rhs(j2+0,k));
+ blockB[count+1] = numext::conj(rhs(j2+1,k));
if (nr==4)
{
- blockB[count+2] = conj(rhs(j2+2,k));
- blockB[count+3] = conj(rhs(j2+3,k));
+ blockB[count+2] = numext::conj(rhs(j2+2,k));
+ blockB[count+3] = numext::conj(rhs(j2+3,k));
}
count += nr;
}
@@ -124,11 +124,11 @@ struct symm_pack_rhs
for (Index w=0 ; w<h; ++w)
blockB[count+w] = rhs(k,j2+w);
- blockB[count+h] = real(rhs(k,k));
+ blockB[count+h] = numext::real(rhs(k,k));
// transpose
for (Index w=h+1 ; w<nr; ++w)
- blockB[count+w] = conj(rhs(j2+w,k));
+ blockB[count+w] = numext::conj(rhs(j2+w,k));
count += nr;
++h;
}
@@ -151,12 +151,12 @@ struct symm_pack_rhs
{
for(Index k=k2; k<end_k; k++)
{
- blockB[count+0] = conj(rhs(j2+0,k));
- blockB[count+1] = conj(rhs(j2+1,k));
+ blockB[count+0] = numext::conj(rhs(j2+0,k));
+ blockB[count+1] = numext::conj(rhs(j2+1,k));
if (nr==4)
{
- blockB[count+2] = conj(rhs(j2+2,k));
- blockB[count+3] = conj(rhs(j2+3,k));
+ blockB[count+2] = numext::conj(rhs(j2+2,k));
+ blockB[count+3] = numext::conj(rhs(j2+3,k));
}
count += nr;
}
@@ -169,13 +169,13 @@ struct symm_pack_rhs
Index half = (std::min)(end_k,j2);
for(Index k=k2; k<half; k++)
{
- blockB[count] = conj(rhs(j2,k));
+ blockB[count] = numext::conj(rhs(j2,k));
count += 1;
}
if(half==j2 && half<k2+rows)
{
- blockB[count] = real(rhs(j2,j2));
+ blockB[count] = numext::real(rhs(j2,j2));
count += 1;
}
else
@@ -211,7 +211,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha)
{
product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@@ -234,7 +234,18 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha)
{
Index size = rows;
@@ -301,7 +312,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
}
}
}
-};
// matrix * selfadjoint product
template <typename Scalar, typename Index,
@@ -315,7 +325,18 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha)
+ const Scalar& alpha);
+};
+
+template <typename Scalar, typename Index,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs>
+EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
+ Index rows, Index cols,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha)
{
Index size = cols;
@@ -353,7 +374,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
}
}
}
-};
} // end namespace internal
@@ -383,7 +403,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
};
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
index 4e5c4125c..dfa687fef 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h
@@ -23,7 +23,7 @@
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+//
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
@@ -47,7 +47,7 @@ template <typename Index, \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -98,7 +98,7 @@ template <typename Index, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -174,7 +174,7 @@ template <typename Index, \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@@ -224,7 +224,7 @@ template <typename Index, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector.h b/Eigen/src/Core/products/SelfadjointMatrixVector.h
index c3145c69a..f698f67f9 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@@ -32,10 +32,18 @@ static EIGEN_DONT_INLINE void run(
const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr,
Scalar* res,
+ Scalar alpha);
+};
+
+template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
+ Index size,
+ const Scalar* lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsIncr,
+ Scalar* res,
Scalar alpha)
{
typedef typename packet_traits<Scalar>::type Packet;
- typedef typename NumTraits<Scalar>::Real RealScalar;
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
enum {
@@ -51,7 +59,7 @@ static EIGEN_DONT_INLINE void run(
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0;
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
- Scalar cjAlpha = ConjugateRhs ? conj(alpha) : alpha;
+ Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha;
// FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
// if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
@@ -71,8 +79,8 @@ static EIGEN_DONT_INLINE void run(
for (Index j=FirstTriangular ? bound : 0;
j<(FirstTriangular ? size : bound);j+=2)
{
- register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
- register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
+ const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
+ const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
Scalar t0 = cjAlpha * rhs[j];
Packet ptmp0 = pset1<Packet>(t0);
@@ -90,8 +98,8 @@ static EIGEN_DONT_INLINE void run(
size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
- res[j] += cjd.pmul(internal::real(A0[j]), t0);
- res[j+1] += cjd.pmul(internal::real(A1[j+1]), t1);
+ res[j] += cjd.pmul(numext::real(A0[j]), t0);
+ res[j+1] += cjd.pmul(numext::real(A1[j+1]), t1);
if(FirstTriangular)
{
res[j] += cj0.pmul(A1[j], t1);
@@ -106,8 +114,8 @@ static EIGEN_DONT_INLINE void run(
for (size_t i=starti; i<alignedStart; ++i)
{
res[i] += t0 * A0[i] + t1 * A1[i];
- t2 += conj(A0[i]) * rhs[i];
- t3 += conj(A1[i]) * rhs[i];
+ t2 += numext::conj(A0[i]) * rhs[i];
+ t3 += numext::conj(A1[i]) * rhs[i];
}
// Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up)
// gcc 4.2 does this optimization automatically.
@@ -139,12 +147,12 @@ static EIGEN_DONT_INLINE void run(
}
for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
{
- register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
+ const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
Scalar t1 = cjAlpha * rhs[j];
Scalar t2(0);
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
- res[j] += cjd.pmul(internal::real(A0[j]), t1);
+ res[j] += cjd.pmul(numext::real(A0[j]), t1);
for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
{
res[i] += cj0.pmul(A0[i], t1);
@@ -153,7 +161,6 @@ static EIGEN_DONT_INLINE void run(
res[j] += alpha * t2;
}
}
-};
} // end namespace internal
@@ -180,7 +187,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
typedef typename Dest::Scalar ResScalar;
typedef typename Base::RhsScalar RhsScalar;
@@ -260,7 +267,7 @@ struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
// let's simply transpose the product
Transpose<Dest> destT(dest);
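
The "transpose the product" shortcut this file ends on rests on a one-line identity: for dest += alpha * L * S with S self-adjoint, transposing gives dest^T += alpha * S^T * L^T, and S^T = conj(S) when S^H = S, so the selfadjoint-on-the-left kernel can serve both operand orders.
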
diff --git a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
index f88d483b6..86684b66d 100644
--- a/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h
@@ -50,7 +50,7 @@ struct selfadjoint_matrix_vector_product_symv :
#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index size, const Scalar* lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
enum {\
@@ -77,7 +77,7 @@ struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,Co
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
\
-static EIGEN_DONT_INLINE void run( \
+static void run( \
Index size, const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
{ \
diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h
index 6a55f3d77..6ca4ae6c0 100644
--- a/Eigen/src/Core/products/SelfadjointProduct.h
+++ b/Eigen/src/Core/products/SelfadjointProduct.h
@@ -18,21 +18,19 @@
namespace Eigen {
-template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
-struct selfadjoint_rank1_update;
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
{
- static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
internal::conj_if<ConjRhs> cj;
typedef Map<const Matrix<Scalar,Dynamic,1> > OtherMap;
- typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjRhsType;
+ typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjLhsType;
for (Index i=0; i<size; ++i)
{
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1)))
- += (alpha * cj(vec[i])) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
+ += (alpha * cj(vecY[i])) * ConjLhsType(OtherMap(vecX+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
}
}
};
@@ -40,9 +38,9 @@ struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,RowMajor,UpLo,ConjLhs,ConjRhs>
{
- static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
+ static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
- selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vec,alpha);
+ selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vecY,vecX,alpha);
}
};
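
selfadjoint_rank1_update is generalized above from one vector to two (vecX, vecY), i.e. from the symmetric update A += alpha*x*x^* to the triangular part of A += alpha*x*y^*, which is exactly what the new outer-product path in GeneralMatrixMatrixTriangular.h calls into. The row-major specialization stays a one-liner because storing A row-major equals storing A^T column-major, and (x*y^*)^T = conj(y)*x^T, hence the swapped vectors, swapped conjugation flags, and flipped triangle.
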
@@ -52,7 +50,7 @@ struct selfadjoint_product_selector;
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
{
- static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@@ -78,14 +76,14 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
(!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
- ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualAlpha);
+ ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualOtherPtr, actualAlpha);
}
};
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
{
- static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
+ static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@@ -113,7 +111,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
-::rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha)
+::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
{
selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);
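
At the user level only the alpha parameter's passing convention changes; rankUpdate reads as before. A short usage sketch (assuming Eigen 3.2):

#include <Eigen/Dense>

void rank_update_sketch()
{
  Eigen::MatrixXd a = Eigen::MatrixXd::Zero(3, 3);
  Eigen::VectorXd u = Eigen::VectorXd::Random(3);
  // Updates the lower triangle: a += 2 * u * u^T.
  a.selfadjointView<Eigen::Lower>().rankUpdate(u, 2.0);
}
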
diff --git a/Eigen/src/Core/products/SelfadjointRank2Update.h b/Eigen/src/Core/products/SelfadjointRank2Update.h
index 57a98cc2d..8594a97ce 100644
--- a/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/Eigen/src/Core/products/SelfadjointRank2Update.h
@@ -24,14 +24,14 @@ struct selfadjoint_rank2_update_selector;
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
{
- static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
{
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+i, size-i) +=
- (conj(alpha) * conj(u.coeff(i))) * v.tail(size-i)
- + (alpha * conj(v.coeff(i))) * u.tail(size-i);
+ (numext::conj(alpha) * numext::conj(u.coeff(i))) * v.tail(size-i)
+ + (alpha * numext::conj(v.coeff(i))) * u.tail(size-i);
}
}
};
@@ -39,13 +39,13 @@ struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>
{
- static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
+ static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i, i+1) +=
- (conj(alpha) * conj(u.coeff(i))) * v.head(i+1)
- + (alpha * conj(v.coeff(i))) * u.head(i+1);
+ (numext::conj(alpha) * numext::conj(u.coeff(i))) * v.head(i+1)
+ + (alpha * numext::conj(v.coeff(i))) * u.head(i+1);
}
};
@@ -58,7 +58,7 @@ template<bool Cond, typename T> struct conj_expr_if
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU, typename DerivedV>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
-::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha)
+::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
{
typedef internal::blas_traits<DerivedU> UBlasTraits;
typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
@@ -75,9 +75,9 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived())
- * internal::conj(VBlasTraits::extractScalarFactor(v.derived()));
+ * numext::conj(VBlasTraits::extractScalarFactor(v.derived()));
if (IsRowMajor)
- actualAlpha = internal::conj(actualAlpha);
+ actualAlpha = numext::conj(actualAlpha);
internal::selfadjoint_rank2_update_selector<Scalar, Index,
typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type,
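[editor's note] Column by column, the two selectors above implement the Hermitian rank-2 update M += alpha*u*v^* + conj(alpha)*v*u^*, touching only the stored triangle. A hedged check of that identity against a dense reference (all names illustrative):

    #include <Eigen/Dense>
    #include <cassert>
    #include <complex>

    int main()
    {
      std::complex<double> alpha(1.5, -0.5);
      Eigen::MatrixXcd A = Eigen::MatrixXcd::Random(4,4);
      A = (A + A.adjoint()).eval();                     // Hermitian starting point
      Eigen::VectorXcd u = Eigen::VectorXcd::Random(4);
      Eigen::VectorXcd v = Eigen::VectorXcd::Random(4);

      Eigen::MatrixXcd ref = A + alpha * u * v.adjoint()
                               + std::conj(alpha) * v * u.adjoint();

      A.selfadjointView<Eigen::Lower>().rankUpdate(u, v, alpha);
      Eigen::MatrixXcd updated = Eigen::MatrixXcd(A.selfadjointView<Eigen::Lower>());

      assert(updated.isApprox(ref));   // the lower triangle carries the update
      return 0;
    }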
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 92cba66f6..8110507b5 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -61,7 +61,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_triangular_matrix_matrix<Scalar, Index,
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
@@ -96,7 +96,20 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_rows,_depth);
@@ -203,15 +216,14 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
}
}
}
-};
// implements col-major += alpha * op(general) * op(triangular)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
- LhsStorageOrder,ConjugateLhs,
- RhsStorageOrder,ConjugateRhs,ColMajor,Version>
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
@@ -225,7 +237,20 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
- Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
+};
+
+template <typename Scalar, typename Index, int Mode,
+ int LhsStorageOrder, bool ConjugateLhs,
+ int RhsStorageOrder, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
+ LhsStorageOrder,ConjugateLhs,
+ RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
+ Index _rows, Index _cols, Index _depth,
+ const Scalar* _lhs, Index lhsStride,
+ const Scalar* _rhs, Index rhsStride,
+ Scalar* res, Index resStride,
+ const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_cols,_depth);
@@ -343,7 +368,6 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
}
}
}
-};
/***************************************************************************
* Wrapper to product_triangular_matrix_matrix
@@ -364,7 +388,7 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
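[editor's note] The dominant pattern in this file's hunks splits each run() into an in-class declaration and an out-of-line EIGEN_DONT_INLINE definition, so the no-inline attribute is attached to the actual definition of the large kernel body. A minimal standalone sketch of that split, with made-up names:

    #include <Eigen/Core>   // provides EIGEN_DONT_INLINE

    template<typename Scalar>
    struct product_kernel
    {
      // in-class declaration only: the class body stays small and readable
      static EIGEN_DONT_INLINE void run(const Scalar* in, Scalar* out,
                                        const Scalar& alpha);
    };

    // the heavy body lives out of line; the attribute on the definition keeps
    // compilers from inlining it into every call site
    template<typename Scalar>
    EIGEN_DONT_INLINE void product_kernel<Scalar>::run(const Scalar* in, Scalar* out,
                                                       const Scalar& alpha)
    {
      *out += alpha * *in;
    }

    int main()
    {
      double x = 3.0, y = 1.0;
      product_kernel<double>::run(&x, &y, 2.0);   // y becomes 7
      return 0;
    }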
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
index 8173da5bb..ba41a1c99 100644
--- a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h
@@ -57,11 +57,11 @@ template <typename Index, int Mode, \
struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \
static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
- const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) { \
+ const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \
product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
- _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
+ _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
} \
};
@@ -91,12 +91,12 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
}; \
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
- EIGTYPE alpha) \
+ EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
{ \
Index diagSize = (std::min)(_rows,_depth); \
Index rows = IsLower ? _rows : diagSize; \
@@ -115,16 +115,16 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
/* Most likely no benefit in calling TRMM or GEMM from MKL */ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
- _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
+ _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
/*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
} else { \
/* Makes sense to call GEMM */ \
Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
MKL_INT aStride = aa_tmp.outerStride(); \
- gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
+ gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
- rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, blocking, 0); \
+ rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \
\
/*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
} \
@@ -205,12 +205,12 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
}; \
\
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
- EIGTYPE alpha) \
+ EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
{ \
Index diagSize = (std::min)(_cols,_depth); \
Index rows = _rows; \
@@ -229,16 +229,16 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
/* Most likely no benefit in calling TRMM or GEMM from MKL */ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
- _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha); \
+ _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
/*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
} else { \
/* Makes sense to call GEMM */ \
Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
MKL_INT aStride = aa_tmp.outerStride(); \
- gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> blocking(_rows,_cols,_depth); \
+ gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
- rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, blocking, 0); \
+ rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \
\
/*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
} \
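[editor's note] The rename from blocking to gemm_blocking in the hunks above matters because the macro body now also receives a parameter named blocking; a local of the same name would shadow the caller-supplied object. A toy reproduction of the hazard, using no Eigen types (all names invented):

    #include <iostream>

    struct Blocking { int id; };

    void run(Blocking& blocking)
    {
      // the patch gives the local a distinct name so both stay visible;
      // had this local also been called `blocking`, every later use would
      // silently refer to the temporary instead of the parameter
      Blocking gemm_blocking{2};
      std::cout << blocking.id << "\n";   // prints the caller's value, as intended
    }

    int main()
    {
      Blocking b{1};
      run(b);   // prints 1
      return 0;
    }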
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h
index b1c10c201..6117d5a82 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -27,7 +27,13 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
+ ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index size = (std::min)(_rows,_cols);
@@ -78,7 +84,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
_res, resIncr, alpha);
}
}
-};
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
@@ -89,8 +94,14 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
- static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
- const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
+ static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
+};
+
+template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
+EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
+ ::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
+ const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index diagSize = (std::min)(_rows,_cols);
@@ -141,7 +152,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
&res.coeffRef(diagSize), resIncr, alpha);
}
}
-};
/***************************************************************************
 * Wrapper to triangular_matrix_vector_product
@@ -171,7 +181,7 @@ struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
@@ -187,7 +197,7 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
- template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
+ template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
@@ -205,7 +215,7 @@ namespace internal {
template<> struct trmv_selector<ColMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
- static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
+ static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::Index Index;
@@ -235,7 +245,7 @@ template<> struct trmv_selector<ColMajor>
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
- bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
+ bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
@@ -246,7 +256,7 @@ template<> struct trmv_selector<ColMajor>
if(!evalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
- int size = dest.size();
+ Index size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
@@ -281,7 +291,7 @@ template<> struct trmv_selector<ColMajor>
template<> struct trmv_selector<RowMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
- static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
+ static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::LhsScalar LhsScalar;
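[editor's note] In the trmv_selector hunk above, alphaIsCompatible gates whether the kernel may accumulate straight into the destination: when a complex result is built from a real-valued kernel (ComplexByReal), that is only safe if alpha carries no imaginary part; otherwise a temporary is used and rescaled afterwards. A standalone sketch of the test, with std::imag standing in for Eigen's numext::imag:

    #include <complex>

    int main()
    {
      std::complex<double> alpha(2.0, 0.0);
      const bool complexByReal = true;   // e.g. complex ResScalar, real RhsScalar
      const bool alphaIsCompatible =
          !complexByReal || (std::imag(alpha) == 0.0);
      // purely real alpha: accumulate directly into the destination
      return alphaIsCompatible ? 0 : 1;
    }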
diff --git a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
index 3589b8c5e..09f110da7 100644
--- a/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
+++ b/Eigen/src/Core/products/TriangularMatrixVector_MKL.h
@@ -50,7 +50,7 @@ struct triangular_matrix_vector_product_trmv :
#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@@ -58,7 +58,7 @@ struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs
}; \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
+ static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@@ -81,12 +81,12 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
- const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (ConjLhs || IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
- _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha, blocking); \
+ _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
return; \
}\
Index size = (std::min)(_rows,_cols); \
@@ -166,12 +166,12 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
- static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
- const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
+ static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
+ const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
- _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha, blocking); \
+ _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
return; \
}\
Index size = (std::min)(_rows,_cols); \
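[editor's note] These macro-generated specializations rely on Eigen's Version tag: calls land on the Specialized specialization, which either invokes the vendor routine or forwards to the generic kernel via the BuiltIn tag, so the signature fixes above keep that forwarding call well-formed. A toy reproduction of the dispatch, with stand-in names and locally defined enum values:

    #include <cstdio>

    enum { Specialized = 0, BuiltIn = 1 };   // stand-ins for Eigen's constants

    // generic implementation, reachable explicitly through the BuiltIn tag
    template<typename Scalar, int Version = Specialized>
    struct trmv_kernel
    {
      static void run(bool) { std::puts("generic Eigen kernel"); }
    };

    // backend specialization: fast path, forwards unsupported cases
    template<typename Scalar>
    struct trmv_kernel<Scalar, Specialized>
    {
      static void run(bool unsupported)   // e.g. ConjLhs || IsZeroDiag
      {
        if (unsupported) { trmv_kernel<Scalar, BuiltIn>::run(unsupported); return; }
        std::puts("vendor BLAS ?trmv");
      }
    };

    int main()
    {
      trmv_kernel<double>::run(false);   // takes the BLAS path
      trmv_kernel<double>::run(true);    // falls back to the generic kernel
      return 0;
    }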
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h
index a49ea3183..f103eae72 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix.h
@@ -18,7 +18,7 @@ namespace internal {
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
{
- static EIGEN_DONT_INLINE void run(
+ static void run(
Index size, Index cols,
const Scalar* tri, Index triStride,
Scalar* _other, Index otherStride,
@@ -42,6 +42,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index cols = otherSize;
@@ -173,7 +180,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
}
}
}
-};
/* Optimized triangular solver with multiple left-hand sides and the triangular matrix on the right
*/
@@ -184,6 +190,13 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
+ level3_blocking<Scalar,Scalar>& blocking);
+};
+template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
+EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
+ Index size, Index otherSize,
+ const Scalar* _tri, Index triStride,
+ Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index rows = otherSize;
@@ -308,7 +321,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
}
}
}
-};
} // end namespace internal
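[editor's note] The two solver specializations above back Eigen's user-facing triangular solves with one or more right-hand sides. A hedged usage sketch of that API (matrix names illustrative):

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(4,4);
      A.diagonal().array() += 4.0;                       // keep the triangle well conditioned
      Eigen::MatrixXd B = Eigen::MatrixXd::Random(4,2);  // two right-hand sides

      Eigen::MatrixXd X  = A.triangularView<Eigen::Lower>().solve(B);
      Eigen::MatrixXd AX = A.triangularView<Eigen::Lower>() * X;
      std::cout << (AX - B).norm() << "\n";              // ~0 up to round-off

      A.triangularView<Eigen::Lower>().solveInPlace(B);  // in-place variant overwrites B
      return 0;
    }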
diff --git a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
index a4f508b2e..6a0bb8339 100644
--- a/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
+++ b/Eigen/src/Core/products/TriangularSolverMatrix_MKL.h
@@ -48,7 +48,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
@@ -103,7 +103,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
- static EIGEN_DONT_INLINE void run( \
+ static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \