aboutsummaryrefslogtreecommitdiff
path: root/Eigen/src/Core/GeneralProduct.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/GeneralProduct.h')
-rw-r--r--Eigen/src/Core/GeneralProduct.h49
1 files changed, 30 insertions, 19 deletions
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 0f16cd8e3..6906aa75d 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -18,18 +18,33 @@ enum {
Small = 3
};
+// Define the threshold value to fallback from the generic matrix-matrix product
+// implementation (heavy) to the lightweight coeff-based product one.
+// See generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
+// in products/GeneralMatrixMatrix.h for more details.
+// TODO This threshold should also be used in the compile-time selector below.
+#ifndef EIGEN_GEMM_TO_COEFFBASED_THRESHOLD
+// This default value has been obtained on a Haswell architecture.
+#define EIGEN_GEMM_TO_COEFFBASED_THRESHOLD 20
+#endif
+
namespace internal {
template<int Rows, int Cols, int Depth> struct product_type_selector;
template<int Size, int MaxSize> struct product_size_category
{
- enum { is_large = MaxSize == Dynamic ||
- Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
- (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
- value = is_large ? Large
- : Size == 1 ? 1
- : Small
+ enum {
+ #ifndef EIGEN_GPU_COMPILE_PHASE
+ is_large = MaxSize == Dynamic ||
+ Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
+ (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
+ #else
+ is_large = 0,
+ #endif
+ value = is_large ? Large
+ : Size == 1 ? 1
+ : Small
};
};
@@ -148,13 +163,13 @@ template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vect
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,false>
{
- EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; }
};
template<typename Scalar,int Size>
struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
{
- EIGEN_STRONG_INLINE Scalar* data() { return 0; }
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { return 0; }
};
template<typename Scalar,int Size,int MaxSize>
@@ -213,8 +228,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
- ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
- * RhsBlasTraits::extractScalarFactor(rhs);
+ ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs);
// make sure Dest is a compile-time vector type (bug 1166)
typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
@@ -224,7 +238,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
// on, the other hand it is good for the cache to pack the vector anyways...
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
- MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
+ MightCannotUseDest = ((!EvalToDestAtCompileTime) || ComplexByReal) && (ActualDest::MaxSizeAtCompileTime!=0)
};
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
@@ -305,13 +319,12 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
- ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
- * RhsBlasTraits::extractScalarFactor(rhs);
+ ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs);
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on, the other hand it is good for the cache to pack the vector anyways...
- DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
+ DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 || ActualRhsTypeCleaned::MaxSizeAtCompileTime==0
};
gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
@@ -379,11 +392,10 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
*
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
*/
-#ifndef __CUDACC__
-
template<typename Derived>
template<typename OtherDerived>
-inline const Product<Derived, OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+const Product<Derived, OtherDerived>
MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
{
// A note regarding the function declaration: In MSVC, this function will sometimes
@@ -412,8 +424,6 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
return Product<Derived, OtherDerived>(derived(), other.derived());
}
-#endif // __CUDACC__
-
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
*
* The returned product will behave like any other expressions: the coefficients of the product will be
@@ -427,6 +437,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
*/
template<typename Derived>
template<typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Product<Derived,OtherDerived,LazyProduct>
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
{