diff options
Diffstat (limited to 'Eigen/src/Core/GeneralProduct.h')
-rw-r--r-- | Eigen/src/Core/GeneralProduct.h | 49 |
1 files changed, 30 insertions, 19 deletions
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 0f16cd8e3..6906aa75d 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -18,18 +18,33 @@ enum { Small = 3 }; +// Define the threshold value to fallback from the generic matrix-matrix product +// implementation (heavy) to the lightweight coeff-based product one. +// See generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> +// in products/GeneralMatrixMatrix.h for more details. +// TODO This threshold should also be used in the compile-time selector below. +#ifndef EIGEN_GEMM_TO_COEFFBASED_THRESHOLD +// This default value has been obtained on a Haswell architecture. +#define EIGEN_GEMM_TO_COEFFBASED_THRESHOLD 20 +#endif + namespace internal { template<int Rows, int Cols, int Depth> struct product_type_selector; template<int Size, int MaxSize> struct product_size_category { - enum { is_large = MaxSize == Dynamic || - Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || - (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), - value = is_large ? Large - : Size == 1 ? 1 - : Small + enum { + #ifndef EIGEN_GPU_COMPILE_PHASE + is_large = MaxSize == Dynamic || + Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || + (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), + #else + is_large = 0, + #endif + value = is_large ? Large + : Size == 1 ? 1 + : Small }; }; @@ -148,13 +163,13 @@ template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vect template<typename Scalar,int Size,int MaxSize> struct gemv_static_vector_if<Scalar,Size,MaxSize,false> { - EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; } }; template<typename Scalar,int Size> struct gemv_static_vector_if<Scalar,Size,Dynamic,true> { - EIGEN_STRONG_INLINE Scalar* data() { return 0; } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { return 0; } }; template<typename Scalar,int Size,int MaxSize> @@ -213,8 +228,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true> ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) - * RhsBlasTraits::extractScalarFactor(rhs); + ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs); // make sure Dest is a compile-time vector type (bug 1166) typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest; @@ -224,7 +238,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true> // on, the other hand it is good for the cache to pack the vector anyways... EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1), ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex), - MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal + MightCannotUseDest = ((!EvalToDestAtCompileTime) || ComplexByReal) && (ActualDest::MaxSizeAtCompileTime!=0) }; typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper; @@ -305,13 +319,12 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,true> typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) - * RhsBlasTraits::extractScalarFactor(rhs); + ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 // on, the other hand it is good for the cache to pack the vector anyways... - DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 + DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 || ActualRhsTypeCleaned::MaxSizeAtCompileTime==0 }; gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs; @@ -379,11 +392,10 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false> * * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*() */ -#ifndef __CUDACC__ - template<typename Derived> template<typename OtherDerived> -inline const Product<Derived, OtherDerived> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +const Product<Derived, OtherDerived> MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const { // A note regarding the function declaration: In MSVC, this function will sometimes @@ -412,8 +424,6 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const return Product<Derived, OtherDerived>(derived(), other.derived()); } -#endif // __CUDACC__ - /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. * * The returned product will behave like any other expressions: the coefficients of the product will be @@ -427,6 +437,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const */ template<typename Derived> template<typename OtherDerived> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Product<Derived,OtherDerived,LazyProduct> MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const { |