diff options
Diffstat (limited to 'Eigen/src/Core/GeneralProduct.h')
-rw-r--r-- | Eigen/src/Core/GeneralProduct.h | 513 |
1 files changed, 166 insertions, 347 deletions
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index 9e805a80f..0f16cd8e3 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -11,29 +11,7 @@ #ifndef EIGEN_GENERAL_PRODUCT_H #define EIGEN_GENERAL_PRODUCT_H -namespace Eigen { - -/** \class GeneralProduct - * \ingroup Core_Module - * - * \brief Expression of the product of two general matrices or vectors - * - * \param LhsNested the type used to store the left-hand side - * \param RhsNested the type used to store the right-hand side - * \param ProductMode the type of the product - * - * This class represents an expression of the product of two general matrices. - * We call a general matrix, a dense matrix with full storage. For instance, - * This excludes triangular, selfadjoint, and sparse matrices. - * It is the return type of the operator* between general matrices. Its template - * arguments are determined automatically by ProductReturnType. Therefore, - * GeneralProduct should never be used direclty. To determine the result type of a - * function which involves a matrix product, use ProductReturnType::Type. - * - * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&) - */ -template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value> -class GeneralProduct; +namespace Eigen { enum { Large = 2, @@ -47,7 +25,8 @@ template<int Rows, int Cols, int Depth> struct product_type_selector; template<int Size, int MaxSize> struct product_size_category { enum { is_large = MaxSize == Dynamic || - Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD, + Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || + (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), value = is_large ? Large : Size == 1 ? 1 : Small @@ -59,15 +38,14 @@ template<typename Lhs, typename Rhs> struct product_type typedef typename remove_all<Lhs>::type _Lhs; typedef typename remove_all<Rhs>::type _Rhs; enum { - MaxRows = _Lhs::MaxRowsAtCompileTime, - Rows = _Lhs::RowsAtCompileTime, - MaxCols = _Rhs::MaxColsAtCompileTime, - Cols = _Rhs::ColsAtCompileTime, - MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime, - _Rhs::MaxRowsAtCompileTime), - Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, - _Rhs::RowsAtCompileTime), - LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + MaxRows = traits<_Lhs>::MaxRowsAtCompileTime, + Rows = traits<_Lhs>::RowsAtCompileTime, + MaxCols = traits<_Rhs>::MaxColsAtCompileTime, + Cols = traits<_Rhs>::ColsAtCompileTime, + MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime, + traits<_Rhs>::MaxRowsAtCompileTime), + Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime, + traits<_Rhs>::RowsAtCompileTime) }; // the splitting into different lines of code here, introducing the _select enums and the typedef below, @@ -82,7 +60,8 @@ private: public: enum { - value = selector::ret + value = selector::ret, + ret = selector::ret }; #ifdef EIGEN_DEBUG_PRODUCT static void debug() @@ -98,12 +77,13 @@ public: #endif }; - /* The following allows to select the kind of product at compile time * based on the three dimensions of the product. * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */ // FIXME I'm not sure the current mapping is the ideal one. template<int M, int N> struct product_type_selector<M,N,1> { enum { ret = OuterProduct }; }; +template<int M> struct product_type_selector<M, 1, 1> { enum { ret = LazyCoeffBasedProductMode }; }; +template<int N> struct product_type_selector<1, N, 1> { enum { ret = LazyCoeffBasedProductMode }; }; template<int Depth> struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; }; template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; }; template<> struct product_type_selector<Small,1, Small> { enum { ret = CoeffBasedProductMode }; }; @@ -122,60 +102,12 @@ template<> struct product_type_selector<Small,Small,Large> { enum template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; }; -template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; }; -template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; }; +template<> struct product_type_selector<Large,Small,Small> { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector<Small,Large,Small> { enum { ret = CoeffBasedProductMode }; }; template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; }; } // end namespace internal -/** \class ProductReturnType - * \ingroup Core_Module - * - * \brief Helper class to get the correct and optimized returned type of operator* - * - * \param Lhs the type of the left-hand side - * \param Rhs the type of the right-hand side - * \param ProductMode the type of the product (determined automatically by internal::product_mode) - * - * This class defines the typename Type representing the optimized product expression - * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type - * is the recommended way to define the result type of a function returning an expression - * which involve a matrix product. The class Product should never be - * used directly. - * - * \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&) - */ -template<typename Lhs, typename Rhs, int ProductType> -struct ProductReturnType -{ - // TODO use the nested type to reduce instanciations ???? -// typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; -// typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; - - typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type; -}; - -template<typename Lhs, typename Rhs> -struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode> -{ - typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested; - typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested; - typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type; -}; - -template<typename Lhs, typename Rhs> -struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode> -{ - typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested; - typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested; - typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type; -}; - -// this is a workaround for sun CC -template<typename Lhs, typename Rhs> -struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode> -{}; - /*********************************************************************** * Implementation of Inner Vector Vector Product ***********************************************************************/ @@ -187,119 +119,10 @@ struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedPr // product ends up to a row-vector times col-vector product... To tackle this use // case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x); -namespace internal { - -template<typename Lhs, typename Rhs> -struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> > - : traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> > -{}; - -} - -template<typename Lhs, typename Rhs> -class GeneralProduct<Lhs, Rhs, InnerProduct> - : internal::no_assignment_operator, - public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> -{ - typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base; - public: - GeneralProduct(const Lhs& lhs, const Rhs& rhs) - { - EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); - } - - /** Convertion to scalar */ - operator const typename Base::Scalar() const { - return Base::coeff(0,0); - } -}; - /*********************************************************************** * Implementation of Outer Vector Vector Product ***********************************************************************/ -namespace internal { - -// Column major -template<typename ProductType, typename Dest, typename Func> -EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&) -{ - typedef typename Dest::Index Index; - // FIXME make sure lhs is sequentially stored - // FIXME not very good if rhs is real and lhs complex while alpha is real too - const Index cols = dest.cols(); - for (Index j=0; j<cols; ++j) - func(dest.col(j), prod.rhs().coeff(0,j) * prod.lhs()); -} - -// Row major -template<typename ProductType, typename Dest, typename Func> -EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) { - typedef typename Dest::Index Index; - // FIXME make sure rhs is sequentially stored - // FIXME not very good if lhs is real and rhs complex while alpha is real too - const Index rows = dest.rows(); - for (Index i=0; i<rows; ++i) - func(dest.row(i), prod.lhs().coeff(i,0) * prod.rhs()); -} - -template<typename Lhs, typename Rhs> -struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> > - : traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> > -{}; - -} - -template<typename Lhs, typename Rhs> -class GeneralProduct<Lhs, Rhs, OuterProduct> - : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> -{ - template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; - - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - { - EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - } - - struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; - struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; - struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; - struct adds { - Scalar m_scale; - adds(const Scalar& s) : m_scale(s) {} - template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { - dst.const_cast_derived() += m_scale * src; - } - }; - - template<typename Dest> - inline void evalTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>()); - } - - template<typename Dest> - inline void addTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>()); - } - - template<typename Dest> - inline void subTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>()); - } - - template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const - { - internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>()); - } -}; - /*********************************************************************** * Implementation of General Matrix Vector Product ***********************************************************************/ @@ -313,60 +136,13 @@ class GeneralProduct<Lhs, Rhs, OuterProduct> */ namespace internal { -template<typename Lhs, typename Rhs> -struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> > - : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> > -{}; - template<int Side, int StorageOrder, bool BlasCompatible> -struct gemv_selector; +struct gemv_dense_selector; } // end namespace internal -template<typename Lhs, typename Rhs> -class GeneralProduct<Lhs, Rhs, GemvProduct> - : public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> -{ - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - typedef typename Lhs::Scalar LhsScalar; - typedef typename Rhs::Scalar RhsScalar; - - GeneralProduct(const Lhs& a_lhs, const Rhs& a_rhs) : Base(a_lhs,a_rhs) - { -// EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value), -// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - } - - enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; - typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType; - - template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const - { - eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols()); - internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor, - bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha); - } -}; - namespace internal { -// The vector is on the left => transposition -template<int StorageOrder, bool BlasCompatible> -struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible> -{ - template<typename ProductType, typename Dest> - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) - { - Transpose<Dest> destT(dest); - enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor }; - gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible> - ::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct> - (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha); - } -}; - template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if; template<typename Scalar,int Size,int MaxSize> @@ -384,126 +160,161 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true> template<typename Scalar,int Size,int MaxSize> struct gemv_static_vector_if<Scalar,Size,MaxSize,true> { - #if EIGEN_ALIGN_STATICALLY - internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data; - EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } - #else - // Some architectures cannot align on the stack, - // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. enum { ForceAlignment = internal::packet_traits<Scalar>::Vectorizable, PacketSize = internal::packet_traits<Scalar>::size }; - internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data; + #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 + internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data; + EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } + #else + // Some architectures cannot align on the stack, + // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. + internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data; EIGEN_STRONG_INLINE Scalar* data() { return ForceAlignment - ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16) + ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) : m_data.array; } #endif }; -template<> struct gemv_selector<OnTheRight,ColMajor,true> +// The vector is on the left => transposition +template<int StorageOrder, bool BlasCompatible> +struct gemv_dense_selector<OnTheLeft,StorageOrder,BlasCompatible> { - template<typename ProductType, typename Dest> - static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template<typename Lhs, typename Rhs, typename Dest> + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename ProductType::Index Index; - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::RealScalar RealScalar; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; - typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; - - ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs()); - ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs()); - - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + Transpose<Dest> destT(dest); + enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor }; + gemv_dense_selector<OnTheRight,OtherStorageOrder,BlasCompatible> + ::run(rhs.transpose(), lhs.transpose(), destT, alpha); + } +}; + +template<> struct gemv_dense_selector<OnTheRight,ColMajor,true> +{ + template<typename Lhs, typename Rhs, typename Dest> + static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + typedef typename Dest::RealScalar RealScalar; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + + typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest; + + ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); + ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); + + // make sure Dest is a compile-time vector type (bug 1166) + typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest; enum { // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 // on, the other hand it is good for the cache to pack the vector anyways... - EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1, + EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1), ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex), - MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal + MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal }; - gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest; - - bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); - bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; - + typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper; + typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper; RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha); - ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), - evalToDest ? dest.data() : static_dest.data()); - - if(!evalToDest) + if(!MightCannotUseDest) { - #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = dest.size(); - EIGEN_DENSE_STORAGE_CTOR_PLUGIN - #endif - if(!alphaIsCompatible) + // shortcut if we are sure to be able to use dest directly, + // this ease the compiler to generate cleaner and more optimzized code for most common cases + general_matrix_vector_product + <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), + dest.data(), 1, + compatibleAlpha); + } + else + { + gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest; + + const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; + + ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), + evalToDest ? dest.data() : static_dest.data()); + + if(!evalToDest) { - MappedDest(actualDestPtr, dest.size()).setZero(); - compatibleAlpha = RhsScalar(1); + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + Index size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + if(!alphaIsCompatible) + { + MappedDest(actualDestPtr, dest.size()).setZero(); + compatibleAlpha = RhsScalar(1); + } + else + MappedDest(actualDestPtr, dest.size()) = dest; } - else - MappedDest(actualDestPtr, dest.size()) = dest; - } - general_matrix_vector_product - <Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run( - actualLhs.rows(), actualLhs.cols(), - actualLhs.data(), actualLhs.outerStride(), - actualRhs.data(), actualRhs.innerStride(), - actualDestPtr, 1, - compatibleAlpha); + general_matrix_vector_product + <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), + actualDestPtr, 1, + compatibleAlpha); - if (!evalToDest) - { - if(!alphaIsCompatible) - dest += actualAlpha * MappedDest(actualDestPtr, dest.size()); - else - dest = MappedDest(actualDestPtr, dest.size()); + if (!evalToDest) + { + if(!alphaIsCompatible) + dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size()); + else + dest = MappedDest(actualDestPtr, dest.size()); + } } } }; -template<> struct gemv_selector<OnTheRight,RowMajor,true> +template<> struct gemv_dense_selector<OnTheRight,RowMajor,true> { - template<typename ProductType, typename Dest> - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template<typename Lhs, typename Rhs, typename Dest> + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::Index Index; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::_ActualRhsType _ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; - - typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); - - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + + typedef internal::blas_traits<Lhs> LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits<Rhs> RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned; + + typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); + typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 // on, the other hand it is good for the cache to pack the vector anyways... - DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1 + DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 }; - gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs; + gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs; ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data()); @@ -511,45 +322,48 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true> if(!DirectlyUseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = actualRhs.size(); + Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif - Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; + Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; } + typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper; + typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper; general_matrix_vector_product - <Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run( + <Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run( actualLhs.rows(), actualLhs.cols(), - actualLhs.data(), actualLhs.outerStride(), - actualRhsPtr, 1, - dest.data(), dest.innerStride(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhsPtr, 1), + dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166) actualAlpha); } }; -template<> struct gemv_selector<OnTheRight,ColMajor,false> +template<> struct gemv_dense_selector<OnTheRight,ColMajor,false> { - template<typename ProductType, typename Dest> - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template<typename Lhs, typename Rhs, typename Dest> + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename Dest::Index Index; - // TODO makes sure dest is sequentially stored in memory, otherwise use a temp - const Index size = prod.rhs().rows(); + EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE); + // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp + typename nested_eval<Rhs,1>::type actual_rhs(rhs); + const Index size = rhs.rows(); for(Index k=0; k<size; ++k) - dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k); + dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k); } }; -template<> struct gemv_selector<OnTheRight,RowMajor,false> +template<> struct gemv_dense_selector<OnTheRight,RowMajor,false> { - template<typename ProductType, typename Dest> - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template<typename Lhs, typename Rhs, typename Dest> + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename Dest::Index Index; - // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp - const Index rows = prod.rows(); + EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE); + typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs); + const Index rows = dest.rows(); for(Index i=0; i<rows; ++i) - dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum(); + dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum(); } }; @@ -565,9 +379,11 @@ template<> struct gemv_selector<OnTheRight,RowMajor,false> * * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*() */ +#ifndef __CUDACC__ + template<typename Derived> template<typename OtherDerived> -inline const typename ProductReturnType<Derived, OtherDerived>::Type +inline const Product<Derived, OtherDerived> MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const { // A note regarding the function declaration: In MSVC, this function will sometimes @@ -592,9 +408,12 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const #ifdef EIGEN_DEBUG_PRODUCT internal::product_type<Derived,OtherDerived>::debug(); #endif - return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived()); + + return Product<Derived, OtherDerived>(derived(), other.derived()); } +#endif // __CUDACC__ + /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. * * The returned product will behave like any other expressions: the coefficients of the product will be @@ -608,7 +427,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const */ template<typename Derived> template<typename OtherDerived> -const typename LazyProductReturnType<Derived,OtherDerived>::Type +const Product<Derived,OtherDerived,LazyProduct> MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const { enum { @@ -627,7 +446,7 @@ MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) - return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived()); + return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived()); } } // end namespace Eigen |