diff options
author | Yi Kong <yikong@google.com> | 2022-02-25 17:02:53 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2022-02-25 17:02:53 +0000 |
commit | edb0ad5bb04b48aab7dd0978f0475edd3550de7c (patch) | |
tree | fb979fb4cf4f8052c8cc66b1ec9516d91fcd859b /Eigen/src/Core/CoreEvaluators.h | |
parent | 8fd413e275f78a4c240f1442ce5cf77c73a20a55 (diff) | |
parent | bc0f5df265caa21a2120c22453655a7fcc941991 (diff) | |
download | eigen-7ecd5586f9218fe07c97ede8e405561aca973d78.tar.gz |
Merge changes Iee153445,Iee274471 am: 79df15ea88 am: 10f298fc41 am: 7cb5001398 am: bc0f5df265aml_uwb_331910010aml_uwb_331820070aml_uwb_331613010aml_uwb_331611010aml_uwb_331410010aml_uwb_331310030aml_uwb_331115000aml_uwb_331015040aml_uwb_330810010aml_tz4_332714070aml_tz4_332714050aml_tz4_332714010aml_tz4_331910000aml_tz4_331314030aml_tz4_331314020aml_tz4_331314010aml_tz4_331012050aml_tz4_331012040aml_tz4_331012000aml_ase_331311020aml_ase_331112000aml_ase_331011020android13-mainline-uwb-releaseandroid13-mainline-tzdata4-releaseandroid13-mainline-appsearch-releaseaml_tz4_332714010
Original change: https://android-review.googlesource.com/c/platform/external/eigen/+/1999079
Change-Id: Ife39d10c8b23d3eeb174cd52f462f9d20527ad03
Diffstat (limited to 'Eigen/src/Core/CoreEvaluators.h')
-rw-r--r-- | Eigen/src/Core/CoreEvaluators.h | 614 |
1 files changed, 342 insertions, 272 deletions
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index f7c1effca..0ff8c8deb 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -14,7 +14,7 @@ #define EIGEN_COREEVALUATORS_H namespace Eigen { - + namespace internal { // This class returns the evaluator kind from the expression storage kind. @@ -63,8 +63,8 @@ template< typename T, template< typename T, typename Kind = typename evaluator_traits<typename T::NestedExpression>::Kind, typename Scalar = typename T::Scalar> struct unary_evaluator; - -// evaluator_traits<T> contains traits for evaluator<T> + +// evaluator_traits<T> contains traits for evaluator<T> template<typename T> struct evaluator_traits_base @@ -90,7 +90,8 @@ template<typename T> struct evaluator : public unary_evaluator<T> { typedef unary_evaluator<T> Base; - EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit evaluator(const T& xpr) : Base(xpr) {} }; @@ -99,21 +100,29 @@ template<typename T> struct evaluator<const T> : evaluator<T> { - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const T& xpr) : evaluator<T>(xpr) {} }; // ---------- base class for all evaluators ---------- template<typename ExpressionType> -struct evaluator_base : public noncopyable +struct evaluator_base { // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. typedef traits<ExpressionType> ExpressionTraits; - + enum { Alignment = 0 }; + // noncopyable: + // Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization) + // and make complex evaluator much larger than then should do. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator_base() {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~evaluator_base() {} +private: + EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&); + EIGEN_DEVICE_FUNC const evaluator_base& operator=(const evaluator_base&); }; // -------------------- Matrix and Array -------------------- @@ -123,6 +132,33 @@ struct evaluator_base : public noncopyable // Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, // so no need for more sophisticated dispatching. +// this helper permits to completely eliminate m_outerStride if it is known at compiletime. +template<typename Scalar,int OuterStride> class plainobjectbase_evaluator_data { +public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr) + { +#ifndef EIGEN_INTERNAL_DEBUGGING + EIGEN_UNUSED_VARIABLE(outerStride); +#endif + eigen_internal_assert(outerStride==OuterStride); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR + Index outerStride() const EIGEN_NOEXCEPT { return OuterStride; } + const Scalar *data; +}; + +template<typename Scalar> class plainobjectbase_evaluator_data<Scalar,Dynamic> { +public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr), m_outerStride(outerStride) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index outerStride() const { return m_outerStride; } + const Scalar *data; +protected: + Index m_outerStride; +}; + template<typename Derived> struct evaluator<PlainObjectBase<Derived> > : evaluator_base<Derived> @@ -136,23 +172,28 @@ struct evaluator<PlainObjectBase<Derived> > IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime, RowsAtCompileTime = PlainObjectType::RowsAtCompileTime, ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, - + CoeffReadCost = NumTraits<Scalar>::ReadCost, Flags = traits<Derived>::EvaluatorFlags, Alignment = traits<Derived>::Alignment }; - - EIGEN_DEVICE_FUNC evaluator() - : m_data(0), - m_outerStride(IsVectorAtCompileTime ? 0 - : int(IsRowMajor) ? ColsAtCompileTime - : RowsAtCompileTime) + enum { + // We do not need to know the outer stride for vectors + OuterStrideAtCompileTime = IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + evaluator() + : m_d(0,OuterStrideAtCompileTime) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - - EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) - : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit evaluator(const PlainObjectType& m) + : m_d(m.data(),IsVectorAtCompileTime ? 0 : m.outerStride()) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } @@ -161,30 +202,30 @@ struct evaluator<PlainObjectBase<Derived> > CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) - return m_data[row * m_outerStride.value() + col]; + return m_d.data[row * m_d.outerStride() + col]; else - return m_data[row + col * m_outerStride.value()]; + return m_d.data[row + col * m_d.outerStride()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_data[index]; + return m_d.data[index]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) - return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col]; + return const_cast<Scalar*>(m_d.data)[row * m_d.outerStride() + col]; else - return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()]; + return const_cast<Scalar*>(m_d.data)[row + col * m_d.outerStride()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return const_cast<Scalar*>(m_data)[index]; + return const_cast<Scalar*>(m_d.data)[index]; } template<int LoadMode, typename PacketType> @@ -192,16 +233,16 @@ struct evaluator<PlainObjectBase<Derived> > PacketType packet(Index row, Index col) const { if (IsRowMajor) - return ploadt<PacketType, LoadMode>(m_data + row * m_outerStride.value() + col); + return ploadt<PacketType, LoadMode>(m_d.data + row * m_d.outerStride() + col); else - return ploadt<PacketType, LoadMode>(m_data + row + col * m_outerStride.value()); + return ploadt<PacketType, LoadMode>(m_d.data + row + col * m_d.outerStride()); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return ploadt<PacketType, LoadMode>(m_data + index); + return ploadt<PacketType, LoadMode>(m_d.data + index); } template<int StoreMode,typename PacketType> @@ -210,26 +251,22 @@ struct evaluator<PlainObjectBase<Derived> > { if (IsRowMajor) return pstoret<Scalar, PacketType, StoreMode> - (const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x); + (const_cast<Scalar*>(m_d.data) + row * m_d.outerStride() + col, x); else return pstoret<Scalar, PacketType, StoreMode> - (const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x); + (const_cast<Scalar*>(m_d.data) + row + col * m_d.outerStride(), x); } template<int StoreMode, typename PacketType> EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x); + return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + index, x); } protected: - const Scalar *m_data; - // We do not need to know the outer stride for vectors - variable_if_dynamic<Index, IsVectorAtCompileTime ? 0 - : int(IsRowMajor) ? ColsAtCompileTime - : RowsAtCompileTime> m_outerStride; + plainobjectbase_evaluator_data<Scalar,OuterStrideAtCompileTime> m_d; }; template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> @@ -237,11 +274,13 @@ struct evaluator<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > : evaluator<PlainObjectBase<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > > { typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType; - - EIGEN_DEVICE_FUNC evaluator() {} - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) - : evaluator<PlainObjectBase<XprType> >(m) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + evaluator() {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit evaluator(const XprType& m) + : evaluator<PlainObjectBase<XprType> >(m) { } }; @@ -251,10 +290,12 @@ struct evaluator<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > { typedef Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> XprType; - EIGEN_DEVICE_FUNC evaluator() {} - - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) - : evaluator<PlainObjectBase<XprType> >(m) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + evaluator() {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit evaluator(const XprType& m) + : evaluator<PlainObjectBase<XprType> >(m) { } }; @@ -265,14 +306,15 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased> : evaluator_base<Transpose<ArgType> > { typedef Transpose<ArgType> XprType; - + enum { - CoeffReadCost = evaluator<ArgType>::CoeffReadCost, + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, Flags = evaluator<ArgType>::Flags ^ RowMajorBit, Alignment = evaluator<ArgType>::Alignment }; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -457,10 +499,10 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> > { typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType; typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned; - + enum { CoeffReadCost = internal::functor_traits<NullaryOp>::Cost, - + Flags = (evaluator<PlainObjectTypeCleaned>::Flags & ( HereditaryBits | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0) @@ -517,19 +559,17 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > : evaluator_base<CwiseUnaryOp<UnaryOp, ArgType> > { typedef CwiseUnaryOp<UnaryOp, ArgType> XprType; - + enum { - CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost, - + CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost), + Flags = evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)), Alignment = evaluator<ArgType>::Alignment }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - explicit unary_evaluator(const XprType& op) - : m_functor(op.functor()), - m_argImpl(op.nestedExpression()) + explicit unary_evaluator(const XprType& op) : m_d(op) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -540,32 +580,43 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_functor(m_argImpl.coeff(row, col)); + return m_d.func()(m_d.argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_functor(m_argImpl.coeff(index)); + return m_d.func()(m_d.argImpl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col)); + return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index)); + return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(index)); } protected: - const UnaryOp m_functor; - evaluator<ArgType> m_argImpl; + + // this helper permits to completely eliminate the functor if it is empty + struct Data + { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const UnaryOp& func() const { return op; } + UnaryOp op; + evaluator<ArgType> argImpl; + }; + + Data m_d; }; // -------------------- CwiseTernaryOp -------------------- @@ -577,7 +628,7 @@ struct evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > { typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType; typedef ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > Base; - + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} }; @@ -586,10 +637,10 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased : evaluator_base<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > { typedef CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> XprType; - + enum { - CoeffReadCost = evaluator<Arg1>::CoeffReadCost + evaluator<Arg2>::CoeffReadCost + evaluator<Arg3>::CoeffReadCost + functor_traits<TernaryOp>::Cost, - + CoeffReadCost = int(evaluator<Arg1>::CoeffReadCost) + int(evaluator<Arg2>::CoeffReadCost) + int(evaluator<Arg3>::CoeffReadCost) + int(functor_traits<TernaryOp>::Cost), + Arg1Flags = evaluator<Arg1>::Flags, Arg2Flags = evaluator<Arg2>::Flags, Arg3Flags = evaluator<Arg3>::Flags, @@ -609,11 +660,7 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased evaluator<Arg3>::Alignment) }; - EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) - : m_functor(xpr.functor()), - m_arg1Impl(xpr.arg1()), - m_arg2Impl(xpr.arg2()), - m_arg3Impl(xpr.arg3()) + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) : m_d(xpr) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<TernaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -624,38 +671,48 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col)); + return m_d.func()(m_d.arg1Impl.coeff(row, col), m_d.arg2Impl.coeff(row, col), m_d.arg3Impl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + return m_d.func()(m_d.arg1Impl.coeff(index), m_d.arg2Impl.coeff(index), m_d.arg3Impl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(row, col), - m_arg2Impl.template packet<LoadMode,PacketType>(row, col), - m_arg3Impl.template packet<LoadMode,PacketType>(row, col)); + return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(row, col), + m_d.arg2Impl.template packet<LoadMode,PacketType>(row, col), + m_d.arg3Impl.template packet<LoadMode,PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(index), - m_arg2Impl.template packet<LoadMode,PacketType>(index), - m_arg3Impl.template packet<LoadMode,PacketType>(index)); + return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(index), + m_d.arg2Impl.template packet<LoadMode,PacketType>(index), + m_d.arg3Impl.template packet<LoadMode,PacketType>(index)); } protected: - const TernaryOp m_functor; - evaluator<Arg1> m_arg1Impl; - evaluator<Arg2> m_arg2Impl; - evaluator<Arg3> m_arg3Impl; + // this helper permits to completely eliminate the functor if it is empty + struct Data + { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Data(const XprType& xpr) : op(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TernaryOp& func() const { return op; } + TernaryOp op; + evaluator<Arg1> arg1Impl; + evaluator<Arg2> arg2Impl; + evaluator<Arg3> arg3Impl; + }; + + Data m_d; }; // -------------------- CwiseBinaryOp -------------------- @@ -667,8 +724,9 @@ struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > { typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; typedef binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > Base; - - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit evaluator(const XprType& xpr) : Base(xpr) {} }; template<typename BinaryOp, typename Lhs, typename Rhs> @@ -676,10 +734,10 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > { typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType; - + enum { - CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, - + CoeffReadCost = int(evaluator<Lhs>::CoeffReadCost) + int(evaluator<Rhs>::CoeffReadCost) + int(functor_traits<BinaryOp>::Cost), + LhsFlags = evaluator<Lhs>::Flags, RhsFlags = evaluator<Rhs>::Flags, SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value, @@ -696,10 +754,8 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment) }; - EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) - : m_functor(xpr.functor()), - m_lhsImpl(xpr.lhs()), - m_rhsImpl(xpr.rhs()) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit binary_evaluator(const XprType& xpr) : m_d(xpr) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -710,35 +766,46 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); + return m_d.func()(m_d.lhsImpl.coeff(row, col), m_d.rhsImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); + return m_d.func()(m_d.lhsImpl.coeff(index), m_d.rhsImpl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col), - m_rhsImpl.template packet<LoadMode,PacketType>(row, col)); + return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(row, col), + m_d.rhsImpl.template packet<LoadMode,PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index), - m_rhsImpl.template packet<LoadMode,PacketType>(index)); + return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(index), + m_d.rhsImpl.template packet<LoadMode,PacketType>(index)); } protected: - const BinaryOp m_functor; - evaluator<Lhs> m_lhsImpl; - evaluator<Rhs> m_rhsImpl; + + // this helper permits to completely eliminate the functor if it is empty + struct Data + { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Data(const XprType& xpr) : op(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const BinaryOp& func() const { return op; } + BinaryOp op; + evaluator<Lhs> lhsImpl; + evaluator<Rhs> rhsImpl; + }; + + Data m_d; }; // -------------------- CwiseUnaryView -------------------- @@ -748,18 +815,16 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> : evaluator_base<CwiseUnaryView<UnaryOp, ArgType> > { typedef CwiseUnaryView<UnaryOp, ArgType> XprType; - + enum { - CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost, - + CoeffReadCost = int(evaluator<ArgType>::CoeffReadCost) + int(functor_traits<UnaryOp>::Cost), + Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)), - + Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... }; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) - : m_unaryOp(op.functor()), - m_argImpl(op.nestedExpression()) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_d(op) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -771,30 +836,41 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_unaryOp(m_argImpl.coeff(row, col)); + return m_d.func()(m_d.argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_unaryOp(m_argImpl.coeff(index)); + return m_d.func()(m_d.argImpl.coeff(index)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { - return m_unaryOp(m_argImpl.coeffRef(row, col)); + return m_d.func()(m_d.argImpl.coeffRef(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return m_unaryOp(m_argImpl.coeffRef(index)); + return m_d.func()(m_d.argImpl.coeffRef(index)); } protected: - const UnaryOp m_unaryOp; - evaluator<ArgType> m_argImpl; + + // this helper permits to completely eliminate the functor if it is empty + struct Data + { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const UnaryOp& func() const { return op; } + UnaryOp op; + evaluator<ArgType> argImpl; + }; + + Data m_d; }; // -------------------- Map -------------------- @@ -811,14 +887,15 @@ struct mapbase_evaluator : evaluator_base<Derived> typedef typename XprType::PointerType PointerType; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - + enum { IsRowMajor = XprType::RowsAtCompileTime, ColsAtCompileTime = XprType::ColsAtCompileTime, CoeffReadCost = NumTraits<Scalar>::ReadCost }; - EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit mapbase_evaluator(const XprType& map) : m_data(const_cast<PointerType>(map.data())), m_innerStride(map.innerStride()), m_outerStride(map.outerStride()) @@ -882,17 +959,21 @@ struct mapbase_evaluator : evaluator_base<Derived> internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x); } protected: - EIGEN_DEVICE_FUNC - inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); } - EIGEN_DEVICE_FUNC - inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR + Index rowStride() const EIGEN_NOEXCEPT { + return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR + Index colStride() const EIGEN_NOEXCEPT { + return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); + } PointerType m_data; const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride; const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride; }; -template<typename PlainObjectType, int MapOptions, typename StrideType> +template<typename PlainObjectType, int MapOptions, typename StrideType> struct evaluator<Map<PlainObjectType, MapOptions, StrideType> > : public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType> { @@ -900,7 +981,7 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> > typedef typename XprType::Scalar Scalar; // TODO: should check for smaller packet types once we can handle multi-sized packet types typedef typename packet_traits<Scalar>::type PacketScalar; - + enum { InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 ? int(PlainObjectType::InnerStrideAtCompileTime) @@ -912,34 +993,35 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> > HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, HasNoStride = HasNoInnerStride && HasNoOuterStride, IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, - + PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit), LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit), Flags = int( evaluator<PlainObjectType>::Flags) & (LinearAccessMask&PacketAccessMask), - + Alignment = int(MapOptions)&int(AlignedMask) }; EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) - : mapbase_evaluator<XprType, PlainObjectType>(map) + : mapbase_evaluator<XprType, PlainObjectType>(map) { } }; // -------------------- Ref -------------------- -template<typename PlainObjectType, int RefOptions, typename StrideType> +template<typename PlainObjectType, int RefOptions, typename StrideType> struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> > : public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType> { typedef Ref<PlainObjectType, RefOptions, StrideType> XprType; - + enum { Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags, Alignment = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Alignment }; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref) - : mapbase_evaluator<XprType, PlainObjectType>(ref) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit evaluator(const XprType& ref) + : mapbase_evaluator<XprType, PlainObjectType>(ref) { } }; @@ -947,8 +1029,8 @@ struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> > template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess = internal::has_direct_access<ArgType>::ret> struct block_evaluator; - -template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> + +template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > : block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> { @@ -956,15 +1038,15 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > typedef typename XprType::Scalar Scalar; // TODO: should check for smaller packet types once we can handle multi-sized packet types typedef typename packet_traits<Scalar>::type PacketScalar; - + enum { CoeffReadCost = evaluator<ArgType>::CoeffReadCost, - + RowsAtCompileTime = traits<XprType>::RowsAtCompileTime, ColsAtCompileTime = traits<XprType>::ColsAtCompileTime, MaxRowsAtCompileTime = traits<XprType>::MaxRowsAtCompileTime, MaxColsAtCompileTime = traits<XprType>::MaxColsAtCompileTime, - + ArgTypeIsRowMajor = (int(evaluator<ArgType>::Flags)&RowMajorBit) != 0, IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 @@ -977,21 +1059,24 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > OuterStrideAtCompileTime = HasSameStorageOrderAsArgType ? int(outer_stride_at_compile_time<ArgType>::ret) : int(inner_stride_at_compile_time<ArgType>::ret), - MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, - - FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0, + MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0, + + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsRowMajorBit = XprType::Flags&RowMajorBit, Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) | DirectAccessBit | MaskPacketAccessBit), Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, - + PacketAlignment = unpacket_traits<PacketScalar>::alignment, - Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) + && (OuterStrideAtCompileTime!=0) + && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0) }; typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit evaluator(const XprType& block) : block_evaluator_type(block) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } @@ -1004,8 +1089,9 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /*HasDirectAcc { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; - EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) - : unary_evaluator<XprType>(block) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit block_evaluator(const XprType& block) + : unary_evaluator<XprType>(block) {} }; @@ -1015,84 +1101,116 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa { typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) - : m_argImpl(block.nestedExpression()), - m_startRow(block.startRow()), - m_startCol(block.startCol()) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit unary_evaluator(const XprType& block) + : m_argImpl(block.nestedExpression()), + m_startRow(block.startRow()), + m_startCol(block.startCol()), + m_linear_offset(ForwardLinearAccess?(ArgType::IsRowMajor ? block.startRow()*block.nestedExpression().cols() + block.startCol() : block.startCol()*block.nestedExpression().rows() + block.startRow()):0) { } - + typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; enum { - RowsAtCompileTime = XprType::RowsAtCompileTime + RowsAtCompileTime = XprType::RowsAtCompileTime, + ForwardLinearAccess = (InnerPanel || int(XprType::IsRowMajor)==int(ArgType::IsRowMajor)) && bool(evaluator<ArgType>::Flags&LinearAccessBit) }; - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const - { - return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); + { + return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + { + return linear_coeff_impl(index, bool_constant<ForwardLinearAccess>()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) - { - return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); + { + return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + { + return linear_coeffRef_impl(index, bool_constant<ForwardLinearAccess>()); } - + template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE - PacketType packet(Index row, Index col) const - { - return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col); + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE - PacketType packet(Index index) const - { - return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + PacketType packet(Index index) const + { + if (ForwardLinearAccess) + return m_argImpl.template packet<LoadMode,PacketType>(m_linear_offset.value() + index); + else + return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); } - + template<int StoreMode, typename PacketType> EIGEN_STRONG_INLINE - void writePacket(Index row, Index col, const PacketType& x) + void writePacket(Index row, Index col, const PacketType& x) { - return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x); + return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x); } - + template<int StoreMode, typename PacketType> EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketType& x) + void writePacket(Index index, const PacketType& x) { - return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0, - x); + if (ForwardLinearAccess) + return m_argImpl.template writePacket<StoreMode,PacketType>(m_linear_offset.value() + index, x); + else + return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0, + x); } - + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType linear_coeff_impl(Index index, internal::true_type /* ForwardLinearAccess */) const + { + return m_argImpl.coeff(m_linear_offset.value() + index); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType linear_coeff_impl(Index index, internal::false_type /* not ForwardLinearAccess */) const + { + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& linear_coeffRef_impl(Index index, internal::true_type /* ForwardLinearAccess */) + { + return m_argImpl.coeffRef(m_linear_offset.value() + index); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& linear_coeffRef_impl(Index index, internal::false_type /* not ForwardLinearAccess */) + { + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + evaluator<ArgType> m_argImpl; const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow; const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol; + const variable_if_dynamic<Index, ForwardLinearAccess ? Dynamic : 0> m_linear_offset; }; -// TODO: This evaluator does not actually use the child evaluator; +// TODO: This evaluator does not actually use the child evaluator; // all action is via the data() as returned by the Block expression. -template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> +template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel> struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true> : mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject> @@ -1100,8 +1218,9 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType; typedef typename XprType::Scalar Scalar; - EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) - : mapbase_evaluator<XprType, typename XprType::PlainObject>(block) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit block_evaluator(const XprType& block) + : mapbase_evaluator<XprType, typename XprType::PlainObject>(block) { // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator<XprType>::Alignment)) == 0) && "data is not aligned"); @@ -1124,18 +1243,19 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > evaluator<ElseMatrixType>::CoeffReadCost), Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits, - + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment) }; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit evaluator(const XprType& select) : m_conditionImpl(select.conditionMatrix()), m_thenImpl(select.thenMatrix()), m_elseImpl(select.elseMatrix()) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - + typedef typename XprType::CoeffReturnType CoeffReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1155,7 +1275,7 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > else return m_elseImpl.coeff(index); } - + protected: evaluator<ConditionMatrixType> m_conditionImpl; evaluator<ThenMatrixType> m_thenImpl; @@ -1165,7 +1285,7 @@ protected: // -------------------- Replicate -------------------- -template<typename ArgType, int RowFactor, int ColFactor> +template<typename ArgType, int RowFactor, int ColFactor> struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > : evaluator_base<Replicate<ArgType, RowFactor, ColFactor> > { @@ -1176,22 +1296,23 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > }; typedef typename internal::nested_eval<ArgType,Factor>::type ArgTypeNested; typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned; - + enum { CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost, LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0, Flags = (evaluator<ArgTypeNestedCleaned>::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit), - + Alignment = evaluator<ArgTypeNestedCleaned>::Alignment }; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit unary_evaluator(const XprType& replicate) : m_arg(replicate.nestedExpression()), m_argImpl(m_arg), m_rows(replicate.nestedExpression().rows()), m_cols(replicate.nestedExpression().cols()) {} - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { @@ -1202,10 +1323,10 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > const Index actual_col = internal::traits<XprType>::ColsAtCompileTime==1 ? 0 : ColFactor==1 ? col : col % m_cols.value(); - + return m_argImpl.coeff(actual_row, actual_col); } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { @@ -1213,7 +1334,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1 ? (ColFactor==1 ? index : index%m_cols.value()) : (RowFactor==1 ? index : index%m_rows.value()); - + return m_argImpl.coeff(actual_index); } @@ -1230,7 +1351,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > return m_argImpl.template packet<LoadMode,PacketType>(actual_row, actual_col); } - + template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const @@ -1241,7 +1362,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> > return m_argImpl.template packet<LoadMode,PacketType>(actual_index); } - + protected: const ArgTypeNested m_arg; evaluator<ArgTypeNestedCleaned> m_argImpl; @@ -1249,64 +1370,6 @@ protected: const variable_if_dynamic<Index, ArgType::ColsAtCompileTime> m_cols; }; - -// -------------------- PartialReduxExpr -------------------- - -template< typename ArgType, typename MemberOp, int Direction> -struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> > - : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> > -{ - typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType; - typedef typename internal::nested_eval<ArgType,1>::type ArgTypeNested; - typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned; - typedef typename ArgType::Scalar InputScalar; - typedef typename XprType::Scalar Scalar; - enum { - TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime) - }; - typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType; - enum { - CoeffReadCost = TraversalSize==Dynamic ? HugeCost - : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value), - - Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit, - - Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized - }; - - EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) - : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) - { - EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : int(CostOpType::value)); - EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); - } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Scalar coeff(Index i, Index j) const - { - if (Direction==Vertical) - return m_functor(m_arg.col(j)); - else - return m_functor(m_arg.row(i)); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Scalar coeff(Index index) const - { - if (Direction==Vertical) - return m_functor(m_arg.col(index)); - else - return m_functor(m_arg.row(index)); - } - -protected: - typename internal::add_const_on_value_type<ArgTypeNested>::type m_arg; - const MemberOp m_functor; -}; - - // -------------------- MatrixWrapper and ArrayWrapper -------------------- // // evaluator_wrapper_base<T> is a common base class for the @@ -1323,7 +1386,8 @@ struct evaluator_wrapper_base Alignment = evaluator<ArgType>::Alignment }; - EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} typedef typename ArgType::Scalar Scalar; typedef typename ArgType::CoeffReturnType CoeffReturnType; @@ -1390,7 +1454,8 @@ struct unary_evaluator<MatrixWrapper<TArgType> > { typedef MatrixWrapper<TArgType> XprType; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit unary_evaluator(const XprType& wrapper) : evaluator_wrapper_base<MatrixWrapper<TArgType> >(wrapper.nestedExpression()) { } }; @@ -1401,7 +1466,8 @@ struct unary_evaluator<ArrayWrapper<TArgType> > { typedef ArrayWrapper<TArgType> XprType; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit unary_evaluator(const XprType& wrapper) : evaluator_wrapper_base<ArrayWrapper<TArgType> >(wrapper.nestedExpression()) { } }; @@ -1428,9 +1494,9 @@ struct unary_evaluator<Reverse<ArgType, Direction> > ReversePacket = (Direction == BothDirections) || ((Direction == Vertical) && IsColMajor) || ((Direction == Horizontal) && IsRowMajor), - + CoeffReadCost = evaluator<ArgType>::CoeffReadCost, - + // let's enable LinearAccess only with vectorization because of the product overhead // FIXME enable DirectAccess with negative strides? Flags0 = evaluator<ArgType>::Flags, @@ -1439,16 +1505,17 @@ struct unary_evaluator<Reverse<ArgType, Direction> > ? LinearAccessBit : 0, Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess), - + Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f. }; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit unary_evaluator(const XprType& reverse) : m_argImpl(reverse.nestedExpression()), m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1), m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1) { } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { @@ -1523,7 +1590,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> > m_argImpl.template writePacket<LoadMode> (m_rows.value() * m_cols.value() - index - PacketSize, preverse(x)); } - + protected: evaluator<ArgType> m_argImpl; @@ -1541,20 +1608,21 @@ struct evaluator<Diagonal<ArgType, DiagIndex> > : evaluator_base<Diagonal<ArgType, DiagIndex> > { typedef Diagonal<ArgType, DiagIndex> XprType; - + enum { CoeffReadCost = evaluator<ArgType>::CoeffReadCost, - + Flags = (unsigned int)(evaluator<ArgType>::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit, - + Alignment = 0 }; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit evaluator(const XprType& diagonal) : m_argImpl(diagonal.nestedExpression()), m_index(diagonal.index()) { } - + typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -1587,8 +1655,10 @@ protected: const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index; private: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR + Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR + Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } }; @@ -1612,25 +1682,25 @@ class EvalToTemp : public dense_xpr_base<EvalToTemp<ArgType> >::type { public: - + typedef typename dense_xpr_base<EvalToTemp>::type Base; EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) - + explicit EvalToTemp(const ArgType& arg) : m_arg(arg) { } - + const ArgType& arg() const { return m_arg; } - Index rows() const + EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_arg.rows(); } - Index cols() const + EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_arg.cols(); } @@ -1638,7 +1708,7 @@ class EvalToTemp private: const ArgType& m_arg; }; - + template<typename ArgType> struct evaluator<EvalToTemp<ArgType> > : public evaluator<typename ArgType::PlainObject> @@ -1646,7 +1716,7 @@ struct evaluator<EvalToTemp<ArgType> > typedef EvalToTemp<ArgType> XprType; typedef typename ArgType::PlainObject PlainObject; typedef evaluator<PlainObject> Base; - + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : m_result(xpr.arg()) { |