diff options
Diffstat (limited to 'Eigen/src/Core/util')
-rwxr-xr-x | Eigen/src/Core/util/BlasUtil.h | 423 | ||||
-rw-r--r-- | Eigen/src/Core/util/ConfigureVectorization.h | 512 | ||||
-rw-r--r-- | Eigen/src/Core/util/Constants.h | 34 | ||||
-rwxr-xr-x | Eigen/src/Core/util/DisableStupidWarnings.h | 43 | ||||
-rw-r--r-- | Eigen/src/Core/util/ForwardDeclarations.h | 38 | ||||
-rw-r--r-- | Eigen/src/Core/util/IndexedViewHelper.h | 186 | ||||
-rw-r--r-- | Eigen/src/Core/util/IntegralConstant.h | 272 | ||||
-rwxr-xr-x | Eigen/src/Core/util/MKL_support.h | 19 | ||||
-rw-r--r-- | Eigen/src/Core/util/Macros.h | 960 | ||||
-rw-r--r-- | Eigen/src/Core/util/Memory.h | 268 | ||||
-rwxr-xr-x | Eigen/src/Core/util/Meta.h | 462 | ||||
-rw-r--r-- | Eigen/src/Core/util/ReenableStupidWarnings.h | 8 | ||||
-rw-r--r-- | Eigen/src/Core/util/ReshapedHelper.h | 51 | ||||
-rw-r--r-- | Eigen/src/Core/util/StaticAssert.h | 131 | ||||
-rw-r--r-- | Eigen/src/Core/util/SymbolicIndex.h | 293 | ||||
-rw-r--r-- | Eigen/src/Core/util/XprHelper.h | 95 |
16 files changed, 3196 insertions, 599 deletions
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 6e6ee119b..e16a56498 100755 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -24,14 +24,14 @@ struct gebp_kernel; template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false> struct gemm_pack_rhs; -template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false> +template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, int StorageOrder, bool Conjugate = false, bool PanelMode = false> struct gemm_pack_lhs; template< typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, - int ResStorageOrder> + int ResStorageOrder, int ResInnerStride> struct general_matrix_matrix_product; template<typename Index, @@ -39,90 +39,6 @@ template<typename Index, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized> struct general_matrix_vector_product; - -template<bool Conjugate> struct conj_if; - -template<> struct conj_if<true> { - template<typename T> - inline T operator()(const T& x) const { return numext::conj(x); } - template<typename T> - inline T pconj(const T& x) const { return internal::pconj(x); } -}; - -template<> struct conj_if<false> { - template<typename T> - inline const T& operator()(const T& x) const { return x; } - template<typename T> - inline const T& pconj(const T& x) const { return x; } -}; - -// Generic implementation for custom complex types. -template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs> -struct conj_helper -{ - typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType Scalar; - - EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const - { return conj_if<ConjLhs>()(x) * conj_if<ConjRhs>()(y); } -}; - -template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false> -{ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); } -}; - -template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true> -{ - typedef std::complex<RealScalar> Scalar; - EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const - { return c + pmul(x,y); } - - EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const - { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); } -}; - -template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false> -{ - typedef std::complex<RealScalar> Scalar; - EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const - { return c + pmul(x,y); } - - EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const - { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); } -}; - -template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true> -{ - typedef std::complex<RealScalar> Scalar; - EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const - { return c + pmul(x,y); } - - EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const - { return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y), - numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); } -}; - -template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false> -{ - typedef std::complex<RealScalar> Scalar; - EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const - { return padd(c, pmul(x,y)); } - EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const - { return conj_if<Conj>()(x)*y; } -}; - -template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::complex<RealScalar>, false,Conj> -{ - typedef std::complex<RealScalar> Scalar; - EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const - { return padd(c, pmul(x,y)); } - EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const - { return x*conj_if<Conj>()(y); } -}; - template<typename From,typename To> struct get_factor { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); } }; @@ -155,13 +71,19 @@ class BlasVectorMapper { Scalar* m_data; }; -template<typename Scalar, typename Index, int AlignmentType> -class BlasLinearMapper { - public: - typedef typename packet_traits<Scalar>::type Packet; - typedef typename packet_traits<Scalar>::half HalfPacket; +template<typename Scalar, typename Index, int AlignmentType, int Incr=1> +class BlasLinearMapper; - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {} +template<typename Scalar, typename Index, int AlignmentType> +class BlasLinearMapper<Scalar,Index,AlignmentType> +{ +public: + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data, Index incr=1) + : m_data(data) + { + EIGEN_ONLY_USED_FOR_DEBUG(incr); + eigen_assert(incr==1); + } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const { internal::prefetch(&operator()(i)); @@ -171,33 +93,86 @@ class BlasLinearMapper { return m_data[i]; } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { - return ploadt<Packet, AlignmentType>(m_data + i); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { - return ploadt<HalfPacket, AlignmentType>(m_data + i); + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const { + return ploadt<PacketType, AlignmentType>(m_data + i); } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const { - pstoret<Scalar, Packet, AlignmentType>(m_data + i, p); + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const { + pstoret<Scalar, PacketType, AlignmentType>(m_data + i, p); } - protected: +protected: Scalar *m_data; }; // Lightweight helper class to access matrix coefficients. -template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned> -class blas_data_mapper { - public: - typedef typename packet_traits<Scalar>::type Packet; - typedef typename packet_traits<Scalar>::half HalfPacket; +template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned, int Incr = 1> +class blas_data_mapper; + +// TMP to help PacketBlock store implementation. +// There's currently no known use case for PacketBlock load. +// The default implementation assumes ColMajor order. +// It always store each packet sequentially one `stride` apart. +template<typename Index, typename Scalar, typename Packet, int n, int idx, int StorageOrder> +struct PacketBlockManagement +{ + PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, StorageOrder> pbm; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const { + pbm.store(to, stride, i, j, block); + pstoreu<Scalar>(to + i + (j + idx)*stride, block.packet[idx]); + } +}; + +// PacketBlockManagement specialization to take care of RowMajor order without ifs. +template<typename Index, typename Scalar, typename Packet, int n, int idx> +struct PacketBlockManagement<Index, Scalar, Packet, n, idx, RowMajor> +{ + PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, RowMajor> pbm; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const { + pbm.store(to, stride, i, j, block); + pstoreu<Scalar>(to + j + (i + idx)*stride, block.packet[idx]); + } +}; + +template<typename Index, typename Scalar, typename Packet, int n, int StorageOrder> +struct PacketBlockManagement<Index, Scalar, Packet, n, -1, StorageOrder> +{ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const { + EIGEN_UNUSED_VARIABLE(to); + EIGEN_UNUSED_VARIABLE(stride); + EIGEN_UNUSED_VARIABLE(i); + EIGEN_UNUSED_VARIABLE(j); + EIGEN_UNUSED_VARIABLE(block); + } +}; + +template<typename Index, typename Scalar, typename Packet, int n> +struct PacketBlockManagement<Index, Scalar, Packet, n, -1, RowMajor> +{ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const { + EIGEN_UNUSED_VARIABLE(to); + EIGEN_UNUSED_VARIABLE(stride); + EIGEN_UNUSED_VARIABLE(i); + EIGEN_UNUSED_VARIABLE(j); + EIGEN_UNUSED_VARIABLE(block); + } +}; +template<typename Scalar, typename Index, int StorageOrder, int AlignmentType> +class blas_data_mapper<Scalar,Index,StorageOrder,AlignmentType,1> +{ +public: typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper; typedef BlasVectorMapper<Scalar, Index> VectorMapper; - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {} + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr=1) + : m_data(data), m_stride(stride) + { + EIGEN_ONLY_USED_FOR_DEBUG(incr); + eigen_assert(incr==1); + } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType> getSubMapper(Index i, Index j) const { @@ -218,12 +193,14 @@ class blas_data_mapper { return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { - return ploadt<Packet, AlignmentType>(&operator()(i, j)); + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const { + return ploadt<PacketType, AlignmentType>(&operator()(i, j)); } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { - return ploadt<HalfPacket, AlignmentType>(&operator()(i, j)); + template <typename PacketT, int AlignmentT> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const { + return ploadt<PacketT, AlignmentT>(&operator()(i, j)); } template<typename SubPacket> @@ -246,11 +223,167 @@ class blas_data_mapper { return internal::first_default_aligned(m_data, size); } - protected: + template<typename SubPacket, int n> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n> &block) const { + PacketBlockManagement<Index, Scalar, SubPacket, n, n-1, StorageOrder> pbm; + pbm.store(m_data, m_stride, i, j, block); + } +protected: Scalar* EIGEN_RESTRICT m_data; const Index m_stride; }; +// Implementation of non-natural increment (i.e. inner-stride != 1) +// The exposed API is not complete yet compared to the Incr==1 case +// because some features makes less sense in this case. +template<typename Scalar, typename Index, int AlignmentType, int Incr> +class BlasLinearMapper +{ +public: + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data,Index incr) : m_data(data), m_incr(incr) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const { + internal::prefetch(&operator()(i)); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const { + return m_data[i*m_incr.value()]; + } + + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const { + return pgather<Scalar,PacketType>(m_data + i*m_incr.value(), m_incr.value()); + } + + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const { + pscatter<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value()); + } + +protected: + Scalar *m_data; + const internal::variable_if_dynamic<Index,Incr> m_incr; +}; + +template<typename Scalar, typename Index, int StorageOrder, int AlignmentType,int Incr> +class blas_data_mapper +{ +public: + typedef BlasLinearMapper<Scalar, Index, AlignmentType,Incr> LinearMapper; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper + getSubMapper(Index i, Index j) const { + return blas_data_mapper(&operator()(i, j), m_stride, m_incr.value()); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + return LinearMapper(&operator()(i, j), m_incr.value()); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const { + return m_data[StorageOrder==RowMajor ? j*m_incr.value() + i*m_stride : i*m_incr.value() + j*m_stride]; + } + + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const { + return pgather<Scalar,PacketType>(&operator()(i, j),m_incr.value()); + } + + template <typename PacketT, int AlignmentT> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const { + return pgather<Scalar,PacketT>(&operator()(i, j),m_incr.value()); + } + + template<typename SubPacket> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { + pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride); + } + + template<typename SubPacket> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const { + return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride); + } + + // storePacketBlock_helper defines a way to access values inside the PacketBlock, this is essentially required by the Complex types. + template<typename SubPacket, typename ScalarT, int n, int idx> + struct storePacketBlock_helper + { + storePacketBlock_helper<SubPacket, ScalarT, n, idx-1> spbh; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const { + spbh.store(sup, i,j,block); + for(int l = 0; l < unpacket_traits<SubPacket>::size; l++) + { + ScalarT *v = &sup->operator()(i+l, j+idx); + *v = block.packet[idx][l]; + } + } + }; + + template<typename SubPacket, int n, int idx> + struct storePacketBlock_helper<SubPacket, std::complex<float>, n, idx> + { + storePacketBlock_helper<SubPacket, std::complex<float>, n, idx-1> spbh; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const { + spbh.store(sup,i,j,block); + for(int l = 0; l < unpacket_traits<SubPacket>::size; l++) + { + std::complex<float> *v = &sup->operator()(i+l, j+idx); + v->real(block.packet[idx].v[2*l+0]); + v->imag(block.packet[idx].v[2*l+1]); + } + } + }; + + template<typename SubPacket, int n, int idx> + struct storePacketBlock_helper<SubPacket, std::complex<double>, n, idx> + { + storePacketBlock_helper<SubPacket, std::complex<double>, n, idx-1> spbh; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const { + spbh.store(sup,i,j,block); + for(int l = 0; l < unpacket_traits<SubPacket>::size; l++) + { + std::complex<double> *v = &sup->operator()(i+l, j+idx); + v->real(block.packet[idx].v[2*l+0]); + v->imag(block.packet[idx].v[2*l+1]); + } + } + }; + + template<typename SubPacket, typename ScalarT, int n> + struct storePacketBlock_helper<SubPacket, ScalarT, n, -1> + { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const { + } + }; + + template<typename SubPacket, int n> + struct storePacketBlock_helper<SubPacket, std::complex<float>, n, -1> + { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const { + } + }; + + template<typename SubPacket, int n> + struct storePacketBlock_helper<SubPacket, std::complex<double>, n, -1> + { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const { + } + }; + // This function stores a PacketBlock on m_data, this approach is really quite slow compare to Incr=1 and should be avoided when possible. + template<typename SubPacket, int n> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n>&block) const { + storePacketBlock_helper<SubPacket, Scalar, n, n-1> spb; + spb.store(this, i,j,block); + } +protected: + Scalar* EIGEN_RESTRICT m_data; + const Index m_stride; + const internal::variable_if_dynamic<Index,Incr> m_incr; +}; + // lightweight helper class to access matrix coefficients (const version) template<typename Scalar, typename Index, int StorageOrder> class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> { @@ -278,14 +411,15 @@ template<typename XprType> struct blas_traits HasUsableDirectAccess = ( (int(XprType::Flags)&DirectAccessBit) && ( bool(XprType::IsVectorAtCompileTime) || int(inner_stride_at_compile_time<XprType>::ret) == 1) - ) ? 1 : 0 + ) ? 1 : 0, + HasScalarFactor = false }; typedef typename conditional<bool(HasUsableDirectAccess), ExtractType, typename _ExtractType::PlainObject >::type DirectLinearAccessType; - static inline ExtractType extract(const XprType& x) { return x; } - static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); } + static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return x; } + static inline EIGEN_DEVICE_FUNC const Scalar extractScalarFactor(const XprType&) { return Scalar(1); } }; // pop conjugate @@ -310,17 +444,23 @@ template<typename Scalar, typename NestedXpr, typename Plain> struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> > : blas_traits<NestedXpr> { + enum { + HasScalarFactor = true + }; typedef blas_traits<NestedXpr> Base; typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType; typedef typename Base::ExtractType ExtractType; - static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); } - static inline Scalar extractScalarFactor(const XprType& x) + static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); } + static inline EIGEN_DEVICE_FUNC Scalar extractScalarFactor(const XprType& x) { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); } }; template<typename Scalar, typename NestedXpr, typename Plain> struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > > : blas_traits<NestedXpr> { + enum { + HasScalarFactor = true + }; typedef blas_traits<NestedXpr> Base; typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType; typedef typename Base::ExtractType ExtractType; @@ -339,6 +479,9 @@ template<typename Scalar, typename NestedXpr> struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> > : blas_traits<NestedXpr> { + enum { + HasScalarFactor = true + }; typedef blas_traits<NestedXpr> Base; typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType; typedef typename Base::ExtractType ExtractType; @@ -375,7 +518,7 @@ struct blas_traits<const T> template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess> struct extract_data_selector { - static const typename T::Scalar* run(const T& m) + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static const typename T::Scalar* run(const T& m) { return blas_traits<T>::extract(m).data(); } @@ -386,11 +529,53 @@ struct extract_data_selector<T,false> { static typename T::Scalar* run(const T&) { return 0; } }; -template<typename T> const typename T::Scalar* extract_data(const T& m) +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const typename T::Scalar* extract_data(const T& m) { return extract_data_selector<T>::run(m); } +/** + * \c combine_scalar_factors extracts and multiplies factors from GEMM and GEMV products. + * There is a specialization for booleans + */ +template<typename ResScalar, typename Lhs, typename Rhs> +struct combine_scalar_factors_impl +{ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const Lhs& lhs, const Rhs& rhs) + { + return blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs) + { + return alpha * blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs); + } +}; +template<typename Lhs, typename Rhs> +struct combine_scalar_factors_impl<bool, Lhs, Rhs> +{ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const Lhs& lhs, const Rhs& rhs) + { + return blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const bool& alpha, const Lhs& lhs, const Rhs& rhs) + { + return alpha && blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs); + } +}; + +template<typename ResScalar, typename Lhs, typename Rhs> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs) +{ + return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(alpha, lhs, rhs); +} +template<typename ResScalar, typename Lhs, typename Rhs> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const Lhs& lhs, const Rhs& rhs) +{ + return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(lhs, rhs); +} + + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h new file mode 100644 index 000000000..af4e69623 --- /dev/null +++ b/Eigen/src/Core/util/ConfigureVectorization.h @@ -0,0 +1,512 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2018 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2020, Arm Limited and Contributors +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CONFIGURE_VECTORIZATION_H +#define EIGEN_CONFIGURE_VECTORIZATION_H + +//------------------------------------------------------------------------------------------ +// Static and dynamic alignment control +// +// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES +// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively. +// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not, +// a default value is automatically computed based on architecture, compiler, and OS. +// +// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX} +// to be used to declare statically aligned buffers. +//------------------------------------------------------------------------------------------ + + +/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements. + * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled, + * so that vectorization doesn't affect binary compatibility. + * + * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link + * vectorized and non-vectorized code. + * + * FIXME: this code can be cleaned up once we switch to proper C++11 only. + */ +#if (defined EIGEN_CUDACC) + #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n) + #define EIGEN_ALIGNOF(x) __alignof(x) +#elif EIGEN_HAS_ALIGNAS + #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n) + #define EIGEN_ALIGNOF(x) alignof(x) +#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM + #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) + #define EIGEN_ALIGNOF(x) __alignof(x) +#elif EIGEN_COMP_MSVC + #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n)) + #define EIGEN_ALIGNOF(x) __alignof(x) +#elif EIGEN_COMP_SUNCC + // FIXME not sure about this one: + #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) + #define EIGEN_ALIGNOF(x) __alignof(x) +#else + #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler +#endif + +// If the user explicitly disable vectorization, then we also disable alignment +#if defined(EIGEN_DONT_VECTORIZE) + #if defined(EIGEN_GPUCC) + // GPU code is always vectorized and requires memory alignment for + // statically allocated buffers. + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16 + #else + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0 + #endif +#elif defined(__AVX512F__) + // 64 bytes static alignment is preferred only if really required + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64 +#elif defined(__AVX__) + // 32 bytes static alignment is preferred only if really required + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32 +#else + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16 +#endif + + +// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense +#define EIGEN_MIN_ALIGN_BYTES 16 + +// Defined the boundary (in bytes) on which the data needs to be aligned. Note +// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be +// aligned at all regardless of the value of this #define. + +#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0 +#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY. +#endif + +// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated +// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0 +#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN) + #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES + #undef EIGEN_MAX_STATIC_ALIGN_BYTES + #endif + #define EIGEN_MAX_STATIC_ALIGN_BYTES 0 +#endif + +#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES + + // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES + + // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable + // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always + // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in + // certain common platform (compiler+architecture combinations) to avoid these problems. + // Only static alignment is really problematic (relies on nonstandard compiler extensions), + // try to keep heap alignment even when we have to disable static alignment. + #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS) + #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 + #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6) + // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support. + // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use. + // 4.8 and newer seem definitely unaffected. + #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 + #else + #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 + #endif + + // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX + #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \ + && !EIGEN_GCC3_OR_OLDER \ + && !EIGEN_COMP_SUNCC \ + && !EIGEN_OS_QNX + #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 + #else + #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0 + #endif + + #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT + #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES + #else + #define EIGEN_MAX_STATIC_ALIGN_BYTES 0 + #endif + +#endif + +// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES +#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES +#undef EIGEN_MAX_STATIC_ALIGN_BYTES +#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES +#endif + +#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) + #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT +#endif + +// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not. +// It takes into account both the user choice to explicitly enable/disable alignment (by setting EIGEN_MAX_STATIC_ALIGN_BYTES) +// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). +// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used. + + +// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY +#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8) +#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16) +#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32) +#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64) +#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 +#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES) +#else +#define EIGEN_ALIGN_MAX +#endif + + +// Dynamic alignment control + +#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0 +#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN. +#endif + +#ifdef EIGEN_DONT_ALIGN + #ifdef EIGEN_MAX_ALIGN_BYTES + #undef EIGEN_MAX_ALIGN_BYTES + #endif + #define EIGEN_MAX_ALIGN_BYTES 0 +#elif !defined(EIGEN_MAX_ALIGN_BYTES) + #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES +#endif + +#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES +#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES +#else +#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES +#endif + + +#ifndef EIGEN_UNALIGNED_VECTORIZE +#define EIGEN_UNALIGNED_VECTORIZE 1 +#endif + +//---------------------------------------------------------------------- + +// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into +// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks +#if EIGEN_MAX_ALIGN_BYTES==0 + #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif +#endif + + +// The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be +// removed as gcc 4.1 and msvc 2008 are not supported anyways. +#if EIGEN_COMP_MSVC + #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled + #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later + // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. + #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 + #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER + #endif + #endif +#else + #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) ) + #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC + #endif +#endif + +#if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC)) + + #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER) + + // Defines symbols for compile-time detection of which instructions are + // used. + // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_SSE + #define EIGEN_VECTORIZE_SSE2 + + // Detect sse3/ssse3/sse4: + // gcc and icc defines __SSE3__, ... + // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you + // want to force the use of those instructions with msvc. + #ifdef __SSE3__ + #define EIGEN_VECTORIZE_SSE3 + #endif + #ifdef __SSSE3__ + #define EIGEN_VECTORIZE_SSSE3 + #endif + #ifdef __SSE4_1__ + #define EIGEN_VECTORIZE_SSE4_1 + #endif + #ifdef __SSE4_2__ + #define EIGEN_VECTORIZE_SSE4_2 + #endif + #ifdef __AVX__ + #ifndef EIGEN_USE_SYCL + #define EIGEN_VECTORIZE_AVX + #endif + #define EIGEN_VECTORIZE_SSE3 + #define EIGEN_VECTORIZE_SSSE3 + #define EIGEN_VECTORIZE_SSE4_1 + #define EIGEN_VECTORIZE_SSE4_2 + #endif + #ifdef __AVX2__ + #ifndef EIGEN_USE_SYCL + #define EIGEN_VECTORIZE_AVX2 + #define EIGEN_VECTORIZE_AVX + #endif + #define EIGEN_VECTORIZE_SSE3 + #define EIGEN_VECTORIZE_SSSE3 + #define EIGEN_VECTORIZE_SSE4_1 + #define EIGEN_VECTORIZE_SSE4_2 + #endif + #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__)) + // MSVC does not expose a switch dedicated for FMA + // For MSVC, AVX2 => FMA + #define EIGEN_VECTORIZE_FMA + #endif + #if defined(__AVX512F__) + #ifndef EIGEN_VECTORIZE_FMA + #if EIGEN_COMP_GNUC + #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638). + #else + #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638). + #endif + #endif + #ifndef EIGEN_USE_SYCL + #define EIGEN_VECTORIZE_AVX512 + #define EIGEN_VECTORIZE_AVX2 + #define EIGEN_VECTORIZE_AVX + #endif + #define EIGEN_VECTORIZE_FMA + #define EIGEN_VECTORIZE_SSE3 + #define EIGEN_VECTORIZE_SSSE3 + #define EIGEN_VECTORIZE_SSE4_1 + #define EIGEN_VECTORIZE_SSE4_2 + #ifndef EIGEN_USE_SYCL + #ifdef __AVX512DQ__ + #define EIGEN_VECTORIZE_AVX512DQ + #endif + #ifdef __AVX512ER__ + #define EIGEN_VECTORIZE_AVX512ER + #endif + #ifdef __AVX512BF16__ + #define EIGEN_VECTORIZE_AVX512BF16 + #endif + #endif + #endif + + // Disable AVX support on broken xcode versions + #if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 ) + // A nasty bug in the clang compiler shipped with xcode in a common compilation situation + // when XCode 11.0 and Mac deployment target macOS 10.15 is https://trac.macports.org/ticket/58776#no1 + #ifdef EIGEN_VECTORIZE_AVX + #undef EIGEN_VECTORIZE_AVX + #warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. " + #ifdef EIGEN_VECTORIZE_AVX2 + #undef EIGEN_VECTORIZE_AVX2 + #endif + #ifdef EIGEN_VECTORIZE_FMA + #undef EIGEN_VECTORIZE_FMA + #endif + #ifdef EIGEN_VECTORIZE_AVX512 + #undef EIGEN_VECTORIZE_AVX512 + #endif + #ifdef EIGEN_VECTORIZE_AVX512DQ + #undef EIGEN_VECTORIZE_AVX512DQ + #endif + #ifdef EIGEN_VECTORIZE_AVX512ER + #undef EIGEN_VECTORIZE_AVX512ER + #endif + #endif + // NOTE: Confirmed test failures in XCode 11.0, and XCode 11.2 with -macosx-version-min=10.15 and AVX + // NOTE using -macosx-version-min=10.15 with Xcode 11.0 results in runtime segmentation faults in many tests, 11.2 produce core dumps in 3 tests + // NOTE using -macosx-version-min=10.14 produces functioning and passing tests in all cases + // NOTE __clang_version__ "11.0.0 (clang-1100.0.33.8)" XCode 11.0 <- Produces many segfault and core dumping tests + // with -macosx-version-min=10.15 and AVX + // NOTE __clang_version__ "11.0.0 (clang-1100.0.33.12)" XCode 11.2 <- Produces 3 core dumping tests with + // -macosx-version-min=10.15 and AVX + #endif + + // include files + + // This extern "C" works around a MINGW-w64 compilation issue + // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354 + // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do). + // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations + // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know; + // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too. + // notice that since these are C headers, the extern "C" is theoretically needed anyways. + extern "C" { + // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. + // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus: + #if EIGEN_COMP_ICC >= 1110 + #include <immintrin.h> + #else + #include <mmintrin.h> + #include <emmintrin.h> + #include <xmmintrin.h> + #ifdef EIGEN_VECTORIZE_SSE3 + #include <pmmintrin.h> + #endif + #ifdef EIGEN_VECTORIZE_SSSE3 + #include <tmmintrin.h> + #endif + #ifdef EIGEN_VECTORIZE_SSE4_1 + #include <smmintrin.h> + #endif + #ifdef EIGEN_VECTORIZE_SSE4_2 + #include <nmmintrin.h> + #endif + #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512) + #include <immintrin.h> + #endif + #endif + } // end extern "C" + + #elif defined __VSX__ + + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_VSX + #include <altivec.h> + // We need to #undef all these ugly tokens defined in <altivec.h> + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel + + #elif defined __ALTIVEC__ + + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_ALTIVEC + #include <altivec.h> + // We need to #undef all these ugly tokens defined in <altivec.h> + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel + + #elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE) + + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_NEON + #include <arm_neon.h> + + // We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and + // will not select the backend automatically + #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE) + + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_SVE + #include <arm_sve.h> + + // Since we depend on knowing SVE vector lengths at compile-time, we need + // to ensure a fixed lengths is set + #if defined __ARM_FEATURE_SVE_BITS + #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS + #else +#error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set." +#endif + +#elif (defined __s390x__ && defined __VEC__) + +#define EIGEN_VECTORIZE +#define EIGEN_VECTORIZE_ZVECTOR +#include <vecintrin.h> + +#elif defined __mips_msa + +// Limit MSA optimizations to little-endian CPUs for now. +// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs? +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +#if defined(__LP64__) +#define EIGEN_MIPS_64 +#else +#define EIGEN_MIPS_32 +#endif +#define EIGEN_VECTORIZE +#define EIGEN_VECTORIZE_MSA +#include <msa.h> +#endif + +#endif +#endif + +// Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all +// compilers seem to follow this. We therefore include it explicitly. +// See also: https://bugs.llvm.org/show_bug.cgi?id=47955 +#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) + #include <arm_fp16.h> +#endif + +#if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380)) + // We can use the optimized fp16 to float and float to fp16 conversion routines + #define EIGEN_HAS_FP16_C + + #if defined(EIGEN_COMP_CLANG) + // Workaround for clang: The FP16C intrinsics for clang are included by + // immintrin.h, as opposed to emmintrin.h as suggested by Intel: + // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711 + #include <immintrin.h> + #endif +#endif + +#if defined EIGEN_CUDACC + #define EIGEN_VECTORIZE_GPU + #include <vector_types.h> + #if EIGEN_CUDA_SDK_VER >= 70500 + #define EIGEN_HAS_CUDA_FP16 + #endif +#endif + +#if defined(EIGEN_HAS_CUDA_FP16) + #include <cuda_runtime_api.h> + #include <cuda_fp16.h> +#endif + +#if defined(EIGEN_HIPCC) + #define EIGEN_VECTORIZE_GPU + #include <hip/hip_vector_types.h> + #define EIGEN_HAS_HIP_FP16 + #include <hip/hip_fp16.h> +#endif + + +/** \brief Namespace containing all symbols from the %Eigen library. */ +namespace Eigen { + +inline static const char *SimdInstructionSetsInUse(void) { +#if defined(EIGEN_VECTORIZE_AVX512) + return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_AVX) + return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_SSE4_2) + return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_SSE4_1) + return "SSE, SSE2, SSE3, SSSE3, SSE4.1"; +#elif defined(EIGEN_VECTORIZE_SSSE3) + return "SSE, SSE2, SSE3, SSSE3"; +#elif defined(EIGEN_VECTORIZE_SSE3) + return "SSE, SSE2, SSE3"; +#elif defined(EIGEN_VECTORIZE_SSE2) + return "SSE, SSE2"; +#elif defined(EIGEN_VECTORIZE_ALTIVEC) + return "AltiVec"; +#elif defined(EIGEN_VECTORIZE_VSX) + return "VSX"; +#elif defined(EIGEN_VECTORIZE_NEON) + return "ARM NEON"; +#elif defined(EIGEN_VECTORIZE_SVE) + return "ARM SVE"; +#elif defined(EIGEN_VECTORIZE_ZVECTOR) + return "S390X ZVECTOR"; +#elif defined(EIGEN_VECTORIZE_MSA) + return "MIPS MSA"; +#else + return "None"; +#endif +} + +} // end namespace Eigen + + +#endif // EIGEN_CONFIGURE_VECTORIZATION_H diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 7587d6842..35dcaa7b3 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -3,6 +3,7 @@ // // Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (C) 2020, Arm Limited and Contributors // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -25,6 +26,10 @@ const int Dynamic = -1; */ const int DynamicIndex = 0xffffff; +/** This value means that the increment to go from one value to another in a sequence is not constant for each step. + */ +const int UndefinedIncr = 0xfffffe; + /** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>(). * The value Infinity there means the L-infinity norm. */ @@ -152,7 +157,7 @@ const unsigned int DirectAccessBit = 0x40; /** \deprecated \ingroup flags * * means the first coefficient packet is guaranteed to be aligned. - * An expression cannot has the AlignedBit without the PacketAccessBit flag. + * An expression cannot have the AlignedBit without the PacketAccessBit flag. * In other words, this means we are allow to perform an aligned packet access to the first element regardless * of the expression kind: * \code @@ -251,12 +256,6 @@ enum AlignmentType { }; /** \ingroup enums - * Enum used by DenseBase::corner() in Eigen2 compatibility mode. */ -// FIXME after the corner() API change, this was not needed anymore, except by AlignedBox -// TODO: find out what to do with that. Adapt the AlignedBox API ? -enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight }; - -/** \ingroup enums * Enum containing possible values for the \p Direction parameter of * Reverse, PartialReduxExpr and VectorwiseOp. */ enum DirectionType { @@ -330,9 +329,20 @@ enum StorageOptions { * Enum for specifying whether to apply or solve on the left or right. */ enum SideType { /** Apply transformation on the left. */ - OnTheLeft = 1, + OnTheLeft = 1, /** Apply transformation on the right. */ - OnTheRight = 2 + OnTheRight = 2 +}; + +/** \ingroup enums + * Enum for specifying NaN-propagation behavior, e.g. for coeff-wise min/max. */ +enum NaNPropagationOptions { + /** Implementation defined behavior if NaNs are present. */ + PropagateFast = 0, + /** Always propagate NaNs. */ + PropagateNaN, + /** Always propagate not-NaNs. */ + PropagateNumbers }; /* the following used to be written as: @@ -464,6 +474,8 @@ namespace Architecture AltiVec = 0x2, VSX = 0x3, NEON = 0x4, + MSA = 0x5, + SVE = 0x6, #if defined EIGEN_VECTORIZE_SSE Target = SSE #elif defined EIGEN_VECTORIZE_ALTIVEC @@ -472,6 +484,10 @@ namespace Architecture Target = VSX #elif defined EIGEN_VECTORIZE_NEON Target = NEON +#elif defined EIGEN_VECTORIZE_SVE + Target = SVE +#elif defined EIGEN_VECTORIZE_MSA + Target = MSA #else Target = Generic #endif diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h index 7559e129c..fe0cfec0b 100755 --- a/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/Eigen/src/Core/util/DisableStupidWarnings.h @@ -4,7 +4,6 @@ #ifdef _MSC_VER // 4100 - unreferenced formal parameter (occurred e.g. in aligned_allocator::destroy(pointer p)) // 4101 - unreferenced local variable - // 4127 - conditional expression is constant // 4181 - qualifier applied to reference type ignored // 4211 - nonstandard extension used : redefined extern to static // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data @@ -20,7 +19,7 @@ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) + #pragma warning( disable : 4100 4101 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) @@ -42,17 +41,40 @@ #pragma clang diagnostic push #endif #pragma clang diagnostic ignored "-Wconstant-logical-operand" + #if __clang_major__ >= 3 && __clang_minor__ >= 5 + #pragma clang diagnostic ignored "-Wabsolute-value" + #endif + #if __clang_major__ >= 10 + #pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" + #endif + #if ( defined(__ALTIVEC__) || defined(__VSX__) ) && __cplusplus < 201103L + // warning: generic selections are a C11-specific feature + // ignoring warnings thrown at vec_ctf in Altivec/PacketMath.h + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif -#elif defined __GNUC__ && __GNUC__>=6 +#elif defined __GNUC__ - #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS + #if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) #pragma GCC diagnostic push #endif - #pragma GCC diagnostic ignored "-Wignored-attributes" - + // g++ warns about local variables shadowing member functions, which is too strict + #pragma GCC diagnostic ignored "-Wshadow" + #if __GNUC__ == 4 && __GNUC_MINOR__ < 8 + // Until g++-4.7 there are warnings when comparing unsigned int vs 0, even in templated functions: + #pragma GCC diagnostic ignored "-Wtype-limits" + #endif + #if __GNUC__>=6 + #pragma GCC diagnostic ignored "-Wignored-attributes" + #endif + #if __GNUC__==7 + // See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89325 + #pragma GCC diagnostic ignored "-Wattributes" + #endif #endif #if defined __NVCC__ + #pragma diag_suppress boolean_controlling_expr_is_constant // Disable the "statement is unreachable" message #pragma diag_suppress code_is_unreachable // Disable the "dynamic initialization in unreachable code" message @@ -70,6 +92,15 @@ #pragma diag_suppress 2671 #pragma diag_suppress 2735 #pragma diag_suppress 2737 + #pragma diag_suppress 2739 #endif +#else +// warnings already disabled: +# ifndef EIGEN_WARNINGS_DISABLED_2 +# define EIGEN_WARNINGS_DISABLED_2 +# elif defined(EIGEN_INTERNAL_DEBUGGING) +# error "Do not include \"DisableStupidWarnings.h\" recursively more than twice!" +# endif + #endif // not EIGEN_WARNINGS_DISABLED diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index ea107393a..2f9cc4491 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -47,11 +47,7 @@ template<typename T> struct NumTraits; template<typename Derived> struct EigenBase; template<typename Derived> class DenseBase; template<typename Derived> class PlainObjectBase; - - -template<typename Derived, - int Level = internal::accessors_level<Derived>::value > -class DenseCoeffsBase; +template<typename Derived, int Level> class DenseCoeffsBase; template<typename _Scalar, int _Rows, int _Cols, int _Options = AutoAlign | @@ -83,6 +79,8 @@ template<typename ExpressionType> class ForceAlignedAccess; template<typename ExpressionType> class SwapWrapper; template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block; +template<typename XprType, typename RowIndices, typename ColIndices> class IndexedView; +template<typename XprType, int Rows=Dynamic, int Cols=Dynamic, int Order=0> class Reshaped; template<typename MatrixType, int Size=Dynamic> class VectorBlock; template<typename MatrixType> class Transpose; @@ -112,7 +110,7 @@ template<typename _IndicesType> class TranspositionsWrapper; template<typename Derived, int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors > class MapBase; -template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride; +template<int OuterStrideAtCompileTime, int InnerStrideAtCompileTime> class Stride; template<int Value = Dynamic> class InnerStride; template<int Value = Dynamic> class OuterStride; template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map; @@ -133,6 +131,10 @@ template<typename Derived> class SolverBase; template<typename XprType> class InnerIterator; namespace internal { +template<typename XprType> class generic_randaccess_stl_iterator; +template<typename XprType> class pointer_based_stl_iterator; +template<typename XprType, DirectionType Direction> class subvector_stl_iterator; +template<typename XprType, DirectionType Direction> class subvector_stl_reverse_iterator; template<typename DecompositionType> struct kernel_retval_base; template<typename DecompositionType> struct kernel_retval; template<typename DecompositionType> struct image_retval_base; @@ -178,14 +180,15 @@ template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRh template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_sum_op; template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_difference_op; template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_conj_product_op; -template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_min_op; -template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_max_op; +template<typename LhsScalar,typename RhsScalar=LhsScalar, int NaNPropagation=PropagateFast> struct scalar_min_op; +template<typename LhsScalar,typename RhsScalar=LhsScalar, int NaNPropagation=PropagateFast> struct scalar_max_op; template<typename Scalar> struct scalar_opposite_op; template<typename Scalar> struct scalar_conjugate_op; template<typename Scalar> struct scalar_real_op; template<typename Scalar> struct scalar_imag_op; template<typename Scalar> struct scalar_abs_op; template<typename Scalar> struct scalar_abs2_op; +template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_absolute_difference_op; template<typename Scalar> struct scalar_sqrt_op; template<typename Scalar> struct scalar_rsqrt_op; template<typename Scalar> struct scalar_exp_op; @@ -202,7 +205,7 @@ template<typename Scalar, typename NewType> struct scalar_cast_op; template<typename Scalar> struct scalar_random_op; template<typename Scalar> struct scalar_constant_op; template<typename Scalar> struct scalar_identity_op; -template<typename Scalar,bool iscpx> struct scalar_sign_op; +template<typename Scalar,bool is_complex, bool is_integer> struct scalar_sign_op; template<typename Scalar,typename ScalarExponent> struct scalar_pow_op; template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_hypot_op; template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op; @@ -213,11 +216,27 @@ template<typename Scalar> struct scalar_lgamma_op; template<typename Scalar> struct scalar_digamma_op; template<typename Scalar> struct scalar_erf_op; template<typename Scalar> struct scalar_erfc_op; +template<typename Scalar> struct scalar_ndtri_op; template<typename Scalar> struct scalar_igamma_op; template<typename Scalar> struct scalar_igammac_op; template<typename Scalar> struct scalar_zeta_op; template<typename Scalar> struct scalar_betainc_op; +// Bessel functions in SpecialFunctions module +template<typename Scalar> struct scalar_bessel_i0_op; +template<typename Scalar> struct scalar_bessel_i0e_op; +template<typename Scalar> struct scalar_bessel_i1_op; +template<typename Scalar> struct scalar_bessel_i1e_op; +template<typename Scalar> struct scalar_bessel_j0_op; +template<typename Scalar> struct scalar_bessel_y0_op; +template<typename Scalar> struct scalar_bessel_j1_op; +template<typename Scalar> struct scalar_bessel_y1_op; +template<typename Scalar> struct scalar_bessel_k0_op; +template<typename Scalar> struct scalar_bessel_k0e_op; +template<typename Scalar> struct scalar_bessel_k1_op; +template<typename Scalar> struct scalar_bessel_k1e_op; + + } // end namespace internal struct IOFormat; @@ -255,6 +274,7 @@ template<typename MatrixType> class HouseholderQR; template<typename MatrixType> class ColPivHouseholderQR; template<typename MatrixType> class FullPivHouseholderQR; template<typename MatrixType> class CompleteOrthogonalDecomposition; +template<typename MatrixType> class SVDBase; template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD; template<typename MatrixType> class BDCSVD; template<typename MatrixType, int UpLo = Lower> class LLT; diff --git a/Eigen/src/Core/util/IndexedViewHelper.h b/Eigen/src/Core/util/IndexedViewHelper.h new file mode 100644 index 000000000..f85de305f --- /dev/null +++ b/Eigen/src/Core/util/IndexedViewHelper.h @@ -0,0 +1,186 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_INDEXED_VIEW_HELPER_H +#define EIGEN_INDEXED_VIEW_HELPER_H + +namespace Eigen { + +namespace internal { +struct symbolic_last_tag {}; +} + +/** \var last + * \ingroup Core_Module + * + * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last element/row/columns + * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&). + * + * This symbolic placeholder supports standard arithmetic operations. + * + * A typical usage example would be: + * \code + * using namespace Eigen; + * using Eigen::last; + * VectorXd v(n); + * v(seq(2,last-2)).setOnes(); + * \endcode + * + * \sa end + */ +static const symbolic::SymbolExpr<internal::symbolic_last_tag> last; // PLEASE use Eigen::last instead of Eigen::placeholders::last + +/** \var lastp1 + * \ingroup Core_Module + * + * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically + * reference the last+1 element/row/columns of the underlying vector or matrix once + * passed to DenseBase::operator()(const RowIndices&, const ColIndices&). + * + * This symbolic placeholder supports standard arithmetic operations. + * It is essentially an alias to last+fix<1>. + * + * \sa last + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN +static const auto lastp1 = last+fix<1>; +#else +// Using a FixedExpr<1> expression is important here to make sure the compiler +// can fully optimize the computation starting indices with zero overhead. +static const symbolic::AddExpr<symbolic::SymbolExpr<internal::symbolic_last_tag>,symbolic::ValueExpr<Eigen::internal::FixedInt<1> > > lastp1(last+fix<1>()); +#endif + +namespace internal { + + // Replace symbolic last/end "keywords" by their true runtime value +inline Index eval_expr_given_size(Index x, Index /* size */) { return x; } + +template<int N> +FixedInt<N> eval_expr_given_size(FixedInt<N> x, Index /*size*/) { return x; } + +template<typename Derived> +Index eval_expr_given_size(const symbolic::BaseExpr<Derived> &x, Index size) +{ + return x.derived().eval(last=size-1); +} + +// Extract increment/step at compile time +template<typename T, typename EnableIf = void> struct get_compile_time_incr { + enum { value = UndefinedIncr }; +}; + +// Analogue of std::get<0>(x), but tailored for our needs. +template<typename T> +EIGEN_CONSTEXPR Index first(const T& x) EIGEN_NOEXCEPT { return x.first(); } + +// IndexedViewCompatibleType/makeIndexedViewCompatible turn an arbitrary object of type T into something usable by MatrixSlice +// The generic implementation is a no-op +template<typename T,int XprSize,typename EnableIf=void> +struct IndexedViewCompatibleType { + typedef T type; +}; + +template<typename T,typename Q> +const T& makeIndexedViewCompatible(const T& x, Index /*size*/, Q) { return x; } + +//-------------------------------------------------------------------------------- +// Handling of a single Index +//-------------------------------------------------------------------------------- + +struct SingleRange { + enum { + SizeAtCompileTime = 1 + }; + SingleRange(Index val) : m_value(val) {} + Index operator[](Index) const { return m_value; } + static EIGEN_CONSTEXPR Index size() EIGEN_NOEXCEPT { return 1; } + Index first() const EIGEN_NOEXCEPT { return m_value; } + Index m_value; +}; + +template<> struct get_compile_time_incr<SingleRange> { + enum { value = 1 }; // 1 or 0 ?? +}; + +// Turn a single index into something that looks like an array (i.e., that exposes a .size(), and operator[](int) methods) +template<typename T, int XprSize> +struct IndexedViewCompatibleType<T,XprSize,typename internal::enable_if<internal::is_integral<T>::value>::type> { + // Here we could simply use Array, but maybe it's less work for the compiler to use + // a simpler wrapper as SingleRange + //typedef Eigen::Array<Index,1,1> type; + typedef SingleRange type; +}; + +template<typename T, int XprSize> +struct IndexedViewCompatibleType<T, XprSize, typename enable_if<symbolic::is_symbolic<T>::value>::type> { + typedef SingleRange type; +}; + + +template<typename T> +typename enable_if<symbolic::is_symbolic<T>::value,SingleRange>::type +makeIndexedViewCompatible(const T& id, Index size, SpecializedType) { + return eval_expr_given_size(id,size); +} + +//-------------------------------------------------------------------------------- +// Handling of all +//-------------------------------------------------------------------------------- + +struct all_t { all_t() {} }; + +// Convert a symbolic 'all' into a usable range type +template<int XprSize> +struct AllRange { + enum { SizeAtCompileTime = XprSize }; + AllRange(Index size = XprSize) : m_size(size) {} + EIGEN_CONSTEXPR Index operator[](Index i) const EIGEN_NOEXCEPT { return i; } + EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_size.value(); } + EIGEN_CONSTEXPR Index first() const EIGEN_NOEXCEPT { return 0; } + variable_if_dynamic<Index,XprSize> m_size; +}; + +template<int XprSize> +struct IndexedViewCompatibleType<all_t,XprSize> { + typedef AllRange<XprSize> type; +}; + +template<typename XprSizeType> +inline AllRange<get_fixed_value<XprSizeType>::value> makeIndexedViewCompatible(all_t , XprSizeType size, SpecializedType) { + return AllRange<get_fixed_value<XprSizeType>::value>(size); +} + +template<int Size> struct get_compile_time_incr<AllRange<Size> > { + enum { value = 1 }; +}; + +} // end namespace internal + + +/** \var all + * \ingroup Core_Module + * Can be used as a parameter to DenseBase::operator()(const RowIndices&, const ColIndices&) to index all rows or columns + */ +static const Eigen::internal::all_t all; // PLEASE use Eigen::all instead of Eigen::placeholders::all + + +namespace placeholders { + typedef symbolic::SymbolExpr<internal::symbolic_last_tag> last_t; + typedef symbolic::AddExpr<symbolic::SymbolExpr<internal::symbolic_last_tag>,symbolic::ValueExpr<Eigen::internal::FixedInt<1> > > end_t; + typedef Eigen::internal::all_t all_t; + + EIGEN_DEPRECATED static const all_t all = Eigen::all; // PLEASE use Eigen::all instead of Eigen::placeholders::all + EIGEN_DEPRECATED static const last_t last = Eigen::last; // PLEASE use Eigen::last instead of Eigen::placeholders::last + EIGEN_DEPRECATED static const end_t end = Eigen::lastp1; // PLEASE use Eigen::lastp1 instead of Eigen::placeholders::end +} + +} // end namespace Eigen + +#endif // EIGEN_INDEXED_VIEW_HELPER_H diff --git a/Eigen/src/Core/util/IntegralConstant.h b/Eigen/src/Core/util/IntegralConstant.h new file mode 100644 index 000000000..945d426ea --- /dev/null +++ b/Eigen/src/Core/util/IntegralConstant.h @@ -0,0 +1,272 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_INTEGRAL_CONSTANT_H +#define EIGEN_INTEGRAL_CONSTANT_H + +namespace Eigen { + +namespace internal { + +template<int N> class FixedInt; +template<int N> class VariableAndFixedInt; + +/** \internal + * \class FixedInt + * + * This class embeds a compile-time integer \c N. + * + * It is similar to c++11 std::integral_constant<int,N> but with some additional features + * such as: + * - implicit conversion to int + * - arithmetic and some bitwise operators: -, +, *, /, %, &, | + * - c++98/14 compatibility with fix<N> and fix<N>() syntax to define integral constants. + * + * It is strongly discouraged to directly deal with this class FixedInt. Instances are expcected to + * be created by the user using Eigen::fix<N> or Eigen::fix<N>(). In C++98-11, the former syntax does + * not create a FixedInt<N> instance but rather a point to function that needs to be \em cleaned-up + * using the generic helper: + * \code + * internal::cleanup_index_type<T>::type + * internal::cleanup_index_type<T,DynamicKey>::type + * \endcode + * where T can a FixedInt<N>, a pointer to function FixedInt<N> (*)(), or numerous other integer-like representations. + * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values. + * + * For convenience, you can extract the compile-time value \c N in a generic way using the following helper: + * \code + * internal::get_fixed_value<T,DefaultVal>::value + * \endcode + * that will give you \c N if T equals FixedInt<N> or FixedInt<N> (*)(), and \c DefaultVal if T does not embed any compile-time value (e.g., T==int). + * + * \sa fix<N>, class VariableAndFixedInt + */ +template<int N> class FixedInt +{ +public: + static const int value = N; + EIGEN_CONSTEXPR operator int() const { return value; } + FixedInt() {} + FixedInt( VariableAndFixedInt<N> other) { + #ifndef EIGEN_INTERNAL_DEBUGGING + EIGEN_UNUSED_VARIABLE(other); + #endif + eigen_internal_assert(int(other)==N); + } + + FixedInt<-N> operator-() const { return FixedInt<-N>(); } + template<int M> + FixedInt<N+M> operator+( FixedInt<M>) const { return FixedInt<N+M>(); } + template<int M> + FixedInt<N-M> operator-( FixedInt<M>) const { return FixedInt<N-M>(); } + template<int M> + FixedInt<N*M> operator*( FixedInt<M>) const { return FixedInt<N*M>(); } + template<int M> + FixedInt<N/M> operator/( FixedInt<M>) const { return FixedInt<N/M>(); } + template<int M> + FixedInt<N%M> operator%( FixedInt<M>) const { return FixedInt<N%M>(); } + template<int M> + FixedInt<N|M> operator|( FixedInt<M>) const { return FixedInt<N|M>(); } + template<int M> + FixedInt<N&M> operator&( FixedInt<M>) const { return FixedInt<N&M>(); } + +#if EIGEN_HAS_CXX14_VARIABLE_TEMPLATES + // Needed in C++14 to allow fix<N>(): + FixedInt operator() () const { return *this; } + + VariableAndFixedInt<N> operator() (int val) const { return VariableAndFixedInt<N>(val); } +#else + FixedInt ( FixedInt<N> (*)() ) {} +#endif + +#if EIGEN_HAS_CXX11 + FixedInt(std::integral_constant<int,N>) {} +#endif +}; + +/** \internal + * \class VariableAndFixedInt + * + * This class embeds both a compile-time integer \c N and a runtime integer. + * Both values are supposed to be equal unless the compile-time value \c N has a special + * value meaning that the runtime-value should be used. Depending on the context, this special + * value can be either Eigen::Dynamic (for positive quantities) or Eigen::DynamicIndex (for + * quantities that can be negative). + * + * It is the return-type of the function Eigen::fix<N>(int), and most of the time this is the only + * way it is used. It is strongly discouraged to directly deal with instances of VariableAndFixedInt. + * Indeed, in order to write generic code, it is the responsibility of the callee to properly convert + * it to either a true compile-time quantity (i.e. a FixedInt<N>), or to a runtime quantity (e.g., an Index) + * using the following generic helper: + * \code + * internal::cleanup_index_type<T>::type + * internal::cleanup_index_type<T,DynamicKey>::type + * \endcode + * where T can be a template instantiation of VariableAndFixedInt or numerous other integer-like representations. + * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values. + * + * For convenience, you can also extract the compile-time value \c N using the following helper: + * \code + * internal::get_fixed_value<T,DefaultVal>::value + * \endcode + * that will give you \c N if T equals VariableAndFixedInt<N>, and \c DefaultVal if T does not embed any compile-time value (e.g., T==int). + * + * \sa fix<N>(int), class FixedInt + */ +template<int N> class VariableAndFixedInt +{ +public: + static const int value = N; + operator int() const { return m_value; } + VariableAndFixedInt(int val) { m_value = val; } +protected: + int m_value; +}; + +template<typename T, int Default=Dynamic> struct get_fixed_value { + static const int value = Default; +}; + +template<int N,int Default> struct get_fixed_value<FixedInt<N>,Default> { + static const int value = N; +}; + +#if !EIGEN_HAS_CXX14 +template<int N,int Default> struct get_fixed_value<FixedInt<N> (*)(),Default> { + static const int value = N; +}; +#endif + +template<int N,int Default> struct get_fixed_value<VariableAndFixedInt<N>,Default> { + static const int value = N ; +}; + +template<typename T, int N, int Default> +struct get_fixed_value<variable_if_dynamic<T,N>,Default> { + static const int value = N; +}; + +template<typename T> EIGEN_DEVICE_FUNC Index get_runtime_value(const T &x) { return x; } +#if !EIGEN_HAS_CXX14 +template<int N> EIGEN_DEVICE_FUNC Index get_runtime_value(FixedInt<N> (*)()) { return N; } +#endif + +// Cleanup integer/FixedInt/VariableAndFixedInt/etc types: + +// By default, no cleanup: +template<typename T, int DynamicKey=Dynamic, typename EnableIf=void> struct cleanup_index_type { typedef T type; }; + +// Convert any integral type (e.g., short, int, unsigned int, etc.) to Eigen::Index +template<typename T, int DynamicKey> struct cleanup_index_type<T,DynamicKey,typename internal::enable_if<internal::is_integral<T>::value>::type> { typedef Index type; }; + +#if !EIGEN_HAS_CXX14 +// In c++98/c++11, fix<N> is a pointer to function that we better cleanup to a true FixedInt<N>: +template<int N, int DynamicKey> struct cleanup_index_type<FixedInt<N> (*)(), DynamicKey> { typedef FixedInt<N> type; }; +#endif + +// If VariableAndFixedInt does not match DynamicKey, then we turn it to a pure compile-time value: +template<int N, int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<N>, DynamicKey> { typedef FixedInt<N> type; }; +// If VariableAndFixedInt matches DynamicKey, then we turn it to a pure runtime-value (aka Index): +template<int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<DynamicKey>, DynamicKey> { typedef Index type; }; + +#if EIGEN_HAS_CXX11 +template<int N, int DynamicKey> struct cleanup_index_type<std::integral_constant<int,N>, DynamicKey> { typedef FixedInt<N> type; }; +#endif + +} // end namespace internal + +#ifndef EIGEN_PARSED_BY_DOXYGEN + +#if EIGEN_HAS_CXX14_VARIABLE_TEMPLATES +template<int N> +static const internal::FixedInt<N> fix{}; +#else +template<int N> +inline internal::FixedInt<N> fix() { return internal::FixedInt<N>(); } + +// The generic typename T is mandatory. Otherwise, a code like fix<N> could refer to either the function above or this next overload. +// This way a code like fix<N> can only refer to the previous function. +template<int N,typename T> +inline internal::VariableAndFixedInt<N> fix(T val) { return internal::VariableAndFixedInt<N>(internal::convert_index<int>(val)); } +#endif + +#else // EIGEN_PARSED_BY_DOXYGEN + +/** \var fix<N>() + * \ingroup Core_Module + * + * This \em identifier permits to construct an object embedding a compile-time integer \c N. + * + * \tparam N the compile-time integer value + * + * It is typically used in conjunction with the Eigen::seq and Eigen::seqN functions to pass compile-time values to them: + * \code + * seqN(10,fix<4>,fix<-3>) // <=> [10 7 4 1] + * \endcode + * + * See also the function fix(int) to pass both a compile-time and runtime value. + * + * In c++14, it is implemented as: + * \code + * template<int N> static const internal::FixedInt<N> fix{}; + * \endcode + * where internal::FixedInt<N> is an internal template class similar to + * <a href="http://en.cppreference.com/w/cpp/types/integral_constant">\c std::integral_constant </a><tt> <int,N> </tt> + * Here, \c fix<N> is thus an object of type \c internal::FixedInt<N>. + * + * In c++98/11, it is implemented as a function: + * \code + * template<int N> inline internal::FixedInt<N> fix(); + * \endcode + * Here internal::FixedInt<N> is thus a pointer to function. + * + * If for some reason you want a true object in c++98 then you can write: \code fix<N>() \endcode which is also valid in c++14. + * + * \sa fix<N>(int), seq, seqN + */ +template<int N> +static const auto fix(); + +/** \fn fix<N>(int) + * \ingroup Core_Module + * + * This function returns an object embedding both a compile-time integer \c N, and a fallback runtime value \a val. + * + * \tparam N the compile-time integer value + * \param val the fallback runtime integer value + * + * This function is a more general version of the \ref fix identifier/function that can be used in template code + * where the compile-time value could turn out to actually mean "undefined at compile-time". For positive integers + * such as a size or a dimension, this case is identified by Eigen::Dynamic, whereas runtime signed integers + * (e.g., an increment/stride) are identified as Eigen::DynamicIndex. In such a case, the runtime value \a val + * will be used as a fallback. + * + * A typical use case would be: + * \code + * template<typename Derived> void foo(const MatrixBase<Derived> &mat) { + * const int N = Derived::RowsAtCompileTime==Dynamic ? Dynamic : Derived::RowsAtCompileTime/2; + * const int n = mat.rows()/2; + * ... mat( seqN(0,fix<N>(n) ) ...; + * } + * \endcode + * In this example, the function Eigen::seqN knows that the second argument is expected to be a size. + * If the passed compile-time value N equals Eigen::Dynamic, then the proxy object returned by fix will be dissmissed, and converted to an Eigen::Index of value \c n. + * Otherwise, the runtime-value \c n will be dissmissed, and the returned ArithmeticSequence will be of the exact same type as <tt> seqN(0,fix<N>) </tt>. + * + * \sa fix, seqN, class ArithmeticSequence + */ +template<int N> +static const auto fix(int val); + +#endif // EIGEN_PARSED_BY_DOXYGEN + +} // end namespace Eigen + +#endif // EIGEN_INTEGRAL_CONSTANT_H diff --git a/Eigen/src/Core/util/MKL_support.h b/Eigen/src/Core/util/MKL_support.h index 26b59669e..17963fad4 100755 --- a/Eigen/src/Core/util/MKL_support.h +++ b/Eigen/src/Core/util/MKL_support.h @@ -49,12 +49,17 @@ #define EIGEN_USE_LAPACKE #endif -#if defined(EIGEN_USE_MKL_VML) +#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL) #define EIGEN_USE_MKL #endif + #if defined EIGEN_USE_MKL -# include <mkl.h> +# if (!defined MKL_DIRECT_CALL) && (!defined EIGEN_MKL_NO_DIRECT_CALL) +# define MKL_DIRECT_CALL +# define MKL_DIRECT_CALL_JUST_SET +# endif +# include <mkl.h> /*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/ # ifndef INTEL_MKL_VERSION # undef EIGEN_USE_MKL /* INTEL_MKL_VERSION is not even defined on older versions */ @@ -68,6 +73,9 @@ # undef EIGEN_USE_MKL_VML # undef EIGEN_USE_LAPACKE_STRICT # undef EIGEN_USE_LAPACKE +# ifdef MKL_DIRECT_CALL_JUST_SET +# undef MKL_DIRECT_CALL +# endif # endif #endif @@ -108,6 +116,10 @@ #endif #endif +#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL) +#include "../../misc/blas.h" +#endif + namespace Eigen { typedef std::complex<double> dcomplex; @@ -121,8 +133,5 @@ typedef int BlasIndex; } // end namespace Eigen -#if defined(EIGEN_USE_BLAS) -#include "../../misc/blas.h" -#endif #endif // EIGEN_MKL_SUPPORT_H diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 38d6ddb9a..986c3d44d 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -11,19 +11,56 @@ #ifndef EIGEN_MACROS_H #define EIGEN_MACROS_H +//------------------------------------------------------------------------------------------ +// Eigen version and basic defaults +//------------------------------------------------------------------------------------------ + #define EIGEN_WORLD_VERSION 3 -#define EIGEN_MAJOR_VERSION 3 -#define EIGEN_MINOR_VERSION 4 +#define EIGEN_MAJOR_VERSION 4 +#define EIGEN_MINOR_VERSION 0 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ EIGEN_MINOR_VERSION>=z)))) +#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR +#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor +#else +#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor +#endif + +#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t +#endif + +// Upperbound on the C++ version to use. +// Expected values are 03, 11, 14, 17, etc. +// By default, let's use an arbitrarily large C++ version. +#ifndef EIGEN_MAX_CPP_VER +#define EIGEN_MAX_CPP_VER 99 +#endif + +/** Allows to disable some optimizations which might affect the accuracy of the result. + * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them. + * They currently include: + * - single precision ArrayBase::sin() and ArrayBase::cos() for SSE and AVX vectorization. + */ +#ifndef EIGEN_FAST_MATH +#define EIGEN_FAST_MATH 1 +#endif + +#ifndef EIGEN_STACK_ALLOCATION_LIMIT +// 131072 == 128 KB +#define EIGEN_STACK_ALLOCATION_LIMIT 131072 +#endif + +//------------------------------------------------------------------------------------------ // Compiler identification, EIGEN_COMP_* +//------------------------------------------------------------------------------------------ /// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC #ifdef __GNUC__ - #define EIGEN_COMP_GNUC 1 + #define EIGEN_COMP_GNUC (__GNUC__*10+__GNUC_MINOR__) #else #define EIGEN_COMP_GNUC 0 #endif @@ -35,6 +72,12 @@ #define EIGEN_COMP_CLANG 0 #endif +/// \internal EIGEN_COMP_CASTXML set to 1 if being preprocessed by CastXML +#if defined(__castxml__) + #define EIGEN_COMP_CASTXML 1 +#else + #define EIGEN_COMP_CASTXML 0 +#endif /// \internal EIGEN_COMP_LLVM set to 1 if the compiler backend is llvm #if defined(__llvm__) @@ -71,14 +114,44 @@ #define EIGEN_COMP_MSVC 0 #endif +#if defined(__NVCC__) +#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9) + #define EIGEN_COMP_NVCC ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100)) +#elif defined(__CUDACC_VER__) + #define EIGEN_COMP_NVCC __CUDACC_VER__ +#else + #error "NVCC did not define compiler version." +#endif +#else + #define EIGEN_COMP_NVCC 0 +#endif + // For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC: -// name ver MSC_VER -// 2008 9 1500 -// 2010 10 1600 -// 2012 11 1700 -// 2013 12 1800 -// 2015 14 1900 -// "15" 15 1900 +// name ver MSC_VER +// 2008 9 1500 +// 2010 10 1600 +// 2012 11 1700 +// 2013 12 1800 +// 2015 14 1900 +// "15" 15 1900 +// 2017-14.1 15.0 1910 +// 2017-14.11 15.3 1911 +// 2017-14.12 15.5 1912 +// 2017-14.13 15.6 1913 +// 2017-14.14 15.7 1914 + +/// \internal EIGEN_COMP_MSVC_LANG set to _MSVC_LANG if the compiler is Microsoft Visual C++, 0 otherwise. +#if defined(_MSVC_LANG) + #define EIGEN_COMP_MSVC_LANG _MSVC_LANG +#else + #define EIGEN_COMP_MSVC_LANG 0 +#endif + +// For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC_LANG: +// MSVC option Standard MSVC_LANG +// /std:c++14 (default as of VS 2019) C++14 201402L +// /std:c++17 C++17 201703L +// /std:c++latest >C++17 >201703L /// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC or clang-cl #if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG) @@ -87,16 +160,21 @@ #define EIGEN_COMP_MSVC_STRICT 0 #endif -/// \internal EIGEN_COMP_IBM set to 1 if the compiler is IBM XL C++ -#if defined(__IBMCPP__) || defined(__xlc__) - #define EIGEN_COMP_IBM 1 +/// \internal EIGEN_COMP_IBM set to xlc version if the compiler is IBM XL C++ +// XLC version +// 3.1 0x0301 +// 4.5 0x0405 +// 5.0 0x0500 +// 12.1 0x0C01 +#if defined(__IBMCPP__) || defined(__xlc__) || defined(__ibmxl__) + #define EIGEN_COMP_IBM __xlC__ #else #define EIGEN_COMP_IBM 0 #endif -/// \internal EIGEN_COMP_PGI set to 1 if the compiler is Portland Group Compiler +/// \internal EIGEN_COMP_PGI set to PGI version if the compiler is Portland Group Compiler #if defined(__PGI) - #define EIGEN_COMP_PGI 1 + #define EIGEN_COMP_PGI (__PGIC__*100+__PGIC_MINOR__) #else #define EIGEN_COMP_PGI 0 #endif @@ -108,7 +186,7 @@ #define EIGEN_COMP_ARM 0 #endif -/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler +/// \internal EIGEN_COMP_EMSCRIPTEN set to 1 if the compiler is Emscripten Compiler #if defined(__EMSCRIPTEN__) #define EIGEN_COMP_EMSCRIPTEN 1 #else @@ -142,9 +220,13 @@ #endif + +//------------------------------------------------------------------------------------------ // Architecture identification, EIGEN_ARCH_* +//------------------------------------------------------------------------------------------ + -#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64) +#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC)) || defined(__amd64) #define EIGEN_ARCH_x86_64 1 #else #define EIGEN_ARCH_x86_64 0 @@ -170,18 +252,61 @@ #endif /// \internal EIGEN_ARCH_ARM64 set to 1 if the architecture is ARM64 -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) #define EIGEN_ARCH_ARM64 1 #else #define EIGEN_ARCH_ARM64 0 #endif +/// \internal EIGEN_ARCH_ARM_OR_ARM64 set to 1 if the architecture is ARM or ARM64 #if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64 #define EIGEN_ARCH_ARM_OR_ARM64 1 #else #define EIGEN_ARCH_ARM_OR_ARM64 0 #endif +/// \internal EIGEN_ARCH_ARMV8 set to 1 if the architecture is armv8 or greater. +#if EIGEN_ARCH_ARM_OR_ARM64 && defined(__ARM_ARCH) && __ARM_ARCH >= 8 +#define EIGEN_ARCH_ARMV8 1 +#else +#define EIGEN_ARCH_ARMV8 0 +#endif + + +/// \internal EIGEN_HAS_ARM64_FP16 set to 1 if the architecture provides an IEEE +/// compliant Arm fp16 type +#if EIGEN_ARCH_ARM64 + #ifndef EIGEN_HAS_ARM64_FP16 + #if defined(__ARM_FP16_FORMAT_IEEE) + #define EIGEN_HAS_ARM64_FP16 1 + #else + #define EIGEN_HAS_ARM64_FP16 0 + #endif + #endif +#endif + +/// \internal EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC set to 1 if the architecture +/// supports Neon vector intrinsics for fp16. +#if EIGEN_ARCH_ARM64 + #ifndef EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 1 + #else + #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 0 + #endif + #endif +#endif + +/// \internal EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC set to 1 if the architecture +/// supports Neon scalar intrinsics for fp16. +#if EIGEN_ARCH_ARM64 + #ifndef EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC + #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) + #define EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC 1 + #endif + #endif +#endif + /// \internal EIGEN_ARCH_MIPS set to 1 if the architecture is MIPS #if defined(__mips__) || defined(__mips) #define EIGEN_ARCH_MIPS 1 @@ -212,7 +337,9 @@ +//------------------------------------------------------------------------------------------ // Operating system identification, EIGEN_OS_* +//------------------------------------------------------------------------------------------ /// \internal EIGEN_OS_UNIX set to 1 if the OS is a unix variant #if defined(__unix__) || defined(__unix) @@ -299,9 +426,17 @@ #define EIGEN_OS_WIN_STRICT 0 #endif -/// \internal EIGEN_OS_SUN set to 1 if the OS is SUN +/// \internal EIGEN_OS_SUN set to __SUNPRO_C if the OS is SUN +// compiler solaris __SUNPRO_C +// version studio +// 5.7 10 0x570 +// 5.8 11 0x580 +// 5.9 12 0x590 +// 5.10 12.1 0x5100 +// 5.11 12.2 0x5110 +// 5.12 12.3 0x5120 #if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__)) - #define EIGEN_OS_SUN 1 + #define EIGEN_OS_SUN __SUNPRO_C #else #define EIGEN_OS_SUN 0 #endif @@ -314,26 +449,137 @@ #endif +//------------------------------------------------------------------------------------------ +// Detect GPU compilers and architectures +//------------------------------------------------------------------------------------------ -#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG - // see bug 89 - #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0 -#else - #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1 +// NVCC is not supported as the target platform for HIPCC +// Note that this also makes EIGEN_CUDACC and EIGEN_HIPCC mutually exclusive +#if defined(__NVCC__) && defined(__HIPCC__) + #error "NVCC as the target platform for HIPCC is currently not supported." #endif -// This macro can be used to prevent from macro expansion, e.g.: -// std::max EIGEN_NOT_A_MACRO(a,b) -#define EIGEN_NOT_A_MACRO +#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA) + // Means the compiler is either nvcc or clang with CUDA enabled + #define EIGEN_CUDACC __CUDACC__ +#endif -#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR -#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor +#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA) + // Means we are generating code for the device + #define EIGEN_CUDA_ARCH __CUDA_ARCH__ +#endif + +#if defined(EIGEN_CUDACC) +#include <cuda.h> + #define EIGEN_CUDA_SDK_VER (CUDA_VERSION * 10) #else -#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor + #define EIGEN_CUDA_SDK_VER 0 #endif -#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t +#if defined(__HIPCC__) && !defined(EIGEN_NO_HIP) + // Means the compiler is HIPCC (analogous to EIGEN_CUDACC, but for HIP) + #define EIGEN_HIPCC __HIPCC__ + + // We need to include hip_runtime.h here because it pulls in + // ++ hip_common.h which contains the define for __HIP_DEVICE_COMPILE__ + // ++ host_defines.h which contains the defines for the __host__ and __device__ macros + #include <hip/hip_runtime.h> + + #if defined(__HIP_DEVICE_COMPILE__) + // analogous to EIGEN_CUDA_ARCH, but for HIP + #define EIGEN_HIP_DEVICE_COMPILE __HIP_DEVICE_COMPILE__ + #endif + + // For HIP (ROCm 3.5 and higher), we need to explicitly set the launch_bounds attribute + // value to 1024. The compiler assigns a default value of 256 when the attribute is not + // specified. This results in failures on the HIP platform, for cases when a GPU kernel + // without an explicit launch_bounds attribute is called with a threads_per_block value + // greater than 256. + // + // This is a regression in functioanlity and is expected to be fixed within the next + // couple of ROCm releases (compiler will go back to using 1024 value as the default) + // + // In the meantime, we will use a "only enabled for HIP" macro to set the launch_bounds + // attribute. + + #define EIGEN_HIP_LAUNCH_BOUNDS_1024 __launch_bounds__(1024) + +#endif + +#if !defined(EIGEN_HIP_LAUNCH_BOUNDS_1024) +#define EIGEN_HIP_LAUNCH_BOUNDS_1024 +#endif // !defined(EIGEN_HIP_LAUNCH_BOUNDS_1024) + +// Unify CUDA/HIPCC + +#if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC) +// +// If either EIGEN_CUDACC or EIGEN_HIPCC is defined, then define EIGEN_GPUCC +// +#define EIGEN_GPUCC +// +// EIGEN_HIPCC implies the HIP compiler and is used to tweak Eigen code for use in HIP kernels +// EIGEN_CUDACC implies the CUDA compiler and is used to tweak Eigen code for use in CUDA kernels +// +// In most cases the same tweaks are required to the Eigen code to enable in both the HIP and CUDA kernels. +// For those cases, the corresponding code should be guarded with +// #if defined(EIGEN_GPUCC) +// instead of +// #if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC) +// +// For cases where the tweak is specific to HIP, the code should be guarded with +// #if defined(EIGEN_HIPCC) +// +// For cases where the tweak is specific to CUDA, the code should be guarded with +// #if defined(EIGEN_CUDACC) +// +#endif + +#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) +// +// If either EIGEN_CUDA_ARCH or EIGEN_HIP_DEVICE_COMPILE is defined, then define EIGEN_GPU_COMPILE_PHASE +// +#define EIGEN_GPU_COMPILE_PHASE +// +// GPU compilers (HIPCC, NVCC) typically do two passes over the source code, +// + one to compile the source for the "host" (ie CPU) +// + another to compile the source for the "device" (ie. GPU) +// +// Code that needs to enabled only during the either the "host" or "device" compilation phase +// needs to be guarded with a macro that indicates the current compilation phase +// +// EIGEN_HIP_DEVICE_COMPILE implies the device compilation phase in HIP +// EIGEN_CUDA_ARCH implies the device compilation phase in CUDA +// +// In most cases, the "host" / "device" specific code is the same for both HIP and CUDA +// For those cases, the code should be guarded with +// #if defined(EIGEN_GPU_COMPILE_PHASE) +// instead of +// #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) +// +// For cases where the tweak is specific to HIP, the code should be guarded with +// #if defined(EIGEN_HIP_DEVICE_COMPILE) +// +// For cases where the tweak is specific to CUDA, the code should be guarded with +// #if defined(EIGEN_CUDA_ARCH) +// +#endif + +#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) +// EIGEN_USE_SYCL is a user-defined macro while __SYCL_DEVICE_ONLY__ is a compiler-defined macro. +// In most cases we want to check if both macros are defined which can be done using the define below. +#define SYCL_DEVICE_ONLY +#endif + +//------------------------------------------------------------------------------------------ +// Detect Compiler/Architecture/OS specific features +//------------------------------------------------------------------------------------------ + +#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG + // see bug 89 + #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0 +#else + #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1 #endif // Cross compiler wrapper around LLVM's __has_builtin @@ -349,26 +595,79 @@ # define __has_feature(x) 0 #endif -// Upperbound on the C++ version to use. -// Expected values are 03, 11, 14, 17, etc. -// By default, let's use an arbitrarily large C++ version. -#ifndef EIGEN_MAX_CPP_VER -#define EIGEN_MAX_CPP_VER 99 +// Some old compilers do not support template specializations like: +// template<typename T,int N> void foo(const T x[N]); +#if !( EIGEN_COMP_CLANG && ( (EIGEN_COMP_CLANG<309) \ + || (defined(__apple_build_version__) && (__apple_build_version__ < 9000000))) \ + || EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<49) +#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 1 +#else +#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 0 #endif -#if EIGEN_MAX_CPP_VER>=11 && (defined(__cplusplus) && (__cplusplus >= 201103L) || EIGEN_COMP_MSVC >= 1900) +// The macro EIGEN_CPLUSPLUS is a replacement for __cplusplus/_MSVC_LANG that +// works for both platforms, indicating the C++ standard version number. +// +// With MSVC, without defining /Zc:__cplusplus, the __cplusplus macro will +// report 199711L regardless of the language standard specified via /std. +// We need to rely on _MSVC_LANG instead, which is only available after +// VS2015.3. +#if EIGEN_COMP_MSVC_LANG > 0 +#define EIGEN_CPLUSPLUS EIGEN_COMP_MSVC_LANG +#elif EIGEN_COMP_MSVC >= 1900 +#define EIGEN_CPLUSPLUS 201103L +#elif defined(__cplusplus) +#define EIGEN_CPLUSPLUS __cplusplus +#else +#define EIGEN_CPLUSPLUS 0 +#endif + +// The macro EIGEN_COMP_CXXVER defines the c++ verson expected by the compiler. +// For instance, if compiling with gcc and -std=c++17, then EIGEN_COMP_CXXVER +// is defined to 17. +#if EIGEN_CPLUSPLUS > 201703L + #define EIGEN_COMP_CXXVER 20 +#elif EIGEN_CPLUSPLUS > 201402L + #define EIGEN_COMP_CXXVER 17 +#elif EIGEN_CPLUSPLUS > 201103L + #define EIGEN_COMP_CXXVER 14 +#elif EIGEN_CPLUSPLUS >= 201103L + #define EIGEN_COMP_CXXVER 11 +#else + #define EIGEN_COMP_CXXVER 03 +#endif + +#ifndef EIGEN_HAS_CXX14_VARIABLE_TEMPLATES + #if defined(__cpp_variable_templates) && __cpp_variable_templates >= 201304 && EIGEN_MAX_CPP_VER>=14 + #define EIGEN_HAS_CXX14_VARIABLE_TEMPLATES 1 + #else + #define EIGEN_HAS_CXX14_VARIABLE_TEMPLATES 0 + #endif +#endif + + +// The macros EIGEN_HAS_CXX?? defines a rough estimate of available c++ features +// but in practice we should not rely on them but rather on the availabilty of +// individual features as defined later. +// This is why there is no EIGEN_HAS_CXX17. +// FIXME: get rid of EIGEN_HAS_CXX14 and maybe even EIGEN_HAS_CXX11. +#if EIGEN_MAX_CPP_VER>=11 && EIGEN_COMP_CXXVER>=11 #define EIGEN_HAS_CXX11 1 #else #define EIGEN_HAS_CXX11 0 #endif +#if EIGEN_MAX_CPP_VER>=14 && EIGEN_COMP_CXXVER>=14 +#define EIGEN_HAS_CXX14 1 +#else +#define EIGEN_HAS_CXX14 0 +#endif // Do we support r-value references? #ifndef EIGEN_HAS_RVALUE_REFERENCES #if EIGEN_MAX_CPP_VER>=11 && \ (__has_feature(cxx_rvalue_references) || \ - (defined(__cplusplus) && __cplusplus >= 201103L) || \ - (EIGEN_COMP_MSVC >= 1600)) + (EIGEN_COMP_CXXVER >= 11) || (EIGEN_COMP_MSVC >= 1600)) #define EIGEN_HAS_RVALUE_REFERENCES 1 #else #define EIGEN_HAS_RVALUE_REFERENCES 0 @@ -376,11 +675,14 @@ #endif // Does the compiler support C99? +// Need to include <cmath> to make sure _GLIBCXX_USE_C99 gets defined +#include <cmath> #ifndef EIGEN_HAS_C99_MATH #if EIGEN_MAX_CPP_VER>=11 && \ ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ - || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))) + || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \ + || (EIGEN_COMP_MSVC >= 1900) || defined(SYCL_DEVICE_ONLY)) #define EIGEN_HAS_C99_MATH 1 #else #define EIGEN_HAS_C99_MATH 0 @@ -388,21 +690,73 @@ #endif // Does the compiler support result_of? +// result_of was deprecated in c++17 and removed in c++ 20 #ifndef EIGEN_HAS_STD_RESULT_OF -#if EIGEN_MAX_CPP_VER>=11 && ((__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L))) +#if EIGEN_HAS_CXX11 && EIGEN_COMP_CXXVER < 17 #define EIGEN_HAS_STD_RESULT_OF 1 #else #define EIGEN_HAS_STD_RESULT_OF 0 #endif #endif +// Does the compiler support std::hash? +#ifndef EIGEN_HAS_STD_HASH +// The std::hash struct is defined in C++11 but is not labelled as a __device__ +// function and is not constexpr, so cannot be used on device. +#if EIGEN_HAS_CXX11 && !defined(EIGEN_GPU_COMPILE_PHASE) +#define EIGEN_HAS_STD_HASH 1 +#else +#define EIGEN_HAS_STD_HASH 0 +#endif +#endif // EIGEN_HAS_STD_HASH + +#ifndef EIGEN_HAS_STD_INVOKE_RESULT +#if EIGEN_MAX_CPP_VER >= 17 && EIGEN_COMP_CXXVER >= 17 +#define EIGEN_HAS_STD_INVOKE_RESULT 1 +#else +#define EIGEN_HAS_STD_INVOKE_RESULT 0 +#endif +#endif + +#ifndef EIGEN_HAS_ALIGNAS +#if EIGEN_MAX_CPP_VER>=11 && EIGEN_HAS_CXX11 && \ + ( __has_feature(cxx_alignas) \ + || EIGEN_HAS_CXX14 \ + || (EIGEN_COMP_MSVC >= 1800) \ + || (EIGEN_GNUC_AT_LEAST(4,8)) \ + || (EIGEN_COMP_CLANG>=305) \ + || (EIGEN_COMP_ICC>=1500) \ + || (EIGEN_COMP_PGI>=1500) \ + || (EIGEN_COMP_SUNCC>=0x5130)) +#define EIGEN_HAS_ALIGNAS 1 +#else +#define EIGEN_HAS_ALIGNAS 0 +#endif +#endif + +// Does the compiler support type_traits? +// - full support of type traits was added only to GCC 5.1.0. +// - 20150626 corresponds to the last release of 4.x libstdc++ +#ifndef EIGEN_HAS_TYPE_TRAITS +#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_HAS_CXX11 || EIGEN_COMP_MSVC >= 1700) \ + && ((!EIGEN_COMP_GNUC_STRICT) || EIGEN_GNUC_AT_LEAST(5, 1)) \ + && ((!defined(__GLIBCXX__)) || __GLIBCXX__ > 20150626) +#define EIGEN_HAS_TYPE_TRAITS 1 +#define EIGEN_INCLUDE_TYPE_TRAITS +#else +#define EIGEN_HAS_TYPE_TRAITS 0 +#endif +#endif + // Does the compiler support variadic templates? #ifndef EIGEN_HAS_VARIADIC_TEMPLATES -#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ - && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) +#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_COMP_CXXVER >= 11) \ + && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_COMP_NVCC >= 80000) ) // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // this prevents nvcc from crashing when compiling Eigen on Tegra X1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 +#elif EIGEN_MAX_CPP_VER>=11 && (EIGEN_COMP_CXXVER >= 11) && defined(SYCL_DEVICE_ONLY) +#define EIGEN_HAS_VARIADIC_TEMPLATES 1 #else #define EIGEN_HAS_VARIADIC_TEMPLATES 0 #endif @@ -410,27 +764,33 @@ // Does the compiler fully support const expressions? (as in c++14) #ifndef EIGEN_HAS_CONSTEXPR + #if defined(EIGEN_CUDACC) + // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above + #if EIGEN_MAX_CPP_VER>=14 && (EIGEN_COMP_CXXVER >= 11 && (EIGEN_COMP_CLANG || EIGEN_COMP_NVCC >= 70500)) + #define EIGEN_HAS_CONSTEXPR 1 + #endif + #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (EIGEN_COMP_CXXVER >= 14) || \ + (EIGEN_GNUC_AT_LEAST(4,8) && (EIGEN_COMP_CXXVER >= 11)) || \ + (EIGEN_COMP_CLANG >= 306 && (EIGEN_COMP_CXXVER >= 11))) + #define EIGEN_HAS_CONSTEXPR 1 + #endif -#ifdef __CUDACC__ -// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above -#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) - #define EIGEN_HAS_CONSTEXPR 1 -#endif -#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ - (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L))) -#define EIGEN_HAS_CONSTEXPR 1 -#endif + #ifndef EIGEN_HAS_CONSTEXPR + #define EIGEN_HAS_CONSTEXPR 0 + #endif -#ifndef EIGEN_HAS_CONSTEXPR -#define EIGEN_HAS_CONSTEXPR 0 -#endif +#endif // EIGEN_HAS_CONSTEXPR +#if EIGEN_HAS_CONSTEXPR +#define EIGEN_CONSTEXPR constexpr +#else +#define EIGEN_CONSTEXPR #endif // Does the compiler support C++11 math? // Let's be conservative and enable the default C++11 implementation only if we are sure it exists #ifndef EIGEN_HAS_CXX11_MATH - #if EIGEN_MAX_CPP_VER>=11 && ((__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ + #if EIGEN_MAX_CPP_VER>=11 && ((EIGEN_COMP_CXXVER > 11) || (EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC)) #define EIGEN_HAS_CXX11_MATH 1 #else @@ -441,9 +801,8 @@ // Does the compiler support proper C++11 containers? #ifndef EIGEN_HAS_CXX11_CONTAINERS #if EIGEN_MAX_CPP_VER>=11 && \ - ((__cplusplus > 201103L) \ - || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \ - || EIGEN_COMP_MSVC >= 1900) + ((EIGEN_COMP_CXXVER > 11) \ + || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC>=1400))) #define EIGEN_HAS_CXX11_CONTAINERS 1 #else #define EIGEN_HAS_CXX11_CONTAINERS 0 @@ -454,24 +813,88 @@ #ifndef EIGEN_HAS_CXX11_NOEXCEPT #if EIGEN_MAX_CPP_VER>=11 && \ (__has_feature(cxx_noexcept) \ - || (__cplusplus > 201103L) \ - || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \ - || EIGEN_COMP_MSVC >= 1900) + || (EIGEN_COMP_CXXVER > 11) \ + || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC>=1400))) #define EIGEN_HAS_CXX11_NOEXCEPT 1 #else #define EIGEN_HAS_CXX11_NOEXCEPT 0 #endif #endif -/** Allows to disable some optimizations which might affect the accuracy of the result. - * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them. - * They currently include: - * - single precision ArrayBase::sin() and ArrayBase::cos() for SSE and AVX vectorization. - */ -#ifndef EIGEN_FAST_MATH -#define EIGEN_FAST_MATH 1 +#ifndef EIGEN_HAS_CXX11_ATOMIC + #if EIGEN_MAX_CPP_VER>=11 && \ + (__has_feature(cxx_atomic) \ + || (EIGEN_COMP_CXXVER > 11) \ + || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_MSVC==0 || EIGEN_COMP_MSVC >= 1700))) + #define EIGEN_HAS_CXX11_ATOMIC 1 + #else + #define EIGEN_HAS_CXX11_ATOMIC 0 + #endif +#endif + +#ifndef EIGEN_HAS_CXX11_OVERRIDE_FINAL + #if EIGEN_MAX_CPP_VER>=11 && \ + (EIGEN_COMP_CXXVER >= 11 || EIGEN_COMP_MSVC >= 1700) + #define EIGEN_HAS_CXX11_OVERRIDE_FINAL 1 + #else + #define EIGEN_HAS_CXX11_OVERRIDE_FINAL 0 + #endif +#endif + +// NOTE: the required Apple's clang version is very conservative +// and it could be that XCode 9 works just fine. +// NOTE: the MSVC version is based on https://en.cppreference.com/w/cpp/compiler_support +// and not tested. +#ifndef EIGEN_HAS_CXX17_OVERALIGN +#if EIGEN_MAX_CPP_VER>=17 && EIGEN_COMP_CXXVER>=17 && ( \ + (EIGEN_COMP_MSVC >= 1912) \ + || (EIGEN_GNUC_AT_LEAST(7,0)) \ + || ((!defined(__apple_build_version__)) && (EIGEN_COMP_CLANG>=500)) \ + || (( defined(__apple_build_version__)) && (__apple_build_version__>=10000000)) \ + ) +#define EIGEN_HAS_CXX17_OVERALIGN 1 +#else +#define EIGEN_HAS_CXX17_OVERALIGN 0 +#endif +#endif + +#if defined(EIGEN_CUDACC) && EIGEN_HAS_CONSTEXPR + // While available already with c++11, this is useful mostly starting with c++14 and relaxed constexpr rules + #if defined(__NVCC__) + // nvcc considers constexpr functions as __host__ __device__ with the option --expt-relaxed-constexpr + #ifdef __CUDACC_RELAXED_CONSTEXPR__ + #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC + #endif + #elif defined(__clang__) && defined(__CUDA__) && __has_feature(cxx_relaxed_constexpr) + // clang++ always considers constexpr functions as implicitly __host__ __device__ + #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC + #endif #endif +// Does the compiler support the __int128 and __uint128_t extensions for 128-bit +// integer arithmetic? +// +// Clang and GCC define __SIZEOF_INT128__ when these extensions are supported, +// but we avoid using them in certain cases: +// +// * Building using Clang for Windows, where the Clang runtime library has +// 128-bit support only on LP64 architectures, but Windows is LLP64. +#ifndef EIGEN_HAS_BUILTIN_INT128 +#if defined(__SIZEOF_INT128__) && !(EIGEN_OS_WIN && EIGEN_COMP_CLANG) +#define EIGEN_HAS_BUILTIN_INT128 1 +#else +#define EIGEN_HAS_BUILTIN_INT128 0 +#endif +#endif + +//------------------------------------------------------------------------------------------ +// Preprocessor programming helpers +//------------------------------------------------------------------------------------------ + +// This macro can be used to prevent from macro expansion, e.g.: +// std::max EIGEN_NOT_A_MACRO(a,b) +#define EIGEN_NOT_A_MACRO + #define EIGEN_DEBUG_VAR(x) std::cerr << #x << " = " << x << std::endl; // concatenate two tokens @@ -487,11 +910,13 @@ // EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC, // but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline // but GCC is still doing fine with just inline. -#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC +#ifndef EIGEN_STRONG_INLINE +#if (EIGEN_COMP_MSVC || EIGEN_COMP_ICC) && !defined(EIGEN_GPUCC) #define EIGEN_STRONG_INLINE __forceinline #else #define EIGEN_STRONG_INLINE inline #endif +#endif // EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible // attribute to maximize inlining. This should only be used when really necessary: in particular, @@ -501,7 +926,7 @@ // Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const' // : function body not available // See also bug 1367 -#if EIGEN_GNUC_AT_LEAST(4,2) +#if EIGEN_GNUC_AT_LEAST(4,2) && !defined(SYCL_DEVICE_ONLY) #define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline #else #define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE @@ -521,12 +946,43 @@ #define EIGEN_PERMISSIVE_EXPR #endif +// GPU stuff + +// Disable some features when compiling with GPU compilers (NVCC/clang-cuda/SYCL/HIPCC) +#if defined(EIGEN_CUDACC) || defined(SYCL_DEVICE_ONLY) || defined(EIGEN_HIPCC) + // Do not try asserts on device code + #ifndef EIGEN_NO_DEBUG + #define EIGEN_NO_DEBUG + #endif + + #ifdef EIGEN_INTERNAL_DEBUGGING + #undef EIGEN_INTERNAL_DEBUGGING + #endif + + #ifdef EIGEN_EXCEPTIONS + #undef EIGEN_EXCEPTIONS + #endif +#endif + +#if defined(SYCL_DEVICE_ONLY) + #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif + #define EIGEN_DEVICE_FUNC __attribute__((flatten)) __attribute__((always_inline)) +// All functions callable from CUDA/HIP code must be qualified with __device__ +#elif defined(EIGEN_GPUCC) + #define EIGEN_DEVICE_FUNC __host__ __device__ +#else + #define EIGEN_DEVICE_FUNC +#endif + + // this macro allows to get rid of linking errors about multiply defined functions. // - static is not very good because it prevents definitions from different object files to be merged. // So static causes the resulting linked executable to be bloated with multiple copies of the same function. // - inline is not perfect either as it unwantedly hints the compiler toward inlining the function. -#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline +#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC +#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC inline #ifdef NDEBUG # ifndef EIGEN_NO_DEBUG @@ -536,7 +992,11 @@ // eigen_plain_assert is where we implement the workaround for the assert() bug in GCC <= 4.3, see bug 89 #ifdef EIGEN_NO_DEBUG - #define eigen_plain_assert(x) + #ifdef SYCL_DEVICE_ONLY // used to silence the warning on SYCL device + #define eigen_plain_assert(x) EIGEN_UNUSED_VARIABLE(x) + #else + #define eigen_plain_assert(x) + #endif #else #if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO namespace Eigen { @@ -610,7 +1070,7 @@ // Suppresses 'unused variable' warnings. namespace Eigen { namespace internal { - template<typename T> EIGEN_DEVICE_FUNC void ignore_unused_variable(const T&) {} + template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ignore_unused_variable(const T&) {} } } #define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var); @@ -624,169 +1084,75 @@ namespace Eigen { #endif -//------------------------------------------------------------------------------------------ -// Static and dynamic alignment control +// Acts as a barrier preventing operations involving `X` from crossing. This +// occurs, for example, in the fast rounding trick where a magic constant is +// added then subtracted, which is otherwise compiled away with -ffast-math. // -// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES -// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively. -// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not, -// a default value is automatically computed based on architecture, compiler, and OS. -// -// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX} -// to be used to declare statically aligned buffers. -//------------------------------------------------------------------------------------------ - - -/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements. - * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled, - * so that vectorization doesn't affect binary compatibility. - * - * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link - * vectorized and non-vectorized code. - */ -#if (defined __CUDACC__) - #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n) -#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM - #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) -#elif EIGEN_COMP_MSVC - #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n)) -#elif EIGEN_COMP_SUNCC - // FIXME not sure about this one: - #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) -#else - #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler -#endif - -// If the user explicitly disable vectorization, then we also disable alignment -#if defined(EIGEN_DONT_VECTORIZE) - #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0 -#elif defined(EIGEN_VECTORIZE_AVX512) - // 64 bytes static alignmeent is preferred only if really required - #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64 -#elif defined(__AVX__) - // 32 bytes static alignmeent is preferred only if really required - #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32 -#else - #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16 -#endif - - -// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense -#define EIGEN_MIN_ALIGN_BYTES 16 - -// Defined the boundary (in bytes) on which the data needs to be aligned. Note -// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be -// aligned at all regardless of the value of this #define. - -#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0 -#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY. -#endif - -// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprectated -// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0 -#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN) - #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES - #undef EIGEN_MAX_STATIC_ALIGN_BYTES - #endif - #define EIGEN_MAX_STATIC_ALIGN_BYTES 0 -#endif - -#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES - - // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES - - // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable - // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always - // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in - // certain common platform (compiler+architecture combinations) to avoid these problems. - // Only static alignment is really problematic (relies on nonstandard compiler extensions), - // try to keep heap alignment even when we have to disable static alignment. - #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64) - #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 - #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6) - // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support. - // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use. - // 4.8 and newer seem definitely unaffected. - #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 - #else - #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 - #endif - - // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX - #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \ - && !EIGEN_GCC3_OR_OLDER \ - && !EIGEN_COMP_SUNCC \ - && !EIGEN_OS_QNX - #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 - #else - #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0 - #endif - - #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT - #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES +// See bug 1674 +#if !defined(EIGEN_OPTIMIZATION_BARRIER) + #if EIGEN_COMP_GNUC + // According to https://gcc.gnu.org/onlinedocs/gcc/Constraints.html: + // X: Any operand whatsoever. + // r: A register operand is allowed provided that it is in a general + // register. + // g: Any register, memory or immediate integer operand is allowed, except + // for registers that are not general registers. + // w: (AArch32/AArch64) Floating point register, Advanced SIMD vector + // register or SVE vector register. + // x: (SSE) Any SSE register. + // (AArch64) Like w, but restricted to registers 0 to 15 inclusive. + // v: (PowerPC) An Altivec vector register. + // wa:(PowerPC) A VSX register. + // + // "X" (uppercase) should work for all cases, though this seems to fail for + // some versions of GCC for arm/aarch64 with + // "error: inconsistent operand constraints in an 'asm'" + // Clang x86_64/arm/aarch64 seems to require "g" to support both scalars and + // vectors, otherwise + // "error: non-trivial scalar-to-vector conversion, possible invalid + // constraint for vector type" + // + // GCC for ppc64le generates an internal compiler error with x/X/g. + // GCC for AVX generates an internal compiler error with X. + // + // Tested on icc/gcc/clang for sse, avx, avx2, avx512dq + // gcc for arm, aarch64, + // gcc for ppc64le, + // both vectors and scalars. + // + // Note that this is restricted to plain types - this will not work + // directly for std::complex<T>, Eigen::half, Eigen::bfloat16. For these, + // you will need to apply to the underlying POD type. + #if EIGEN_ARCH_PPC && EIGEN_COMP_GNUC_STRICT + // This seems to be broken on clang. Packet4f is loaded into a single + // register rather than a vector, zeroing out some entries. Integer + // types also generate a compile error. + // General, Altivec, VSX. + #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+r,v,wa" (X)); + #elif EIGEN_ARCH_ARM_OR_ARM64 + // General, NEON. + #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+g,w" (X)); + #elif EIGEN_ARCH_i386_OR_x86_64 + // General, SSE. + #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+g,x" (X)); + #else + // Not implemented for other architectures. + #define EIGEN_OPTIMIZATION_BARRIER(X) + #endif #else - #define EIGEN_MAX_STATIC_ALIGN_BYTES 0 - #endif - -#endif - -// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_ALIGN_BYTES -#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES -#undef EIGEN_MAX_STATIC_ALIGN_BYTES -#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES -#endif - -#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) - #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT -#endif - -// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not. -// It takes into account both the user choice to explicitly enable/disable alignment (by settting EIGEN_MAX_STATIC_ALIGN_BYTES) -// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). -// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used. - - -// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY -#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8) -#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16) -#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32) -#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64) -#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 -#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES) -#else -#define EIGEN_ALIGN_MAX -#endif - - -// Dynamic alignment control - -#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0 -#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN. -#endif - -#ifdef EIGEN_DONT_ALIGN - #ifdef EIGEN_MAX_ALIGN_BYTES - #undef EIGEN_MAX_ALIGN_BYTES + // Not implemented for other compilers. + #define EIGEN_OPTIMIZATION_BARRIER(X) #endif - #define EIGEN_MAX_ALIGN_BYTES 0 -#elif !defined(EIGEN_MAX_ALIGN_BYTES) - #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES #endif -#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES -#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES +#if EIGEN_COMP_MSVC + // NOTE MSVC often gives C4127 warnings with compiletime if statements. See bug 1362. + // This workaround is ugly, but it does the job. +# define EIGEN_CONST_CONDITIONAL(cond) (void)0, cond #else -#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES -#endif - - -#ifndef EIGEN_UNALIGNED_VECTORIZE -#define EIGEN_UNALIGNED_VECTORIZE 1 +# define EIGEN_CONST_CONDITIONAL(cond) cond #endif -//---------------------------------------------------------------------- - - #ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD #define EIGEN_RESTRICT #endif @@ -794,10 +1160,6 @@ namespace Eigen { #define EIGEN_RESTRICT __restrict #endif -#ifndef EIGEN_STACK_ALLOCATION_LIMIT -// 131072 == 128 KB -#define EIGEN_STACK_ALLOCATION_LIMIT 131072 -#endif #ifndef EIGEN_DEFAULT_IO_FORMAT #ifdef EIGEN_MAKING_DOCS @@ -812,7 +1174,23 @@ namespace Eigen { // just an empty macro ! #define EIGEN_EMPTY -#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) + +// When compiling CUDA/HIP device code with NVCC or HIPCC +// pull in math functions from the global namespace. +// In host mode, and when device code is compiled with clang, +// use the std versions. +#if (defined(EIGEN_CUDA_ARCH) && defined(__NVCC__)) || defined(EIGEN_HIP_DEVICE_COMPILE) + #define EIGEN_USING_STD(FUNC) using ::FUNC; +#else + #define EIGEN_USING_STD(FUNC) using std::FUNC; +#endif + +#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || (EIGEN_COMP_MSVC == 1900 && EIGEN_COMP_NVCC)) + // For older MSVC versions, as well as 1900 && CUDA 8, using the base operator is necessary, + // otherwise we get duplicate definition errors + // For later MSVC versions, we require explicit operator= definition, otherwise we get + // use of implicitly deleted operator errors. + // (cf Bugs 920, 1000, 1324, 2291) #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ using Base::operator =; #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) @@ -832,11 +1210,48 @@ namespace Eigen { #endif +/** + * \internal + * \brief Macro to explicitly define the default copy constructor. + * This is necessary, because the implicit definition is deprecated if the copy-assignment is overridden. + */ +#if EIGEN_HAS_CXX11 +#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS) CLASS(const CLASS&) = default; +#else +#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS) +#endif + + + /** \internal * \brief Macro to manually inherit assignment operators. * This is necessary, because the implicitly defined assignment operator gets deleted when a custom operator= is defined. + * With C++11 or later this also default-implements the copy-constructor */ -#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) +#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) \ + EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + EIGEN_DEFAULT_COPY_CONSTRUCTOR(Derived) + +/** \internal + * \brief Macro to manually define default constructors and destructors. + * This is necessary when the copy constructor is re-defined. + * For empty helper classes this should usually be protected, to avoid accidentally creating empty objects. + * + * Hiding the default destructor lead to problems in C++03 mode together with boost::multiprecision + */ +#if EIGEN_HAS_CXX11 +#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived) \ + Derived() = default; \ + ~Derived() = default; +#else +#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived) \ + Derived() {}; \ + /* ~Derived() {}; */ +#endif + + + + /** * Just a side note. Commenting within defines works only by documenting @@ -853,7 +1268,8 @@ namespace Eigen { typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \ typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \ typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \ - enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \ + enum CompileTimeTraits \ + { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \ ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \ Flags = Eigen::internal::traits<Derived>::Flags, \ SizeAtCompileTime = Base::SizeAtCompileTime, \ @@ -898,6 +1314,14 @@ namespace Eigen { #define EIGEN_IMPLIES(a,b) (!(a) || (b)) +#if EIGEN_HAS_BUILTIN(__builtin_expect) || EIGEN_COMP_GNUC +#define EIGEN_PREDICT_FALSE(x) (__builtin_expect(x, false)) +#define EIGEN_PREDICT_TRUE(x) (__builtin_expect(false || (x), true)) +#else +#define EIGEN_PREDICT_FALSE(x) (x) +#define EIGEN_PREDICT_TRUE(x) (x) +#endif + // the expression type of a standard coefficient wise binary operation #define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \ CwiseBinaryOp< \ @@ -929,14 +1353,14 @@ namespace Eigen { const typename internal::plain_constant_type<EXPR,SCALAR>::type, const EXPR> // Workaround for MSVC 2010 (see ML thread "patch with compile for for MSVC 2010") -#if EIGEN_COMP_MSVC_STRICT<=1600 +#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC_STRICT<=1600) #define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) typename internal::enable_if<true,X>::type #else #define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) X #endif #define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \ - template <typename T> EIGEN_DEVICE_FUNC inline \ + template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type,OPNAME))\ (METHOD)(const T& scalar) const { \ typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type PromotedT; \ @@ -945,7 +1369,7 @@ namespace Eigen { } #define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ - template <typename T> EIGEN_DEVICE_FUNC inline friend \ + template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend \ EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type,Derived,OPNAME)) \ (METHOD)(const T& scalar, const StorageBaseType& matrix) { \ typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type PromotedT; \ @@ -958,15 +1382,23 @@ namespace Eigen { EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) +#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) && !defined(EIGEN_HIP_DEVICE_COMPILE) + #define EIGEN_EXCEPTIONS +#endif + + #ifdef EIGEN_EXCEPTIONS # define EIGEN_THROW_X(X) throw X # define EIGEN_THROW throw # define EIGEN_TRY try # define EIGEN_CATCH(X) catch (X) #else -# ifdef __CUDA_ARCH__ +# if defined(EIGEN_CUDA_ARCH) # define EIGEN_THROW_X(X) asm("trap;") # define EIGEN_THROW asm("trap;") +# elif defined(EIGEN_HIP_DEVICE_COMPILE) +# define EIGEN_THROW_X(X) asm("s_trap 0") +# define EIGEN_THROW asm("s_trap 0") # else # define EIGEN_THROW_X(X) std::abort() # define EIGEN_THROW std::abort() @@ -986,7 +1418,47 @@ namespace Eigen { # define EIGEN_NOEXCEPT # define EIGEN_NOEXCEPT_IF(x) # define EIGEN_NO_THROW throw() -# define EIGEN_EXCEPTION_SPEC(X) throw(X) +# if EIGEN_COMP_MSVC || EIGEN_COMP_CXXVER>=17 + // MSVC does not support exception specifications (warning C4290), + // and they are deprecated in c++11 anyway. This is even an error in c++17. +# define EIGEN_EXCEPTION_SPEC(X) throw() +# else +# define EIGEN_EXCEPTION_SPEC(X) throw(X) +# endif +#endif + +#if EIGEN_HAS_VARIADIC_TEMPLATES +// The all function is used to enable a variadic version of eigen_assert which can take a parameter pack as its input. +namespace Eigen { +namespace internal { + +inline bool all(){ return true; } + +template<typename T, typename ...Ts> +bool all(T t, Ts ... ts){ return t && all(ts...); } + +} +} +#endif + +#if EIGEN_HAS_CXX11_OVERRIDE_FINAL +// provide override and final specifiers if they are available: +# define EIGEN_OVERRIDE override +# define EIGEN_FINAL final +#else +# define EIGEN_OVERRIDE +# define EIGEN_FINAL +#endif + +// Wrapping #pragma unroll in a macro since it is required for SYCL +#if defined(SYCL_DEVICE_ONLY) + #if defined(_MSC_VER) + #define EIGEN_UNROLL_LOOP __pragma(unroll) + #else + #define EIGEN_UNROLL_LOOP _Pragma("unroll") + #endif +#else + #define EIGEN_UNROLL_LOOP #endif #endif // EIGEN_MACROS_H diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index c634d7ea0..875318cdb 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -63,14 +63,28 @@ namespace Eigen { namespace internal { -EIGEN_DEVICE_FUNC +EIGEN_DEVICE_FUNC inline void throw_std_bad_alloc() { #ifdef EIGEN_EXCEPTIONS throw std::bad_alloc(); #else std::size_t huge = static_cast<std::size_t>(-1); + #if defined(EIGEN_HIPCC) + // + // calls to "::operator new" are to be treated as opaque function calls (i.e no inlining), + // and as a consequence the code in the #else block triggers the hipcc warning : + // "no overloaded function has restriction specifiers that are compatible with the ambient context" + // + // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects + // the same on "operator new" + // Reverting code back to the old version in this #if block for the hipcc compiler + // new int[huge]; + #else + void* unused = ::operator new(huge); + EIGEN_UNUSED_VARIABLE(unused); + #endif #endif } @@ -83,19 +97,26 @@ inline void throw_std_bad_alloc() /** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned. * Fast, but wastes 16 additional bytes of memory. Does not throw any exception. */ -inline void* handmade_aligned_malloc(std::size_t size) +EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) { - void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES); + eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2"); + + EIGEN_USING_STD(malloc) + void *original = malloc(size+alignment); + if (original == 0) return 0; - void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES); + void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment); *(reinterpret_cast<void**>(aligned) - 1) = original; return aligned; } /** \internal Frees memory allocated with handmade_aligned_malloc */ -inline void handmade_aligned_free(void *ptr) +EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr) { - if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1)); + if (ptr) { + EIGEN_USING_STD(free) + free(*(reinterpret_cast<void**>(ptr) - 1)); + } } /** \internal @@ -114,7 +135,7 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = void *previous_aligned = static_cast<char *>(original)+previous_offset; if(aligned!=previous_aligned) std::memmove(aligned, previous_aligned, size); - + *(reinterpret_cast<void**>(aligned) - 1) = original; return aligned; } @@ -142,7 +163,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() { eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)"); } -#else +#else EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {} #endif @@ -156,9 +177,12 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) void *result; #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED - result = std::malloc(size); + + EIGEN_USING_STD(malloc) + result = malloc(size); + #if EIGEN_DEFAULT_ALIGN_BYTES==16 - eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator."); + eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator."); #endif #else result = handmade_aligned_malloc(size); @@ -174,7 +198,10 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) { #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED - std::free(ptr); + + EIGEN_USING_STD(free) + free(ptr); + #else handmade_aligned_free(ptr); #endif @@ -187,7 +214,7 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) */ inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size) { - EIGEN_UNUSED_VARIABLE(old_size); + EIGEN_UNUSED_VARIABLE(old_size) void *result; #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED @@ -218,7 +245,9 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std: { check_that_malloc_is_allowed(); - void *result = std::malloc(size); + EIGEN_USING_STD(malloc) + void *result = malloc(size); + if(!result && size) throw_std_bad_alloc(); return result; @@ -232,7 +261,8 @@ template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr) { - std::free(ptr); + EIGEN_USING_STD(free) + free(ptr); } template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) @@ -331,7 +361,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size) { destruct_elements_of_array<T>(ptr, size); - aligned_free(ptr); + Eigen::internal::aligned_free(ptr); } /** \internal Deletes objects constructed with conditional_aligned_new @@ -471,8 +501,8 @@ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index } /** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size - */ -template<typename Index> + */ +template<typename Index> inline Index first_multiple(Index size, Index base) { return ((size+base-1)/base)*base; @@ -493,6 +523,7 @@ template<typename T> struct smart_copy_helper<T,true> { IntPtr size = IntPtr(end)-IntPtr(start); if(size==0) return; eigen_internal_assert(start!=0 && end!=0 && target!=0); + EIGEN_USING_STD(memcpy) memcpy(target, start, size); } }; @@ -502,7 +533,7 @@ template<typename T> struct smart_copy_helper<T,false> { { std::copy(start, end, target); } }; -// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. +// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. template<typename T, bool UseMemmove> struct smart_memmove_helper; template<typename T> void smart_memmove(const T* start, const T* end, T* target) @@ -522,19 +553,30 @@ template<typename T> struct smart_memmove_helper<T,true> { template<typename T> struct smart_memmove_helper<T,false> { static inline void run(const T* start, const T* end, T* target) - { + { if (UIntPtr(target) < UIntPtr(start)) { std::copy(start, end, target); } - else + else { std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T); - std::copy_backward(start, end, target + count); + std::copy_backward(start, end, target + count); } } }; +#if EIGEN_HAS_RVALUE_REFERENCES +template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) +{ + return std::move(start, end, target); +} +#else +template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) +{ + return std::copy(start, end, target); +} +#endif /***************************************************************************** *** Implementation of runtime stack allocation (falling back to malloc) *** @@ -542,7 +584,7 @@ template<typename T> struct smart_memmove_helper<T,false> { // you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA // to the appropriate stack allocation function -#ifndef EIGEN_ALLOCA +#if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca) #define EIGEN_ALLOCA alloca #elif EIGEN_COMP_MSVC @@ -550,6 +592,15 @@ template<typename T> struct smart_memmove_helper<T,false> { #endif #endif +// With clang -Oz -mthumb, alloca changes the stack pointer in a way that is +// not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because +// the compiler still emits bad code because stack allocation checks use "<=". +// TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772 +// is fixed. +#if defined(__clang__) && defined(__thumb__) + #undef EIGEN_ALLOCA +#endif + // This helper class construct the allocated memory, and takes care of destructing and freeing the handled data // at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions. template<typename T> class aligned_stack_memory_handler : noncopyable @@ -561,12 +612,14 @@ template<typename T> class aligned_stack_memory_handler : noncopyable * In this case, the buffer elements will also be destructed when this handler will be destructed. * Finally, if \a dealloc is true, then the pointer \a ptr is freed. **/ + EIGEN_DEVICE_FUNC aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc) : m_ptr(ptr), m_size(size), m_deallocate(dealloc) { if(NumTraits<T>::RequireInitialization && m_ptr) Eigen::internal::construct_elements_of_array(m_ptr, size); } + EIGEN_DEVICE_FUNC ~aligned_stack_memory_handler() { if(NumTraits<T>::RequireInitialization && m_ptr) @@ -580,6 +633,60 @@ template<typename T> class aligned_stack_memory_handler : noncopyable bool m_deallocate; }; +#ifdef EIGEN_ALLOCA + +template<typename Xpr, int NbEvaluations, + bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic + > +struct local_nested_eval_wrapper +{ + static const bool NeedExternalBuffer = false; + typedef typename Xpr::Scalar Scalar; + typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType; + ObjectType object; + + EIGEN_DEVICE_FUNC + local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr) + { + EIGEN_UNUSED_VARIABLE(ptr); + eigen_internal_assert(ptr==0); + } +}; + +template<typename Xpr, int NbEvaluations> +struct local_nested_eval_wrapper<Xpr,NbEvaluations,true> +{ + static const bool NeedExternalBuffer = true; + typedef typename Xpr::Scalar Scalar; + typedef typename plain_object_eval<Xpr>::type PlainObject; + typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType; + ObjectType object; + + EIGEN_DEVICE_FUNC + local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) + : object(ptr==0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()), + m_deallocate(ptr==0) + { + if(NumTraits<Scalar>::RequireInitialization && object.data()) + Eigen::internal::construct_elements_of_array(object.data(), object.size()); + object = xpr; + } + + EIGEN_DEVICE_FUNC + ~local_nested_eval_wrapper() + { + if(NumTraits<Scalar>::RequireInitialization && object.data()) + Eigen::internal::destruct_elements_of_array(object.data(), object.size()); + if(m_deallocate) + Eigen::internal::aligned_free(object.data()); + } + +private: + bool m_deallocate; +}; + +#endif // EIGEN_ALLOCA + template<typename T> class scoped_array : noncopyable { T* m_ptr; @@ -603,13 +710,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) { std::swap(a.ptr(),b.ptr()); } - + } // end namespace internal /** \internal - * Declares, allocates and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack - * if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform - * (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap. + * + * The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates, + * and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack + * if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform + * (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap. * The allocated buffer is automatically deleted when exiting the scope of this declaration. * If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs. * Here is an example: @@ -620,9 +729,17 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) * } * \endcode * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token. + * + * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogue to + * \code + * typename internal::nested_eval<XPRT_T,N>::type NAME(XPR); + * \endcode + * with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown. + * This is accomplished through alloca if this later is supported and if the required number of bytes + * is below EIGEN_STACK_ALLOCATION_LIMIT. */ #ifdef EIGEN_ALLOCA - + #if EIGEN_DEFAULT_ALIGN_BYTES>0 // We always manually re-align the result of EIGEN_ALLOCA. // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. @@ -639,13 +756,23 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) + + #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \ + Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \ + ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \ + ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \ + typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object) + #else #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \ Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \ TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true) - + + +#define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR) + #endif @@ -653,32 +780,56 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) *** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] *** *****************************************************************************/ -#if EIGEN_MAX_ALIGN_BYTES!=0 +#if EIGEN_HAS_CXX17_OVERALIGN + +// C++17 -> no need to bother about alignment anymore :) + +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) + +#else + +// HIP does not support new/delete on device. +#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE) #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ + EIGEN_DEVICE_FUNC \ void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \ EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \ EIGEN_CATCH (...) { return 0; } \ } #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \ + EIGEN_DEVICE_FUNC \ void *operator new(std::size_t size) { \ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \ } \ + EIGEN_DEVICE_FUNC \ void *operator new[](std::size_t size) { \ return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \ } \ + EIGEN_DEVICE_FUNC \ void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ + EIGEN_DEVICE_FUNC \ void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ + EIGEN_DEVICE_FUNC \ void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ + EIGEN_DEVICE_FUNC \ void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ /* in-place new and delete. since (at least afaik) there is no actual */ \ /* memory allocated we can safely let the default implementation handle */ \ /* this particular case. */ \ + EIGEN_DEVICE_FUNC \ static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \ + EIGEN_DEVICE_FUNC \ static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \ + EIGEN_DEVICE_FUNC \ void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \ + EIGEN_DEVICE_FUNC \ void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \ /* nothrow-new (returns zero instead of std::bad_alloc) */ \ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ + EIGEN_DEVICE_FUNC \ void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \ Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \ } \ @@ -688,20 +839,34 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) #endif #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true) -#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \ - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0))) +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \ + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool( \ + ((Size)!=Eigen::Dynamic) && \ + (((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES )==0)) || \ + ((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) || \ + ((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0)) ))) + +#endif /****************************************************************************/ /** \class aligned_allocator * \ingroup Core_Module * -* \brief STL compatible allocator to use with with 16 byte aligned types +* \brief STL compatible allocator to use with types requiring a non standrad alignment. +* +* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd. +* By default, it will thus provide at least 16 bytes alignment and more in following cases: +* - 32 bytes alignment if AVX is enabled. +* - 64 bytes alignment if AVX512 is enabled. +* +* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented +* \link TopicPreprocessorDirectivesPerformance there \endlink. * * Example: * \code * // Matrix4f requires 16 bytes alignment: -* std::map< int, Matrix4f, std::less<int>, +* std::map< int, Matrix4f, std::less<int>, * aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4; * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator: * std::map< int, Vector3f > my_map_vec3; @@ -736,6 +901,15 @@ public: ~aligned_allocator() {} + #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0) + // In gcc std::allocator::max_size() is bugged making gcc triggers a warning: + // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807 + // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544 + size_type max_size() const { + return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T); + } + #endif + pointer allocate(size_type num, const void* /*hint*/ = 0) { internal::check_size_for_overflow<T>(num); @@ -898,20 +1072,32 @@ inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs) { if(max_std_funcs>=4) queryCacheSizes_intel_direct(l1,l2,l3); - else + else if(max_std_funcs>=2) queryCacheSizes_intel_codes(l1,l2,l3); + else + l1 = l2 = l3 = 0; } inline void queryCacheSizes_amd(int& l1, int& l2, int& l3) { int abcd[4]; abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; - EIGEN_CPUID(abcd,0x80000005,0); - l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB - abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; - EIGEN_CPUID(abcd,0x80000006,0); - l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB - l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB + + // First query the max supported function. + EIGEN_CPUID(abcd,0x80000000,0); + if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006)) + { + EIGEN_CPUID(abcd,0x80000005,0); + l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB + abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; + EIGEN_CPUID(abcd,0x80000006,0); + l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB + l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB + } + else + { + l1 = l2 = l3 = 0; + } } #endif @@ -927,7 +1113,7 @@ inline void queryCacheSizes(int& l1, int& l2, int& l3) // identify the CPU vendor EIGEN_CPUID(abcd,0x0,0); - int max_std_funcs = abcd[1]; + int max_std_funcs = abcd[0]; if(cpuid_is_vendor(abcd,GenuineIntel)) queryCacheSizes_intel(l1,l2,l3,max_std_funcs); else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_)) diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 7f6370755..81ae2a32d 100755 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -11,13 +11,54 @@ #ifndef EIGEN_META_H #define EIGEN_META_H -#if defined(__CUDA_ARCH__) -#include <cfloat> -#include <math_constants.h> +#if defined(EIGEN_GPU_COMPILE_PHASE) + + #include <cfloat> + + #if defined(EIGEN_CUDA_ARCH) + #include <math_constants.h> + #endif + + #if defined(EIGEN_HIP_DEVICE_COMPILE) + #include "Eigen/src/Core/arch/HIP/hcc/math_constants.h" + #endif + #endif -#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L +// Recent versions of ICC require <cstdint> for pointer types below. +#define EIGEN_ICC_NEEDS_CSTDINT (EIGEN_COMP_ICC>=1600 && EIGEN_COMP_CXXVER >= 11) + +// Define portable (u)int{32,64} types +#if EIGEN_HAS_CXX11 || EIGEN_ICC_NEEDS_CSTDINT #include <cstdint> +namespace Eigen { +namespace numext { +typedef std::uint8_t uint8_t; +typedef std::int8_t int8_t; +typedef std::uint16_t uint16_t; +typedef std::int16_t int16_t; +typedef std::uint32_t uint32_t; +typedef std::int32_t int32_t; +typedef std::uint64_t uint64_t; +typedef std::int64_t int64_t; +} +} +#else +// Without c++11, all compilers able to compile Eigen also +// provide the C99 stdint.h header file. +#include <stdint.h> +namespace Eigen { +namespace numext { +typedef ::uint8_t uint8_t; +typedef ::int8_t int8_t; +typedef ::uint16_t uint16_t; +typedef ::int16_t int16_t; +typedef ::uint32_t uint32_t; +typedef ::int32_t int32_t; +typedef ::uint64_t uint64_t; +typedef ::int64_t int64_t; +} +} #endif namespace Eigen { @@ -43,26 +84,33 @@ namespace internal { // Only recent versions of ICC complain about using ptrdiff_t to hold pointers, // and older versions do not provide *intptr_t types. -#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L +#if EIGEN_ICC_NEEDS_CSTDINT typedef std::intptr_t IntPtr; typedef std::uintptr_t UIntPtr; #else typedef std::ptrdiff_t IntPtr; typedef std::size_t UIntPtr; #endif +#undef EIGEN_ICC_NEEDS_CSTDINT struct true_type { enum { value = 1 }; }; struct false_type { enum { value = 0 }; }; +template<bool Condition> +struct bool_constant; + +template<> +struct bool_constant<true> : true_type {}; + +template<> +struct bool_constant<false> : false_type {}; + template<bool Condition, typename Then, typename Else> struct conditional { typedef Then type; }; template<typename Then, typename Else> struct conditional <false, Then, Else> { typedef Else type; }; -template<typename T, typename U> struct is_same { enum { value = 0 }; }; -template<typename T> struct is_same<T,T> { enum { value = 1 }; }; - template<typename T> struct remove_reference { typedef T type; }; template<typename T> struct remove_reference<T&> { typedef T type; }; @@ -97,17 +145,65 @@ template<> struct is_arithmetic<unsigned int> { enum { value = true }; }; template<> struct is_arithmetic<signed long> { enum { value = true }; }; template<> struct is_arithmetic<unsigned long> { enum { value = true }; }; -template<typename T> struct is_integral { enum { value = false }; }; -template<> struct is_integral<bool> { enum { value = true }; }; -template<> struct is_integral<char> { enum { value = true }; }; -template<> struct is_integral<signed char> { enum { value = true }; }; -template<> struct is_integral<unsigned char> { enum { value = true }; }; -template<> struct is_integral<signed short> { enum { value = true }; }; -template<> struct is_integral<unsigned short> { enum { value = true }; }; -template<> struct is_integral<signed int> { enum { value = true }; }; -template<> struct is_integral<unsigned int> { enum { value = true }; }; -template<> struct is_integral<signed long> { enum { value = true }; }; -template<> struct is_integral<unsigned long> { enum { value = true }; }; +template<typename T, typename U> struct is_same { enum { value = 0 }; }; +template<typename T> struct is_same<T,T> { enum { value = 1 }; }; + +template< class T > +struct is_void : is_same<void, typename remove_const<T>::type> {}; + +#if EIGEN_HAS_CXX11 +template<> struct is_arithmetic<signed long long> { enum { value = true }; }; +template<> struct is_arithmetic<unsigned long long> { enum { value = true }; }; +using std::is_integral; +#else +template<typename T> struct is_integral { enum { value = false }; }; +template<> struct is_integral<bool> { enum { value = true }; }; +template<> struct is_integral<char> { enum { value = true }; }; +template<> struct is_integral<signed char> { enum { value = true }; }; +template<> struct is_integral<unsigned char> { enum { value = true }; }; +template<> struct is_integral<signed short> { enum { value = true }; }; +template<> struct is_integral<unsigned short> { enum { value = true }; }; +template<> struct is_integral<signed int> { enum { value = true }; }; +template<> struct is_integral<unsigned int> { enum { value = true }; }; +template<> struct is_integral<signed long> { enum { value = true }; }; +template<> struct is_integral<unsigned long> { enum { value = true }; }; +#if EIGEN_COMP_MSVC +template<> struct is_integral<signed __int64> { enum { value = true }; }; +template<> struct is_integral<unsigned __int64> { enum { value = true }; }; +#endif +#endif + +#if EIGEN_HAS_CXX11 +using std::make_unsigned; +#else +// TODO: Possibly improve this implementation of make_unsigned. +// It is currently used only by +// template<typename Scalar> struct random_default_impl<Scalar, false, true>. +template<typename> struct make_unsigned; +template<> struct make_unsigned<char> { typedef unsigned char type; }; +template<> struct make_unsigned<signed char> { typedef unsigned char type; }; +template<> struct make_unsigned<unsigned char> { typedef unsigned char type; }; +template<> struct make_unsigned<signed short> { typedef unsigned short type; }; +template<> struct make_unsigned<unsigned short> { typedef unsigned short type; }; +template<> struct make_unsigned<signed int> { typedef unsigned int type; }; +template<> struct make_unsigned<unsigned int> { typedef unsigned int type; }; +template<> struct make_unsigned<signed long> { typedef unsigned long type; }; +template<> struct make_unsigned<unsigned long> { typedef unsigned long type; }; +#if EIGEN_COMP_MSVC +template<> struct make_unsigned<signed __int64> { typedef unsigned __int64 type; }; +template<> struct make_unsigned<unsigned __int64> { typedef unsigned __int64 type; }; +#endif + +// Some platforms define int64_t as `long long` even for C++03, where +// `long long` is not guaranteed by the standard. In this case we are missing +// the definition for make_unsigned. If we just define it, we run into issues +// where `long long` doesn't exist in some compilers for C++03. We therefore add +// the specialization for these platforms only. +#if EIGEN_OS_MAC || EIGEN_COMP_MINGW +template<> struct make_unsigned<unsigned long long> { typedef unsigned long long type; }; +template<> struct make_unsigned<long long> { typedef unsigned long long type; }; +#endif +#endif template <typename T> struct add_const { typedef const T type; }; template <typename T> struct add_const<T&> { typedef T& type; }; @@ -121,6 +217,11 @@ template<typename T> struct add_const_on_value_type<T*> { typedef T const template<typename T> struct add_const_on_value_type<T* const> { typedef T const* const type; }; template<typename T> struct add_const_on_value_type<T const* const> { typedef T const* const type; }; +#if EIGEN_HAS_CXX11 + +using std::is_convertible; + +#else template<typename From, typename To> struct is_convertible_impl @@ -134,16 +235,19 @@ private: struct yes {int a[1];}; struct no {int a[2];}; - static yes test(const To&, int); + template<typename T> + static yes test(T, int); + + template<typename T> static no test(any_conversion, ...); public: - static From ms_from; + static typename internal::remove_reference<From>::type* ms_from; #ifdef __INTEL_COMPILER #pragma warning push #pragma warning ( disable : 2259 ) #endif - enum { value = sizeof(test(ms_from, 0))==sizeof(yes) }; + enum { value = sizeof(test<To>(*ms_from, 0))==sizeof(yes) }; #ifdef __INTEL_COMPILER #pragma warning pop #endif @@ -152,10 +256,17 @@ public: template<typename From, typename To> struct is_convertible { - enum { value = is_convertible_impl<typename remove_all<From>::type, - typename remove_all<To >::type>::value }; + enum { value = is_convertible_impl<From,To>::value }; }; +template<typename T> +struct is_convertible<T,T&> { enum { value = false }; }; + +template<typename T> +struct is_convertible<const T,const T&> { enum { value = true }; }; + +#endif + /** \internal Allows to enable/disable an overload * according to a compile time condition. */ @@ -164,7 +275,7 @@ template<bool Condition, typename T=void> struct enable_if; template<typename T> struct enable_if<true,T> { typedef T type; }; -#if defined(__CUDA_ARCH__) +#if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11 #if !defined(__FLT_EPSILON__) #define __FLT_EPSILON__ FLT_EPSILON #define __DBL_EPSILON__ DBL_EPSILON @@ -175,7 +286,7 @@ namespace device { template<typename T> struct numeric_limits { EIGEN_DEVICE_FUNC - static T epsilon() { return 0; } + static EIGEN_CONSTEXPR T epsilon() { return 0; } static T (max)() { assert(false && "Highest not supported for this type"); } static T (min)() { assert(false && "Lowest not supported for this type"); } static T infinity() { assert(false && "Infinity not supported for this type"); } @@ -183,91 +294,130 @@ template<typename T> struct numeric_limits }; template<> struct numeric_limits<float> { - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static float epsilon() { return __FLT_EPSILON__; } EIGEN_DEVICE_FUNC - static float (max)() { return CUDART_MAX_NORMAL_F; } - EIGEN_DEVICE_FUNC + static float (max)() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_MAX_NORMAL_F; + #else + return HIPRT_MAX_NORMAL_F; + #endif + } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static float (min)() { return FLT_MIN; } EIGEN_DEVICE_FUNC - static float infinity() { return CUDART_INF_F; } + static float infinity() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_INF_F; + #else + return HIPRT_INF_F; + #endif + } EIGEN_DEVICE_FUNC - static float quiet_NaN() { return CUDART_NAN_F; } + static float quiet_NaN() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_NAN_F; + #else + return HIPRT_NAN_F; + #endif + } }; template<> struct numeric_limits<double> { - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static double epsilon() { return __DBL_EPSILON__; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static double (max)() { return DBL_MAX; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static double (min)() { return DBL_MIN; } EIGEN_DEVICE_FUNC - static double infinity() { return CUDART_INF; } + static double infinity() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_INF; + #else + return HIPRT_INF; + #endif + } EIGEN_DEVICE_FUNC - static double quiet_NaN() { return CUDART_NAN; } + static double quiet_NaN() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_NAN; + #else + return HIPRT_NAN; + #endif + } }; template<> struct numeric_limits<int> { - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static int epsilon() { return 0; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static int (max)() { return INT_MAX; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static int (min)() { return INT_MIN; } }; template<> struct numeric_limits<unsigned int> { - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static unsigned int epsilon() { return 0; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static unsigned int (max)() { return UINT_MAX; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static unsigned int (min)() { return 0; } }; template<> struct numeric_limits<long> { - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static long epsilon() { return 0; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static long (max)() { return LONG_MAX; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static long (min)() { return LONG_MIN; } }; template<> struct numeric_limits<unsigned long> { - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static unsigned long epsilon() { return 0; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static unsigned long (max)() { return ULONG_MAX; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static unsigned long (min)() { return 0; } }; template<> struct numeric_limits<long long> { - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static long long epsilon() { return 0; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static long long (max)() { return LLONG_MAX; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static long long (min)() { return LLONG_MIN; } }; template<> struct numeric_limits<unsigned long long> { - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static unsigned long long epsilon() { return 0; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static unsigned long long (max)() { return ULLONG_MAX; } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static unsigned long long (min)() { return 0; } }; +template<> struct numeric_limits<bool> +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static bool epsilon() { return false; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static bool (max)() { return true; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static bool (min)() { return false; } +}; } -#endif +#endif // defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11 /** \internal - * A base class do disable default copy ctor and copy assignement operator. + * A base class do disable default copy ctor and copy assignment operator. */ class noncopyable { @@ -279,13 +429,82 @@ protected: }; /** \internal - * Convenient struct to get the result type of a unary or binary functor. + * Provides access to the number of elements in the object of as a compile-time constant expression. + * It "returns" Eigen::Dynamic if the size cannot be resolved at compile-time (default). + * + * Similar to std::tuple_size, but more general. + * + * It currently supports: + * - any types T defining T::SizeAtCompileTime + * - plain C arrays as T[N] + * - std::array (c++11) + * - some internal types such as SingleRange and AllRange * - * It supports both the current STL mechanism (using the result_type member) as well as - * upcoming next STL generation (using a templated result member). - * If none of these members is provided, then the type of the first argument is returned. FIXME, that behavior is a pretty bad hack. + * The second template parameter eases SFINAE-based specializations. */ -#if EIGEN_HAS_STD_RESULT_OF +template<typename T, typename EnableIf = void> struct array_size { + enum { value = Dynamic }; +}; + +template<typename T> struct array_size<T,typename internal::enable_if<((T::SizeAtCompileTime&0)==0)>::type> { + enum { value = T::SizeAtCompileTime }; +}; + +template<typename T, int N> struct array_size<const T (&)[N]> { + enum { value = N }; +}; +template<typename T, int N> struct array_size<T (&)[N]> { + enum { value = N }; +}; + +#if EIGEN_HAS_CXX11 +template<typename T, std::size_t N> struct array_size<const std::array<T,N> > { + enum { value = N }; +}; +template<typename T, std::size_t N> struct array_size<std::array<T,N> > { + enum { value = N }; +}; +#endif + +/** \internal + * Analogue of the std::size free function. + * It returns the size of the container or view \a x of type \c T + * + * It currently supports: + * - any types T defining a member T::size() const + * - plain C arrays as T[N] + * + */ +template<typename T> +EIGEN_CONSTEXPR Index size(const T& x) { return x.size(); } + +template<typename T,std::size_t N> +EIGEN_CONSTEXPR Index size(const T (&) [N]) { return N; } + +/** \internal + * Convenient struct to get the result type of a nullary, unary, binary, or + * ternary functor. + * + * Pre C++11: + * Supports both a Func::result_type member and templated + * Func::result<Func(ArgTypes...)>::type member. + * + * If none of these members is provided, then the type of the first + * argument is returned. + * + * Post C++11: + * This uses std::result_of. However, note the `type` member removes + * const and converts references/pointers to their corresponding value type. + */ +#if EIGEN_HAS_STD_INVOKE_RESULT +template<typename T> struct result_of; + +template<typename F, typename... ArgTypes> +struct result_of<F(ArgTypes...)> { + typedef typename std::invoke_result<F, ArgTypes...>::type type1; + typedef typename remove_all<type1>::type type; +}; +#elif EIGEN_HAS_STD_RESULT_OF template<typename T> struct result_of { typedef typename std::result_of<T>::type type1; typedef typename remove_all<type1>::type type; @@ -297,6 +516,28 @@ struct has_none {int a[1];}; struct has_std_result_type {int a[2];}; struct has_tr1_result {int a[3];}; +template<typename Func, int SizeOf> +struct nullary_result_of_select {}; + +template<typename Func> +struct nullary_result_of_select<Func, sizeof(has_std_result_type)> {typedef typename Func::result_type type;}; + +template<typename Func> +struct nullary_result_of_select<Func, sizeof(has_tr1_result)> {typedef typename Func::template result<Func()>::type type;}; + +template<typename Func> +struct result_of<Func()> { + template<typename T> + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + template<typename T> + static has_tr1_result testFunctor(T const *, typename T::template result<T()>::type const * = 0); + static has_none testFunctor(...); + + // note that the following indirection is needed for gcc-3.3 + enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))}; + typedef typename nullary_result_of_select<Func, FunctorType>::type type; +}; + template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)> struct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;}; @@ -366,6 +607,45 @@ struct result_of<Func(ArgType0,ArgType1,ArgType2)> { enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))}; typedef typename ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, FunctorType>::type type; }; + +#endif + +#if EIGEN_HAS_STD_INVOKE_RESULT +template<typename F, typename... ArgTypes> +struct invoke_result { + typedef typename std::invoke_result<F, ArgTypes...>::type type1; + typedef typename remove_all<type1>::type type; +}; +#elif EIGEN_HAS_CXX11 +template<typename F, typename... ArgTypes> +struct invoke_result { + typedef typename result_of<F(ArgTypes...)>::type type1; + typedef typename remove_all<type1>::type type; +}; +#else +template<typename F, typename ArgType0 = void, typename ArgType1 = void, typename ArgType2 = void> +struct invoke_result { + typedef typename result_of<F(ArgType0, ArgType1, ArgType2)>::type type1; + typedef typename remove_all<type1>::type type; +}; + +template<typename F> +struct invoke_result<F, void, void, void> { + typedef typename result_of<F()>::type type1; + typedef typename remove_all<type1>::type type; +}; + +template<typename F, typename ArgType0> +struct invoke_result<F, ArgType0, void, void> { + typedef typename result_of<F(ArgType0)>::type type1; + typedef typename remove_all<type1>::type type; +}; + +template<typename F, typename ArgType0, typename ArgType1> +struct invoke_result<F, ArgType0, ArgType1, void> { + typedef typename result_of<F(ArgType0, ArgType1)>::type type1; + typedef typename remove_all<type1>::type type; +}; #endif struct meta_yes { char a[1]; }; @@ -375,10 +655,10 @@ struct meta_no { char a[2]; }; template <typename T> struct has_ReturnType { - template <typename C> static meta_yes testFunctor(typename C::ReturnType const *); - template <typename C> static meta_no testFunctor(...); + template <typename C> static meta_yes testFunctor(C const *, typename C::ReturnType const * = 0); + template <typename C> static meta_no testFunctor(...); - enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) }; + enum { value = sizeof(testFunctor<T>(static_cast<T*>(0))) == sizeof(meta_yes) }; }; template<typename T> const T* return_ptr(); @@ -435,20 +715,25 @@ class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? /** \internal Computes the least common multiple of two positive integer A and B - * at compile-time. It implements a naive algorithm testing all multiples of A. - * It thus works better if A>=B. + * at compile-time. */ -template<int A, int B, int K=1, bool Done = ((A*K)%B)==0> +template<int A, int B, int K=1, bool Done = ((A*K)%B)==0, bool Big=(A>=B)> struct meta_least_common_multiple { enum { ret = meta_least_common_multiple<A,B,K+1>::ret }; }; +template<int A, int B, int K, bool Done> +struct meta_least_common_multiple<A,B,K,Done,false> +{ + enum { ret = meta_least_common_multiple<B,A,K>::ret }; +}; template<int A, int B, int K> -struct meta_least_common_multiple<A,B,K,true> +struct meta_least_common_multiple<A,B,K,true,true> { enum { ret = A*K }; }; + /** \internal determines whether the product of two numeric types is allowed and what the return type is */ template<typename T, typename U> struct scalar_product_traits { @@ -461,17 +746,27 @@ template<typename T, typename U> struct scalar_product_traits // typedef typename scalar_product_traits<typename remove_all<ArgType0>::type, typename remove_all<ArgType1>::type>::ReturnType type; // }; +/** \internal Obtains a POD type suitable to use as storage for an object of a size + * of at most Len bytes, aligned as specified by \c Align. + */ +template<unsigned Len, unsigned Align> +struct aligned_storage { + struct type { + EIGEN_ALIGN_TO_BOUNDARY(Align) unsigned char data[Len]; + }; +}; + } // end namespace internal namespace numext { - -#if defined(__CUDA_ARCH__) + +#if defined(EIGEN_GPU_COMPILE_PHASE) template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; } #else template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); } #endif -#if defined(__CUDA_ARCH__) +#if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11 using internal::device::numeric_limits; #else using std::numeric_limits; @@ -480,11 +775,36 @@ using std::numeric_limits; // Integer division with rounding up. // T is assumed to be an integer type with a>=0, and b>0 template<typename T> +EIGEN_DEVICE_FUNC T div_ceil(const T &a, const T &b) { return (a+b-1) / b; } +// The aim of the following functions is to bypass -Wfloat-equal warnings +// when we really want a strict equality comparison on floating points. +template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC +bool equal_strict(const X& x,const Y& y) { return x == y; } + +#if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC)) +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC +bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); } + +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC +bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); } +#endif + +template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC +bool not_equal_strict(const X& x,const Y& y) { return x != y; } + +#if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC)) +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC +bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); } + +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC +bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); } +#endif + } // end namespace numext } // end namespace Eigen diff --git a/Eigen/src/Core/util/ReenableStupidWarnings.h b/Eigen/src/Core/util/ReenableStupidWarnings.h index 86b60f52f..1ce6fd1b0 100644 --- a/Eigen/src/Core/util/ReenableStupidWarnings.h +++ b/Eigen/src/Core/util/ReenableStupidWarnings.h @@ -1,4 +1,8 @@ -#ifdef EIGEN_WARNINGS_DISABLED +#ifdef EIGEN_WARNINGS_DISABLED_2 +// "DisableStupidWarnings.h" was included twice recursively: Do not reenable warnings yet! +# undef EIGEN_WARNINGS_DISABLED_2 + +#elif defined(EIGEN_WARNINGS_DISABLED) #undef EIGEN_WARNINGS_DISABLED #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS @@ -8,7 +12,7 @@ #pragma warning pop #elif defined __clang__ #pragma clang diagnostic pop - #elif defined __GNUC__ && __GNUC__>=6 + #elif defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) #pragma GCC diagnostic pop #endif diff --git a/Eigen/src/Core/util/ReshapedHelper.h b/Eigen/src/Core/util/ReshapedHelper.h new file mode 100644 index 000000000..412432132 --- /dev/null +++ b/Eigen/src/Core/util/ReshapedHelper.h @@ -0,0 +1,51 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_RESHAPED_HELPER_H +#define EIGEN_RESHAPED_HELPER_H + +namespace Eigen { + +enum AutoSize_t { AutoSize }; +const int AutoOrder = 2; + +namespace internal { + +template<typename SizeType,typename OtherSize, int TotalSize> +struct get_compiletime_reshape_size { + enum { value = get_fixed_value<SizeType>::value }; +}; + +template<typename SizeType> +Index get_runtime_reshape_size(SizeType size, Index /*other*/, Index /*total*/) { + return internal::get_runtime_value(size); +} + +template<typename OtherSize, int TotalSize> +struct get_compiletime_reshape_size<AutoSize_t,OtherSize,TotalSize> { + enum { + other_size = get_fixed_value<OtherSize>::value, + value = (TotalSize==Dynamic || other_size==Dynamic) ? Dynamic : TotalSize / other_size }; +}; + +inline Index get_runtime_reshape_size(AutoSize_t /*size*/, Index other, Index total) { + return total/other; +} + +template<int Flags, int Order> +struct get_compiletime_reshape_order { + enum { value = Order == AutoOrder ? Flags & RowMajorBit : Order }; +}; + +} + +} // end namespace Eigen + +#endif // EIGEN_RESHAPED_HELPER_H diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 983361a45..c45de5901 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -24,9 +24,10 @@ * */ +#ifndef EIGEN_STATIC_ASSERT #ifndef EIGEN_NO_STATIC_ASSERT - #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)) + #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (EIGEN_COMP_CXXVER >= 11) || (EIGEN_COMP_MSVC >= 1600)) // if native static_assert is enabled, let's use it #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG); @@ -44,64 +45,68 @@ struct static_assertion<true> { enum { - YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX, - YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES, - YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES, - THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE, - THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE, - THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE, - OUT_OF_RANGE_ACCESS, - YOU_MADE_A_PROGRAMMING_MISTAKE, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT, - EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE, - YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR, - YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR, - UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC, - THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES, - FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED, - NUMERIC_TYPE_MUST_BE_REAL, - COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED, - WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED, - THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE, - INVALID_MATRIX_PRODUCT, - INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS, - INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION, - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY, - THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES, - THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES, - INVALID_MATRIX_TEMPLATE_PARAMETERS, - INVALID_MATRIXBASE_TEMPLATE_PARAMETERS, - BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER, - THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX, - THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES, - YOU_ALREADY_SPECIFIED_THIS_STRIDE, - INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION, - THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD, - PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1, - THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS, - YOU_CANNOT_MIX_ARRAYS_AND_MATRICES, - YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION, - THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY, - YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT, - THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS, - THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL, - THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES, - YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED, - YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED, - THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE, - THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, - OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, - IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, - STORAGE_LAYOUT_DOES_NOT_MATCH, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, - THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, - MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, - THIS_TYPE_IS_NOT_SUPPORTED, - STORAGE_KIND_MUST_MATCH, - STORAGE_INDEX_MUST_MATCH, - CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY + YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1, + YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1, + YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1, + THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1, + THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1, + THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1, + OUT_OF_RANGE_ACCESS=1, + YOU_MADE_A_PROGRAMMING_MISTAKE=1, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1, + EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1, + YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1, + YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1, + UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1, + THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1, + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1, + NUMERIC_TYPE_MUST_BE_REAL=1, + COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1, + WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1, + THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1, + INVALID_MATRIX_PRODUCT=1, + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1, + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1, + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1, + THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1, + INVALID_MATRIX_TEMPLATE_PARAMETERS=1, + INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1, + BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1, + THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1, + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1, + YOU_ALREADY_SPECIFIED_THIS_STRIDE=1, + INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1, + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1, + THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1, + YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1, + YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1, + THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1, + YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1, + THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1, + THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1, + THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1, + YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1, + YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1, + THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1, + THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1, + OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1, + IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1, + STORAGE_LAYOUT_DOES_NOT_MATCH=1, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1, + MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1, + THIS_TYPE_IS_NOT_SUPPORTED=1, + STORAGE_KIND_MUST_MATCH=1, + STORAGE_INDEX_MUST_MATCH=1, + CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1, + SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1, + INVALID_TEMPLATE_PARAMETER=1, + GPU_TENSOR_CONTRACTION_DOES_NOT_SUPPORT_OUTPUT_KERNELS=1, + THE_ARRAY_SIZE_SHOULD_EQUAL_WITH_PACKET_SIZE=1 }; }; @@ -131,7 +136,7 @@ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG); #endif // EIGEN_NO_STATIC_ASSERT - +#endif // EIGEN_STATIC_ASSERT // static assertion failing if the type \a TYPE is not a vector type #define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \ @@ -180,7 +185,7 @@ ) #define EIGEN_STATIC_ASSERT_NON_INTEGER(TYPE) \ - EIGEN_STATIC_ASSERT(!NumTraits<TYPE>::IsInteger, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES) + EIGEN_STATIC_ASSERT(!Eigen::NumTraits<TYPE>::IsInteger, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES) // static assertion failing if it is guaranteed at compile-time that the two matrix expression types have different sizes @@ -190,8 +195,8 @@ YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES) #define EIGEN_STATIC_ASSERT_SIZE_1x1(TYPE) \ - EIGEN_STATIC_ASSERT((TYPE::RowsAtCompileTime == 1 || TYPE::RowsAtCompileTime == Dynamic) && \ - (TYPE::ColsAtCompileTime == 1 || TYPE::ColsAtCompileTime == Dynamic), \ + EIGEN_STATIC_ASSERT((TYPE::RowsAtCompileTime == 1 || TYPE::RowsAtCompileTime == Eigen::Dynamic) && \ + (TYPE::ColsAtCompileTime == 1 || TYPE::ColsAtCompileTime == Eigen::Dynamic), \ THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS) #define EIGEN_STATIC_ASSERT_LVALUE(Derived) \ diff --git a/Eigen/src/Core/util/SymbolicIndex.h b/Eigen/src/Core/util/SymbolicIndex.h new file mode 100644 index 000000000..354dd9add --- /dev/null +++ b/Eigen/src/Core/util/SymbolicIndex.h @@ -0,0 +1,293 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SYMBOLIC_INDEX_H +#define EIGEN_SYMBOLIC_INDEX_H + +namespace Eigen { + +/** \namespace Eigen::symbolic + * \ingroup Core_Module + * + * This namespace defines a set of classes and functions to build and evaluate symbolic expressions of scalar type Index. + * Here is a simple example: + * + * \code + * // First step, defines symbols: + * struct x_tag {}; static const symbolic::SymbolExpr<x_tag> x; + * struct y_tag {}; static const symbolic::SymbolExpr<y_tag> y; + * struct z_tag {}; static const symbolic::SymbolExpr<z_tag> z; + * + * // Defines an expression: + * auto expr = (x+3)/y+z; + * + * // And evaluate it: (c++14) + * std::cout << expr.eval(x=6,y=3,z=-13) << "\n"; + * + * // In c++98/11, only one symbol per expression is supported for now: + * auto expr98 = (3-x)/2; + * std::cout << expr98.eval(x=6) << "\n"; + * \endcode + * + * It is currently only used internally to define and manipulate the Eigen::last and Eigen::lastp1 symbols in Eigen::seq and Eigen::seqN. + * + */ +namespace symbolic { + +template<typename Tag> class Symbol; +template<typename Arg0> class NegateExpr; +template<typename Arg1,typename Arg2> class AddExpr; +template<typename Arg1,typename Arg2> class ProductExpr; +template<typename Arg1,typename Arg2> class QuotientExpr; + +// A simple wrapper around an integral value to provide the eval method. +// We could also use a free-function symbolic_eval... +template<typename IndexType=Index> +class ValueExpr { +public: + ValueExpr(IndexType val) : m_value(val) {} + template<typename T> + IndexType eval_impl(const T&) const { return m_value; } +protected: + IndexType m_value; +}; + +// Specialization for compile-time value, +// It is similar to ValueExpr(N) but this version helps the compiler to generate better code. +template<int N> +class ValueExpr<internal::FixedInt<N> > { +public: + ValueExpr() {} + template<typename T> + EIGEN_CONSTEXPR Index eval_impl(const T&) const { return N; } +}; + + +/** \class BaseExpr + * \ingroup Core_Module + * Common base class of any symbolic expressions + */ +template<typename Derived> +class BaseExpr +{ +public: + const Derived& derived() const { return *static_cast<const Derived*>(this); } + + /** Evaluate the expression given the \a values of the symbols. + * + * \param values defines the values of the symbols, it can either be a SymbolValue or a std::tuple of SymbolValue + * as constructed by SymbolExpr::operator= operator. + * + */ + template<typename T> + Index eval(const T& values) const { return derived().eval_impl(values); } + +#if EIGEN_HAS_CXX14 + template<typename... Types> + Index eval(Types&&... values) const { return derived().eval_impl(std::make_tuple(values...)); } +#endif + + NegateExpr<Derived> operator-() const { return NegateExpr<Derived>(derived()); } + + AddExpr<Derived,ValueExpr<> > operator+(Index b) const + { return AddExpr<Derived,ValueExpr<> >(derived(), b); } + AddExpr<Derived,ValueExpr<> > operator-(Index a) const + { return AddExpr<Derived,ValueExpr<> >(derived(), -a); } + ProductExpr<Derived,ValueExpr<> > operator*(Index a) const + { return ProductExpr<Derived,ValueExpr<> >(derived(),a); } + QuotientExpr<Derived,ValueExpr<> > operator/(Index a) const + { return QuotientExpr<Derived,ValueExpr<> >(derived(),a); } + + friend AddExpr<Derived,ValueExpr<> > operator+(Index a, const BaseExpr& b) + { return AddExpr<Derived,ValueExpr<> >(b.derived(), a); } + friend AddExpr<NegateExpr<Derived>,ValueExpr<> > operator-(Index a, const BaseExpr& b) + { return AddExpr<NegateExpr<Derived>,ValueExpr<> >(-b.derived(), a); } + friend ProductExpr<ValueExpr<>,Derived> operator*(Index a, const BaseExpr& b) + { return ProductExpr<ValueExpr<>,Derived>(a,b.derived()); } + friend QuotientExpr<ValueExpr<>,Derived> operator/(Index a, const BaseExpr& b) + { return QuotientExpr<ValueExpr<>,Derived>(a,b.derived()); } + + template<int N> + AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>) const + { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); } + template<int N> + AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N>) const + { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); } + template<int N> + ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N>) const + { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } + template<int N> + QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N>) const + { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } + + template<int N> + friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>, const BaseExpr& b) + { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); } + template<int N> + friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N>, const BaseExpr& b) + { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); } + template<int N> + friend ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N>, const BaseExpr& b) + { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } + template<int N> + friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N>, const BaseExpr& b) + { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } + +#if (!EIGEN_HAS_CXX14) + template<int N> + AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)()) const + { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); } + template<int N> + AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N> (*)()) const + { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); } + template<int N> + ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N> (*)()) const + { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } + template<int N> + QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N> (*)()) const + { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } + + template<int N> + friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)(), const BaseExpr& b) + { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); } + template<int N> + friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N> (*)(), const BaseExpr& b) + { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); } + template<int N> + friend ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N> (*)(), const BaseExpr& b) + { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } + template<int N> + friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N> (*)(), const BaseExpr& b) + { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } +#endif + + + template<typename OtherDerived> + AddExpr<Derived,OtherDerived> operator+(const BaseExpr<OtherDerived> &b) const + { return AddExpr<Derived,OtherDerived>(derived(), b.derived()); } + + template<typename OtherDerived> + AddExpr<Derived,NegateExpr<OtherDerived> > operator-(const BaseExpr<OtherDerived> &b) const + { return AddExpr<Derived,NegateExpr<OtherDerived> >(derived(), -b.derived()); } + + template<typename OtherDerived> + ProductExpr<Derived,OtherDerived> operator*(const BaseExpr<OtherDerived> &b) const + { return ProductExpr<Derived,OtherDerived>(derived(), b.derived()); } + + template<typename OtherDerived> + QuotientExpr<Derived,OtherDerived> operator/(const BaseExpr<OtherDerived> &b) const + { return QuotientExpr<Derived,OtherDerived>(derived(), b.derived()); } +}; + +template<typename T> +struct is_symbolic { + // BaseExpr has no conversion ctor, so we only have to check whether T can be statically cast to its base class BaseExpr<T>. + enum { value = internal::is_convertible<T,BaseExpr<T> >::value }; +}; + +/** Represents the actual value of a symbol identified by its tag + * + * It is the return type of SymbolValue::operator=, and most of the time this is only way it is used. + */ +template<typename Tag> +class SymbolValue +{ +public: + /** Default constructor from the value \a val */ + SymbolValue(Index val) : m_value(val) {} + + /** \returns the stored value of the symbol */ + Index value() const { return m_value; } +protected: + Index m_value; +}; + +/** Expression of a symbol uniquely identified by the template parameter type \c tag */ +template<typename tag> +class SymbolExpr : public BaseExpr<SymbolExpr<tag> > +{ +public: + /** Alias to the template parameter \c tag */ + typedef tag Tag; + + SymbolExpr() {} + + /** Associate the value \a val to the given symbol \c *this, uniquely identified by its \c Tag. + * + * The returned object should be passed to ExprBase::eval() to evaluate a given expression with this specified runtime-time value. + */ + SymbolValue<Tag> operator=(Index val) const { + return SymbolValue<Tag>(val); + } + + Index eval_impl(const SymbolValue<Tag> &values) const { return values.value(); } + +#if EIGEN_HAS_CXX14 + // C++14 versions suitable for multiple symbols + template<typename... Types> + Index eval_impl(const std::tuple<Types...>& values) const { return std::get<SymbolValue<Tag> >(values).value(); } +#endif +}; + +template<typename Arg0> +class NegateExpr : public BaseExpr<NegateExpr<Arg0> > +{ +public: + NegateExpr(const Arg0& arg0) : m_arg0(arg0) {} + + template<typename T> + Index eval_impl(const T& values) const { return -m_arg0.eval_impl(values); } +protected: + Arg0 m_arg0; +}; + +template<typename Arg0, typename Arg1> +class AddExpr : public BaseExpr<AddExpr<Arg0,Arg1> > +{ +public: + AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} + + template<typename T> + Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) + m_arg1.eval_impl(values); } +protected: + Arg0 m_arg0; + Arg1 m_arg1; +}; + +template<typename Arg0, typename Arg1> +class ProductExpr : public BaseExpr<ProductExpr<Arg0,Arg1> > +{ +public: + ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} + + template<typename T> + Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) * m_arg1.eval_impl(values); } +protected: + Arg0 m_arg0; + Arg1 m_arg1; +}; + +template<typename Arg0, typename Arg1> +class QuotientExpr : public BaseExpr<QuotientExpr<Arg0,Arg1> > +{ +public: + QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} + + template<typename T> + Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) / m_arg1.eval_impl(values); } +protected: + Arg0 m_arg0; + Arg1 m_arg1; +}; + +} // end namespace symbolic + +} // end namespace Eigen + +#endif // EIGEN_SYMBOLIC_INDEX_H diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index ba5bd186d..71c32b8a1 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -34,6 +34,26 @@ inline IndexDest convert_index(const IndexSrc& idx) { return IndexDest(idx); } +// true if T can be considered as an integral index (i.e., and integral type or enum) +template<typename T> struct is_valid_index_type +{ + enum { value = +#if EIGEN_HAS_TYPE_TRAITS + internal::is_integral<T>::value || std::is_enum<T>::value +#elif EIGEN_COMP_MSVC + internal::is_integral<T>::value || __is_enum(T) +#else + // without C++11, we use is_convertible to Index instead of is_integral in order to treat enums as Index. + internal::is_convertible<T,Index>::value && !internal::is_same<T,float>::value && !is_same<T,double>::value +#endif + }; +}; + +// true if both types are not valid index types +template<typename RowIndices, typename ColIndices> +struct valid_indexed_view_overload { + enum { value = !(internal::is_valid_index_type<RowIndices>::value && internal::is_valid_index_type<ColIndices>::value) }; +}; // promote_scalar_arg is an helper used in operation between an expression and a scalar, like: // expression * scalar @@ -90,6 +110,9 @@ class no_assignment_operator { private: no_assignment_operator& operator=(const no_assignment_operator&); + protected: + EIGEN_DEFAULT_COPY_CONSTRUCTOR(no_assignment_operator) + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(no_assignment_operator) }; /** \internal return the index type with the largest number of bits */ @@ -106,19 +129,23 @@ struct promote_index_type template<typename T, int Value> class variable_if_dynamic { public: - EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic) + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(variable_if_dynamic) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {} + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR + T value() { return T(Value); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR + operator T() const { return T(Value); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void setValue(T v) const { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } }; template<typename T> class variable_if_dynamic<T, Dynamic> { T m_value; - EIGEN_DEVICE_FUNC variable_if_dynamic() { eigen_assert(false); } public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value = 0) EIGEN_NO_THROW : m_value(value) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return m_value; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; } }; @@ -129,8 +156,10 @@ template<typename T, int Value> class variable_if_dynamicindex public: EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {} + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR + T value() { return T(Value); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void setValue(T) {} }; template<typename T> class variable_if_dynamicindex<T, DynamicIndex> @@ -155,16 +184,7 @@ template<typename T> struct functor_traits template<typename T> struct packet_traits; -template<typename T> struct unpacket_traits -{ - typedef T type; - typedef T half; - enum - { - size = 1, - alignment = 1 - }; -}; +template<typename T> struct unpacket_traits; template<int Size, typename PacketType, bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value> @@ -383,7 +403,7 @@ template<typename T> struct plain_matrix_type_row_major typedef Matrix<typename traits<T>::Scalar, Rows, Cols, - (MaxCols==1&&MaxRows!=1) ? RowMajor : ColMajor, + (MaxCols==1&&MaxRows!=1) ? ColMajor : RowMajor, MaxRows, MaxCols > type; @@ -400,7 +420,7 @@ struct ref_selector T const&, const T >::type type; - + typedef typename conditional< bool(traits<T>::Flags & NestByRefBit), T &, @@ -438,7 +458,7 @@ template<typename T, int n, typename PlainObject = typename plain_object_eval<T> { enum { ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost, - CoeffReadCost = evaluator<T>::CoeffReadCost, // NOTE What if an evaluator evaluate itself into a tempory? + CoeffReadCost = evaluator<T>::CoeffReadCost, // NOTE What if an evaluator evaluate itself into a temporary? // Then CoeffReadCost will be small (e.g., 1) but we still have to evaluate, especially if n>1. // This situation is already taken care by the EvalBeforeNestingBit flag, which is turned ON // for all evaluator creating a temporary. This flag is then propagated by the parent evaluators. @@ -579,14 +599,14 @@ template<typename ExpressionType, typename Scalar = typename ExpressionType::Sca struct plain_row_type { typedef Matrix<Scalar, 1, ExpressionType::ColsAtCompileTime, - ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> MatrixRowType; + int(ExpressionType::PlainObject::Options) | int(RowMajor), 1, ExpressionType::MaxColsAtCompileTime> MatrixRowType; typedef Array<Scalar, 1, ExpressionType::ColsAtCompileTime, - ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> ArrayRowType; + int(ExpressionType::PlainObject::Options) | int(RowMajor), 1, ExpressionType::MaxColsAtCompileTime> ArrayRowType; typedef typename conditional< is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value, MatrixRowType, - ArrayRowType + ArrayRowType >::type type; }; @@ -601,7 +621,7 @@ struct plain_col_type typedef typename conditional< is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value, MatrixColType, - ArrayColType + ArrayColType >::type type; }; @@ -617,7 +637,7 @@ struct plain_diag_type typedef typename conditional< is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value, MatrixDiagType, - ArrayDiagType + ArrayDiagType >::type type; }; @@ -654,24 +674,39 @@ template<typename T> struct is_diagonal<DiagonalWrapper<T> > template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> > { enum { ret = true }; }; + +template<typename T> struct is_identity +{ enum { value = false }; }; + +template<typename T> struct is_identity<CwiseNullaryOp<internal::scalar_identity_op<typename T::Scalar>, T> > +{ enum { value = true }; }; + + template<typename S1, typename S2> struct glue_shapes; template<> struct glue_shapes<DenseShape,TriangularShape> { typedef TriangularShape type; }; template<typename T1, typename T2> -bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if<has_direct_access<T1>::ret&&has_direct_access<T2>::ret, T1>::type * = 0) +struct possibly_same_dense { + enum { value = has_direct_access<T1>::ret && has_direct_access<T2>::ret && is_same<typename T1::Scalar,typename T2::Scalar>::value }; +}; + +template<typename T1, typename T2> +EIGEN_DEVICE_FUNC +bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if<possibly_same_dense<T1,T2>::value>::type * = 0) { return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride()); } template<typename T1, typename T2> -bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_access<T1>::ret&&has_direct_access<T2>::ret), T1>::type * = 0) +EIGEN_DEVICE_FUNC +bool is_same_dense(const T1 &, const T2 &, typename enable_if<!possibly_same_dense<T1,T2>::value>::type * = 0) { return false; } // Internal helper defining the cost of a scalar division for the type T. // The default heuristic can be specialized for each scalar type and architecture. -template<typename T,bool Vectorized=false,typename EnaleIf = void> +template<typename T,bool Vectorized=false,typename EnableIf = void> struct scalar_div_cost { enum { value = 8*NumTraits<T>::MulCost }; }; @@ -718,7 +753,7 @@ std::string demangle_flags(int f) if(f&DirectAccessBit) res += " | Direct"; if(f&NestByRefBit) res += " | NestByRef"; if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit"; - + return res; } #endif @@ -815,7 +850,7 @@ struct ScalarBinaryOpTraits<void,void,BinaryOp> #define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ EIGEN_STATIC_ASSERT((Eigen::internal::has_ReturnType<ScalarBinaryOpTraits<LHS, RHS,BINOP> >::value), \ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - + } // end namespace Eigen #endif // EIGEN_XPRHELPER_H |