aboutsummaryrefslogtreecommitdiff
path: root/Eigen/src/Core/util
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/util')
-rwxr-xr-xEigen/src/Core/util/BlasUtil.h423
-rw-r--r--Eigen/src/Core/util/ConfigureVectorization.h512
-rw-r--r--Eigen/src/Core/util/Constants.h34
-rwxr-xr-xEigen/src/Core/util/DisableStupidWarnings.h43
-rw-r--r--Eigen/src/Core/util/ForwardDeclarations.h38
-rw-r--r--Eigen/src/Core/util/IndexedViewHelper.h186
-rw-r--r--Eigen/src/Core/util/IntegralConstant.h272
-rwxr-xr-xEigen/src/Core/util/MKL_support.h19
-rw-r--r--Eigen/src/Core/util/Macros.h960
-rw-r--r--Eigen/src/Core/util/Memory.h268
-rwxr-xr-xEigen/src/Core/util/Meta.h462
-rw-r--r--Eigen/src/Core/util/ReenableStupidWarnings.h8
-rw-r--r--Eigen/src/Core/util/ReshapedHelper.h51
-rw-r--r--Eigen/src/Core/util/StaticAssert.h131
-rw-r--r--Eigen/src/Core/util/SymbolicIndex.h293
-rw-r--r--Eigen/src/Core/util/XprHelper.h95
16 files changed, 3196 insertions, 599 deletions
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 6e6ee119b..e16a56498 100755
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -24,14 +24,14 @@ struct gebp_kernel;
template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
struct gemm_pack_rhs;
-template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
+template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
struct gemm_pack_lhs;
template<
typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
- int ResStorageOrder>
+ int ResStorageOrder, int ResInnerStride>
struct general_matrix_matrix_product;
template<typename Index,
@@ -39,90 +39,6 @@ template<typename Index,
typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized>
struct general_matrix_vector_product;
-
-template<bool Conjugate> struct conj_if;
-
-template<> struct conj_if<true> {
- template<typename T>
- inline T operator()(const T& x) const { return numext::conj(x); }
- template<typename T>
- inline T pconj(const T& x) const { return internal::pconj(x); }
-};
-
-template<> struct conj_if<false> {
- template<typename T>
- inline const T& operator()(const T& x) const { return x; }
- template<typename T>
- inline const T& pconj(const T& x) const { return x; }
-};
-
-// Generic implementation for custom complex types.
-template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs>
-struct conj_helper
-{
- typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType Scalar;
-
- EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const
- { return padd(c, pmul(x,y)); }
-
- EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const
- { return conj_if<ConjLhs>()(x) * conj_if<ConjRhs>()(y); }
-};
-
-template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
-{
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
-};
-
-template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
-{
- typedef std::complex<RealScalar> Scalar;
- EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
- { return c + pmul(x,y); }
-
- EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
- { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); }
-};
-
-template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
-{
- typedef std::complex<RealScalar> Scalar;
- EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
- { return c + pmul(x,y); }
-
- EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
- { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
-};
-
-template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
-{
- typedef std::complex<RealScalar> Scalar;
- EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
- { return c + pmul(x,y); }
-
- EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
- { return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y), - numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
-};
-
-template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
-{
- typedef std::complex<RealScalar> Scalar;
- EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const
- { return padd(c, pmul(x,y)); }
- EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const
- { return conj_if<Conj>()(x)*y; }
-};
-
-template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::complex<RealScalar>, false,Conj>
-{
- typedef std::complex<RealScalar> Scalar;
- EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const
- { return padd(c, pmul(x,y)); }
- EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const
- { return x*conj_if<Conj>()(y); }
-};
-
template<typename From,typename To> struct get_factor {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); }
};
@@ -155,13 +71,19 @@ class BlasVectorMapper {
Scalar* m_data;
};
-template<typename Scalar, typename Index, int AlignmentType>
-class BlasLinearMapper {
- public:
- typedef typename packet_traits<Scalar>::type Packet;
- typedef typename packet_traits<Scalar>::half HalfPacket;
+template<typename Scalar, typename Index, int AlignmentType, int Incr=1>
+class BlasLinearMapper;
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {}
+template<typename Scalar, typename Index, int AlignmentType>
+class BlasLinearMapper<Scalar,Index,AlignmentType>
+{
+public:
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data, Index incr=1)
+ : m_data(data)
+ {
+ EIGEN_ONLY_USED_FOR_DEBUG(incr);
+ eigen_assert(incr==1);
+ }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
internal::prefetch(&operator()(i));
@@ -171,33 +93,86 @@ class BlasLinearMapper {
return m_data[i];
}
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
- return ploadt<Packet, AlignmentType>(m_data + i);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
- return ploadt<HalfPacket, AlignmentType>(m_data + i);
+ template<typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const {
+ return ploadt<PacketType, AlignmentType>(m_data + i);
}
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
- pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
+ template<typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {
+ pstoret<Scalar, PacketType, AlignmentType>(m_data + i, p);
}
- protected:
+protected:
Scalar *m_data;
};
// Lightweight helper class to access matrix coefficients.
-template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned>
-class blas_data_mapper {
- public:
- typedef typename packet_traits<Scalar>::type Packet;
- typedef typename packet_traits<Scalar>::half HalfPacket;
+template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned, int Incr = 1>
+class blas_data_mapper;
+
+// TMP to help PacketBlock store implementation.
+// There's currently no known use case for PacketBlock load.
+// The default implementation assumes ColMajor order.
+// It always stores each packet sequentially, one `stride` apart.
+template<typename Index, typename Scalar, typename Packet, int n, int idx, int StorageOrder>
+struct PacketBlockManagement
+{
+ PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, StorageOrder> pbm;
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
+ pbm.store(to, stride, i, j, block);
+ pstoreu<Scalar>(to + i + (j + idx)*stride, block.packet[idx]);
+ }
+};
+
+// PacketBlockManagement specialization to take care of RowMajor order without ifs.
+template<typename Index, typename Scalar, typename Packet, int n, int idx>
+struct PacketBlockManagement<Index, Scalar, Packet, n, idx, RowMajor>
+{
+ PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, RowMajor> pbm;
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
+ pbm.store(to, stride, i, j, block);
+ pstoreu<Scalar>(to + j + (i + idx)*stride, block.packet[idx]);
+ }
+};
+
+template<typename Index, typename Scalar, typename Packet, int n, int StorageOrder>
+struct PacketBlockManagement<Index, Scalar, Packet, n, -1, StorageOrder>
+{
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
+ EIGEN_UNUSED_VARIABLE(to);
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(i);
+ EIGEN_UNUSED_VARIABLE(j);
+ EIGEN_UNUSED_VARIABLE(block);
+ }
+};
+
+template<typename Index, typename Scalar, typename Packet, int n>
+struct PacketBlockManagement<Index, Scalar, Packet, n, -1, RowMajor>
+{
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
+ EIGEN_UNUSED_VARIABLE(to);
+ EIGEN_UNUSED_VARIABLE(stride);
+ EIGEN_UNUSED_VARIABLE(i);
+ EIGEN_UNUSED_VARIABLE(j);
+ EIGEN_UNUSED_VARIABLE(block);
+ }
+};
+template<typename Scalar, typename Index, int StorageOrder, int AlignmentType>
+class blas_data_mapper<Scalar,Index,StorageOrder,AlignmentType,1>
+{
+public:
typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
typedef BlasVectorMapper<Scalar, Index> VectorMapper;
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr=1)
+ : m_data(data), m_stride(stride)
+ {
+ EIGEN_ONLY_USED_FOR_DEBUG(incr);
+ eigen_assert(incr==1);
+ }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
getSubMapper(Index i, Index j) const {
@@ -218,12 +193,14 @@ class blas_data_mapper {
return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
}
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
- return ploadt<Packet, AlignmentType>(&operator()(i, j));
+ template<typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const {
+ return ploadt<PacketType, AlignmentType>(&operator()(i, j));
}
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
- return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
+ template <typename PacketT, int AlignmentT>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {
+ return ploadt<PacketT, AlignmentT>(&operator()(i, j));
}
template<typename SubPacket>
@@ -246,11 +223,167 @@ class blas_data_mapper {
return internal::first_default_aligned(m_data, size);
}
- protected:
+ template<typename SubPacket, int n>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n> &block) const {
+ PacketBlockManagement<Index, Scalar, SubPacket, n, n-1, StorageOrder> pbm;
+ pbm.store(m_data, m_stride, i, j, block);
+ }
+protected:
Scalar* EIGEN_RESTRICT m_data;
const Index m_stride;
};
+// Implementation of non-natural increment (i.e. inner-stride != 1)
+// The exposed API is not complete yet compared to the Incr==1 case
+// because some features make less sense in this case.
+template<typename Scalar, typename Index, int AlignmentType, int Incr>
+class BlasLinearMapper
+{
+public:
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data,Index incr) : m_data(data), m_incr(incr) {}
+
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
+ internal::prefetch(&operator()(i));
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
+ return m_data[i*m_incr.value()];
+ }
+
+ template<typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const {
+ return pgather<Scalar,PacketType>(m_data + i*m_incr.value(), m_incr.value());
+ }
+
+ template<typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {
+ pscatter<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value());
+ }
+
+protected:
+ Scalar *m_data;
+ const internal::variable_if_dynamic<Index,Incr> m_incr;
+};
+
+template<typename Scalar, typename Index, int StorageOrder, int AlignmentType,int Incr>
+class blas_data_mapper
+{
+public:
+ typedef BlasLinearMapper<Scalar, Index, AlignmentType,Incr> LinearMapper;
+
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {}
+
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper
+ getSubMapper(Index i, Index j) const {
+ return blas_data_mapper(&operator()(i, j), m_stride, m_incr.value());
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
+ return LinearMapper(&operator()(i, j), m_incr.value());
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
+ return m_data[StorageOrder==RowMajor ? j*m_incr.value() + i*m_stride : i*m_incr.value() + j*m_stride];
+ }
+
+ template<typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const {
+ return pgather<Scalar,PacketType>(&operator()(i, j),m_incr.value());
+ }
+
+ template <typename PacketT, int AlignmentT>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {
+ return pgather<Scalar,PacketT>(&operator()(i, j),m_incr.value());
+ }
+
+ template<typename SubPacket>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
+ pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
+ }
+
+ template<typename SubPacket>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
+ return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
+ }
+
+ // storePacketBlock_helper defines a way to access values inside the PacketBlock; this is essentially required by the complex types.
+ template<typename SubPacket, typename ScalarT, int n, int idx>
+ struct storePacketBlock_helper
+ {
+ storePacketBlock_helper<SubPacket, ScalarT, n, idx-1> spbh;
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
+ spbh.store(sup, i,j,block);
+ for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
+ {
+ ScalarT *v = &sup->operator()(i+l, j+idx);
+ *v = block.packet[idx][l];
+ }
+ }
+ };
+
+ template<typename SubPacket, int n, int idx>
+ struct storePacketBlock_helper<SubPacket, std::complex<float>, n, idx>
+ {
+ storePacketBlock_helper<SubPacket, std::complex<float>, n, idx-1> spbh;
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
+ spbh.store(sup,i,j,block);
+ for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
+ {
+ std::complex<float> *v = &sup->operator()(i+l, j+idx);
+ v->real(block.packet[idx].v[2*l+0]);
+ v->imag(block.packet[idx].v[2*l+1]);
+ }
+ }
+ };
+
+ template<typename SubPacket, int n, int idx>
+ struct storePacketBlock_helper<SubPacket, std::complex<double>, n, idx>
+ {
+ storePacketBlock_helper<SubPacket, std::complex<double>, n, idx-1> spbh;
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
+ spbh.store(sup,i,j,block);
+ for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
+ {
+ std::complex<double> *v = &sup->operator()(i+l, j+idx);
+ v->real(block.packet[idx].v[2*l+0]);
+ v->imag(block.packet[idx].v[2*l+1]);
+ }
+ }
+ };
+
+ template<typename SubPacket, typename ScalarT, int n>
+ struct storePacketBlock_helper<SubPacket, ScalarT, n, -1>
+ {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
+ }
+ };
+
+ template<typename SubPacket, int n>
+ struct storePacketBlock_helper<SubPacket, std::complex<float>, n, -1>
+ {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
+ }
+ };
+
+ template<typename SubPacket, int n>
+ struct storePacketBlock_helper<SubPacket, std::complex<double>, n, -1>
+ {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
+ }
+ };
+ // This function stores a PacketBlock on m_data. This approach is really quite slow compared to Incr=1 and should be avoided when possible.
+ template<typename SubPacket, int n>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n>&block) const {
+ storePacketBlock_helper<SubPacket, Scalar, n, n-1> spb;
+ spb.store(this, i,j,block);
+ }
+protected:
+ Scalar* EIGEN_RESTRICT m_data;
+ const Index m_stride;
+ const internal::variable_if_dynamic<Index,Incr> m_incr;
+};
+
// lightweight helper class to access matrix coefficients (const version)
template<typename Scalar, typename Index, int StorageOrder>
class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {
@@ -278,14 +411,15 @@ template<typename XprType> struct blas_traits
HasUsableDirectAccess = ( (int(XprType::Flags)&DirectAccessBit)
&& ( bool(XprType::IsVectorAtCompileTime)
|| int(inner_stride_at_compile_time<XprType>::ret) == 1)
- ) ? 1 : 0
+ ) ? 1 : 0,
+ HasScalarFactor = false
};
typedef typename conditional<bool(HasUsableDirectAccess),
ExtractType,
typename _ExtractType::PlainObject
>::type DirectLinearAccessType;
- static inline ExtractType extract(const XprType& x) { return x; }
- static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
+ static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return x; }
+ static inline EIGEN_DEVICE_FUNC const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
};
// pop conjugate
@@ -310,17 +444,23 @@ template<typename Scalar, typename NestedXpr, typename Plain>
struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> >
: blas_traits<NestedXpr>
{
+ enum {
+ HasScalarFactor = true
+ };
typedef blas_traits<NestedXpr> Base;
typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;
typedef typename Base::ExtractType ExtractType;
- static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
- static inline Scalar extractScalarFactor(const XprType& x)
+ static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
+ static inline EIGEN_DEVICE_FUNC Scalar extractScalarFactor(const XprType& x)
{ return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }
};
template<typename Scalar, typename NestedXpr, typename Plain>
struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > >
: blas_traits<NestedXpr>
{
+ enum {
+ HasScalarFactor = true
+ };
typedef blas_traits<NestedXpr> Base;
typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType;
typedef typename Base::ExtractType ExtractType;
@@ -339,6 +479,9 @@ template<typename Scalar, typename NestedXpr>
struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
: blas_traits<NestedXpr>
{
+ enum {
+ HasScalarFactor = true
+ };
typedef blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
typedef typename Base::ExtractType ExtractType;
@@ -375,7 +518,7 @@ struct blas_traits<const T>
template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>
struct extract_data_selector {
- static const typename T::Scalar* run(const T& m)
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static const typename T::Scalar* run(const T& m)
{
return blas_traits<T>::extract(m).data();
}
@@ -386,11 +529,53 @@ struct extract_data_selector<T,false> {
static typename T::Scalar* run(const T&) { return 0; }
};
-template<typename T> const typename T::Scalar* extract_data(const T& m)
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const typename T::Scalar* extract_data(const T& m)
{
return extract_data_selector<T>::run(m);
}
+/**
+ * \c combine_scalar_factors extracts and multiplies factors from GEMM and GEMV products.
+ * There is a specialization for booleans
+ */
+template<typename ResScalar, typename Lhs, typename Rhs>
+struct combine_scalar_factors_impl
+{
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const Lhs& lhs, const Rhs& rhs)
+ {
+ return blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs)
+ {
+ return alpha * blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs);
+ }
+};
+template<typename Lhs, typename Rhs>
+struct combine_scalar_factors_impl<bool, Lhs, Rhs>
+{
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const Lhs& lhs, const Rhs& rhs)
+ {
+ return blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const bool& alpha, const Lhs& lhs, const Rhs& rhs)
+ {
+ return alpha && blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs);
+ }
+};
+
+template<typename ResScalar, typename Lhs, typename Rhs>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs)
+{
+ return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(alpha, lhs, rhs);
+}
+template<typename ResScalar, typename Lhs, typename Rhs>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const Lhs& lhs, const Rhs& rhs)
+{
+ return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(lhs, rhs);
+}
+
+
} // end namespace internal
} // end namespace Eigen
diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h
new file mode 100644
index 000000000..af4e69623
--- /dev/null
+++ b/Eigen/src/Core/util/ConfigureVectorization.h
@@ -0,0 +1,512 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2020, Arm Limited and Contributors
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CONFIGURE_VECTORIZATION_H
+#define EIGEN_CONFIGURE_VECTORIZATION_H
+
+//------------------------------------------------------------------------------------------
+// Static and dynamic alignment control
+//
+// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
+// as the maximal boundary in bytes on which dynamically and statically allocated data may be aligned, respectively.
+// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
+// a default value is automatically computed based on architecture, compiler, and OS.
+//
+// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,64,_MAX}
+// to be used to declare statically aligned buffers.
+//------------------------------------------------------------------------------------------
+
+
+/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
+ * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
+ * so that vectorization doesn't affect binary compatibility.
+ *
+ * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
+ * vectorized and non-vectorized code.
+ *
+ * FIXME: this code can be cleaned up once we switch to proper C++11 only.
+ */
+#if (defined EIGEN_CUDACC)
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
+ #define EIGEN_ALIGNOF(x) __alignof(x)
+#elif EIGEN_HAS_ALIGNAS
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
+ #define EIGEN_ALIGNOF(x) alignof(x)
+#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
+ #define EIGEN_ALIGNOF(x) __alignof(x)
+#elif EIGEN_COMP_MSVC
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
+ #define EIGEN_ALIGNOF(x) __alignof(x)
+#elif EIGEN_COMP_SUNCC
+ // FIXME not sure about this one:
+ #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
+ #define EIGEN_ALIGNOF(x) __alignof(x)
+#else
+ #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler
+#endif
+
+// If the user explicitly disables vectorization, then we also disable alignment
+#if defined(EIGEN_DONT_VECTORIZE)
+ #if defined(EIGEN_GPUCC)
+ // GPU code is always vectorized and requires memory alignment for
+ // statically allocated buffers.
+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
+ #else
+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
+ #endif
+#elif defined(__AVX512F__)
+ // 64 bytes static alignment is preferred only if really required
+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
+#elif defined(__AVX__)
+ // 32 bytes static alignment is preferred only if really required
+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
+#else
+ #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
+#endif
+
+
+// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
+#define EIGEN_MIN_ALIGN_BYTES 16
+
+// Defines the boundary (in bytes) on which the data needs to be aligned. Note
+// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
+// aligned at all regardless of the value of this #define.
+
+#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
+#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
+#endif
+
+// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated
+// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
+#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
+ #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
+ #undef EIGEN_MAX_STATIC_ALIGN_BYTES
+ #endif
+ #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
+#endif
+
+#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
+
+ // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
+
+ // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
+ // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
+ // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
+ // certain common platforms (compiler+architecture combinations) to avoid these problems.
+ // Only static alignment is really problematic (relies on nonstandard compiler extensions),
+ // try to keep heap alignment even when we have to disable static alignment.
+ #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)
+ #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
+ #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
+ // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support.
+ // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use.
+ // 4.8 and newer seem definitely unaffected.
+ #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
+ #else
+ #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
+ #endif
+
+ // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
+ #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
+ && !EIGEN_GCC3_OR_OLDER \
+ && !EIGEN_COMP_SUNCC \
+ && !EIGEN_OS_QNX
+ #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
+ #else
+ #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
+ #endif
+
+ #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
+ #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+ #else
+ #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
+ #endif
+
+#endif
+
+// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES
+#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
+#undef EIGEN_MAX_STATIC_ALIGN_BYTES
+#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
+#endif
+
+#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
+ #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+#endif
+
+// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
+// It takes into account both the user choice to explicitly enable/disable alignment (by setting EIGEN_MAX_STATIC_ALIGN_BYTES)
+// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
+// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
+
+
+// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
+#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
+#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
+#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
+#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
+#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
+#else
+#define EIGEN_ALIGN_MAX
+#endif
+
+
+// Dynamic alignment control: settle EIGEN_MAX_ALIGN_BYTES (0 if EIGEN_DONT_ALIGN is defined, otherwise the user value or EIGEN_IDEAL_MAX_ALIGN_BYTES) and derive EIGEN_DEFAULT_ALIGN_BYTES from it
+
+#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
+#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
+#endif
+
+#ifdef EIGEN_DONT_ALIGN
+ #ifdef EIGEN_MAX_ALIGN_BYTES
+ #undef EIGEN_MAX_ALIGN_BYTES
+ #endif
+ #define EIGEN_MAX_ALIGN_BYTES 0
+#elif !defined(EIGEN_MAX_ALIGN_BYTES) // no user choice: fall back to the ideal value
+ #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+#endif
+
+#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES // EIGEN_DEFAULT_ALIGN_BYTES is the larger of the two values
+#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+#else
+#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
+#endif
+
+
+#ifndef EIGEN_UNALIGNED_VECTORIZE
+#define EIGEN_UNALIGNED_VECTORIZE 1
+#endif
+
+//----------------------------------------------------------------------
+
+// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
+// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
+#if EIGEN_MAX_ALIGN_BYTES==0
+ #ifndef EIGEN_DONT_VECTORIZE
+ #define EIGEN_DONT_VECTORIZE
+ #endif
+#endif
+
+
+// The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be
+// removed as gcc 4.1 and msvc 2008 are not supported anyways.
+#if EIGEN_COMP_MSVC
+ #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
+ #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
+ // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
+ #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
+ #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
+ #endif
+ #endif
+#else
+ #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
+ #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
+ #endif
+#endif
+
+#if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
+
+ #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
+
+ // Defines symbols for compile-time detection of which instructions are
+ // used.
+ // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_SSE
+ #define EIGEN_VECTORIZE_SSE2
+
+ // Detect sse3/ssse3/sse4:
+ // gcc and icc defines __SSE3__, ...
+ // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
+ // want to force the use of those instructions with msvc.
+ #ifdef __SSE3__
+ #define EIGEN_VECTORIZE_SSE3
+ #endif
+ #ifdef __SSSE3__
+ #define EIGEN_VECTORIZE_SSSE3
+ #endif
+ #ifdef __SSE4_1__
+ #define EIGEN_VECTORIZE_SSE4_1
+ #endif
+ #ifdef __SSE4_2__
+ #define EIGEN_VECTORIZE_SSE4_2
+ #endif
+ #ifdef __AVX__
+ #ifndef EIGEN_USE_SYCL
+ #define EIGEN_VECTORIZE_AVX
+ #endif
+ #define EIGEN_VECTORIZE_SSE3
+ #define EIGEN_VECTORIZE_SSSE3
+ #define EIGEN_VECTORIZE_SSE4_1
+ #define EIGEN_VECTORIZE_SSE4_2
+ #endif
+ #ifdef __AVX2__
+ #ifndef EIGEN_USE_SYCL
+ #define EIGEN_VECTORIZE_AVX2
+ #define EIGEN_VECTORIZE_AVX
+ #endif
+ #define EIGEN_VECTORIZE_SSE3
+ #define EIGEN_VECTORIZE_SSSE3
+ #define EIGEN_VECTORIZE_SSE4_1
+ #define EIGEN_VECTORIZE_SSE4_2
+ #endif
+ #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
+ // MSVC does not expose a switch dedicated for FMA
+ // For MSVC, AVX2 => FMA
+ #define EIGEN_VECTORIZE_FMA
+ #endif
+ #if defined(__AVX512F__)
+ #ifndef EIGEN_VECTORIZE_FMA
+ #if EIGEN_COMP_GNUC
+ #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
+ #else
+ #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
+ #endif
+ #endif
+ #ifndef EIGEN_USE_SYCL
+ #define EIGEN_VECTORIZE_AVX512
+ #define EIGEN_VECTORIZE_AVX2
+ #define EIGEN_VECTORIZE_AVX
+ #endif
+ #define EIGEN_VECTORIZE_FMA
+ #define EIGEN_VECTORIZE_SSE3
+ #define EIGEN_VECTORIZE_SSSE3
+ #define EIGEN_VECTORIZE_SSE4_1
+ #define EIGEN_VECTORIZE_SSE4_2
+ #ifndef EIGEN_USE_SYCL
+ #ifdef __AVX512DQ__
+ #define EIGEN_VECTORIZE_AVX512DQ
+ #endif
+ #ifdef __AVX512ER__
+ #define EIGEN_VECTORIZE_AVX512ER
+ #endif
+ #ifdef __AVX512BF16__
+ #define EIGEN_VECTORIZE_AVX512BF16
+ #endif
+ #endif
+ #endif
+
+ // Disable AVX support on broken xcode versions
+ #if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 )
+ // Works around a nasty bug in the clang compiler shipped with XCode 11.0, which shows up in a common
+ // compilation situation: Mac deployment target set to macOS 10.15 with AVX enabled.
+ // See https://trac.macports.org/ticket/58776#no1
+ #ifdef EIGEN_VECTORIZE_AVX
+ #undef EIGEN_VECTORIZE_AVX
+ #warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
+ // Every instruction set selected above that builds on top of AVX has to be switched off as well,
+ // otherwise later code would see e.g. an AVX512 extension enabled without AVX itself.
+ #ifdef EIGEN_VECTORIZE_AVX2
+ #undef EIGEN_VECTORIZE_AVX2
+ #endif
+ #ifdef EIGEN_VECTORIZE_FMA
+ #undef EIGEN_VECTORIZE_FMA
+ #endif
+ #ifdef EIGEN_VECTORIZE_AVX512
+ #undef EIGEN_VECTORIZE_AVX512
+ #endif
+ #ifdef EIGEN_VECTORIZE_AVX512DQ
+ #undef EIGEN_VECTORIZE_AVX512DQ
+ #endif
+ #ifdef EIGEN_VECTORIZE_AVX512ER
+ #undef EIGEN_VECTORIZE_AVX512ER
+ #endif
+ // AVX512BF16 is enabled next to AVX512DQ/AVX512ER above, so it must be disabled with them.
+ #ifdef EIGEN_VECTORIZE_AVX512BF16
+ #undef EIGEN_VECTORIZE_AVX512BF16
+ #endif
+ #endif
+ // NOTE: Confirmed test failures in XCode 11.0, and XCode 11.2 with -macosx-version-min=10.15 and AVX
+ // NOTE using -macosx-version-min=10.15 with Xcode 11.0 results in runtime segmentation faults in many tests, 11.2 produce core dumps in 3 tests
+ // NOTE using -macosx-version-min=10.14 produces functioning and passing tests in all cases
+ // NOTE __clang_version__ "11.0.0 (clang-1100.0.33.8)" XCode 11.0 <- Produces many segfault and core dumping tests
+ // with -macosx-version-min=10.15 and AVX
+ // NOTE __clang_version__ "11.0.0 (clang-1100.0.33.12)" XCode 11.2 <- Produces 3 core dumping tests with
+ // -macosx-version-min=10.15 and AVX
+ #endif
+
+ // include files
+
+ // This extern "C" works around a MINGW-w64 compilation issue
+ // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
+ // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
+ // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
+ // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
+ // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
+ // notice that since these are C headers, the extern "C" is theoretically needed anyways.
+ extern "C" {
+ // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
+ // Doing so triggers some issues with ICC. However, old gcc versions seem not to have this file, thus:
+ #if EIGEN_COMP_ICC >= 1110
+ #include <immintrin.h>
+ #else
+ #include <mmintrin.h>
+ #include <emmintrin.h>
+ #include <xmmintrin.h>
+ #ifdef EIGEN_VECTORIZE_SSE3
+ #include <pmmintrin.h>
+ #endif
+ #ifdef EIGEN_VECTORIZE_SSSE3
+ #include <tmmintrin.h>
+ #endif
+ #ifdef EIGEN_VECTORIZE_SSE4_1
+ #include <smmintrin.h>
+ #endif
+ #ifdef EIGEN_VECTORIZE_SSE4_2
+ #include <nmmintrin.h>
+ #endif
+ #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
+ #include <immintrin.h>
+ #endif
+ #endif
+ } // end extern "C"
+
+ #elif defined __VSX__
+
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_VSX
+ #include <altivec.h>
+ // We need to #undef all these ugly tokens defined in <altivec.h>
+ // => use __vector instead of vector
+ #undef bool
+ #undef vector
+ #undef pixel
+
+ #elif defined __ALTIVEC__
+
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_ALTIVEC
+ #include <altivec.h>
+ // We need to #undef all these ugly tokens defined in <altivec.h>
+ // => use __vector instead of vector
+ #undef bool
+ #undef vector
+ #undef pixel
+
+ #elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
+
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_NEON
+ #include <arm_neon.h>
+
+ // We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and
+ // will not select the backend automatically
+ #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
+
+ #define EIGEN_VECTORIZE
+ #define EIGEN_VECTORIZE_SVE
+ #include <arm_sve.h>
+
+ // Since we depend on knowing SVE vector lengths at compile-time, we need
+ // to ensure a fixed lengths is set
+ #if defined __ARM_FEATURE_SVE_BITS
+ #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
+ #else
+#error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
+#endif
+
+#elif (defined __s390x__ && defined __VEC__)
+
+#define EIGEN_VECTORIZE
+#define EIGEN_VECTORIZE_ZVECTOR
+#include <vecintrin.h>
+
+#elif defined __mips_msa
+
+// Limit MSA optimizations to little-endian CPUs for now.
+// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#if defined(__LP64__)
+#define EIGEN_MIPS_64
+#else
+#define EIGEN_MIPS_32
+#endif
+#define EIGEN_VECTORIZE
+#define EIGEN_VECTORIZE_MSA
+#include <msa.h>
+#endif
+
+#endif
+#endif
+
+// Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all
+// compilers seem to follow this. We therefore include it explicitly.
+// See also: https://bugs.llvm.org/show_bug.cgi?id=47955
+#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
+ #include <arm_fp16.h>
+#endif
+
+#if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380))
+ // We can use the optimized fp16 to float and float to fp16 conversion routines
+ #define EIGEN_HAS_FP16_C
+
+ #if defined(EIGEN_COMP_CLANG)
+ // Workaround for clang: The FP16C intrinsics for clang are included by
+ // immintrin.h, as opposed to emmintrin.h as suggested by Intel:
+ // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711
+ #include <immintrin.h>
+ #endif
+#endif
+
+#if defined EIGEN_CUDACC
+ #define EIGEN_VECTORIZE_GPU
+ #include <vector_types.h>
+ #if EIGEN_CUDA_SDK_VER >= 70500
+ #define EIGEN_HAS_CUDA_FP16
+ #endif
+#endif
+
+#if defined(EIGEN_HAS_CUDA_FP16)
+ #include <cuda_runtime_api.h>
+ #include <cuda_fp16.h>
+#endif
+
+#if defined(EIGEN_HIPCC)
+ #define EIGEN_VECTORIZE_GPU
+ #include <hip/hip_vector_types.h>
+ #define EIGEN_HAS_HIP_FP16
+ #include <hip/hip_fp16.h>
+#endif
+
+
+/** \brief Namespace containing all symbols from the %Eigen library. */
+namespace Eigen {
+
+/** \returns a human-readable, comma-separated list of the SIMD instruction sets selected at
+ * compile time through the EIGEN_VECTORIZE_* macros, or "None" if none of them is defined.
+ * The first matching macro in the chain below decides which string is returned.
+ */
+inline static const char *SimdInstructionSetsInUse(void) {
+#if defined(EIGEN_VECTORIZE_AVX512)
+ return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_AVX)
+ // Entries are separated by ", " like in all the other branches (the comma after AVX was missing).
+ return "AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_SSE4_2)
+ return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
+#elif defined(EIGEN_VECTORIZE_SSE4_1)
+ return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
+#elif defined(EIGEN_VECTORIZE_SSSE3)
+ return "SSE, SSE2, SSE3, SSSE3";
+#elif defined(EIGEN_VECTORIZE_SSE3)
+ return "SSE, SSE2, SSE3";
+#elif defined(EIGEN_VECTORIZE_SSE2)
+ return "SSE, SSE2";
+#elif defined(EIGEN_VECTORIZE_ALTIVEC)
+ return "AltiVec";
+#elif defined(EIGEN_VECTORIZE_VSX)
+ return "VSX";
+#elif defined(EIGEN_VECTORIZE_NEON)
+ return "ARM NEON";
+#elif defined(EIGEN_VECTORIZE_SVE)
+ return "ARM SVE";
+#elif defined(EIGEN_VECTORIZE_ZVECTOR)
+ return "S390X ZVECTOR";
+#elif defined(EIGEN_VECTORIZE_MSA)
+ return "MIPS MSA";
+#else
+ return "None";
+#endif
+}
+
+} // end namespace Eigen
+
+
+#endif // EIGEN_CONFIGURE_VECTORIZATION_H
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index 7587d6842..35dcaa7b3 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -3,6 +3,7 @@
//
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2020, Arm Limited and Contributors
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -25,6 +26,10 @@ const int Dynamic = -1;
*/
const int DynamicIndex = 0xffffff;
+/** This value means that the increment to go from one value to another in a sequence is not constant for each step.
+ */
+const int UndefinedIncr = 0xfffffe;
+
/** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>().
* The value Infinity there means the L-infinity norm.
*/
@@ -152,7 +157,7 @@ const unsigned int DirectAccessBit = 0x40;
/** \deprecated \ingroup flags
*
* means the first coefficient packet is guaranteed to be aligned.
- * An expression cannot has the AlignedBit without the PacketAccessBit flag.
+ * An expression cannot have the AlignedBit without the PacketAccessBit flag.
* In other words, this means we are allow to perform an aligned packet access to the first element regardless
* of the expression kind:
* \code
@@ -251,12 +256,6 @@ enum AlignmentType {
};
/** \ingroup enums
- * Enum used by DenseBase::corner() in Eigen2 compatibility mode. */
-// FIXME after the corner() API change, this was not needed anymore, except by AlignedBox
-// TODO: find out what to do with that. Adapt the AlignedBox API ?
-enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
-
-/** \ingroup enums
* Enum containing possible values for the \p Direction parameter of
* Reverse, PartialReduxExpr and VectorwiseOp. */
enum DirectionType {
@@ -330,9 +329,20 @@ enum StorageOptions {
* Enum for specifying whether to apply or solve on the left or right. */
enum SideType {
/** Apply transformation on the left. */
- OnTheLeft = 1,
+ OnTheLeft = 1,
/** Apply transformation on the right. */
- OnTheRight = 2
+ OnTheRight = 2
+};
+
+/** \ingroup enums
+ * Enum for specifying NaN-propagation behavior, e.g. for coeff-wise min/max. */
+enum NaNPropagationOptions {
+ /** Implementation defined behavior if NaNs are present. */
+ PropagateFast = 0,
+ /** Always propagate NaNs. */
+ PropagateNaN,
+ /** Always propagate not-NaNs. */
+ PropagateNumbers
};
/* the following used to be written as:
@@ -464,6 +474,8 @@ namespace Architecture
AltiVec = 0x2,
VSX = 0x3,
NEON = 0x4,
+ MSA = 0x5,
+ SVE = 0x6,
#if defined EIGEN_VECTORIZE_SSE
Target = SSE
#elif defined EIGEN_VECTORIZE_ALTIVEC
@@ -472,6 +484,10 @@ namespace Architecture
Target = VSX
#elif defined EIGEN_VECTORIZE_NEON
Target = NEON
+#elif defined EIGEN_VECTORIZE_SVE
+ Target = SVE
+#elif defined EIGEN_VECTORIZE_MSA
+ Target = MSA
#else
Target = Generic
#endif
diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h
index 7559e129c..fe0cfec0b 100755
--- a/Eigen/src/Core/util/DisableStupidWarnings.h
+++ b/Eigen/src/Core/util/DisableStupidWarnings.h
@@ -4,7 +4,6 @@
#ifdef _MSC_VER
// 4100 - unreferenced formal parameter (occurred e.g. in aligned_allocator::destroy(pointer p))
// 4101 - unreferenced local variable
- // 4127 - conditional expression is constant
// 4181 - qualifier applied to reference type ignored
// 4211 - nonstandard extension used : redefined extern to static
// 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data
@@ -20,7 +19,7 @@
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning( push )
#endif
- #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)
+ #pragma warning( disable : 4100 4101 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)
#elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
@@ -42,17 +41,40 @@
#pragma clang diagnostic push
#endif
#pragma clang diagnostic ignored "-Wconstant-logical-operand"
+ // Intended for clang >= 3.5: compare (major, minor) as a pair, so that the x.0 .. x.4
+ // releases of later major versions (4.0, 10.0, ...) are matched as well.
+ #if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 5)
+ #pragma clang diagnostic ignored "-Wabsolute-value"
+ #endif
+ #if __clang_major__ >= 10
+ #pragma clang diagnostic ignored "-Wimplicit-int-float-conversion"
+ #endif
+ #if ( defined(__ALTIVEC__) || defined(__VSX__) ) && __cplusplus < 201103L
+ // warning: generic selections are a C11-specific feature
+ // ignoring warnings thrown at vec_ctf in Altivec/PacketMath.h
+ #pragma clang diagnostic ignored "-Wc11-extensions"
+ #endif
-#elif defined __GNUC__ && __GNUC__>=6
+#elif defined __GNUC__
- #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+ #if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
#pragma GCC diagnostic push
#endif
- #pragma GCC diagnostic ignored "-Wignored-attributes"
-
+ // g++ warns about local variables shadowing member functions, which is too strict
+ #pragma GCC diagnostic ignored "-Wshadow"
+ #if __GNUC__ == 4 && __GNUC_MINOR__ < 8
+ // Until g++-4.7 there are warnings when comparing unsigned int vs 0, even in templated functions:
+ #pragma GCC diagnostic ignored "-Wtype-limits"
+ #endif
+ #if __GNUC__>=6
+ #pragma GCC diagnostic ignored "-Wignored-attributes"
+ #endif
+ #if __GNUC__==7
+ // See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89325
+ #pragma GCC diagnostic ignored "-Wattributes"
+ #endif
#endif
#if defined __NVCC__
+ #pragma diag_suppress boolean_controlling_expr_is_constant
// Disable the "statement is unreachable" message
#pragma diag_suppress code_is_unreachable
// Disable the "dynamic initialization in unreachable code" message
@@ -70,6 +92,15 @@
#pragma diag_suppress 2671
#pragma diag_suppress 2735
#pragma diag_suppress 2737
+ #pragma diag_suppress 2739
#endif
+#else
+// warnings already disabled:
+# ifndef EIGEN_WARNINGS_DISABLED_2
+# define EIGEN_WARNINGS_DISABLED_2
+# elif defined(EIGEN_INTERNAL_DEBUGGING)
+# error "Do not include \"DisableStupidWarnings.h\" recursively more than twice!"
+# endif
+
#endif // not EIGEN_WARNINGS_DISABLED
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index ea107393a..2f9cc4491 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -47,11 +47,7 @@ template<typename T> struct NumTraits;
template<typename Derived> struct EigenBase;
template<typename Derived> class DenseBase;
template<typename Derived> class PlainObjectBase;
-
-
-template<typename Derived,
- int Level = internal::accessors_level<Derived>::value >
-class DenseCoeffsBase;
+template<typename Derived, int Level> class DenseCoeffsBase;
template<typename _Scalar, int _Rows, int _Cols,
int _Options = AutoAlign |
@@ -83,6 +79,8 @@ template<typename ExpressionType> class ForceAlignedAccess;
template<typename ExpressionType> class SwapWrapper;
template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block;
+template<typename XprType, typename RowIndices, typename ColIndices> class IndexedView;
+template<typename XprType, int Rows=Dynamic, int Cols=Dynamic, int Order=0> class Reshaped;
template<typename MatrixType, int Size=Dynamic> class VectorBlock;
template<typename MatrixType> class Transpose;
@@ -112,7 +110,7 @@ template<typename _IndicesType> class TranspositionsWrapper;
template<typename Derived,
int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors
> class MapBase;
-template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride;
+template<int OuterStrideAtCompileTime, int InnerStrideAtCompileTime> class Stride;
template<int Value = Dynamic> class InnerStride;
template<int Value = Dynamic> class OuterStride;
template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map;
@@ -133,6 +131,10 @@ template<typename Derived> class SolverBase;
template<typename XprType> class InnerIterator;
namespace internal {
+template<typename XprType> class generic_randaccess_stl_iterator;
+template<typename XprType> class pointer_based_stl_iterator;
+template<typename XprType, DirectionType Direction> class subvector_stl_iterator;
+template<typename XprType, DirectionType Direction> class subvector_stl_reverse_iterator;
template<typename DecompositionType> struct kernel_retval_base;
template<typename DecompositionType> struct kernel_retval;
template<typename DecompositionType> struct image_retval_base;
@@ -178,14 +180,15 @@ template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRh
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_sum_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_difference_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_conj_product_op;
-template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_min_op;
-template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_max_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar, int NaNPropagation=PropagateFast> struct scalar_min_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar, int NaNPropagation=PropagateFast> struct scalar_max_op;
template<typename Scalar> struct scalar_opposite_op;
template<typename Scalar> struct scalar_conjugate_op;
template<typename Scalar> struct scalar_real_op;
template<typename Scalar> struct scalar_imag_op;
template<typename Scalar> struct scalar_abs_op;
template<typename Scalar> struct scalar_abs2_op;
+template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_absolute_difference_op;
template<typename Scalar> struct scalar_sqrt_op;
template<typename Scalar> struct scalar_rsqrt_op;
template<typename Scalar> struct scalar_exp_op;
@@ -202,7 +205,7 @@ template<typename Scalar, typename NewType> struct scalar_cast_op;
template<typename Scalar> struct scalar_random_op;
template<typename Scalar> struct scalar_constant_op;
template<typename Scalar> struct scalar_identity_op;
-template<typename Scalar,bool iscpx> struct scalar_sign_op;
+template<typename Scalar,bool is_complex, bool is_integer> struct scalar_sign_op;
template<typename Scalar,typename ScalarExponent> struct scalar_pow_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_hypot_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
@@ -213,11 +216,27 @@ template<typename Scalar> struct scalar_lgamma_op;
template<typename Scalar> struct scalar_digamma_op;
template<typename Scalar> struct scalar_erf_op;
template<typename Scalar> struct scalar_erfc_op;
+template<typename Scalar> struct scalar_ndtri_op;
template<typename Scalar> struct scalar_igamma_op;
template<typename Scalar> struct scalar_igammac_op;
template<typename Scalar> struct scalar_zeta_op;
template<typename Scalar> struct scalar_betainc_op;
+// Bessel functions in SpecialFunctions module
+template<typename Scalar> struct scalar_bessel_i0_op;
+template<typename Scalar> struct scalar_bessel_i0e_op;
+template<typename Scalar> struct scalar_bessel_i1_op;
+template<typename Scalar> struct scalar_bessel_i1e_op;
+template<typename Scalar> struct scalar_bessel_j0_op;
+template<typename Scalar> struct scalar_bessel_y0_op;
+template<typename Scalar> struct scalar_bessel_j1_op;
+template<typename Scalar> struct scalar_bessel_y1_op;
+template<typename Scalar> struct scalar_bessel_k0_op;
+template<typename Scalar> struct scalar_bessel_k0e_op;
+template<typename Scalar> struct scalar_bessel_k1_op;
+template<typename Scalar> struct scalar_bessel_k1e_op;
+
+
} // end namespace internal
struct IOFormat;
@@ -255,6 +274,7 @@ template<typename MatrixType> class HouseholderQR;
template<typename MatrixType> class ColPivHouseholderQR;
template<typename MatrixType> class FullPivHouseholderQR;
template<typename MatrixType> class CompleteOrthogonalDecomposition;
+template<typename MatrixType> class SVDBase;
template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;
template<typename MatrixType> class BDCSVD;
template<typename MatrixType, int UpLo = Lower> class LLT;
diff --git a/Eigen/src/Core/util/IndexedViewHelper.h b/Eigen/src/Core/util/IndexedViewHelper.h
new file mode 100644
index 000000000..f85de305f
--- /dev/null
+++ b/Eigen/src/Core/util/IndexedViewHelper.h
@@ -0,0 +1,186 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#ifndef EIGEN_INDEXED_VIEW_HELPER_H
+#define EIGEN_INDEXED_VIEW_HELPER_H
+
+namespace Eigen {
+
+namespace internal {
+struct symbolic_last_tag {};
+}
+
+/** \var last
+ * \ingroup Core_Module
+ *
+ * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last element/row/columns
+ * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&).
+ *
+ * This symbolic placeholder supports standard arithmetic operations.
+ *
+ * A typical usage example would be:
+ * \code
+ * using namespace Eigen;
+ * using Eigen::last;
+ * VectorXd v(n);
+ * v(seq(2,last-2)).setOnes();
+ * \endcode
+ *
+ * \sa lastp1
+ */
+static const symbolic::SymbolExpr<internal::symbolic_last_tag> last; // PLEASE use Eigen::last instead of Eigen::placeholders::last
+
+/** \var lastp1
+ * \ingroup Core_Module
+ *
+ * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically
+ * reference the last+1 element/row/columns of the underlying vector or matrix once
+ * passed to DenseBase::operator()(const RowIndices&, const ColIndices&).
+ *
+ * This symbolic placeholder supports standard arithmetic operations.
+ * It is essentially an alias to last+fix<1>.
+ *
+ * \sa last
+ */
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+static const auto lastp1 = last+fix<1>;
+#else
+// Using a FixedExpr<1> expression is important here to make sure the compiler
+// can fully optimize the computation starting indices with zero overhead.
+static const symbolic::AddExpr<symbolic::SymbolExpr<internal::symbolic_last_tag>,symbolic::ValueExpr<Eigen::internal::FixedInt<1> > > lastp1(last+fix<1>());
+#endif
+
+namespace internal {
+
+ // Resolve an index-like argument against the size of the indexed dimension:
+ // symbolic last/end "keywords" are replaced by their true runtime value.
+
+// A plain runtime index is already concrete and is returned as is.
+inline Index eval_expr_given_size(Index idx, Index /*size*/)
+{
+ return idx;
+}
+
+// A compile-time constant is forwarded untouched, keeping its FixedInt<N> type.
+template<int N>
+FixedInt<N> eval_expr_given_size(FixedInt<N> fixedIdx, Index /*size*/)
+{
+ return fixedIdx;
+}
+
+// A symbolic expression is evaluated with the symbol `last` bound to size-1.
+template<typename Derived>
+Index eval_expr_given_size(const symbolic::BaseExpr<Derived> &expr, Index size)
+{
+ return expr.derived().eval(last = size - 1);
+}
+
+// Extract increment/step at compile time
+template<typename T, typename EnableIf = void> struct get_compile_time_incr {
+ enum { value = UndefinedIncr };
+};
+
+// Analogue of std::get<0>(x), but tailored for our needs.
+template<typename T>
+EIGEN_CONSTEXPR Index first(const T& x) EIGEN_NOEXCEPT { return x.first(); }
+
+// IndexedViewCompatibleType/makeIndexedViewCompatible turn an arbitrary object of type T into something usable by MatrixSlice
+// The generic implementation is a no-op: the type is kept unchanged and the object is handed back by const reference
+template<typename T,int XprSize,typename EnableIf=void>
+struct IndexedViewCompatibleType {
+ typedef T type; // primary template: T is used as is
+};
+
+template<typename T,typename Q>
+const T& makeIndexedViewCompatible(const T& x, Index /*size*/, Q) { return x; } // size and the third (tag) argument are ignored here
+
+//--------------------------------------------------------------------------------
+// Handling of a single Index
+//--------------------------------------------------------------------------------
+
+// Range-like wrapper around one index: its size is 1 and every element access yields the wrapped value.
+struct SingleRange {
+ enum { SizeAtCompileTime = 1 };
+
+ Index m_value;
+
+ // Intentionally not explicit: a plain Index converts to a SingleRange.
+ SingleRange(Index val) : m_value(val) {}
+
+ static EIGEN_CONSTEXPR Index size() EIGEN_NOEXCEPT { return 1; }
+ Index first() const EIGEN_NOEXCEPT { return m_value; }
+ Index operator[](Index /*unused*/) const { return m_value; }
+};
+
+template<> struct get_compile_time_incr<SingleRange> {
+ enum { value = 1 }; // 1 or 0 ??
+};
+
+// Turn a single index into something that looks like an array (i.e., that exposes a .size(), and operator[](int) methods)
+template<typename T, int XprSize>
+struct IndexedViewCompatibleType<T,XprSize,typename internal::enable_if<internal::is_integral<T>::value>::type> {
+ // Here we could simply use Array, but maybe it's less work for the compiler to use
+ // a simpler wrapper as SingleRange
+ //typedef Eigen::Array<Index,1,1> type;
+ typedef SingleRange type;
+};
+
+template<typename T, int XprSize>
+struct IndexedViewCompatibleType<T, XprSize, typename enable_if<symbolic::is_symbolic<T>::value>::type> {
+ typedef SingleRange type;
+};
+
+
+template<typename T> // overload selected only when T is a symbolic expression (see the enable_if below)
+typename enable_if<symbolic::is_symbolic<T>::value,SingleRange>::type
+makeIndexedViewCompatible(const T& id, Index size, SpecializedType) {
+ return eval_expr_given_size(id,size); // the evaluated index is implicitly wrapped into the SingleRange return type
+}
+
+//--------------------------------------------------------------------------------
+// Handling of all
+//--------------------------------------------------------------------------------
+
+struct all_t { all_t() {} };
+
+// Range type standing for a symbolic 'all': entry i maps to index i, starting at 0
+template<int XprSize>
+struct AllRange {
+ enum { SizeAtCompileTime = XprSize };
+
+ // Holder of the number of entries (variable_if_dynamic is defined elsewhere)
+ variable_if_dynamic<Index,XprSize> m_size;
+
+ AllRange(Index size = XprSize) : m_size(size) {}
+
+ EIGEN_CONSTEXPR Index first() const EIGEN_NOEXCEPT { return 0; }
+ EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_size.value(); }
+ EIGEN_CONSTEXPR Index operator[](Index i) const EIGEN_NOEXCEPT { return i; }
+};
+
+template<int XprSize>
+struct IndexedViewCompatibleType<all_t,XprSize> {
+ typedef AllRange<XprSize> type;
+};
+
+template<typename XprSizeType>
+inline AllRange<get_fixed_value<XprSizeType>::value> makeIndexedViewCompatible(all_t , XprSizeType size, SpecializedType) {
+ return AllRange<get_fixed_value<XprSizeType>::value>(size);
+}
+
+template<int Size> struct get_compile_time_incr<AllRange<Size> > {
+ enum { value = 1 };
+};
+
+} // end namespace internal
+
+
+/** \var all
+ * \ingroup Core_Module
+ * Can be used as a parameter to DenseBase::operator()(const RowIndices&, const ColIndices&) to index all rows or columns
+ */
+static const Eigen::internal::all_t all; // PLEASE use Eigen::all instead of Eigen::placeholders::all
+
+
+namespace placeholders {
+ typedef symbolic::SymbolExpr<internal::symbolic_last_tag> last_t;
+ typedef symbolic::AddExpr<symbolic::SymbolExpr<internal::symbolic_last_tag>,symbolic::ValueExpr<Eigen::internal::FixedInt<1> > > end_t;
+ typedef Eigen::internal::all_t all_t;
+
+ EIGEN_DEPRECATED static const all_t all = Eigen::all; // PLEASE use Eigen::all instead of Eigen::placeholders::all
+ EIGEN_DEPRECATED static const last_t last = Eigen::last; // PLEASE use Eigen::last instead of Eigen::placeholders::last
+ EIGEN_DEPRECATED static const end_t end = Eigen::lastp1; // PLEASE use Eigen::lastp1 instead of Eigen::placeholders::end
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_INDEXED_VIEW_HELPER_H
diff --git a/Eigen/src/Core/util/IntegralConstant.h b/Eigen/src/Core/util/IntegralConstant.h
new file mode 100644
index 000000000..945d426ea
--- /dev/null
+++ b/Eigen/src/Core/util/IntegralConstant.h
@@ -0,0 +1,272 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#ifndef EIGEN_INTEGRAL_CONSTANT_H
+#define EIGEN_INTEGRAL_CONSTANT_H
+
+namespace Eigen {
+
+namespace internal {
+
+template<int N> class FixedInt;
+template<int N> class VariableAndFixedInt;
+
+/** \internal
+ * \class FixedInt
+ *
+ * This class embeds a compile-time integer \c N.
+ *
+ * It is similar to c++11 std::integral_constant<int,N> but with some additional features
+ * such as:
+ * - implicit conversion to int
+ * - arithmetic and some bitwise operators: -, +, *, /, %, &, |
+ * - c++98/14 compatibility with fix<N> and fix<N>() syntax to define integral constants.
+ *
+ * It is strongly discouraged to directly deal with this class FixedInt. Instances are expected to
+ * be created by the user using Eigen::fix<N> or Eigen::fix<N>(). In C++98-11, the former syntax does
+ * not create a FixedInt<N> instance but rather a pointer to function that needs to be \em cleaned-up
+ * using the generic helper:
+ * \code
+ * internal::cleanup_index_type<T>::type
+ * internal::cleanup_index_type<T,DynamicKey>::type
+ * \endcode
+ * where T can be a FixedInt<N>, a pointer to function FixedInt<N> (*)(), or numerous other integer-like representations.
+ * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values.
+ *
+ * For convenience, you can extract the compile-time value \c N in a generic way using the following helper:
+ * \code
+ * internal::get_fixed_value<T,DefaultVal>::value
+ * \endcode
+ * that will give you \c N if T equals FixedInt<N> or FixedInt<N> (*)(), and \c DefaultVal if T does not embed any compile-time value (e.g., T==int).
+ *
+ * \sa fix<N>, class VariableAndFixedInt
+ */
+template<int N> class FixedInt
+{
+public:
+ static const int value = N;
+ EIGEN_CONSTEXPR operator int() const { return value; }
+ FixedInt() {}
+ FixedInt( VariableAndFixedInt<N> other) {
+ #ifndef EIGEN_INTERNAL_DEBUGGING
+ EIGEN_UNUSED_VARIABLE(other);
+ #endif
+ eigen_internal_assert(int(other)==N);
+ }
+
+ FixedInt<-N> operator-() const { return FixedInt<-N>(); }
+ template<int M>
+ FixedInt<N+M> operator+( FixedInt<M>) const { return FixedInt<N+M>(); }
+ template<int M>
+ FixedInt<N-M> operator-( FixedInt<M>) const { return FixedInt<N-M>(); }
+ template<int M>
+ FixedInt<N*M> operator*( FixedInt<M>) const { return FixedInt<N*M>(); }
+ template<int M>
+ FixedInt<N/M> operator/( FixedInt<M>) const { return FixedInt<N/M>(); }
+ template<int M>
+ FixedInt<N%M> operator%( FixedInt<M>) const { return FixedInt<N%M>(); }
+ template<int M>
+ FixedInt<N|M> operator|( FixedInt<M>) const { return FixedInt<N|M>(); }
+ template<int M>
+ FixedInt<N&M> operator&( FixedInt<M>) const { return FixedInt<N&M>(); }
+
+#if EIGEN_HAS_CXX14_VARIABLE_TEMPLATES
+ // Needed in C++14 to allow fix<N>():
+ FixedInt operator() () const { return *this; }
+
+ VariableAndFixedInt<N> operator() (int val) const { return VariableAndFixedInt<N>(val); }
+#else
+ FixedInt ( FixedInt<N> (*)() ) {}
+#endif
+
+#if EIGEN_HAS_CXX11
+ FixedInt(std::integral_constant<int,N>) {}
+#endif
+};
+
+/** \internal
+ * \class VariableAndFixedInt
+ *
+ * This class embeds both a compile-time integer \c N and a runtime integer.
+ * Both values are supposed to be equal unless the compile-time value \c N has a special
+ * value meaning that the runtime-value should be used. Depending on the context, this special
+ * value can be either Eigen::Dynamic (for positive quantities) or Eigen::DynamicIndex (for
+ * quantities that can be negative).
+ *
+ * It is the return-type of the function Eigen::fix<N>(int), and most of the time this is the only
+ * way it is used. It is strongly discouraged to directly deal with instances of VariableAndFixedInt.
+ * Indeed, in order to write generic code, it is the responsibility of the callee to properly convert
+ * it to either a true compile-time quantity (i.e. a FixedInt<N>), or to a runtime quantity (e.g., an Index)
+ * using the following generic helper:
+ * \code
+ * internal::cleanup_index_type<T>::type
+ * internal::cleanup_index_type<T,DynamicKey>::type
+ * \endcode
+ * where T can be a template instantiation of VariableAndFixedInt or numerous other integer-like representations.
+ * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values.
+ *
+ * For convenience, you can also extract the compile-time value \c N using the following helper:
+ * \code
+ * internal::get_fixed_value<T,DefaultVal>::value
+ * \endcode
+ * that will give you \c N if T equals VariableAndFixedInt<N>, and \c DefaultVal if T does not embed any compile-time value (e.g., T==int).
+ *
+ * \sa fix<N>(int), class FixedInt
+ */
+template<int N> class VariableAndFixedInt
+{
+public:
+ static const int value = N;
+ operator int() const { return m_value; }
+ VariableAndFixedInt(int val) { m_value = val; }
+protected:
+ int m_value;
+};
+
+template<typename T, int Default=Dynamic> struct get_fixed_value {
+ static const int value = Default;
+};
+
+template<int N,int Default> struct get_fixed_value<FixedInt<N>,Default> {
+ static const int value = N;
+};
+
+#if !EIGEN_HAS_CXX14
+template<int N,int Default> struct get_fixed_value<FixedInt<N> (*)(),Default> {
+ static const int value = N;
+};
+#endif
+
+template<int N,int Default> struct get_fixed_value<VariableAndFixedInt<N>,Default> {
+ static const int value = N ;
+};
+
+template<typename T, int N, int Default>
+struct get_fixed_value<variable_if_dynamic<T,N>,Default> {
+ static const int value = N;
+};
+
+template<typename T> EIGEN_DEVICE_FUNC Index get_runtime_value(const T &x) { return x; }
+#if !EIGEN_HAS_CXX14
+template<int N> EIGEN_DEVICE_FUNC Index get_runtime_value(FixedInt<N> (*)()) { return N; }
+#endif
+
+// Cleanup integer/FixedInt/VariableAndFixedInt/etc types:
+
+// By default, no cleanup:
+template<typename T, int DynamicKey=Dynamic, typename EnableIf=void> struct cleanup_index_type { typedef T type; };
+
+// Convert any integral type (e.g., short, int, unsigned int, etc.) to Eigen::Index
+template<typename T, int DynamicKey> struct cleanup_index_type<T,DynamicKey,typename internal::enable_if<internal::is_integral<T>::value>::type> { typedef Index type; };
+
+#if !EIGEN_HAS_CXX14
+// In c++98/c++11, fix<N> is a pointer to function that we better cleanup to a true FixedInt<N>:
+template<int N, int DynamicKey> struct cleanup_index_type<FixedInt<N> (*)(), DynamicKey> { typedef FixedInt<N> type; };
+#endif
+
+// If VariableAndFixedInt does not match DynamicKey, then we turn it to a pure compile-time value:
+template<int N, int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<N>, DynamicKey> { typedef FixedInt<N> type; };
+// If VariableAndFixedInt matches DynamicKey, then we turn it to a pure runtime-value (aka Index):
+template<int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<DynamicKey>, DynamicKey> { typedef Index type; };
+
+#if EIGEN_HAS_CXX11
+template<int N, int DynamicKey> struct cleanup_index_type<std::integral_constant<int,N>, DynamicKey> { typedef FixedInt<N> type; };
+#endif
+
+} // end namespace internal
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+
+#if EIGEN_HAS_CXX14_VARIABLE_TEMPLATES
+template<int N>
+static const internal::FixedInt<N> fix{};
+#else
+template<int N>
+inline internal::FixedInt<N> fix() { return internal::FixedInt<N>(); }
+
+// The generic typename T is mandatory. Otherwise, a code like fix<N> could refer to either the function above or this next overload.
+// This way a code like fix<N> can only refer to the previous function.
+template<int N,typename T>
+inline internal::VariableAndFixedInt<N> fix(T val) { return internal::VariableAndFixedInt<N>(internal::convert_index<int>(val)); }
+#endif
+
+#else // EIGEN_PARSED_BY_DOXYGEN
+
+/** \var fix<N>()
+ * \ingroup Core_Module
+ *
+ * This \em identifier permits to construct an object embedding a compile-time integer \c N.
+ *
+ * \tparam N the compile-time integer value
+ *
+ * It is typically used in conjunction with the Eigen::seq and Eigen::seqN functions to pass compile-time values to them:
+ * \code
+ * seqN(10,fix<4>,fix<-3>) // <=> [10 7 4 1]
+ * \endcode
+ *
+ * See also the function fix(int) to pass both a compile-time and runtime value.
+ *
+ * In c++14, it is implemented as:
+ * \code
+ * template<int N> static const internal::FixedInt<N> fix{};
+ * \endcode
+ * where internal::FixedInt<N> is an internal template class similar to
+ * <a href="http://en.cppreference.com/w/cpp/types/integral_constant">\c std::integral_constant </a><tt> <int,N> </tt>
+ * Here, \c fix<N> is thus an object of type \c internal::FixedInt<N>.
+ *
+ * In c++98/11, it is implemented as a function:
+ * \code
+ * template<int N> inline internal::FixedInt<N> fix();
+ * \endcode
+ * Here, \c fix<N> is thus a pointer to function returning an \c internal::FixedInt<N>.
+ *
+ * If for some reason you want a true object in c++98 then you can write: \code fix<N>() \endcode which is also valid in c++14.
+ *
+ * \sa fix<N>(int), seq, seqN
+ */
+template<int N>
+static const auto fix();
+
+/** \fn fix<N>(int)
+ * \ingroup Core_Module
+ *
+ * This function returns an object embedding both a compile-time integer \c N, and a fallback runtime value \a val.
+ *
+ * \tparam N the compile-time integer value
+ * \param val the fallback runtime integer value
+ *
+ * This function is a more general version of the \ref fix identifier/function that can be used in template code
+ * where the compile-time value could turn out to actually mean "undefined at compile-time". For positive integers
+ * such as a size or a dimension, this case is identified by Eigen::Dynamic, whereas runtime signed integers
+ * (e.g., an increment/stride) are identified as Eigen::DynamicIndex. In such a case, the runtime value \a val
+ * will be used as a fallback.
+ *
+ * A typical use case would be:
+ * \code
+ * template<typename Derived> void foo(const MatrixBase<Derived> &mat) {
+ * const int N = Derived::RowsAtCompileTime==Dynamic ? Dynamic : Derived::RowsAtCompileTime/2;
+ * const int n = mat.rows()/2;
+ * ... mat( seqN(0,fix<N>(n)) ) ...;
+ * }
+ * \endcode
+ * In this example, the function Eigen::seqN knows that the second argument is expected to be a size.
+ * If the passed compile-time value N equals Eigen::Dynamic, then the proxy object returned by fix will be dismissed, and converted to an Eigen::Index of value \c n.
+ * Otherwise, the runtime-value \c n will be dismissed, and the returned ArithmeticSequence will be of the exact same type as <tt> seqN(0,fix<N>) </tt>.
+ *
+ * \sa fix, seqN, class ArithmeticSequence
+ */
+template<int N>
+static const auto fix(int val);
+
+#endif // EIGEN_PARSED_BY_DOXYGEN
+
+} // end namespace Eigen
+
+#endif // EIGEN_INTEGRAL_CONSTANT_H
diff --git a/Eigen/src/Core/util/MKL_support.h b/Eigen/src/Core/util/MKL_support.h
index 26b59669e..17963fad4 100755
--- a/Eigen/src/Core/util/MKL_support.h
+++ b/Eigen/src/Core/util/MKL_support.h
@@ -49,12 +49,17 @@
#define EIGEN_USE_LAPACKE
#endif
-#if defined(EIGEN_USE_MKL_VML)
+#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL)
#define EIGEN_USE_MKL
#endif
+
#if defined EIGEN_USE_MKL
-# include <mkl.h>
+# if (!defined MKL_DIRECT_CALL) && (!defined EIGEN_MKL_NO_DIRECT_CALL)
+# define MKL_DIRECT_CALL
+# define MKL_DIRECT_CALL_JUST_SET
+# endif
+# include <mkl.h>
/*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/
# ifndef INTEL_MKL_VERSION
# undef EIGEN_USE_MKL /* INTEL_MKL_VERSION is not even defined on older versions */
@@ -68,6 +73,9 @@
# undef EIGEN_USE_MKL_VML
# undef EIGEN_USE_LAPACKE_STRICT
# undef EIGEN_USE_LAPACKE
+# ifdef MKL_DIRECT_CALL_JUST_SET
+# undef MKL_DIRECT_CALL
+# endif
# endif
#endif
@@ -108,6 +116,10 @@
#endif
#endif
+#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL)
+#include "../../misc/blas.h"
+#endif
+
namespace Eigen {
typedef std::complex<double> dcomplex;
@@ -121,8 +133,5 @@ typedef int BlasIndex;
} // end namespace Eigen
-#if defined(EIGEN_USE_BLAS)
-#include "../../misc/blas.h"
-#endif
#endif // EIGEN_MKL_SUPPORT_H
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 38d6ddb9a..986c3d44d 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -11,19 +11,56 @@
#ifndef EIGEN_MACROS_H
#define EIGEN_MACROS_H
+//------------------------------------------------------------------------------------------
+// Eigen version and basic defaults
+//------------------------------------------------------------------------------------------
+
#define EIGEN_WORLD_VERSION 3
-#define EIGEN_MAJOR_VERSION 3
-#define EIGEN_MINOR_VERSION 4
+#define EIGEN_MAJOR_VERSION 4
+#define EIGEN_MINOR_VERSION 0
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
EIGEN_MINOR_VERSION>=z))))
+#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
+#else
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor
+#endif
+
+#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
+#endif
+
+// Upperbound on the C++ version to use.
+// Expected values are 03, 11, 14, 17, etc.
+// By default, let's use an arbitrarily large C++ version.
+#ifndef EIGEN_MAX_CPP_VER
+#define EIGEN_MAX_CPP_VER 99
+#endif
+
+/** Allows to disable some optimizations which might affect the accuracy of the result.
+ * Such optimizations are enabled by default; set EIGEN_FAST_MATH to 0 to disable them.
+ * They currently include:
+ * - single precision ArrayBase::sin() and ArrayBase::cos() for SSE and AVX vectorization.
+ */
+#ifndef EIGEN_FAST_MATH
+#define EIGEN_FAST_MATH 1
+#endif
+
+#ifndef EIGEN_STACK_ALLOCATION_LIMIT
+// 131072 == 128 KB
+#define EIGEN_STACK_ALLOCATION_LIMIT 131072
+#endif
+
+//------------------------------------------------------------------------------------------
// Compiler identification, EIGEN_COMP_*
+//------------------------------------------------------------------------------------------
/// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC
#ifdef __GNUC__
- #define EIGEN_COMP_GNUC 1
+ #define EIGEN_COMP_GNUC (__GNUC__*10+__GNUC_MINOR__)
#else
#define EIGEN_COMP_GNUC 0
#endif
@@ -35,6 +72,12 @@
#define EIGEN_COMP_CLANG 0
#endif
+/// \internal EIGEN_COMP_CASTXML set to 1 if being preprocessed by CastXML
+#if defined(__castxml__)
+ #define EIGEN_COMP_CASTXML 1
+#else
+ #define EIGEN_COMP_CASTXML 0
+#endif
/// \internal EIGEN_COMP_LLVM set to 1 if the compiler backend is llvm
#if defined(__llvm__)
@@ -71,14 +114,44 @@
#define EIGEN_COMP_MSVC 0
#endif
+#if defined(__NVCC__)
+#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
+ #define EIGEN_COMP_NVCC ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))
+#elif defined(__CUDACC_VER__)
+ #define EIGEN_COMP_NVCC __CUDACC_VER__
+#else
+ #error "NVCC did not define compiler version."
+#endif
+#else
+ #define EIGEN_COMP_NVCC 0
+#endif
+
// For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC:
-// name ver MSC_VER
-// 2008 9 1500
-// 2010 10 1600
-// 2012 11 1700
-// 2013 12 1800
-// 2015 14 1900
-// "15" 15 1900
+// name ver MSC_VER
+// 2008 9 1500
+// 2010 10 1600
+// 2012 11 1700
+// 2013 12 1800
+// 2015 14 1900
+// "15" 15 1900
+// 2017-14.1 15.0 1910
+// 2017-14.11 15.3 1911
+// 2017-14.12 15.5 1912
+// 2017-14.13 15.6 1913
+// 2017-14.14 15.7 1914
+
+/// \internal EIGEN_COMP_MSVC_LANG set to _MSVC_LANG if the compiler is Microsoft Visual C++, 0 otherwise.
+#if defined(_MSVC_LANG)
+ #define EIGEN_COMP_MSVC_LANG _MSVC_LANG
+#else
+ #define EIGEN_COMP_MSVC_LANG 0
+#endif
+
+// For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC_LANG:
+// MSVC option Standard MSVC_LANG
+// /std:c++14 (default as of VS 2019) C++14 201402L
+// /std:c++17 C++17 201703L
+// /std:c++latest >C++17 >201703L
/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC or clang-cl
#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG)
@@ -87,16 +160,21 @@
#define EIGEN_COMP_MSVC_STRICT 0
#endif
-/// \internal EIGEN_COMP_IBM set to 1 if the compiler is IBM XL C++
-#if defined(__IBMCPP__) || defined(__xlc__)
- #define EIGEN_COMP_IBM 1
+/// \internal EIGEN_COMP_IBM set to xlc version if the compiler is IBM XL C++
+// XLC version
+// 3.1 0x0301
+// 4.5 0x0405
+// 5.0 0x0500
+// 12.1 0x0C01
+#if defined(__IBMCPP__) || defined(__xlc__) || defined(__ibmxl__)
+ #define EIGEN_COMP_IBM __xlC__
#else
#define EIGEN_COMP_IBM 0
#endif
-/// \internal EIGEN_COMP_PGI set to 1 if the compiler is Portland Group Compiler
+/// \internal EIGEN_COMP_PGI set to PGI version if the compiler is Portland Group Compiler
#if defined(__PGI)
- #define EIGEN_COMP_PGI 1
+ #define EIGEN_COMP_PGI (__PGIC__*100+__PGIC_MINOR__)
#else
#define EIGEN_COMP_PGI 0
#endif
@@ -108,7 +186,7 @@
#define EIGEN_COMP_ARM 0
#endif
-/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler
+/// \internal EIGEN_COMP_EMSCRIPTEN set to 1 if the compiler is Emscripten Compiler
#if defined(__EMSCRIPTEN__)
#define EIGEN_COMP_EMSCRIPTEN 1
#else
@@ -142,9 +220,13 @@
#endif
+
+//------------------------------------------------------------------------------------------
// Architecture identification, EIGEN_ARCH_*
+//------------------------------------------------------------------------------------------
+
-#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
+#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC)) || defined(__amd64)
#define EIGEN_ARCH_x86_64 1
#else
#define EIGEN_ARCH_x86_64 0
@@ -170,18 +252,61 @@
#endif
/// \internal EIGEN_ARCH_ARM64 set to 1 if the architecture is ARM64
-#if defined(__aarch64__)
+#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#define EIGEN_ARCH_ARM64 1
#else
#define EIGEN_ARCH_ARM64 0
#endif
+/// \internal EIGEN_ARCH_ARM_OR_ARM64 set to 1 if the architecture is ARM or ARM64
#if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64
#define EIGEN_ARCH_ARM_OR_ARM64 1
#else
#define EIGEN_ARCH_ARM_OR_ARM64 0
#endif
+/// \internal EIGEN_ARCH_ARMV8 set to 1 if the architecture is armv8 or greater.
+#if EIGEN_ARCH_ARM_OR_ARM64 && defined(__ARM_ARCH) && __ARM_ARCH >= 8
+#define EIGEN_ARCH_ARMV8 1
+#else
+#define EIGEN_ARCH_ARMV8 0
+#endif
+
+
+/// \internal EIGEN_HAS_ARM64_FP16 set to 1 if the architecture provides an IEEE
+/// compliant Arm fp16 type
+#if EIGEN_ARCH_ARM64
+ #ifndef EIGEN_HAS_ARM64_FP16
+ #if defined(__ARM_FP16_FORMAT_IEEE)
+ #define EIGEN_HAS_ARM64_FP16 1
+ #else
+ #define EIGEN_HAS_ARM64_FP16 0
+ #endif
+ #endif
+#endif
+
+/// \internal EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC set to 1 if the architecture
+/// supports Neon vector intrinsics for fp16.
+#if EIGEN_ARCH_ARM64
+ #ifndef EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+ #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
+ #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 1
+ #else
+ #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 0
+ #endif
+ #endif
+#endif
+
+/// \internal EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC set to 1 if the architecture
+/// supports Neon scalar intrinsics for fp16.
+#if EIGEN_ARCH_ARM64
+ #ifndef EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC
+ #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC)
+ #define EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC 1
+ #endif
+ #endif
+#endif
+
/// \internal EIGEN_ARCH_MIPS set to 1 if the architecture is MIPS
#if defined(__mips__) || defined(__mips)
#define EIGEN_ARCH_MIPS 1
@@ -212,7 +337,9 @@
+//------------------------------------------------------------------------------------------
// Operating system identification, EIGEN_OS_*
+//------------------------------------------------------------------------------------------
/// \internal EIGEN_OS_UNIX set to 1 if the OS is a unix variant
#if defined(__unix__) || defined(__unix)
@@ -299,9 +426,17 @@
#define EIGEN_OS_WIN_STRICT 0
#endif
-/// \internal EIGEN_OS_SUN set to 1 if the OS is SUN
+/// \internal EIGEN_OS_SUN set to __SUNPRO_C if the OS is SUN
+// compiler solaris __SUNPRO_C
+// version studio
+// 5.7 10 0x570
+// 5.8 11 0x580
+// 5.9 12 0x590
+// 5.10 12.1 0x5100
+// 5.11 12.2 0x5110
+// 5.12 12.3 0x5120
#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__))
- #define EIGEN_OS_SUN 1
+ #define EIGEN_OS_SUN __SUNPRO_C
#else
#define EIGEN_OS_SUN 0
#endif
@@ -314,26 +449,137 @@
#endif
+//------------------------------------------------------------------------------------------
+// Detect GPU compilers and architectures
+//------------------------------------------------------------------------------------------
-#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG
- // see bug 89
- #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
-#else
- #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
+// NVCC is not supported as the target platform for HIPCC
+// Note that this also makes EIGEN_CUDACC and EIGEN_HIPCC mutually exclusive
+#if defined(__NVCC__) && defined(__HIPCC__)
+ #error "NVCC as the target platform for HIPCC is currently not supported."
#endif
-// This macro can be used to prevent from macro expansion, e.g.:
-// std::max EIGEN_NOT_A_MACRO(a,b)
-#define EIGEN_NOT_A_MACRO
+#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)
+ // Means the compiler is either nvcc or clang with CUDA enabled
+ #define EIGEN_CUDACC __CUDACC__
+#endif
-#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
-#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
+#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA)
+ // Means we are generating code for the device
+ #define EIGEN_CUDA_ARCH __CUDA_ARCH__
+#endif
+
+#if defined(EIGEN_CUDACC)
+#include <cuda.h>
+ #define EIGEN_CUDA_SDK_VER (CUDA_VERSION * 10)
#else
-#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor
+ #define EIGEN_CUDA_SDK_VER 0
#endif
-#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
+#if defined(__HIPCC__) && !defined(EIGEN_NO_HIP)
+ // Means the compiler is HIPCC (analogous to EIGEN_CUDACC, but for HIP)
+ #define EIGEN_HIPCC __HIPCC__
+
+ // We need to include hip_runtime.h here because it pulls in
+ // ++ hip_common.h which contains the define for __HIP_DEVICE_COMPILE__
+ // ++ host_defines.h which contains the defines for the __host__ and __device__ macros
+ #include <hip/hip_runtime.h>
+
+ #if defined(__HIP_DEVICE_COMPILE__)
+ // analogous to EIGEN_CUDA_ARCH, but for HIP
+ #define EIGEN_HIP_DEVICE_COMPILE __HIP_DEVICE_COMPILE__
+ #endif
+
+ // For HIP (ROCm 3.5 and higher), we need to explicitly set the launch_bounds attribute
+ // value to 1024. The compiler assigns a default value of 256 when the attribute is not
+ // specified. This results in failures on the HIP platform, for cases when a GPU kernel
+ // without an explicit launch_bounds attribute is called with a threads_per_block value
+ // greater than 256.
+ //
+ // This is a regression in functionality and is expected to be fixed within the next
+ // couple of ROCm releases (compiler will go back to using 1024 value as the default)
+ //
+ // In the meantime, we will use a "only enabled for HIP" macro to set the launch_bounds
+ // attribute.
+
+ #define EIGEN_HIP_LAUNCH_BOUNDS_1024 __launch_bounds__(1024)
+
+#endif
+
+#if !defined(EIGEN_HIP_LAUNCH_BOUNDS_1024)
+#define EIGEN_HIP_LAUNCH_BOUNDS_1024
+#endif // !defined(EIGEN_HIP_LAUNCH_BOUNDS_1024)
+
+// Unify CUDA/HIPCC
+
+#if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)
+//
+// If either EIGEN_CUDACC or EIGEN_HIPCC is defined, then define EIGEN_GPUCC
+//
+#define EIGEN_GPUCC
+//
+// EIGEN_HIPCC implies the HIP compiler and is used to tweak Eigen code for use in HIP kernels
+// EIGEN_CUDACC implies the CUDA compiler and is used to tweak Eigen code for use in CUDA kernels
+//
+// In most cases the same tweaks are required to the Eigen code to enable in both the HIP and CUDA kernels.
+// For those cases, the corresponding code should be guarded with
+// #if defined(EIGEN_GPUCC)
+// instead of
+// #if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC)
+//
+// For cases where the tweak is specific to HIP, the code should be guarded with
+// #if defined(EIGEN_HIPCC)
+//
+// For cases where the tweak is specific to CUDA, the code should be guarded with
+// #if defined(EIGEN_CUDACC)
+//
+#endif
+
+#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
+//
+// If either EIGEN_CUDA_ARCH or EIGEN_HIP_DEVICE_COMPILE is defined, then define EIGEN_GPU_COMPILE_PHASE
+//
+#define EIGEN_GPU_COMPILE_PHASE
+//
+// GPU compilers (HIPCC, NVCC) typically do two passes over the source code,
+// + one to compile the source for the "host" (ie CPU)
+// + another to compile the source for the "device" (ie. GPU)
+//
+// Code that needs to be enabled only during either the "host" or "device" compilation phase
+// needs to be guarded with a macro that indicates the current compilation phase
+//
+// EIGEN_HIP_DEVICE_COMPILE implies the device compilation phase in HIP
+// EIGEN_CUDA_ARCH implies the device compilation phase in CUDA
+//
+// In most cases, the "host" / "device" specific code is the same for both HIP and CUDA
+// For those cases, the code should be guarded with
+// #if defined(EIGEN_GPU_COMPILE_PHASE)
+// instead of
+// #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
+//
+// For cases where the tweak is specific to HIP, the code should be guarded with
+// #if defined(EIGEN_HIP_DEVICE_COMPILE)
+//
+// For cases where the tweak is specific to CUDA, the code should be guarded with
+// #if defined(EIGEN_CUDA_ARCH)
+//
+#endif
+
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+// EIGEN_USE_SYCL is a user-defined macro while __SYCL_DEVICE_ONLY__ is a compiler-defined macro.
+// In most cases we want to check if both macros are defined which can be done using the define below.
+#define SYCL_DEVICE_ONLY
+#endif
+
+//------------------------------------------------------------------------------------------
+// Detect Compiler/Architecture/OS specific features
+//------------------------------------------------------------------------------------------
+
+#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG
+ // see bug 89
+ #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
+#else
+ #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
#endif
// Cross compiler wrapper around LLVM's __has_builtin
@@ -349,26 +595,79 @@
# define __has_feature(x) 0
#endif
-// Upperbound on the C++ version to use.
-// Expected values are 03, 11, 14, 17, etc.
-// By default, let's use an arbitrarily large C++ version.
-#ifndef EIGEN_MAX_CPP_VER
-#define EIGEN_MAX_CPP_VER 99
+// Some old compilers do not support template specializations like:
+// template<typename T,int N> void foo(const T x[N]);
+#if !( EIGEN_COMP_CLANG && ( (EIGEN_COMP_CLANG<309) \
+ || (defined(__apple_build_version__) && (__apple_build_version__ < 9000000))) \
+ || EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<49)
+#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 1
+#else
+#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 0
#endif
-#if EIGEN_MAX_CPP_VER>=11 && (defined(__cplusplus) && (__cplusplus >= 201103L) || EIGEN_COMP_MSVC >= 1900)
+// The macro EIGEN_CPLUSPLUS is a replacement for __cplusplus/_MSVC_LANG that
+// works for both platforms, indicating the C++ standard version number.
+//
+// With MSVC, without defining /Zc:__cplusplus, the __cplusplus macro will
+// report 199711L regardless of the language standard specified via /std.
+// We need to rely on _MSVC_LANG instead, which is only available after
+// VS2015.3.
+#if EIGEN_COMP_MSVC_LANG > 0
+#define EIGEN_CPLUSPLUS EIGEN_COMP_MSVC_LANG
+#elif EIGEN_COMP_MSVC >= 1900
+#define EIGEN_CPLUSPLUS 201103L
+#elif defined(__cplusplus)
+#define EIGEN_CPLUSPLUS __cplusplus
+#else
+#define EIGEN_CPLUSPLUS 0
+#endif
+
+// The macro EIGEN_COMP_CXXVER defines the c++ version expected by the compiler.
+// For instance, if compiling with gcc and -std=c++17, then EIGEN_COMP_CXXVER
+// is defined to 17.
+#if EIGEN_CPLUSPLUS > 201703L
+ #define EIGEN_COMP_CXXVER 20
+#elif EIGEN_CPLUSPLUS > 201402L
+ #define EIGEN_COMP_CXXVER 17
+#elif EIGEN_CPLUSPLUS > 201103L
+ #define EIGEN_COMP_CXXVER 14
+#elif EIGEN_CPLUSPLUS >= 201103L
+ #define EIGEN_COMP_CXXVER 11
+#else
+ #define EIGEN_COMP_CXXVER 03
+#endif
+
+#ifndef EIGEN_HAS_CXX14_VARIABLE_TEMPLATES
+ #if defined(__cpp_variable_templates) && __cpp_variable_templates >= 201304 && EIGEN_MAX_CPP_VER>=14
+ #define EIGEN_HAS_CXX14_VARIABLE_TEMPLATES 1
+ #else
+ #define EIGEN_HAS_CXX14_VARIABLE_TEMPLATES 0
+ #endif
+#endif
+
+
+// The macros EIGEN_HAS_CXX?? define a rough estimate of available c++ features
+// but in practice we should not rely on them but rather on the availability of
+// individual features as defined later.
+// This is why there is no EIGEN_HAS_CXX17.
+// FIXME: get rid of EIGEN_HAS_CXX14 and maybe even EIGEN_HAS_CXX11.
+#if EIGEN_MAX_CPP_VER>=11 && EIGEN_COMP_CXXVER>=11
#define EIGEN_HAS_CXX11 1
#else
#define EIGEN_HAS_CXX11 0
#endif
+#if EIGEN_MAX_CPP_VER>=14 && EIGEN_COMP_CXXVER>=14
+#define EIGEN_HAS_CXX14 1
+#else
+#define EIGEN_HAS_CXX14 0
+#endif
// Do we support r-value references?
#ifndef EIGEN_HAS_RVALUE_REFERENCES
#if EIGEN_MAX_CPP_VER>=11 && \
(__has_feature(cxx_rvalue_references) || \
- (defined(__cplusplus) && __cplusplus >= 201103L) || \
- (EIGEN_COMP_MSVC >= 1600))
+ (EIGEN_COMP_CXXVER >= 11) || (EIGEN_COMP_MSVC >= 1600))
#define EIGEN_HAS_RVALUE_REFERENCES 1
#else
#define EIGEN_HAS_RVALUE_REFERENCES 0
@@ -376,11 +675,14 @@
#endif
// Does the compiler support C99?
+// Need to include <cmath> to make sure _GLIBCXX_USE_C99 gets defined
+#include <cmath>
#ifndef EIGEN_HAS_C99_MATH
#if EIGEN_MAX_CPP_VER>=11 && \
((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \
|| (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \
- || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)))
+ || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \
+ || (EIGEN_COMP_MSVC >= 1900) || defined(SYCL_DEVICE_ONLY))
#define EIGEN_HAS_C99_MATH 1
#else
#define EIGEN_HAS_C99_MATH 0
@@ -388,21 +690,73 @@
#endif
// Does the compiler support result_of?
+// result_of was deprecated in c++17 and removed in c++ 20
#ifndef EIGEN_HAS_STD_RESULT_OF
-#if EIGEN_MAX_CPP_VER>=11 && ((__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L)))
+#if EIGEN_HAS_CXX11 && EIGEN_COMP_CXXVER < 17
#define EIGEN_HAS_STD_RESULT_OF 1
#else
#define EIGEN_HAS_STD_RESULT_OF 0
#endif
#endif
+// Does the compiler support std::hash?
+#ifndef EIGEN_HAS_STD_HASH
+// The std::hash struct is defined in C++11 but is not labelled as a __device__
+// function and is not constexpr, so cannot be used on device.
+#if EIGEN_HAS_CXX11 && !defined(EIGEN_GPU_COMPILE_PHASE)
+#define EIGEN_HAS_STD_HASH 1
+#else
+#define EIGEN_HAS_STD_HASH 0
+#endif
+#endif // EIGEN_HAS_STD_HASH
+
+#ifndef EIGEN_HAS_STD_INVOKE_RESULT
+#if EIGEN_MAX_CPP_VER >= 17 && EIGEN_COMP_CXXVER >= 17
+#define EIGEN_HAS_STD_INVOKE_RESULT 1
+#else
+#define EIGEN_HAS_STD_INVOKE_RESULT 0
+#endif
+#endif
+
+#ifndef EIGEN_HAS_ALIGNAS
+#if EIGEN_MAX_CPP_VER>=11 && EIGEN_HAS_CXX11 && \
+ ( __has_feature(cxx_alignas) \
+ || EIGEN_HAS_CXX14 \
+ || (EIGEN_COMP_MSVC >= 1800) \
+ || (EIGEN_GNUC_AT_LEAST(4,8)) \
+ || (EIGEN_COMP_CLANG>=305) \
+ || (EIGEN_COMP_ICC>=1500) \
+ || (EIGEN_COMP_PGI>=1500) \
+ || (EIGEN_COMP_SUNCC>=0x5130))
+#define EIGEN_HAS_ALIGNAS 1
+#else
+#define EIGEN_HAS_ALIGNAS 0
+#endif
+#endif
+
+// Does the compiler support type_traits?
+// - full support of type traits was added only to GCC 5.1.0.
+// - 20150626 corresponds to the last release of 4.x libstdc++
+#ifndef EIGEN_HAS_TYPE_TRAITS
+#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_HAS_CXX11 || EIGEN_COMP_MSVC >= 1700) \
+ && ((!EIGEN_COMP_GNUC_STRICT) || EIGEN_GNUC_AT_LEAST(5, 1)) \
+ && ((!defined(__GLIBCXX__)) || __GLIBCXX__ > 20150626)
+#define EIGEN_HAS_TYPE_TRAITS 1
+#define EIGEN_INCLUDE_TYPE_TRAITS
+#else
+#define EIGEN_HAS_TYPE_TRAITS 0
+#endif
+#endif
+
// Does the compiler support variadic templates?
#ifndef EIGEN_HAS_VARIADIC_TEMPLATES
-#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
- && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) )
+#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_COMP_CXXVER >= 11) \
+ && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_COMP_NVCC >= 80000) )
// ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
// this prevents nvcc from crashing when compiling Eigen on Tegra X1
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
+#elif EIGEN_MAX_CPP_VER>=11 && (EIGEN_COMP_CXXVER >= 11) && defined(SYCL_DEVICE_ONLY)
+#define EIGEN_HAS_VARIADIC_TEMPLATES 1
#else
#define EIGEN_HAS_VARIADIC_TEMPLATES 0
#endif
@@ -410,27 +764,33 @@
// Does the compiler fully support const expressions? (as in c++14)
#ifndef EIGEN_HAS_CONSTEXPR
+ #if defined(EIGEN_CUDACC)
+ // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
+ #if EIGEN_MAX_CPP_VER>=14 && (EIGEN_COMP_CXXVER >= 11 && (EIGEN_COMP_CLANG || EIGEN_COMP_NVCC >= 70500))
+ #define EIGEN_HAS_CONSTEXPR 1
+ #endif
+ #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (EIGEN_COMP_CXXVER >= 14) || \
+ (EIGEN_GNUC_AT_LEAST(4,8) && (EIGEN_COMP_CXXVER >= 11)) || \
+ (EIGEN_COMP_CLANG >= 306 && (EIGEN_COMP_CXXVER >= 11)))
+ #define EIGEN_HAS_CONSTEXPR 1
+ #endif
-#ifdef __CUDACC__
-// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
-#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500))
- #define EIGEN_HAS_CONSTEXPR 1
-#endif
-#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
- (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)))
-#define EIGEN_HAS_CONSTEXPR 1
-#endif
+ #ifndef EIGEN_HAS_CONSTEXPR
+ #define EIGEN_HAS_CONSTEXPR 0
+ #endif
-#ifndef EIGEN_HAS_CONSTEXPR
-#define EIGEN_HAS_CONSTEXPR 0
-#endif
+#endif // EIGEN_HAS_CONSTEXPR
+#if EIGEN_HAS_CONSTEXPR
+#define EIGEN_CONSTEXPR constexpr
+#else
+#define EIGEN_CONSTEXPR
#endif
// Does the compiler support C++11 math?
// Let's be conservative and enable the default C++11 implementation only if we are sure it exists
#ifndef EIGEN_HAS_CXX11_MATH
- #if EIGEN_MAX_CPP_VER>=11 && ((__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \
+ #if EIGEN_MAX_CPP_VER>=11 && ((EIGEN_COMP_CXXVER > 11) || (EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \
&& (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC))
#define EIGEN_HAS_CXX11_MATH 1
#else
@@ -441,9 +801,8 @@
// Does the compiler support proper C++11 containers?
#ifndef EIGEN_HAS_CXX11_CONTAINERS
#if EIGEN_MAX_CPP_VER>=11 && \
- ((__cplusplus > 201103L) \
- || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
- || EIGEN_COMP_MSVC >= 1900)
+ ((EIGEN_COMP_CXXVER > 11) \
+ || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC>=1400)))
#define EIGEN_HAS_CXX11_CONTAINERS 1
#else
#define EIGEN_HAS_CXX11_CONTAINERS 0
@@ -454,24 +813,88 @@
#ifndef EIGEN_HAS_CXX11_NOEXCEPT
#if EIGEN_MAX_CPP_VER>=11 && \
(__has_feature(cxx_noexcept) \
- || (__cplusplus > 201103L) \
- || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \
- || EIGEN_COMP_MSVC >= 1900)
+ || (EIGEN_COMP_CXXVER > 11) \
+ || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC>=1400)))
#define EIGEN_HAS_CXX11_NOEXCEPT 1
#else
#define EIGEN_HAS_CXX11_NOEXCEPT 0
#endif
#endif
-/** Allows to disable some optimizations which might affect the accuracy of the result.
- * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them.
- * They currently include:
- * - single precision ArrayBase::sin() and ArrayBase::cos() for SSE and AVX vectorization.
- */
-#ifndef EIGEN_FAST_MATH
-#define EIGEN_FAST_MATH 1
+#ifndef EIGEN_HAS_CXX11_ATOMIC  // Does the compiler support C++11 atomics? Detection is skipped if already defined.
+ #if EIGEN_MAX_CPP_VER>=11 && \
+ (__has_feature(cxx_atomic) \
+ || (EIGEN_COMP_CXXVER > 11) \
+ || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_MSVC==0 || EIGEN_COMP_MSVC >= 1700)))  // at exactly C++11: any non-MSVC compiler, or MSVC >= 1700
+ #define EIGEN_HAS_CXX11_ATOMIC 1
+ #else
+ #define EIGEN_HAS_CXX11_ATOMIC 0
+ #endif
+#endif  // EIGEN_HAS_CXX11_ATOMIC
+
+#ifndef EIGEN_HAS_CXX11_OVERRIDE_FINAL  // Does the compiler support the override/final specifiers? Detection is skipped if already defined.
+ #if EIGEN_MAX_CPP_VER>=11 && \
+ (EIGEN_COMP_CXXVER >= 11 || EIGEN_COMP_MSVC >= 1700)  // C++11 language level, or MSVC >= 1700 regardless of the reported level
+ #define EIGEN_HAS_CXX11_OVERRIDE_FINAL 1
+ #else
+ #define EIGEN_HAS_CXX11_OVERRIDE_FINAL 0
+ #endif
+#endif  // EIGEN_HAS_CXX11_OVERRIDE_FINAL
+
+// NOTE: the required Apple clang version is very conservative
+// and it could be that XCode 9 works just fine.
+// NOTE: the MSVC version is based on https://en.cppreference.com/w/cpp/compiler_support
+// and not tested.
+#ifndef EIGEN_HAS_CXX17_OVERALIGN
+#if EIGEN_MAX_CPP_VER>=17 && EIGEN_COMP_CXXVER>=17 && ( \
+ (EIGEN_COMP_MSVC >= 1912) \
+ || (EIGEN_GNUC_AT_LEAST(7,0)) \
+ || ((!defined(__apple_build_version__)) && (EIGEN_COMP_CLANG>=500)) \
+ || (( defined(__apple_build_version__)) && (__apple_build_version__>=10000000)) \
+ )
+#define EIGEN_HAS_CXX17_OVERALIGN 1
+#else
+#define EIGEN_HAS_CXX17_OVERALIGN 0
+#endif
+#endif
+
+#if defined(EIGEN_CUDACC) && EIGEN_HAS_CONSTEXPR
+ // While available already with c++11, this is useful mostly starting with c++14 and relaxed constexpr rules
+ #if defined(__NVCC__)
+ // nvcc considers constexpr functions as __host__ __device__ with the option --expt-relaxed-constexpr
+ #ifdef __CUDACC_RELAXED_CONSTEXPR__
+ #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
+ #endif
+ #elif defined(__clang__) && defined(__CUDA__) && __has_feature(cxx_relaxed_constexpr)
+ // clang++ always considers constexpr functions as implicitly __host__ __device__
+ #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
+ #endif
#endif
+// Does the compiler support the __int128 and __uint128_t extensions for 128-bit
+// integer arithmetic?
+//
+// Clang and GCC define __SIZEOF_INT128__ when these extensions are supported,
+// but we avoid using them in certain cases:
+//
+// * Building using Clang for Windows, where the Clang runtime library has
+// 128-bit support only on LP64 architectures, but Windows is LLP64.
+#ifndef EIGEN_HAS_BUILTIN_INT128
+#if defined(__SIZEOF_INT128__) && !(EIGEN_OS_WIN && EIGEN_COMP_CLANG)
+#define EIGEN_HAS_BUILTIN_INT128 1
+#else
+#define EIGEN_HAS_BUILTIN_INT128 0
+#endif
+#endif
+
+//------------------------------------------------------------------------------------------
+// Preprocessor programming helpers
+//------------------------------------------------------------------------------------------
+
+// This macro can be used to prevent macro expansion, e.g.:
+// std::max EIGEN_NOT_A_MACRO(a,b)
+#define EIGEN_NOT_A_MACRO
+
#define EIGEN_DEBUG_VAR(x) std::cerr << #x << " = " << x << std::endl;
// concatenate two tokens
@@ -487,11 +910,13 @@
// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
// but GCC is still doing fine with just inline.
-#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
+#ifndef EIGEN_STRONG_INLINE
+#if (EIGEN_COMP_MSVC || EIGEN_COMP_ICC) && !defined(EIGEN_GPUCC)
#define EIGEN_STRONG_INLINE __forceinline
#else
#define EIGEN_STRONG_INLINE inline
#endif
+#endif
// EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible
// attribute to maximize inlining. This should only be used when really necessary: in particular,
@@ -501,7 +926,7 @@
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
// : function body not available
// See also bug 1367
-#if EIGEN_GNUC_AT_LEAST(4,2)
+#if EIGEN_GNUC_AT_LEAST(4,2) && !defined(SYCL_DEVICE_ONLY)
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
#else
#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
@@ -521,12 +946,43 @@
#define EIGEN_PERMISSIVE_EXPR
#endif
+// GPU stuff
+
+// Disable some features when compiling with GPU compilers (NVCC/clang-cuda/SYCL/HIPCC)
+#if defined(EIGEN_CUDACC) || defined(SYCL_DEVICE_ONLY) || defined(EIGEN_HIPCC)
+ // Do not try asserts on device code
+ #ifndef EIGEN_NO_DEBUG
+ #define EIGEN_NO_DEBUG
+ #endif
+
+ #ifdef EIGEN_INTERNAL_DEBUGGING
+ #undef EIGEN_INTERNAL_DEBUGGING
+ #endif
+
+ #ifdef EIGEN_EXCEPTIONS
+ #undef EIGEN_EXCEPTIONS
+ #endif
+#endif
+
+#if defined(SYCL_DEVICE_ONLY)
+ #ifndef EIGEN_DONT_VECTORIZE
+ #define EIGEN_DONT_VECTORIZE
+ #endif
+ #define EIGEN_DEVICE_FUNC __attribute__((flatten)) __attribute__((always_inline))
+// All functions callable from CUDA/HIP code must be qualified with __device__
+#elif defined(EIGEN_GPUCC)
+ #define EIGEN_DEVICE_FUNC __host__ __device__
+#else
+ #define EIGEN_DEVICE_FUNC
+#endif
+
+
// this macro allows to get rid of linking errors about multiply defined functions.
// - static is not very good because it prevents definitions from different object files to be merged.
// So static causes the resulting linked executable to be bloated with multiple copies of the same function.
// - inline is not perfect either as it unwantedly hints the compiler toward inlining the function.
-#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline
+#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC
+#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC inline
#ifdef NDEBUG
# ifndef EIGEN_NO_DEBUG
@@ -536,7 +992,11 @@
// eigen_plain_assert is where we implement the workaround for the assert() bug in GCC <= 4.3, see bug 89
#ifdef EIGEN_NO_DEBUG
- #define eigen_plain_assert(x)
+ #ifdef SYCL_DEVICE_ONLY // used to silence the warning on SYCL device
+ #define eigen_plain_assert(x) EIGEN_UNUSED_VARIABLE(x)
+ #else
+ #define eigen_plain_assert(x)
+ #endif
#else
#if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO
namespace Eigen {
@@ -610,7 +1070,7 @@
// Suppresses 'unused variable' warnings.
namespace Eigen {
namespace internal {
- template<typename T> EIGEN_DEVICE_FUNC void ignore_unused_variable(const T&) {}
+ template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ignore_unused_variable(const T&) {}
}
}
#define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var);
@@ -624,169 +1084,75 @@ namespace Eigen {
#endif
-//------------------------------------------------------------------------------------------
-// Static and dynamic alignment control
+// Acts as a barrier preventing operations involving `X` from crossing. This
+// occurs, for example, in the fast rounding trick where a magic constant is
+// added then subtracted, which is otherwise compiled away with -ffast-math.
//
-// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
-// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively.
-// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
-// a default value is automatically computed based on architecture, compiler, and OS.
-//
-// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
-// to be used to declare statically aligned buffers.
-//------------------------------------------------------------------------------------------
-
-
-/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
- * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
- * so that vectorization doesn't affect binary compatibility.
- *
- * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
- * vectorized and non-vectorized code.
- */
-#if (defined __CUDACC__)
- #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
-#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
- #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
-#elif EIGEN_COMP_MSVC
- #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
-#elif EIGEN_COMP_SUNCC
- // FIXME not sure about this one:
- #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
-#else
- #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
-#endif
-
-// If the user explicitly disable vectorization, then we also disable alignment
-#if defined(EIGEN_DONT_VECTORIZE)
- #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
-#elif defined(EIGEN_VECTORIZE_AVX512)
- // 64 bytes static alignmeent is preferred only if really required
- #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
-#elif defined(__AVX__)
- // 32 bytes static alignmeent is preferred only if really required
- #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
-#else
- #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
-#endif
-
-
-// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
-#define EIGEN_MIN_ALIGN_BYTES 16
-
-// Defined the boundary (in bytes) on which the data needs to be aligned. Note
-// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
-// aligned at all regardless of the value of this #define.
-
-#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
-#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
-#endif
-
-// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprectated
-// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
-#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
- #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
- #undef EIGEN_MAX_STATIC_ALIGN_BYTES
- #endif
- #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
-#endif
-
-#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
-
- // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
-
- // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
- // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
- // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
- // certain common platform (compiler+architecture combinations) to avoid these problems.
- // Only static alignment is really problematic (relies on nonstandard compiler extensions),
- // try to keep heap alignment even when we have to disable static alignment.
- #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
- #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
- #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
- // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support.
- // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use.
- // 4.8 and newer seem definitely unaffected.
- #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
- #else
- #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
- #endif
-
- // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
- #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
- && !EIGEN_GCC3_OR_OLDER \
- && !EIGEN_COMP_SUNCC \
- && !EIGEN_OS_QNX
- #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
- #else
- #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
- #endif
-
- #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
- #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+// See bug 1674
+#if !defined(EIGEN_OPTIMIZATION_BARRIER)
+ #if EIGEN_COMP_GNUC
+ // According to https://gcc.gnu.org/onlinedocs/gcc/Constraints.html:
+ // X: Any operand whatsoever.
+ // r: A register operand is allowed provided that it is in a general
+ // register.
+ // g: Any register, memory or immediate integer operand is allowed, except
+ // for registers that are not general registers.
+ // w: (AArch32/AArch64) Floating point register, Advanced SIMD vector
+ // register or SVE vector register.
+ // x: (SSE) Any SSE register.
+ // (AArch64) Like w, but restricted to registers 0 to 15 inclusive.
+ // v: (PowerPC) An Altivec vector register.
+ // wa:(PowerPC) A VSX register.
+ //
+ // "X" (uppercase) should work for all cases, though this seems to fail for
+ // some versions of GCC for arm/aarch64 with
+ // "error: inconsistent operand constraints in an 'asm'"
+ // Clang x86_64/arm/aarch64 seems to require "g" to support both scalars and
+ // vectors, otherwise
+ // "error: non-trivial scalar-to-vector conversion, possible invalid
+ // constraint for vector type"
+ //
+ // GCC for ppc64le generates an internal compiler error with x/X/g.
+ // GCC for AVX generates an internal compiler error with X.
+ //
+ // Tested on icc/gcc/clang for sse, avx, avx2, avx512dq
+ // gcc for arm, aarch64,
+ // gcc for ppc64le,
+ // both vectors and scalars.
+ //
+ // Note that this is restricted to plain types - this will not work
+ // directly for std::complex<T>, Eigen::half, Eigen::bfloat16. For these,
+ // you will need to apply to the underlying POD type.
+ #if EIGEN_ARCH_PPC && EIGEN_COMP_GNUC_STRICT
+ // This seems to be broken on clang. Packet4f is loaded into a single
+ // register rather than a vector, zeroing out some entries. Integer
+ // types also generate a compile error.
+ // General, Altivec, VSX.
+ #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+r,v,wa" (X));
+ #elif EIGEN_ARCH_ARM_OR_ARM64
+ // General, NEON.
+ #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+g,w" (X));
+ #elif EIGEN_ARCH_i386_OR_x86_64
+ // General, SSE.
+ #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+g,x" (X));
+ #else
+ // Not implemented for other architectures.
+ #define EIGEN_OPTIMIZATION_BARRIER(X)
+ #endif
#else
- #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
- #endif
-
-#endif
-
-// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_ALIGN_BYTES
-#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
-#undef EIGEN_MAX_STATIC_ALIGN_BYTES
-#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
-#endif
-
-#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
- #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
-#endif
-
-// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
-// It takes into account both the user choice to explicitly enable/disable alignment (by settting EIGEN_MAX_STATIC_ALIGN_BYTES)
-// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
-// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
-
-
-// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
-#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
-#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
-#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
-#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
-#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
-#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
-#else
-#define EIGEN_ALIGN_MAX
-#endif
-
-
-// Dynamic alignment control
-
-#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
-#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
-#endif
-
-#ifdef EIGEN_DONT_ALIGN
- #ifdef EIGEN_MAX_ALIGN_BYTES
- #undef EIGEN_MAX_ALIGN_BYTES
+ // Not implemented for other compilers.
+ #define EIGEN_OPTIMIZATION_BARRIER(X)
#endif
- #define EIGEN_MAX_ALIGN_BYTES 0
-#elif !defined(EIGEN_MAX_ALIGN_BYTES)
- #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#endif
-#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
-#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+#if EIGEN_COMP_MSVC
+ // NOTE MSVC often gives C4127 warnings with compile-time if statements. See bug 1362.
+ // This workaround is ugly, but it does the job.
+# define EIGEN_CONST_CONDITIONAL(cond) (void)0, cond
#else
-#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
-#endif
-
-
-#ifndef EIGEN_UNALIGNED_VECTORIZE
-#define EIGEN_UNALIGNED_VECTORIZE 1
+# define EIGEN_CONST_CONDITIONAL(cond) cond
#endif
-//----------------------------------------------------------------------
-
-
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
#define EIGEN_RESTRICT
#endif
@@ -794,10 +1160,6 @@ namespace Eigen {
#define EIGEN_RESTRICT __restrict
#endif
-#ifndef EIGEN_STACK_ALLOCATION_LIMIT
-// 131072 == 128 KB
-#define EIGEN_STACK_ALLOCATION_LIMIT 131072
-#endif
#ifndef EIGEN_DEFAULT_IO_FORMAT
#ifdef EIGEN_MAKING_DOCS
@@ -812,7 +1174,23 @@ namespace Eigen {
// just an empty macro !
#define EIGEN_EMPTY
-#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
+
+// When compiling CUDA/HIP device code with NVCC or HIPCC
+// pull in math functions from the global namespace.
+// In host mode, and when device code is compiled with clang,
+// use the std versions.
+#if (defined(EIGEN_CUDA_ARCH) && defined(__NVCC__)) || defined(EIGEN_HIP_DEVICE_COMPILE)
+ #define EIGEN_USING_STD(FUNC) using ::FUNC;
+#else
+ #define EIGEN_USING_STD(FUNC) using std::FUNC;
+#endif
+
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || (EIGEN_COMP_MSVC == 1900 && EIGEN_COMP_NVCC))
+ // For older MSVC versions, as well as 1900 && CUDA 8, using the base operator is necessary,
+ // otherwise we get duplicate definition errors
+ // For later MSVC versions, we require explicit operator= definition, otherwise we get
+ // use of implicitly deleted operator errors.
+ // (cf Bugs 920, 1000, 1324, 2291)
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =;
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
@@ -832,11 +1210,48 @@ namespace Eigen {
#endif
+/**
+ * \internal
+ * \brief Macro to explicitly define the default copy constructor.
+ * This is necessary, because the implicit definition is deprecated if the copy-assignment is overridden.
+ */
+#if EIGEN_HAS_CXX11
+#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS) CLASS(const CLASS&) = default;
+#else
+#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS)
+#endif
+
+
+
/** \internal
* \brief Macro to manually inherit assignment operators.
* This is necessary, because the implicitly defined assignment operator gets deleted when a custom operator= is defined.
+ * With C++11 or later this also default-implements the copy-constructor
*/
-#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)
+#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) \
+ EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
+ EIGEN_DEFAULT_COPY_CONSTRUCTOR(Derived)
+
+/** \internal
+ * \brief Macro to manually define default constructors and destructors.
+ * This is necessary when the copy constructor is re-defined.
+ * For empty helper classes this should usually be protected, to avoid accidentally creating empty objects.
+ *
+ * Hiding the default destructor leads to problems in C++03 mode together with boost::multiprecision
+ */
+#if EIGEN_HAS_CXX11
+#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived) \
+ Derived() = default; \
+ ~Derived() = default;
+#else
+#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived) \
+ Derived() {}; \
+ /* ~Derived() {}; */
+#endif
+
+
+
+
/**
* Just a side note. Commenting within defines works only by documenting
@@ -853,7 +1268,8 @@ namespace Eigen {
typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \
typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \
typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \
- enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
+ enum CompileTimeTraits \
+ { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \
Flags = Eigen::internal::traits<Derived>::Flags, \
SizeAtCompileTime = Base::SizeAtCompileTime, \
@@ -898,6 +1314,14 @@ namespace Eigen {
#define EIGEN_IMPLIES(a,b) (!(a) || (b))
+#if EIGEN_HAS_BUILTIN(__builtin_expect) || EIGEN_COMP_GNUC  // branch-prediction hints, used only when __builtin_expect is available
+#define EIGEN_PREDICT_FALSE(x) (__builtin_expect(x, false))  // hint: x is expected to evaluate to false
+#define EIGEN_PREDICT_TRUE(x) (__builtin_expect(false || (x), true))  // hint: x is expected to be true; `false || (x)` converts x to bool first
+#else
+#define EIGEN_PREDICT_FALSE(x) (x)  // fallback: no hint, the condition is passed through unchanged
+#define EIGEN_PREDICT_TRUE(x) (x)  // fallback: no hint, the condition is passed through unchanged
+#endif
+
// the expression type of a standard coefficient wise binary operation
#define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \
CwiseBinaryOp< \
@@ -929,14 +1353,14 @@ namespace Eigen {
const typename internal::plain_constant_type<EXPR,SCALAR>::type, const EXPR>
// Workaround for MSVC 2010 (see ML thread "patch with compile for for MSVC 2010")
-#if EIGEN_COMP_MSVC_STRICT<=1600
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC_STRICT<=1600)
#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) typename internal::enable_if<true,X>::type
#else
#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) X
#endif
#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \
- template <typename T> EIGEN_DEVICE_FUNC inline \
+ template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type,OPNAME))\
(METHOD)(const T& scalar) const { \
typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type PromotedT; \
@@ -945,7 +1369,7 @@ namespace Eigen {
}
#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
- template <typename T> EIGEN_DEVICE_FUNC inline friend \
+ template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend \
EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type,Derived,OPNAME)) \
(METHOD)(const T& scalar, const StorageBaseType& matrix) { \
typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type PromotedT; \
@@ -958,15 +1382,23 @@ namespace Eigen {
EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME)
+#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) && !defined(EIGEN_HIP_DEVICE_COMPILE)
+ #define EIGEN_EXCEPTIONS
+#endif
+
+
#ifdef EIGEN_EXCEPTIONS
# define EIGEN_THROW_X(X) throw X
# define EIGEN_THROW throw
# define EIGEN_TRY try
# define EIGEN_CATCH(X) catch (X)
#else
-# ifdef __CUDA_ARCH__
+# if defined(EIGEN_CUDA_ARCH)
# define EIGEN_THROW_X(X) asm("trap;")
# define EIGEN_THROW asm("trap;")
+# elif defined(EIGEN_HIP_DEVICE_COMPILE)
+# define EIGEN_THROW_X(X) asm("s_trap 0")
+# define EIGEN_THROW asm("s_trap 0")
# else
# define EIGEN_THROW_X(X) std::abort()
# define EIGEN_THROW std::abort()
@@ -986,7 +1418,47 @@ namespace Eigen {
# define EIGEN_NOEXCEPT
# define EIGEN_NOEXCEPT_IF(x)
# define EIGEN_NO_THROW throw()
-# define EIGEN_EXCEPTION_SPEC(X) throw(X)
+# if EIGEN_COMP_MSVC || EIGEN_COMP_CXXVER>=17
+ // MSVC does not support exception specifications (warning C4290),
+ // and they are deprecated in c++11 anyway. This is even an error in c++17.
+# define EIGEN_EXCEPTION_SPEC(X) throw()
+# else
+# define EIGEN_EXCEPTION_SPEC(X) throw(X)
+# endif
+#endif
+
+#if EIGEN_HAS_VARIADIC_TEMPLATES
+// The all function is used to enable a variadic version of eigen_assert which can take a parameter pack as its input.
+namespace Eigen {
+namespace internal {
+
+inline bool all(){ return true; }  // end of recursion: an empty argument list is trivially true

+template<typename T, typename ...Ts>
+bool all(T t, Ts ... ts){ return t && all(ts...); }  // true iff every argument converts to true; stops recursing at the first false one
+
+}
+}
+#endif
+
+#if EIGEN_HAS_CXX11_OVERRIDE_FINAL
+// provide override and final specifiers if they are available:
+# define EIGEN_OVERRIDE override
+# define EIGEN_FINAL final
+#else
+# define EIGEN_OVERRIDE
+# define EIGEN_FINAL
+#endif
+
+// Wrapping #pragma unroll in a macro since it is required for SYCL
+#if defined(SYCL_DEVICE_ONLY)
+ #if defined(_MSC_VER)
+ #define EIGEN_UNROLL_LOOP __pragma(unroll)
+ #else
+ #define EIGEN_UNROLL_LOOP _Pragma("unroll")
+ #endif
+#else
+ #define EIGEN_UNROLL_LOOP
#endif
#endif // EIGEN_MACROS_H
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index c634d7ea0..875318cdb 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -63,14 +63,28 @@ namespace Eigen {
namespace internal {
-EIGEN_DEVICE_FUNC
+EIGEN_DEVICE_FUNC
inline void throw_std_bad_alloc()
{
#ifdef EIGEN_EXCEPTIONS
throw std::bad_alloc();
#else
std::size_t huge = static_cast<std::size_t>(-1);
+ #if defined(EIGEN_HIPCC)
+ //
+ // calls to "::operator new" are to be treated as opaque function calls (i.e. no inlining),
+ // and as a consequence the code in the #else block triggers the hipcc warning :
+ // "no overloaded function has restriction specifiers that are compatible with the ambient context"
+ //
+ // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects
+ // the same on "operator new"
+ // Reverting code back to the old version in this #if block for the hipcc compiler
+ //
new int[huge];
+ #else
+ void* unused = ::operator new(huge);
+ EIGEN_UNUSED_VARIABLE(unused);
+ #endif
#endif
}
@@ -83,19 +97,26 @@ inline void throw_std_bad_alloc()
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
* Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
*/
-inline void* handmade_aligned_malloc(std::size_t size)
+EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
- void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES);
+ eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2");
+
+ EIGEN_USING_STD(malloc)
+ void *original = malloc(size+alignment);
+
if (original == 0) return 0;
- void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
+ void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
/** \internal Frees memory allocated with handmade_aligned_malloc */
-inline void handmade_aligned_free(void *ptr)
+EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
{
- if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
+ if (ptr) {
+ EIGEN_USING_STD(free)
+ free(*(reinterpret_cast<void**>(ptr) - 1));
+ }
}
/** \internal
@@ -114,7 +135,7 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
void *previous_aligned = static_cast<char *>(original)+previous_offset;
if(aligned!=previous_aligned)
std::memmove(aligned, previous_aligned, size);
-
+
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -142,7 +163,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
-#else
+#else
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{}
#endif
@@ -156,9 +177,12 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
void *result;
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
- result = std::malloc(size);
+
+ EIGEN_USING_STD(malloc)
+ result = malloc(size);
+
#if EIGEN_DEFAULT_ALIGN_BYTES==16
- eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator.");
+ eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator.");
#endif
#else
result = handmade_aligned_malloc(size);
@@ -174,7 +198,10 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
- std::free(ptr);
+
+ EIGEN_USING_STD(free)
+ free(ptr);
+
#else
handmade_aligned_free(ptr);
#endif
@@ -187,7 +214,7 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
*/
inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
{
- EIGEN_UNUSED_VARIABLE(old_size);
+ EIGEN_UNUSED_VARIABLE(old_size)
void *result;
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
@@ -218,7 +245,9 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std:
{
check_that_malloc_is_allowed();
- void *result = std::malloc(size);
+ EIGEN_USING_STD(malloc)
+ void *result = malloc(size);
+
if(!result && size)
throw_std_bad_alloc();
return result;
@@ -232,7 +261,8 @@ template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void
template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
{
- std::free(ptr);
+ EIGEN_USING_STD(free)
+ free(ptr);
}
template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
@@ -331,7 +361,7 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned
template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
{
destruct_elements_of_array<T>(ptr, size);
- aligned_free(ptr);
+ Eigen::internal::aligned_free(ptr);
}
/** \internal Deletes objects constructed with conditional_aligned_new
@@ -471,8 +501,8 @@ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index
}
/** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
- */
-template<typename Index>
+ */
+template<typename Index>
inline Index first_multiple(Index size, Index base)
{
return ((size+base-1)/base)*base;
@@ -493,6 +523,7 @@ template<typename T> struct smart_copy_helper<T,true> {
IntPtr size = IntPtr(end)-IntPtr(start);
if(size==0) return;
eigen_internal_assert(start!=0 && end!=0 && target!=0);
+ EIGEN_USING_STD(memcpy)
memcpy(target, start, size);
}
};
@@ -502,7 +533,7 @@ template<typename T> struct smart_copy_helper<T,false> {
{ std::copy(start, end, target); }
};
-// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
+// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
template<typename T, bool UseMemmove> struct smart_memmove_helper;
template<typename T> void smart_memmove(const T* start, const T* end, T* target)
@@ -522,19 +553,30 @@ template<typename T> struct smart_memmove_helper<T,true> {
template<typename T> struct smart_memmove_helper<T,false> {
static inline void run(const T* start, const T* end, T* target)
- {
+ {
if (UIntPtr(target) < UIntPtr(start))
{
std::copy(start, end, target);
}
- else
+ else
{
std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
- std::copy_backward(start, end, target + count);
+ std::copy_backward(start, end, target + count);
}
}
};
+#if EIGEN_HAS_RVALUE_REFERENCES
+template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
+{
+ return std::move(start, end, target);
+}
+#else
+template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
+{
+ return std::copy(start, end, target);
+}
+#endif
/*****************************************************************************
*** Implementation of runtime stack allocation (falling back to malloc) ***
@@ -542,7 +584,7 @@ template<typename T> struct smart_memmove_helper<T,false> {
// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function
-#ifndef EIGEN_ALLOCA
+#if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE
#if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
#define EIGEN_ALLOCA alloca
#elif EIGEN_COMP_MSVC
@@ -550,6 +592,15 @@ template<typename T> struct smart_memmove_helper<T,false> {
#endif
#endif
+// With clang -Oz -mthumb, alloca changes the stack pointer in a way that is
+// not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because
+// the compiler still emits bad code because stack allocation checks use "<=".
+// TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772
+// is fixed.
+#if defined(__clang__) && defined(__thumb__)
+ #undef EIGEN_ALLOCA
+#endif
+
// This helper class construct the allocated memory, and takes care of destructing and freeing the handled data
// at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions.
template<typename T> class aligned_stack_memory_handler : noncopyable
@@ -561,12 +612,14 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
* In this case, the buffer elements will also be destructed when this handler will be destructed.
* Finally, if \a dealloc is true, then the pointer \a ptr is freed.
**/
+ EIGEN_DEVICE_FUNC
aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
: m_ptr(ptr), m_size(size), m_deallocate(dealloc)
{
if(NumTraits<T>::RequireInitialization && m_ptr)
Eigen::internal::construct_elements_of_array(m_ptr, size);
}
+ EIGEN_DEVICE_FUNC
~aligned_stack_memory_handler()
{
if(NumTraits<T>::RequireInitialization && m_ptr)
@@ -580,6 +633,60 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
bool m_deallocate;
};
+#ifdef EIGEN_ALLOCA
+
+template<typename Xpr, int NbEvaluations,
+ bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic
+ >
+struct local_nested_eval_wrapper
+{
+ static const bool NeedExternalBuffer = false;
+ typedef typename Xpr::Scalar Scalar;
+ typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType;
+ ObjectType object;
+
+ EIGEN_DEVICE_FUNC
+ local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr)
+ {
+ EIGEN_UNUSED_VARIABLE(ptr);
+ eigen_internal_assert(ptr==0);
+ }
+};
+
+template<typename Xpr, int NbEvaluations>
+struct local_nested_eval_wrapper<Xpr,NbEvaluations,true>
+{
+ static const bool NeedExternalBuffer = true;
+ typedef typename Xpr::Scalar Scalar;
+ typedef typename plain_object_eval<Xpr>::type PlainObject;
+ typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
+ ObjectType object;
+
+ EIGEN_DEVICE_FUNC
+ local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
+ : object(ptr==0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()),
+ m_deallocate(ptr==0)
+ {
+ if(NumTraits<Scalar>::RequireInitialization && object.data())
+ Eigen::internal::construct_elements_of_array(object.data(), object.size());
+ object = xpr;
+ }
+
+ EIGEN_DEVICE_FUNC
+ ~local_nested_eval_wrapper()
+ {
+ if(NumTraits<Scalar>::RequireInitialization && object.data())
+ Eigen::internal::destruct_elements_of_array(object.data(), object.size());
+ if(m_deallocate)
+ Eigen::internal::aligned_free(object.data());
+ }
+
+private:
+ bool m_deallocate;
+};
+
+#endif // EIGEN_ALLOCA
+
template<typename T> class scoped_array : noncopyable
{
T* m_ptr;
@@ -603,13 +710,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
{
std::swap(a.ptr(),b.ptr());
}
-
+
} // end namespace internal
/** \internal
- * Declares, allocates and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
- * if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
- * (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap.
+ *
+ * The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates,
+ * and constructs an aligned buffer named NAME of SIZE elements of type TYPE on the stack
+ * if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
+ * (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap.
* The allocated buffer is automatically deleted when exiting the scope of this declaration.
* If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
* Here is an example:
@@ -620,9 +729,17 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
* }
* \endcode
* The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
+ *
+ * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogous to
+ * \code
+ * typename internal::nested_eval<XPR_T,N>::type NAME(XPR);
+ * \endcode
+ * with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown.
+ * This is accomplished through alloca if the latter is supported and if the required number of bytes
+ * does not exceed EIGEN_STACK_ALLOCATION_LIMIT.
*/
#ifdef EIGEN_ALLOCA
-
+
#if EIGEN_DEFAULT_ALIGN_BYTES>0
// We always manually re-align the result of EIGEN_ALLOCA.
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
@@ -639,13 +756,23 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
: Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
+
+ #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \
+ Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
+ ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \
+ ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \
+ typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object)
+
#else
#define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
-
+
+
+#define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR)
+
#endif
@@ -653,32 +780,56 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
*****************************************************************************/
-#if EIGEN_MAX_ALIGN_BYTES!=0
+#if EIGEN_HAS_CXX17_OVERALIGN
+
+// C++17 -> no need to bother about alignment anymore :)
+
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)
+
+#else
+
+// HIP does not support new/delete on device.
+#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+ EIGEN_DEVICE_FUNC \
void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
EIGEN_CATCH (...) { return 0; } \
}
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
+ EIGEN_DEVICE_FUNC \
void *operator new(std::size_t size) { \
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
} \
+ EIGEN_DEVICE_FUNC \
void *operator new[](std::size_t size) { \
return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
} \
+ EIGEN_DEVICE_FUNC \
void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ EIGEN_DEVICE_FUNC \
void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ EIGEN_DEVICE_FUNC \
void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
+ EIGEN_DEVICE_FUNC \
void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
/* in-place new and delete. since (at least afaik) there is no actual */ \
/* memory allocated we can safely let the default implementation handle */ \
/* this particular case. */ \
+ EIGEN_DEVICE_FUNC \
static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
+ EIGEN_DEVICE_FUNC \
static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
+ EIGEN_DEVICE_FUNC \
void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
+ EIGEN_DEVICE_FUNC \
void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
/* nothrow-new (returns zero instead of std::bad_alloc) */ \
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
+ EIGEN_DEVICE_FUNC \
void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
} \
@@ -688,20 +839,34 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
#endif
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
-#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
- EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
+#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
+ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool( \
+ ((Size)!=Eigen::Dynamic) && \
+ (((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES )==0)) || \
+ ((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) || \
+ ((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0)) )))
+
+#endif
/****************************************************************************/
/** \class aligned_allocator
* \ingroup Core_Module
*
-* \brief STL compatible allocator to use with with 16 byte aligned types
+* \brief STL compatible allocator to use with types requiring a non-standard alignment.
+*
+* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
+* By default, it will thus provide at least 16 bytes alignment and more in following cases:
+* - 32 bytes alignment if AVX is enabled.
+* - 64 bytes alignment if AVX512 is enabled.
+*
+* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
+* \link TopicPreprocessorDirectivesPerformance there \endlink.
*
* Example:
* \code
* // Matrix4f requires 16 bytes alignment:
-* std::map< int, Matrix4f, std::less<int>,
+* std::map< int, Matrix4f, std::less<int>,
* aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
* std::map< int, Vector3f > my_map_vec3;
@@ -736,6 +901,15 @@ public:
~aligned_allocator() {}
+ #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
+ // In gcc, std::allocator::max_size() is buggy, which makes gcc trigger a warning:
+ // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
+ // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
+ size_type max_size() const {
+ return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T);
+ }
+ #endif
+
pointer allocate(size_type num, const void* /*hint*/ = 0)
{
internal::check_size_for_overflow<T>(num);
@@ -898,20 +1072,32 @@ inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
{
if(max_std_funcs>=4)
queryCacheSizes_intel_direct(l1,l2,l3);
- else
+ else if(max_std_funcs>=2)
queryCacheSizes_intel_codes(l1,l2,l3);
+ else
+ l1 = l2 = l3 = 0;
}
inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
{
int abcd[4];
abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
- EIGEN_CPUID(abcd,0x80000005,0);
- l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
- abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
- EIGEN_CPUID(abcd,0x80000006,0);
- l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB
- l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB
+
+ // First query the max supported extended function; report 0 for all sizes if 0x80000006 is unavailable.
+ EIGEN_CPUID(abcd,0x80000000,0);
+ if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006))
+ {
+ EIGEN_CPUID(abcd,0x80000005,0);
+ l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
+ abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
+ EIGEN_CPUID(abcd,0x80000006,0);
+ l2 = (abcd[2] >> 16) * 1024; // C[31:16] = L2 cache size in KB
+ l3 = ((abcd[3] & 0xFFFC0000) >> 18) * 512 * 1024; // D[31:18] = L3 cache size in 512KB units (mask must cover bits 31..18)
+ }
+ else
+ {
+ l1 = l2 = l3 = 0;
+ }
}
#endif
@@ -927,7 +1113,7 @@ inline void queryCacheSizes(int& l1, int& l2, int& l3)
// identify the CPU vendor
EIGEN_CPUID(abcd,0x0,0);
- int max_std_funcs = abcd[1];
+ int max_std_funcs = abcd[0];
if(cpuid_is_vendor(abcd,GenuineIntel))
queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 7f6370755..81ae2a32d 100755
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -11,13 +11,54 @@
#ifndef EIGEN_META_H
#define EIGEN_META_H
-#if defined(__CUDA_ARCH__)
-#include <cfloat>
-#include <math_constants.h>
+#if defined(EIGEN_GPU_COMPILE_PHASE)
+
+ #include <cfloat>
+
+ #if defined(EIGEN_CUDA_ARCH)
+ #include <math_constants.h>
+ #endif
+
+ #if defined(EIGEN_HIP_DEVICE_COMPILE)
+ #include "Eigen/src/Core/arch/HIP/hcc/math_constants.h"
+ #endif
+
#endif
-#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L
+// Recent versions of ICC require <cstdint> for pointer types below.
+#define EIGEN_ICC_NEEDS_CSTDINT (EIGEN_COMP_ICC>=1600 && EIGEN_COMP_CXXVER >= 11)
+
+// Define portable (u)int{32,64} types
+#if EIGEN_HAS_CXX11 || EIGEN_ICC_NEEDS_CSTDINT
#include <cstdint>
+namespace Eigen {
+namespace numext {
+typedef std::uint8_t uint8_t;
+typedef std::int8_t int8_t;
+typedef std::uint16_t uint16_t;
+typedef std::int16_t int16_t;
+typedef std::uint32_t uint32_t;
+typedef std::int32_t int32_t;
+typedef std::uint64_t uint64_t;
+typedef std::int64_t int64_t;
+}
+}
+#else
+// Without c++11, all compilers able to compile Eigen also
+// provide the C99 stdint.h header file.
+#include <stdint.h>
+namespace Eigen {
+namespace numext {
+typedef ::uint8_t uint8_t;
+typedef ::int8_t int8_t;
+typedef ::uint16_t uint16_t;
+typedef ::int16_t int16_t;
+typedef ::uint32_t uint32_t;
+typedef ::int32_t int32_t;
+typedef ::uint64_t uint64_t;
+typedef ::int64_t int64_t;
+}
+}
#endif
namespace Eigen {
@@ -43,26 +84,33 @@ namespace internal {
// Only recent versions of ICC complain about using ptrdiff_t to hold pointers,
// and older versions do not provide *intptr_t types.
-#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L
+#if EIGEN_ICC_NEEDS_CSTDINT
typedef std::intptr_t IntPtr;
typedef std::uintptr_t UIntPtr;
#else
typedef std::ptrdiff_t IntPtr;
typedef std::size_t UIntPtr;
#endif
+#undef EIGEN_ICC_NEEDS_CSTDINT
struct true_type { enum { value = 1 }; };
struct false_type { enum { value = 0 }; };
+template<bool Condition>
+struct bool_constant;
+
+template<>
+struct bool_constant<true> : true_type {};
+
+template<>
+struct bool_constant<false> : false_type {};
+
template<bool Condition, typename Then, typename Else>
struct conditional { typedef Then type; };
template<typename Then, typename Else>
struct conditional <false, Then, Else> { typedef Else type; };
-template<typename T, typename U> struct is_same { enum { value = 0 }; };
-template<typename T> struct is_same<T,T> { enum { value = 1 }; };
-
template<typename T> struct remove_reference { typedef T type; };
template<typename T> struct remove_reference<T&> { typedef T type; };
@@ -97,17 +145,65 @@ template<> struct is_arithmetic<unsigned int> { enum { value = true }; };
template<> struct is_arithmetic<signed long> { enum { value = true }; };
template<> struct is_arithmetic<unsigned long> { enum { value = true }; };
-template<typename T> struct is_integral { enum { value = false }; };
-template<> struct is_integral<bool> { enum { value = true }; };
-template<> struct is_integral<char> { enum { value = true }; };
-template<> struct is_integral<signed char> { enum { value = true }; };
-template<> struct is_integral<unsigned char> { enum { value = true }; };
-template<> struct is_integral<signed short> { enum { value = true }; };
-template<> struct is_integral<unsigned short> { enum { value = true }; };
-template<> struct is_integral<signed int> { enum { value = true }; };
-template<> struct is_integral<unsigned int> { enum { value = true }; };
-template<> struct is_integral<signed long> { enum { value = true }; };
-template<> struct is_integral<unsigned long> { enum { value = true }; };
+template<typename T, typename U> struct is_same { enum { value = 0 }; };
+template<typename T> struct is_same<T,T> { enum { value = 1 }; };
+
+template< class T >
+struct is_void : is_same<void, typename remove_const<T>::type> {};
+
+#if EIGEN_HAS_CXX11
+template<> struct is_arithmetic<signed long long> { enum { value = true }; };
+template<> struct is_arithmetic<unsigned long long> { enum { value = true }; };
+using std::is_integral;
+#else
+template<typename T> struct is_integral { enum { value = false }; };
+template<> struct is_integral<bool> { enum { value = true }; };
+template<> struct is_integral<char> { enum { value = true }; };
+template<> struct is_integral<signed char> { enum { value = true }; };
+template<> struct is_integral<unsigned char> { enum { value = true }; };
+template<> struct is_integral<signed short> { enum { value = true }; };
+template<> struct is_integral<unsigned short> { enum { value = true }; };
+template<> struct is_integral<signed int> { enum { value = true }; };
+template<> struct is_integral<unsigned int> { enum { value = true }; };
+template<> struct is_integral<signed long> { enum { value = true }; };
+template<> struct is_integral<unsigned long> { enum { value = true }; };
+#if EIGEN_COMP_MSVC
+template<> struct is_integral<signed __int64> { enum { value = true }; };
+template<> struct is_integral<unsigned __int64> { enum { value = true }; };
+#endif
+#endif
+
+#if EIGEN_HAS_CXX11
+using std::make_unsigned;
+#else
+// TODO: Possibly improve this implementation of make_unsigned.
+// It is currently used only by
+// template<typename Scalar> struct random_default_impl<Scalar, false, true>.
+template<typename> struct make_unsigned;
+template<> struct make_unsigned<char> { typedef unsigned char type; };
+template<> struct make_unsigned<signed char> { typedef unsigned char type; };
+template<> struct make_unsigned<unsigned char> { typedef unsigned char type; };
+template<> struct make_unsigned<signed short> { typedef unsigned short type; };
+template<> struct make_unsigned<unsigned short> { typedef unsigned short type; };
+template<> struct make_unsigned<signed int> { typedef unsigned int type; };
+template<> struct make_unsigned<unsigned int> { typedef unsigned int type; };
+template<> struct make_unsigned<signed long> { typedef unsigned long type; };
+template<> struct make_unsigned<unsigned long> { typedef unsigned long type; };
+#if EIGEN_COMP_MSVC
+template<> struct make_unsigned<signed __int64> { typedef unsigned __int64 type; };
+template<> struct make_unsigned<unsigned __int64> { typedef unsigned __int64 type; };
+#endif
+
+// Some platforms define int64_t as `long long` even for C++03, where
+// `long long` is not guaranteed by the standard. In this case we are missing
+// the definition for make_unsigned. If we just define it, we run into issues
+// where `long long` doesn't exist in some compilers for C++03. We therefore add
+// the specialization for these platforms only.
+#if EIGEN_OS_MAC || EIGEN_COMP_MINGW
+template<> struct make_unsigned<unsigned long long> { typedef unsigned long long type; };
+template<> struct make_unsigned<long long> { typedef unsigned long long type; };
+#endif
+#endif
template <typename T> struct add_const { typedef const T type; };
template <typename T> struct add_const<T&> { typedef T& type; };
@@ -121,6 +217,11 @@ template<typename T> struct add_const_on_value_type<T*> { typedef T const
template<typename T> struct add_const_on_value_type<T* const> { typedef T const* const type; };
template<typename T> struct add_const_on_value_type<T const* const> { typedef T const* const type; };
+#if EIGEN_HAS_CXX11
+
+using std::is_convertible;
+
+#else
template<typename From, typename To>
struct is_convertible_impl
@@ -134,16 +235,19 @@ private:
struct yes {int a[1];};
struct no {int a[2];};
- static yes test(const To&, int);
+ template<typename T>
+ static yes test(T, int);
+
+ template<typename T>
static no test(any_conversion, ...);
public:
- static From ms_from;
+ static typename internal::remove_reference<From>::type* ms_from;
#ifdef __INTEL_COMPILER
#pragma warning push
#pragma warning ( disable : 2259 )
#endif
- enum { value = sizeof(test(ms_from, 0))==sizeof(yes) };
+ enum { value = sizeof(test<To>(*ms_from, 0))==sizeof(yes) };
#ifdef __INTEL_COMPILER
#pragma warning pop
#endif
@@ -152,10 +256,17 @@ public:
template<typename From, typename To>
struct is_convertible
{
- enum { value = is_convertible_impl<typename remove_all<From>::type,
- typename remove_all<To >::type>::value };
+ enum { value = is_convertible_impl<From,To>::value };
};
+template<typename T>
+struct is_convertible<T,T&> { enum { value = false }; };
+
+template<typename T>
+struct is_convertible<const T,const T&> { enum { value = true }; };
+
+#endif
+
/** \internal Allows to enable/disable an overload
* according to a compile time condition.
*/
@@ -164,7 +275,7 @@ template<bool Condition, typename T=void> struct enable_if;
template<typename T> struct enable_if<true,T>
{ typedef T type; };
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11
#if !defined(__FLT_EPSILON__)
#define __FLT_EPSILON__ FLT_EPSILON
#define __DBL_EPSILON__ DBL_EPSILON
@@ -175,7 +286,7 @@ namespace device {
template<typename T> struct numeric_limits
{
EIGEN_DEVICE_FUNC
- static T epsilon() { return 0; }
+ static EIGEN_CONSTEXPR T epsilon() { return 0; }
static T (max)() { assert(false && "Highest not supported for this type"); }
static T (min)() { assert(false && "Lowest not supported for this type"); }
static T infinity() { assert(false && "Infinity not supported for this type"); }
@@ -183,91 +294,130 @@ template<typename T> struct numeric_limits
};
template<> struct numeric_limits<float>
{
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static float epsilon() { return __FLT_EPSILON__; }
EIGEN_DEVICE_FUNC
- static float (max)() { return CUDART_MAX_NORMAL_F; }
- EIGEN_DEVICE_FUNC
+ static float (max)() {
+ #if defined(EIGEN_CUDA_ARCH)
+ return CUDART_MAX_NORMAL_F;
+ #else
+ return HIPRT_MAX_NORMAL_F;
+ #endif
+ }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static float (min)() { return FLT_MIN; }
EIGEN_DEVICE_FUNC
- static float infinity() { return CUDART_INF_F; }
+ static float infinity() {
+ #if defined(EIGEN_CUDA_ARCH)
+ return CUDART_INF_F;
+ #else
+ return HIPRT_INF_F;
+ #endif
+ }
EIGEN_DEVICE_FUNC
- static float quiet_NaN() { return CUDART_NAN_F; }
+ static float quiet_NaN() {
+ #if defined(EIGEN_CUDA_ARCH)
+ return CUDART_NAN_F;
+ #else
+ return HIPRT_NAN_F;
+ #endif
+ }
};
template<> struct numeric_limits<double>
{
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static double epsilon() { return __DBL_EPSILON__; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static double (max)() { return DBL_MAX; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static double (min)() { return DBL_MIN; }
EIGEN_DEVICE_FUNC
- static double infinity() { return CUDART_INF; }
+ static double infinity() {
+ #if defined(EIGEN_CUDA_ARCH)
+ return CUDART_INF;
+ #else
+ return HIPRT_INF;
+ #endif
+ }
EIGEN_DEVICE_FUNC
- static double quiet_NaN() { return CUDART_NAN; }
+ static double quiet_NaN() {
+ #if defined(EIGEN_CUDA_ARCH)
+ return CUDART_NAN;
+ #else
+ return HIPRT_NAN;
+ #endif
+ }
};
template<> struct numeric_limits<int>
{
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static int epsilon() { return 0; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static int (max)() { return INT_MAX; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static int (min)() { return INT_MIN; }
};
template<> struct numeric_limits<unsigned int>
{
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static unsigned int epsilon() { return 0; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static unsigned int (max)() { return UINT_MAX; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static unsigned int (min)() { return 0; }
};
template<> struct numeric_limits<long>
{
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static long epsilon() { return 0; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static long (max)() { return LONG_MAX; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static long (min)() { return LONG_MIN; }
};
template<> struct numeric_limits<unsigned long>
{
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static unsigned long epsilon() { return 0; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static unsigned long (max)() { return ULONG_MAX; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static unsigned long (min)() { return 0; }
};
template<> struct numeric_limits<long long>
{
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static long long epsilon() { return 0; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static long long (max)() { return LLONG_MAX; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static long long (min)() { return LLONG_MIN; }
};
template<> struct numeric_limits<unsigned long long>
{
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static unsigned long long epsilon() { return 0; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static unsigned long long (max)() { return ULLONG_MAX; }
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static unsigned long long (min)() { return 0; }
};
+template<> struct numeric_limits<bool>
+{
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+ static bool epsilon() { return false; }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+ static bool (max)() { return true; }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
+ static bool (min)() { return false; }
+};
}
-#endif
+#endif // defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11
/** \internal
- * A base class do disable default copy ctor and copy assignement operator.
+ * A base class to disable default copy ctor and copy assignment operator.
*/
class noncopyable
{
@@ -279,13 +429,82 @@ protected:
};
/** \internal
- * Convenient struct to get the result type of a unary or binary functor.
+ * Provides access to the number of elements in the object as a compile-time constant expression.
+ * It "returns" Eigen::Dynamic if the size cannot be resolved at compile-time (default).
+ *
+ * Similar to std::tuple_size, but more general.
+ *
+ * It currently supports:
+ * - any types T defining T::SizeAtCompileTime
+ * - plain C arrays as T[N]
+ * - std::array (c++11)
+ * - some internal types such as SingleRange and AllRange
*
- * It supports both the current STL mechanism (using the result_type member) as well as
- * upcoming next STL generation (using a templated result member).
- * If none of these members is provided, then the type of the first argument is returned. FIXME, that behavior is a pretty bad hack.
+ * The second template parameter eases SFINAE-based specializations.
*/
-#if EIGEN_HAS_STD_RESULT_OF
+template<typename T, typename EnableIf = void> struct array_size {
+ enum { value = Dynamic };
+};
+
+template<typename T> struct array_size<T,typename internal::enable_if<((T::SizeAtCompileTime&0)==0)>::type> {
+ enum { value = T::SizeAtCompileTime };
+};
+
+template<typename T, int N> struct array_size<const T (&)[N]> {
+ enum { value = N };
+};
+template<typename T, int N> struct array_size<T (&)[N]> {
+ enum { value = N };
+};
+
+#if EIGEN_HAS_CXX11
+template<typename T, std::size_t N> struct array_size<const std::array<T,N> > {
+ enum { value = N };
+};
+template<typename T, std::size_t N> struct array_size<std::array<T,N> > {
+ enum { value = N };
+};
+#endif
+
+/** \internal
+ * Analogue of the std::size free function.
+ * It returns the size of the container or view \a x of type \c T
+ *
+ * It currently supports:
+ * - any types T defining a member T::size() const
+ * - plain C arrays as T[N]
+ *
+ */
+template<typename T>
+EIGEN_CONSTEXPR Index size(const T& x) { return x.size(); }
+
+template<typename T,std::size_t N>
+EIGEN_CONSTEXPR Index size(const T (&) [N]) { return N; }
+
+/** \internal
+ * Convenient struct to get the result type of a nullary, unary, binary, or
+ * ternary functor.
+ *
+ * Pre C++11:
+ * Supports both a Func::result_type member and templated
+ * Func::result<Func(ArgTypes...)>::type member.
+ *
+ * If none of these members is provided, then the type of the first
+ * argument is returned.
+ *
+ * Post C++11:
+ * This uses std::invoke_result when available, or std::result_of otherwise. However, note the `type` member removes
+ * const and converts references/pointers to their corresponding value type.
+ */
+#if EIGEN_HAS_STD_INVOKE_RESULT
+template<typename T> struct result_of;
+
+template<typename F, typename... ArgTypes>
+struct result_of<F(ArgTypes...)> {
+ typedef typename std::invoke_result<F, ArgTypes...>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+#elif EIGEN_HAS_STD_RESULT_OF
template<typename T> struct result_of {
typedef typename std::result_of<T>::type type1;
typedef typename remove_all<type1>::type type;
@@ -297,6 +516,28 @@ struct has_none {int a[1];};
struct has_std_result_type {int a[2];};
struct has_tr1_result {int a[3];};
+template<typename Func, int SizeOf>
+struct nullary_result_of_select {};
+
+template<typename Func>
+struct nullary_result_of_select<Func, sizeof(has_std_result_type)> {typedef typename Func::result_type type;};
+
+template<typename Func>
+struct nullary_result_of_select<Func, sizeof(has_tr1_result)> {typedef typename Func::template result<Func()>::type type;};
+
+template<typename Func>
+struct result_of<Func()> {
+ template<typename T>
+ static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
+ template<typename T>
+ static has_tr1_result testFunctor(T const *, typename T::template result<T()>::type const * = 0);
+ static has_none testFunctor(...);
+
+ // note that the following indirection is needed for gcc-3.3
+ enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
+ typedef typename nullary_result_of_select<Func, FunctorType>::type type;
+};
+
template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)>
struct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;};
@@ -366,6 +607,45 @@ struct result_of<Func(ArgType0,ArgType1,ArgType2)> {
enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
typedef typename ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, FunctorType>::type type;
};
+
+#endif
+
+#if EIGEN_HAS_STD_INVOKE_RESULT
+template<typename F, typename... ArgTypes>
+struct invoke_result {
+ typedef typename std::invoke_result<F, ArgTypes...>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+#elif EIGEN_HAS_CXX11
+template<typename F, typename... ArgTypes>
+struct invoke_result {
+ typedef typename result_of<F(ArgTypes...)>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+#else
+template<typename F, typename ArgType0 = void, typename ArgType1 = void, typename ArgType2 = void>
+struct invoke_result {
+ typedef typename result_of<F(ArgType0, ArgType1, ArgType2)>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+
+template<typename F>
+struct invoke_result<F, void, void, void> {
+ typedef typename result_of<F()>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+
+template<typename F, typename ArgType0>
+struct invoke_result<F, ArgType0, void, void> {
+ typedef typename result_of<F(ArgType0)>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
+
+template<typename F, typename ArgType0, typename ArgType1>
+struct invoke_result<F, ArgType0, ArgType1, void> {
+ typedef typename result_of<F(ArgType0, ArgType1)>::type type1;
+ typedef typename remove_all<type1>::type type;
+};
#endif
struct meta_yes { char a[1]; };
@@ -375,10 +655,10 @@ struct meta_no { char a[2]; };
template <typename T>
struct has_ReturnType
{
- template <typename C> static meta_yes testFunctor(typename C::ReturnType const *);
- template <typename C> static meta_no testFunctor(...);
+ template <typename C> static meta_yes testFunctor(C const *, typename C::ReturnType const * = 0);
+ template <typename C> static meta_no testFunctor(...);
- enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) };
+ enum { value = sizeof(testFunctor<T>(static_cast<T*>(0))) == sizeof(meta_yes) };
};
template<typename T> const T* return_ptr();
@@ -435,20 +715,25 @@ class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ?
/** \internal Computes the least common multiple of two positive integer A and B
- * at compile-time. It implements a naive algorithm testing all multiples of A.
- * It thus works better if A>=B.
+ * at compile-time.
*/
-template<int A, int B, int K=1, bool Done = ((A*K)%B)==0>
+template<int A, int B, int K=1, bool Done = ((A*K)%B)==0, bool Big=(A>=B)>
struct meta_least_common_multiple
{
enum { ret = meta_least_common_multiple<A,B,K+1>::ret };
};
+template<int A, int B, int K, bool Done>
+struct meta_least_common_multiple<A,B,K,Done,false>
+{
+ enum { ret = meta_least_common_multiple<B,A,K>::ret };
+};
template<int A, int B, int K>
-struct meta_least_common_multiple<A,B,K,true>
+struct meta_least_common_multiple<A,B,K,true,true>
{
enum { ret = A*K };
};
+
/** \internal determines whether the product of two numeric types is allowed and what the return type is */
template<typename T, typename U> struct scalar_product_traits
{
@@ -461,17 +746,27 @@ template<typename T, typename U> struct scalar_product_traits
// typedef typename scalar_product_traits<typename remove_all<ArgType0>::type, typename remove_all<ArgType1>::type>::ReturnType type;
// };
+/** \internal Obtains a POD type suitable to use as storage for an object of a size
+ * of at most Len bytes, aligned as specified by \c Align.
+ */
+template<unsigned Len, unsigned Align>
+struct aligned_storage {
+ struct type {
+ EIGEN_ALIGN_TO_BOUNDARY(Align) unsigned char data[Len];
+ };
+};
+
} // end namespace internal
namespace numext {
-
-#if defined(__CUDA_ARCH__)
+
+#if defined(EIGEN_GPU_COMPILE_PHASE)
template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; }
#else
template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
#endif
-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11
using internal::device::numeric_limits;
#else
using std::numeric_limits;
@@ -480,11 +775,36 @@ using std::numeric_limits;
// Integer division with rounding up.
// T is assumed to be an integer type with a>=0, and b>0
template<typename T>
+EIGEN_DEVICE_FUNC
T div_ceil(const T &a, const T &b)
{
return (a+b-1) / b;
}
+// The aim of the following functions is to bypass -Wfloat-equal warnings
+// when we really want a strict equality comparison on floating points.
+template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+bool equal_strict(const X& x,const Y& y) { return x == y; }
+
+#if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }
+
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); }
+#endif
+
+template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+bool not_equal_strict(const X& x,const Y& y) { return x != y; }
+
+#if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }
+
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
+#endif
+
} // end namespace numext
} // end namespace Eigen
diff --git a/Eigen/src/Core/util/ReenableStupidWarnings.h b/Eigen/src/Core/util/ReenableStupidWarnings.h
index 86b60f52f..1ce6fd1b0 100644
--- a/Eigen/src/Core/util/ReenableStupidWarnings.h
+++ b/Eigen/src/Core/util/ReenableStupidWarnings.h
@@ -1,4 +1,8 @@
-#ifdef EIGEN_WARNINGS_DISABLED
+#ifdef EIGEN_WARNINGS_DISABLED_2
+// "DisableStupidWarnings.h" was included twice recursively: Do not reenable warnings yet!
+# undef EIGEN_WARNINGS_DISABLED_2
+
+#elif defined(EIGEN_WARNINGS_DISABLED)
#undef EIGEN_WARNINGS_DISABLED
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
@@ -8,7 +12,7 @@
#pragma warning pop
#elif defined __clang__
#pragma clang diagnostic pop
- #elif defined __GNUC__ && __GNUC__>=6
+ #elif defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
#pragma GCC diagnostic pop
#endif
diff --git a/Eigen/src/Core/util/ReshapedHelper.h b/Eigen/src/Core/util/ReshapedHelper.h
new file mode 100644
index 000000000..412432132
--- /dev/null
+++ b/Eigen/src/Core/util/ReshapedHelper.h
@@ -0,0 +1,51 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#ifndef EIGEN_RESHAPED_HELPER_H
+#define EIGEN_RESHAPED_HELPER_H
+
+namespace Eigen {
+
+enum AutoSize_t { AutoSize };
+const int AutoOrder = 2;
+
+namespace internal {
+
+template<typename SizeType,typename OtherSize, int TotalSize>
+struct get_compiletime_reshape_size {
+ enum { value = get_fixed_value<SizeType>::value };
+};
+
+template<typename SizeType>
+Index get_runtime_reshape_size(SizeType size, Index /*other*/, Index /*total*/) {
+ return internal::get_runtime_value(size);
+}
+
+template<typename OtherSize, int TotalSize>
+struct get_compiletime_reshape_size<AutoSize_t,OtherSize,TotalSize> {
+ enum {
+ other_size = get_fixed_value<OtherSize>::value,
+ value = (TotalSize==Dynamic || other_size==Dynamic) ? Dynamic : TotalSize / other_size };
+};
+
+inline Index get_runtime_reshape_size(AutoSize_t /*size*/, Index other, Index total) {
+ return total/other;
+}
+
+template<int Flags, int Order>
+struct get_compiletime_reshape_order {
+ enum { value = Order == AutoOrder ? Flags & RowMajorBit : Order };
+};
+
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_RESHAPED_HELPER_H
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h
index 983361a45..c45de5901 100644
--- a/Eigen/src/Core/util/StaticAssert.h
+++ b/Eigen/src/Core/util/StaticAssert.h
@@ -24,9 +24,10 @@
*
*/
+#ifndef EIGEN_STATIC_ASSERT
#ifndef EIGEN_NO_STATIC_ASSERT
- #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600))
+ #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (EIGEN_COMP_CXXVER >= 11) || (EIGEN_COMP_MSVC >= 1600))
// if native static_assert is enabled, let's use it
#define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);
@@ -44,64 +45,68 @@
struct static_assertion<true>
{
enum {
- YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX,
- YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES,
- YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES,
- THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
- THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
- THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
- OUT_OF_RANGE_ACCESS,
- YOU_MADE_A_PROGRAMMING_MISTAKE,
- EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
- EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
- YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR,
- YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR,
- UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC,
- THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES,
- FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED,
- NUMERIC_TYPE_MUST_BE_REAL,
- COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED,
- WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED,
- THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE,
- INVALID_MATRIX_PRODUCT,
- INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS,
- INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION,
- YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY,
- THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES,
- THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES,
- INVALID_MATRIX_TEMPLATE_PARAMETERS,
- INVALID_MATRIXBASE_TEMPLATE_PARAMETERS,
- BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
- THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX,
- THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE,
- THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES,
- YOU_ALREADY_SPECIFIED_THIS_STRIDE,
- INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION,
- THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD,
- PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1,
- THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS,
- YOU_CANNOT_MIX_ARRAYS_AND_MATRICES,
- YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION,
- THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY,
- YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT,
- THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS,
- THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS,
- THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL,
- THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES,
- YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED,
- YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
- THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
- THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
- OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
- IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
- STORAGE_LAYOUT_DOES_NOT_MATCH,
- EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE,
- THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS,
- MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
- THIS_TYPE_IS_NOT_SUPPORTED,
- STORAGE_KIND_MUST_MATCH,
- STORAGE_INDEX_MUST_MATCH,
- CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY
+ YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1,
+ YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1,
+ YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1,
+ THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1,
+ THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1,
+ THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1,
+ OUT_OF_RANGE_ACCESS=1,
+ YOU_MADE_A_PROGRAMMING_MISTAKE=1,
+ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1,
+ EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1,
+ YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1,
+ YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1,
+ UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1,
+ THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1,
+ FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1,
+ NUMERIC_TYPE_MUST_BE_REAL=1,
+ COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1,
+ WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1,
+ THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1,
+ INVALID_MATRIX_PRODUCT=1,
+ INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1,
+ INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1,
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1,
+ THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1,
+ THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1,
+ INVALID_MATRIX_TEMPLATE_PARAMETERS=1,
+ INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1,
+ BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1,
+ THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1,
+ THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1,
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1,
+ YOU_ALREADY_SPECIFIED_THIS_STRIDE=1,
+ INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1,
+ THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1,
+ PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1,
+ THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1,
+ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1,
+ YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1,
+ THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1,
+ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1,
+ THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1,
+ THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1,
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1,
+ THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1,
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1,
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1,
+ THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1,
+ THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1,
+ OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1,
+ IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1,
+ STORAGE_LAYOUT_DOES_NOT_MATCH=1,
+ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1,
+ THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1,
+ MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1,
+ THIS_TYPE_IS_NOT_SUPPORTED=1,
+ STORAGE_KIND_MUST_MATCH=1,
+ STORAGE_INDEX_MUST_MATCH=1,
+ CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1,
+ SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1,
+ INVALID_TEMPLATE_PARAMETER=1,
+ GPU_TENSOR_CONTRACTION_DOES_NOT_SUPPORT_OUTPUT_KERNELS=1,
+ THE_ARRAY_SIZE_SHOULD_EQUAL_WITH_PACKET_SIZE=1
};
};
@@ -131,7 +136,7 @@
#define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);
#endif // EIGEN_NO_STATIC_ASSERT
-
+#endif // EIGEN_STATIC_ASSERT
// static assertion failing if the type \a TYPE is not a vector type
#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \
@@ -180,7 +185,7 @@
)
#define EIGEN_STATIC_ASSERT_NON_INTEGER(TYPE) \
- EIGEN_STATIC_ASSERT(!NumTraits<TYPE>::IsInteger, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
+ EIGEN_STATIC_ASSERT(!Eigen::NumTraits<TYPE>::IsInteger, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
// static assertion failing if it is guaranteed at compile-time that the two matrix expression types have different sizes
@@ -190,8 +195,8 @@
YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES)
#define EIGEN_STATIC_ASSERT_SIZE_1x1(TYPE) \
- EIGEN_STATIC_ASSERT((TYPE::RowsAtCompileTime == 1 || TYPE::RowsAtCompileTime == Dynamic) && \
- (TYPE::ColsAtCompileTime == 1 || TYPE::ColsAtCompileTime == Dynamic), \
+ EIGEN_STATIC_ASSERT((TYPE::RowsAtCompileTime == 1 || TYPE::RowsAtCompileTime == Eigen::Dynamic) && \
+ (TYPE::ColsAtCompileTime == 1 || TYPE::ColsAtCompileTime == Eigen::Dynamic), \
THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS)
#define EIGEN_STATIC_ASSERT_LVALUE(Derived) \
diff --git a/Eigen/src/Core/util/SymbolicIndex.h b/Eigen/src/Core/util/SymbolicIndex.h
new file mode 100644
index 000000000..354dd9add
--- /dev/null
+++ b/Eigen/src/Core/util/SymbolicIndex.h
@@ -0,0 +1,293 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SYMBOLIC_INDEX_H
+#define EIGEN_SYMBOLIC_INDEX_H
+
+namespace Eigen {
+
+/** \namespace Eigen::symbolic
+ * \ingroup Core_Module
+ *
+ * This namespace defines a set of classes and functions to build and evaluate symbolic expressions of scalar type Index.
+ * Here is a simple example:
+ *
+ * \code
+ * // First step, defines symbols:
+ * struct x_tag {}; static const symbolic::SymbolExpr<x_tag> x;
+ * struct y_tag {}; static const symbolic::SymbolExpr<y_tag> y;
+ * struct z_tag {}; static const symbolic::SymbolExpr<z_tag> z;
+ *
+ * // Defines an expression:
+ * auto expr = (x+3)/y+z;
+ *
+ * // And evaluate it: (c++14)
+ * std::cout << expr.eval(x=6,y=3,z=-13) << "\n";
+ *
+ * // In c++98/11, only one symbol per expression is supported for now:
+ * auto expr98 = (3-x)/2;
+ * std::cout << expr98.eval(x=6) << "\n";
+ * \endcode
+ *
+ * It is currently only used internally to define and manipulate the Eigen::last and Eigen::lastp1 symbols in Eigen::seq and Eigen::seqN.
+ *
+ */
+namespace symbolic {
+
+template<typename Tag> class Symbol;
+template<typename Arg0> class NegateExpr;
+template<typename Arg1,typename Arg2> class AddExpr;
+template<typename Arg1,typename Arg2> class ProductExpr;
+template<typename Arg1,typename Arg2> class QuotientExpr;
+
+// A simple wrapper around an integral value to provide the eval method.
+// We could also use a free-function symbolic_eval...
+template<typename IndexType=Index>
+class ValueExpr {
+public:
+ ValueExpr(IndexType val) : m_value(val) {}
+ template<typename T>
+ IndexType eval_impl(const T&) const { return m_value; }
+protected:
+ IndexType m_value;
+};
+
+// Specialization for compile-time value,
+// It is similar to ValueExpr(N) but this version helps the compiler to generate better code.
+template<int N>
+class ValueExpr<internal::FixedInt<N> > {
+public:
+ ValueExpr() {}
+ template<typename T>
+ EIGEN_CONSTEXPR Index eval_impl(const T&) const { return N; }
+};
+
+
+/** \class BaseExpr
+ * \ingroup Core_Module
+ * Common base class of any symbolic expressions
+ */
+template<typename Derived>
+class BaseExpr
+{
+public:
+ const Derived& derived() const { return *static_cast<const Derived*>(this); }
+
+ /** Evaluate the expression given the \a values of the symbols.
+ *
+ * \param values defines the values of the symbols, it can either be a SymbolValue or a std::tuple of SymbolValue
+ * as constructed by SymbolExpr::operator=.
+ *
+ */
+ template<typename T>
+ Index eval(const T& values) const { return derived().eval_impl(values); }
+
+#if EIGEN_HAS_CXX14
+ template<typename... Types>
+ Index eval(Types&&... values) const { return derived().eval_impl(std::make_tuple(values...)); }
+#endif
+
+ NegateExpr<Derived> operator-() const { return NegateExpr<Derived>(derived()); }
+
+ AddExpr<Derived,ValueExpr<> > operator+(Index b) const
+ { return AddExpr<Derived,ValueExpr<> >(derived(), b); }
+ AddExpr<Derived,ValueExpr<> > operator-(Index a) const
+ { return AddExpr<Derived,ValueExpr<> >(derived(), -a); }
+ ProductExpr<Derived,ValueExpr<> > operator*(Index a) const
+ { return ProductExpr<Derived,ValueExpr<> >(derived(),a); }
+ QuotientExpr<Derived,ValueExpr<> > operator/(Index a) const
+ { return QuotientExpr<Derived,ValueExpr<> >(derived(),a); }
+
+ friend AddExpr<Derived,ValueExpr<> > operator+(Index a, const BaseExpr& b)
+ { return AddExpr<Derived,ValueExpr<> >(b.derived(), a); }
+ friend AddExpr<NegateExpr<Derived>,ValueExpr<> > operator-(Index a, const BaseExpr& b)
+ { return AddExpr<NegateExpr<Derived>,ValueExpr<> >(-b.derived(), a); }
+ friend ProductExpr<ValueExpr<>,Derived> operator*(Index a, const BaseExpr& b)
+ { return ProductExpr<ValueExpr<>,Derived>(a,b.derived()); }
+ friend QuotientExpr<ValueExpr<>,Derived> operator/(Index a, const BaseExpr& b)
+ { return QuotientExpr<ValueExpr<>,Derived>(a,b.derived()); }
+
+ template<int N>
+ AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>) const
+ { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); }
+ template<int N>
+ AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N>) const
+ { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); }
+ template<int N>
+ ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N>) const
+ { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); }
+ template<int N>
+ QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N>) const
+ { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); }
+
+ template<int N>
+ friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>, const BaseExpr& b)
+ { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); }
+ template<int N>
+ friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N>, const BaseExpr& b)
+ { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); }
+ template<int N>
+ friend ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N>, const BaseExpr& b)
+ { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); }
+ template<int N>
+ friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N>, const BaseExpr& b)
+ { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); }
+
+#if (!EIGEN_HAS_CXX14)
+ template<int N>
+ AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)()) const
+ { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); }
+ template<int N>
+ AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N> (*)()) const
+ { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); }
+ template<int N>
+ ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N> (*)()) const
+ { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); }
+ template<int N>
+ QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N> (*)()) const
+ { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); }
+
+ template<int N>
+ friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)(), const BaseExpr& b)
+ { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); }
+ template<int N>
+ friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N> (*)(), const BaseExpr& b)
+ { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); }
+ template<int N>
+ friend ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N> (*)(), const BaseExpr& b)
+ { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); }
+ template<int N>
+ friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N> (*)(), const BaseExpr& b)
+ { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); }
+#endif
+
+
+ template<typename OtherDerived>
+ AddExpr<Derived,OtherDerived> operator+(const BaseExpr<OtherDerived> &b) const
+ { return AddExpr<Derived,OtherDerived>(derived(), b.derived()); }
+
+ template<typename OtherDerived>
+ AddExpr<Derived,NegateExpr<OtherDerived> > operator-(const BaseExpr<OtherDerived> &b) const
+ { return AddExpr<Derived,NegateExpr<OtherDerived> >(derived(), -b.derived()); }
+
+ template<typename OtherDerived>
+ ProductExpr<Derived,OtherDerived> operator*(const BaseExpr<OtherDerived> &b) const
+ { return ProductExpr<Derived,OtherDerived>(derived(), b.derived()); }
+
+ template<typename OtherDerived>
+ QuotientExpr<Derived,OtherDerived> operator/(const BaseExpr<OtherDerived> &b) const
+ { return QuotientExpr<Derived,OtherDerived>(derived(), b.derived()); }
+};
+
+template<typename T>
+struct is_symbolic {
+ // BaseExpr has no conversion ctor, so we only have to check whether T can be statically cast to its base class BaseExpr<T>.
+ enum { value = internal::is_convertible<T,BaseExpr<T> >::value };
+};
+
+/** Represents the actual value of a symbol identified by its tag
+ *
+ * It is the return type of SymbolExpr::operator=, and most of the time this is the only way it is used.
+ */
+template<typename Tag>
+class SymbolValue
+{
+public:
+ /** Constructor from the value \a val */
+ SymbolValue(Index val) : m_value(val) {}
+
+ /** \returns the stored value of the symbol */
+ Index value() const { return m_value; }
+protected:
+ Index m_value;
+};
+
+/** Expression of a symbol uniquely identified by the template parameter type \c tag */
+template<typename tag>
+class SymbolExpr : public BaseExpr<SymbolExpr<tag> >
+{
+public:
+ /** Alias to the template parameter \c tag */
+ typedef tag Tag;
+
+ SymbolExpr() {}
+
+ /** Associate the value \a val to the given symbol \c *this, uniquely identified by its \c Tag.
+ *
+ * The returned object should be passed to BaseExpr::eval() to evaluate a given expression with this specified run-time value.
+ */
+ SymbolValue<Tag> operator=(Index val) const {
+ return SymbolValue<Tag>(val);
+ }
+
+ Index eval_impl(const SymbolValue<Tag> &values) const { return values.value(); }
+
+#if EIGEN_HAS_CXX14
+ // C++14 versions suitable for multiple symbols
+ template<typename... Types>
+ Index eval_impl(const std::tuple<Types...>& values) const { return std::get<SymbolValue<Tag> >(values).value(); }
+#endif
+};
+
+template<typename Arg0>
+class NegateExpr : public BaseExpr<NegateExpr<Arg0> >
+{
+public:
+ NegateExpr(const Arg0& arg0) : m_arg0(arg0) {}
+
+ template<typename T>
+ Index eval_impl(const T& values) const { return -m_arg0.eval_impl(values); }
+protected:
+ Arg0 m_arg0;
+};
+
+template<typename Arg0, typename Arg1>
+class AddExpr : public BaseExpr<AddExpr<Arg0,Arg1> >
+{
+public:
+ AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
+
+ template<typename T>
+ Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) + m_arg1.eval_impl(values); }
+protected:
+ Arg0 m_arg0;
+ Arg1 m_arg1;
+};
+
+template<typename Arg0, typename Arg1>
+class ProductExpr : public BaseExpr<ProductExpr<Arg0,Arg1> >
+{
+public:
+ ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
+
+ template<typename T>
+ Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) * m_arg1.eval_impl(values); }
+protected:
+ Arg0 m_arg0;
+ Arg1 m_arg1;
+};
+
+template<typename Arg0, typename Arg1>
+class QuotientExpr : public BaseExpr<QuotientExpr<Arg0,Arg1> >
+{
+public:
+ QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
+
+ template<typename T>
+ Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) / m_arg1.eval_impl(values); }
+protected:
+ Arg0 m_arg0;
+ Arg1 m_arg1;
+};
+
+} // end namespace symbolic
+
+} // end namespace Eigen
+
+#endif // EIGEN_SYMBOLIC_INDEX_H
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index ba5bd186d..71c32b8a1 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -34,6 +34,26 @@ inline IndexDest convert_index(const IndexSrc& idx) {
return IndexDest(idx);
}
+// true if T can be considered as an integral index (i.e., an integral type or enum)
+template<typename T> struct is_valid_index_type  // compile-time predicate; the result is the enum constant `value`
+{
+ enum { value =  // exactly one of the three expressions below is compiled in, chosen by the preprocessor
+#if EIGEN_HAS_TYPE_TRAITS
+ internal::is_integral<T>::value || std::is_enum<T>::value  // integral type, or an enumeration as reported by std::is_enum
+#elif EIGEN_COMP_MSVC
+ internal::is_integral<T>::value || __is_enum(T)  // same test, with the __is_enum intrinsic standing in for std::is_enum
+#else
+ // without C++11, we use is_convertible to Index instead of is_integral in order to treat enums as Index.
+ internal::is_convertible<T,Index>::value && !internal::is_same<T,float>::value && !is_same<T,double>::value  // float and double are rejected explicitly (presumably they would otherwise pass the convertibility test)
+#endif
+ };
+};
+
+// true unless both types are valid index types (i.e., true if at least one of them is not a valid index type)
+template<typename RowIndices, typename ColIndices>  // the two index-argument types being tested
+struct valid_indexed_view_overload {  // value is false only when RowIndices and ColIndices are both valid index types
+ enum { value = !(internal::is_valid_index_type<RowIndices>::value && internal::is_valid_index_type<ColIndices>::value) };  // negation of "both are valid index types"
+};
// promote_scalar_arg is an helper used in operation between an expression and a scalar, like:
// expression * scalar
@@ -90,6 +110,9 @@ class no_assignment_operator
{
private:
no_assignment_operator& operator=(const no_assignment_operator&);
+ protected:
+ EIGEN_DEFAULT_COPY_CONSTRUCTOR(no_assignment_operator)
+ EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(no_assignment_operator)
};
/** \internal return the index type with the largest number of bits */
@@ -106,19 +129,23 @@ struct promote_index_type
template<typename T, int Value> class variable_if_dynamic
{
public:
- EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic)
+ EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(variable_if_dynamic)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {}
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+ T value() { return T(Value); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+ operator T() const { return T(Value); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ void setValue(T v) const { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
};
template<typename T> class variable_if_dynamic<T, Dynamic>
{
T m_value;
- EIGEN_DEVICE_FUNC variable_if_dynamic() { eigen_assert(false); }
public:
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value = 0) EIGEN_NO_THROW : m_value(value) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return m_value; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; }
};
@@ -129,8 +156,10 @@ template<typename T, int Value> class variable_if_dynamicindex
public:
EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {}
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR
+ T value() { return T(Value); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ void setValue(T) {}
};
template<typename T> class variable_if_dynamicindex<T, DynamicIndex>
@@ -155,16 +184,7 @@ template<typename T> struct functor_traits
template<typename T> struct packet_traits;
-template<typename T> struct unpacket_traits
-{
- typedef T type;
- typedef T half;
- enum
- {
- size = 1,
- alignment = 1
- };
-};
+template<typename T> struct unpacket_traits;
template<int Size, typename PacketType,
bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value>
@@ -383,7 +403,7 @@ template<typename T> struct plain_matrix_type_row_major
typedef Matrix<typename traits<T>::Scalar,
Rows,
Cols,
- (MaxCols==1&&MaxRows!=1) ? RowMajor : ColMajor,
+ (MaxCols==1&&MaxRows!=1) ? ColMajor : RowMajor,
MaxRows,
MaxCols
> type;
@@ -400,7 +420,7 @@ struct ref_selector
T const&,
const T
>::type type;
-
+
typedef typename conditional<
bool(traits<T>::Flags & NestByRefBit),
T &,
@@ -438,7 +458,7 @@ template<typename T, int n, typename PlainObject = typename plain_object_eval<T>
{
enum {
ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost,
- CoeffReadCost = evaluator<T>::CoeffReadCost, // NOTE What if an evaluator evaluate itself into a tempory?
+ CoeffReadCost = evaluator<T>::CoeffReadCost, // NOTE What if an evaluator evaluate itself into a temporary?
// Then CoeffReadCost will be small (e.g., 1) but we still have to evaluate, especially if n>1.
// This situation is already taken care by the EvalBeforeNestingBit flag, which is turned ON
// for all evaluator creating a temporary. This flag is then propagated by the parent evaluators.
@@ -579,14 +599,14 @@ template<typename ExpressionType, typename Scalar = typename ExpressionType::Sca
struct plain_row_type
{
typedef Matrix<Scalar, 1, ExpressionType::ColsAtCompileTime,
- ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> MatrixRowType;
+ int(ExpressionType::PlainObject::Options) | int(RowMajor), 1, ExpressionType::MaxColsAtCompileTime> MatrixRowType;
typedef Array<Scalar, 1, ExpressionType::ColsAtCompileTime,
- ExpressionType::PlainObject::Options | RowMajor, 1, ExpressionType::MaxColsAtCompileTime> ArrayRowType;
+ int(ExpressionType::PlainObject::Options) | int(RowMajor), 1, ExpressionType::MaxColsAtCompileTime> ArrayRowType;
typedef typename conditional<
is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
MatrixRowType,
- ArrayRowType
+ ArrayRowType
>::type type;
};
@@ -601,7 +621,7 @@ struct plain_col_type
typedef typename conditional<
is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
MatrixColType,
- ArrayColType
+ ArrayColType
>::type type;
};
@@ -617,7 +637,7 @@ struct plain_diag_type
typedef typename conditional<
is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value,
MatrixDiagType,
- ArrayDiagType
+ ArrayDiagType
>::type type;
};
@@ -654,24 +674,39 @@ template<typename T> struct is_diagonal<DiagonalWrapper<T> >
template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> >
{ enum { ret = true }; };
+
+template<typename T> struct is_identity  // primary template: value is false for every T not matched by the specialization below
+{ enum { value = false }; };
+
+template<typename T> struct is_identity<CwiseNullaryOp<internal::scalar_identity_op<typename T::Scalar>, T> >  // specialization: a CwiseNullaryOp whose functor is scalar_identity_op over T's Scalar
+{ enum { value = true }; };
+
+
template<typename S1, typename S2> struct glue_shapes;
template<> struct glue_shapes<DenseShape,TriangularShape> { typedef TriangularShape type; };
template<typename T1, typename T2>
-bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if<has_direct_access<T1>::ret&&has_direct_access<T2>::ret, T1>::type * = 0)
+struct possibly_same_dense {
+ enum { value = has_direct_access<T1>::ret && has_direct_access<T2>::ret && is_same<typename T1::Scalar,typename T2::Scalar>::value };
+};
+
+template<typename T1, typename T2>
+EIGEN_DEVICE_FUNC
+bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if<possibly_same_dense<T1,T2>::value>::type * = 0)
{
return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride());
}
template<typename T1, typename T2>
-bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_access<T1>::ret&&has_direct_access<T2>::ret), T1>::type * = 0)
+EIGEN_DEVICE_FUNC
+bool is_same_dense(const T1 &, const T2 &, typename enable_if<!possibly_same_dense<T1,T2>::value>::type * = 0)
{
return false;
}
// Internal helper defining the cost of a scalar division for the type T.
// The default heuristic can be specialized for each scalar type and architecture.
-template<typename T,bool Vectorized=false,typename EnaleIf = void>
+template<typename T,bool Vectorized=false,typename EnableIf = void>
struct scalar_div_cost {
enum { value = 8*NumTraits<T>::MulCost };
};
@@ -718,7 +753,7 @@ std::string demangle_flags(int f)
if(f&DirectAccessBit) res += " | Direct";
if(f&NestByRefBit) res += " | NestByRef";
if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit";
-
+
return res;
}
#endif
@@ -815,7 +850,7 @@ struct ScalarBinaryOpTraits<void,void,BinaryOp>
#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
EIGEN_STATIC_ASSERT((Eigen::internal::has_ReturnType<ScalarBinaryOpTraits<LHS, RHS,BINOP> >::value), \
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-
+
} // end namespace Eigen
#endif // EIGEN_XPRHELPER_H