diff options
Diffstat (limited to 'Eigen/src')
31 files changed, 393 insertions, 208 deletions
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h index 0d34269fd..e10020d4f 100644 --- a/Eigen/src/Core/Array.h +++ b/Eigen/src/Core/Array.h @@ -231,10 +231,16 @@ class Array : Base(other) { } + private: + struct PrivateType {}; + public: + /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */ template<typename OtherDerived> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other) + EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other, + typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value, + PrivateType>::type = PrivateType()) : Base(other.derived()) { } diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index f0232f65e..3dbc7084c 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -175,7 +175,7 @@ template<typename Derived> class ArrayBase */ template<typename Derived> template<typename OtherDerived> -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other) { call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -188,7 +188,7 @@ ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other) */ template<typename Derived> template<typename OtherDerived> -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other) { call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -201,7 +201,7 @@ ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other) */ template<typename Derived> template<typename OtherDerived> -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other) { call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -214,7 +214,7 @@ ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other) */ template<typename Derived> template<typename OtherDerived> -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other) { call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>()); diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h index a04521a16..688aadd62 100644 --- a/Eigen/src/Core/ArrayWrapper.h +++ b/Eigen/src/Core/ArrayWrapper.h @@ -32,7 +32,8 @@ struct traits<ArrayWrapper<ExpressionType> > // Let's remove NestByRefBit enum { Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } @@ -129,7 +130,8 @@ struct traits<MatrixWrapper<ExpressionType> > // Let's remove NestByRefBit enum { Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index dd498f758..ddd607e38 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -105,7 +105,7 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp */ template<typename Derived> template<typename CustomNullaryOp> -EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func) { return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func); @@ -150,7 +150,7 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func) */ template<typename Derived> template<typename CustomNullaryOp> -EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func) { return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func); @@ -192,7 +192,7 @@ DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value) * \sa class CwiseNullaryOp */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Constant(Index size, const Scalar& value) { return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value)); @@ -208,7 +208,7 @@ DenseBase<Derived>::Constant(Index size, const Scalar& value) * \sa class CwiseNullaryOp */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Constant(const Scalar& value) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) @@ -220,7 +220,7 @@ DenseBase<Derived>::Constant(const Scalar& value) * \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&) */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -232,7 +232,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const * \sa LinSpaced(Scalar,Scalar) */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -264,7 +264,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -276,7 +276,7 @@ DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high) * Special version for fixed size types which does not require the size parameter. */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -286,7 +286,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high) /** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */ template<typename Derived> -bool DenseBase<Derived>::isApproxToConstant +EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant (const Scalar& val, const RealScalar& prec) const { typename internal::nested_eval<Derived,1>::type self(derived()); @@ -301,7 +301,7 @@ bool DenseBase<Derived>::isApproxToConstant * * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */ template<typename Derived> -bool DenseBase<Derived>::isConstant +EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant (const Scalar& val, const RealScalar& prec) const { return isApproxToConstant(val, prec); @@ -312,7 +312,7 @@ bool DenseBase<Derived>::isConstant * \sa setConstant(), Constant(), class CwiseNullaryOp */ template<typename Derived> -EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val) { setConstant(val); } @@ -322,7 +322,7 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val) * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val) { return derived() = Constant(rows(), cols(), val); } @@ -337,7 +337,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val) * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val) { resize(size); @@ -356,7 +356,7 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val) * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val) { resize(rows, cols); @@ -380,7 +380,7 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val) * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize)); @@ -400,7 +400,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return setLinSpaced(size(), low, high); @@ -423,7 +423,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, * \sa Zero(), Zero(Index) */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Zero(Index rows, Index cols) { return Constant(rows, cols, Scalar(0)); @@ -446,7 +446,7 @@ DenseBase<Derived>::Zero(Index rows, Index cols) * \sa Zero(), Zero(Index,Index) */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Zero(Index size) { return Constant(size, Scalar(0)); @@ -463,7 +463,7 @@ DenseBase<Derived>::Zero(Index size) * \sa Zero(Index), Zero(Index,Index) */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Zero() { return Constant(Scalar(0)); @@ -478,7 +478,7 @@ DenseBase<Derived>::Zero() * \sa class CwiseNullaryOp, Zero() */ template<typename Derived> -bool DenseBase<Derived>::isZero(const RealScalar& prec) const +EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const { typename internal::nested_eval<Derived,1>::type self(derived()); for(Index j = 0; j < cols(); ++j) @@ -496,7 +496,7 @@ bool DenseBase<Derived>::isZero(const RealScalar& prec) const * \sa class CwiseNullaryOp, Zero() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero() { return setConstant(Scalar(0)); } @@ -511,7 +511,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero() * \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setZero(Index newSize) { resize(newSize); @@ -529,7 +529,7 @@ PlainObjectBase<Derived>::setZero(Index newSize) * \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setZero(Index rows, Index cols) { resize(rows, cols); @@ -553,7 +553,7 @@ PlainObjectBase<Derived>::setZero(Index rows, Index cols) * \sa Ones(), Ones(Index), isOnes(), class Ones */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Ones(Index rows, Index cols) { return Constant(rows, cols, Scalar(1)); @@ -576,7 +576,7 @@ DenseBase<Derived>::Ones(Index rows, Index cols) * \sa Ones(), Ones(Index,Index), isOnes(), class Ones */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Ones(Index newSize) { return Constant(newSize, Scalar(1)); @@ -593,7 +593,7 @@ DenseBase<Derived>::Ones(Index newSize) * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Ones() { return Constant(Scalar(1)); @@ -608,7 +608,7 @@ DenseBase<Derived>::Ones() * \sa class CwiseNullaryOp, Ones() */ template<typename Derived> -bool DenseBase<Derived>::isOnes +EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes (const RealScalar& prec) const { return isApproxToConstant(Scalar(1), prec); @@ -622,7 +622,7 @@ bool DenseBase<Derived>::isOnes * \sa class CwiseNullaryOp, Ones() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes() { return setConstant(Scalar(1)); } @@ -637,7 +637,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes() * \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setOnes(Index newSize) { resize(newSize); @@ -655,7 +655,7 @@ PlainObjectBase<Derived>::setOnes(Index newSize) * \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setOnes(Index rows, Index cols) { resize(rows, cols); @@ -679,7 +679,7 @@ PlainObjectBase<Derived>::setOnes(Index rows, Index cols) * \sa Identity(), setIdentity(), isIdentity() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType MatrixBase<Derived>::Identity(Index rows, Index cols) { return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>()); @@ -696,7 +696,7 @@ MatrixBase<Derived>::Identity(Index rows, Index cols) * \sa Identity(Index,Index), setIdentity(), isIdentity() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType MatrixBase<Derived>::Identity() { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) @@ -771,7 +771,7 @@ struct setIdentity_impl<Derived, true> * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity() { return internal::setIdentity_impl<Derived>::run(derived()); } @@ -787,7 +787,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity() * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols) { derived().resize(rows, cols); return setIdentity(); @@ -800,7 +800,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i); @@ -815,7 +815,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return BasisReturnType(SquareMatrixType::Identity(),i); @@ -828,7 +828,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX() { return Derived::Unit(0); } /** \returns an expression of the Y axis unit vector (0,1{,0}^*) @@ -838,7 +838,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY() { return Derived::Unit(1); } /** \returns an expression of the Z axis unit vector (0,0,1{,0}^*) @@ -848,7 +848,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ() { return Derived::Unit(2); } /** \returns an expression of the W axis unit vector (0,0,0,1) @@ -858,7 +858,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW() { return Derived::Unit(3); } } // end namespace Eigen diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 46fe5193c..90066ae73 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -296,7 +296,7 @@ template<typename Derived> class DenseBase EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue<OtherDerived>& func); - /** \ínternal + /** \internal * Copies \a other into *this without evaluating other. \returns a reference to *this. * \deprecated */ template<typename OtherDerived> @@ -484,9 +484,9 @@ template<typename Derived> class DenseBase return derived().coeff(0,0); } - bool all() const; - bool any() const; - Index count() const; + EIGEN_DEVICE_FUNC bool all() const; + EIGEN_DEVICE_FUNC bool any() const; + EIGEN_DEVICE_FUNC Index count() const; typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType; typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType; diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h index f76995af9..b195506a9 100644 --- a/Eigen/src/Core/EigenBase.h +++ b/Eigen/src/Core/EigenBase.h @@ -14,6 +14,7 @@ namespace Eigen { /** \class EigenBase + * \ingroup Core_Module * * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). * @@ -128,6 +129,7 @@ template<typename Derived> struct EigenBase */ template<typename Derived> template<typename OtherDerived> +EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other) { call_assignment(derived(), other.derived()); @@ -136,6 +138,7 @@ Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other) template<typename Derived> template<typename OtherDerived> +EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other) { call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -144,6 +147,7 @@ Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other) template<typename Derived> template<typename OtherDerived> +EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other) { call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>()); diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 27033a2dd..029f8ac36 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -230,7 +230,7 @@ pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>( * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]} * Currently, this function is only used for scalar * complex products. */ -template<typename Packet> EIGEN_DEVICE_FUNC inline Packet +template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; } /** \internal \returns a packet with elements of \a *from quadrupled. @@ -278,7 +278,7 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a, } /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */ -template<typename Packet> inline Packet +template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits<Packet>::type& a) { return a; } /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ @@ -482,7 +482,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& fro * by the current computation. */ template<typename Packet, int LoadMode> -inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) { return ploadt<Packet, LoadMode>(from); } diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 8d47fb8a4..a648aa0fa 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -1061,11 +1061,24 @@ double log(const double &x) { return ::log(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -typename NumTraits<T>::Real abs(const T &x) { +typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type +abs(const T &x) { EIGEN_USING_STD_MATH(abs); return abs(x); } +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type +abs(const T &x) { + return x; +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); } +EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float abs(const float &x) { return ::fabsf(x); } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index f7cf04cde..ce412180a 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -294,7 +294,7 @@ template<typename Derived> class MatrixBase * fuzzy comparison such as isApprox() * \sa isApprox(), operator!= */ template<typename OtherDerived> - inline bool operator==(const MatrixBase<OtherDerived>& other) const + EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const { return cwiseEqual(other).all(); } /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other. @@ -302,7 +302,7 @@ template<typename Derived> class MatrixBase * fuzzy comparison such as isApprox() * \sa isApprox(), operator== */ template<typename OtherDerived> - inline bool operator!=(const MatrixBase<OtherDerived>& other) const + EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const { return cwiseNotEqual(other).any(); } NoAlias<Derived,Eigen::MatrixBase > noalias(); diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index dd61195bc..daf489878 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -215,6 +215,8 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); } EIGEN_DEVICE_FUNC static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); } + + static inline int digits10() { return NumTraits<Scalar>::digits10(); } }; template<> struct NumTraits<std::string> diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 583b7f59e..c42725dbd 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -207,6 +207,12 @@ struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename static const bool value = true; }; +template<typename OtherXpr, typename Lhs, typename Rhs> +struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr, + const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > { + static const bool value = true; +}; + template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2> struct assignment_from_xpr_op_product { diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h index 719ed72a5..50099df82 100644 --- a/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -15,7 +15,7 @@ namespace Eigen { // TODO generalize the scalar type of 'other' template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>()); @@ -23,7 +23,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other) } template<typename Derived> -EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>()); @@ -31,7 +31,7 @@ EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other) } template<typename Derived> -EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>()); @@ -39,7 +39,7 @@ EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other) } template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>()); diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h index 960a58597..a8daea511 100644 --- a/Eigen/src/Core/Solve.h +++ b/Eigen/src/Core/Solve.h @@ -34,12 +34,12 @@ template<typename Decomposition, typename RhsType,typename StorageKind> struct s template<typename Decomposition, typename RhsType> struct solve_traits<Decomposition,RhsType,Dense> { - typedef Matrix<typename RhsType::Scalar, + typedef typename make_proper_matrix_type<typename RhsType::Scalar, Decomposition::ColsAtCompileTime, RhsType::ColsAtCompileTime, RhsType::PlainObject::Options, Decomposition::MaxColsAtCompileTime, - RhsType::MaxColsAtCompileTime> PlainObject; + RhsType::MaxColsAtCompileTime>::type PlainObject; }; template<typename Decomposition, typename RhsType> diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index d2fe1e199..be04ed44d 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -170,7 +170,8 @@ MatrixBase<Derived>::stableNorm() const enum { CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit) || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough - ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT) // ifwe cannot allocate on the stack, then let's not bother about this optimization + ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT) + && (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization }; typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>, typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper; diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 52892db38..294c517ea 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -13,7 +13,7 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, @@ -474,9 +474,59 @@ template<> struct is_arithmetic<half> { enum { value = true }; }; } // end namespace internal +} // end namespace Eigen + +namespace std { +template<> +struct numeric_limits<Eigen::half> { + static const bool is_specialized = true; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const bool has_infinity = true; + static const bool has_quiet_NaN = true; + static const bool has_signaling_NaN = true; + static const float_denorm_style has_denorm = denorm_present; + static const bool has_denorm_loss = false; + static const std::float_round_style round_style = std::round_to_nearest; + static const bool is_iec559 = false; + static const bool is_bounded = false; + static const bool is_modulo = false; + static const int digits = 11; + static const int digits10 = 2; + //static const int max_digits10 = ; + static const int radix = 2; + static const int min_exponent = -13; + static const int min_exponent10 = -4; + static const int max_exponent = 16; + static const int max_exponent10 = 4; + static const bool traps = true; + static const bool tinyness_before = false; + + static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } + static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } + static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); } + static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } + static Eigen::half round_error() { return Eigen::half(0.5); } + static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } + static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } +}; +} + +namespace Eigen { + template<> struct NumTraits<Eigen::half> : GenericNumTraits<Eigen::half> { + enum { + IsSigned = true, + IsInteger = false, + IsComplex = false, + RequireInitialization = false + }; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() { return half_impl::raw_uint16_to_half(0x0800); } diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index ad66399e0..4dda63188 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -291,7 +291,7 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) { EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<float4,4>& kernel) { - double tmp = kernel.packet[0].y; + float tmp = kernel.packet[0].y; kernel.packet[0].y = kernel.packet[1].x; kernel.packet[1].x = tmp; diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 84a56bdcc..836fbc0dd 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -51,14 +51,17 @@ typedef uint32x4_t Packet4ui; #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1<Packet4i>(X) -// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function -// which available on LLVM and GCC (at least) -#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC +#if EIGEN_ARCH_ARM64 + // __builtin_prefetch tends to do nothing on ARM64 compilers because the + // prefetch instructions there are too detailed for __builtin_prefetch to map + // meaningfully to them. + #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : ); +#elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR); #elif defined __pld #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR) -#elif !EIGEN_ARCH_ARM64 - #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); +#elif EIGEN_ARCH_ARM32 + #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : ); #else // by default no explicit prefetching #define EIGEN_ARM_PREFETCH(ADDR) @@ -113,7 +116,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { - const float32_t f[] = {0, 1, 2, 3}; + const float f[] = {0, 1, 2, 3}; Packet4f countdown = vld1q_f32(f); return vaddq_f32(pset1<Packet4f>(a), countdown); } diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index 6a30466fb..b03be0269 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -44,16 +44,16 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> { linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)), - m_interPacket(plset<Packet>(0)), m_flip(numext::abs(high)<numext::abs(low)) {} template<typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { + typedef typename NumTraits<Scalar>::Real RealScalar; if(m_flip) - return (i==0)? m_low : (m_high - (m_size1-i)*m_step); + return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step); else - return (i==m_size1)? m_high : (m_low + i*m_step); + return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step); } template<typename IndexType> @@ -63,7 +63,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) if(m_flip) { - Packet pi = padd(pset1<Packet>(Scalar(i-m_size1)),m_interPacket); + Packet pi = plset<Packet>(Scalar(i-m_size1)); Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi)); if(i==0) res = pinsertfirst(res, m_low); @@ -71,7 +71,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> } else { - Packet pi = padd(pset1<Packet>(Scalar(i)),m_interPacket); + Packet pi = plset<Packet>(Scalar(i)); Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi)); if(i==m_size1-unpacket_traits<Packet>::size+1) res = pinsertlast(res, m_high); @@ -83,7 +83,6 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> const Scalar m_high; const Index m_size1; const Scalar m_step; - const Packet m_interPacket; const bool m_flip; }; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 7122efa60..e844e37d1 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -269,10 +269,13 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0, LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0, - RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0 + RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0, + SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0 }; Index size = mat.cols(); + if(SkipDiag) + size--; Index depth = actualLhs.cols(); typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar, @@ -283,10 +286,11 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> internal::general_matrix_matrix_triangular_product<Index, typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, - IsRowMajor ? RowMajor : ColMajor, UpLo> + IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)> ::run(size, depth, - &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), - mat.data(), mat.outerStride(), actualAlpha, blocking); + &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(), + &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(), + mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking); } }; @@ -294,6 +298,7 @@ template<typename MatrixType, unsigned int UpLo> template<typename ProductType> TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) { + EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED); eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols()); general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta); diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 5b7c15cca..41e18ff07 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \ { \ - if (lhs==rhs) { \ + if ( lhs==rhs && ((UpLo&(Lower|Upper)==UpLo)) ) { \ general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \ } else { \ diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 427d3cd6b..38d6ddb9a 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 3 -#define EIGEN_MINOR_VERSION 3 +#define EIGEN_MINOR_VERSION 4 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index f6ef1bcf6..3e5a9badb 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -423,7 +423,7 @@ typedef Map<Quaternion<double>, Aligned> QuaternionMapAlignedd; // Generic Quaternion * Quaternion product // This product can be specialized for a given architecture via the Arch template argument. namespace internal { -template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options> struct quat_product +template<int Arch, class Derived1, class Derived2, typename Scalar> struct quat_product { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){ return Quaternion<Scalar> @@ -446,8 +446,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) return internal::quat_product<Architecture::Target, Derived, OtherDerived, - typename internal::traits<Derived>::Scalar, - EIGEN_PLAIN_ENUM_MIN(internal::traits<Derived>::Alignment, internal::traits<OtherDerived>::Alignment)>::run(*this, other); + typename internal::traits<Derived>::Scalar>::run(*this, other); } /** \sa operator*(Quaternion) */ @@ -672,7 +671,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> // Generic conjugate of a Quaternion namespace internal { -template<int Arch, class Derived, typename Scalar, int _Options> struct quat_conj +template<int Arch, class Derived, typename Scalar> struct quat_conj { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){ return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z()); @@ -691,8 +690,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::conjugate() const { return internal::quat_conj<Architecture::Target, Derived, - typename internal::traits<Derived>::Scalar, - internal::traits<Derived>::Alignment>::run(*this); + typename internal::traits<Derived>::Scalar>::run(*this); } diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h index 1a86ff837..f68cab583 100644 --- a/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -16,17 +16,23 @@ namespace Eigen { namespace internal { template<class Derived, class OtherDerived> -struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16> +struct quat_product<Architecture::SSE, Derived, OtherDerived, float> { + enum { + AAlignment = traits<Derived>::Alignment, + BAlignment = traits<OtherDerived>::Alignment, + ResAlignment = traits<Quaternion<float> >::Alignment + }; static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) { Quaternion<float> res; const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f); - __m128 a = _a.coeffs().template packet<Aligned16>(0); - __m128 b = _b.coeffs().template packet<Aligned16>(0); + __m128 a = _a.coeffs().template packet<AAlignment>(0); + __m128 b = _b.coeffs().template packet<BAlignment>(0); __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); - pstore(&res.x(), + pstoret<float,Packet4f,ResAlignment>( + &res.x(), _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)), _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0), vec4f_swizzle1(b,1,2,0,0))), @@ -36,14 +42,17 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16> } }; -template<class Derived, int Alignment> -struct quat_conj<Architecture::SSE, Derived, float, Alignment> +template<class Derived> +struct quat_conj<Architecture::SSE, Derived, float> { + enum { + ResAlignment = traits<Quaternion<float> >::Alignment + }; static inline Quaternion<float> run(const QuaternionBase<Derived>& q) { Quaternion<float> res; const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f); - pstore(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<Alignment>(0))); + pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0))); return res; } }; @@ -52,6 +61,9 @@ struct quat_conj<Architecture::SSE, Derived, float, Alignment> template<typename VectorLhs,typename VectorRhs> struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> { + enum { + ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment + }; static inline typename plain_matrix_type<VectorLhs>::type run(const VectorLhs& lhs, const VectorRhs& rhs) { @@ -60,7 +72,7 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); typename plain_matrix_type<VectorLhs>::type res; - pstore(&res.x(),_mm_sub_ps(mul1,mul2)); + pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2)); return res; } }; @@ -68,9 +80,14 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> -template<class Derived, class OtherDerived, int Alignment> -struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> +template<class Derived, class OtherDerived> +struct quat_product<Architecture::SSE, Derived, OtherDerived, double> { + enum { + BAlignment = traits<OtherDerived>::Alignment, + ResAlignment = traits<Quaternion<double> >::Alignment + }; + static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) { const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); @@ -78,8 +95,8 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> Quaternion<double> res; const double* a = _a.coeffs().data(); - Packet2d b_xy = _b.coeffs().template packet<Alignment>(0); - Packet2d b_zw = _b.coeffs().template packet<Alignment>(2); + Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0); + Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2); Packet2d a_xx = pset1<Packet2d>(a[0]); Packet2d a_yy = pset1<Packet2d>(a[1]); Packet2d a_zz = pset1<Packet2d>(a[2]); @@ -97,9 +114,9 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw)); #ifdef EIGEN_VECTORIZE_SSE3 EIGEN_UNUSED_VARIABLE(mask) - pstore(&res.x(), _mm_addsub_pd(t1, preverse(t2))); + pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2))); #else - pstore(&res.x(), padd(t1, pxor(mask,preverse(t2)))); + pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2)))); #endif /* @@ -111,25 +128,28 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy)); #ifdef EIGEN_VECTORIZE_SSE3 EIGEN_UNUSED_VARIABLE(mask) - pstore(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); + pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); #else - pstore(&res.z(), psub(t1, pxor(mask,preverse(t2)))); + pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2)))); #endif return res; } }; -template<class Derived, int Alignment> -struct quat_conj<Architecture::SSE, Derived, double, Alignment> +template<class Derived> +struct quat_conj<Architecture::SSE, Derived, double> { + enum { + ResAlignment = traits<Quaternion<double> >::Alignment + }; static inline Quaternion<double> run(const QuaternionBase<Derived>& q) { Quaternion<double> res; const __m128d mask0 = _mm_setr_pd(-0.,-0.); const __m128d mask2 = _mm_setr_pd(-0.,0.); - pstore(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<Alignment>(0))); - pstore(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<Alignment>(2))); + pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0))); + pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2))); return res; } }; diff --git a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index 358444aff..facdaf890 100644 --- a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -152,13 +152,28 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar> { // Compute the inverse squared-norm of each column of mat m_invdiag.resize(mat.cols()); - for(Index j=0; j<mat.outerSize(); ++j) + if(MatType::IsRowMajor) { - RealScalar sum = mat.innerVector(j).squaredNorm(); - if(sum>0) - m_invdiag(j) = RealScalar(1)/sum; - else - m_invdiag(j) = RealScalar(1); + m_invdiag.setZero(); + for(Index j=0; j<mat.outerSize(); ++j) + { + for(typename MatType::InnerIterator it(mat,j); it; ++it) + m_invdiag(it.index()) += numext::abs2(it.value()); + } + for(Index j=0; j<mat.cols(); ++j) + if(numext::real(m_invdiag(j))>RealScalar(0)) + m_invdiag(j) = RealScalar(1)/numext::real(m_invdiag(j)); + } + else + { + for(Index j=0; j<mat.outerSize(); ++j) + { + RealScalar sum = mat.innerVector(j).squaredNorm(); + if(sum>RealScalar(0)) + m_invdiag(j) = RealScalar(1)/sum; + else + m_invdiag(j) = RealScalar(1); + } } Base::m_isInitialized = true; return *this; diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h index d25af8e90..c30326e1d 100644 --- a/Eigen/src/Jacobi/Jacobi.h +++ b/Eigen/src/Jacobi/Jacobi.h @@ -302,8 +302,12 @@ template<typename VectorX, typename VectorY, typename OtherScalar> void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j) { typedef typename VectorX::Scalar Scalar; - enum { PacketSize = packet_traits<Scalar>::size }; + enum { + PacketSize = packet_traits<Scalar>::size, + OtherPacketSize = packet_traits<OtherScalar>::size + }; typedef typename packet_traits<Scalar>::type Packet; + typedef typename packet_traits<OtherScalar>::type OtherPacket; eigen_assert(xpr_x.size() == xpr_y.size()); Index size = xpr_x.size(); Index incrx = xpr_x.derived().innerStride(); @@ -321,6 +325,7 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x if(VectorX::SizeAtCompileTime == Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && + (PacketSize == OtherPacketSize) && ((incrx==1 && incry==1) || PacketSize == 1)) { // both vectors are sequentially stored in memory => vectorization @@ -329,9 +334,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x Index alignedStart = internal::first_default_aligned(y, size); Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; - const Packet pc = pset1<Packet>(c); - const Packet ps = pset1<Packet>(s); - conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; for(Index i=0; i<alignedStart; ++i) { @@ -350,8 +356,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x { Packet xi = pload<Packet>(px); Packet yi = pload<Packet>(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); px += PacketSize; py += PacketSize; } @@ -365,10 +371,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x Packet xi1 = ploadu<Packet>(px+PacketSize); Packet yi = pload <Packet>(py); Packet yi1 = pload <Packet>(py+PacketSize); - pstoreu(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1))); - pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); - pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1))); + pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); + pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); px += Peeling*PacketSize; py += Peeling*PacketSize; } @@ -376,8 +382,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x { Packet xi = ploadu<Packet>(x+peelingEnd); Packet yi = pload <Packet>(y+peelingEnd); - pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); } } @@ -393,19 +399,21 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x /*** fixed-size vectorized path ***/ else if(VectorX::SizeAtCompileTime != Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && + (PacketSize == OtherPacketSize) && (EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment { - const Packet pc = pset1<Packet>(c); - const Packet ps = pset1<Packet>(s); - conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; Scalar* EIGEN_RESTRICT px = x; Scalar* EIGEN_RESTRICT py = y; for(Index i=0; i<size; i+=PacketSize) { Packet xi = pload<Packet>(px); Packet yi = pload<Packet>(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); px += PacketSize; py += PacketSize; } diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h index 933cd564b..da85b4d6e 100644 --- a/Eigen/src/OrderingMethods/Eigen_Colamd.h +++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -1004,7 +1004,7 @@ static IndexType find_ordering /* return the number of garbage collections */ COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ; /* get pivot column from head of minimum degree list */ - while (head [min_score] == COLAMD_EMPTY && min_score < n_col) + while (min_score < n_col && head [min_score] == COLAMD_EMPTY) { min_score++ ; } diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h index 0e47c8332..a7b47d55d 100644 --- a/Eigen/src/QR/ColPivHouseholderQR.h +++ b/Eigen/src/QR/ColPivHouseholderQR.h @@ -506,8 +506,8 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace() m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k); } - RealScalar threshold_helper = numext::abs2<Scalar>(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows); - RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<Scalar>::epsilon()); + RealScalar threshold_helper = numext::abs2<RealScalar>(m_colNormsUpdated.maxCoeff() * NumTraits<RealScalar>::epsilon()) / RealScalar(rows); + RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<RealScalar>::epsilon()); m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) m_maxpivot = RealScalar(0); @@ -553,12 +553,12 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace() // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf // and used in LAPACK routines xGEQPF and xGEQP3. // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html - if (m_colNormsUpdated.coeffRef(j) != 0) { + if (m_colNormsUpdated.coeffRef(j) != RealScalar(0)) { RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j); temp = (RealScalar(1) + temp) * (RealScalar(1) - temp); - temp = temp < 0 ? 0 : temp; - RealScalar temp2 = temp * numext::abs2<Scalar>(m_colNormsUpdated.coeffRef(j) / - m_colNormsDirect.coeffRef(j)); + temp = temp < RealScalar(0) ? RealScalar(0) : temp; + RealScalar temp2 = temp * numext::abs2<RealScalar>(m_colNormsUpdated.coeffRef(j) / + m_colNormsDirect.coeffRef(j)); if (temp2 <= norm_downdate_threshold) { // The updated norm has become too inaccurate so re-compute the column // norm directly. diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h index 25fca6f4d..d7a4271cb 100644 --- a/Eigen/src/SVD/BDCSVD.h +++ b/Eigen/src/SVD/BDCSVD.h @@ -77,6 +77,7 @@ public: typedef _MatrixType MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; + typedef typename NumTraits<RealScalar>::Literal Literal; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, @@ -259,7 +260,7 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows RealScalar scale = matrix.cwiseAbs().maxCoeff(); - if(scale==RealScalar(0)) scale = RealScalar(1); + if(scale==Literal(0)) scale = Literal(1); MatrixX copy; if (m_isTranspose) copy = matrix.adjoint()/scale; else copy = matrix/scale; @@ -351,13 +352,13 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co Index k1=0, k2=0; for(Index j=0; j<n; ++j) { - if( (A.col(j).head(n1).array()!=0).any() ) + if( (A.col(j).head(n1).array()!=Literal(0)).any() ) { A1.col(k1) = A.col(j).head(n1); B1.row(k1) = B.row(j); ++k1; } - if( (A.col(j).tail(n2).array()!=0).any() ) + if( (A.col(j).tail(n2).array()!=Literal(0)).any() ) { A2.col(k2) = A.col(j).tail(n2); B2.row(k2) = B.row(j); @@ -449,11 +450,11 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, l = m_naiveU.row(1).segment(firstCol, k); f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1); } - if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1; + if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1); if (r0<considerZero) { - c0 = 1; - s0 = 0; + c0 = Literal(1); + s0 = Literal(0); } else { @@ -574,7 +575,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n); m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal(); ArrayRef diag = m_workspace.head(n); - diag(0) = 0; + diag(0) = Literal(0); // Allocate space for singular values and vectors singVals.resize(n); @@ -590,7 +591,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec // but others are interleaved and we must ignore them at this stage. // To this end, let's compute a permutation skipping them: Index actual_n = n; - while(actual_n>1 && diag(actual_n-1)==0) --actual_n; + while(actual_n>1 && diag(actual_n-1)==Literal(0)) --actual_n; Index m = 0; // size of the deflated problem for(Index k=0;k<actual_n;++k) if(abs(col0(k))>considerZero) @@ -691,7 +692,7 @@ template <typename MatrixType> typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift) { Index m = perm.size(); - RealScalar res = 1; + RealScalar res = Literal(1); for(Index i=0; i<m; ++i) { Index j = perm(i); @@ -710,16 +711,16 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d Index n = col0.size(); Index actual_n = n; - while(actual_n>1 && col0(actual_n-1)==0) --actual_n; + while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n; for (Index k = 0; k < n; ++k) { - if (col0(k) == 0 || actual_n==1) + if (col0(k) == Literal(0) || actual_n==1) { // if col0(k) == 0, then entry is deflated, so singular value is on diagonal // if actual_n==1, then the deflated problem is already diagonalized singVals(k) = k==0 ? col0(0) : diag(k); - mus(k) = 0; + mus(k) = Literal(0); shifts(k) = k==0 ? col0(0) : diag(k); continue; } @@ -733,13 +734,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d { // Skip deflated singular values Index l = k+1; - while(col0(l)==0) { ++l; eigen_internal_assert(l<actual_n); } + while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); } right = diag(l); } // first decide whether it's closer to the left end or the right end - RealScalar mid = left + (right-left) / 2; - RealScalar fMid = secularEq(mid, col0, diag, perm, diag, 0); + RealScalar mid = left + (right-left) / Literal(2); + RealScalar fMid = secularEq(mid, col0, diag, perm, diag, Literal(0)); #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << right-left << "\n"; std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right) << "\n"; @@ -755,7 +756,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d << " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0) << " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n"; #endif - RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right; + RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right; // measure everything relative to shift Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n); @@ -785,13 +786,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d // rational interpolation: fit a function of the form a / mu + b through the two previous // iterates and use its zero to compute the next iterate - bool useBisection = fPrev*fCur>0; - while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection) + bool useBisection = fPrev*fCur>Literal(0); + while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection) { ++m_numIters; // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples. - RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev); + RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev); RealScalar b = fCur - a / muCur; // And find mu such that f(mu)==0: RealScalar muZero = -a/b; @@ -803,8 +804,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d fCur = fZero; - if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true; - if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true; + if (shift == left && (muCur < Literal(0) || muCur > right - left)) useBisection = true; + if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true; if (abs(fCur)>abs(fPrev)) useBisection = true; } @@ -841,13 +842,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n"; } #endif - eigen_internal_assert(fLeft * fRight < 0); + eigen_internal_assert(fLeft * fRight < Literal(0)); - while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted))) + while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted))) { - RealScalar midShifted = (leftShifted + rightShifted) / 2; + RealScalar midShifted = (leftShifted + rightShifted) / Literal(2); fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift); - if (fLeft * fMid < 0) + if (fLeft * fMid < Literal(0)) { rightShifted = midShifted; } @@ -858,7 +859,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d } } - muCur = (leftShifted + rightShifted) / 2; + muCur = (leftShifted + rightShifted) / Literal(2); } singVals[k] = shift + muCur; @@ -892,8 +893,8 @@ void BDCSVD<MatrixType>::perturbCol0 // The offset permits to skip deflated entries while computing zhat for (Index k = 0; k < n; ++k) { - if (col0(k) == 0) // deflated - zhat(k) = 0; + if (col0(k) == Literal(0)) // deflated + zhat(k) = Literal(0); else { // see equation (3.6) @@ -918,7 +919,7 @@ void BDCSVD<MatrixType>::perturbCol0 std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n"; #endif RealScalar tmp = sqrt(prod); - zhat(k) = col0(k) > 0 ? tmp : -tmp; + zhat(k) = col0(k) > Literal(0) ? tmp : -tmp; } } } @@ -934,7 +935,7 @@ void BDCSVD<MatrixType>::computeSingVecs for (Index k = 0; k < n; ++k) { - if (zhat(k) == 0) + if (zhat(k) == Literal(0)) { U.col(k) = VectorType::Unit(n+1, k); if (m_compV) V.col(k) = VectorType::Unit(n, k); @@ -947,7 +948,7 @@ void BDCSVD<MatrixType>::computeSingVecs Index i = perm(l); U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); } - U(n,k) = 0; + U(n,k) = Literal(0); U.col(k).normalize(); if (m_compV) @@ -958,7 +959,7 @@ void BDCSVD<MatrixType>::computeSingVecs Index i = perm(l); V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); } - V(0,k) = -1; + V(0,k) = Literal(-1); V.col(k).normalize(); } } @@ -980,14 +981,14 @@ void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index RealScalar c = m_computed(start, start); RealScalar s = m_computed(start+i, start); RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); - if (r == 0) + if (r == Literal(0)) { - m_computed(start+i, start+i) = 0; + m_computed(start+i, start+i) = Literal(0); return; } m_computed(start,start) = r; - m_computed(start+i, start) = 0; - m_computed(start+i, start+i) = 0; + m_computed(start+i, start) = Literal(0); + m_computed(start+i, start+i) = Literal(0); JacobiRotation<RealScalar> J(c/r,-s/r); if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J); @@ -1020,7 +1021,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi << m_computed(firstColm + i+1, firstColm+i+1) << " " << m_computed(firstColm + i+2, firstColm+i+2) << "\n"; #endif - if (r==0) + if (r==Literal(0)) { m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); return; @@ -1029,7 +1030,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi s/=r; m_computed(firstColm + i, firstColm) = r; m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i); - m_computed(firstColm + j, firstColm) = 0; + m_computed(firstColm + j, firstColm) = Literal(0); JacobiRotation<RealScalar> J(c,-s); if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J); @@ -1053,7 +1054,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)(); RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff(); RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag); - RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag); + RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag); #ifdef EIGEN_BDCSVD_SANITY_CHECKS assert(m_naiveU.allFinite()); @@ -1081,7 +1082,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n"; #endif - col0(i) = 0; + col0(i) = Literal(0); } //condition 4.3 diff --git a/Eigen/src/SVD/UpperBidiagonalization.h b/Eigen/src/SVD/UpperBidiagonalization.h index 0b1460894..11ac847e1 100644 --- a/Eigen/src/SVD/UpperBidiagonalization.h +++ b/Eigen/src/SVD/UpperBidiagonalization.h @@ -159,6 +159,8 @@ void upperbidiagonalization_blocked_helper(MatrixType& A, traits<MatrixType>::Flags & RowMajorBit> > Y) { typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename NumTraits<RealScalar>::Literal Literal; enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit }; typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride; typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride; @@ -263,7 +265,7 @@ void upperbidiagonalization_blocked_helper(MatrixType& A, SubMatType A10( A.block(bs,0, brows-bs,bs) ); SubMatType A01( A.block(0,bs, bs,bcols-bs) ); Scalar tmp = A01(bs-1,0); - A01(bs-1,0) = 1; + A01(bs-1,0) = Literal(1); A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint(); A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01; A01(bs-1,0) = tmp; diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index 9e39be738..5ab64f1a8 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -47,6 +47,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView enum { Mode = _Mode, + TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0), RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime, ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime }; @@ -368,7 +369,7 @@ struct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, Pr // transpose everything Transpose<Dest> dstT(dst); - internal::sparse_selfadjoint_time_dense_product<RhsView::Mode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); + internal::sparse_selfadjoint_time_dense_product<RhsView::TransposeMode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); } }; diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h index dc74de935..91c09ab13 100644 --- a/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -10,19 +10,37 @@ #ifndef EIGEN_UMFPACKSUPPORT_H #define EIGEN_UMFPACKSUPPORT_H -namespace Eigen { +namespace Eigen { /* TODO extract L, extract U, compute det, etc... */ // generic double/complex<double> wrapper functions: -inline void umfpack_defaults(double control[UMFPACK_CONTROL], double) +inline void umfpack_defaults(double control[UMFPACK_CONTROL], double) { umfpack_di_defaults(control); } -inline void umfpack_defaults(double control[UMFPACK_CONTROL], std::complex<double>) +inline void umfpack_defaults(double control[UMFPACK_CONTROL], std::complex<double>) { umfpack_zi_defaults(control); } +inline void umfpack_report_info(double control[UMFPACK_CONTROL], double info[UMFPACK_INFO], double) +{ umfpack_di_report_info(control, info);} + +inline void umfpack_report_info(double control[UMFPACK_CONTROL], double info[UMFPACK_INFO], std::complex<double>) +{ umfpack_zi_report_info(control, info);} + +inline void umfpack_report_status(double control[UMFPACK_CONTROL], int status, double) +{ umfpack_di_report_status(control, status);} + +inline void umfpack_report_status(double control[UMFPACK_CONTROL], int status, std::complex<double>) +{ umfpack_zi_report_status(control, status);} + +inline void umfpack_report_control(double control[UMFPACK_CONTROL], double) +{ umfpack_di_report_control(control);} + +inline void umfpack_report_control(double control[UMFPACK_CONTROL], std::complex<double>) +{ umfpack_zi_report_control(control);} + inline void umfpack_free_numeric(void **Numeric, double) { umfpack_di_free_numeric(Numeric); *Numeric = 0; } @@ -156,6 +174,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > public: typedef Array<double, UMFPACK_CONTROL, 1> UmfpackControl; + typedef Array<double, UMFPACK_INFO, 1> UmfpackInfo; UmfPackLU() : m_dummy(0,0), mp_matrix(m_dummy) @@ -215,7 +234,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > return m_q; } - /** Computes the sparse Cholesky decomposition of \a matrix + /** Computes the sparse Cholesky decomposition of \a matrix * Note that the matrix should be column-major, and in compressed format for best performance. * \sa SparseMatrix::makeCompressed(). */ @@ -240,7 +259,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > { if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar()); if(m_numeric) umfpack_free_numeric(&m_numeric,Scalar()); - + grab(matrix.derived()); analyzePattern_impl(); @@ -267,7 +286,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > { return m_control; } - + /** Provides access to the control settings array used by UmfPack. * * If this array contains NaN's, the default values are used. @@ -278,7 +297,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > { return m_control; } - + /** Performs a numeric decomposition of \a matrix * * The given matrix must has the same sparcity than the matrix on which the pattern anylysis has been performed. @@ -293,10 +312,38 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > umfpack_free_numeric(&m_numeric,Scalar()); grab(matrix.derived()); - + factorize_impl(); } + /** Prints the current UmfPack control settings. + * + * \sa umfpackControl() + */ + void umfpackReportControl() + { + umfpack_report_control(m_control.data(), Scalar()); + } + + /** Prints statistics collected by UmfPack. + * + * \sa analyzePattern(), compute() + */ + void umfpackReportInfo() + { + eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); + umfpack_report_info(m_control.data(), m_umfpackInfo.data(), Scalar()); + } + + /** Prints the status of the previous factorization operation performed by UmfPack (symbolic or numerical factorization). + * + * \sa analyzePattern(), compute() + */ + void umfpackReportStatus() { + eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); + umfpack_report_status(m_control.data(), m_fact_errorCode, Scalar()); + } + /** \internal */ template<typename BDerived,typename XDerived> bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const; @@ -314,41 +361,42 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > m_numeric = 0; m_symbolic = 0; m_extractedDataAreDirty = true; + + umfpack_defaults(m_control.data(), Scalar()); } - + void analyzePattern_impl() { - umfpack_defaults(m_control.data(), Scalar()); - int errorCode = 0; - errorCode = umfpack_symbolic(internal::convert_index<int>(mp_matrix.rows()), - internal::convert_index<int>(mp_matrix.cols()), - mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), - &m_symbolic, m_control.data(), 0); + m_fact_errorCode = umfpack_symbolic(internal::convert_index<int>(mp_matrix.rows()), + internal::convert_index<int>(mp_matrix.cols()), + mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), + &m_symbolic, m_control.data(), m_umfpackInfo.data()); m_isInitialized = true; - m_info = errorCode ? InvalidInput : Success; + m_info = m_fact_errorCode ? InvalidInput : Success; m_analysisIsOk = true; m_factorizationIsOk = false; m_extractedDataAreDirty = true; } - + void factorize_impl() { + m_fact_errorCode = umfpack_numeric(mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), - m_symbolic, &m_numeric, m_control.data(), 0); + m_symbolic, &m_numeric, m_control.data(), m_umfpackInfo.data()); m_info = m_fact_errorCode == UMFPACK_OK ? Success : NumericalIssue; m_factorizationIsOk = true; m_extractedDataAreDirty = true; } - + template<typename MatrixDerived> void grab(const EigenBase<MatrixDerived> &A) { mp_matrix.~UmfpackMatrixRef(); ::new (&mp_matrix) UmfpackMatrixRef(A.derived()); } - + void grab(const UmfpackMatrixRef &A) { if(&(A.derived()) != &mp_matrix) @@ -357,19 +405,20 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > ::new (&mp_matrix) UmfpackMatrixRef(A); } } - + // cached data to reduce reallocation, etc. mutable LUMatrixType m_l; int m_fact_errorCode; UmfpackControl m_control; - + mutable UmfpackInfo m_umfpackInfo; + mutable LUMatrixType m_u; mutable IntColVectorType m_p; mutable IntRowVectorType m_q; UmfpackMatrixType m_dummy; UmfpackMatrixRef mp_matrix; - + void* m_numeric; void* m_symbolic; @@ -377,7 +426,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > int m_factorizationIsOk; int m_analysisIsOk; mutable bool m_extractedDataAreDirty; - + private: UmfPackLU(const UmfPackLU& ) { } }; @@ -427,7 +476,7 @@ bool UmfPackLU<MatrixType>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBas eigen_assert((BDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major rhs yet"); eigen_assert((XDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major result yet"); eigen_assert(b.derived().data() != x.derived().data() && " Umfpack does not support inplace solve"); - + int errorCode; Scalar* x_ptr = 0; Matrix<Scalar,Dynamic,1> x_tmp; @@ -442,7 +491,7 @@ bool UmfPackLU<MatrixType>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBas x_ptr = &x.col(j).coeffRef(0); errorCode = umfpack_solve(UMFPACK_A, mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), - x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, m_control.data(), 0); + x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, m_control.data(), m_umfpackInfo.data()); if(x.innerStride()!=1) x.col(j) = x_tmp; if (errorCode!=0) |