diff options
author | Miao Wang <miaowang@google.com> | 2017-07-06 22:29:41 +0000 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2017-07-06 22:29:41 +0000 |
commit | 63687fa8cbebcbae3d0a48a6c5d63677ffa6160e (patch) | |
tree | 28a6fbea7c5a1e9a0f1a209f8d33e6d72f1df851 | |
parent | 457e53bce95b3700e984cffd2ef8cfba1789ec6d (diff) | |
parent | c905f05b1b1277cc5b913652b2d4a57da12b89f7 (diff) | |
download | eigen-63687fa8cbebcbae3d0a48a6c5d63677ffa6160e.tar.gz |
Merge "Rebase Eigen to 3.3.4"
am: c905f05b1b
Change-Id: I75327153035c5b0a74bc6ea16a82271f5440fe1f
54 files changed, 4275 insertions, 586 deletions
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h index 0d34269fd..e10020d4f 100644 --- a/Eigen/src/Core/Array.h +++ b/Eigen/src/Core/Array.h @@ -231,10 +231,16 @@ class Array : Base(other) { } + private: + struct PrivateType {}; + public: + /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */ template<typename OtherDerived> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other) + EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other, + typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value, + PrivateType>::type = PrivateType()) : Base(other.derived()) { } diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index f0232f65e..3dbc7084c 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -175,7 +175,7 @@ template<typename Derived> class ArrayBase */ template<typename Derived> template<typename OtherDerived> -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other) { call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -188,7 +188,7 @@ ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other) */ template<typename Derived> template<typename OtherDerived> -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other) { call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -201,7 +201,7 @@ ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other) */ template<typename Derived> template<typename OtherDerived> -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other) { call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -214,7 +214,7 @@ ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other) */ template<typename Derived> template<typename OtherDerived> -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other) { call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>()); diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h index a04521a16..688aadd62 100644 --- a/Eigen/src/Core/ArrayWrapper.h +++ b/Eigen/src/Core/ArrayWrapper.h @@ -32,7 +32,8 @@ struct traits<ArrayWrapper<ExpressionType> > // Let's remove NestByRefBit enum { Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } @@ -129,7 +130,8 @@ struct traits<MatrixWrapper<ExpressionType> > // Let's remove NestByRefBit enum { Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index dd498f758..ddd607e38 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -105,7 +105,7 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp */ template<typename Derived> template<typename CustomNullaryOp> -EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func) { return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func); @@ -150,7 +150,7 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func) */ template<typename Derived> template<typename CustomNullaryOp> -EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func) { return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func); @@ -192,7 +192,7 @@ DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value) * \sa class CwiseNullaryOp */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Constant(Index size, const Scalar& value) { return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value)); @@ -208,7 +208,7 @@ DenseBase<Derived>::Constant(Index size, const Scalar& value) * \sa class CwiseNullaryOp */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Constant(const Scalar& value) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) @@ -220,7 +220,7 @@ DenseBase<Derived>::Constant(const Scalar& value) * \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&) */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -232,7 +232,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const * \sa LinSpaced(Scalar,Scalar) */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -264,7 +264,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -276,7 +276,7 @@ DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high) * Special version for fixed size types which does not require the size parameter. */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -286,7 +286,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high) /** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */ template<typename Derived> -bool DenseBase<Derived>::isApproxToConstant +EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant (const Scalar& val, const RealScalar& prec) const { typename internal::nested_eval<Derived,1>::type self(derived()); @@ -301,7 +301,7 @@ bool DenseBase<Derived>::isApproxToConstant * * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */ template<typename Derived> -bool DenseBase<Derived>::isConstant +EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant (const Scalar& val, const RealScalar& prec) const { return isApproxToConstant(val, prec); @@ -312,7 +312,7 @@ bool DenseBase<Derived>::isConstant * \sa setConstant(), Constant(), class CwiseNullaryOp */ template<typename Derived> -EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val) { setConstant(val); } @@ -322,7 +322,7 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val) * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val) { return derived() = Constant(rows(), cols(), val); } @@ -337,7 +337,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val) * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val) { resize(size); @@ -356,7 +356,7 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val) * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val) { resize(rows, cols); @@ -380,7 +380,7 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val) * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize)); @@ -400,7 +400,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return setLinSpaced(size(), low, high); @@ -423,7 +423,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, * \sa Zero(), Zero(Index) */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Zero(Index rows, Index cols) { return Constant(rows, cols, Scalar(0)); @@ -446,7 +446,7 @@ DenseBase<Derived>::Zero(Index rows, Index cols) * \sa Zero(), Zero(Index,Index) */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Zero(Index size) { return Constant(size, Scalar(0)); @@ -463,7 +463,7 @@ DenseBase<Derived>::Zero(Index size) * \sa Zero(Index), Zero(Index,Index) */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Zero() { return Constant(Scalar(0)); @@ -478,7 +478,7 @@ DenseBase<Derived>::Zero() * \sa class CwiseNullaryOp, Zero() */ template<typename Derived> -bool DenseBase<Derived>::isZero(const RealScalar& prec) const +EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const { typename internal::nested_eval<Derived,1>::type self(derived()); for(Index j = 0; j < cols(); ++j) @@ -496,7 +496,7 @@ bool DenseBase<Derived>::isZero(const RealScalar& prec) const * \sa class CwiseNullaryOp, Zero() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero() { return setConstant(Scalar(0)); } @@ -511,7 +511,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero() * \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setZero(Index newSize) { resize(newSize); @@ -529,7 +529,7 @@ PlainObjectBase<Derived>::setZero(Index newSize) * \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setZero(Index rows, Index cols) { resize(rows, cols); @@ -553,7 +553,7 @@ PlainObjectBase<Derived>::setZero(Index rows, Index cols) * \sa Ones(), Ones(Index), isOnes(), class Ones */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Ones(Index rows, Index cols) { return Constant(rows, cols, Scalar(1)); @@ -576,7 +576,7 @@ DenseBase<Derived>::Ones(Index rows, Index cols) * \sa Ones(), Ones(Index,Index), isOnes(), class Ones */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Ones(Index newSize) { return Constant(newSize, Scalar(1)); @@ -593,7 +593,7 @@ DenseBase<Derived>::Ones(Index newSize) * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones */ template<typename Derived> -EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Ones() { return Constant(Scalar(1)); @@ -608,7 +608,7 @@ DenseBase<Derived>::Ones() * \sa class CwiseNullaryOp, Ones() */ template<typename Derived> -bool DenseBase<Derived>::isOnes +EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes (const RealScalar& prec) const { return isApproxToConstant(Scalar(1), prec); @@ -622,7 +622,7 @@ bool DenseBase<Derived>::isOnes * \sa class CwiseNullaryOp, Ones() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes() { return setConstant(Scalar(1)); } @@ -637,7 +637,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes() * \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setOnes(Index newSize) { resize(newSize); @@ -655,7 +655,7 @@ PlainObjectBase<Derived>::setOnes(Index newSize) * \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase<Derived>::setOnes(Index rows, Index cols) { resize(rows, cols); @@ -679,7 +679,7 @@ PlainObjectBase<Derived>::setOnes(Index rows, Index cols) * \sa Identity(), setIdentity(), isIdentity() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType MatrixBase<Derived>::Identity(Index rows, Index cols) { return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>()); @@ -696,7 +696,7 @@ MatrixBase<Derived>::Identity(Index rows, Index cols) * \sa Identity(Index,Index), setIdentity(), isIdentity() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType MatrixBase<Derived>::Identity() { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) @@ -771,7 +771,7 @@ struct setIdentity_impl<Derived, true> * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity() { return internal::setIdentity_impl<Derived>::run(derived()); } @@ -787,7 +787,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity() * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity() */ template<typename Derived> -EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols) { derived().resize(rows, cols); return setIdentity(); @@ -800,7 +800,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i); @@ -815,7 +815,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return BasisReturnType(SquareMatrixType::Identity(),i); @@ -828,7 +828,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX() { return Derived::Unit(0); } /** \returns an expression of the Y axis unit vector (0,1{,0}^*) @@ -838,7 +838,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY() { return Derived::Unit(1); } /** \returns an expression of the Z axis unit vector (0,0,1{,0}^*) @@ -848,7 +848,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ() { return Derived::Unit(2); } /** \returns an expression of the W axis unit vector (0,0,0,1) @@ -858,7 +858,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template<typename Derived> -EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW() { return Derived::Unit(3); } } // end namespace Eigen diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 46fe5193c..90066ae73 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -296,7 +296,7 @@ template<typename Derived> class DenseBase EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue<OtherDerived>& func); - /** \ínternal + /** \internal * Copies \a other into *this without evaluating other. \returns a reference to *this. * \deprecated */ template<typename OtherDerived> @@ -484,9 +484,9 @@ template<typename Derived> class DenseBase return derived().coeff(0,0); } - bool all() const; - bool any() const; - Index count() const; + EIGEN_DEVICE_FUNC bool all() const; + EIGEN_DEVICE_FUNC bool any() const; + EIGEN_DEVICE_FUNC Index count() const; typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType; typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType; diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h index f76995af9..b195506a9 100644 --- a/Eigen/src/Core/EigenBase.h +++ b/Eigen/src/Core/EigenBase.h @@ -14,6 +14,7 @@ namespace Eigen { /** \class EigenBase + * \ingroup Core_Module * * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). * @@ -128,6 +129,7 @@ template<typename Derived> struct EigenBase */ template<typename Derived> template<typename OtherDerived> +EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other) { call_assignment(derived(), other.derived()); @@ -136,6 +138,7 @@ Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other) template<typename Derived> template<typename OtherDerived> +EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other) { call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -144,6 +147,7 @@ Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other) template<typename Derived> template<typename OtherDerived> +EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other) { call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>()); diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 27033a2dd..029f8ac36 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -230,7 +230,7 @@ pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>( * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]} * Currently, this function is only used for scalar * complex products. */ -template<typename Packet> EIGEN_DEVICE_FUNC inline Packet +template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; } /** \internal \returns a packet with elements of \a *from quadrupled. @@ -278,7 +278,7 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a, } /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */ -template<typename Packet> inline Packet +template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits<Packet>::type& a) { return a; } /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ @@ -482,7 +482,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& fro * by the current computation. */ template<typename Packet, int LoadMode> -inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) { return ploadt<Packet, LoadMode>(from); } diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 8d47fb8a4..a648aa0fa 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -1061,11 +1061,24 @@ double log(const double &x) { return ::log(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -typename NumTraits<T>::Real abs(const T &x) { +typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type +abs(const T &x) { EIGEN_USING_STD_MATH(abs); return abs(x); } +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type +abs(const T &x) { + return x; +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); } +EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float abs(const float &x) { return ::fabsf(x); } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index f7cf04cde..ce412180a 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -294,7 +294,7 @@ template<typename Derived> class MatrixBase * fuzzy comparison such as isApprox() * \sa isApprox(), operator!= */ template<typename OtherDerived> - inline bool operator==(const MatrixBase<OtherDerived>& other) const + EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const { return cwiseEqual(other).all(); } /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other. @@ -302,7 +302,7 @@ template<typename Derived> class MatrixBase * fuzzy comparison such as isApprox() * \sa isApprox(), operator== */ template<typename OtherDerived> - inline bool operator!=(const MatrixBase<OtherDerived>& other) const + EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const { return cwiseNotEqual(other).any(); } NoAlias<Derived,Eigen::MatrixBase > noalias(); diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index dd61195bc..daf489878 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -215,6 +215,8 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); } EIGEN_DEVICE_FUNC static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); } + + static inline int digits10() { return NumTraits<Scalar>::digits10(); } }; template<> struct NumTraits<std::string> diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 583b7f59e..c42725dbd 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -207,6 +207,12 @@ struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename static const bool value = true; }; +template<typename OtherXpr, typename Lhs, typename Rhs> +struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr, + const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > { + static const bool value = true; +}; + template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2> struct assignment_from_xpr_op_product { diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h index 719ed72a5..50099df82 100644 --- a/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -15,7 +15,7 @@ namespace Eigen { // TODO generalize the scalar type of 'other' template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>()); @@ -23,7 +23,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other) } template<typename Derived> -EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>()); @@ -31,7 +31,7 @@ EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other) } template<typename Derived> -EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>()); @@ -39,7 +39,7 @@ EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other) } template<typename Derived> -EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>()); diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h index 960a58597..a8daea511 100644 --- a/Eigen/src/Core/Solve.h +++ b/Eigen/src/Core/Solve.h @@ -34,12 +34,12 @@ template<typename Decomposition, typename RhsType,typename StorageKind> struct s template<typename Decomposition, typename RhsType> struct solve_traits<Decomposition,RhsType,Dense> { - typedef Matrix<typename RhsType::Scalar, + typedef typename make_proper_matrix_type<typename RhsType::Scalar, Decomposition::ColsAtCompileTime, RhsType::ColsAtCompileTime, RhsType::PlainObject::Options, Decomposition::MaxColsAtCompileTime, - RhsType::MaxColsAtCompileTime> PlainObject; + RhsType::MaxColsAtCompileTime>::type PlainObject; }; template<typename Decomposition, typename RhsType> diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index d2fe1e199..be04ed44d 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -170,7 +170,8 @@ MatrixBase<Derived>::stableNorm() const enum { CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit) || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough - ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT) // ifwe cannot allocate on the stack, then let's not bother about this optimization + ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT) + && (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization }; typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>, typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper; diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 52892db38..294c517ea 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -13,7 +13,7 @@ // Redistribution and use in source and binary forms, with or without // modification, are permitted. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, @@ -474,9 +474,59 @@ template<> struct is_arithmetic<half> { enum { value = true }; }; } // end namespace internal +} // end namespace Eigen + +namespace std { +template<> +struct numeric_limits<Eigen::half> { + static const bool is_specialized = true; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const bool has_infinity = true; + static const bool has_quiet_NaN = true; + static const bool has_signaling_NaN = true; + static const float_denorm_style has_denorm = denorm_present; + static const bool has_denorm_loss = false; + static const std::float_round_style round_style = std::round_to_nearest; + static const bool is_iec559 = false; + static const bool is_bounded = false; + static const bool is_modulo = false; + static const int digits = 11; + static const int digits10 = 2; + //static const int max_digits10 = ; + static const int radix = 2; + static const int min_exponent = -13; + static const int min_exponent10 = -4; + static const int max_exponent = 16; + static const int max_exponent10 = 4; + static const bool traps = true; + static const bool tinyness_before = false; + + static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } + static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } + static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); } + static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } + static Eigen::half round_error() { return Eigen::half(0.5); } + static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } + static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } +}; +} + +namespace Eigen { + template<> struct NumTraits<Eigen::half> : GenericNumTraits<Eigen::half> { + enum { + IsSigned = true, + IsInteger = false, + IsComplex = false, + RequireInitialization = false + }; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() { return half_impl::raw_uint16_to_half(0x0800); } diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index ad66399e0..4dda63188 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -291,7 +291,7 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) { EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<float4,4>& kernel) { - double tmp = kernel.packet[0].y; + float tmp = kernel.packet[0].y; kernel.packet[0].y = kernel.packet[1].x; kernel.packet[1].x = tmp; diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 84a56bdcc..836fbc0dd 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -51,14 +51,17 @@ typedef uint32x4_t Packet4ui; #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1<Packet4i>(X) -// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function -// which available on LLVM and GCC (at least) -#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC +#if EIGEN_ARCH_ARM64 + // __builtin_prefetch tends to do nothing on ARM64 compilers because the + // prefetch instructions there are too detailed for __builtin_prefetch to map + // meaningfully to them. + #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : ); +#elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR); #elif defined __pld #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR) -#elif !EIGEN_ARCH_ARM64 - #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); +#elif EIGEN_ARCH_ARM32 + #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : ); #else // by default no explicit prefetching #define EIGEN_ARM_PREFETCH(ADDR) @@ -113,7 +116,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { - const float32_t f[] = {0, 1, 2, 3}; + const float f[] = {0, 1, 2, 3}; Packet4f countdown = vld1q_f32(f); return vaddq_f32(pset1<Packet4f>(a), countdown); } diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index 6a30466fb..b03be0269 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -44,16 +44,16 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> { linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)), - m_interPacket(plset<Packet>(0)), m_flip(numext::abs(high)<numext::abs(low)) {} template<typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { + typedef typename NumTraits<Scalar>::Real RealScalar; if(m_flip) - return (i==0)? m_low : (m_high - (m_size1-i)*m_step); + return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step); else - return (i==m_size1)? m_high : (m_low + i*m_step); + return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step); } template<typename IndexType> @@ -63,7 +63,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) if(m_flip) { - Packet pi = padd(pset1<Packet>(Scalar(i-m_size1)),m_interPacket); + Packet pi = plset<Packet>(Scalar(i-m_size1)); Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi)); if(i==0) res = pinsertfirst(res, m_low); @@ -71,7 +71,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> } else { - Packet pi = padd(pset1<Packet>(Scalar(i)),m_interPacket); + Packet pi = plset<Packet>(Scalar(i)); Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi)); if(i==m_size1-unpacket_traits<Packet>::size+1) res = pinsertlast(res, m_high); @@ -83,7 +83,6 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> const Scalar m_high; const Index m_size1; const Scalar m_step; - const Packet m_interPacket; const bool m_flip; }; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 7122efa60..e844e37d1 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -269,10 +269,13 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0, LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0, - RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0 + RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0, + SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0 }; Index size = mat.cols(); + if(SkipDiag) + size--; Index depth = actualLhs.cols(); typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar, @@ -283,10 +286,11 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> internal::general_matrix_matrix_triangular_product<Index, typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, - IsRowMajor ? RowMajor : ColMajor, UpLo> + IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)> ::run(size, depth, - &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), - mat.data(), mat.outerStride(), actualAlpha, blocking); + &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(), + &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(), + mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking); } }; @@ -294,6 +298,7 @@ template<typename MatrixType, unsigned int UpLo> template<typename ProductType> TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) { + EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED); eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols()); general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta); diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 5b7c15cca..41e18ff07 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \ { \ - if (lhs==rhs) { \ + if ( lhs==rhs && ((UpLo&(Lower|Upper)==UpLo)) ) { \ general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \ } else { \ diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 427d3cd6b..38d6ddb9a 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 3 -#define EIGEN_MINOR_VERSION 3 +#define EIGEN_MINOR_VERSION 4 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index f6ef1bcf6..3e5a9badb 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -423,7 +423,7 @@ typedef Map<Quaternion<double>, Aligned> QuaternionMapAlignedd; // Generic Quaternion * Quaternion product // This product can be specialized for a given architecture via the Arch template argument. namespace internal { -template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options> struct quat_product +template<int Arch, class Derived1, class Derived2, typename Scalar> struct quat_product { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){ return Quaternion<Scalar> @@ -446,8 +446,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) return internal::quat_product<Architecture::Target, Derived, OtherDerived, - typename internal::traits<Derived>::Scalar, - EIGEN_PLAIN_ENUM_MIN(internal::traits<Derived>::Alignment, internal::traits<OtherDerived>::Alignment)>::run(*this, other); + typename internal::traits<Derived>::Scalar>::run(*this, other); } /** \sa operator*(Quaternion) */ @@ -672,7 +671,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> // Generic conjugate of a Quaternion namespace internal { -template<int Arch, class Derived, typename Scalar, int _Options> struct quat_conj +template<int Arch, class Derived, typename Scalar> struct quat_conj { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){ return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z()); @@ -691,8 +690,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::conjugate() const { return internal::quat_conj<Architecture::Target, Derived, - typename internal::traits<Derived>::Scalar, - internal::traits<Derived>::Alignment>::run(*this); + typename internal::traits<Derived>::Scalar>::run(*this); } diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h index 1a86ff837..f68cab583 100644 --- a/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -16,17 +16,23 @@ namespace Eigen { namespace internal { template<class Derived, class OtherDerived> -struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16> +struct quat_product<Architecture::SSE, Derived, OtherDerived, float> { + enum { + AAlignment = traits<Derived>::Alignment, + BAlignment = traits<OtherDerived>::Alignment, + ResAlignment = traits<Quaternion<float> >::Alignment + }; static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) { Quaternion<float> res; const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f); - __m128 a = _a.coeffs().template packet<Aligned16>(0); - __m128 b = _b.coeffs().template packet<Aligned16>(0); + __m128 a = _a.coeffs().template packet<AAlignment>(0); + __m128 b = _b.coeffs().template packet<BAlignment>(0); __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); - pstore(&res.x(), + pstoret<float,Packet4f,ResAlignment>( + &res.x(), _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)), _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0), vec4f_swizzle1(b,1,2,0,0))), @@ -36,14 +42,17 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16> } }; -template<class Derived, int Alignment> -struct quat_conj<Architecture::SSE, Derived, float, Alignment> +template<class Derived> +struct quat_conj<Architecture::SSE, Derived, float> { + enum { + ResAlignment = traits<Quaternion<float> >::Alignment + }; static inline Quaternion<float> run(const QuaternionBase<Derived>& q) { Quaternion<float> res; const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f); - pstore(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<Alignment>(0))); + pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0))); return res; } }; @@ -52,6 +61,9 @@ struct quat_conj<Architecture::SSE, Derived, float, Alignment> template<typename VectorLhs,typename VectorRhs> struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> { + enum { + ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment + }; static inline typename plain_matrix_type<VectorLhs>::type run(const VectorLhs& lhs, const VectorRhs& rhs) { @@ -60,7 +72,7 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); typename plain_matrix_type<VectorLhs>::type res; - pstore(&res.x(),_mm_sub_ps(mul1,mul2)); + pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2)); return res; } }; @@ -68,9 +80,14 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> -template<class Derived, class OtherDerived, int Alignment> -struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> +template<class Derived, class OtherDerived> +struct quat_product<Architecture::SSE, Derived, OtherDerived, double> { + enum { + BAlignment = traits<OtherDerived>::Alignment, + ResAlignment = traits<Quaternion<double> >::Alignment + }; + static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) { const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); @@ -78,8 +95,8 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> Quaternion<double> res; const double* a = _a.coeffs().data(); - Packet2d b_xy = _b.coeffs().template packet<Alignment>(0); - Packet2d b_zw = _b.coeffs().template packet<Alignment>(2); + Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0); + Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2); Packet2d a_xx = pset1<Packet2d>(a[0]); Packet2d a_yy = pset1<Packet2d>(a[1]); Packet2d a_zz = pset1<Packet2d>(a[2]); @@ -97,9 +114,9 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw)); #ifdef EIGEN_VECTORIZE_SSE3 EIGEN_UNUSED_VARIABLE(mask) - pstore(&res.x(), _mm_addsub_pd(t1, preverse(t2))); + pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2))); #else - pstore(&res.x(), padd(t1, pxor(mask,preverse(t2)))); + pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2)))); #endif /* @@ -111,25 +128,28 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy)); #ifdef EIGEN_VECTORIZE_SSE3 EIGEN_UNUSED_VARIABLE(mask) - pstore(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); + pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); #else - pstore(&res.z(), psub(t1, pxor(mask,preverse(t2)))); + pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2)))); #endif return res; } }; -template<class Derived, int Alignment> -struct quat_conj<Architecture::SSE, Derived, double, Alignment> +template<class Derived> +struct quat_conj<Architecture::SSE, Derived, double> { + enum { + ResAlignment = traits<Quaternion<double> >::Alignment + }; static inline Quaternion<double> run(const QuaternionBase<Derived>& q) { Quaternion<double> res; const __m128d mask0 = _mm_setr_pd(-0.,-0.); const __m128d mask2 = _mm_setr_pd(-0.,0.); - pstore(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<Alignment>(0))); - pstore(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<Alignment>(2))); + pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0))); + pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2))); return res; } }; diff --git a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index 358444aff..facdaf890 100644 --- a/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -152,13 +152,28 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar> { // Compute the inverse squared-norm of each column of mat m_invdiag.resize(mat.cols()); - for(Index j=0; j<mat.outerSize(); ++j) + if(MatType::IsRowMajor) { - RealScalar sum = mat.innerVector(j).squaredNorm(); - if(sum>0) - m_invdiag(j) = RealScalar(1)/sum; - else - m_invdiag(j) = RealScalar(1); + m_invdiag.setZero(); + for(Index j=0; j<mat.outerSize(); ++j) + { + for(typename MatType::InnerIterator it(mat,j); it; ++it) + m_invdiag(it.index()) += numext::abs2(it.value()); + } + for(Index j=0; j<mat.cols(); ++j) + if(numext::real(m_invdiag(j))>RealScalar(0)) + m_invdiag(j) = RealScalar(1)/numext::real(m_invdiag(j)); + } + else + { + for(Index j=0; j<mat.outerSize(); ++j) + { + RealScalar sum = mat.innerVector(j).squaredNorm(); + if(sum>RealScalar(0)) + m_invdiag(j) = RealScalar(1)/sum; + else + m_invdiag(j) = RealScalar(1); + } } Base::m_isInitialized = true; return *this; diff --git a/Eigen/src/Jacobi/Jacobi.h b/Eigen/src/Jacobi/Jacobi.h index d25af8e90..c30326e1d 100644 --- a/Eigen/src/Jacobi/Jacobi.h +++ b/Eigen/src/Jacobi/Jacobi.h @@ -302,8 +302,12 @@ template<typename VectorX, typename VectorY, typename OtherScalar> void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j) { typedef typename VectorX::Scalar Scalar; - enum { PacketSize = packet_traits<Scalar>::size }; + enum { + PacketSize = packet_traits<Scalar>::size, + OtherPacketSize = packet_traits<OtherScalar>::size + }; typedef typename packet_traits<Scalar>::type Packet; + typedef typename packet_traits<OtherScalar>::type OtherPacket; eigen_assert(xpr_x.size() == xpr_y.size()); Index size = xpr_x.size(); Index incrx = xpr_x.derived().innerStride(); @@ -321,6 +325,7 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x if(VectorX::SizeAtCompileTime == Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && + (PacketSize == OtherPacketSize) && ((incrx==1 && incry==1) || PacketSize == 1)) { // both vectors are sequentially stored in memory => vectorization @@ -329,9 +334,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x Index alignedStart = internal::first_default_aligned(y, size); Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; - const Packet pc = pset1<Packet>(c); - const Packet ps = pset1<Packet>(s); - conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; for(Index i=0; i<alignedStart; ++i) { @@ -350,8 +356,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x { Packet xi = pload<Packet>(px); Packet yi = pload<Packet>(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); px += PacketSize; py += PacketSize; } @@ -365,10 +371,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x Packet xi1 = ploadu<Packet>(px+PacketSize); Packet yi = pload <Packet>(py); Packet yi1 = pload <Packet>(py+PacketSize); - pstoreu(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1))); - pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); - pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1))); + pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); + pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); px += Peeling*PacketSize; py += Peeling*PacketSize; } @@ -376,8 +382,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x { Packet xi = ploadu<Packet>(x+peelingEnd); Packet yi = pload <Packet>(y+peelingEnd); - pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); } } @@ -393,19 +399,21 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x /*** fixed-size vectorized path ***/ else if(VectorX::SizeAtCompileTime != Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && + (PacketSize == OtherPacketSize) && (EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment { - const Packet pc = pset1<Packet>(c); - const Packet ps = pset1<Packet>(s); - conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; Scalar* EIGEN_RESTRICT px = x; Scalar* EIGEN_RESTRICT py = y; for(Index i=0; i<size; i+=PacketSize) { Packet xi = pload<Packet>(px); Packet yi = pload<Packet>(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); px += PacketSize; py += PacketSize; } diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h index 933cd564b..da85b4d6e 100644 --- a/Eigen/src/OrderingMethods/Eigen_Colamd.h +++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -1004,7 +1004,7 @@ static IndexType find_ordering /* return the number of garbage collections */ COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ; /* get pivot column from head of minimum degree list */ - while (head [min_score] == COLAMD_EMPTY && min_score < n_col) + while (min_score < n_col && head [min_score] == COLAMD_EMPTY) { min_score++ ; } diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h index 0e47c8332..a7b47d55d 100644 --- a/Eigen/src/QR/ColPivHouseholderQR.h +++ b/Eigen/src/QR/ColPivHouseholderQR.h @@ -506,8 +506,8 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace() m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k); } - RealScalar threshold_helper = numext::abs2<Scalar>(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows); - RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<Scalar>::epsilon()); + RealScalar threshold_helper = numext::abs2<RealScalar>(m_colNormsUpdated.maxCoeff() * NumTraits<RealScalar>::epsilon()) / RealScalar(rows); + RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<RealScalar>::epsilon()); m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) m_maxpivot = RealScalar(0); @@ -553,12 +553,12 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace() // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf // and used in LAPACK routines xGEQPF and xGEQP3. // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html - if (m_colNormsUpdated.coeffRef(j) != 0) { + if (m_colNormsUpdated.coeffRef(j) != RealScalar(0)) { RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j); temp = (RealScalar(1) + temp) * (RealScalar(1) - temp); - temp = temp < 0 ? 0 : temp; - RealScalar temp2 = temp * numext::abs2<Scalar>(m_colNormsUpdated.coeffRef(j) / - m_colNormsDirect.coeffRef(j)); + temp = temp < RealScalar(0) ? RealScalar(0) : temp; + RealScalar temp2 = temp * numext::abs2<RealScalar>(m_colNormsUpdated.coeffRef(j) / + m_colNormsDirect.coeffRef(j)); if (temp2 <= norm_downdate_threshold) { // The updated norm has become too inaccurate so re-compute the column // norm directly. diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h index 25fca6f4d..d7a4271cb 100644 --- a/Eigen/src/SVD/BDCSVD.h +++ b/Eigen/src/SVD/BDCSVD.h @@ -77,6 +77,7 @@ public: typedef _MatrixType MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; + typedef typename NumTraits<RealScalar>::Literal Literal; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, @@ -259,7 +260,7 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows RealScalar scale = matrix.cwiseAbs().maxCoeff(); - if(scale==RealScalar(0)) scale = RealScalar(1); + if(scale==Literal(0)) scale = Literal(1); MatrixX copy; if (m_isTranspose) copy = matrix.adjoint()/scale; else copy = matrix/scale; @@ -351,13 +352,13 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co Index k1=0, k2=0; for(Index j=0; j<n; ++j) { - if( (A.col(j).head(n1).array()!=0).any() ) + if( (A.col(j).head(n1).array()!=Literal(0)).any() ) { A1.col(k1) = A.col(j).head(n1); B1.row(k1) = B.row(j); ++k1; } - if( (A.col(j).tail(n2).array()!=0).any() ) + if( (A.col(j).tail(n2).array()!=Literal(0)).any() ) { A2.col(k2) = A.col(j).tail(n2); B2.row(k2) = B.row(j); @@ -449,11 +450,11 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, l = m_naiveU.row(1).segment(firstCol, k); f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1); } - if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1; + if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1); if (r0<considerZero) { - c0 = 1; - s0 = 0; + c0 = Literal(1); + s0 = Literal(0); } else { @@ -574,7 +575,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n); m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal(); ArrayRef diag = m_workspace.head(n); - diag(0) = 0; + diag(0) = Literal(0); // Allocate space for singular values and vectors singVals.resize(n); @@ -590,7 +591,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec // but others are interleaved and we must ignore them at this stage. // To this end, let's compute a permutation skipping them: Index actual_n = n; - while(actual_n>1 && diag(actual_n-1)==0) --actual_n; + while(actual_n>1 && diag(actual_n-1)==Literal(0)) --actual_n; Index m = 0; // size of the deflated problem for(Index k=0;k<actual_n;++k) if(abs(col0(k))>considerZero) @@ -691,7 +692,7 @@ template <typename MatrixType> typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift) { Index m = perm.size(); - RealScalar res = 1; + RealScalar res = Literal(1); for(Index i=0; i<m; ++i) { Index j = perm(i); @@ -710,16 +711,16 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d Index n = col0.size(); Index actual_n = n; - while(actual_n>1 && col0(actual_n-1)==0) --actual_n; + while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n; for (Index k = 0; k < n; ++k) { - if (col0(k) == 0 || actual_n==1) + if (col0(k) == Literal(0) || actual_n==1) { // if col0(k) == 0, then entry is deflated, so singular value is on diagonal // if actual_n==1, then the deflated problem is already diagonalized singVals(k) = k==0 ? col0(0) : diag(k); - mus(k) = 0; + mus(k) = Literal(0); shifts(k) = k==0 ? col0(0) : diag(k); continue; } @@ -733,13 +734,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d { // Skip deflated singular values Index l = k+1; - while(col0(l)==0) { ++l; eigen_internal_assert(l<actual_n); } + while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); } right = diag(l); } // first decide whether it's closer to the left end or the right end - RealScalar mid = left + (right-left) / 2; - RealScalar fMid = secularEq(mid, col0, diag, perm, diag, 0); + RealScalar mid = left + (right-left) / Literal(2); + RealScalar fMid = secularEq(mid, col0, diag, perm, diag, Literal(0)); #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << right-left << "\n"; std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right) << "\n"; @@ -755,7 +756,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d << " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0) << " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n"; #endif - RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right; + RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right; // measure everything relative to shift Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n); @@ -785,13 +786,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d // rational interpolation: fit a function of the form a / mu + b through the two previous // iterates and use its zero to compute the next iterate - bool useBisection = fPrev*fCur>0; - while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection) + bool useBisection = fPrev*fCur>Literal(0); + while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection) { ++m_numIters; // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples. - RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev); + RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev); RealScalar b = fCur - a / muCur; // And find mu such that f(mu)==0: RealScalar muZero = -a/b; @@ -803,8 +804,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d fCur = fZero; - if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true; - if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true; + if (shift == left && (muCur < Literal(0) || muCur > right - left)) useBisection = true; + if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true; if (abs(fCur)>abs(fPrev)) useBisection = true; } @@ -841,13 +842,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n"; } #endif - eigen_internal_assert(fLeft * fRight < 0); + eigen_internal_assert(fLeft * fRight < Literal(0)); - while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted))) + while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted))) { - RealScalar midShifted = (leftShifted + rightShifted) / 2; + RealScalar midShifted = (leftShifted + rightShifted) / Literal(2); fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift); - if (fLeft * fMid < 0) + if (fLeft * fMid < Literal(0)) { rightShifted = midShifted; } @@ -858,7 +859,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d } } - muCur = (leftShifted + rightShifted) / 2; + muCur = (leftShifted + rightShifted) / Literal(2); } singVals[k] = shift + muCur; @@ -892,8 +893,8 @@ void BDCSVD<MatrixType>::perturbCol0 // The offset permits to skip deflated entries while computing zhat for (Index k = 0; k < n; ++k) { - if (col0(k) == 0) // deflated - zhat(k) = 0; + if (col0(k) == Literal(0)) // deflated + zhat(k) = Literal(0); else { // see equation (3.6) @@ -918,7 +919,7 @@ void BDCSVD<MatrixType>::perturbCol0 std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n"; #endif RealScalar tmp = sqrt(prod); - zhat(k) = col0(k) > 0 ? tmp : -tmp; + zhat(k) = col0(k) > Literal(0) ? tmp : -tmp; } } } @@ -934,7 +935,7 @@ void BDCSVD<MatrixType>::computeSingVecs for (Index k = 0; k < n; ++k) { - if (zhat(k) == 0) + if (zhat(k) == Literal(0)) { U.col(k) = VectorType::Unit(n+1, k); if (m_compV) V.col(k) = VectorType::Unit(n, k); @@ -947,7 +948,7 @@ void BDCSVD<MatrixType>::computeSingVecs Index i = perm(l); U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); } - U(n,k) = 0; + U(n,k) = Literal(0); U.col(k).normalize(); if (m_compV) @@ -958,7 +959,7 @@ void BDCSVD<MatrixType>::computeSingVecs Index i = perm(l); V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); } - V(0,k) = -1; + V(0,k) = Literal(-1); V.col(k).normalize(); } } @@ -980,14 +981,14 @@ void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index RealScalar c = m_computed(start, start); RealScalar s = m_computed(start+i, start); RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); - if (r == 0) + if (r == Literal(0)) { - m_computed(start+i, start+i) = 0; + m_computed(start+i, start+i) = Literal(0); return; } m_computed(start,start) = r; - m_computed(start+i, start) = 0; - m_computed(start+i, start+i) = 0; + m_computed(start+i, start) = Literal(0); + m_computed(start+i, start+i) = Literal(0); JacobiRotation<RealScalar> J(c/r,-s/r); if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J); @@ -1020,7 +1021,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi << m_computed(firstColm + i+1, firstColm+i+1) << " " << m_computed(firstColm + i+2, firstColm+i+2) << "\n"; #endif - if (r==0) + if (r==Literal(0)) { m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); return; @@ -1029,7 +1030,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi s/=r; m_computed(firstColm + i, firstColm) = r; m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i); - m_computed(firstColm + j, firstColm) = 0; + m_computed(firstColm + j, firstColm) = Literal(0); JacobiRotation<RealScalar> J(c,-s); if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J); @@ -1053,7 +1054,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)(); RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff(); RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag); - RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag); + RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag); #ifdef EIGEN_BDCSVD_SANITY_CHECKS assert(m_naiveU.allFinite()); @@ -1081,7 +1082,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n"; #endif - col0(i) = 0; + col0(i) = Literal(0); } //condition 4.3 diff --git a/Eigen/src/SVD/UpperBidiagonalization.h b/Eigen/src/SVD/UpperBidiagonalization.h index 0b1460894..11ac847e1 100644 --- a/Eigen/src/SVD/UpperBidiagonalization.h +++ b/Eigen/src/SVD/UpperBidiagonalization.h @@ -159,6 +159,8 @@ void upperbidiagonalization_blocked_helper(MatrixType& A, traits<MatrixType>::Flags & RowMajorBit> > Y) { typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename NumTraits<RealScalar>::Literal Literal; enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit }; typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride; typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride; @@ -263,7 +265,7 @@ void upperbidiagonalization_blocked_helper(MatrixType& A, SubMatType A10( A.block(bs,0, brows-bs,bs) ); SubMatType A01( A.block(0,bs, bs,bcols-bs) ); Scalar tmp = A01(bs-1,0); - A01(bs-1,0) = 1; + A01(bs-1,0) = Literal(1); A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint(); A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01; A01(bs-1,0) = tmp; diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index 9e39be738..5ab64f1a8 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -47,6 +47,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView enum { Mode = _Mode, + TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0), RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime, ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime }; @@ -368,7 +369,7 @@ struct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, Pr // transpose everything Transpose<Dest> dstT(dst); - internal::sparse_selfadjoint_time_dense_product<RhsView::Mode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); + internal::sparse_selfadjoint_time_dense_product<RhsView::TransposeMode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); } }; diff --git a/Eigen/src/UmfPackSupport/UmfPackSupport.h b/Eigen/src/UmfPackSupport/UmfPackSupport.h index dc74de935..91c09ab13 100644 --- a/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -10,19 +10,37 @@ #ifndef EIGEN_UMFPACKSUPPORT_H #define EIGEN_UMFPACKSUPPORT_H -namespace Eigen { +namespace Eigen { /* TODO extract L, extract U, compute det, etc... */ // generic double/complex<double> wrapper functions: -inline void umfpack_defaults(double control[UMFPACK_CONTROL], double) +inline void umfpack_defaults(double control[UMFPACK_CONTROL], double) { umfpack_di_defaults(control); } -inline void umfpack_defaults(double control[UMFPACK_CONTROL], std::complex<double>) +inline void umfpack_defaults(double control[UMFPACK_CONTROL], std::complex<double>) { umfpack_zi_defaults(control); } +inline void umfpack_report_info(double control[UMFPACK_CONTROL], double info[UMFPACK_INFO], double) +{ umfpack_di_report_info(control, info);} + +inline void umfpack_report_info(double control[UMFPACK_CONTROL], double info[UMFPACK_INFO], std::complex<double>) +{ umfpack_zi_report_info(control, info);} + +inline void umfpack_report_status(double control[UMFPACK_CONTROL], int status, double) +{ umfpack_di_report_status(control, status);} + +inline void umfpack_report_status(double control[UMFPACK_CONTROL], int status, std::complex<double>) +{ umfpack_zi_report_status(control, status);} + +inline void umfpack_report_control(double control[UMFPACK_CONTROL], double) +{ umfpack_di_report_control(control);} + +inline void umfpack_report_control(double control[UMFPACK_CONTROL], std::complex<double>) +{ umfpack_zi_report_control(control);} + inline void umfpack_free_numeric(void **Numeric, double) { umfpack_di_free_numeric(Numeric); *Numeric = 0; } @@ -156,6 +174,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > public: typedef Array<double, UMFPACK_CONTROL, 1> UmfpackControl; + typedef Array<double, UMFPACK_INFO, 1> UmfpackInfo; UmfPackLU() : m_dummy(0,0), mp_matrix(m_dummy) @@ -215,7 +234,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > return m_q; } - /** Computes the sparse Cholesky decomposition of \a matrix + /** Computes the sparse Cholesky decomposition of \a matrix * Note that the matrix should be column-major, and in compressed format for best performance. * \sa SparseMatrix::makeCompressed(). */ @@ -240,7 +259,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > { if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar()); if(m_numeric) umfpack_free_numeric(&m_numeric,Scalar()); - + grab(matrix.derived()); analyzePattern_impl(); @@ -267,7 +286,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > { return m_control; } - + /** Provides access to the control settings array used by UmfPack. * * If this array contains NaN's, the default values are used. @@ -278,7 +297,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > { return m_control; } - + /** Performs a numeric decomposition of \a matrix * * The given matrix must has the same sparcity than the matrix on which the pattern anylysis has been performed. @@ -293,10 +312,38 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > umfpack_free_numeric(&m_numeric,Scalar()); grab(matrix.derived()); - + factorize_impl(); } + /** Prints the current UmfPack control settings. + * + * \sa umfpackControl() + */ + void umfpackReportControl() + { + umfpack_report_control(m_control.data(), Scalar()); + } + + /** Prints statistics collected by UmfPack. + * + * \sa analyzePattern(), compute() + */ + void umfpackReportInfo() + { + eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); + umfpack_report_info(m_control.data(), m_umfpackInfo.data(), Scalar()); + } + + /** Prints the status of the previous factorization operation performed by UmfPack (symbolic or numerical factorization). + * + * \sa analyzePattern(), compute() + */ + void umfpackReportStatus() { + eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); + umfpack_report_status(m_control.data(), m_fact_errorCode, Scalar()); + } + /** \internal */ template<typename BDerived,typename XDerived> bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived> &x) const; @@ -314,41 +361,42 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > m_numeric = 0; m_symbolic = 0; m_extractedDataAreDirty = true; + + umfpack_defaults(m_control.data(), Scalar()); } - + void analyzePattern_impl() { - umfpack_defaults(m_control.data(), Scalar()); - int errorCode = 0; - errorCode = umfpack_symbolic(internal::convert_index<int>(mp_matrix.rows()), - internal::convert_index<int>(mp_matrix.cols()), - mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), - &m_symbolic, m_control.data(), 0); + m_fact_errorCode = umfpack_symbolic(internal::convert_index<int>(mp_matrix.rows()), + internal::convert_index<int>(mp_matrix.cols()), + mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), + &m_symbolic, m_control.data(), m_umfpackInfo.data()); m_isInitialized = true; - m_info = errorCode ? InvalidInput : Success; + m_info = m_fact_errorCode ? InvalidInput : Success; m_analysisIsOk = true; m_factorizationIsOk = false; m_extractedDataAreDirty = true; } - + void factorize_impl() { + m_fact_errorCode = umfpack_numeric(mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), - m_symbolic, &m_numeric, m_control.data(), 0); + m_symbolic, &m_numeric, m_control.data(), m_umfpackInfo.data()); m_info = m_fact_errorCode == UMFPACK_OK ? Success : NumericalIssue; m_factorizationIsOk = true; m_extractedDataAreDirty = true; } - + template<typename MatrixDerived> void grab(const EigenBase<MatrixDerived> &A) { mp_matrix.~UmfpackMatrixRef(); ::new (&mp_matrix) UmfpackMatrixRef(A.derived()); } - + void grab(const UmfpackMatrixRef &A) { if(&(A.derived()) != &mp_matrix) @@ -357,19 +405,20 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > ::new (&mp_matrix) UmfpackMatrixRef(A); } } - + // cached data to reduce reallocation, etc. mutable LUMatrixType m_l; int m_fact_errorCode; UmfpackControl m_control; - + mutable UmfpackInfo m_umfpackInfo; + mutable LUMatrixType m_u; mutable IntColVectorType m_p; mutable IntRowVectorType m_q; UmfpackMatrixType m_dummy; UmfpackMatrixRef mp_matrix; - + void* m_numeric; void* m_symbolic; @@ -377,7 +426,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > int m_factorizationIsOk; int m_analysisIsOk; mutable bool m_extractedDataAreDirty; - + private: UmfPackLU(const UmfPackLU& ) { } }; @@ -427,7 +476,7 @@ bool UmfPackLU<MatrixType>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBas eigen_assert((BDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major rhs yet"); eigen_assert((XDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major result yet"); eigen_assert(b.derived().data() != x.derived().data() && " Umfpack does not support inplace solve"); - + int errorCode; Scalar* x_ptr = 0; Matrix<Scalar,Dynamic,1> x_tmp; @@ -442,7 +491,7 @@ bool UmfPackLU<MatrixType>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBas x_ptr = &x.col(j).coeffRef(0); errorCode = umfpack_solve(UMFPACK_A, mp_matrix.outerIndexPtr(), mp_matrix.innerIndexPtr(), mp_matrix.valuePtr(), - x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, m_control.data(), 0); + x_ptr, &b.const_cast_derived().col(j).coeffRef(0), m_numeric, m_control.data(), m_umfpackInfo.data()); if(x.innerStride()!=1) x.col(j) = x_tmp; if (errorCode!=0) diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt index 8d53f4ae2..932735698 100644 --- a/bench/spbench/CMakeLists.txt +++ b/bench/spbench/CMakeLists.txt @@ -38,25 +38,32 @@ if(SUPERLU_FOUND AND BLAS_FOUND) endif() -find_package(Pastix) -find_package(Scotch) -find_package(Metis) -if(PASTIX_FOUND AND BLAS_FOUND) +find_package(PASTIX QUIET COMPONENTS METIS SCOTCH) +# check that the PASTIX found is a version without MPI +find_path(PASTIX_pastix_nompi.h_INCLUDE_DIRS + NAMES pastix_nompi.h + HINTS ${PASTIX_INCLUDE_DIRS} +) +if (NOT PASTIX_pastix_nompi.h_INCLUDE_DIRS) + message(STATUS "A version of Pastix has been found but pastix_nompi.h does not exist in the include directory." + " Because Eigen tests require a version without MPI, we disable the Pastix backend.") +endif() +if(PASTIX_FOUND AND PASTIX_pastix_nompi.h_INCLUDE_DIRS AND BLAS_FOUND) add_definitions("-DEIGEN_PASTIX_SUPPORT") - include_directories(${PASTIX_INCLUDES}) + include_directories(${PASTIX_INCLUDE_DIRS_DEP}) if(SCOTCH_FOUND) - include_directories(${SCOTCH_INCLUDES}) + include_directories(${SCOTCH_INCLUDE_DIRS}) set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${SCOTCH_LIBRARIES}) elseif(METIS_FOUND) - include_directories(${METIS_INCLUDES}) + include_directories(${METIS_INCLUDE_DIRS}) set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${METIS_LIBRARIES}) endif(SCOTCH_FOUND) - set(SPARSE_LIBS ${SPARSE_LIBS} ${PASTIX_LIBRARIES} ${ORDERING_LIBRARIES} ${BLAS_LIBRARIES}) - set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES} ${BLAS_LIBRARIES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${PASTIX_LIBRARIES_DEP} ${ORDERING_LIBRARIES}) + set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES_DEP}) endif(PASTIX_FOUND AND BLAS_FOUND) if(METIS_FOUND) - include_directories(${METIS_INCLUDES}) + include_directories(${METIS_INCLUDE_DIRS}) set (SPARSE_LIBS ${SPARSE_LIBS} ${METIS_LIBRARIES}) add_definitions("-DEIGEN_METIS_SUPPORT") endif(METIS_FOUND) diff --git a/cmake/FindBLAS.cmake b/cmake/FindBLAS.cmake index 68c4e0724..9f74b07fe 100644 --- a/cmake/FindBLAS.cmake +++ b/cmake/FindBLAS.cmake @@ -1,385 +1,1363 @@ -# Find BLAS library +### # -# This module finds an installed library that implements the BLAS +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find BLAS library +# This module finds an installed fortran library that implements the BLAS # linear-algebra interface (see http://www.netlib.org/blas/). -# The list of libraries searched for is mainly taken +# The list of libraries searched for is taken # from the autoconf macro file, acx_blas.m4 (distributed at # http://ac-archive.sourceforge.net/ac-archive/acx_blas.html). # # This module sets the following variables: # BLAS_FOUND - set to true if a library implementing the BLAS interface # is found -# BLAS_INCLUDE_DIR - Directories containing the BLAS header files -# BLAS_DEFINITIONS - Compilation options to use BLAS -# BLAS_LINKER_FLAGS - Linker flags to use BLAS (excluding -l +# BLAS_LINKER_FLAGS - uncached list of required linker flags (excluding -l # and -L). -# BLAS_LIBRARIES_DIR - Directories containing the BLAS libraries. -# May be null if BLAS_LIBRARIES contains libraries name using full path. -# BLAS_LIBRARIES - List of libraries to link against BLAS interface. -# May be null if the compiler supports auto-link (e.g. VC++). -# BLAS_USE_FILE - The name of the cmake module to include to compile -# applications or libraries using BLAS. +# BLAS_COMPILER_FLAGS - uncached list of required compiler flags (including -I for mkl headers). +# BLAS_LIBRARIES - uncached list of libraries (using full path name) to +# link against to use BLAS +# BLAS95_LIBRARIES - uncached list of libraries (using full path name) +# to link against to use BLAS95 interface +# BLAS95_FOUND - set to true if a library implementing the BLAS f95 interface +# is found +# BLA_STATIC if set on this determines what kind of linkage we do (static) +# BLA_VENDOR if set checks only the specified vendor, if not set checks +# all the possibilities +# BLAS_VENDOR_FOUND stores the BLAS vendor found +# BLA_F95 if set on tries to find the f95 interfaces for BLAS/LAPACK +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DBLAS_DIR=path/to/blas): +# BLAS_DIR - Where to find the base directory of blas +# BLAS_INCDIR - Where to find the header files +# BLAS_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: BLAS_DIR, BLAS_INCDIR, BLAS_LIBDIR +# For MKL case and if no paths are given as hints, we will try to use the MKLROOT +# environment variable +# BLAS_VERBOSE Print some additional information during BLAS libraries detection +########## +### List of vendors (BLA_VENDOR) valid in this module +########## List of vendors (BLA_VENDOR) valid in this module +## Open (for OpenBlas), Eigen (for EigenBlas), Goto, ATLAS PhiPACK, +## CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT +## Intel10_32 (intel mkl v10 32 bit), Intel10_64lp (intel mkl v10 64 bit,lp thread model, lp64 model), +## Intel10_64lp_seq (intel mkl v10 64 bit,sequential code, lp64 model), +## Intel( older versions of mkl 32 and 64 bit), +## ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic +# C/CXX should be enabled to use Intel mkl +### +# We handle different modes to find the dependency +# +# - Detection if already installed on the system +# - BLAS libraries can be detected from different ways +# Here is the order of precedence: +# 1) we look in cmake variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined +# 2) we look in environment variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined +# 3) we look in common environnment variables depending on the system (INCLUDE, C_INCLUDE_PATH, CPATH - LIB, DYLD_LIBRARY_PATH, LD_LIBRARY_PATH) +# 4) we look in common system paths depending on the system, see for example paths contained in the following cmake variables: +# - CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES +# - CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_C_IMPLICIT_LINK_DIRECTORIES +# + +#============================================================================= +# Copyright 2007-2009 Kitware, Inc. # -# This module was modified by CGAL team: -# - find libraries for a C++ compiler, instead of Fortran -# - added BLAS_INCLUDE_DIR, BLAS_DEFINITIONS and BLAS_LIBRARIES_DIR -# - removed BLAS95_LIBRARIES +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) + +## Some macros to print status when search for headers and libs +# This macro informs why the _lib_to_find file has not been found +macro(Print_Find_Library_Blas_Status _libname _lib_to_find) + + # save _libname upper/lower case + string(TOUPPER ${_libname} LIBNAME) + string(TOLOWER ${_libname} libname) + + # print status + #message(" ") + if(${LIBNAME}_LIBDIR) + message("${Yellow}${LIBNAME}_LIBDIR is defined but ${_lib_to_find}" + "has not been found in ${ARGN}${ColourReset}") + else() + if(${LIBNAME}_DIR) + message("${Yellow}${LIBNAME}_DIR is defined but ${_lib_to_find}" + "has not been found in ${ARGN}${ColourReset}") + else() + message("${Yellow}${_lib_to_find} not found." + "Nor ${LIBNAME}_DIR neither ${LIBNAME}_LIBDIR" + "are defined so that we look for ${_lib_to_find} in" + "system paths (Linux: LD_LIBRARY_PATH, Windows: LIB," + "Mac: DYLD_LIBRARY_PATH," + "CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES," + "CMAKE_C_IMPLICIT_LINK_DIRECTORIES)${ColourReset}") + if(_lib_env) + message("${Yellow}${_lib_to_find} has not been found in" + "${_lib_env}${ColourReset}") + endif() + endif() + endif() + message("${BoldYellow}Please indicate where to find ${_lib_to_find}. You have three options:\n" + "- Option 1: Provide the Installation directory of BLAS library with cmake option: -D${LIBNAME}_DIR=your/path/to/${libname}/\n" + "- Option 2: Provide the directory where to find the library with cmake option: -D${LIBNAME}_LIBDIR=your/path/to/${libname}/lib/\n" + "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" + "- Option 4: If your library provides a PkgConfig file, make sure pkg-config finds your library${ColourReset}") + +endmacro() + +# This macro informs why the _lib_to_find file has not been found +macro(Print_Find_Library_Blas_CheckFunc_Status _name) + + # save _libname upper/lower case + string(TOUPPER ${_name} FUNCNAME) + string(TOLOWER ${_name} funcname) + + # print status + #message(" ") + message("${Red}Libs have been found but check of symbol ${_name} failed " + "with following libraries ${ARGN}${ColourReset}") + message("${BoldRed}Please open your error file CMakeFiles/CMakeError.log" + "to figure out why it fails${ColourReset}") + #message(" ") + +endmacro() + +if (NOT BLAS_FOUND) + set(BLAS_DIR "" CACHE PATH "Installation directory of BLAS library") + if (NOT BLAS_FIND_QUIETLY) + message(STATUS "A cache variable, namely BLAS_DIR, has been set to specify the install directory of BLAS") + endif() +endif() + +option(BLAS_VERBOSE "Print some additional information during BLAS libraries detection" OFF) +mark_as_advanced(BLAS_VERBOSE) include(CheckFunctionExists) +include(CheckFortranFunctionExists) +set(_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) -# This macro checks for the existence of the combination of fortran libraries -# given by _list. If the combination is found, this macro checks (using the -# check_function_exists macro) whether can link against that library -# combination using the name of a routine given by _name using the linker -# flags given by _flags. If the combination of libraries is found and passes -# the link test, LIBRARIES is set to the list of complete library paths that -# have been found and DEFINITIONS to the required definitions. -# Otherwise, LIBRARIES is set to FALSE. -# N.B. _prefix is the prefix applied to the names of all cached variables that -# are generated internally and marked advanced by this macro. -macro(check_fortran_libraries DEFINITIONS LIBRARIES _prefix _name _flags _list _path) - #message("DEBUG: check_fortran_libraries(${_list} in ${_path})") - - # Check for the existence of the libraries given by _list - set(_libraries_found TRUE) - set(_libraries_work FALSE) - set(${DEFINITIONS} "") - set(${LIBRARIES} "") +# Check the language being used +get_property( _LANGUAGES_ GLOBAL PROPERTY ENABLED_LANGUAGES ) +if( _LANGUAGES_ MATCHES Fortran ) + set( _CHECK_FORTRAN TRUE ) +elseif( (_LANGUAGES_ MATCHES C) OR (_LANGUAGES_ MATCHES CXX) ) + set( _CHECK_FORTRAN FALSE ) +else() + if(BLAS_FIND_REQUIRED) + message(FATAL_ERROR "FindBLAS requires Fortran, C, or C++ to be enabled.") + else() + message(STATUS "Looking for BLAS... - NOT found (Unsupported languages)") + return() + endif() +endif() + +macro(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list _thread) + # This macro checks for the existence of the combination of fortran libraries + # given by _list. If the combination is found, this macro checks (using the + # Check_Fortran_Function_Exists macro) whether can link against that library + # combination using the name of a routine given by _name using the linker + # flags given by _flags. If the combination of libraries is found and passes + # the link test, LIBRARIES is set to the list of complete library paths that + # have been found. Otherwise, LIBRARIES is set to FALSE. + + # N.B. _prefix is the prefix applied to the names of all cached variables that + # are generated internally and marked advanced by this macro. + + set(_libdir ${ARGN}) + + set(_libraries_work TRUE) + set(${LIBRARIES}) set(_combined_name) + set(ENV_MKLROOT "$ENV{MKLROOT}") + set(ENV_BLAS_DIR "$ENV{BLAS_DIR}") + set(ENV_BLAS_LIBDIR "$ENV{BLAS_LIBDIR}") + if (NOT _libdir) + if (BLAS_LIBDIR) + list(APPEND _libdir "${BLAS_LIBDIR}") + elseif (BLAS_DIR) + list(APPEND _libdir "${BLAS_DIR}") + list(APPEND _libdir "${BLAS_DIR}/lib") + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + list(APPEND _libdir "${BLAS_DIR}/lib64") + list(APPEND _libdir "${BLAS_DIR}/lib/intel64") + else() + list(APPEND _libdir "${BLAS_DIR}/lib32") + list(APPEND _libdir "${BLAS_DIR}/lib/ia32") + endif() + elseif(ENV_BLAS_LIBDIR) + list(APPEND _libdir "${ENV_BLAS_LIBDIR}") + elseif(ENV_BLAS_DIR) + list(APPEND _libdir "${ENV_BLAS_DIR}") + list(APPEND _libdir "${ENV_BLAS_DIR}/lib") + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + list(APPEND _libdir "${ENV_BLAS_DIR}/lib64") + list(APPEND _libdir "${ENV_BLAS_DIR}/lib/intel64") + else() + list(APPEND _libdir "${ENV_BLAS_DIR}/lib32") + list(APPEND _libdir "${ENV_BLAS_DIR}/lib/ia32") + endif() + else() + if (ENV_MKLROOT) + list(APPEND _libdir "${ENV_MKLROOT}/lib") + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + list(APPEND _libdir "${ENV_MKLROOT}/lib64") + list(APPEND _libdir "${ENV_MKLROOT}/lib/intel64") + else() + list(APPEND _libdir "${ENV_MKLROOT}/lib32") + list(APPEND _libdir "${ENV_MKLROOT}/lib/ia32") + endif() + endif() + if (WIN32) + string(REPLACE ":" ";" _libdir2 "$ENV{LIB}") + elseif (APPLE) + string(REPLACE ":" ";" _libdir2 "$ENV{DYLD_LIBRARY_PATH}") + else () + string(REPLACE ":" ";" _libdir2 "$ENV{LD_LIBRARY_PATH}") + endif () + list(APPEND _libdir "${_libdir2}") + list(APPEND _libdir "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() + endif () + + if (BLAS_VERBOSE) + message("${Cyan}Try to find BLAS libraries: ${_list}") + endif () + foreach(_library ${_list}) set(_combined_name ${_combined_name}_${_library}) - if(_libraries_found) - # search first in ${_path} - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS ${_path} NO_DEFAULT_PATH - ) - # if not found, search in environment variables and system - if ( WIN32 ) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS ENV LIB - ) - elseif ( APPLE ) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV DYLD_LIBRARY_PATH - ) + if(_libraries_work) + if (BLA_STATIC) + if (WIN32) + set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif () + if (APPLE) + set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) + else () + set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif () else () - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV LD_LIBRARY_PATH - ) - endif() + if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + # for ubuntu's libblas3gf and liblapack3gf packages + set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES} .so.3gf) + endif () + endif () + find_library(${_prefix}_${_library}_LIBRARY + NAMES ${_library} + HINTS ${_libdir} + NO_DEFAULT_PATH + ) mark_as_advanced(${_prefix}_${_library}_LIBRARY) + # Print status if not found + # ------------------------- + if (NOT ${_prefix}_${_library}_LIBRARY AND NOT BLAS_FIND_QUIETLY AND BLAS_VERBOSE) + Print_Find_Library_Blas_Status(blas ${_library} ${_libdir}) + endif () set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) - set(_libraries_found ${${_prefix}_${_library}_LIBRARY}) - endif(_libraries_found) + set(_libraries_work ${${_prefix}_${_library}_LIBRARY}) + endif(_libraries_work) endforeach(_library ${_list}) - if(_libraries_found) - set(_libraries_found ${${LIBRARIES}}) - endif() - - # Test this combination of libraries with the Fortran/f2c interface. - # We test the Fortran interface first as it is well standardized. - if(_libraries_found AND NOT _libraries_work) - set(${DEFINITIONS} "-D${_prefix}_USE_F2C") - set(${LIBRARIES} ${_libraries_found}) - # Some C++ linkers require the f2c library to link with Fortran libraries. - # I do not know which ones, thus I just add the f2c library if it is available. - find_package( F2C QUIET ) - if ( F2C_FOUND ) - set(${DEFINITIONS} ${${DEFINITIONS}} ${F2C_DEFINITIONS}) - set(${LIBRARIES} ${${LIBRARIES}} ${F2C_LIBRARIES}) + + if(_libraries_work) + # Test this combination of libraries. + if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND BLA_STATIC) + list(INSERT ${LIBRARIES} 0 "-Wl,--start-group") + list(APPEND ${LIBRARIES} "-Wl,--end-group") + endif() + set(CMAKE_REQUIRED_LIBRARIES "${_flags};${${LIBRARIES}};${_thread}") + set(CMAKE_REQUIRED_FLAGS "${BLAS_COMPILER_FLAGS}") + if (BLAS_VERBOSE) + message("${Cyan}BLAS libs found for BLA_VENDOR ${BLA_VENDOR}." + "Try to compile symbol ${_name} with following libraries:" + "${CMAKE_REQUIRED_LIBRARIES}") + endif () + if(NOT BLAS_FOUND) + unset(${_prefix}${_combined_name}_WORKS CACHE) + endif() + if (_CHECK_FORTRAN) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + string(REPLACE "mkl_intel_lp64" "mkl_gf_lp64" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + string(REPLACE "mkl_intel_ilp64" "mkl_gf_ilp64" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + endif() + check_fortran_function_exists("${_name}" ${_prefix}${_combined_name}_WORKS) + else() + check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) endif() - set(CMAKE_REQUIRED_DEFINITIONS ${${DEFINITIONS}}) - set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}}) - #message("DEBUG: CMAKE_REQUIRED_DEFINITIONS = ${CMAKE_REQUIRED_DEFINITIONS}") - #message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}") - # Check if function exists with f2c calling convention (ie a trailing underscore) - check_function_exists(${_name}_ ${_prefix}_${_name}_${_combined_name}_f2c_WORKS) - set(CMAKE_REQUIRED_DEFINITIONS} "") - set(CMAKE_REQUIRED_LIBRARIES "") - mark_as_advanced(${_prefix}_${_name}_${_combined_name}_f2c_WORKS) - set(_libraries_work ${${_prefix}_${_name}_${_combined_name}_f2c_WORKS}) - endif(_libraries_found AND NOT _libraries_work) - - # If not found, test this combination of libraries with a C interface. - # A few implementations (ie ACML) provide a C interface. Unfortunately, there is no standard. - if(_libraries_found AND NOT _libraries_work) - set(${DEFINITIONS} "") - set(${LIBRARIES} ${_libraries_found}) - set(CMAKE_REQUIRED_DEFINITIONS "") - set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}}) - #message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}") - check_function_exists(${_name} ${_prefix}_${_name}${_combined_name}_WORKS) - set(CMAKE_REQUIRED_LIBRARIES "") - mark_as_advanced(${_prefix}_${_name}${_combined_name}_WORKS) - set(_libraries_work ${${_prefix}_${_name}${_combined_name}_WORKS}) - endif(_libraries_found AND NOT _libraries_work) - - # on failure - if(NOT _libraries_work) - set(${DEFINITIONS} "") - set(${LIBRARIES} FALSE) - endif() - #message("DEBUG: ${DEFINITIONS} = ${${DEFINITIONS}}") - #message("DEBUG: ${LIBRARIES} = ${${LIBRARIES}}") -endmacro(check_fortran_libraries) + mark_as_advanced(${_prefix}${_combined_name}_WORKS) + set(_libraries_work ${${_prefix}${_combined_name}_WORKS}) + # Print status if not found + # ------------------------- + if (NOT _libraries_work AND NOT BLAS_FIND_QUIETLY AND BLAS_VERBOSE) + Print_Find_Library_Blas_CheckFunc_Status(${_name} ${CMAKE_REQUIRED_LIBRARIES}) + endif () + set(CMAKE_REQUIRED_LIBRARIES) + endif() + if(_libraries_work) + set(${LIBRARIES} ${${LIBRARIES}} ${_thread}) + else(_libraries_work) + set(${LIBRARIES} FALSE) + endif(_libraries_work) -# -# main -# +endmacro(Check_Fortran_Libraries) -# Is it already configured? -if (BLAS_LIBRARIES_DIR OR BLAS_LIBRARIES) - set(BLAS_FOUND TRUE) +set(BLAS_LINKER_FLAGS) +set(BLAS_LIBRARIES) +set(BLAS95_LIBRARIES) +if ($ENV{BLA_VENDOR} MATCHES ".+") + set(BLA_VENDOR $ENV{BLA_VENDOR}) +else () + if(NOT BLA_VENDOR) + set(BLA_VENDOR "All") + endif() +endif () -else() +#BLAS in intel mkl 10 library? (em64t 64bit) +if (BLA_VENDOR MATCHES "Intel*" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES OR BLA_VENDOR MATCHES "Intel*") + # Looking for include + # ------------------- + + # Add system include paths to search include + # ------------------------------------------ + unset(_inc_env) + set(ENV_MKLROOT "$ENV{MKLROOT}") + set(ENV_BLAS_DIR "$ENV{BLAS_DIR}") + set(ENV_BLAS_INCDIR "$ENV{BLAS_INCDIR}") + if(ENV_BLAS_INCDIR) + list(APPEND _inc_env "${ENV_BLAS_INCDIR}") + elseif(ENV_BLAS_DIR) + list(APPEND _inc_env "${ENV_BLAS_DIR}") + list(APPEND _inc_env "${ENV_BLAS_DIR}/include") + else() + if (ENV_MKLROOT) + list(APPEND _inc_env "${ENV_MKLROOT}/include") + endif() + # system variables + if(WIN32) + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + list(REMOVE_DUPLICATES _inc_env) - # reset variables - set( BLAS_INCLUDE_DIR "" ) - set( BLAS_DEFINITIONS "" ) - set( BLAS_LINKER_FLAGS "" ) - set( BLAS_LIBRARIES "" ) - set( BLAS_LIBRARIES_DIR "" ) + # set paths where to look for + set(PATH_TO_LOOK_FOR "${_inc_env}") - # - # If Unix, search for BLAS function in possible libraries - # + # Try to find the fftw header in the given paths + # ------------------------------------------------- + # call cmake macro to find the header path + if(BLAS_INCDIR) + set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_DIRS + NAMES mkl.h + HINTS ${BLAS_INCDIR}) + else() + if(BLAS_DIR) + set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_DIRS + NAMES mkl.h + HINTS ${BLAS_DIR} + PATH_SUFFIXES "include") + else() + set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_DIRS + NAMES mkl.h + HINTS ${PATH_TO_LOOK_FOR}) + endif() + endif() + mark_as_advanced(BLAS_mkl.h_DIRS) - # BLAS in ATLAS library? (http://math-atlas.sourceforge.net/) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + # If found, add path to cmake variable + # ------------------------------------ + if (BLAS_mkl.h_DIRS) + set(BLAS_INCLUDE_DIRS "${BLAS_mkl.h_DIRS}") + else () + set(BLAS_INCLUDE_DIRS "BLAS_INCLUDE_DIRS-NOTFOUND") + if(NOT BLAS_FIND_QUIETLY) + message(STATUS "Looking for BLAS -- mkl.h not found") + endif() + endif() + + if (WIN32) + string(REPLACE ":" ";" _libdir "$ENV{LIB}") + elseif (APPLE) + string(REPLACE ":" ";" _libdir "$ENV{DYLD_LIBRARY_PATH}") + else () + string(REPLACE ":" ";" _libdir "$ENV{LD_LIBRARY_PATH}") + endif () + list(APPEND _libdir "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + # libiomp5 + # -------- + set(OMP_iomp5_LIBRARY "OMP_iomp5_LIBRARY-NOTFOUND") + find_library(OMP_iomp5_LIBRARY + NAMES iomp5 + HINTS ${_libdir} + ) + mark_as_advanced(OMP_iomp5_LIBRARY) + set(OMP_LIB "") + # libgomp + # ------- + set(OMP_gomp_LIBRARY "OMP_gomp_LIBRARY-NOTFOUND") + find_library(OMP_gomp_LIBRARY + NAMES gomp + HINTS ${_libdir} + ) + mark_as_advanced(OMP_gomp_LIBRARY) + # choose one or another depending on the compilo + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + if (OMP_gomp_LIBRARY) + set(OMP_LIB "${OMP_gomp_LIBRARY}") + endif() + else(CMAKE_C_COMPILER_ID STREQUAL "Intel") + if (OMP_iomp5_LIBRARY) + set(OMP_LIB "${OMP_iomp5_LIBRARY}") + endif() + endif() + + if (UNIX AND NOT WIN32) + # m + find_library(M_LIBRARY + NAMES m + HINTS ${_libdir}) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + set(LM "-lm") + else() + set(LM "") + endif() + # Fortran + set(LGFORTRAN "") + if (CMAKE_C_COMPILER_ID MATCHES "GNU") + find_library( + FORTRAN_gfortran_LIBRARY + NAMES gfortran + HINTS ${_libdir} + ) + mark_as_advanced(FORTRAN_gfortran_LIBRARY) + if (FORTRAN_gfortran_LIBRARY) + set(LGFORTRAN "${FORTRAN_gfortran_LIBRARY}") + endif() + elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") + find_library( + FORTRAN_ifcore_LIBRARY + NAMES ifcore + HINTS ${_libdir} + ) + mark_as_advanced(FORTRAN_ifcore_LIBRARY) + if (FORTRAN_ifcore_LIBRARY) + set(LGFORTRAN "{FORTRAN_ifcore_LIBRARY}") + endif() + endif() + set(BLAS_COMPILER_FLAGS "") + if (NOT BLA_VENDOR STREQUAL "Intel10_64lp_seq") + if (CMAKE_C_COMPILER_ID STREQUAL "Intel") + list(APPEND BLAS_COMPILER_FLAGS "-openmp") + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + list(APPEND BLAS_COMPILER_FLAGS "-fopenmp") + endif() + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + if (BLA_VENDOR STREQUAL "Intel10_32") + list(APPEND BLAS_COMPILER_FLAGS "-m32") + else() + list(APPEND BLAS_COMPILER_FLAGS "-m64") + endif() + if (NOT BLA_VENDOR STREQUAL "Intel10_64lp_seq") + list(APPEND OMP_LIB "-ldl") + endif() + if (ENV_MKLROOT) + list(APPEND BLAS_COMPILER_FLAGS "-I${ENV_MKLROOT}/include") + endif() + endif() + + set(additional_flags "") + if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(additional_flags "-Wl,--no-as-needed") + endif() + endif () + + if (_LANGUAGES_ MATCHES C OR _LANGUAGES_ MATCHES CXX) + if(BLAS_FIND_QUIETLY OR NOT BLAS_FIND_REQUIRED) + find_package(Threads) + else() + find_package(Threads REQUIRED) + endif() + + set(BLAS_SEARCH_LIBS "") + + if(BLA_F95) + + set(BLAS_mkl_SEARCH_SYMBOL SGEMM) + set(_LIBRARIES BLAS95_LIBRARIES) + if (WIN32) + if (BLA_STATIC) + set(BLAS_mkl_DLL_SUFFIX "") + else() + set(BLAS_mkl_DLL_SUFFIX "_dll") + endif() + + # Find the main file (32-bit or 64-bit) + set(BLAS_SEARCH_LIBS_WIN_MAIN "") + if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_blas95${BLAS_mkl_DLL_SUFFIX} mkl_intel_c${BLAS_mkl_DLL_SUFFIX}") + endif() + if (BLA_VENDOR STREQUAL "Intel10_64lp*" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_blas95_lp64${BLAS_mkl_DLL_SUFFIX} mkl_intel_lp64${BLAS_mkl_DLL_SUFFIX}") + endif () + + # Add threading/sequential libs + set(BLAS_SEARCH_LIBS_WIN_THREAD "") + if (BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "mkl_sequential${BLAS_mkl_DLL_SUFFIX}") + endif() + if (NOT BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + # mkl >= 10.3 + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + endif() + + # Cartesian product of the above + foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN}) + foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD}) + list(APPEND BLAS_SEARCH_LIBS + "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}") + endforeach() + endforeach() + else (WIN32) + if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95 mkl_intel mkl_intel_thread mkl_core guide") + endif () + if (BLA_VENDOR STREQUAL "Intel10_64lp" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95 mkl_intel_lp64 mkl_intel_thread mkl_core guide") + # mkl >= 10.3 + if (CMAKE_C_COMPILER_ID STREQUAL "Intel") + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95_lp64 mkl_intel_lp64 mkl_intel_thread mkl_core") + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95_lp64 mkl_intel_lp64 mkl_gnu_thread mkl_core") + endif() + endif () + if (BLA_VENDOR STREQUAL "Intel10_64lp_seq" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_sequential mkl_core") + if (BLA_VENDOR STREQUAL "Intel10_64lp_seq") + set(OMP_LIB "") + endif() + endif () + endif (WIN32) + + else (BLA_F95) + + set(BLAS_mkl_SEARCH_SYMBOL sgemm) + set(_LIBRARIES BLAS_LIBRARIES) + if (WIN32) + if (BLA_STATIC) + set(BLAS_mkl_DLL_SUFFIX "") + else() + set(BLAS_mkl_DLL_SUFFIX "_dll") + endif() + + # Find the main file (32-bit or 64-bit) + set(BLAS_SEARCH_LIBS_WIN_MAIN "") + if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_intel_c${BLAS_mkl_DLL_SUFFIX}") + endif() + if (BLA_VENDOR STREQUAL "Intel10_64lp*" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_intel_lp64${BLAS_mkl_DLL_SUFFIX}") + endif () + + # Add threading/sequential libs + set(BLAS_SEARCH_LIBS_WIN_THREAD "") + if (NOT BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + # mkl >= 10.3 + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + endif() + if (BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "mkl_sequential${BLAS_mkl_DLL_SUFFIX}") + endif() + + # Cartesian product of the above + foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN}) + foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD}) + list(APPEND BLAS_SEARCH_LIBS + "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}") + endforeach() + endforeach() + else (WIN32) + if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel mkl_intel_thread mkl_core guide") + endif () + if (BLA_VENDOR STREQUAL "Intel10_64lp" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_intel_thread mkl_core guide") + # mkl >= 10.3 + if (CMAKE_C_COMPILER_ID STREQUAL "Intel") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_intel_thread mkl_core") + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_gnu_thread mkl_core") + endif() + endif () + if (BLA_VENDOR STREQUAL "Intel10_64lp_seq" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_sequential mkl_core") + if (BLA_VENDOR STREQUAL "Intel10_64lp_seq") + set(OMP_LIB "") + endif() + endif () + #older vesions of intel mkl libs + if (BLA_VENDOR STREQUAL "Intel" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl") + list(APPEND BLAS_SEARCH_LIBS + "mkl_ia32") + list(APPEND BLAS_SEARCH_LIBS + "mkl_em64t") + endif () + endif (WIN32) + + endif (BLA_F95) + + foreach (IT ${BLAS_SEARCH_LIBS}) + string(REPLACE " " ";" SEARCH_LIBS ${IT}) + if (${_LIBRARIES}) + else () + check_fortran_libraries( + ${_LIBRARIES} + BLAS + ${BLAS_mkl_SEARCH_SYMBOL} + "${additional_flags}" + "${SEARCH_LIBS}" + "${OMP_LIB};${CMAKE_THREAD_LIBS_INIT};${LM}" + ) + if(_LIBRARIES) + set(BLAS_LINKER_FLAGS "${additional_flags}") + endif() + endif() + endforeach () + if(NOT BLAS_FIND_QUIETLY) + if(${_LIBRARIES}) + message(STATUS "Looking for MKL BLAS: found") + else() + message(STATUS "Looking for MKL BLAS: not found") + endif() + endif() + if (${_LIBRARIES} AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Intel MKL") + endif() + endif (_LANGUAGES_ MATCHES C OR _LANGUAGES_ MATCHES CXX) + endif(NOT BLAS_LIBRARIES OR BLA_VENDOR MATCHES "Intel*") +endif (BLA_VENDOR MATCHES "Intel*" OR BLA_VENDOR STREQUAL "All") + + +if (BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + # gotoblas (http://www.tacc.utexas.edu/tacc-projects/gotoblas2) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "cblas;f77blas;atlas" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "goto2" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Goto BLAS: found") + else() + message(STATUS "Looking for Goto BLAS: not found") + endif() endif() + endif() + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Goto") + endif() - # BLAS in PhiPACK libraries? (requires generic BLAS lib, too) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS +endif (BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All") + + +# OpenBlas +if (BLA_VENDOR STREQUAL "Open" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + # openblas (http://www.openblas.net/) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "sgemm;dgemm;blas" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "openblas" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Open BLAS: found") + else() + message(STATUS "Looking for Open BLAS: not found") + endif() endif() + endif() + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Openblas") + endif() - # BLAS in Alpha CXML library? - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS +endif (BLA_VENDOR STREQUAL "Open" OR BLA_VENDOR STREQUAL "All") + + +# EigenBlas +if (BLA_VENDOR STREQUAL "Eigen" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + # eigenblas (http://eigen.tuxfamily.org/index.php?title=Main_Page) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "cxml" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "eigen_blas" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + message(STATUS "Looking for Eigen BLAS: found") + else() + message(STATUS "Looking for Eigen BLAS: not found") + endif() endif() + endif() - # BLAS in Alpha DXML library? (now called CXML, see above) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if(NOT BLAS_LIBRARIES) + # eigenblas (http://eigen.tuxfamily.org/index.php?title=Main_Page) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "dxml" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "eigen_blas_static" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Eigen BLAS: found") + else() + message(STATUS "Looking for Eigen BLAS: not found") + endif() endif() + endif() + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Eigen") + endif() - # BLAS in Sun Performance library? - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS +endif (BLA_VENDOR STREQUAL "Eigen" OR BLA_VENDOR STREQUAL "All") + + +if (BLA_VENDOR STREQUAL "ATLAS" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + # BLAS in ATLAS library? (http://math-atlas.sourceforge.net/) + check_fortran_libraries( BLAS_LIBRARIES BLAS - sgemm - "-xlic_lib=sunperf" - "sunperf;sunmath" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + dgemm + "" + "f77blas;atlas" + "" ) + if(NOT BLAS_FIND_QUIETLY) if(BLAS_LIBRARIES) - # Extra linker flag - set(BLAS_LINKER_FLAGS "-xlic_lib=sunperf") + message(STATUS "Looking for Atlas BLAS: found") + else() + message(STATUS "Looking for Atlas BLAS: not found") endif() endif() + endif() - # BLAS in SCSL library? (SGI/Cray Scientific Library) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Atlas") + endif() + +endif (BLA_VENDOR STREQUAL "ATLAS" OR BLA_VENDOR STREQUAL "All") + + +# BLAS in PhiPACK libraries? (requires generic BLAS lib, too) +if (BLA_VENDOR STREQUAL "PhiPACK" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "scsl" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "sgemm;dgemm;blas" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for PhiPACK BLAS: found") + else() + message(STATUS "Looking for PhiPACK BLAS: not found") + endif() endif() + endif() - # BLAS in SGIMATH library? - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "PhiPACK") + endif() + +endif (BLA_VENDOR STREQUAL "PhiPACK" OR BLA_VENDOR STREQUAL "All") + + +# BLAS in Alpha CXML library? +if (BLA_VENDOR STREQUAL "CXML" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "complib.sgimath" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "cxml" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for CXML BLAS: found") + else() + message(STATUS "Looking for CXML BLAS: not found") + endif() endif() + endif() - # BLAS in IBM ESSL library? (requires generic BLAS lib, too) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "CXML") + endif() + +endif (BLA_VENDOR STREQUAL "CXML" OR BLA_VENDOR STREQUAL "All") + + +# BLAS in Alpha DXML library? (now called CXML, see above) +if (BLA_VENDOR STREQUAL "DXML" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "essl;blas" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "dxml" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for DXML BLAS: found") + else() + message(STATUS "Looking for DXML BLAS: not found") + endif() endif() + endif() - #BLAS in intel mkl 10 library? (em64t 64bit) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "DXML") + endif() + +endif (BLA_VENDOR STREQUAL "DXML" OR BLA_VENDOR STREQUAL "All") + + +# BLAS in Sun Performance library? +if (BLA_VENDOR STREQUAL "SunPerf" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm + "-xlic_lib=sunperf" + "sunperf;sunmath" "" - "mkl_intel_lp64;mkl_intel_thread;mkl_core;guide;pthread" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" ) + if(BLAS_LIBRARIES) + set(BLAS_LINKER_FLAGS "-xlic_lib=sunperf") endif() + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for SunPerf BLAS: found") + else() + message(STATUS "Looking for SunPerf BLAS: not found") + endif() + endif() + endif() - ### windows version of intel mkl 10? - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "SunPerf") + endif() + +endif () + + +# BLAS in SCSL library? (SGI/Cray Scientific Library) +if (BLA_VENDOR STREQUAL "SCSL" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS - SGEMM + sgemm + "" + "scsl" "" - "mkl_c_dll;mkl_intel_thread_dll;mkl_core_dll;libguide40" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for SCSL BLAS: found") + else() + message(STATUS "Looking for SCSL BLAS: not found") + endif() endif() + endif() - #older versions of intel mkl libs + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "SunPerf") + endif() - # BLAS in intel mkl library? (shared) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS +endif () + + +# BLAS in SGIMATH library? +if (BLA_VENDOR STREQUAL "SGIMATH" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "mkl;guide;pthread" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "complib.sgimath" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for SGIMATH BLAS: found") + else() + message(STATUS "Looking for SGIMATH BLAS: not found") + endif() endif() + endif() - #BLAS in intel mkl library? (static, 32bit) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "SGIMATH") + endif() + +endif () + + +# BLAS in IBM ESSL library (requires generic BLAS lib, too) +if (BLA_VENDOR STREQUAL "IBMESSL" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "mkl_ia32;guide;pthread" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "essl;xlfmath;xlf90_r;blas" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for IBM ESSL BLAS: found") + else() + message(STATUS "Looking for IBM ESSL BLAS: not found") + endif() endif() + endif() - #BLAS in intel mkl library? (static, em64t 64bit) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "IBM ESSL") + endif() + +endif () + +# BLAS in IBM ESSL_MT library (requires generic BLAS lib, too) +if (BLA_VENDOR STREQUAL "IBMESSLMT" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "mkl_em64t;guide;pthread" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "esslsmp;xlsmp;xlfmath;xlf90_r;blas" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for IBM ESSL MT BLAS: found") + else() + message(STATUS "Looking for IBM ESSL MT BLAS: not found") + endif() endif() + endif() - #BLAS in acml library? - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "IBM ESSL MT") + endif() + +endif () + + +#BLAS in acml library? +if (BLA_VENDOR MATCHES "ACML.*" OR BLA_VENDOR STREQUAL "All") + + if( ((BLA_VENDOR STREQUAL "ACML") AND (NOT BLAS_ACML_LIB_DIRS)) OR + ((BLA_VENDOR STREQUAL "ACML_MP") AND (NOT BLAS_ACML_MP_LIB_DIRS)) OR + ((BLA_VENDOR STREQUAL "ACML_GPU") AND (NOT BLAS_ACML_GPU_LIB_DIRS))) + + # try to find acml in "standard" paths + if( WIN32 ) + file( GLOB _ACML_ROOT "C:/AMD/acml*/ACML-EULA.txt" ) + else() + file( GLOB _ACML_ROOT "/opt/acml*/ACML-EULA.txt" ) + endif() + if( WIN32 ) + file( GLOB _ACML_GPU_ROOT "C:/AMD/acml*/GPGPUexamples" ) + else() + file( GLOB _ACML_GPU_ROOT "/opt/acml*/GPGPUexamples" ) + endif() + list(GET _ACML_ROOT 0 _ACML_ROOT) + list(GET _ACML_GPU_ROOT 0 _ACML_GPU_ROOT) + + if( _ACML_ROOT ) + + get_filename_component( _ACML_ROOT ${_ACML_ROOT} PATH ) + if( SIZEOF_INTEGER EQUAL 8 ) + set( _ACML_PATH_SUFFIX "_int64" ) + else() + set( _ACML_PATH_SUFFIX "" ) + endif() + if( CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" ) + set( _ACML_COMPILER32 "ifort32" ) + set( _ACML_COMPILER64 "ifort64" ) + elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "SunPro" ) + set( _ACML_COMPILER32 "sun32" ) + set( _ACML_COMPILER64 "sun64" ) + elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" ) + set( _ACML_COMPILER32 "pgi32" ) + if( WIN32 ) + set( _ACML_COMPILER64 "win64" ) + else() + set( _ACML_COMPILER64 "pgi64" ) + endif() + elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "Open64" ) + # 32 bit builds not supported on Open64 but for code simplicity + # We'll just use the same directory twice + set( _ACML_COMPILER32 "open64_64" ) + set( _ACML_COMPILER64 "open64_64" ) + elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "NAG" ) + set( _ACML_COMPILER32 "nag32" ) + set( _ACML_COMPILER64 "nag64" ) + else() + set( _ACML_COMPILER32 "gfortran32" ) + set( _ACML_COMPILER64 "gfortran64" ) + endif() + + if( BLA_VENDOR STREQUAL "ACML_MP" ) + set(_ACML_MP_LIB_DIRS + "${_ACML_ROOT}/${_ACML_COMPILER32}_mp${_ACML_PATH_SUFFIX}/lib" + "${_ACML_ROOT}/${_ACML_COMPILER64}_mp${_ACML_PATH_SUFFIX}/lib" ) + else() + set(_ACML_LIB_DIRS + "${_ACML_ROOT}/${_ACML_COMPILER32}${_ACML_PATH_SUFFIX}/lib" + "${_ACML_ROOT}/${_ACML_COMPILER64}${_ACML_PATH_SUFFIX}/lib" ) + endif() + + endif(_ACML_ROOT) + + elseif(BLAS_${BLA_VENDOR}_LIB_DIRS) + + set(_${BLA_VENDOR}_LIB_DIRS ${BLAS_${BLA_VENDOR}_LIB_DIRS}) + + endif() + + if( BLA_VENDOR STREQUAL "ACML_MP" ) + foreach( BLAS_ACML_MP_LIB_DIRS ${_ACML_MP_LIB_DIRS}) + check_fortran_libraries ( + BLAS_LIBRARIES + BLAS + sgemm + "" "acml_mp;acml_mv" "" ${BLAS_ACML_MP_LIB_DIRS} + ) + if( BLAS_LIBRARIES ) + break() + endif() + endforeach() + elseif( BLA_VENDOR STREQUAL "ACML_GPU" ) + foreach( BLAS_ACML_GPU_LIB_DIRS ${_ACML_GPU_LIB_DIRS}) + check_fortran_libraries ( + BLAS_LIBRARIES + BLAS + sgemm + "" "acml;acml_mv;CALBLAS" "" ${BLAS_ACML_GPU_LIB_DIRS} + ) + if( BLAS_LIBRARIES ) + break() + endif() + endforeach() + else() + foreach( BLAS_ACML_LIB_DIRS ${_ACML_LIB_DIRS} ) + check_fortran_libraries ( + BLAS_LIBRARIES + BLAS + sgemm + "" "acml;acml_mv" "" ${BLAS_ACML_LIB_DIRS} + ) + if( BLAS_LIBRARIES ) + break() + endif() + endforeach() + endif() + + # Either acml or acml_mp should be in LD_LIBRARY_PATH but not both + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "acml" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "acml;acml_mv" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for ACML BLAS: found") + else() + message(STATUS "Looking for ACML BLAS: not found") + endif() endif() + endif() - # Apple BLAS library? - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "Accelerate" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "acml_mp;acml_mv" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for ACML BLAS: found") + else() + message(STATUS "Looking for ACML BLAS: not found") + endif() endif() + endif() - if ( NOT BLAS_LIBRARIES ) - check_fortran_libraries( - BLAS_DEFINITIONS + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "vecLib" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "acml;acml_mv;CALBLAS" + "" ) - endif ( NOT BLAS_LIBRARIES ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for ACML BLAS: found") + else() + message(STATUS "Looking for ACML BLAS: not found") + endif() + endif() + endif() - # Generic BLAS library? - # This configuration *must* be the last try as this library is notably slow. - if ( NOT BLAS_LIBRARIES ) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "ACML") + endif() + +endif (BLA_VENDOR MATCHES "ACML.*" OR BLA_VENDOR STREQUAL "All") # ACML + + +# Apple BLAS library? +if (BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS - sgemm + dgemm + "" + "Accelerate" "" - "blas" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Apple BLAS: found") + else() + message(STATUS "Looking for Apple BLAS: not found") + endif() endif() + endif() + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Apple Accelerate") + endif() + +endif (BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All") - if(BLAS_LIBRARIES_DIR OR BLAS_LIBRARIES) + +if (BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All") + + if ( NOT BLAS_LIBRARIES ) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + dgemm + "" + "vecLib" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for NAS BLAS: found") + else() + message(STATUS "Looking for NAS BLAS: not found") + endif() + endif() + endif () + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "NAS") + endif() + +endif (BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All") + + +# Generic BLAS library? +if (BLA_VENDOR STREQUAL "Generic" OR BLA_VENDOR STREQUAL "All") + + set(BLAS_SEARCH_LIBS "blas;blas_LINUX;blas_MAC;blas_WINDOWS;refblas") + foreach (SEARCH_LIB ${BLAS_SEARCH_LIBS}) + if (BLAS_LIBRARIES) + else () + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "${SEARCH_LIB}" + "${LGFORTRAN}" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Generic BLAS: found") + else() + message(STATUS "Looking for Generic BLAS: not found") + endif() + endif() + endif() + endforeach () + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Netlib or other Generic libblas") + endif() + +endif (BLA_VENDOR STREQUAL "Generic" OR BLA_VENDOR STREQUAL "All") + + +if(BLA_F95) + + if(BLAS95_LIBRARIES) + set(BLAS95_FOUND TRUE) + else() + set(BLAS95_FOUND FALSE) + endif() + + if(NOT BLAS_FIND_QUIETLY) + if(BLAS95_FOUND) + message(STATUS "A library with BLAS95 API found.") + message(STATUS "BLAS_LIBRARIES ${BLAS_LIBRARIES}") + else(BLAS95_FOUND) + message(WARNING "BLA_VENDOR has been set to ${BLA_VENDOR} but blas 95 libraries could not be found or check of symbols failed." + "\nPlease indicate where to find blas libraries. You have three options:\n" + "- Option 1: Provide the installation directory of BLAS library with cmake option: -DBLAS_DIR=your/path/to/blas\n" + "- Option 2: Provide the directory where to find BLAS libraries with cmake option: -DBLAS_LIBDIR=your/path/to/blas/libs\n" + "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" + "\nTo follow libraries detection more precisely you can activate a verbose mode with -DBLAS_VERBOSE=ON at cmake configure." + "\nYou could also specify a BLAS vendor to look for by setting -DBLA_VENDOR=blas_vendor_name." + "\nList of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, Intel10_32 (intel mkl v10 32 bit)," + "Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," + "Intel( older versions of mkl 32 and 64 bit), ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") + if(BLAS_FIND_REQUIRED) + message(FATAL_ERROR + "A required library with BLAS95 API not found. Please specify library location.") + else() + message(STATUS + "A library with BLAS95 API not found. Please specify library location.") + endif() + endif(BLAS95_FOUND) + endif(NOT BLAS_FIND_QUIETLY) + + set(BLAS_FOUND TRUE) + set(BLAS_LIBRARIES "${BLAS95_LIBRARIES}") + +else(BLA_F95) + + if(BLAS_LIBRARIES) set(BLAS_FOUND TRUE) else() set(BLAS_FOUND FALSE) @@ -388,32 +1366,41 @@ else() if(NOT BLAS_FIND_QUIETLY) if(BLAS_FOUND) message(STATUS "A library with BLAS API found.") + message(STATUS "BLAS_LIBRARIES ${BLAS_LIBRARIES}") else(BLAS_FOUND) + message(WARNING "BLA_VENDOR has been set to ${BLA_VENDOR} but blas libraries could not be found or check of symbols failed." + "\nPlease indicate where to find blas libraries. You have three options:\n" + "- Option 1: Provide the installation directory of BLAS library with cmake option: -DBLAS_DIR=your/path/to/blas\n" + "- Option 2: Provide the directory where to find BLAS libraries with cmake option: -DBLAS_LIBDIR=your/path/to/blas/libs\n" + "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" + "\nTo follow libraries detection more precisely you can activate a verbose mode with -DBLAS_VERBOSE=ON at cmake configure." + "\nYou could also specify a BLAS vendor to look for by setting -DBLA_VENDOR=blas_vendor_name." + "\nList of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, Intel10_32 (intel mkl v10 32 bit)," + "Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," + "Intel( older versions of mkl 32 and 64 bit), ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") if(BLAS_FIND_REQUIRED) - message(FATAL_ERROR "A required library with BLAS API not found. Please specify library location.") + message(FATAL_ERROR + "A required library with BLAS API not found. Please specify library location.") else() - message(STATUS "A library with BLAS API not found. Please specify library location.") + message(STATUS + "A library with BLAS API not found. Please specify library location.") endif() endif(BLAS_FOUND) endif(NOT BLAS_FIND_QUIETLY) - # Add variables to cache - set( BLAS_INCLUDE_DIR "${BLAS_INCLUDE_DIR}" - CACHE PATH "Directories containing the BLAS header files" FORCE ) - set( BLAS_DEFINITIONS "${BLAS_DEFINITIONS}" - CACHE STRING "Compilation options to use BLAS" FORCE ) - set( BLAS_LINKER_FLAGS "${BLAS_LINKER_FLAGS}" - CACHE STRING "Linker flags to use BLAS" FORCE ) - set( BLAS_LIBRARIES "${BLAS_LIBRARIES}" - CACHE FILEPATH "BLAS libraries name" FORCE ) - set( BLAS_LIBRARIES_DIR "${BLAS_LIBRARIES_DIR}" - CACHE PATH "Directories containing the BLAS libraries" FORCE ) - - #message("DEBUG: BLAS_INCLUDE_DIR = ${BLAS_INCLUDE_DIR}") - #message("DEBUG: BLAS_DEFINITIONS = ${BLAS_DEFINITIONS}") - #message("DEBUG: BLAS_LINKER_FLAGS = ${BLAS_LINKER_FLAGS}") - #message("DEBUG: BLAS_LIBRARIES = ${BLAS_LIBRARIES}") - #message("DEBUG: BLAS_LIBRARIES_DIR = ${BLAS_LIBRARIES_DIR}") - #message("DEBUG: BLAS_FOUND = ${BLAS_FOUND}") - -endif(BLAS_LIBRARIES_DIR OR BLAS_LIBRARIES) +endif(BLA_F95) + +set(CMAKE_FIND_LIBRARY_SUFFIXES ${_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) + +if (BLAS_FOUND) + list(GET BLAS_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)") + string(REGEX REPLACE "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)" "" not_cached_dir "${first_lib_path}") + set(BLAS_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of BLAS library" FORCE) + else() + set(BLAS_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of BLAS library" FORCE) + endif() +endif() +mark_as_advanced(BLAS_DIR) +mark_as_advanced(BLAS_DIR_FOUND) diff --git a/cmake/FindBLASEXT.cmake b/cmake/FindBLASEXT.cmake new file mode 100644 index 000000000..0fe7fb849 --- /dev/null +++ b/cmake/FindBLASEXT.cmake @@ -0,0 +1,380 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find BLAS EXTENDED for MORSE projects: find include dirs and libraries +# +# This module allows to find BLAS libraries by calling the official FindBLAS module +# and handles the creation of different library lists whether the user wishes to link +# with a sequential BLAS or a multihreaded (BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES). +# BLAS is detected with a FindBLAS call then if the BLAS vendor is Intel10_64lp, ACML +# or IBMESSLMT then the module attempts to find the corresponding multithreaded libraries. +# +# The following variables have been added to manage links with sequential or multithreaded +# versions: +# BLAS_INCLUDE_DIRS - BLAS include directories +# BLAS_LIBRARY_DIRS - Link directories for BLAS libraries +# BLAS_SEQ_LIBRARIES - BLAS component libraries to be linked (sequential) +# BLAS_PAR_LIBRARIES - BLAS component libraries to be linked (multithreaded) + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2016 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + +# macro to factorize this call +macro(find_package_blas) + if(BLASEXT_FIND_REQUIRED) + if(BLASEXT_FIND_QUIETLY) + find_package(BLAS REQUIRED QUIET) + else() + find_package(BLAS REQUIRED) + endif() + else() + if(BLASEXT_FIND_QUIETLY) + find_package(BLAS QUIET) + else() + find_package(BLAS) + endif() + endif() +endmacro() + +# add a cache variable to let the user specify the BLAS vendor +set(BLA_VENDOR "" CACHE STRING "list of possible BLAS vendor: + Open, Eigen, Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT, + Intel10_32 (intel mkl v10 32 bit), + Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), + Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model), + Intel( older versions of mkl 32 and 64 bit), + ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") + +if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "In FindBLASEXT") + message(STATUS "If you want to force the use of one specific library, " + "\n please specify the BLAS vendor by setting -DBLA_VENDOR=blas_vendor_name" + "\n at cmake configure.") + message(STATUS "List of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, " + "\n DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT, Intel10_32 (intel mkl v10 32 bit)," + "\n Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model)," + "\n Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," + "\n Intel( older versions of mkl 32 and 64 bit)," + "\n ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") +endif() + +if (NOT BLAS_FOUND) + # First try to detect two cases: + # 1: only SEQ libs are handled + # 2: both SEQ and PAR libs are handled + find_package_blas() +endif () + +# detect the cases where SEQ and PAR libs are handled +if(BLA_VENDOR STREQUAL "All" AND + (BLAS_mkl_core_LIBRARY OR BLAS_mkl_core_dll_LIBRARY) + ) + set(BLA_VENDOR "Intel") + if(BLAS_mkl_intel_LIBRARY) + set(BLA_VENDOR "Intel10_32") + endif() + if(BLAS_mkl_intel_lp64_LIBRARY) + set(BLA_VENDOR "Intel10_64lp") + endif() + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the MKL." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +elseif(BLA_VENDOR STREQUAL "All" AND BLAS_acml_LIBRARY) + set(BLA_VENDOR "ACML") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the ACML." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +elseif(BLA_VENDOR STREQUAL "All" AND BLAS_essl_LIBRARY) + set(BLA_VENDOR "IBMESSL") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the ESSL." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +endif() + +# Intel case +if(BLA_VENDOR MATCHES "Intel*") + + ### + # look for include path if the BLAS vendor is Intel + ### + + # gather system include paths + unset(_inc_env) + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + set(ENV_MKLROOT "$ENV{MKLROOT}") + if (ENV_MKLROOT) + list(APPEND _inc_env "${ENV_MKLROOT}/include") + endif() + list(REMOVE_DUPLICATES _inc_env) + + # find mkl.h inside known include paths + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + if(BLAS_INCDIR) + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${BLAS_INCDIR}) + else() + if(BLAS_DIR) + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${BLAS_DIR} + PATH_SUFFIXES include) + else() + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${_inc_env}) + endif() + endif() + mark_as_advanced(BLAS_mkl.h_INCLUDE_DIRS) + ## Print status if not found + ## ------------------------- + #if (NOT BLAS_mkl.h_INCLUDE_DIRS AND MORSE_VERBOSE) + # Print_Find_Header_Status(blas mkl.h) + #endif () + set(BLAS_INCLUDE_DIRS "") + if(BLAS_mkl.h_INCLUDE_DIRS) + list(APPEND BLAS_INCLUDE_DIRS "${BLAS_mkl.h_INCLUDE_DIRS}" ) + endif() + + ### + # look for libs + ### + # if Intel 10 64 bit -> look for sequential and multithreaded versions + if(BLA_VENDOR MATCHES "Intel10_64lp*") + + ## look for the sequential version + set(BLA_VENDOR "Intel10_64lp_seq") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "Look for the sequential version Intel10_64lp_seq") + endif() + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "Intel10_64lp") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "Look for the multithreaded version Intel10_64lp") + endif() + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + + else() + + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + endif() + + # ACML case +elseif(BLA_VENDOR MATCHES "ACML*") + + ## look for the sequential version + set(BLA_VENDOR "ACML") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "ACML_MP") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + + # IBMESSL case +elseif(BLA_VENDOR MATCHES "IBMESSL*") + + ## look for the sequential version + set(BLA_VENDOR "IBMESSL") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "IBMESSLMT") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + +else() + + if(BLAS_FOUND) + # define the SEQ libs as the BLAS_LIBRARIES + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + +endif() + + +if(BLAS_SEQ_LIBRARIES) + set(BLAS_LIBRARIES "${BLAS_SEQ_LIBRARIES}") +endif() + +# extract libs paths +# remark: because it is not given by find_package(BLAS) +set(BLAS_LIBRARY_DIRS "") +string(REPLACE " " ";" BLAS_LIBRARIES "${BLAS_LIBRARIES}") +foreach(blas_lib ${BLAS_LIBRARIES}) + if (EXISTS "${blas_lib}") + get_filename_component(a_blas_lib_dir "${blas_lib}" PATH) + list(APPEND BLAS_LIBRARY_DIRS "${a_blas_lib_dir}" ) + else() + string(REPLACE "-L" "" blas_lib "${blas_lib}") + if (EXISTS "${blas_lib}") + list(APPEND BLAS_LIBRARY_DIRS "${blas_lib}" ) + else() + get_filename_component(a_blas_lib_dir "${blas_lib}" PATH) + if (EXISTS "${a_blas_lib_dir}") + list(APPEND BLAS_LIBRARY_DIRS "${a_blas_lib_dir}" ) + endif() + endif() + endif() +endforeach() +if (BLAS_LIBRARY_DIRS) + list(REMOVE_DUPLICATES BLAS_LIBRARY_DIRS) +endif () + +# check that BLAS has been found +# --------------------------------- +include(FindPackageHandleStandardArgs) +if(BLA_VENDOR MATCHES "Intel*") + if(BLA_VENDOR MATCHES "Intel10_64lp*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is Intel MKL:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS + BLAS_INCLUDE_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() + else() + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS + BLAS_INCLUDE_DIRS) + endif() +elseif(BLA_VENDOR MATCHES "ACML*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is ACML:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() +elseif(BLA_VENDOR MATCHES "IBMESSL*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is ESSL:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() +else() + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) +endif() diff --git a/cmake/FindHWLOC.cmake b/cmake/FindHWLOC.cmake new file mode 100644 index 000000000..a831b5c72 --- /dev/null +++ b/cmake/FindHWLOC.cmake @@ -0,0 +1,331 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find HWLOC include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(HWLOC +# [REQUIRED]) # Fail with error if hwloc is not found +# +# This module finds headers and hwloc library. +# Results are reported in variables: +# HWLOC_FOUND - True if headers and requested libraries were found +# HWLOC_INCLUDE_DIRS - hwloc include directories +# HWLOC_LIBRARY_DIRS - Link directories for hwloc libraries +# HWLOC_LIBRARIES - hwloc component libraries to be linked +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DHWLOC_DIR=path/to/hwloc): +# HWLOC_DIR - Where to find the base directory of hwloc +# HWLOC_INCDIR - Where to find the header files +# HWLOC_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: HWLOC_DIR, HWLOC_INCDIR, HWLOC_LIBDIR + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + +include(CheckStructHasMember) +include(CheckCSourceCompiles) + +if (NOT HWLOC_FOUND) + set(HWLOC_DIR "" CACHE PATH "Installation directory of HWLOC library") + if (NOT HWLOC_FIND_QUIETLY) + message(STATUS "A cache variable, namely HWLOC_DIR, has been set to specify the install directory of HWLOC") + endif() +endif() + +set(ENV_HWLOC_DIR "$ENV{HWLOC_DIR}") +set(ENV_HWLOC_INCDIR "$ENV{HWLOC_INCDIR}") +set(ENV_HWLOC_LIBDIR "$ENV{HWLOC_LIBDIR}") +set(HWLOC_GIVEN_BY_USER "FALSE") +if ( HWLOC_DIR OR ( HWLOC_INCDIR AND HWLOC_LIBDIR) OR ENV_HWLOC_DIR OR (ENV_HWLOC_INCDIR AND ENV_HWLOC_LIBDIR) ) + set(HWLOC_GIVEN_BY_USER "TRUE") +endif() + +# Optionally use pkg-config to detect include/library dirs (if pkg-config is available) +# ------------------------------------------------------------------------------------- +include(FindPkgConfig) +find_package(PkgConfig QUIET) +if( PKG_CONFIG_EXECUTABLE AND NOT HWLOC_GIVEN_BY_USER ) + + pkg_search_module(HWLOC hwloc) + if (NOT HWLOC_FIND_QUIETLY) + if (HWLOC_FOUND AND HWLOC_LIBRARIES) + message(STATUS "Looking for HWLOC - found using PkgConfig") + #if(NOT HWLOC_INCLUDE_DIRS) + # message("${Magenta}HWLOC_INCLUDE_DIRS is empty using PkgConfig." + # "Perhaps the path to hwloc headers is already present in your" + # "C(PLUS)_INCLUDE_PATH environment variable.${ColourReset}") + #endif() + else() + message(STATUS "${Magenta}Looking for HWLOC - not found using PkgConfig." + "\n Perhaps you should add the directory containing hwloc.pc to" + "\n the PKG_CONFIG_PATH environment variable.${ColourReset}") + endif() + endif() + +endif( PKG_CONFIG_EXECUTABLE AND NOT HWLOC_GIVEN_BY_USER ) + +if( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT HWLOC_FOUND) OR (HWLOC_GIVEN_BY_USER) ) + + if (NOT HWLOC_FIND_QUIETLY) + message(STATUS "Looking for HWLOC - PkgConfig not used") + endif() + + # Looking for include + # ------------------- + + # Add system include paths to search include + # ------------------------------------------ + unset(_inc_env) + if(ENV_HWLOC_INCDIR) + list(APPEND _inc_env "${ENV_HWLOC_INCDIR}") + elseif(ENV_HWLOC_DIR) + list(APPEND _inc_env "${ENV_HWLOC_DIR}") + list(APPEND _inc_env "${ENV_HWLOC_DIR}/include") + list(APPEND _inc_env "${ENV_HWLOC_DIR}/include/hwloc") + else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + list(REMOVE_DUPLICATES _inc_env) + + # set paths where to look for + set(PATH_TO_LOOK_FOR "${_inc_env}") + + # Try to find the hwloc header in the given paths + # ------------------------------------------------- + # call cmake macro to find the header path + if(HWLOC_INCDIR) + set(HWLOC_hwloc.h_DIRS "HWLOC_hwloc.h_DIRS-NOTFOUND") + find_path(HWLOC_hwloc.h_DIRS + NAMES hwloc.h + HINTS ${HWLOC_INCDIR}) + else() + if(HWLOC_DIR) + set(HWLOC_hwloc.h_DIRS "HWLOC_hwloc.h_DIRS-NOTFOUND") + find_path(HWLOC_hwloc.h_DIRS + NAMES hwloc.h + HINTS ${HWLOC_DIR} + PATH_SUFFIXES "include" "include/hwloc") + else() + set(HWLOC_hwloc.h_DIRS "HWLOC_hwloc.h_DIRS-NOTFOUND") + find_path(HWLOC_hwloc.h_DIRS + NAMES hwloc.h + HINTS ${PATH_TO_LOOK_FOR} + PATH_SUFFIXES "hwloc") + endif() + endif() + mark_as_advanced(HWLOC_hwloc.h_DIRS) + + # Add path to cmake variable + # ------------------------------------ + if (HWLOC_hwloc.h_DIRS) + set(HWLOC_INCLUDE_DIRS "${HWLOC_hwloc.h_DIRS}") + else () + set(HWLOC_INCLUDE_DIRS "HWLOC_INCLUDE_DIRS-NOTFOUND") + if(NOT HWLOC_FIND_QUIETLY) + message(STATUS "Looking for hwloc -- hwloc.h not found") + endif() + endif () + + if (HWLOC_INCLUDE_DIRS) + list(REMOVE_DUPLICATES HWLOC_INCLUDE_DIRS) + endif () + + + # Looking for lib + # --------------- + + # Add system library paths to search lib + # -------------------------------------- + unset(_lib_env) + if(ENV_HWLOC_LIBDIR) + list(APPEND _lib_env "${ENV_HWLOC_LIBDIR}") + elseif(ENV_HWLOC_DIR) + list(APPEND _lib_env "${ENV_HWLOC_DIR}") + list(APPEND _lib_env "${ENV_HWLOC_DIR}/lib") + else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() + endif() + list(REMOVE_DUPLICATES _lib_env) + + # set paths where to look for + set(PATH_TO_LOOK_FOR "${_lib_env}") + + # Try to find the hwloc lib in the given paths + # ---------------------------------------------- + + # call cmake macro to find the lib path + if(HWLOC_LIBDIR) + set(HWLOC_hwloc_LIBRARY "HWLOC_hwloc_LIBRARY-NOTFOUND") + find_library(HWLOC_hwloc_LIBRARY + NAMES hwloc + HINTS ${HWLOC_LIBDIR}) + else() + if(HWLOC_DIR) + set(HWLOC_hwloc_LIBRARY "HWLOC_hwloc_LIBRARY-NOTFOUND") + find_library(HWLOC_hwloc_LIBRARY + NAMES hwloc + HINTS ${HWLOC_DIR} + PATH_SUFFIXES lib lib32 lib64) + else() + set(HWLOC_hwloc_LIBRARY "HWLOC_hwloc_LIBRARY-NOTFOUND") + find_library(HWLOC_hwloc_LIBRARY + NAMES hwloc + HINTS ${PATH_TO_LOOK_FOR}) + endif() + endif() + mark_as_advanced(HWLOC_hwloc_LIBRARY) + + # If found, add path to cmake variable + # ------------------------------------ + if (HWLOC_hwloc_LIBRARY) + get_filename_component(hwloc_lib_path ${HWLOC_hwloc_LIBRARY} PATH) + # set cmake variables (respects naming convention) + set(HWLOC_LIBRARIES "${HWLOC_hwloc_LIBRARY}") + set(HWLOC_LIBRARY_DIRS "${hwloc_lib_path}") + else () + set(HWLOC_LIBRARIES "HWLOC_LIBRARIES-NOTFOUND") + set(HWLOC_LIBRARY_DIRS "HWLOC_LIBRARY_DIRS-NOTFOUND") + if(NOT HWLOC_FIND_QUIETLY) + message(STATUS "Looking for hwloc -- lib hwloc not found") + endif() + endif () + + if (HWLOC_LIBRARY_DIRS) + list(REMOVE_DUPLICATES HWLOC_LIBRARY_DIRS) + endif () + + # check a function to validate the find + if(HWLOC_LIBRARIES) + + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # HWLOC + if (HWLOC_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${HWLOC_INCLUDE_DIRS}") + endif() + if (HWLOC_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${HWLOC_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${HWLOC_LIBRARIES}") + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(HWLOC_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(hwloc_topology_init HWLOC_WORKS) + mark_as_advanced(HWLOC_WORKS) + + if(NOT HWLOC_WORKS) + if(NOT HWLOC_FIND_QUIETLY) + message(STATUS "Looking for hwloc : test of hwloc_topology_init with hwloc library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) + endif(HWLOC_LIBRARIES) + +endif( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT HWLOC_FOUND) OR (HWLOC_GIVEN_BY_USER) ) + +if (HWLOC_LIBRARIES) + if (HWLOC_LIBRARY_DIRS) + list(GET HWLOC_LIBRARY_DIRS 0 first_lib_path) + else() + list(GET HWLOC_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + endif() + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(HWLOC_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of HWLOC library" FORCE) + else() + set(HWLOC_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of HWLOC library" FORCE) + endif() +endif() +mark_as_advanced(HWLOC_DIR) +mark_as_advanced(HWLOC_DIR_FOUND) + +# check that HWLOC has been found +# ------------------------------- +include(FindPackageHandleStandardArgs) +if (PKG_CONFIG_EXECUTABLE AND HWLOC_FOUND) + find_package_handle_standard_args(HWLOC DEFAULT_MSG + HWLOC_LIBRARIES) +else() + find_package_handle_standard_args(HWLOC DEFAULT_MSG + HWLOC_LIBRARIES + HWLOC_WORKS) +endif() + +if (HWLOC_FOUND) + set(HWLOC_SAVE_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES}) + list(APPEND CMAKE_REQUIRED_INCLUDES ${HWLOC_INCLUDE_DIRS}) + + # test headers to guess the version + check_struct_has_member( "struct hwloc_obj" parent hwloc.h HAVE_HWLOC_PARENT_MEMBER ) + check_struct_has_member( "struct hwloc_cache_attr_s" size hwloc.h HAVE_HWLOC_CACHE_ATTR ) + check_c_source_compiles( "#include <hwloc.h> + int main(void) { hwloc_obj_t o; o->type = HWLOC_OBJ_PU; return 0;}" HAVE_HWLOC_OBJ_PU) + include(CheckLibraryExists) + check_library_exists(${HWLOC_LIBRARIES} hwloc_bitmap_free "" HAVE_HWLOC_BITMAP) + + set(CMAKE_REQUIRED_INCLUDES ${HWLOC_SAVE_CMAKE_REQUIRED_INCLUDES}) +endif() diff --git a/cmake/FindMetis.cmake b/cmake/FindMetis.cmake index 6a0ce790c..da2f1f1d7 100644 --- a/cmake/FindMetis.cmake +++ b/cmake/FindMetis.cmake @@ -1,59 +1,264 @@ -# Pastix requires METIS or METIS (partitioning and reordering tools) - -if (METIS_INCLUDES AND METIS_LIBRARIES) - set(METIS_FIND_QUIETLY TRUE) -endif (METIS_INCLUDES AND METIS_LIBRARIES) - -find_path(METIS_INCLUDES - NAMES - metis.h - PATHS - $ENV{METISDIR} - ${INCLUDE_INSTALL_DIR} - PATH_SUFFIXES - . - metis - include -) - -macro(_metis_check_version) - file(READ "${METIS_INCLUDES}/metis.h" _metis_version_header) - - string(REGEX MATCH "define[ \t]+METIS_VER_MAJOR[ \t]+([0-9]+)" _metis_major_version_match "${_metis_version_header}") - set(METIS_MAJOR_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+METIS_VER_MINOR[ \t]+([0-9]+)" _metis_minor_version_match "${_metis_version_header}") - set(METIS_MINOR_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+METIS_VER_SUBMINOR[ \t]+([0-9]+)" _metis_subminor_version_match "${_metis_version_header}") - set(METIS_SUBMINOR_VERSION "${CMAKE_MATCH_1}") - if(NOT METIS_MAJOR_VERSION) - message(STATUS "Could not determine Metis version. Assuming version 4.0.0") - set(METIS_VERSION 4.0.0) +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find METIS include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(METIS +# [REQUIRED] # Fail with error if metis is not found +# ) +# +# This module finds headers and metis library. +# Results are reported in variables: +# METIS_FOUND - True if headers and requested libraries were found +# METIS_INCLUDE_DIRS - metis include directories +# METIS_LIBRARY_DIRS - Link directories for metis libraries +# METIS_LIBRARIES - metis component libraries to be linked +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DMETIS_DIR=path/to/metis): +# METIS_DIR - Where to find the base directory of metis +# METIS_INCDIR - Where to find the header files +# METIS_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: METIS_DIR, METIS_INCDIR, METIS_LIBDIR + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + +if (NOT METIS_FOUND) + set(METIS_DIR "" CACHE PATH "Installation directory of METIS library") + if (NOT METIS_FIND_QUIETLY) + message(STATUS "A cache variable, namely METIS_DIR, has been set to specify the install directory of METIS") + endif() +endif() + +# Looking for include +# ------------------- + +# Add system include paths to search include +# ------------------------------------------ +unset(_inc_env) +set(ENV_METIS_DIR "$ENV{METIS_DIR}") +set(ENV_METIS_INCDIR "$ENV{METIS_INCDIR}") +if(ENV_METIS_INCDIR) + list(APPEND _inc_env "${ENV_METIS_INCDIR}") +elseif(ENV_METIS_DIR) + list(APPEND _inc_env "${ENV_METIS_DIR}") + list(APPEND _inc_env "${ENV_METIS_DIR}/include") + list(APPEND _inc_env "${ENV_METIS_DIR}/include/metis") +else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") else() - set(METIS_VERSION ${METIS_MAJOR_VERSION}.${METIS_MINOR_VERSION}.${METIS_SUBMINOR_VERSION}) + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") endif() - if(${METIS_VERSION} VERSION_LESS ${Metis_FIND_VERSION}) - set(METIS_VERSION_OK FALSE) +endif() +list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") +list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") +list(REMOVE_DUPLICATES _inc_env) + + +# Try to find the metis header in the given paths +# ------------------------------------------------- +# call cmake macro to find the header path +if(METIS_INCDIR) + set(METIS_metis.h_DIRS "METIS_metis.h_DIRS-NOTFOUND") + find_path(METIS_metis.h_DIRS + NAMES metis.h + HINTS ${METIS_INCDIR}) +else() + if(METIS_DIR) + set(METIS_metis.h_DIRS "METIS_metis.h_DIRS-NOTFOUND") + find_path(METIS_metis.h_DIRS + NAMES metis.h + HINTS ${METIS_DIR} + PATH_SUFFIXES "include" "include/metis") else() - set(METIS_VERSION_OK TRUE) + set(METIS_metis.h_DIRS "METIS_metis.h_DIRS-NOTFOUND") + find_path(METIS_metis.h_DIRS + NAMES metis.h + HINTS ${_inc_env}) endif() +endif() +mark_as_advanced(METIS_metis.h_DIRS) - if(NOT METIS_VERSION_OK) - message(STATUS "Metis version ${METIS_VERSION} found in ${METIS_INCLUDES}, " - "but at least version ${Metis_FIND_VERSION} is required") - endif(NOT METIS_VERSION_OK) -endmacro(_metis_check_version) - if(METIS_INCLUDES AND Metis_FIND_VERSION) - _metis_check_version() +# If found, add path to cmake variable +# ------------------------------------ +if (METIS_metis.h_DIRS) + set(METIS_INCLUDE_DIRS "${METIS_metis.h_DIRS}") +else () + set(METIS_INCLUDE_DIRS "METIS_INCLUDE_DIRS-NOTFOUND") + if(NOT METIS_FIND_QUIETLY) + message(STATUS "Looking for metis -- metis.h not found") + endif() +endif() + + +# Looking for lib +# --------------- + +# Add system library paths to search lib +# -------------------------------------- +unset(_lib_env) +set(ENV_METIS_LIBDIR "$ENV{METIS_LIBDIR}") +if(ENV_METIS_LIBDIR) + list(APPEND _lib_env "${ENV_METIS_LIBDIR}") +elseif(ENV_METIS_DIR) + list(APPEND _lib_env "${ENV_METIS_DIR}") + list(APPEND _lib_env "${ENV_METIS_DIR}/lib") +else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") else() - set(METIS_VERSION_OK TRUE) + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") endif() +endif() +list(REMOVE_DUPLICATES _lib_env) + +# Try to find the metis lib in the given paths +# ---------------------------------------------- +# call cmake macro to find the lib path +if(METIS_LIBDIR) + set(METIS_metis_LIBRARY "METIS_metis_LIBRARY-NOTFOUND") + find_library(METIS_metis_LIBRARY + NAMES metis + HINTS ${METIS_LIBDIR}) +else() + if(METIS_DIR) + set(METIS_metis_LIBRARY "METIS_metis_LIBRARY-NOTFOUND") + find_library(METIS_metis_LIBRARY + NAMES metis + HINTS ${METIS_DIR} + PATH_SUFFIXES lib lib32 lib64) + else() + set(METIS_metis_LIBRARY "METIS_metis_LIBRARY-NOTFOUND") + find_library(METIS_metis_LIBRARY + NAMES metis + HINTS ${_lib_env}) + endif() +endif() +mark_as_advanced(METIS_metis_LIBRARY) -find_library(METIS_LIBRARIES metis PATHS $ENV{METISDIR} ${LIB_INSTALL_DIR} PATH_SUFFIXES lib) +# If found, add path to cmake variable +# ------------------------------------ +if (METIS_metis_LIBRARY) + get_filename_component(metis_lib_path "${METIS_metis_LIBRARY}" PATH) + # set cmake variables + set(METIS_LIBRARIES "${METIS_metis_LIBRARY}") + set(METIS_LIBRARY_DIRS "${metis_lib_path}") +else () + set(METIS_LIBRARIES "METIS_LIBRARIES-NOTFOUND") + set(METIS_LIBRARY_DIRS "METIS_LIBRARY_DIRS-NOTFOUND") + if(NOT METIS_FIND_QUIETLY) + message(STATUS "Looking for metis -- lib metis not found") + endif() +endif () +# check a function to validate the find +if(METIS_LIBRARIES) + + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # METIS + if (METIS_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${METIS_INCLUDE_DIRS}") + endif() + if (METIS_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${METIS_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${METIS_LIBRARIES}") + # m + find_library(M_LIBRARY NAMES m) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + list(APPEND REQUIRED_LIBS "-lm") + endif() + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(METIS_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(METIS_NodeND METIS_WORKS) + mark_as_advanced(METIS_WORKS) + + if(NOT METIS_WORKS) + if(NOT METIS_FIND_QUIETLY) + message(STATUS "Looking for METIS : test of METIS_NodeND with METIS library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) +endif(METIS_LIBRARIES) + +if (METIS_LIBRARIES) + list(GET METIS_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(METIS_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of METIS library" FORCE) + else() + set(METIS_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of METIS library" FORCE) + endif() +endif() +mark_as_advanced(METIS_DIR) +mark_as_advanced(METIS_DIR_FOUND) + +# check that METIS has been found +# --------------------------------- include(FindPackageHandleStandardArgs) find_package_handle_standard_args(METIS DEFAULT_MSG - METIS_INCLUDES METIS_LIBRARIES METIS_VERSION_OK) - -mark_as_advanced(METIS_INCLUDES METIS_LIBRARIES) + METIS_LIBRARIES + METIS_WORKS) +# +# TODO: Add possibility to check for specific functions in the library +# diff --git a/cmake/FindPTSCOTCH.cmake b/cmake/FindPTSCOTCH.cmake new file mode 100644 index 000000000..1396d0582 --- /dev/null +++ b/cmake/FindPTSCOTCH.cmake @@ -0,0 +1,423 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find PTSCOTCH include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(PTSCOTCH +# [REQUIRED] # Fail with error if ptscotch is not found +# [COMPONENTS <comp1> <comp2> ...] # dependencies +# ) +# +# PTSCOTCH depends on the following libraries: +# - Threads +# - MPI +# +# COMPONENTS can be some of the following: +# - ESMUMPS: to activate detection of PT-Scotch with the esmumps interface +# +# This module finds headers and ptscotch library. +# Results are reported in variables: +# PTSCOTCH_FOUND - True if headers and requested libraries were found +# PTSCOTCH_LINKER_FLAGS - list of required linker flags (excluding -l and -L) +# PTSCOTCH_INCLUDE_DIRS - ptscotch include directories +# PTSCOTCH_LIBRARY_DIRS - Link directories for ptscotch libraries +# PTSCOTCH_LIBRARIES - ptscotch component libraries to be linked +# PTSCOTCH_INCLUDE_DIRS_DEP - ptscotch + dependencies include directories +# PTSCOTCH_LIBRARY_DIRS_DEP - ptscotch + dependencies link directories +# PTSCOTCH_LIBRARIES_DEP - ptscotch libraries + dependencies +# PTSCOTCH_INTSIZE - Number of octets occupied by a SCOTCH_Num +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DPTSCOTCH=path/to/ptscotch): +# PTSCOTCH_DIR - Where to find the base directory of ptscotch +# PTSCOTCH_INCDIR - Where to find the header files +# PTSCOTCH_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: PTSCOTCH_DIR, PTSCOTCH_INCDIR, PTSCOTCH_LIBDIR + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2016 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + +if (NOT PTSCOTCH_FOUND) + set(PTSCOTCH_DIR "" CACHE PATH "Installation directory of PTSCOTCH library") + if (NOT PTSCOTCH_FIND_QUIETLY) + message(STATUS "A cache variable, namely PTSCOTCH_DIR, has been set to specify the install directory of PTSCOTCH") + endif() +endif() + +# Set the version to find +set(PTSCOTCH_LOOK_FOR_ESMUMPS OFF) + +if( PTSCOTCH_FIND_COMPONENTS ) + foreach( component ${PTSCOTCH_FIND_COMPONENTS} ) + if (${component} STREQUAL "ESMUMPS") + # means we look for esmumps library + set(PTSCOTCH_LOOK_FOR_ESMUMPS ON) + endif() + endforeach() +endif() + +# PTSCOTCH depends on Threads, try to find it +if (NOT THREADS_FOUND) + if (PTSCOTCH_FIND_REQUIRED) + find_package(Threads REQUIRED) + else() + find_package(Threads) + endif() +endif() + +# PTSCOTCH depends on MPI, try to find it +if (NOT MPI_FOUND) + if (PTSCOTCH_FIND_REQUIRED) + find_package(MPI REQUIRED) + else() + find_package(MPI) + endif() +endif() + +# Looking for include +# ------------------- + +# Add system include paths to search include +# ------------------------------------------ +unset(_inc_env) +set(ENV_PTSCOTCH_DIR "$ENV{PTSCOTCH_DIR}") +set(ENV_PTSCOTCH_INCDIR "$ENV{PTSCOTCH_INCDIR}") +if(ENV_PTSCOTCH_INCDIR) + list(APPEND _inc_env "${ENV_PTSCOTCH_INCDIR}") +elseif(ENV_PTSCOTCH_DIR) + list(APPEND _inc_env "${ENV_PTSCOTCH_DIR}") + list(APPEND _inc_env "${ENV_PTSCOTCH_DIR}/include") + list(APPEND _inc_env "${ENV_PTSCOTCH_DIR}/include/ptscotch") +else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() +endif() +list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") +list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") +list(REMOVE_DUPLICATES _inc_env) + + +# Try to find the ptscotch header in the given paths +# ------------------------------------------------- + +set(PTSCOTCH_hdrs_to_find "ptscotch.h;scotch.h") + +# call cmake macro to find the header path +if(PTSCOTCH_INCDIR) + foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) + set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND") + find_path(PTSCOTCH_${ptscotch_hdr}_DIRS + NAMES ${ptscotch_hdr} + HINTS ${PTSCOTCH_INCDIR}) + mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS) + endforeach() +else() + if(PTSCOTCH_DIR) + foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) + set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND") + find_path(PTSCOTCH_${ptscotch_hdr}_DIRS + NAMES ${ptscotch_hdr} + HINTS ${PTSCOTCH_DIR} + PATH_SUFFIXES "include" "include/scotch") + mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS) + endforeach() + else() + foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) + set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND") + find_path(PTSCOTCH_${ptscotch_hdr}_DIRS + NAMES ${ptscotch_hdr} + HINTS ${_inc_env} + PATH_SUFFIXES "scotch") + mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS) + endforeach() + endif() +endif() + +# If found, add path to cmake variable +# ------------------------------------ +foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) + if (PTSCOTCH_${ptscotch_hdr}_DIRS) + list(APPEND PTSCOTCH_INCLUDE_DIRS "${PTSCOTCH_${ptscotch_hdr}_DIRS}") + else () + set(PTSCOTCH_INCLUDE_DIRS "PTSCOTCH_INCLUDE_DIRS-NOTFOUND") + if (NOT PTSCOTCH_FIND_QUIETLY) + message(STATUS "Looking for ptscotch -- ${ptscotch_hdr} not found") + endif() + endif() +endforeach() +list(REMOVE_DUPLICATES PTSCOTCH_INCLUDE_DIRS) + +# Looking for lib +# --------------- + +# Add system library paths to search lib +# -------------------------------------- +unset(_lib_env) +set(ENV_PTSCOTCH_LIBDIR "$ENV{PTSCOTCH_LIBDIR}") +if(ENV_PTSCOTCH_LIBDIR) + list(APPEND _lib_env "${ENV_PTSCOTCH_LIBDIR}") +elseif(ENV_PTSCOTCH_DIR) + list(APPEND _lib_env "${ENV_PTSCOTCH_DIR}") + list(APPEND _lib_env "${ENV_PTSCOTCH_DIR}/lib") +else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() +endif() +list(REMOVE_DUPLICATES _lib_env) + +# Try to find the ptscotch lib in the given paths +# ---------------------------------------------- + +set(PTSCOTCH_libs_to_find "ptscotch;ptscotcherr") +if (PTSCOTCH_LOOK_FOR_ESMUMPS) + list(INSERT PTSCOTCH_libs_to_find 0 "ptesmumps") + list(APPEND PTSCOTCH_libs_to_find "esmumps" ) +endif() +list(APPEND PTSCOTCH_libs_to_find "scotch;scotcherr") + +# call cmake macro to find the lib path +if(PTSCOTCH_LIBDIR) + foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) + set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND") + find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY + NAMES ${ptscotch_lib} + HINTS ${PTSCOTCH_LIBDIR}) + endforeach() +else() + if(PTSCOTCH_DIR) + foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) + set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND") + find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY + NAMES ${ptscotch_lib} + HINTS ${PTSCOTCH_DIR} + PATH_SUFFIXES lib lib32 lib64) + endforeach() + else() + foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) + set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND") + find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY + NAMES ${ptscotch_lib} + HINTS ${_lib_env}) + endforeach() + endif() +endif() + +set(PTSCOTCH_LIBRARIES "") +set(PTSCOTCH_LIBRARY_DIRS "") +# If found, add path to cmake variable +# ------------------------------------ +foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) + + if (PTSCOTCH_${ptscotch_lib}_LIBRARY) + get_filename_component(${ptscotch_lib}_lib_path "${PTSCOTCH_${ptscotch_lib}_LIBRARY}" PATH) + # set cmake variables + list(APPEND PTSCOTCH_LIBRARIES "${PTSCOTCH_${ptscotch_lib}_LIBRARY}") + list(APPEND PTSCOTCH_LIBRARY_DIRS "${${ptscotch_lib}_lib_path}") + else () + list(APPEND PTSCOTCH_LIBRARIES "${PTSCOTCH_${ptscotch_lib}_LIBRARY}") + if (NOT PTSCOTCH_FIND_QUIETLY) + message(STATUS "Looking for ptscotch -- lib ${ptscotch_lib} not found") + endif() + endif () + + mark_as_advanced(PTSCOTCH_${ptscotch_lib}_LIBRARY) + +endforeach() +list(REMOVE_DUPLICATES PTSCOTCH_LIBRARY_DIRS) + +# check a function to validate the find +if(PTSCOTCH_LIBRARIES) + + set(REQUIRED_LDFLAGS) + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # PTSCOTCH + if (PTSCOTCH_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${PTSCOTCH_INCLUDE_DIRS}") + endif() + if (PTSCOTCH_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${PTSCOTCH_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${PTSCOTCH_LIBRARIES}") + # MPI + if (MPI_FOUND) + if (MPI_C_INCLUDE_PATH) + list(APPEND CMAKE_REQUIRED_INCLUDES "${MPI_C_INCLUDE_PATH}") + endif() + if (MPI_C_LINK_FLAGS) + if (${MPI_C_LINK_FLAGS} MATCHES " -") + string(REGEX REPLACE " -" "-" MPI_C_LINK_FLAGS ${MPI_C_LINK_FLAGS}) + endif() + list(APPEND REQUIRED_LDFLAGS "${MPI_C_LINK_FLAGS}") + endif() + list(APPEND REQUIRED_LIBS "${MPI_C_LIBRARIES}") + endif() + # THREADS + if(CMAKE_THREAD_LIBS_INIT) + list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") + endif() + set(Z_LIBRARY "Z_LIBRARY-NOTFOUND") + find_library(Z_LIBRARY NAMES z) + mark_as_advanced(Z_LIBRARY) + if(Z_LIBRARY) + list(APPEND REQUIRED_LIBS "-lz") + endif() + set(M_LIBRARY "M_LIBRARY-NOTFOUND") + find_library(M_LIBRARY NAMES m) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + list(APPEND REQUIRED_LIBS "-lm") + endif() + set(RT_LIBRARY "RT_LIBRARY-NOTFOUND") + find_library(RT_LIBRARY NAMES rt) + mark_as_advanced(RT_LIBRARY) + if(RT_LIBRARY) + list(APPEND REQUIRED_LIBS "-lrt") + endif() + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + list(APPEND CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(PTSCOTCH_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(SCOTCH_dgraphInit PTSCOTCH_WORKS) + mark_as_advanced(PTSCOTCH_WORKS) + + if(PTSCOTCH_WORKS) + # save link with dependencies + set(PTSCOTCH_LIBRARIES_DEP "${REQUIRED_LIBS}") + set(PTSCOTCH_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") + set(PTSCOTCH_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") + set(PTSCOTCH_LINKER_FLAGS "${REQUIRED_LDFLAGS}") + list(REMOVE_DUPLICATES PTSCOTCH_LIBRARY_DIRS_DEP) + list(REMOVE_DUPLICATES PTSCOTCH_INCLUDE_DIRS_DEP) + list(REMOVE_DUPLICATES PTSCOTCH_LINKER_FLAGS) + else() + if(NOT PTSCOTCH_FIND_QUIETLY) + message(STATUS "Looking for PTSCOTCH : test of SCOTCH_dgraphInit with PTSCOTCH library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) +endif(PTSCOTCH_LIBRARIES) + +if (PTSCOTCH_LIBRARIES) + list(GET PTSCOTCH_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(PTSCOTCH_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of PTSCOTCH library" FORCE) + else() + set(PTSCOTCH_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of PTSCOTCH library" FORCE) + endif() +endif() +mark_as_advanced(PTSCOTCH_DIR) +mark_as_advanced(PTSCOTCH_DIR_FOUND) + +# Check the size of SCOTCH_Num +# --------------------------------- +set(CMAKE_REQUIRED_INCLUDES ${PTSCOTCH_INCLUDE_DIRS}) + +include(CheckCSourceRuns) +#stdio.h and stdint.h should be included by scotch.h directly +set(PTSCOTCH_C_TEST_SCOTCH_Num_4 " +#include <stdio.h> +#include <stdint.h> +#include <ptscotch.h> +int main(int argc, char **argv) { + if (sizeof(SCOTCH_Num) == 4) + return 0; + else + return 1; +} +") + +set(PTSCOTCH_C_TEST_SCOTCH_Num_8 " +#include <stdio.h> +#include <stdint.h> +#include <ptscotch.h> +int main(int argc, char **argv) { + if (sizeof(SCOTCH_Num) == 8) + return 0; + else + return 1; +} +") +check_c_source_runs("${PTSCOTCH_C_TEST_SCOTCH_Num_4}" PTSCOTCH_Num_4) +if(NOT PTSCOTCH_Num_4) + check_c_source_runs("${PTSCOTCH_C_TEST_SCOTCH_Num_8}" PTSCOTCH_Num_8) + if(NOT PTSCOTCH_Num_8) + set(PTSCOTCH_INTSIZE -1) + else() + set(PTSCOTCH_INTSIZE 8) + endif() +else() + set(PTSCOTCH_INTSIZE 4) +endif() +set(CMAKE_REQUIRED_INCLUDES "") + +# check that PTSCOTCH has been found +# --------------------------------- +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(PTSCOTCH DEFAULT_MSG + PTSCOTCH_LIBRARIES + PTSCOTCH_WORKS) +# +# TODO: Add possibility to check for specific functions in the library +# diff --git a/cmake/FindPastix.cmake b/cmake/FindPastix.cmake index e2e6c810d..470477fdc 100644 --- a/cmake/FindPastix.cmake +++ b/cmake/FindPastix.cmake @@ -1,25 +1,704 @@ -# Pastix lib requires linking to a blas library. -# It is up to the user of this module to find a BLAS and link to it. -# Pastix requires SCOTCH or METIS (partitioning and reordering tools) as well +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find PASTIX include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(PASTIX +# [REQUIRED] # Fail with error if pastix is not found +# [COMPONENTS <comp1> <comp2> ...] # dependencies +# ) +# +# PASTIX depends on the following libraries: +# - Threads, m, rt +# - MPI +# - HWLOC +# - BLAS +# +# COMPONENTS are optional libraries PASTIX could be linked with, +# Use it to drive detection of a specific compilation chain +# COMPONENTS can be some of the following: +# - MPI: to activate detection of the parallel MPI version (default) +# it looks for Threads, HWLOC, BLAS, MPI and ScaLAPACK libraries +# - SEQ: to activate detection of the sequential version (exclude MPI version) +# - STARPU: to activate detection of StarPU version +# it looks for MPI version of StarPU (default behaviour) +# if SEQ and STARPU are given, it looks for a StarPU without MPI +# - STARPU_CUDA: to activate detection of StarPU with CUDA +# - STARPU_FXT: to activate detection of StarPU with FxT +# - SCOTCH: to activate detection of PASTIX linked with SCOTCH +# - PTSCOTCH: to activate detection of PASTIX linked with SCOTCH +# - METIS: to activate detection of PASTIX linked with SCOTCH +# +# This module finds headers and pastix library. +# Results are reported in variables: +# PASTIX_FOUND - True if headers and requested libraries were found +# PASTIX_LINKER_FLAGS - list of required linker flags (excluding -l and -L) +# PASTIX_INCLUDE_DIRS - pastix include directories +# PASTIX_LIBRARY_DIRS - Link directories for pastix libraries +# PASTIX_LIBRARIES - pastix libraries +# PASTIX_INCLUDE_DIRS_DEP - pastix + dependencies include directories +# PASTIX_LIBRARY_DIRS_DEP - pastix + dependencies link directories +# PASTIX_LIBRARIES_DEP - pastix libraries + dependencies +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DPASTIX_DIR=path/to/pastix): +# PASTIX_DIR - Where to find the base directory of pastix +# PASTIX_INCDIR - Where to find the header files +# PASTIX_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: PASTIX_DIR, PASTIX_INCDIR, PASTIX_LIBDIR -if (PASTIX_INCLUDES AND PASTIX_LIBRARIES) - set(PASTIX_FIND_QUIETLY TRUE) -endif (PASTIX_INCLUDES AND PASTIX_LIBRARIES) +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) -find_path(PASTIX_INCLUDES - NAMES - pastix_nompi.h - PATHS - $ENV{PASTIXDIR} - ${INCLUDE_INSTALL_DIR} -) -find_library(PASTIX_LIBRARIES pastix PATHS $ENV{PASTIXDIR} ${LIB_INSTALL_DIR}) +if (NOT PASTIX_FOUND) + set(PASTIX_DIR "" CACHE PATH "Installation directory of PASTIX library") + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "A cache variable, namely PASTIX_DIR, has been set to specify the install directory of PASTIX") + endif() +endif() +# Set the version to find +set(PASTIX_LOOK_FOR_MPI ON) +set(PASTIX_LOOK_FOR_SEQ OFF) +set(PASTIX_LOOK_FOR_STARPU OFF) +set(PASTIX_LOOK_FOR_STARPU_CUDA OFF) +set(PASTIX_LOOK_FOR_STARPU_FXT OFF) +set(PASTIX_LOOK_FOR_SCOTCH ON) +set(PASTIX_LOOK_FOR_PTSCOTCH OFF) +set(PASTIX_LOOK_FOR_METIS OFF) +if( PASTIX_FIND_COMPONENTS ) + foreach( component ${PASTIX_FIND_COMPONENTS} ) + if (${component} STREQUAL "SEQ") + # means we look for the sequential version of PaStiX (without MPI) + set(PASTIX_LOOK_FOR_SEQ ON) + set(PASTIX_LOOK_FOR_MPI OFF) + endif() + if (${component} STREQUAL "MPI") + # means we look for the MPI version of PaStiX (default) + set(PASTIX_LOOK_FOR_SEQ OFF) + set(PASTIX_LOOK_FOR_MPI ON) + endif() + if (${component} STREQUAL "STARPU") + # means we look for PaStiX with StarPU + set(PASTIX_LOOK_FOR_STARPU ON) + endif() + if (${component} STREQUAL "STARPU_CUDA") + # means we look for PaStiX with StarPU + CUDA + set(PASTIX_LOOK_FOR_STARPU ON) + set(PASTIX_LOOK_FOR_STARPU_CUDA ON) + endif() + if (${component} STREQUAL "STARPU_FXT") + # means we look for PaStiX with StarPU + FxT + set(PASTIX_LOOK_FOR_STARPU_FXT ON) + endif() + if (${component} STREQUAL "SCOTCH") + set(PASTIX_LOOK_FOR_SCOTCH ON) + endif() + if (${component} STREQUAL "SCOTCH") + set(PASTIX_LOOK_FOR_PTSCOTCH ON) + endif() + if (${component} STREQUAL "METIS") + set(PASTIX_LOOK_FOR_METIS ON) + endif() + endforeach() +endif() +# Dependencies detection +# ---------------------- + + +# Required dependencies +# --------------------- + +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect pthread") +endif() +if (PASTIX_FIND_REQUIRED) + find_package(Threads REQUIRED QUIET) +else() + find_package(Threads QUIET) +endif() +set(PASTIX_EXTRA_LIBRARIES "") +if( THREADS_FOUND ) + list(APPEND PASTIX_EXTRA_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) +endif () + +# Add math library to the list of extra +# it normally exists on all common systems provided with a C compiler +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect libm") +endif() +set(PASTIX_M_LIBRARIES "") +if(UNIX OR WIN32) + find_library( + PASTIX_M_m_LIBRARY + NAMES m + ) + mark_as_advanced(PASTIX_M_m_LIBRARY) + if (PASTIX_M_m_LIBRARY) + list(APPEND PASTIX_M_LIBRARIES "${PASTIX_M_m_LIBRARY}") + list(APPEND PASTIX_EXTRA_LIBRARIES "${PASTIX_M_m_LIBRARY}") + else() + if (PASTIX_FIND_REQUIRED) + message(FATAL_ERROR "Could NOT find libm on your system." + "Are you sure to a have a C compiler installed?") + endif() + endif() +endif() + +# Try to find librt (libposix4 - POSIX.1b Realtime Extensions library) +# on Unix systems except Apple ones because it does not exist on it +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect librt") +endif() +set(PASTIX_RT_LIBRARIES "") +if(UNIX AND NOT APPLE) + find_library( + PASTIX_RT_rt_LIBRARY + NAMES rt + ) + mark_as_advanced(PASTIX_RT_rt_LIBRARY) + if (PASTIX_RT_rt_LIBRARY) + list(APPEND PASTIX_RT_LIBRARIES "${PASTIX_RT_rt_LIBRARY}") + list(APPEND PASTIX_EXTRA_LIBRARIES "${PASTIX_RT_rt_LIBRARY}") + else() + if (PASTIX_FIND_REQUIRED) + message(FATAL_ERROR "Could NOT find librt on your system") + endif() + endif() +endif() + +# PASTIX depends on HWLOC +#------------------------ +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect HWLOC") +endif() +if (PASTIX_FIND_REQUIRED) + find_package(HWLOC REQUIRED QUIET) +else() + find_package(HWLOC QUIET) +endif() + +# PASTIX depends on BLAS +#----------------------- +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect BLAS") +endif() +if (PASTIX_FIND_REQUIRED) + find_package(BLASEXT REQUIRED QUIET) +else() + find_package(BLASEXT QUIET) +endif() + +# Optional dependencies +# --------------------- + +# PASTIX may depend on MPI +#------------------------- +if (NOT MPI_FOUND AND PASTIX_LOOK_FOR_MPI) + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect MPI") + endif() + # allows to use an external mpi compilation by setting compilers with + # -DMPI_C_COMPILER=path/to/mpicc -DMPI_Fortran_COMPILER=path/to/mpif90 + # at cmake configure + if(NOT MPI_C_COMPILER) + set(MPI_C_COMPILER mpicc) + endif() + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_MPI) + find_package(MPI REQUIRED QUIET) + else() + find_package(MPI QUIET) + endif() + if (MPI_FOUND) + mark_as_advanced(MPI_LIBRARY) + mark_as_advanced(MPI_EXTRA_LIBRARY) + endif() +endif (NOT MPI_FOUND AND PASTIX_LOOK_FOR_MPI) + +# PASTIX may depend on STARPU +#---------------------------- +if( NOT STARPU_FOUND AND PASTIX_LOOK_FOR_STARPU) + + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect StarPU") + endif() + + set(PASTIX_STARPU_VERSION "1.1" CACHE STRING "oldest STARPU version desired") + + # create list of components in order to make a single call to find_package(starpu...) + # we explicitly need a StarPU version built with hwloc + set(STARPU_COMPONENT_LIST "HWLOC") + + # StarPU may depend on MPI + # allows to use an external mpi compilation by setting compilers with + # -DMPI_C_COMPILER=path/to/mpicc -DMPI_Fortran_COMPILER=path/to/mpif90 + # at cmake configure + if (PASTIX_LOOK_FOR_MPI) + if(NOT MPI_C_COMPILER) + set(MPI_C_COMPILER mpicc) + endif() + list(APPEND STARPU_COMPONENT_LIST "MPI") + endif() + if (PASTIX_LOOK_FOR_STARPU_CUDA) + list(APPEND STARPU_COMPONENT_LIST "CUDA") + endif() + if (PASTIX_LOOK_FOR_STARPU_FXT) + list(APPEND STARPU_COMPONENT_LIST "FXT") + endif() + # set the list of optional dependencies we may discover + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_STARPU) + find_package(STARPU ${PASTIX_STARPU_VERSION} REQUIRED + COMPONENTS ${STARPU_COMPONENT_LIST}) + else() + find_package(STARPU ${PASTIX_STARPU_VERSION} + COMPONENTS ${STARPU_COMPONENT_LIST}) + endif() + +endif( NOT STARPU_FOUND AND PASTIX_LOOK_FOR_STARPU) + +# PASTIX may depends on SCOTCH +#----------------------------- +if (NOT SCOTCH_FOUND AND PASTIX_LOOK_FOR_SCOTCH) + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect SCOTCH") + endif() + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_SCOTCH) + find_package(SCOTCH REQUIRED QUIET) + else() + find_package(SCOTCH QUIET) + endif() +endif() + +# PASTIX may depends on PTSCOTCH +#------------------------------- +if (NOT PTSCOTCH_FOUND AND PASTIX_LOOK_FOR_PTSCOTCH) + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect PTSCOTCH") + endif() + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_PTSCOTCH) + find_package(PTSCOTCH REQUIRED QUIET) + else() + find_package(PTSCOTCH QUIET) + endif() +endif() + +# PASTIX may depends on METIS +#---------------------------- +if (NOT METIS_FOUND AND PASTIX_LOOK_FOR_METIS) + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect METIS") + endif() + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_METIS) + find_package(METIS REQUIRED QUIET) + else() + find_package(METIS QUIET) + endif() +endif() + +# Error if pastix required and no partitioning lib found +if (PASTIX_FIND_REQUIRED AND NOT SCOTCH_FOUND AND NOT PTSCOTCH_FOUND AND NOT METIS_FOUND) + message(FATAL_ERROR "Could NOT find any partitioning library on your system" + " (install scotch, ptscotch or metis)") +endif() + + +# Looking for PaStiX +# ------------------ + +# Looking for include +# ------------------- + +# Add system include paths to search include +# ------------------------------------------ +unset(_inc_env) +set(ENV_PASTIX_DIR "$ENV{PASTIX_DIR}") +set(ENV_PASTIX_INCDIR "$ENV{PASTIX_INCDIR}") +if(ENV_PASTIX_INCDIR) + list(APPEND _inc_env "${ENV_PASTIX_INCDIR}") +elseif(ENV_PASTIX_DIR) + list(APPEND _inc_env "${ENV_PASTIX_DIR}") + list(APPEND _inc_env "${ENV_PASTIX_DIR}/include") + list(APPEND _inc_env "${ENV_PASTIX_DIR}/include/pastix") +else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() +endif() +list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") +list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") +list(REMOVE_DUPLICATES _inc_env) + + +# Try to find the pastix header in the given paths +# --------------------------------------------------- +# call cmake macro to find the header path +if(PASTIX_INCDIR) + set(PASTIX_pastix.h_DIRS "PASTIX_pastix.h_DIRS-NOTFOUND") + find_path(PASTIX_pastix.h_DIRS + NAMES pastix.h + HINTS ${PASTIX_INCDIR}) +else() + if(PASTIX_DIR) + set(PASTIX_pastix.h_DIRS "PASTIX_pastix.h_DIRS-NOTFOUND") + find_path(PASTIX_pastix.h_DIRS + NAMES pastix.h + HINTS ${PASTIX_DIR} + PATH_SUFFIXES "include" "include/pastix") + else() + set(PASTIX_pastix.h_DIRS "PASTIX_pastix.h_DIRS-NOTFOUND") + find_path(PASTIX_pastix.h_DIRS + NAMES pastix.h + HINTS ${_inc_env} + PATH_SUFFIXES "pastix") + endif() +endif() +mark_as_advanced(PASTIX_pastix.h_DIRS) + +# If found, add path to cmake variable +# ------------------------------------ +if (PASTIX_pastix.h_DIRS) + set(PASTIX_INCLUDE_DIRS "${PASTIX_pastix.h_DIRS}") +else () + set(PASTIX_INCLUDE_DIRS "PASTIX_INCLUDE_DIRS-NOTFOUND") + if(NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for pastix -- pastix.h not found") + endif() +endif() + + +# Looking for lib +# --------------- + +# Add system library paths to search lib +# -------------------------------------- +unset(_lib_env) +set(ENV_PASTIX_LIBDIR "$ENV{PASTIX_LIBDIR}") +if(ENV_PASTIX_LIBDIR) + list(APPEND _lib_env "${ENV_PASTIX_LIBDIR}") +elseif(ENV_PASTIX_DIR) + list(APPEND _lib_env "${ENV_PASTIX_DIR}") + list(APPEND _lib_env "${ENV_PASTIX_DIR}/lib") +else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() +endif() +list(REMOVE_DUPLICATES _lib_env) + +# Try to find the pastix lib in the given paths +# ------------------------------------------------ + +# create list of libs to find +set(PASTIX_libs_to_find "pastix_murge;pastix") + +# call cmake macro to find the lib path +if(PASTIX_LIBDIR) + foreach(pastix_lib ${PASTIX_libs_to_find}) + set(PASTIX_${pastix_lib}_LIBRARY "PASTIX_${pastix_lib}_LIBRARY-NOTFOUND") + find_library(PASTIX_${pastix_lib}_LIBRARY + NAMES ${pastix_lib} + HINTS ${PASTIX_LIBDIR}) + endforeach() +else() + if(PASTIX_DIR) + foreach(pastix_lib ${PASTIX_libs_to_find}) + set(PASTIX_${pastix_lib}_LIBRARY "PASTIX_${pastix_lib}_LIBRARY-NOTFOUND") + find_library(PASTIX_${pastix_lib}_LIBRARY + NAMES ${pastix_lib} + HINTS ${PASTIX_DIR} + PATH_SUFFIXES lib lib32 lib64) + endforeach() + else() + foreach(pastix_lib ${PASTIX_libs_to_find}) + set(PASTIX_${pastix_lib}_LIBRARY "PASTIX_${pastix_lib}_LIBRARY-NOTFOUND") + find_library(PASTIX_${pastix_lib}_LIBRARY + NAMES ${pastix_lib} + HINTS ${_lib_env}) + endforeach() + endif() +endif() + +# If found, add path to cmake variable +# ------------------------------------ +foreach(pastix_lib ${PASTIX_libs_to_find}) + + get_filename_component(${pastix_lib}_lib_path ${PASTIX_${pastix_lib}_LIBRARY} PATH) + # set cmake variables (respects naming convention) + if (PASTIX_LIBRARIES) + list(APPEND PASTIX_LIBRARIES "${PASTIX_${pastix_lib}_LIBRARY}") + else() + set(PASTIX_LIBRARIES "${PASTIX_${pastix_lib}_LIBRARY}") + endif() + if (PASTIX_LIBRARY_DIRS) + list(APPEND PASTIX_LIBRARY_DIRS "${${pastix_lib}_lib_path}") + else() + set(PASTIX_LIBRARY_DIRS "${${pastix_lib}_lib_path}") + endif() + mark_as_advanced(PASTIX_${pastix_lib}_LIBRARY) + +endforeach(pastix_lib ${PASTIX_libs_to_find}) + +# check a function to validate the find +if(PASTIX_LIBRARIES) + + set(REQUIRED_LDFLAGS) + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # PASTIX + if (PASTIX_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${PASTIX_INCLUDE_DIRS}") + endif() + foreach(libdir ${PASTIX_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + set(REQUIRED_LIBS "${PASTIX_LIBRARIES}") + # STARPU + if (PASTIX_LOOK_FOR_STARPU AND STARPU_FOUND) + if (STARPU_INCLUDE_DIRS_DEP) + list(APPEND REQUIRED_INCDIRS "${STARPU_INCLUDE_DIRS_DEP}") + elseif (STARPU_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${STARPU_INCLUDE_DIRS}") + endif() + if(STARPU_LIBRARY_DIRS_DEP) + list(APPEND REQUIRED_LIBDIRS "${STARPU_LIBRARY_DIRS_DEP}") + elseif(STARPU_LIBRARY_DIRS) + list(APPEND REQUIRED_LIBDIRS "${STARPU_LIBRARY_DIRS}") + endif() + if (STARPU_LIBRARIES_DEP) + list(APPEND REQUIRED_LIBS "${STARPU_LIBRARIES_DEP}") + elseif (STARPU_LIBRARIES) + foreach(lib ${STARPU_LIBRARIES}) + if (EXISTS ${lib} OR ${lib} MATCHES "^-") + list(APPEND REQUIRED_LIBS "${lib}") + else() + list(APPEND REQUIRED_LIBS "-l${lib}") + endif() + endforeach() + endif() + endif() + # CUDA + if (PASTIX_LOOK_FOR_STARPU_CUDA AND CUDA_FOUND) + if (CUDA_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${CUDA_INCLUDE_DIRS}") + endif() + foreach(libdir ${CUDA_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${CUDA_CUBLAS_LIBRARIES};${CUDA_LIBRARIES}") + endif() + # MPI + if (PASTIX_LOOK_FOR_MPI AND MPI_FOUND) + if (MPI_C_INCLUDE_PATH) + list(APPEND REQUIRED_INCDIRS "${MPI_C_INCLUDE_PATH}") + endif() + if (MPI_C_LINK_FLAGS) + if (${MPI_C_LINK_FLAGS} MATCHES " -") + string(REGEX REPLACE " -" "-" MPI_C_LINK_FLAGS ${MPI_C_LINK_FLAGS}) + endif() + list(APPEND REQUIRED_LDFLAGS "${MPI_C_LINK_FLAGS}") + endif() + list(APPEND REQUIRED_LIBS "${MPI_C_LIBRARIES}") + endif() + # HWLOC + if (HWLOC_FOUND) + if (HWLOC_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${HWLOC_INCLUDE_DIRS}") + endif() + foreach(libdir ${HWLOC_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + foreach(lib ${HWLOC_LIBRARIES}) + if (EXISTS ${lib} OR ${lib} MATCHES "^-") + list(APPEND REQUIRED_LIBS "${lib}") + else() + list(APPEND REQUIRED_LIBS "-l${lib}") + endif() + endforeach() + endif() + # BLAS + if (BLAS_FOUND) + if (BLAS_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${BLAS_INCLUDE_DIRS}") + endif() + foreach(libdir ${BLAS_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${BLAS_LIBRARIES}") + if (BLAS_LINKER_FLAGS) + list(APPEND REQUIRED_LDFLAGS "${BLAS_LINKER_FLAGS}") + endif() + endif() + # SCOTCH + if (PASTIX_LOOK_FOR_SCOTCH AND SCOTCH_FOUND) + if (SCOTCH_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${SCOTCH_INCLUDE_DIRS}") + endif() + foreach(libdir ${SCOTCH_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${SCOTCH_LIBRARIES}") + endif() + # PTSCOTCH + if (PASTIX_LOOK_FOR_PTSCOTCH AND PTSCOTCH_FOUND) + if (PTSCOTCH_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${PTSCOTCH_INCLUDE_DIRS}") + endif() + foreach(libdir ${PTSCOTCH_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${PTSCOTCH_LIBRARIES}") + endif() + # METIS + if (PASTIX_LOOK_FOR_METIS AND METIS_FOUND) + if (METIS_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${METIS_INCLUDE_DIRS}") + endif() + foreach(libdir ${METIS_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${METIS_LIBRARIES}") + endif() + # Fortran + if (CMAKE_C_COMPILER_ID MATCHES "GNU") + find_library( + FORTRAN_gfortran_LIBRARY + NAMES gfortran + HINTS ${_lib_env} + ) + mark_as_advanced(FORTRAN_gfortran_LIBRARY) + if (FORTRAN_gfortran_LIBRARY) + list(APPEND REQUIRED_LIBS "${FORTRAN_gfortran_LIBRARY}") + endif() + elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") + find_library( + FORTRAN_ifcore_LIBRARY + NAMES ifcore + HINTS ${_lib_env} + ) + mark_as_advanced(FORTRAN_ifcore_LIBRARY) + if (FORTRAN_ifcore_LIBRARY) + list(APPEND REQUIRED_LIBS "${FORTRAN_ifcore_LIBRARY}") + endif() + endif() + # EXTRA LIBS such that pthread, m, rt + list(APPEND REQUIRED_LIBS ${PASTIX_EXTRA_LIBRARIES}) + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + list(APPEND CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(PASTIX_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(pastix PASTIX_WORKS) + mark_as_advanced(PASTIX_WORKS) + + if(PASTIX_WORKS) + # save link with dependencies + set(PASTIX_LIBRARIES_DEP "${REQUIRED_LIBS}") + set(PASTIX_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") + set(PASTIX_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") + set(PASTIX_LINKER_FLAGS "${REQUIRED_LDFLAGS}") + list(REMOVE_DUPLICATES PASTIX_LIBRARY_DIRS_DEP) + list(REMOVE_DUPLICATES PASTIX_INCLUDE_DIRS_DEP) + list(REMOVE_DUPLICATES PASTIX_LINKER_FLAGS) + else() + if(NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX : test of pastix() fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + message(STATUS "Maybe PASTIX is linked with specific libraries. " + "Have you tried with COMPONENTS (MPI/SEQ, STARPU, STARPU_CUDA, SCOTCH, PTSCOTCH, METIS)? " + "See the explanation in FindPASTIX.cmake.") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) +endif(PASTIX_LIBRARIES) + +if (PASTIX_LIBRARIES) + list(GET PASTIX_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(PASTIX_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of PASTIX library" FORCE) + else() + set(PASTIX_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of PASTIX library" FORCE) + endif() +endif() +mark_as_advanced(PASTIX_DIR) +mark_as_advanced(PASTIX_DIR_FOUND) + +# check that PASTIX has been found +# --------------------------------- include(FindPackageHandleStandardArgs) find_package_handle_standard_args(PASTIX DEFAULT_MSG - PASTIX_INCLUDES PASTIX_LIBRARIES) - -mark_as_advanced(PASTIX_INCLUDES PASTIX_LIBRARIES) + PASTIX_LIBRARIES + PASTIX_WORKS) diff --git a/cmake/FindScotch.cmake b/cmake/FindScotch.cmake index 530340b16..89d295ac2 100644 --- a/cmake/FindScotch.cmake +++ b/cmake/FindScotch.cmake @@ -1,24 +1,369 @@ -# Pastix requires SCOTCH or METIS (partitioning and reordering tools) +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find SCOTCH include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(SCOTCH +# [REQUIRED] # Fail with error if scotch is not found +# [COMPONENTS <comp1> <comp2> ...] # dependencies +# ) +# +# COMPONENTS can be some of the following: +# - ESMUMPS: to activate detection of Scotch with the esmumps interface +# +# This module finds headers and scotch library. +# Results are reported in variables: +# SCOTCH_FOUND - True if headers and requested libraries were found +# SCOTCH_INCLUDE_DIRS - scotch include directories +# SCOTCH_LIBRARY_DIRS - Link directories for scotch libraries +# SCOTCH_LIBRARIES - scotch component libraries to be linked +# SCOTCH_INTSIZE - Number of octets occupied by a SCOTCH_Num +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DSCOTCH=path/to/scotch): +# SCOTCH_DIR - Where to find the base directory of scotch +# SCOTCH_INCDIR - Where to find the header files +# SCOTCH_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: SCOTCH_DIR, SCOTCH_INCDIR, SCOTCH_LIBDIR -if (SCOTCH_INCLUDES AND SCOTCH_LIBRARIES) - set(SCOTCH_FIND_QUIETLY TRUE) -endif (SCOTCH_INCLUDES AND SCOTCH_LIBRARIES) +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) -find_path(SCOTCH_INCLUDES - NAMES - scotch.h - PATHS - $ENV{SCOTCHDIR} - ${INCLUDE_INSTALL_DIR} - PATH_SUFFIXES - scotch -) +if (NOT SCOTCH_FOUND) + set(SCOTCH_DIR "" CACHE PATH "Installation directory of SCOTCH library") + if (NOT SCOTCH_FIND_QUIETLY) + message(STATUS "A cache variable, namely SCOTCH_DIR, has been set to specify the install directory of SCOTCH") + endif() +endif() +# Set the version to find +set(SCOTCH_LOOK_FOR_ESMUMPS OFF) -find_library(SCOTCH_LIBRARIES scotch PATHS $ENV{SCOTCHDIR} ${LIB_INSTALL_DIR}) +if( SCOTCH_FIND_COMPONENTS ) + foreach( component ${SCOTCH_FIND_COMPONENTS} ) + if (${component} STREQUAL "ESMUMPS") + # means we look for esmumps library + set(SCOTCH_LOOK_FOR_ESMUMPS ON) + endif() + endforeach() +endif() +# SCOTCH may depend on Threads, try to find it +if (NOT THREADS_FOUND) + if (SCOTCH_FIND_REQUIRED) + find_package(Threads REQUIRED) + else() + find_package(Threads) + endif() +endif() + +# Looking for include +# ------------------- + +# Add system include paths to search include +# ------------------------------------------ +unset(_inc_env) +set(ENV_SCOTCH_DIR "$ENV{SCOTCH_DIR}") +set(ENV_SCOTCH_INCDIR "$ENV{SCOTCH_INCDIR}") +if(ENV_SCOTCH_INCDIR) + list(APPEND _inc_env "${ENV_SCOTCH_INCDIR}") +elseif(ENV_SCOTCH_DIR) + list(APPEND _inc_env "${ENV_SCOTCH_DIR}") + list(APPEND _inc_env "${ENV_SCOTCH_DIR}/include") + list(APPEND _inc_env "${ENV_SCOTCH_DIR}/include/scotch") +else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() +endif() +list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") +list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") +list(REMOVE_DUPLICATES _inc_env) + + +# Try to find the scotch header in the given paths +# ------------------------------------------------- +# call cmake macro to find the header path +if(SCOTCH_INCDIR) + set(SCOTCH_scotch.h_DIRS "SCOTCH_scotch.h_DIRS-NOTFOUND") + find_path(SCOTCH_scotch.h_DIRS + NAMES scotch.h + HINTS ${SCOTCH_INCDIR}) +else() + if(SCOTCH_DIR) + set(SCOTCH_scotch.h_DIRS "SCOTCH_scotch.h_DIRS-NOTFOUND") + find_path(SCOTCH_scotch.h_DIRS + NAMES scotch.h + HINTS ${SCOTCH_DIR} + PATH_SUFFIXES "include" "include/scotch") + else() + set(SCOTCH_scotch.h_DIRS "SCOTCH_scotch.h_DIRS-NOTFOUND") + find_path(SCOTCH_scotch.h_DIRS + NAMES scotch.h + HINTS ${_inc_env} + PATH_SUFFIXES "scotch") + endif() +endif() +mark_as_advanced(SCOTCH_scotch.h_DIRS) + +# If found, add path to cmake variable +# ------------------------------------ +if (SCOTCH_scotch.h_DIRS) + set(SCOTCH_INCLUDE_DIRS "${SCOTCH_scotch.h_DIRS}") +else () + set(SCOTCH_INCLUDE_DIRS "SCOTCH_INCLUDE_DIRS-NOTFOUND") + if (NOT SCOTCH_FIND_QUIETLY) + message(STATUS "Looking for scotch -- scotch.h not found") + endif() +endif() +list(REMOVE_DUPLICATES SCOTCH_INCLUDE_DIRS) + +# Looking for lib +# --------------- + +# Add system library paths to search lib +# -------------------------------------- +unset(_lib_env) +set(ENV_SCOTCH_LIBDIR "$ENV{SCOTCH_LIBDIR}") +if(ENV_SCOTCH_LIBDIR) + list(APPEND _lib_env "${ENV_SCOTCH_LIBDIR}") +elseif(ENV_SCOTCH_DIR) + list(APPEND _lib_env "${ENV_SCOTCH_DIR}") + list(APPEND _lib_env "${ENV_SCOTCH_DIR}/lib") +else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() +endif() +list(REMOVE_DUPLICATES _lib_env) + +# Try to find the scotch lib in the given paths +# ---------------------------------------------- + +set(SCOTCH_libs_to_find "scotch;scotcherrexit") +if (SCOTCH_LOOK_FOR_ESMUMPS) + list(INSERT SCOTCH_libs_to_find 0 "esmumps") +endif() + +# call cmake macro to find the lib path +if(SCOTCH_LIBDIR) + foreach(scotch_lib ${SCOTCH_libs_to_find}) + set(SCOTCH_${scotch_lib}_LIBRARY "SCOTCH_${scotch_lib}_LIBRARY-NOTFOUND") + find_library(SCOTCH_${scotch_lib}_LIBRARY + NAMES ${scotch_lib} + HINTS ${SCOTCH_LIBDIR}) + endforeach() +else() + if(SCOTCH_DIR) + foreach(scotch_lib ${SCOTCH_libs_to_find}) + set(SCOTCH_${scotch_lib}_LIBRARY "SCOTCH_${scotch_lib}_LIBRARY-NOTFOUND") + find_library(SCOTCH_${scotch_lib}_LIBRARY + NAMES ${scotch_lib} + HINTS ${SCOTCH_DIR} + PATH_SUFFIXES lib lib32 lib64) + endforeach() + else() + foreach(scotch_lib ${SCOTCH_libs_to_find}) + set(SCOTCH_${scotch_lib}_LIBRARY "SCOTCH_${scotch_lib}_LIBRARY-NOTFOUND") + find_library(SCOTCH_${scotch_lib}_LIBRARY + NAMES ${scotch_lib} + HINTS ${_lib_env}) + endforeach() + endif() +endif() + +set(SCOTCH_LIBRARIES "") +set(SCOTCH_LIBRARY_DIRS "") +# If found, add path to cmake variable +# ------------------------------------ +foreach(scotch_lib ${SCOTCH_libs_to_find}) + + if (SCOTCH_${scotch_lib}_LIBRARY) + get_filename_component(${scotch_lib}_lib_path "${SCOTCH_${scotch_lib}_LIBRARY}" PATH) + # set cmake variables + list(APPEND SCOTCH_LIBRARIES "${SCOTCH_${scotch_lib}_LIBRARY}") + list(APPEND SCOTCH_LIBRARY_DIRS "${${scotch_lib}_lib_path}") + else () + list(APPEND SCOTCH_LIBRARIES "${SCOTCH_${scotch_lib}_LIBRARY}") + if (NOT SCOTCH_FIND_QUIETLY) + message(STATUS "Looking for scotch -- lib ${scotch_lib} not found") + endif() + endif () + + mark_as_advanced(SCOTCH_${scotch_lib}_LIBRARY) + +endforeach() +list(REMOVE_DUPLICATES SCOTCH_LIBRARY_DIRS) + +# check a function to validate the find +if(SCOTCH_LIBRARIES) + + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # SCOTCH + if (SCOTCH_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${SCOTCH_INCLUDE_DIRS}") + endif() + if (SCOTCH_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${SCOTCH_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${SCOTCH_LIBRARIES}") + # THREADS + if(CMAKE_THREAD_LIBS_INIT) + list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") + endif() + set(Z_LIBRARY "Z_LIBRARY-NOTFOUND") + find_library(Z_LIBRARY NAMES z) + mark_as_advanced(Z_LIBRARY) + if(Z_LIBRARY) + list(APPEND REQUIRED_LIBS "-lz") + endif() + set(M_LIBRARY "M_LIBRARY-NOTFOUND") + find_library(M_LIBRARY NAMES m) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + list(APPEND REQUIRED_LIBS "-lm") + endif() + set(RT_LIBRARY "RT_LIBRARY-NOTFOUND") + find_library(RT_LIBRARY NAMES rt) + mark_as_advanced(RT_LIBRARY) + if(RT_LIBRARY) + list(APPEND REQUIRED_LIBS "-lrt") + endif() + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(SCOTCH_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(SCOTCH_graphInit SCOTCH_WORKS) + mark_as_advanced(SCOTCH_WORKS) + + if(SCOTCH_WORKS) + # save link with dependencies + set(SCOTCH_LIBRARIES "${REQUIRED_LIBS}") + else() + if(NOT SCOTCH_FIND_QUIETLY) + message(STATUS "Looking for SCOTCH : test of SCOTCH_graphInit with SCOTCH library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) +endif(SCOTCH_LIBRARIES) + +if (SCOTCH_LIBRARIES) + list(GET SCOTCH_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(SCOTCH_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of SCOTCH library" FORCE) + else() + set(SCOTCH_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of SCOTCH library" FORCE) + endif() +endif() +mark_as_advanced(SCOTCH_DIR) +mark_as_advanced(SCOTCH_DIR_FOUND) + +# Check the size of SCOTCH_Num +# --------------------------------- +set(CMAKE_REQUIRED_INCLUDES ${SCOTCH_INCLUDE_DIRS}) + +include(CheckCSourceRuns) +#stdio.h and stdint.h should be included by scotch.h directly +set(SCOTCH_C_TEST_SCOTCH_Num_4 " +#include <stdio.h> +#include <stdint.h> +#include <scotch.h> +int main(int argc, char **argv) { + if (sizeof(SCOTCH_Num) == 4) + return 0; + else + return 1; +} +") + +set(SCOTCH_C_TEST_SCOTCH_Num_8 " +#include <stdio.h> +#include <stdint.h> +#include <scotch.h> +int main(int argc, char **argv) { + if (sizeof(SCOTCH_Num) == 8) + return 0; + else + return 1; +} +") +check_c_source_runs("${SCOTCH_C_TEST_SCOTCH_Num_4}" SCOTCH_Num_4) +if(NOT SCOTCH_Num_4) + check_c_source_runs("${SCOTCH_C_TEST_SCOTCH_Num_8}" SCOTCH_Num_8) + if(NOT SCOTCH_Num_8) + set(SCOTCH_INTSIZE -1) + else() + set(SCOTCH_INTSIZE 8) + endif() +else() + set(SCOTCH_INTSIZE 4) +endif() +set(CMAKE_REQUIRED_INCLUDES "") + +# check that SCOTCH has been found +# --------------------------------- include(FindPackageHandleStandardArgs) find_package_handle_standard_args(SCOTCH DEFAULT_MSG - SCOTCH_INCLUDES SCOTCH_LIBRARIES) - -mark_as_advanced(SCOTCH_INCLUDES SCOTCH_LIBRARIES) + SCOTCH_LIBRARIES + SCOTCH_WORKS) +# +# TODO: Add possibility to check for specific functions in the library +# diff --git a/doc/AsciiQuickReference.txt b/doc/AsciiQuickReference.txt index 8409f8850..0ca54cef3 100644 --- a/doc/AsciiQuickReference.txt +++ b/doc/AsciiQuickReference.txt @@ -140,7 +140,7 @@ R.array().abs() // abs(P) R.cwiseAbs2() // abs(P.^2) R.array().abs2() // abs(P.^2) (R.array() < s).select(P,Q ); // (R < s ? P : Q) -R = (Q.array()==0).select(P,A) // R(Q==0) = P(Q==0) +R = (Q.array()==0).select(P,R) // R(Q==0) = P(Q==0) R = P.unaryExpr(ptr_fun(func)) // R = arrayfun(func, P) // with: scalar func(const scalar &x); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2141d07c2..0747aa6cb 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -27,7 +27,7 @@ endif() if(NOT EIGEN_Fortran_COMPILER_WORKS) # search for a default Lapack library to complete Eigen's one - find_package(LAPACK) + find_package(LAPACK QUIET) endif() # configure blas/lapack (use Eigen's ones) @@ -80,23 +80,30 @@ else() endif() -find_package(Pastix) -find_package(Scotch) -find_package(Metis 5.0 REQUIRED) -if(PASTIX_FOUND) +find_package(PASTIX QUIET COMPONENTS METIS SCOTCH) +# check that the PASTIX found is a version without MPI +find_path(PASTIX_pastix_nompi.h_INCLUDE_DIRS + NAMES pastix_nompi.h + HINTS ${PASTIX_INCLUDE_DIRS} +) +if (NOT PASTIX_pastix_nompi.h_INCLUDE_DIRS) + message(STATUS "A version of Pastix has been found but pastix_nompi.h does not exist in the include directory." + " Because Eigen tests require a version without MPI, we disable the Pastix backend.") +endif() +if(PASTIX_FOUND AND PASTIX_pastix_nompi.h_INCLUDE_DIRS) add_definitions("-DEIGEN_PASTIX_SUPPORT") - include_directories(${PASTIX_INCLUDES}) + include_directories(${PASTIX_INCLUDE_DIRS_DEP}) if(SCOTCH_FOUND) - include_directories(${SCOTCH_INCLUDES}) + include_directories(${SCOTCH_INCLUDE_DIRS}) set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${SCOTCH_LIBRARIES}) elseif(METIS_FOUND) - include_directories(${METIS_INCLUDES}) + include_directories(${METIS_INCLUDE_DIRS}) set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${METIS_LIBRARIES}) else(SCOTCH_FOUND) ei_add_property(EIGEN_MISSING_BACKENDS "PaStiX, ") endif(SCOTCH_FOUND) - set(SPARSE_LIBS ${SPARSE_LIBS} ${PASTIX_LIBRARIES} ${ORDERING_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) - set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${PASTIX_LIBRARIES_DEP} ${ORDERING_LIBRARIES}) + set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES_DEP}) ei_add_property(EIGEN_TESTED_BACKENDS "PaStiX, ") else() ei_add_property(EIGEN_MISSING_BACKENDS "PaStiX, ") @@ -104,7 +111,7 @@ endif() if(METIS_FOUND) add_definitions("-DEIGEN_METIS_SUPPORT") - include_directories(${METIS_INCLUDES}) + include_directories(${METIS_INCLUDE_DIRS}) ei_add_property(EIGEN_TESTED_BACKENDS "METIS, ") else() ei_add_property(EIGEN_MISSING_BACKENDS "METIS, ") @@ -141,6 +148,7 @@ add_custom_target(BuildOfficial) ei_add_test(rand) ei_add_test(meta) +ei_add_test(numext) ei_add_test(sizeof) ei_add_test(dynalloc) ei_add_test(nomalloc) diff --git a/test/array_for_matrix.cpp b/test/array_for_matrix.cpp index c1501947b..b8721391f 100644 --- a/test/array_for_matrix.cpp +++ b/test/array_for_matrix.cpp @@ -235,12 +235,31 @@ template<typename MatrixTraits> void resize(const MatrixTraits& t) VERIFY(a1.size()==cols); } +template<int> void regression_bug_654() { ArrayXf a = RowVectorXf(3); VectorXf v = Array<float,1,Dynamic>(3); } +// Check propagation of LvalueBit through Array/Matrix-Wrapper +template<int> +void regrrssion_bug_1410() +{ + const Matrix4i M; + const Array4i A; + ArrayWrapper<const Matrix4i> MA = M.array(); + MA.row(0); + MatrixWrapper<const Array4i> AM = A.matrix(); + AM.row(0); + + VERIFY((internal::traits<ArrayWrapper<const Matrix4i> >::Flags&LvalueBit)==0); + VERIFY((internal::traits<MatrixWrapper<const Array4i> >::Flags&LvalueBit)==0); + + VERIFY((internal::traits<ArrayWrapper<Matrix4i> >::Flags&LvalueBit)==LvalueBit); + VERIFY((internal::traits<MatrixWrapper<Array4i> >::Flags&LvalueBit)==LvalueBit); +} + void test_array_for_matrix() { for(int i = 0; i < g_repeat; i++) { @@ -280,5 +299,6 @@ void test_array_for_matrix() CALL_SUBTEST_5( resize(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_6( resize(MatrixXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); } - CALL_SUBTEST_6( regression_bug_654() ); + CALL_SUBTEST_6( regression_bug_654<0>() ); + CALL_SUBTEST_6( regrrssion_bug_1410<0>() ); } diff --git a/test/half_float.cpp b/test/half_float.cpp index f8d438e2f..3d2410aef 100644 --- a/test/half_float.cpp +++ b/test/half_float.cpp @@ -96,12 +96,24 @@ void test_conversion() void test_numtraits() { - std::cout << "epsilon = " << NumTraits<half>::epsilon() << std::endl; - std::cout << "highest = " << NumTraits<half>::highest() << std::endl; - std::cout << "lowest = " << NumTraits<half>::lowest() << std::endl; - std::cout << "inifinty = " << NumTraits<half>::infinity() << std::endl; - std::cout << "nan = " << NumTraits<half>::quiet_NaN() << std::endl; - + std::cout << "epsilon = " << NumTraits<half>::epsilon() << " (0x" << std::hex << NumTraits<half>::epsilon().x << ")" << std::endl; + std::cout << "highest = " << NumTraits<half>::highest() << " (0x" << std::hex << NumTraits<half>::highest().x << ")" << std::endl; + std::cout << "lowest = " << NumTraits<half>::lowest() << " (0x" << std::hex << NumTraits<half>::lowest().x << ")" << std::endl; + std::cout << "min = " << (std::numeric_limits<half>::min)() << " (0x" << std::hex << half((std::numeric_limits<half>::min)()).x << ")" << std::endl; + std::cout << "denorm min = " << (std::numeric_limits<half>::denorm_min)() << " (0x" << std::hex << half((std::numeric_limits<half>::denorm_min)()).x << ")" << std::endl; + std::cout << "infinity = " << NumTraits<half>::infinity() << " (0x" << std::hex << NumTraits<half>::infinity().x << ")" << std::endl; + std::cout << "quiet nan = " << NumTraits<half>::quiet_NaN() << " (0x" << std::hex << NumTraits<half>::quiet_NaN().x << ")" << std::endl; + std::cout << "signaling nan = " << std::numeric_limits<half>::signaling_NaN() << " (0x" << std::hex << std::numeric_limits<half>::signaling_NaN().x << ")" << std::endl; + + VERIFY(NumTraits<half>::IsSigned); + + VERIFY_IS_EQUAL( std::numeric_limits<half>::infinity().x, half(std::numeric_limits<float>::infinity()).x ); + VERIFY_IS_EQUAL( std::numeric_limits<half>::quiet_NaN().x, half(std::numeric_limits<float>::quiet_NaN()).x ); + VERIFY_IS_EQUAL( std::numeric_limits<half>::signaling_NaN().x, half(std::numeric_limits<float>::signaling_NaN()).x ); + VERIFY( (std::numeric_limits<half>::min)() > half(0.f) ); + VERIFY( (std::numeric_limits<half>::denorm_min)() > half(0.f) ); + VERIFY( (std::numeric_limits<half>::min)()/half(2) > half(0.f) ); + VERIFY_IS_EQUAL( (std::numeric_limits<half>::denorm_min)()/half(2), half(0.f) ); } void test_arithmetic() diff --git a/test/lscg.cpp b/test/lscg.cpp index daa62a954..d49ee00c3 100644 --- a/test/lscg.cpp +++ b/test/lscg.cpp @@ -14,12 +14,20 @@ template<typename T> void test_lscg_T() { LeastSquaresConjugateGradient<SparseMatrix<T> > lscg_colmajor_diag; LeastSquaresConjugateGradient<SparseMatrix<T>, IdentityPreconditioner> lscg_colmajor_I; + LeastSquaresConjugateGradient<SparseMatrix<T,RowMajor> > lscg_rowmajor_diag; + LeastSquaresConjugateGradient<SparseMatrix<T,RowMajor>, IdentityPreconditioner> lscg_rowmajor_I; CALL_SUBTEST( check_sparse_square_solving(lscg_colmajor_diag) ); CALL_SUBTEST( check_sparse_square_solving(lscg_colmajor_I) ); CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_colmajor_diag) ); CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_colmajor_I) ); + + CALL_SUBTEST( check_sparse_square_solving(lscg_rowmajor_diag) ); + CALL_SUBTEST( check_sparse_square_solving(lscg_rowmajor_I) ); + + CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_rowmajor_diag) ); + CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_rowmajor_I) ); } void test_lscg() diff --git a/test/main.h b/test/main.h index 25d2dcf43..bd5325196 100644 --- a/test/main.h +++ b/test/main.h @@ -310,6 +310,17 @@ template<> inline float test_precision<std::complex<float> >() { return test_pre template<> inline double test_precision<std::complex<double> >() { return test_precision<double>(); } template<> inline long double test_precision<std::complex<long double> >() { return test_precision<long double>(); } +inline bool test_isApprox(const short& a, const short& b) +{ return internal::isApprox(a, b, test_precision<short>()); } +inline bool test_isApprox(const unsigned short& a, const unsigned short& b) +{ return internal::isApprox(a, b, test_precision<unsigned long>()); } +inline bool test_isApprox(const unsigned int& a, const unsigned int& b) +{ return internal::isApprox(a, b, test_precision<unsigned int>()); } +inline bool test_isApprox(const long& a, const long& b) +{ return internal::isApprox(a, b, test_precision<long>()); } +inline bool test_isApprox(const unsigned long& a, const unsigned long& b) +{ return internal::isApprox(a, b, test_precision<unsigned long>()); } + inline bool test_isApprox(const int& a, const int& b) { return internal::isApprox(a, b, test_precision<int>()); } inline bool test_isMuchSmallerThan(const int& a, const int& b) diff --git a/test/numext.cpp b/test/numext.cpp new file mode 100644 index 000000000..3de33e2f9 --- /dev/null +++ b/test/numext.cpp @@ -0,0 +1,53 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template<typename T> +void check_abs() { + typedef typename NumTraits<T>::Real Real; + + if(NumTraits<T>::IsSigned) + VERIFY_IS_EQUAL(numext::abs(-T(1)), T(1)); + VERIFY_IS_EQUAL(numext::abs(T(0)), T(0)); + VERIFY_IS_EQUAL(numext::abs(T(1)), T(1)); + + for(int k=0; k<g_repeat*100; ++k) + { + T x = internal::random<T>(); + if(!internal::is_same<T,bool>::value) + x = x/Real(2); + if(NumTraits<T>::IsSigned) + { + VERIFY_IS_EQUAL(numext::abs(x), numext::abs(-x)); + VERIFY( numext::abs(-x) >= Real(0)); + } + VERIFY( numext::abs(x) >= Real(0)); + VERIFY_IS_APPROX( numext::abs2(x), numext::abs2(numext::abs(x)) ); + } +} + +void test_numext() { + CALL_SUBTEST( check_abs<bool>() ); + CALL_SUBTEST( check_abs<signed char>() ); + CALL_SUBTEST( check_abs<unsigned char>() ); + CALL_SUBTEST( check_abs<short>() ); + CALL_SUBTEST( check_abs<unsigned short>() ); + CALL_SUBTEST( check_abs<int>() ); + CALL_SUBTEST( check_abs<unsigned int>() ); + CALL_SUBTEST( check_abs<long>() ); + CALL_SUBTEST( check_abs<unsigned long>() ); + CALL_SUBTEST( check_abs<half>() ); + CALL_SUBTEST( check_abs<float>() ); + CALL_SUBTEST( check_abs<double>() ); + CALL_SUBTEST( check_abs<long double>() ); + + CALL_SUBTEST( check_abs<std::complex<float> >() ); + CALL_SUBTEST( check_abs<std::complex<double> >() ); +} diff --git a/test/product_mmtr.cpp b/test/product_mmtr.cpp index f6e4bb1ae..d3e24b012 100644 --- a/test/product_mmtr.cpp +++ b/test/product_mmtr.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2010-2017 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -10,12 +10,19 @@ #include "main.h" #define CHECK_MMTR(DEST, TRI, OP) { \ + ref3 = DEST; \ ref2 = ref1 = DEST; \ DEST.template triangularView<TRI>() OP; \ ref1 OP; \ ref2.template triangularView<TRI>() \ = ref1.template triangularView<TRI>(); \ VERIFY_IS_APPROX(DEST,ref2); \ + \ + DEST = ref3; \ + ref3 = ref2; \ + ref3.diagonal() = DEST.diagonal(); \ + DEST.template triangularView<TRI|ZeroDiag>() OP; \ + VERIFY_IS_APPROX(DEST,ref3); \ } template<typename Scalar> void mmtr(int size) @@ -27,7 +34,7 @@ template<typename Scalar> void mmtr(int size) MatrixColMaj matc = MatrixColMaj::Zero(size, size); MatrixRowMaj matr = MatrixRowMaj::Zero(size, size); - MatrixColMaj ref1(size, size), ref2(size, size); + MatrixColMaj ref1(size, size), ref2(size, size), ref3(size,size); MatrixColMaj soc(size,othersize); soc.setRandom(); MatrixColMaj osc(othersize,size); osc.setRandom(); diff --git a/test/product_notemporary.cpp b/test/product_notemporary.cpp index 8bf71b4f2..30592b79e 100644 --- a/test/product_notemporary.cpp +++ b/test/product_notemporary.cpp @@ -51,6 +51,7 @@ template<typename MatrixType> void product_notemporary(const MatrixType& m) VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * (m1 * m2.transpose()), 0); VERIFY_EVALUATION_COUNT( m3 = m3 + (m1 * m2.adjoint()), 1); + VERIFY_EVALUATION_COUNT( m3 = m3 - (m1 * m2.adjoint()), 1); VERIFY_EVALUATION_COUNT( m3 = m3 + (m1 * m2.adjoint()).transpose(), 1); VERIFY_EVALUATION_COUNT( m3.noalias() = m3 + m1 * m2.transpose(), 0); diff --git a/test/sparse_product.cpp b/test/sparse_product.cpp index c1edd26e3..197586741 100644 --- a/test/sparse_product.cpp +++ b/test/sparse_product.cpp @@ -297,6 +297,10 @@ template<typename SparseMatrixType> void sparse_product() VERIFY_IS_APPROX(x=mLo.template selfadjointView<Lower>()*b, refX=refS*b); VERIFY_IS_APPROX(x=mS.template selfadjointView<Upper|Lower>()*b, refX=refS*b); + VERIFY_IS_APPROX(x=b * mUp.template selfadjointView<Upper>(), refX=b*refS); + VERIFY_IS_APPROX(x=b * mLo.template selfadjointView<Lower>(), refX=b*refS); + VERIFY_IS_APPROX(x=b * mS.template selfadjointView<Upper|Lower>(), refX=b*refS); + VERIFY_IS_APPROX(x.noalias()+=mUp.template selfadjointView<Upper>()*b, refX+=refS*b); VERIFY_IS_APPROX(x.noalias()-=mLo.template selfadjointView<Lower>()*b, refX-=refS*b); VERIFY_IS_APPROX(x.noalias()+=mS.template selfadjointView<Upper|Lower>()*b, refX+=refS*b); diff --git a/unsupported/Eigen/CXX11/src/Tensor/README.md b/unsupported/Eigen/CXX11/src/Tensor/README.md index 02146527b..98e83811b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/README.md +++ b/unsupported/Eigen/CXX11/src/Tensor/README.md @@ -75,16 +75,16 @@ large enough to hold all the data. // Map a tensor of ints on top of stack-allocated storage. int storage[128]; // 2 x 4 x 2 x 8 = 128 - TensorMap<int, 4> t_4d(storage, 2, 4, 2, 8); + TensorMap<Tensor<int, 4>> t_4d(storage, 2, 4, 2, 8); // The same storage can be viewed as a different tensor. // You can also pass the sizes as an array. - TensorMap<int, 2> t_2d(storage, 16, 8); + TensorMap<Tensor<int, 2>> t_2d(storage, 16, 8); // You can also map fixed-size tensors. Here we get a 1d view of // the 2d fixed-size tensor. Tensor<float, Sizes<4, 5>> t_4x3; - TensorMap<float, 1> t_12(t_4x3, 12); + TensorMap<Tensor<float, 1>> t_12(t_4x3, 12); #### Class TensorRef diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 50fedf6ac..279fe5cd3 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -108,7 +108,9 @@ class AutoDiffScalar template<typename OtherDerType> AutoDiffScalar(const AutoDiffScalar<OtherDerType>& other #ifndef EIGEN_PARSED_BY_DOXYGEN - , typename internal::enable_if<internal::is_same<Scalar, typename internal::traits<typename internal::remove_all<OtherDerType>::type>::Scalar>::value,void*>::type = 0 + , typename internal::enable_if< + internal::is_same<Scalar, typename internal::traits<typename internal::remove_all<OtherDerType>::type>::Scalar>::value + && internal::is_convertible<OtherDerType,DerType>::value , void*>::type = 0 #endif ) : m_value(other.value()), m_derivatives(other.derivatives()) @@ -681,4 +683,11 @@ template<typename DerType> struct NumTraits<AutoDiffScalar<DerType> > } +namespace std { +template <typename T> +class numeric_limits<Eigen::AutoDiffScalar<T> > + : public numeric_limits<typename T::Scalar> {}; + +} // namespace std + #endif // EIGEN_AUTODIFF_SCALAR_H diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h index db2449d02..3f7d77710 100644 --- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h +++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h @@ -398,8 +398,8 @@ struct matrix_function_compute template <typename MatrixType> struct matrix_function_compute<MatrixType, 0> { - template <typename AtomicType, typename ResultType> - static void run(const MatrixType& A, AtomicType& atomic, ResultType &result) + template <typename MatA, typename AtomicType, typename ResultType> + static void run(const MatA& A, AtomicType& atomic, ResultType &result) { typedef internal::traits<MatrixType> Traits; typedef typename Traits::Scalar Scalar; @@ -422,11 +422,10 @@ struct matrix_function_compute<MatrixType, 0> template <typename MatrixType> struct matrix_function_compute<MatrixType, 1> { - template <typename AtomicType, typename ResultType> - static void run(const MatrixType& A, AtomicType& atomic, ResultType &result) + template <typename MatA, typename AtomicType, typename ResultType> + static void run(const MatA& A, AtomicType& atomic, ResultType &result) { typedef internal::traits<MatrixType> Traits; - typedef typename MatrixType::Index Index; // compute Schur decomposition of A const ComplexSchur<MatrixType> schurOfA(A); @@ -514,7 +513,7 @@ template<typename Derived> class MatrixFunctionReturnValue typedef internal::MatrixFunctionAtomic<DynMatrixType> AtomicType; AtomicType atomic(m_f); - internal::matrix_function_compute<NestedEvalTypeClean>::run(m_A, atomic, result); + internal::matrix_function_compute<typename NestedEvalTypeClean::PlainObject>::run(m_A, atomic, result); } Index rows() const { return m_A.rows(); } diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h index 1acfbed9e..ff8f6e732 100644 --- a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +++ b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h @@ -339,7 +339,7 @@ public: typedef internal::MatrixLogarithmAtomic<DynMatrixType> AtomicType; AtomicType atomic; - internal::matrix_function_compute<DerivedEvalTypeClean>::run(m_A, atomic, result); + internal::matrix_function_compute<typename DerivedEvalTypeClean::PlainObject>::run(m_A, atomic, result); } Index rows() const { return m_A.rows(); } diff --git a/unsupported/test/autodiff_scalar.cpp b/unsupported/test/autodiff_scalar.cpp index 4df2f5c57..9cf11280c 100644 --- a/unsupported/test/autodiff_scalar.cpp +++ b/unsupported/test/autodiff_scalar.cpp @@ -72,6 +72,20 @@ template<typename Scalar> void check_hyperbolic_functions() VERIFY_IS_APPROX(res3.derivatives().x(), Scalar(0.339540557256150)); } +template <typename Scalar> +void check_limits_specialization() +{ + typedef Eigen::Matrix<Scalar, 1, 1> Deriv; + typedef Eigen::AutoDiffScalar<Deriv> AD; + + typedef std::numeric_limits<AD> A; + typedef std::numeric_limits<Scalar> B; + +#if EIGEN_HAS_CXX11 + VERIFY(bool(std::is_base_of<B, A>::value)); +#endif +} + void test_autodiff_scalar() { for(int i = 0; i < g_repeat; i++) { @@ -79,5 +93,6 @@ void test_autodiff_scalar() CALL_SUBTEST_2( check_atan2<double>() ); CALL_SUBTEST_3( check_hyperbolic_functions<float>() ); CALL_SUBTEST_4( check_hyperbolic_functions<double>() ); + CALL_SUBTEST_5( check_limits_specialization<double>()); } } |