aboutsummaryrefslogtreecommitdiff
path: root/unsupported/Eigen/src
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/src')
-rwxr-xr-xunsupported/Eigen/src/AutoDiff/AutoDiffScalar.h111
-rw-r--r--unsupported/Eigen/src/BVH/KdBVH.h3
-rw-r--r--unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h25
-rw-r--r--unsupported/Eigen/src/EulerAngles/CMakeLists.txt4
-rw-r--r--unsupported/Eigen/src/EulerAngles/EulerAngles.h257
-rw-r--r--unsupported/Eigen/src/EulerAngles/EulerSystem.h197
-rw-r--r--unsupported/Eigen/src/FFT/ei_fftw_impl.h4
-rw-r--r--unsupported/Eigen/src/FFT/ei_kissfft_impl.h53
-rw-r--r--unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h10
-rw-r--r--unsupported/Eigen/src/IterativeSolvers/DGMRES.h122
-rw-r--r--unsupported/Eigen/src/IterativeSolvers/GMRES.h38
-rwxr-xr-xunsupported/Eigen/src/IterativeSolvers/IDRS.h436
-rw-r--r--unsupported/Eigen/src/IterativeSolvers/IterationController.h2
-rw-r--r--unsupported/Eigen/src/IterativeSolvers/MINRES.h38
-rw-r--r--unsupported/Eigen/src/IterativeSolvers/Scaling.h6
-rw-r--r--unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h4
-rw-r--r--unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h2
-rw-r--r--unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h6
-rw-r--r--unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h51
-rw-r--r--unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h35
-rw-r--r--unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h22
-rw-r--r--unsupported/Eigen/src/MatrixFunctions/MatrixPower.h32
-rw-r--r--unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h34
-rw-r--r--unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h6
-rw-r--r--unsupported/Eigen/src/NonLinearOptimization/qrsolv.h2
-rw-r--r--unsupported/Eigen/src/NonLinearOptimization/r1updt.h2
-rw-r--r--unsupported/Eigen/src/Polynomials/Companion.h94
-rw-r--r--unsupported/Eigen/src/Polynomials/PolynomialSolver.h46
-rw-r--r--unsupported/Eigen/src/Polynomials/PolynomialUtils.h8
-rw-r--r--unsupported/Eigen/src/Skyline/SkylineInplaceLU.h4
-rw-r--r--unsupported/Eigen/src/Skyline/SkylineMatrix.h18
-rw-r--r--unsupported/Eigen/src/Skyline/SkylineMatrixBase.h10
-rw-r--r--unsupported/Eigen/src/Skyline/SkylineStorage.h2
-rw-r--r--unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h2
-rw-r--r--unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h22
-rw-r--r--unsupported/Eigen/src/SparseExtra/MarketIO.h96
-rw-r--r--unsupported/Eigen/src/SparseExtra/RandomSetter.h54
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h286
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h68
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h357
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h66
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h1959
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h118
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h67
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h55
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h58
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h140
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h11
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h1048
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h23
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h46
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h16
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h46
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h16
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h165
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h369
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h54
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h34
-rw-r--r--unsupported/Eigen/src/Splines/Spline.h9
-rw-r--r--unsupported/Eigen/src/Splines/SplineFitting.h11
-rw-r--r--unsupported/Eigen/src/Splines/SplineFwd.h2
61 files changed, 5695 insertions, 1187 deletions
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index 279fe5cd3..0f166e35f 100755
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -26,11 +26,11 @@ void make_coherent(const A& a, const B&b)
make_coherent_impl<A,B>::run(a.const_cast_derived(), b.const_cast_derived());
}
-template<typename _DerType, bool Enable> struct auto_diff_special_op;
+template<typename DerivativeType, bool Enable> struct auto_diff_special_op;
} // end namespace internal
-template<typename _DerType> class AutoDiffScalar;
+template<typename DerivativeType> class AutoDiffScalar;
template<typename NewDerType>
inline AutoDiffScalar<NewDerType> MakeAutoDiffScalar(const typename NewDerType::Scalar& value, const NewDerType &der) {
@@ -38,16 +38,16 @@ inline AutoDiffScalar<NewDerType> MakeAutoDiffScalar(const typename NewDerType::
}
/** \class AutoDiffScalar
- * \brief A scalar type replacement with automatic differentation capability
+ * \brief A scalar type replacement with automatic differentiation capability
*
- * \param _DerType the vector type used to store/represent the derivatives. The base scalar type
+ * \param DerivativeType the vector type used to store/represent the derivatives. The base scalar type
* as well as the number of derivatives to compute are determined from this type.
* Typical choices include, e.g., \c Vector4f for 4 derivatives, or \c VectorXf
* if the number of derivatives is not known at compile time, and/or, the number
* of derivatives is large.
- * Note that _DerType can also be a reference (e.g., \c VectorXf&) to wrap a
+ * Note that DerivativeType can also be a reference (e.g., \c VectorXf&) to wrap a
* existing vector into an AutoDiffScalar.
- * Finally, _DerType can also be any Eigen compatible expression.
+ * Finally, DerivativeType can also be any Eigen compatible expression.
*
* This class represents a scalar value while tracking its respective derivatives using Eigen's expression
* template mechanism.
@@ -63,17 +63,17 @@ inline AutoDiffScalar<NewDerType> MakeAutoDiffScalar(const typename NewDerType::
*
*/
-template<typename _DerType>
+template<typename DerivativeType>
class AutoDiffScalar
: public internal::auto_diff_special_op
- <_DerType, !internal::is_same<typename internal::traits<typename internal::remove_all<_DerType>::type>::Scalar,
- typename NumTraits<typename internal::traits<typename internal::remove_all<_DerType>::type>::Scalar>::Real>::value>
+ <DerivativeType, !internal::is_same<typename internal::traits<typename internal::remove_all<DerivativeType>::type>::Scalar,
+ typename NumTraits<typename internal::traits<typename internal::remove_all<DerivativeType>::type>::Scalar>::Real>::value>
{
public:
typedef internal::auto_diff_special_op
- <_DerType, !internal::is_same<typename internal::traits<typename internal::remove_all<_DerType>::type>::Scalar,
- typename NumTraits<typename internal::traits<typename internal::remove_all<_DerType>::type>::Scalar>::Real>::value> Base;
- typedef typename internal::remove_all<_DerType>::type DerType;
+ <DerivativeType, !internal::is_same<typename internal::traits<typename internal::remove_all<DerivativeType>::type>::Scalar,
+ typename NumTraits<typename internal::traits<typename internal::remove_all<DerivativeType>::type>::Scalar>::Real>::value> Base;
+ typedef typename internal::remove_all<DerivativeType>::type DerType;
typedef typename internal::traits<DerType>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real Real;
@@ -382,16 +382,16 @@ class AutoDiffScalar
namespace internal {
-template<typename _DerType>
-struct auto_diff_special_op<_DerType, true>
-// : auto_diff_scalar_op<_DerType, typename NumTraits<Scalar>::Real,
+template<typename DerivativeType>
+struct auto_diff_special_op<DerivativeType, true>
+// : auto_diff_scalar_op<DerivativeType, typename NumTraits<Scalar>::Real,
// is_same<Scalar,typename NumTraits<Scalar>::Real>::value>
{
- typedef typename remove_all<_DerType>::type DerType;
+ typedef typename remove_all<DerivativeType>::type DerType;
typedef typename traits<DerType>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real Real;
-// typedef auto_diff_scalar_op<_DerType, typename NumTraits<Scalar>::Real,
+// typedef auto_diff_scalar_op<DerivativeType, typename NumTraits<Scalar>::Real,
// is_same<Scalar,typename NumTraits<Scalar>::Real>::value> Base;
// using Base::operator+;
@@ -401,8 +401,8 @@ struct auto_diff_special_op<_DerType, true>
// using Base::operator*;
// using Base::operator*=;
- const AutoDiffScalar<_DerType>& derived() const { return *static_cast<const AutoDiffScalar<_DerType>*>(this); }
- AutoDiffScalar<_DerType>& derived() { return *static_cast<AutoDiffScalar<_DerType>*>(this); }
+ const AutoDiffScalar<DerivativeType>& derived() const { return *static_cast<const AutoDiffScalar<DerivativeType>*>(this); }
+ AutoDiffScalar<DerivativeType>& derived() { return *static_cast<AutoDiffScalar<DerivativeType>*>(this); }
inline const AutoDiffScalar<DerType&> operator+(const Real& other) const
@@ -410,12 +410,12 @@ struct auto_diff_special_op<_DerType, true>
return AutoDiffScalar<DerType&>(derived().value() + other, derived().derivatives());
}
- friend inline const AutoDiffScalar<DerType&> operator+(const Real& a, const AutoDiffScalar<_DerType>& b)
+ friend inline const AutoDiffScalar<DerType&> operator+(const Real& a, const AutoDiffScalar<DerivativeType>& b)
{
return AutoDiffScalar<DerType&>(a + b.value(), b.derivatives());
}
- inline AutoDiffScalar<_DerType>& operator+=(const Real& other)
+ inline AutoDiffScalar<DerivativeType>& operator+=(const Real& other)
{
derived().value() += other;
return derived();
@@ -431,28 +431,46 @@ struct auto_diff_special_op<_DerType, true>
}
friend inline const AutoDiffScalar<typename CwiseUnaryOp<bind1st_op<scalar_product_op<Real,Scalar> >, DerType>::Type >
- operator*(const Real& other, const AutoDiffScalar<_DerType>& a)
+ operator*(const Real& other, const AutoDiffScalar<DerivativeType>& a)
{
return AutoDiffScalar<typename CwiseUnaryOp<bind1st_op<scalar_product_op<Real,Scalar> >, DerType>::Type >(
a.value() * other,
a.derivatives() * other);
}
- inline AutoDiffScalar<_DerType>& operator*=(const Scalar& other)
+ inline AutoDiffScalar<DerivativeType>& operator*=(const Scalar& other)
{
*this = *this * other;
return derived();
}
};
-template<typename _DerType>
-struct auto_diff_special_op<_DerType, false>
+template<typename DerivativeType>
+struct auto_diff_special_op<DerivativeType, false>
{
void operator*() const;
void operator-() const;
void operator+() const;
};
+template<typename BinOp, typename A, typename B, typename RefType>
+void make_coherent_expression(CwiseBinaryOp<BinOp,A,B> xpr, const RefType &ref)
+{
+ make_coherent(xpr.const_cast_derived().lhs(), ref);
+ make_coherent(xpr.const_cast_derived().rhs(), ref);
+}
+
+template<typename UnaryOp, typename A, typename RefType>
+void make_coherent_expression(const CwiseUnaryOp<UnaryOp,A> &xpr, const RefType &ref)
+{
+ make_coherent(xpr.nestedExpression().const_cast_derived(), ref);
+}
+
+// needed for compilation only
+template<typename UnaryOp, typename A, typename RefType>
+void make_coherent_expression(const CwiseNullaryOp<UnaryOp,A> &, const RefType &)
+{}
+
template<typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols, typename B>
struct make_coherent_impl<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols>, B> {
typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> A;
@@ -462,6 +480,10 @@ struct make_coherent_impl<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows,
a.resize(b.size());
a.setZero();
}
+ else if (B::SizeAtCompileTime==Dynamic && a.size()!=0 && b.size()==0)
+ {
+ make_coherent_expression(b,a);
+ }
}
};
@@ -474,13 +496,17 @@ struct make_coherent_impl<A, Matrix<B_Scalar, B_Rows, B_Cols, B_Options, B_MaxRo
b.resize(a.size());
b.setZero();
}
+ else if (A::SizeAtCompileTime==Dynamic && b.size()!=0 && a.size()==0)
+ {
+ make_coherent_expression(a,b);
+ }
}
};
template<typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols,
typename B_Scalar, int B_Rows, int B_Cols, int B_Options, int B_MaxRows, int B_MaxCols>
struct make_coherent_impl<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols>,
- Matrix<B_Scalar, B_Rows, B_Cols, B_Options, B_MaxRows, B_MaxCols> > {
+ Matrix<B_Scalar, B_Rows, B_Cols, B_Options, B_MaxRows, B_MaxCols> > {
typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> A;
typedef Matrix<B_Scalar, B_Rows, B_Cols, B_Options, B_MaxRows, B_MaxCols> B;
static void run(A& a, B& b) {
@@ -534,42 +560,48 @@ struct ScalarBinaryOpTraits<typename DerType::Scalar,AutoDiffScalar<DerType>, Bi
EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename Eigen::internal::remove_all<DerType>::type, typename Eigen::internal::traits<typename Eigen::internal::remove_all<DerType>::type>::Scalar, product) > \
FUNC(const Eigen::AutoDiffScalar<DerType>& x) { \
using namespace Eigen; \
- EIGEN_UNUSED typedef typename Eigen::internal::traits<typename Eigen::internal::remove_all<DerType>::type>::Scalar Scalar; \
+ typedef typename Eigen::internal::traits<typename Eigen::internal::remove_all<DerType>::type>::Scalar Scalar; \
+ EIGEN_UNUSED_VARIABLE(sizeof(Scalar)); \
CODE; \
}
template<typename DerType>
+struct CleanedUpDerType {
+ typedef AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> type;
+};
+
+template<typename DerType>
inline const AutoDiffScalar<DerType>& conj(const AutoDiffScalar<DerType>& x) { return x; }
template<typename DerType>
inline const AutoDiffScalar<DerType>& real(const AutoDiffScalar<DerType>& x) { return x; }
template<typename DerType>
inline typename DerType::Scalar imag(const AutoDiffScalar<DerType>&) { return 0.; }
template<typename DerType, typename T>
-inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (min)(const AutoDiffScalar<DerType>& x, const T& y) {
- typedef AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> ADS;
+inline typename CleanedUpDerType<DerType>::type (min)(const AutoDiffScalar<DerType>& x, const T& y) {
+ typedef typename CleanedUpDerType<DerType>::type ADS;
return (x <= y ? ADS(x) : ADS(y));
}
template<typename DerType, typename T>
-inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (max)(const AutoDiffScalar<DerType>& x, const T& y) {
- typedef AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> ADS;
+inline typename CleanedUpDerType<DerType>::type (max)(const AutoDiffScalar<DerType>& x, const T& y) {
+ typedef typename CleanedUpDerType<DerType>::type ADS;
return (x >= y ? ADS(x) : ADS(y));
}
template<typename DerType, typename T>
-inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (min)(const T& x, const AutoDiffScalar<DerType>& y) {
- typedef AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> ADS;
+inline typename CleanedUpDerType<DerType>::type (min)(const T& x, const AutoDiffScalar<DerType>& y) {
+ typedef typename CleanedUpDerType<DerType>::type ADS;
return (x < y ? ADS(x) : ADS(y));
}
template<typename DerType, typename T>
-inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (max)(const T& x, const AutoDiffScalar<DerType>& y) {
- typedef AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> ADS;
+inline typename CleanedUpDerType<DerType>::type (max)(const T& x, const AutoDiffScalar<DerType>& y) {
+ typedef typename CleanedUpDerType<DerType>::type ADS;
return (x > y ? ADS(x) : ADS(y));
}
template<typename DerType>
-inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (min)(const AutoDiffScalar<DerType>& x, const AutoDiffScalar<DerType>& y) {
+inline typename CleanedUpDerType<DerType>::type (min)(const AutoDiffScalar<DerType>& x, const AutoDiffScalar<DerType>& y) {
return (x.value() < y.value() ? x : y);
}
template<typename DerType>
-inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (max)(const AutoDiffScalar<DerType>& x, const AutoDiffScalar<DerType>& y) {
+inline typename CleanedUpDerType<DerType>::type (max)(const AutoDiffScalar<DerType>& x, const AutoDiffScalar<DerType>& y) {
return (x.value() >= y.value() ? x : y);
}
@@ -684,10 +716,15 @@ template<typename DerType> struct NumTraits<AutoDiffScalar<DerType> >
}
namespace std {
+
template <typename T>
class numeric_limits<Eigen::AutoDiffScalar<T> >
: public numeric_limits<typename T::Scalar> {};
+template <typename T>
+class numeric_limits<Eigen::AutoDiffScalar<T&> >
+ : public numeric_limits<typename T::Scalar> {};
+
} // namespace std
#endif // EIGEN_AUTODIFF_SCALAR_H
diff --git a/unsupported/Eigen/src/BVH/KdBVH.h b/unsupported/Eigen/src/BVH/KdBVH.h
index 1b8d75865..2d5b76ad0 100644
--- a/unsupported/Eigen/src/BVH/KdBVH.h
+++ b/unsupported/Eigen/src/BVH/KdBVH.h
@@ -35,6 +35,7 @@ struct get_boxes_helper {
{
outBoxes.insert(outBoxes.end(), boxBegin, boxEnd);
eigen_assert(outBoxes.size() == objects.size());
+ EIGEN_ONLY_USED_FOR_DEBUG(objects);
}
};
@@ -170,7 +171,7 @@ private:
typedef internal::vector_int_pair<Scalar, Dim> VIPair;
typedef std::vector<VIPair, aligned_allocator<VIPair> > VIPairList;
typedef Matrix<Scalar, Dim, 1> VectorType;
- struct VectorComparator //compares vectors, or, more specificall, VIPairs along a particular dimension
+ struct VectorComparator //compares vectors, or more specifically, VIPairs along a particular dimension
{
VectorComparator(int inDim) : dim(inDim) {}
inline bool operator()(const VIPair &v1, const VIPair &v2) const { return v1.first[dim] < v2.first[dim]; }
diff --git a/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h b/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h
index 866a8a460..0fbd84772 100644
--- a/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h
+++ b/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h
@@ -3,29 +3,14 @@
//
// Copyright (C) 2012 David Harmon <dharmon@gmail.com>
//
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_ARPACKGENERALIZEDSELFADJOINTEIGENSOLVER_H
#define EIGEN_ARPACKGENERALIZEDSELFADJOINTEIGENSOLVER_H
-#include <Eigen/Dense>
+#include "../../../../Eigen/Dense"
namespace Eigen {
@@ -300,7 +285,7 @@ public:
/** \brief Reports whether previous computation was successful.
*
- * \returns \c Success if computation was succesful, \c NoConvergence otherwise.
+ * \returns \c Success if computation was successful, \c NoConvergence otherwise.
*/
ComputationInfo info() const
{
diff --git a/unsupported/Eigen/src/EulerAngles/CMakeLists.txt b/unsupported/Eigen/src/EulerAngles/CMakeLists.txt
index 40af550e8..22088eb30 100644
--- a/unsupported/Eigen/src/EulerAngles/CMakeLists.txt
+++ b/unsupported/Eigen/src/EulerAngles/CMakeLists.txt
@@ -1,6 +1,6 @@
-FILE(GLOB Eigen_EulerAngles_SRCS "*.h")
+file(GLOB Eigen_EulerAngles_SRCS "*.h")
-INSTALL(FILES
+install(FILES
${Eigen_EulerAngles_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/EulerAngles COMPONENT Devel
)
diff --git a/unsupported/Eigen/src/EulerAngles/EulerAngles.h b/unsupported/Eigen/src/EulerAngles/EulerAngles.h
index 13a0da1ab..e43cdb7fb 100644
--- a/unsupported/Eigen/src/EulerAngles/EulerAngles.h
+++ b/unsupported/Eigen/src/EulerAngles/EulerAngles.h
@@ -12,11 +12,6 @@
namespace Eigen
{
- /*template<typename Other,
- int OtherRows=Other::RowsAtCompileTime,
- int OtherCols=Other::ColsAtCompileTime>
- struct ei_eulerangles_assign_impl;*/
-
/** \class EulerAngles
*
* \ingroup EulerAngles_Module
@@ -36,7 +31,7 @@ namespace Eigen
* ### Rotation representation and conversions ###
*
* It has been proved(see Wikipedia link below) that every rotation can be represented
- * by Euler angles, but there is no singular representation (e.g. unlike rotation matrices).
+ * by Euler angles, but there is no single representation (e.g. unlike rotation matrices).
* Therefore, you can convert from Eigen rotation and to them
* (including rotation matrices, which is not called "rotations" by Eigen design).
*
@@ -55,33 +50,27 @@ namespace Eigen
* Additionally, some axes related computation is done in compile time.
*
* #### Euler angles ranges in conversions ####
+ * The representation of a rotation as EulerAngles is not unique (unlike matrices);
+ * in fact, every rotation has infinitely many EulerAngles representations.<BR>
+ * For example, add or subtract 2*PI from any angle of an EulerAngles
+ * and you'll get the same rotation.
+ * This is the general reason for the infinite number of representations,
+ * but it is not the only reason for the lack of a unique representation.
*
- * When converting some rotation to Euler angles, there are some ways you can guarantee
- * the Euler angles ranges.
+ * When converting a rotation to EulerAngles, this class converts it to specific ranges.
+ * When converting some rotation to EulerAngles, the rules for ranges are as follows:
+ * - If the rotation we are converting from is an EulerAngles
+ * (even when it is represented as RotationBase explicitly), the angle ranges are __undefined__.
+ * - otherwise, alpha and gamma angles will be in the range [-PI, PI].<BR>
+ * As for Beta angle:
+ * - If the system is Tait-Bryan, the beta angle will be in the range [-PI/2, PI/2].
+ * - otherwise:
+ * - If the beta axis is positive, the beta angle will be in the range [0, PI]
+ * - If the beta axis is negative, the beta angle will be in the range [-PI, 0]
*
- * #### implicit ranges ####
- * When using implicit ranges, all angles are guarantee to be in the range [-PI, +PI],
- * unless you convert from some other Euler angles.
- * In this case, the range is __undefined__ (might be even less than -PI or greater than +2*PI).
* \sa EulerAngles(const MatrixBase<Derived>&)
* \sa EulerAngles(const RotationBase<Derived, 3>&)
*
- * #### explicit ranges ####
- * When using explicit ranges, all angles are guarantee to be in the range you choose.
- * In the range Boolean parameter, you're been ask whether you prefer the positive range or not:
- * - _true_ - force the range between [0, +2*PI]
- * - _false_ - force the range between [-PI, +PI]
- *
- * ##### compile time ranges #####
- * This is when you have compile time ranges and you prefer to
- * use template parameter. (e.g. for performance)
- * \sa FromRotation()
- *
- * ##### run-time time ranges #####
- * Run-time ranges are also supported.
- * \sa EulerAngles(const MatrixBase<Derived>&, bool, bool, bool)
- * \sa EulerAngles(const RotationBase<Derived, 3>&, bool, bool, bool)
- *
* ### Convenient user typedefs ###
*
* Convenient typedefs for EulerAngles exist for float and double scalar,
@@ -103,7 +92,7 @@ namespace Eigen
*
* More information about Euler angles: https://en.wikipedia.org/wiki/Euler_angles
*
- * \tparam _Scalar the scalar type, i.e., the type of the angles.
+ * \tparam _Scalar the scalar type, i.e. the type of the angles.
*
* \tparam _System the EulerSystem to use, which represents the axes of rotation.
*/
@@ -111,8 +100,11 @@ namespace Eigen
class EulerAngles : public RotationBase<EulerAngles<_Scalar, _System>, 3>
{
public:
+ typedef RotationBase<EulerAngles<_Scalar, _System>, 3> Base;
+
/** the scalar type of the angles */
typedef _Scalar Scalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
/** the EulerSystem to use, which represents the axes of rotation. */
typedef _System System;
@@ -146,67 +138,56 @@ namespace Eigen
public:
/** Default constructor without initialization. */
EulerAngles() {}
- /** Constructs and initialize Euler angles(\p alpha, \p beta, \p gamma). */
+ /** Constructs and initializes an EulerAngles (\p alpha, \p beta, \p gamma). */
EulerAngles(const Scalar& alpha, const Scalar& beta, const Scalar& gamma) :
m_angles(alpha, beta, gamma) {}
- /** Constructs and initialize Euler angles from a 3x3 rotation matrix \p m.
- *
- * \note All angles will be in the range [-PI, PI].
- */
- template<typename Derived>
- EulerAngles(const MatrixBase<Derived>& m) { *this = m; }
+ // TODO: Test this constructor
+ /** Constructs and initializes an EulerAngles from the array data {alpha, beta, gamma} */
+ explicit EulerAngles(const Scalar* data) : m_angles(data) {}
- /** Constructs and initialize Euler angles from a 3x3 rotation matrix \p m,
- * with options to choose for each angle the requested range.
- *
- * If positive range is true, then the specified angle will be in the range [0, +2*PI].
- * Otherwise, the specified angle will be in the range [-PI, +PI].
+ /** Constructs and initializes an EulerAngles from either:
+ * - a 3x3 rotation matrix expression (i.e. a pure orthogonal matrix with determinant of +1),
+ * - a 3D vector expression representing Euler angles.
*
- * \param m The 3x3 rotation matrix to convert
- * \param positiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
- * \param positiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
- * \param positiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
- */
+ * \note If \p other is a 3x3 rotation matrix, the angle range rules will be as follows:<BR>
+ * Alpha and gamma angles will be in the range [-PI, PI].<BR>
+ * As for Beta angle:
+ * - If the system is Tait-Bryan, the beta angle will be in the range [-PI/2, PI/2].
+ * - otherwise:
+ * - If the beta axis is positive, the beta angle will be in the range [0, PI]
+ * - If the beta axis is negative, the beta angle will be in the range [-PI, 0]
+ */
template<typename Derived>
- EulerAngles(
- const MatrixBase<Derived>& m,
- bool positiveRangeAlpha,
- bool positiveRangeBeta,
- bool positiveRangeGamma) {
-
- System::CalcEulerAngles(*this, m, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma);
- }
+ explicit EulerAngles(const MatrixBase<Derived>& other) { *this = other; }
/** Constructs and initialize Euler angles from a rotation \p rot.
*
- * \note All angles will be in the range [-PI, PI], unless \p rot is an EulerAngles.
- * If rot is an EulerAngles, expected EulerAngles range is __undefined__.
- * (Use other functions here for enforcing range if this effect is desired)
+ * \note If \p rot is an EulerAngles (even when it is represented as RotationBase explicitly),
+ * the angle ranges are __undefined__.
+ * Otherwise, alpha and gamma angles will be in the range [-PI, PI].<BR>
+ * As for Beta angle:
+ * - If the system is Tait-Bryan, the beta angle will be in the range [-PI/2, PI/2].
+ * - otherwise:
+ * - If the beta axis is positive, the beta angle will be in the range [0, PI]
+ * - If the beta axis is negative, the beta angle will be in the range [-PI, 0]
*/
template<typename Derived>
- EulerAngles(const RotationBase<Derived, 3>& rot) { *this = rot; }
+ EulerAngles(const RotationBase<Derived, 3>& rot) { System::CalcEulerAngles(*this, rot.toRotationMatrix()); }
- /** Constructs and initialize Euler angles from a rotation \p rot,
- * with options to choose for each angle the requested range.
- *
- * If positive range is true, then the specified angle will be in the range [0, +2*PI].
- * Otherwise, the specified angle will be in the range [-PI, +PI].
- *
- * \param rot The 3x3 rotation matrix to convert
- * \param positiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
- * \param positiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
- * \param positiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
- */
- template<typename Derived>
- EulerAngles(
- const RotationBase<Derived, 3>& rot,
- bool positiveRangeAlpha,
- bool positiveRangeBeta,
- bool positiveRangeGamma) {
-
- System::CalcEulerAngles(*this, rot.toRotationMatrix(), positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma);
- }
+ /*EulerAngles(const QuaternionType& q)
+ {
+ // TODO: Implement it in a faster way for quaternions
+ // According to http://www.euclideanspace.com/maths/geometry/rotations/conversions/quaternionToEuler/
+ // we can compute only the needed matrix cells and then convert to euler angles. (see ZYX example below)
+ // Currently we compute all matrix cells from quaternion.
+
+ // Special case only for ZYX
+ //Scalar y2 = q.y() * q.y();
+ //m_angles[0] = std::atan2(2*(q.w()*q.z() + q.x()*q.y()), (1 - 2*(y2 + q.z()*q.z())));
+ //m_angles[1] = std::asin( 2*(q.w()*q.y() - q.z()*q.x()));
+ //m_angles[2] = std::atan2(2*(q.w()*q.x() + q.y()*q.z()), (1 - 2*(q.x()*q.x() + y2)));
+ }*/
/** \returns The angle values stored in a vector (alpha, beta, gamma). */
const Vector3& angles() const { return m_angles; }
@@ -246,90 +227,48 @@ namespace Eigen
return inverse();
}
- /** Constructs and initialize Euler angles from a 3x3 rotation matrix \p m,
- * with options to choose for each angle the requested range (__only in compile time__).
+ /** Set \c *this from either:
+ * - a 3x3 rotation matrix expression (i.e. a pure orthogonal matrix with determinant of +1),
+ * - a 3D vector expression representing Euler angles.
*
- * If positive range is true, then the specified angle will be in the range [0, +2*PI].
- * Otherwise, the specified angle will be in the range [-PI, +PI].
- *
- * \param m The 3x3 rotation matrix to convert
- * \tparam positiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
- * \tparam positiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
- * \tparam positiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
- */
- template<
- bool PositiveRangeAlpha,
- bool PositiveRangeBeta,
- bool PositiveRangeGamma,
- typename Derived>
- static EulerAngles FromRotation(const MatrixBase<Derived>& m)
- {
- EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived, 3, 3)
-
- EulerAngles e;
- System::template CalcEulerAngles<
- PositiveRangeAlpha, PositiveRangeBeta, PositiveRangeGamma, _Scalar>(e, m);
- return e;
- }
-
- /** Constructs and initialize Euler angles from a rotation \p rot,
- * with options to choose for each angle the requested range (__only in compile time__).
- *
- * If positive range is true, then the specified angle will be in the range [0, +2*PI].
- * Otherwise, the specified angle will be in the range [-PI, +PI].
- *
- * \param rot The 3x3 rotation matrix to convert
- * \tparam positiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
- * \tparam positiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
- * \tparam positiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
+ * See EulerAngles(const MatrixBase<Derived>&) for more information about
+ * the output angle ranges.
*/
- template<
- bool PositiveRangeAlpha,
- bool PositiveRangeBeta,
- bool PositiveRangeGamma,
- typename Derived>
- static EulerAngles FromRotation(const RotationBase<Derived, 3>& rot)
- {
- return FromRotation<PositiveRangeAlpha, PositiveRangeBeta, PositiveRangeGamma>(rot.toRotationMatrix());
- }
-
- /*EulerAngles& fromQuaternion(const QuaternionType& q)
+ template<class Derived>
+ EulerAngles& operator=(const MatrixBase<Derived>& other)
{
- // TODO: Implement it in a faster way for quaternions
- // According to http://www.euclideanspace.com/maths/geometry/rotations/conversions/quaternionToEuler/
- // we can compute only the needed matrix cells and then convert to euler angles. (see ZYX example below)
- // Currently we compute all matrix cells from quaternion.
-
- // Special case only for ZYX
- //Scalar y2 = q.y() * q.y();
- //m_angles[0] = std::atan2(2*(q.w()*q.z() + q.x()*q.y()), (1 - 2*(y2 + q.z()*q.z())));
- //m_angles[1] = std::asin( 2*(q.w()*q.y() - q.z()*q.x()));
- //m_angles[2] = std::atan2(2*(q.w()*q.x() + q.y()*q.z()), (1 - 2*(q.x()*q.x() + y2)));
- }*/
-
- /** Set \c *this from a rotation matrix(i.e. pure orthogonal matrix with determinant of +1). */
- template<typename Derived>
- EulerAngles& operator=(const MatrixBase<Derived>& m) {
- EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived, 3, 3)
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename Derived::Scalar>::value),
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
- System::CalcEulerAngles(*this, m);
+ internal::eulerangles_assign_impl<System, Derived>::run(*this, other.derived());
return *this;
}
// TODO: Assign and construct from another EulerAngles (with different system)
- /** Set \c *this from a rotation. */
+ /** Set \c *this from a rotation.
+ *
+ * See EulerAngles(const RotationBase<Derived, 3>&) for more information about
+ * angles ranges output.
+ */
template<typename Derived>
EulerAngles& operator=(const RotationBase<Derived, 3>& rot) {
System::CalcEulerAngles(*this, rot.toRotationMatrix());
return *this;
}
- // TODO: Support isApprox function
+ /** \returns \c true if \c *this is approximately equal to \a other, within the precision
+ * determined by \a prec.
+ *
+ * \sa MatrixBase::isApprox() */
+ bool isApprox(const EulerAngles& other,
+ const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
+ { return angles().isApprox(other.angles(), prec); }
/** \returns an equivalent 3x3 rotation matrix. */
Matrix3 toRotationMatrix() const
{
+ // TODO: Calc it faster
return static_cast<QuaternionType>(*this).toRotationMatrix();
}
@@ -347,6 +286,15 @@ namespace Eigen
s << eulerAngles.angles().transpose();
return s;
}
+
+ /** \returns \c *this with scalar type cast to \a NewScalarType */
+ template <typename NewScalarType>
+ EulerAngles<NewScalarType, System> cast() const
+ {
+ EulerAngles<NewScalarType, System> e;
+ e.angles() = angles().template cast<NewScalarType>();
+ return e;
+ }
};
#define EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(AXES, SCALAR_TYPE, SCALAR_POSTFIX) \
@@ -379,8 +327,29 @@ EIGEN_EULER_ANGLES_TYPEDEFS(double, d)
{
typedef _Scalar Scalar;
};
+
+ // set from a rotation matrix
+ template<class System, class Other>
+ struct eulerangles_assign_impl<System,Other,3,3>
+ {
+ typedef typename Other::Scalar Scalar;
+ static void run(EulerAngles<Scalar, System>& e, const Other& m)
+ {
+ System::CalcEulerAngles(e, m);
+ }
+ };
+
+ // set from a vector of Euler angles
+ template<class System, class Other>
+ struct eulerangles_assign_impl<System,Other,3,1>
+ {
+ typedef typename Other::Scalar Scalar;
+ static void run(EulerAngles<Scalar, System>& e, const Other& vec)
+ {
+ e.angles() = vec;
+ }
+ };
}
-
}
#endif // EIGEN_EULERANGLESCLASS_H
diff --git a/unsupported/Eigen/src/EulerAngles/EulerSystem.h b/unsupported/Eigen/src/EulerAngles/EulerSystem.h
index 98f9f647d..2a833b0a4 100644
--- a/unsupported/Eigen/src/EulerAngles/EulerSystem.h
+++ b/unsupported/Eigen/src/EulerAngles/EulerSystem.h
@@ -12,13 +12,13 @@
namespace Eigen
{
- // Forward declerations
+ // Forward declarations
template <typename _Scalar, class _System>
class EulerAngles;
namespace internal
{
- // TODO: Check if already exists on the rest API
+ // TODO: Add this trait to the Eigen internal API?
template <int Num, bool IsPositive = (Num > 0)>
struct Abs
{
@@ -36,6 +36,12 @@ namespace Eigen
{
enum { value = Axis != 0 && Abs<Axis>::value <= 3 };
};
+
+ template<typename System,
+ typename Other,
+ int OtherRows=Other::RowsAtCompileTime,
+ int OtherCols=Other::ColsAtCompileTime>
+ struct eulerangles_assign_impl;
}
#define EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(COND,MSG) typedef char static_assertion_##MSG[(COND)?1:-1]
@@ -69,7 +75,7 @@ namespace Eigen
*
* You can use this class to get two things:
* - Build an Euler system, and then pass it as a template parameter to EulerAngles.
- * - Query some compile time data about an Euler system. (e.g. Whether it's tait bryan)
+ * - Query some compile time data about an Euler system. (e.g. Whether it's Tait-Bryan)
*
* An Euler rotation is a set of three rotations about fixed axes. (see \ref EulerAngles)
* This meta-class stores those signed axes as compile-time constants. (see \ref EulerAxis)
@@ -80,7 +86,7 @@ namespace Eigen
* signed axes{+X,+Y,+Z,-X,-Y,-Z} are supported:
* - all axes X, Y, Z in each valid order (see below what order is valid)
* - rotation over the axis is supported both over the positive and negative directions.
- * - both tait bryan and proper/classic Euler angles (i.e. the opposite).
+ * - both Tait-Bryan and proper/classic Euler angles (i.e. the opposite).
*
* Since EulerSystem supports both positive and negative directions,
* this rotation distinction may also be known by other names:
@@ -90,7 +96,7 @@ namespace Eigen
* Notice that not all axes combinations are valid; an invalid one triggers a static assertion.
* Same unsigned axes can't be neighbors, e.g. {X,X,Y} is invalid.
* This yields two and only two classes:
- * - _tait bryan_ - all unsigned axes are distinct, e.g. {X,Y,Z}
+ * - _Tait-Bryan_ - all unsigned axes are distinct, e.g. {X,Y,Z}
* - _proper/classic Euler angles_ - The first and the third unsigned axes are equal,
* and the second is different, e.g. {X,Y,X}
*
@@ -112,9 +118,9 @@ namespace Eigen
*
* \tparam _AlphaAxis the first fixed EulerAxis
*
- * \tparam _AlphaAxis the second fixed EulerAxis
+ * \tparam _BetaAxis the second fixed EulerAxis
*
- * \tparam _AlphaAxis the third fixed EulerAxis
+ * \tparam _GammaAxis the third fixed EulerAxis
*/
template <int _AlphaAxis, int _BetaAxis, int _GammaAxis>
class EulerSystem
@@ -138,14 +144,16 @@ namespace Eigen
BetaAxisAbs = internal::Abs<BetaAxis>::value, /*!< the second rotation axis unsigned */
GammaAxisAbs = internal::Abs<GammaAxis>::value, /*!< the third rotation axis unsigned */
- IsAlphaOpposite = (AlphaAxis < 0) ? 1 : 0, /*!< weather alpha axis is negative */
- IsBetaOpposite = (BetaAxis < 0) ? 1 : 0, /*!< weather beta axis is negative */
- IsGammaOpposite = (GammaAxis < 0) ? 1 : 0, /*!< weather gamma axis is negative */
-
- IsOdd = ((AlphaAxisAbs)%3 == (BetaAxisAbs - 1)%3) ? 0 : 1, /*!< weather the Euler system is odd */
- IsEven = IsOdd ? 0 : 1, /*!< weather the Euler system is even */
+ IsAlphaOpposite = (AlphaAxis < 0) ? 1 : 0, /*!< whether alpha axis is negative */
+ IsBetaOpposite = (BetaAxis < 0) ? 1 : 0, /*!< whether beta axis is negative */
+ IsGammaOpposite = (GammaAxis < 0) ? 1 : 0, /*!< whether gamma axis is negative */
+
+ // Parity is even if alpha axis X is followed by beta axis Y, or Y is followed
+ // by Z, or Z is followed by X; otherwise it is odd.
+ IsOdd = ((AlphaAxisAbs)%3 == (BetaAxisAbs - 1)%3) ? 0 : 1, /*!< whether the Euler system is odd */
+ IsEven = IsOdd ? 0 : 1, /*!< whether the Euler system is even */
- IsTaitBryan = ((unsigned)AlphaAxisAbs != (unsigned)GammaAxisAbs) ? 1 : 0 /*!< weather the Euler system is tait bryan */
+ IsTaitBryan = ((unsigned)AlphaAxisAbs != (unsigned)GammaAxisAbs) ? 1 : 0 /*!< whether the Euler system is Tait-Bryan */
};
private:
@@ -165,86 +173,84 @@ namespace Eigen
EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT((unsigned)BetaAxisAbs != (unsigned)GammaAxisAbs,
BETA_AXIS_CANT_BE_EQUAL_TO_GAMMA_AXIS);
- enum
- {
+ static const int
// I, J, K are the pivot indexes permutation for the rotation matrix, that match this Euler system.
// They are used in this class converters.
// They are always different from each other, and their possible values are: 0, 1, or 2.
- I = AlphaAxisAbs - 1,
- J = (AlphaAxisAbs - 1 + 1 + IsOdd)%3,
- K = (AlphaAxisAbs - 1 + 2 - IsOdd)%3
- };
+ I_ = AlphaAxisAbs - 1,
+ J_ = (AlphaAxisAbs - 1 + 1 + IsOdd)%3,
+ K_ = (AlphaAxisAbs - 1 + 2 - IsOdd)%3
+ ;
// TODO: Get @mat parameter in form that avoids double evaluation.
template <typename Derived>
static void CalcEulerAngles_imp(Matrix<typename MatrixBase<Derived>::Scalar, 3, 1>& res, const MatrixBase<Derived>& mat, internal::true_type /*isTaitBryan*/)
{
using std::atan2;
- using std::sin;
- using std::cos;
+ using std::sqrt;
typedef typename Derived::Scalar Scalar;
- typedef Matrix<Scalar,2,1> Vector2;
-
- res[0] = atan2(mat(J,K), mat(K,K));
- Scalar c2 = Vector2(mat(I,I), mat(I,J)).norm();
- if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0))) {
- if(res[0] > Scalar(0)) {
- res[0] -= Scalar(EIGEN_PI);
- }
- else {
- res[0] += Scalar(EIGEN_PI);
- }
- res[1] = atan2(-mat(I,K), -c2);
+
+ const Scalar plusMinus = IsEven? 1 : -1;
+ const Scalar minusPlus = IsOdd? 1 : -1;
+
+ const Scalar Rsum = sqrt((mat(I_,I_) * mat(I_,I_) + mat(I_,J_) * mat(I_,J_) + mat(J_,K_) * mat(J_,K_) + mat(K_,K_) * mat(K_,K_))/2);
+ res[1] = atan2(plusMinus * mat(I_,K_), Rsum);
+
+ // There is a singularity when cos(beta) == 0
+ if(Rsum > 4 * NumTraits<Scalar>::epsilon()) {// cos(beta) != 0
+ res[0] = atan2(minusPlus * mat(J_, K_), mat(K_, K_));
+ res[2] = atan2(minusPlus * mat(I_, J_), mat(I_, I_));
+ }
+ else if(plusMinus * mat(I_, K_) > 0) {// cos(beta) == 0 and sin(beta) == 1
+ Scalar spos = mat(J_, I_) + plusMinus * mat(K_, J_); // 2*sin(alpha + plusMinus * gamma)
+ Scalar cpos = mat(J_, J_) + minusPlus * mat(K_, I_); // 2*cos(alpha + plusMinus * gamma)
+ Scalar alphaPlusMinusGamma = atan2(spos, cpos);
+ res[0] = alphaPlusMinusGamma;
+ res[2] = 0;
+ }
+ else {// cos(beta) == 0 and sin(beta) == -1
+ Scalar sneg = plusMinus * (mat(K_, J_) + minusPlus * mat(J_, I_)); // 2*sin(alpha + minusPlus*gamma)
+ Scalar cneg = mat(J_, J_) + plusMinus * mat(K_, I_); // 2*cos(alpha + minusPlus*gamma)
+ Scalar alphaMinusPlusBeta = atan2(sneg, cneg);
+ res[0] = alphaMinusPlusBeta;
+ res[2] = 0;
}
- else
- res[1] = atan2(-mat(I,K), c2);
- Scalar s1 = sin(res[0]);
- Scalar c1 = cos(res[0]);
- res[2] = atan2(s1*mat(K,I)-c1*mat(J,I), c1*mat(J,J) - s1 * mat(K,J));
}
template <typename Derived>
- static void CalcEulerAngles_imp(Matrix<typename MatrixBase<Derived>::Scalar,3,1>& res, const MatrixBase<Derived>& mat, internal::false_type /*isTaitBryan*/)
+ static void CalcEulerAngles_imp(Matrix<typename MatrixBase<Derived>::Scalar,3,1>& res,
+ const MatrixBase<Derived>& mat, internal::false_type /*isTaitBryan*/)
{
using std::atan2;
- using std::sin;
- using std::cos;
+ using std::sqrt;
typedef typename Derived::Scalar Scalar;
- typedef Matrix<Scalar,2,1> Vector2;
-
- res[0] = atan2(mat(J,I), mat(K,I));
- if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0)))
- {
- if(res[0] > Scalar(0)) {
- res[0] -= Scalar(EIGEN_PI);
- }
- else {
- res[0] += Scalar(EIGEN_PI);
- }
- Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm();
- res[1] = -atan2(s2, mat(I,I));
- }
- else
- {
- Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm();
- res[1] = atan2(s2, mat(I,I));
- }
- // With a=(0,1,0), we have i=0; j=1; k=2, and after computing the first two angles,
- // we can compute their respective rotation, and apply its inverse to M. Since the result must
- // be a rotation around x, we have:
- //
- // c2 s1.s2 c1.s2 1 0 0
- // 0 c1 -s1 * M = 0 c3 s3
- // -s2 s1.c2 c1.c2 0 -s3 c3
- //
- // Thus: m11.c1 - m21.s1 = c3 & m12.c1 - m22.s1 = s3
+ const Scalar plusMinus = IsEven? 1 : -1;
+ const Scalar minusPlus = IsOdd? 1 : -1;
+
+ const Scalar Rsum = sqrt((mat(I_, J_) * mat(I_, J_) + mat(I_, K_) * mat(I_, K_) + mat(J_, I_) * mat(J_, I_) + mat(K_, I_) * mat(K_, I_)) / 2);
- Scalar s1 = sin(res[0]);
- Scalar c1 = cos(res[0]);
- res[2] = atan2(c1*mat(J,K)-s1*mat(K,K), c1*mat(J,J) - s1 * mat(K,J));
+ res[1] = atan2(Rsum, mat(I_, I_));
+
+ // There is a singularity when sin(beta) == 0
+ if(Rsum > 4 * NumTraits<Scalar>::epsilon()) {// sin(beta) != 0
+ res[0] = atan2(mat(J_, I_), minusPlus * mat(K_, I_));
+ res[2] = atan2(mat(I_, J_), plusMinus * mat(I_, K_));
+ }
+ else if(mat(I_, I_) > 0) {// sin(beta) == 0 and cos(beta) == 1
+ Scalar spos = plusMinus * mat(K_, J_) + minusPlus * mat(J_, K_); // 2*sin(alpha + gamma)
+ Scalar cpos = mat(J_, J_) + mat(K_, K_); // 2*cos(alpha + gamma)
+ res[0] = atan2(spos, cpos);
+ res[2] = 0;
+ }
+ else {// sin(beta) == 0 and cos(beta) == -1
+ Scalar sneg = plusMinus * mat(K_, J_) + plusMinus * mat(J_, K_); // 2*sin(alpha - gamma)
+ Scalar cneg = mat(J_, J_) - mat(K_, K_); // 2*cos(alpha - gamma)
+ res[0] = atan2(sneg, cneg);
+ res[2] = 0;
+ }
}
template<typename Scalar>
@@ -252,55 +258,28 @@ namespace Eigen
EulerAngles<Scalar, EulerSystem>& res,
const typename EulerAngles<Scalar, EulerSystem>::Matrix3& mat)
{
- CalcEulerAngles(res, mat, false, false, false);
- }
-
- template<
- bool PositiveRangeAlpha,
- bool PositiveRangeBeta,
- bool PositiveRangeGamma,
- typename Scalar>
- static void CalcEulerAngles(
- EulerAngles<Scalar, EulerSystem>& res,
- const typename EulerAngles<Scalar, EulerSystem>::Matrix3& mat)
- {
- CalcEulerAngles(res, mat, PositiveRangeAlpha, PositiveRangeBeta, PositiveRangeGamma);
- }
-
- template<typename Scalar>
- static void CalcEulerAngles(
- EulerAngles<Scalar, EulerSystem>& res,
- const typename EulerAngles<Scalar, EulerSystem>::Matrix3& mat,
- bool PositiveRangeAlpha,
- bool PositiveRangeBeta,
- bool PositiveRangeGamma)
- {
CalcEulerAngles_imp(
res.angles(), mat,
typename internal::conditional<IsTaitBryan, internal::true_type, internal::false_type>::type());
- if (IsAlphaOpposite == IsOdd)
+ if (IsAlphaOpposite)
res.alpha() = -res.alpha();
- if (IsBetaOpposite == IsOdd)
+ if (IsBetaOpposite)
res.beta() = -res.beta();
- if (IsGammaOpposite == IsOdd)
+ if (IsGammaOpposite)
res.gamma() = -res.gamma();
-
- // Saturate results to the requested range
- if (PositiveRangeAlpha && (res.alpha() < 0))
- res.alpha() += Scalar(2 * EIGEN_PI);
-
- if (PositiveRangeBeta && (res.beta() < 0))
- res.beta() += Scalar(2 * EIGEN_PI);
-
- if (PositiveRangeGamma && (res.gamma() < 0))
- res.gamma() += Scalar(2 * EIGEN_PI);
}
template <typename _Scalar, class _System>
friend class Eigen::EulerAngles;
+
+ template<typename System,
+ typename Other,
+ int OtherRows,
+ int OtherCols>
+ friend struct internal::eulerangles_assign_impl;
};
#define EIGEN_EULER_SYSTEM_TYPEDEF(A, B, C) \
diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
index d49aa17f5..1c2cd24a0 100644
--- a/unsupported/Eigen/src/FFT/ei_fftw_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
@@ -231,6 +231,8 @@ namespace internal {
protected:
typedef fftw_plan<Scalar> PlanData;
+ typedef Eigen::numext::int64_t int64_t;
+
typedef std::map<int64_t,PlanData> PlanMap;
PlanMap m_plans;
@@ -257,5 +259,3 @@ namespace internal {
} // end namespace internal
} // end namespace Eigen
-
-/* vim: set filetype=cpp et sw=2 ts=2 ai: */
diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
index be51b4e6f..430953aee 100644
--- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
@@ -25,16 +25,47 @@ struct kiss_cpx_fft
std::vector<Complex> m_scratchBuf;
bool m_inverse;
- inline
- void make_twiddles(int nfft,bool inverse)
+ inline void make_twiddles(int nfft, bool inverse)
+ {
+ using numext::sin;
+ using numext::cos;
+ m_inverse = inverse;
+ m_twiddles.resize(nfft);
+ double phinc = 0.25 * double(EIGEN_PI) / nfft;
+ Scalar flip = inverse ? Scalar(1) : Scalar(-1);
+ m_twiddles[0] = Complex(Scalar(1), Scalar(0));
+ if ((nfft&1)==0)
+ m_twiddles[nfft/2] = Complex(Scalar(-1), Scalar(0));
+ int i=1;
+ for (;i*8<nfft;++i)
{
- using std::acos;
- m_inverse = inverse;
- m_twiddles.resize(nfft);
- Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft;
- for (int i=0;i<nfft;++i)
- m_twiddles[i] = exp( Complex(0,i*phinc) );
+ Scalar c = Scalar(cos(i*8*phinc));
+ Scalar s = Scalar(sin(i*8*phinc));
+ m_twiddles[i] = Complex(c, s*flip);
+ m_twiddles[nfft-i] = Complex(c, -s*flip);
}
+ for (;i*4<nfft;++i)
+ {
+ Scalar c = Scalar(cos((2*nfft-8*i)*phinc));
+ Scalar s = Scalar(sin((2*nfft-8*i)*phinc));
+ m_twiddles[i] = Complex(s, c*flip);
+ m_twiddles[nfft-i] = Complex(s, -c*flip);
+ }
+ for (;i*8<3*nfft;++i)
+ {
+ Scalar c = Scalar(cos((8*i-2*nfft)*phinc));
+ Scalar s = Scalar(sin((8*i-2*nfft)*phinc));
+ m_twiddles[i] = Complex(-s, c*flip);
+ m_twiddles[nfft-i] = Complex(-s, -c*flip);
+ }
+ for (;i*2<nfft;++i)
+ {
+ Scalar c = Scalar(cos((4*nfft-8*i)*phinc));
+ Scalar s = Scalar(sin((4*nfft-8*i)*phinc));
+ m_twiddles[i] = Complex(-c, s*flip);
+ m_twiddles[nfft-i] = Complex(-c, -s*flip);
+ }
+ }
void factorize(int nfft)
{
@@ -316,8 +347,8 @@ struct kissfft_impl
// use optimized mode for even real
fwd( dst, reinterpret_cast<const Complex*> (src), ncfft);
- Complex dc = dst[0].real() + dst[0].imag();
- Complex nyquist = dst[0].real() - dst[0].imag();
+ Complex dc(dst[0].real() + dst[0].imag());
+ Complex nyquist(dst[0].real() - dst[0].imag());
int k;
for ( k=1;k <= ncfft2 ; ++k ) {
Complex fpk = dst[k];
@@ -416,5 +447,3 @@ struct kissfft_impl
} // end namespace internal
} // end namespace Eigen
-
-/* vim: set filetype=cpp et sw=2 ts=2 ai: */
diff --git a/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h b/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h
index dc0093eb9..e7d70f39d 100644
--- a/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h
+++ b/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h
@@ -31,13 +31,13 @@
#ifndef EIGEN_CONSTRAINEDCG_H
#define EIGEN_CONSTRAINEDCG_H
-#include <Eigen/Core>
+#include "../../../../Eigen/Core"
namespace Eigen {
namespace internal {
-/** \ingroup IterativeSolvers_Module
+/** \ingroup IterativeLinearSolvers_Module
* Compute the pseudo inverse of the non-square matrix C such that
* \f$ CINV = (C * C^T)^{-1} * C \f$ based on a conjugate gradient method.
*
@@ -96,10 +96,10 @@ void pseudo_inverse(const CMatrix &C, CINVMatrix &CINV)
-/** \ingroup IterativeSolvers_Module
+/** \ingroup IterativeLinearSolvers_Module
* Constrained conjugate gradient
*
- * Computes the minimum of \f$ 1/2((Ax).x) - bx \f$ under the contraint \f$ Cx \le f \f$
+ * Computes the minimum of \f$ 1/2((Ax).x) - bx \f$ under the constraint \f$ Cx \le f \f$
*/
template<typename TMatrix, typename CMatrix,
typename VectorX, typename VectorB, typename VectorF>
@@ -158,8 +158,6 @@ void constrained_cg(const TMatrix& A, const CMatrix& C, VectorX& x,
rho = r.dot(z);
if (iter.finished(rho)) break;
-
- if (iter.noiseLevel() > 0 && transition) std::cerr << "CCG: transition\n";
if (transition || iter.first()) gamma = 0.0;
else gamma = (std::max)(0.0, (rho - old_z.dot(z)) / rho_1);
p = z + gamma*p;
diff --git a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
index bae04fc30..5ae011b75 100644
--- a/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
@@ -10,7 +10,7 @@
#ifndef EIGEN_DGMRES_H
#define EIGEN_DGMRES_H
-#include <Eigen/Eigenvalues>
+#include "../../../../Eigen/Eigenvalues"
namespace Eigen {
@@ -39,7 +39,6 @@ template <typename VectorType, typename IndexType>
void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType::Scalar& ncut)
{
eigen_assert(vec.size() == perm.size());
- typedef typename IndexType::Scalar Index;
bool flag;
for (Index k = 0; k < ncut; k++)
{
@@ -58,7 +57,7 @@ void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType::
}
/**
- * \ingroup IterativeLInearSolvers_Module
+ * \ingroup IterativeLinearSolvers_Module
* \brief A Restarted GMRES with deflation.
* This class implements a modification of the GMRES solver for
* sparse linear systems. The basis is built with modified
@@ -89,7 +88,7 @@ void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType::
* [1] D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid
* Algebraic Solvers for Linear Systems Arising from Compressible
* Flows, Computers and Fluids, In Press,
- * http://dx.doi.org/10.1016/j.compfluid.2012.03.023
+ * https://doi.org/10.1016/j.compfluid.2012.03.023
* [2] K. Burrage and J. Erhel, On the performance of various
* adaptive preconditioned GMRES strategies, 5(1998), 101-121.
* [3] J. Erhel, K. Burrage and B. Pohl, Restarted GMRES
@@ -110,9 +109,9 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
using Base::m_tolerance;
public:
using Base::_solve_impl;
+ using Base::_solve_with_guess_impl;
typedef _MatrixType MatrixType;
typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::Index Index;
typedef typename MatrixType::StorageIndex StorageIndex;
typedef typename MatrixType::RealScalar RealScalar;
typedef _Preconditioner Preconditioner;
@@ -143,44 +142,30 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
/** \internal */
template<typename Rhs,typename Dest>
- void _solve_with_guess_impl(const Rhs& b, Dest& x) const
- {
- bool failed = false;
- for(int j=0; j<b.cols(); ++j)
- {
- m_iterations = Base::maxIterations();
- m_error = Base::m_tolerance;
-
- typename Dest::ColXpr xj(x,j);
- dgmres(matrix(), b.col(j), xj, Base::m_preconditioner);
- }
- m_info = failed ? NumericalIssue
- : m_error <= Base::m_tolerance ? Success
- : NoConvergence;
- m_isInitialized = true;
- }
-
- /** \internal */
- template<typename Rhs,typename Dest>
- void _solve_impl(const Rhs& b, MatrixBase<Dest>& x) const
+ void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const
{
- x = b;
- _solve_with_guess_impl(b,x.derived());
+ EIGEN_STATIC_ASSERT(Rhs::ColsAtCompileTime==1 || Dest::ColsAtCompileTime==1, YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX);
+
+ m_iterations = Base::maxIterations();
+ m_error = Base::m_tolerance;
+
+ dgmres(matrix(), b, x, Base::m_preconditioner);
}
+
/**
* Get the restart value
*/
- int restart() { return m_restart; }
+ Index restart() { return m_restart; }
/**
* Set the restart value (default is 30)
*/
- void set_restart(const int restart) { m_restart=restart; }
+ void set_restart(const Index restart) { m_restart=restart; }
/**
* Set the number of eigenvalues to deflate at each restart
*/
- void setEigenv(const int neig)
+ void setEigenv(const Index neig)
{
m_neig = neig;
if (neig+1 > m_maxNeig) m_maxNeig = neig+1; // To allow for complex conjugates
@@ -189,12 +174,12 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
/**
* Get the size of the deflation subspace
*/
- int deflSize() {return m_r; }
+ Index deflSize() {return m_r; }
/**
* Set the maximum size of the deflation subspace
*/
- void setMaxEigenv(const int maxNeig) { m_maxNeig = maxNeig; }
+ void setMaxEigenv(const Index maxNeig) { m_maxNeig = maxNeig; }
protected:
// DGMRES algorithm
@@ -202,27 +187,27 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
void dgmres(const MatrixType& mat,const Rhs& rhs, Dest& x, const Preconditioner& precond) const;
// Perform one cycle of GMRES
template<typename Dest>
- int dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, int& nbIts) const;
+ Index dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, Index& nbIts) const;
// Compute data to use for deflation
- int dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const;
+ Index dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const;
// Apply deflation to a vector
template<typename RhsType, typename DestType>
- int dgmresApplyDeflation(const RhsType& In, DestType& Out) const;
+ Index dgmresApplyDeflation(const RhsType& In, DestType& Out) const;
ComplexVector schurValues(const ComplexSchur<DenseMatrix>& schurofH) const;
ComplexVector schurValues(const RealSchur<DenseMatrix>& schurofH) const;
// Init data for deflation
void dgmresInitDeflation(Index& rows) const;
mutable DenseMatrix m_V; // Krylov basis vectors
mutable DenseMatrix m_H; // Hessenberg matrix
- mutable DenseMatrix m_Hes; // Initial hessenberg matrix wihout Givens rotations applied
+ mutable DenseMatrix m_Hes; // Initial hessenberg matrix without Givens rotations applied
mutable Index m_restart; // Maximum size of the Krylov subspace
mutable DenseMatrix m_U; // Vectors that form the basis of the invariant subspace
mutable DenseMatrix m_MU; // matrix operator applied to m_U (for next cycles)
mutable DenseMatrix m_T; /* T=U^T*M^{-1}*A*U */
mutable PartialPivLU<DenseMatrix> m_luT; // LU factorization of m_T
mutable StorageIndex m_neig; //Number of eigenvalues to extract at each restart
- mutable int m_r; // Current number of deflated eigenvalues, size of m_U
- mutable int m_maxNeig; // Maximum number of eigenvalues to deflate
+ mutable Index m_r; // Current number of deflated eigenvalues, size of m_U
+ mutable Index m_maxNeig; // Maximum number of eigenvalues to deflate
mutable RealScalar m_lambdaN; //Modulus of the largest eigenvalue of A
mutable bool m_isDeflAllocated;
mutable bool m_isDeflInitialized;
@@ -243,18 +228,30 @@ template<typename Rhs, typename Dest>
void DGMRES<_MatrixType, _Preconditioner>::dgmres(const MatrixType& mat,const Rhs& rhs, Dest& x,
const Preconditioner& precond) const
{
+ const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
+
+ RealScalar normRhs = rhs.norm();
+ if(normRhs <= considerAsZero)
+ {
+ x.setZero();
+ m_error = 0;
+ return;
+ }
+
//Initialization
- int n = mat.rows();
+ m_isDeflInitialized = false;
+ Index n = mat.rows();
DenseVector r0(n);
- int nbIts = 0;
+ Index nbIts = 0;
m_H.resize(m_restart+1, m_restart);
m_Hes.resize(m_restart, m_restart);
m_V.resize(n,m_restart+1);
- //Initial residual vector and intial norm
- x = precond.solve(x);
+ //Initial residual vector and initial norm
+ if(x.squaredNorm()==0)
+ x = precond.solve(rhs);
r0 = rhs - mat * x;
RealScalar beta = r0.norm();
- RealScalar normRhs = rhs.norm();
+
m_error = beta/normRhs;
if(m_error < m_tolerance)
m_info = Success;
@@ -267,8 +264,10 @@ void DGMRES<_MatrixType, _Preconditioner>::dgmres(const MatrixType& mat,const Rh
dgmresCycle(mat, precond, x, r0, beta, normRhs, nbIts);
// Compute the new residual vector for the restart
- if (nbIts < m_iterations && m_info == NoConvergence)
- r0 = rhs - mat * x;
+ if (nbIts < m_iterations && m_info == NoConvergence) {
+ r0 = rhs - mat * x;
+ beta = r0.norm();
+ }
}
}
@@ -284,7 +283,7 @@ void DGMRES<_MatrixType, _Preconditioner>::dgmres(const MatrixType& mat,const Rh
*/
template< typename _MatrixType, typename _Preconditioner>
template<typename Dest>
-int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, int& nbIts) const
+Index DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, Index& nbIts) const
{
//Initialization
DenseVector g(m_restart+1); // Right hand side of the least square problem
@@ -293,8 +292,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con
m_V.col(0) = r0/beta;
m_info = NoConvergence;
std::vector<JacobiRotation<Scalar> >gr(m_restart); // Givens rotations
- int it = 0; // Number of inner iterations
- int n = mat.rows();
+ Index it = 0; // Number of inner iterations
+ Index n = mat.rows();
DenseVector tv1(n), tv2(n); //Temporary vectors
while (m_info == NoConvergence && it < m_restart && nbIts < m_iterations)
{
@@ -312,7 +311,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con
// Orthogonalize it against the previous basis vectors using modified Gram-Schmidt
Scalar coef;
- for (int i = 0; i <= it; ++i)
+ for (Index i = 0; i <= it; ++i)
{
coef = tv1.dot(m_V.col(i));
tv1 = tv1 - coef * m_V.col(i);
@@ -328,7 +327,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con
// FIXME Check for happy breakdown
// Update Hessenberg matrix with Givens rotations
- for (int i = 1; i <= it; ++i)
+ for (Index i = 1; i <= it; ++i)
{
m_H.col(it).applyOnTheLeft(i-1,i,gr[i-1].adjoint());
}
@@ -394,7 +393,6 @@ inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_Matr
template< typename _MatrixType, typename _Preconditioner>
inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_MatrixType, _Preconditioner>::schurValues(const RealSchur<DenseMatrix>& schurofH) const
{
- typedef typename MatrixType::Index Index;
const DenseMatrix& T = schurofH.matrixT();
Index it = T.rows();
ComplexVector eig(it);
@@ -418,7 +416,7 @@ inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_Matr
}
template< typename _MatrixType, typename _Preconditioner>
-int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const
+Index DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const
{
// First, find the Schur form of the Hessenberg matrix H
typename internal::conditional<NumTraits<Scalar>::IsComplex, ComplexSchur<DenseMatrix>, RealSchur<DenseMatrix> >::type schurofH;
@@ -433,8 +431,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
// Reorder the absolute values of Schur values
DenseRealVector modulEig(it);
- for (int j=0; j<it; ++j) modulEig(j) = std::abs(eig(j));
- perm.setLinSpaced(it,0,it-1);
+ for (Index j=0; j<it; ++j) modulEig(j) = std::abs(eig(j));
+ perm.setLinSpaced(it,0,internal::convert_index<StorageIndex>(it-1));
internal::sortWithPermutation(modulEig, perm, neig);
if (!m_lambdaN)
@@ -442,7 +440,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
m_lambdaN = (std::max)(modulEig.maxCoeff(), m_lambdaN);
}
//Count the real number of extracted eigenvalues (with complex conjugates)
- int nbrEig = 0;
+ Index nbrEig = 0;
while (nbrEig < neig)
{
if(eig(perm(it-nbrEig-1)).imag() == RealScalar(0)) nbrEig++;
@@ -451,7 +449,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
// Extract the Schur vectors corresponding to the smallest Ritz values
DenseMatrix Sr(it, nbrEig);
Sr.setZero();
- for (int j = 0; j < nbrEig; j++)
+ for (Index j = 0; j < nbrEig; j++)
{
Sr.col(j) = schurofH.matrixU().col(perm(it-j-1));
}
@@ -462,8 +460,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
if (m_r)
{
// Orthogonalize X against m_U using modified Gram-Schmidt
- for (int j = 0; j < nbrEig; j++)
- for (int k =0; k < m_r; k++)
+ for (Index j = 0; j < nbrEig; j++)
+ for (Index k =0; k < m_r; k++)
X.col(j) = X.col(j) - (m_U.col(k).dot(X.col(j)))*m_U.col(k);
}
@@ -473,7 +471,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
dgmresInitDeflation(m);
DenseMatrix MX(m, nbrEig);
DenseVector tv1(m);
- for (int j = 0; j < nbrEig; j++)
+ for (Index j = 0; j < nbrEig; j++)
{
tv1 = mat * X.col(j);
MX.col(j) = precond.solve(tv1);
@@ -488,8 +486,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
}
// Save X into m_U and MX into m_MU
- for (int j = 0; j < nbrEig; j++) m_U.col(m_r+j) = X.col(j);
- for (int j = 0; j < nbrEig; j++) m_MU.col(m_r+j) = MX.col(j);
+ for (Index j = 0; j < nbrEig; j++) m_U.col(m_r+j) = X.col(j);
+ for (Index j = 0; j < nbrEig; j++) m_MU.col(m_r+j) = MX.col(j);
// Increase the size of the invariant subspace
m_r += nbrEig;
@@ -502,7 +500,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
}
template<typename _MatrixType, typename _Preconditioner>
template<typename RhsType, typename DestType>
-int DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x, DestType &y) const
+Index DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x, DestType &y) const
{
DenseVector x1 = m_U.leftCols(m_r).transpose() * x;
y = x + m_U.leftCols(m_r) * ( m_lambdaN * m_luT.solve(x1) - x1);
diff --git a/unsupported/Eigen/src/IterativeSolvers/GMRES.h b/unsupported/Eigen/src/IterativeSolvers/GMRES.h
index 5a82b0df6..ff912094f 100644
--- a/unsupported/Eigen/src/IterativeSolvers/GMRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/GMRES.h
@@ -21,7 +21,7 @@ namespace internal {
*
* Parameters:
* \param mat matrix of linear system of equations
-* \param Rhs right hand side vector of linear system of equations
+* \param rhs right hand side vector of linear system of equations
* \param x on input: initial guess, on output: solution
* \param precond preconditioner used
* \param iters on input: maximum number of iterations to perform
@@ -64,6 +64,15 @@ bool gmres(const MatrixType & mat, const Rhs & rhs, Dest & x, const Precondition
typedef Matrix < Scalar, Dynamic, 1 > VectorType;
typedef Matrix < Scalar, Dynamic, Dynamic, ColMajor> FMatrixType;
+ const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
+
+ if(rhs.norm() <= considerAsZero)
+ {
+ x.setZero();
+ tol_error = 0;
+ return true;
+ }
+
RealScalar tol = tol_error;
const Index maxIters = iters;
iters = 0;
@@ -307,31 +316,14 @@ public:
/** \internal */
template<typename Rhs,typename Dest>
- void _solve_with_guess_impl(const Rhs& b, Dest& x) const
+ void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const
{
- bool failed = false;
- for(Index j=0; j<b.cols(); ++j)
- {
- m_iterations = Base::maxIterations();
- m_error = Base::m_tolerance;
-
- typename Dest::ColXpr xj(x,j);
- if(!internal::gmres(matrix(), b.col(j), xj, Base::m_preconditioner, m_iterations, m_restart, m_error))
- failed = true;
- }
- m_info = failed ? NumericalIssue
+ m_iterations = Base::maxIterations();
+ m_error = Base::m_tolerance;
+ bool ret = internal::gmres(matrix(), b, x, Base::m_preconditioner, m_iterations, m_restart, m_error);
+ m_info = (!ret) ? NumericalIssue
: m_error <= Base::m_tolerance ? Success
: NoConvergence;
- m_isInitialized = true;
- }
-
- /** \internal */
- template<typename Rhs,typename Dest>
- void _solve_impl(const Rhs& b, MatrixBase<Dest> &x) const
- {
- x = b;
- if(x.squaredNorm() == 0) return; // Check Zero right hand side
- _solve_with_guess_impl(b,x.derived());
}
protected:
diff --git a/unsupported/Eigen/src/IterativeSolvers/IDRS.h b/unsupported/Eigen/src/IterativeSolvers/IDRS.h
new file mode 100755
index 000000000..90d20fad4
--- /dev/null
+++ b/unsupported/Eigen/src/IterativeSolvers/IDRS.h
@@ -0,0 +1,436 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2020 Chris Schoutrop <c.e.m.schoutrop@tue.nl>
+// Copyright (C) 2020 Jens Wehner <j.wehner@esciencecenter.nl>
+// Copyright (C) 2020 Jan van Dijk <j.v.dijk@tue.nl>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#ifndef EIGEN_IDRS_H
+#define EIGEN_IDRS_H
+
+namespace Eigen
+{
+
+ namespace internal
+ {
+ /** \internal Low-level Induced Dimension Reduction algorithm
+ \param A The matrix A
+ \param b The right hand side vector b
+ \param x On input an initial solution, on output the computed solution.
+ \param precond A preconditioner being able to efficiently solve for an
+ approximation of Ax=b (regardless of b)
+ \param iter On input the max number of iteration, on output the number of performed iterations.
+ \param relres On input the tolerance error, on output an estimation of the relative error.
+ \param S On input Number of the dimension of the shadow space.
+ \param smoothing switches residual smoothing on.
+ \param angle small omega leads to faster convergence at the expense of numerical stability
+ \param replacement switches on a residual replacement strategy to increase accuracy of residual at the expense of more Mat*vec products
+ \return false in the case of numerical issue, for example a break down of IDRS.
+ */
+ // Computes the relaxation parameter omega used by idrs() in the update r - omega * t.
+ //   t      vector whose multiple is subtracted from s
+ //   s      vector to be reduced (idrs() passes the current residual)
+ //   angle  lower bound on the cosine rho = |t.s| / (|t| |s|) between t and s
+ // Returns t.s / |t|^2 (the value minimising |s - omega * t|) when rho >= angle,
+ // a rescaled value with the same sign/phase when rho < angle, and 0 when no
+ // meaningful value exists (t or s is the zero vector, or t is orthogonal to s).
+ template<typename Vector, typename RealScalar>
+ typename Vector::Scalar omega(const Vector& t, const Vector& s, RealScalar angle)
+ {
+   using numext::abs;
+   typedef typename Vector::Scalar Scalar;
+   const RealScalar ns = s.norm();
+   const RealScalar nt = t.norm();
+
+   // With a zero vector both quotients below would be 0/0 and yield NaN.
+   // Returning 0 instead lets the caller detect the situation as a breakdown.
+   if (nt == RealScalar(0) || ns == RealScalar(0)) {
+     return Scalar(0);
+   }
+
+   const Scalar ts = t.dot(s);
+   const RealScalar rho = abs(ts / (nt * ns));
+
+   if (rho < angle) {
+     if (ts == Scalar(0)) {
+       return Scalar(0);
+     }
+     // Original relation for om is given by
+     // om = om * angle / rho;
+     // To alleviate potential (near) division by zero this can be rewritten as
+     // om = angle * (ns / nt) * (ts / abs(ts)) = angle * (ns / nt) * sgn(ts)
+     return angle * (ns / nt) * (ts / abs(ts));
+   }
+   return ts / (nt * nt);
+ }
+
+ // Low-level IDR(s) solver for A x = b.
+ //   A            the system matrix (only products A * vector are used)
+ //   b            the right hand side vector
+ //   x            on input the initial guess, on output the computed solution
+ //   precond      preconditioner; only precond.solve(vector) is used
+ //   iter         on input the maximum number of outer cycles, on output the number of
+ //                completed outer cycles (the counter is advanced once per cycle, and
+ //                each cycle performs up to S+1 products with A)
+ //   relres       on input the relative tolerance, on output |r|/|b| of the last residual
+ //                (left untouched when the function returns false)
+ //   S            dimension of the shadow space; clipped to x.rows()
+ //   smoothing    switches residual smoothing on
+ //   angle        threshold forwarded to internal::omega()
+ //   replacement  switches on replacing the recursive residual by b - A*x after a
+ //                dangerously high residual peak
+ // Returns false on breakdown (a diagonal entry M(k,k) tests as zero, or omega is zero),
+ // true otherwise -- including when the iteration limit is hit without convergence.
+ template <typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>
+ bool idrs(const MatrixType& A, const Rhs& b, Dest& x, const Preconditioner& precond,
+   Index& iter,
+   typename Dest::RealScalar& relres, Index S, bool smoothing, typename Dest::RealScalar angle, bool replacement)
+ {
+   typedef typename Dest::RealScalar RealScalar;
+   typedef typename Dest::Scalar Scalar;
+   typedef Matrix<Scalar, Dynamic, 1> VectorType;
+   typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> DenseMatrixType;
+   const Index N = b.size();
+   S = S < x.rows() ? S : x.rows();
+   const RealScalar tol = relres;
+   const Index maxit = iter;
+
+   // Set when a residual peak was seen; triggers a residual replacement later on.
+   bool trueres = false;
+
+   FullPivLU<DenseMatrixType> lu_solver;
+
+   // Shadow space: N x S matrix with orthonormal columns obtained from a random matrix.
+   DenseMatrixType P;
+   {
+     HouseholderQR<DenseMatrixType> qr(DenseMatrixType::Random(N, S));
+     P = (qr.householderQ() * DenseMatrixType::Identity(N, S));
+   }
+
+   const RealScalar normb = b.norm();
+
+   if (internal::isApprox(normb, RealScalar(0)))
+   {
+     //Solution is the zero vector
+     x.setZero();
+     iter = 0;
+     relres = 0;
+     return true;
+   }
+   // from http://homepage.tudelft.nl/1w5b5/IDRS/manual.pdf
+   // A peak in the residual is considered dangerously high if
+   // ||r_i|| / ||b|| > C * (tol / epsilon), with epsilon the relative machine precision.
+   // The factor tol/epsilon corresponds to the size of a finite precision number
+   // that is so large that the absolute round-off error in this number, when
+   // propagated through the process, makes it impossible to achieve the required
+   // accuracy. The factor C accounts for the accumulation of round-off errors.
+   // This parameter has been set to 10^-3.
+   // mp is epsilon/C
+   // 10^3 * eps is very conservative, so normally no residual replacements will take place.
+   // It only happens if things go very wrong. Too many restarts may ruin the convergence.
+   const RealScalar mp = RealScalar(1e3) * NumTraits<Scalar>::epsilon();
+
+   //Compute initial residual
+   const RealScalar tolb = tol * normb; //Relative tolerance
+   VectorType r = b - A * x;
+
+   // Smoothed iterate and residual, only maintained when smoothing is on.
+   VectorType x_s, r_s;
+
+   if (smoothing)
+   {
+     x_s = x;
+     r_s = r;
+   }
+
+   RealScalar normr = r.norm();
+
+   if (normr <= tolb)
+   {
+     //Initial guess is a good enough solution
+     iter = 0;
+     relres = normr / normb;
+     return true;
+   }
+
+   DenseMatrixType G = DenseMatrixType::Zero(N, S);
+   DenseMatrixType U = DenseMatrixType::Zero(N, S);
+   DenseMatrixType M = DenseMatrixType::Identity(S, S);
+   VectorType t(N), v(N);
+   Scalar om = 1.;
+
+   //Main iteration loop, build G-spaces:
+   iter = 0;
+
+   while (normr > tolb && iter < maxit)
+   {
+     //New right-hand side for small system:
+     VectorType f = (r.adjoint() * P).adjoint();
+
+     for (Index k = 0; k < S; ++k)
+     {
+       //Solve small system and make v orthogonal to P:
+       //c = M(k:s,k:s)\f(k:s);
+       lu_solver.compute(M.block(k, k, S - k, S - k));
+       VectorType c = lu_solver.solve(f.segment(k, S - k));
+       //v = r - G(:,k:s)*c;
+       v = r - G.rightCols(S - k) * c;
+       //Preconditioning
+       v = precond.solve(v);
+
+       //Compute new U(:,k) and G(:,k), G(:,k) is in space G_j
+       U.col(k) = U.rightCols(S - k) * c + om * v;
+       G.col(k) = A * U.col(k);
+
+       //Bi-Orthogonalise the new basis vectors against ALL previous columns 0..k-1.
+       //(The reference loop "for i = 1:k-1" is 1-based; with the 0-based k used here
+       //the bound is i < k. Stopping at k-1 leaves G(:,k) non-orthogonal to P(:,k-1),
+       //which breaks the lower-triangular structure of M assumed below.)
+       for (Index i = 0; i < k; ++i)
+       {
+         //alpha = ( P(:,i)'*G(:,k) )/M(i,i);
+         Scalar alpha = P.col(i).dot(G.col(k)) / M(i, i);
+         G.col(k) = G.col(k) - alpha * G.col(i);
+         U.col(k) = U.col(k) - alpha * U.col(i);
+       }
+
+       //New column of M = P'*G (first k-1 entries are zero)
+       //M(k:s,k) = (G(:,k)'*P(:,k:s))';
+       M.block(k, k, S - k, 1) = (G.col(k).adjoint() * P.rightCols(S - k)).adjoint();
+
+       if (internal::isApprox(M(k,k), Scalar(0)))
+       {
+         //Breakdown: beta below cannot be computed
+         return false;
+       }
+
+       //Make r orthogonal to q_i, i = 0..k-1
+       Scalar beta = f(k) / M(k, k);
+       r = r - beta * G.col(k);
+       x = x + beta * U.col(k);
+       normr = r.norm();
+
+       if (replacement && normr > tolb / mp)
+       {
+         trueres = true;
+       }
+
+       //Smoothing: r_s <- r_s - gamma * (r_s - r) with gamma minimising |r_s|,
+       //i.e. gamma = (t' * r_s) / (t' * t). Nothing to do when t is zero.
+       if (smoothing)
+       {
+         t = r_s - r;
+         const RealScalar tt = t.squaredNorm();
+         if (tt > RealScalar(0))
+         {
+           const Scalar gamma = t.dot(r_s) / tt;
+           r_s = r_s - gamma * t;
+           x_s = x_s - gamma * (x_s - x);
+         }
+         normr = r_s.norm();
+       }
+
+       //iter is only advanced at the end of an outer cycle, so inside this loop
+       //only the tolerance part of the test can trigger.
+       if (normr < tolb || iter == maxit)
+       {
+         break;
+       }
+
+       //New f = P'*r (first k components are zero)
+       if (k < S-1)
+       {
+         f.segment(k + 1, S - (k + 1)) = f.segment(k + 1, S - (k + 1)) - beta * M.block(k + 1, k, S - (k + 1), 1);
+       }
+     }//end for
+
+     if (normr < tolb || iter == maxit)
+     {
+       break;
+     }
+
+     //Now we have sufficient vectors in G_j to compute residual in G_j+1
+     //Note: r is already perpendicular to P so v = r
+     //Preconditioning
+     v = r;
+     v = precond.solve(v);
+
+     //Matrix-vector multiplication:
+     t = A * v;
+
+     //Computation of a new omega
+     om = internal::omega(t, r, angle);
+
+     if (om == RealScalar(0.0))
+     {
+       //Breakdown: the next G-space cannot be entered with omega = 0
+       return false;
+     }
+
+     r = r - om * t;
+     x = x + om * v;
+     normr = r.norm();
+
+     if (replacement && normr > tolb / mp)
+     {
+       trueres = true;
+     }
+
+     //Residual replacement?
+     if (trueres && normr < normb)
+     {
+       r = b - A * x;
+       trueres = false;
+     }
+
+     //Smoothing (same update as in the inner loop; t is reused as scratch):
+     if (smoothing)
+     {
+       t = r_s - r;
+       const RealScalar tt = t.squaredNorm();
+       if (tt > RealScalar(0))
+       {
+         const Scalar gamma = t.dot(r_s) / tt;
+         r_s = r_s - gamma * t;
+         x_s = x_s - gamma * (x_s - x);
+       }
+       normr = r_s.norm();
+     }
+
+     iter++;
+
+   }//end while
+
+   if (smoothing)
+   {
+     x = x_s;
+   }
+   relres=normr/normb;
+   return true;
+ }
+
+ } // namespace internal
+
+ template <typename _MatrixType, typename _Preconditioner = DiagonalPreconditioner<typename _MatrixType::Scalar> >
+ class IDRS;
+
+ namespace internal
+ {
+
+ template <typename _MatrixType, typename _Preconditioner>
+ struct traits<Eigen::IDRS<_MatrixType, _Preconditioner> > // traits specialization for the IDRS solver; presumably consumed by IterativeSolverBase -- confirm there
+ {
+ typedef _MatrixType MatrixType; // matrix type the solver was instantiated with
+ typedef _Preconditioner Preconditioner; // preconditioner type the solver was instantiated with
+ };
+
+ } // namespace internal
+
+
+/** \ingroup IterativeLinearSolvers_Module
+ * \brief The Induced Dimension Reduction method (IDR(s)) is a short-recurrences Krylov method for sparse square problems.
+ *
+ * This class allows to solve for A.x = b sparse linear problems. The vectors x and b can be either dense or sparse.
+ * The Induced Dimension Reduction method, IDR(s), is a robust and efficient short-recurrence Krylov subspace method for
+ * solving large nonsymmetric systems of linear equations.
+ *
+ * For indefinite systems IDR(S) outperforms both BiCGStab and BiCGStab(L). Additionally, IDR(S) can handle matrices
+ * with complex eigenvalues more efficiently than BiCGStab.
+ *
+ * Many problems that do not converge for BiCGSTAB converge for IDR(s) (for larger values of s). And if both methods
+ * converge the convergence for IDR(s) is typically much faster for difficult systems (for example indefinite problems).
+ *
+ * IDR(s) is a limited memory finite termination method. In exact arithmetic it converges in at most N+N/s iterations,
+ * with N the system size. It uses a fixed number of 4+3s vector. In comparison, BiCGSTAB terminates in 2N iterations
+ * and uses 7 vectors. GMRES terminates in at most N iterations, and uses I+3 vectors, with I the number of iterations.
+ * Restarting GMRES limits the memory consumption, but destroys the finite termination property.
+ *
+ * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix.
+ * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner
+ *
+ * \implsparsesolverconcept
+ *
+ * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations()
+ * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations
+ * and NumTraits<Scalar>::epsilon() for the tolerance.
+ *
+ * The tolerance corresponds to the relative residual error: |Ax-b|/|b|
+ *
+ * \b Performance: when using sparse matrices, best performance is achieved for a row-major sparse matrix format.
+ * Moreover, in this case multi-threading can be exploited if the user code is compiled with OpenMP enabled.
+ * See \ref TopicMultiThreading for details.
+ *
+ * By default the iterations start with x=0 as an initial guess of the solution.
+ * One can control the start using the solveWithGuess() method.
+ *
+ * IDR(s) can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink.
+ *
+ * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
+ */
+ template <typename _MatrixType, typename _Preconditioner>
+ class IDRS : public IterativeSolverBase<IDRS<_MatrixType, _Preconditioner> >
+ {
+
+ public:
+ typedef _MatrixType MatrixType;
+ typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::RealScalar RealScalar;
+ typedef _Preconditioner Preconditioner;
+
+ private:
+ typedef IterativeSolverBase<IDRS> Base;
+ using Base::m_error;
+ using Base::m_info;
+ using Base::m_isInitialized;
+ using Base::m_iterations;
+ using Base::matrix;
+ Index m_S; // dimension of the shadow space, see setS()
+ bool m_smoothing; // residual smoothing on/off, see setSmoothing()
+ RealScalar m_angle; // threshold forwarded to the omega computation, see setAngle()
+ bool m_residual; // residual replacement on/off, see setResidualUpdate()
+
+ public:
+ /** Default constructor: S = 4, smoothing off, angle = 0.7, residual replacement off. */
+ IDRS(): m_S(4), m_smoothing(false), m_angle(RealScalar(0.7)), m_residual(false) {}
+
+ /** Initialize the solver with matrix \a A for further \c Ax=b solving.
+
+ This constructor is a shortcut for the default constructor followed
+ by a call to compute().
+
+ \warning this class stores a reference to the matrix A as well as some
+ precomputed values that depend on it. Therefore, if \a A is changed
+ this class becomes invalid. Call compute() to update it with the new
+ matrix A, or modify a copy of A.
+ */
+ template <typename MatrixDerived>
+ explicit IDRS(const EigenBase<MatrixDerived>& A) : Base(A.derived()), m_S(4), m_smoothing(false),
+ m_angle(RealScalar(0.7)), m_residual(false) {}
+
+
+ /** \internal */
+ /** Solves for the right hand side \a b starting from the guess \a x (no loop over columns happens here):
+ 1. resets the iteration budget and the tolerance from the base class settings
+ 2. calls internal::idrs(), the core solver routine, and derives m_info from its result
+ */
+ template <typename Rhs, typename Dest>
+ void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const
+ {
+ m_iterations = Base::maxIterations();
+ m_error = Base::m_tolerance;
+
+ bool ret = internal::idrs(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_S,m_smoothing,m_angle,m_residual);
+
+ m_info = (!ret) ? NumericalIssue : m_error <= Base::m_tolerance ? Success : NoConvergence; // false from idrs() means breakdown; otherwise compare the reported error with the tolerance
+ }
+
+ /** Sets the parameter S, indicating the dimension of the shadow space. Default is 4. Values below 1 are replaced by 4. */
+ void setS(Index S)
+ {
+ if (S < 1)
+ {
+ S = 4;
+ }
+
+ m_S = S;
+ }
+
+ /** Switches off and on smoothing.
+ Residual smoothing results in monotonically decreasing residual norms at
+ the expense of two extra vectors of storage and a few extra vector
+ operations. Although monotonic decrease of the residual norms is a
+ desirable property, the rate of convergence of the unsmoothed process and
+ the smoothed process is basically the same. Default is off */
+ void setSmoothing(bool smoothing)
+ {
+ m_smoothing=smoothing;
+ }
+
+ /** The angle must be a real scalar. In IDR(s), a value for the
+ iteration parameter omega must be chosen in every s+1th step. The most
+ natural choice is to select a value to minimize the norm of the next residual.
+ This corresponds to the parameter angle = 0. In practice, this may lead to
+ values of omega that are so small that the other iteration parameters
+ cannot be computed with sufficient accuracy. In such cases it is better to
+ increase the value of omega sufficiently such that a compromise is reached
+ between accurate computations and reduction of the residual norm. The
+ parameter angle = 0.7 ("maintaining the convergence strategy"), which is the default,
+ results in such a compromise. */
+ void setAngle(RealScalar angle)
+ {
+ m_angle=angle;
+ }
+
+ /** The parameter \a update is a boolean that determines whether a
+ residual replacement strategy is employed to increase the accuracy of the
+ solution. Default is off. */
+ void setResidualUpdate(bool update)
+ {
+ m_residual=update;
+ }
+
+ };
+
+} // namespace Eigen
+
+#endif /* EIGEN_IDRS_H */
diff --git a/unsupported/Eigen/src/IterativeSolvers/IterationController.h b/unsupported/Eigen/src/IterativeSolvers/IterationController.h
index c9c1a4be2..a116e09e2 100644
--- a/unsupported/Eigen/src/IterativeSolvers/IterationController.h
+++ b/unsupported/Eigen/src/IterativeSolvers/IterationController.h
@@ -60,7 +60,7 @@
namespace Eigen {
-/** \ingroup IterativeSolvers_Module
+/** \ingroup IterativeLinearSolvers_Module
* \class IterationController
*
* \brief Controls the iterations of the iterative solvers
diff --git a/unsupported/Eigen/src/IterativeSolvers/MINRES.h b/unsupported/Eigen/src/IterativeSolvers/MINRES.h
index 256990c1a..5db454d24 100644
--- a/unsupported/Eigen/src/IterativeSolvers/MINRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/MINRES.h
@@ -3,6 +3,7 @@
//
// Copyright (C) 2012 Giacomo Po <gpo@ucla.edu>
// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2018 David Hyde <dabh@stanford.edu>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -64,8 +65,6 @@ namespace Eigen {
eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
RealScalar beta_new(sqrt(beta_new2));
const RealScalar beta_one(beta_new);
- v_new /= beta_new;
- w_new /= beta_new;
// Initialize other variables
RealScalar c(1.0); // the cosine of the Givens rotation
RealScalar c_old(1.0);
@@ -83,18 +82,18 @@ namespace Eigen {
/* Note that there are 4 variants on the Lanczos algorithm. These are
* described in Paige, C. C. (1972). Computational variants of
* the Lanczos method for the eigenproblem. IMA Journal of Applied
- * Mathematics, 10(3), 373–381. The current implementation corresponds
+ * Mathematics, 10(3), 373-381. The current implementation corresponds
* to the case A(2,7) in the paper. It also corresponds to
- * algorithm 6.14 in Y. Saad, Iterative Methods for Sparse Linear
+ * algorithm 6.14 in Y. Saad, Iterative Methods for Sparse Linear
* Systems, 2003 p.173. For the preconditioned version see
* A. Greenbaum, Iterative Methods for Solving Linear Systems, SIAM (1987).
*/
const RealScalar beta(beta_new);
v_old = v; // update: at first time step, this makes v_old = 0 so value of beta doesn't matter
-// const VectorType v_old(v); // NOT SURE IF CREATING v_old EVERY ITERATION IS EFFICIENT
+ v_new /= beta_new; // overwrite v_new for next iteration
+ w_new /= beta_new; // overwrite w_new for next iteration
v = v_new; // update
w = w_new; // update
-// const VectorType w(w_new); // NOT SURE IF CREATING w EVERY ITERATION IS EFFICIENT
v_new.noalias() = mat*w - beta*v_old; // compute v_new
const RealScalar alpha = v_new.dot(w);
v_new -= alpha*v; // overwrite v_new
@@ -102,8 +101,6 @@ namespace Eigen {
beta_new2 = v_new.dot(w_new); // compute beta_new
eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
beta_new = sqrt(beta_new2); // compute beta_new
- v_new /= beta_new; // overwrite v_new for next iteration
- w_new /= beta_new; // overwrite w_new for next iteration
// Givens rotation
const RealScalar r2 =s*alpha+c*c_old*beta; // s, s_old, c and c_old are still from previous iteration
@@ -117,7 +114,6 @@ namespace Eigen {
// Update solution
p_oold = p_old;
-// const VectorType p_oold(p_old); // NOT SURE IF CREATING p_oold EVERY ITERATION IS EFFICIENT
p_old = p;
p.noalias()=(w-r2*p_old-r3*p_oold) /r1; // IS NOALIAS REQUIRED?
x += beta_one*c*eta*p;
@@ -237,7 +233,7 @@ namespace Eigen {
/** \internal */
template<typename Rhs,typename Dest>
- void _solve_with_guess_impl(const Rhs& b, Dest& x) const
+ void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const
{
typedef typename Base::MatrixWrapper MatrixWrapper;
typedef typename Base::ActualMatrixType ActualMatrixType;
@@ -257,28 +253,11 @@ namespace Eigen {
m_iterations = Base::maxIterations();
m_error = Base::m_tolerance;
RowMajorWrapper row_mat(matrix());
- for(int j=0; j<b.cols(); ++j)
- {
- m_iterations = Base::maxIterations();
- m_error = Base::m_tolerance;
-
- typename Dest::ColXpr xj(x,j);
- internal::minres(SelfAdjointWrapper(row_mat), b.col(j), xj,
- Base::m_preconditioner, m_iterations, m_error);
- }
-
- m_isInitialized = true;
+ internal::minres(SelfAdjointWrapper(row_mat), b, x,
+ Base::m_preconditioner, m_iterations, m_error);
m_info = m_error <= Base::m_tolerance ? Success : NoConvergence;
}
- /** \internal */
- template<typename Rhs,typename Dest>
- void _solve_impl(const Rhs& b, MatrixBase<Dest> &x) const
- {
- x.setZero();
- _solve_with_guess_impl(b,x.derived());
- }
-
protected:
};
@@ -286,4 +265,3 @@ namespace Eigen {
} // end namespace Eigen
#endif // EIGEN_MINRES_H
-
diff --git a/unsupported/Eigen/src/IterativeSolvers/Scaling.h b/unsupported/Eigen/src/IterativeSolvers/Scaling.h
index d113e6e90..9b3eb53e0 100644
--- a/unsupported/Eigen/src/IterativeSolvers/Scaling.h
+++ b/unsupported/Eigen/src/IterativeSolvers/Scaling.h
@@ -104,12 +104,18 @@ class IterScaling
for (int i = 0; i < m; ++i)
{
Dr(i) = std::sqrt(Dr(i));
+ }
+ for (int i = 0; i < n; ++i)
+ {
Dc(i) = std::sqrt(Dc(i));
}
// Save the scaling factors
for (int i = 0; i < m; ++i)
{
m_left(i) /= Dr(i);
+ }
+ for (int i = 0; i < n; ++i)
+ {
m_right(i) /= Dc(i);
}
// Scale the rows and the columns of the matrix
diff --git a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
index 582fa8512..6a9b0be88 100644
--- a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
+++ b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
@@ -235,10 +235,10 @@ struct traits<KroneckerProductSparse<_Lhs,_Rhs> >
MaxRowsAtCompileTime = size_at_compile_time<traits<Lhs>::MaxRowsAtCompileTime, traits<Rhs>::MaxRowsAtCompileTime>::ret,
MaxColsAtCompileTime = size_at_compile_time<traits<Lhs>::MaxColsAtCompileTime, traits<Rhs>::MaxColsAtCompileTime>::ret,
- EvalToRowMajor = (LhsFlags & RhsFlags & RowMajorBit),
+ EvalToRowMajor = (int(LhsFlags) & int(RhsFlags) & RowMajorBit),
RemovedBits = ~(EvalToRowMajor ? 0 : RowMajorBit),
- Flags = ((LhsFlags | RhsFlags) & HereditaryBits & RemovedBits)
+ Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & RemovedBits)
| EvalBeforeNestingBit,
CoeffReadCost = HugeCost
};
diff --git a/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h b/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h
index ae9d793b1..123485817 100644
--- a/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h
+++ b/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h
@@ -73,7 +73,7 @@ void lmqrsolv(
qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj;
wa[k] = temp;
- /* accumulate the tranformation in the row of s. */
+ /* accumulate the transformation in the row of s. */
for (i = k+1; i<n; ++i) {
temp = givens.c() * s(i,k) + givens.s() * sdiag[i];
sdiag[i] = -givens.s() * s(i,k) + givens.c() * sdiag[i];
diff --git a/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h b/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h
index 995427978..62561da1d 100644
--- a/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h
+++ b/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h
@@ -117,7 +117,7 @@ class LevenbergMarquardt : internal::no_assignment_operator
typedef typename JacobianType::RealScalar RealScalar;
typedef typename QRSolver::StorageIndex PermIndex;
typedef Matrix<Scalar,Dynamic,1> FVectorType;
- typedef PermutationMatrix<Dynamic,Dynamic> PermutationType;
+ typedef PermutationMatrix<Dynamic,Dynamic,int> PermutationType;
public:
LevenbergMarquardt(FunctorType& functor)
: m_functor(functor),m_nfev(0),m_njev(0),m_fnorm(0.0),m_gnorm(0),
@@ -233,9 +233,9 @@ class LevenbergMarquardt : internal::no_assignment_operator
/**
* \brief Reports whether the minimization was successful
- * \returns \c Success if the minimization was succesful,
+ * \returns \c Success if the minimization was successful,
* \c NumericalIssue if a numerical problem arises during the
- * minimization process, for exemple during the QR factorization
+ * minimization process, for example during the QR factorization
* \c NoConvergence if the minimization did not converge after
* the maximum number of function evaluation allowed
* \c InvalidInput if the input matrix is invalid
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
index bb6d9e1fe..02284b0dd 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
@@ -234,12 +234,13 @@ struct matrix_exp_computeUV<MatrixType, float>
template <typename MatrixType>
struct matrix_exp_computeUV<MatrixType, double>
{
+ typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
template <typename ArgType>
static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings)
{
using std::frexp;
using std::pow;
- const double l1norm = arg.cwiseAbs().colwise().sum().maxCoeff();
+ const RealScalar l1norm = arg.cwiseAbs().colwise().sum().maxCoeff();
squarings = 0;
if (l1norm < 1.495585217958292e-002) {
matrix_exp_pade3(arg, U, V);
@@ -250,10 +251,10 @@ struct matrix_exp_computeUV<MatrixType, double>
} else if (l1norm < 2.097847961257068e+000) {
matrix_exp_pade9(arg, U, V);
} else {
- const double maxnorm = 5.371920351148152;
+ const RealScalar maxnorm = 5.371920351148152;
frexp(l1norm / maxnorm, &squarings);
if (squarings < 0) squarings = 0;
- MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<double>(squarings));
+ MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<RealScalar>(squarings));
matrix_exp_pade13(A, U, V);
}
}
@@ -313,7 +314,7 @@ struct matrix_exp_computeUV<MatrixType, long double>
matrix_exp_pade17(A, U, V);
}
-#elif LDBL_MANT_DIG <= 112 // quadruple precison
+#elif LDBL_MANT_DIG <= 113 // quadruple precision
if (l1norm < 1.639394610288918690547467954466970e-005L) {
matrix_exp_pade3(arg, U, V);
@@ -326,6 +327,7 @@ struct matrix_exp_computeUV<MatrixType, long double>
} else if (l1norm < 1.125358383453143065081397882891878e+000L) {
matrix_exp_pade13(arg, U, V);
} else {
+ const long double maxnorm = 2.884233277829519311757165057717815L;
frexp(l1norm / maxnorm, &squarings);
if (squarings < 0) squarings = 0;
MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
@@ -342,6 +344,27 @@ struct matrix_exp_computeUV<MatrixType, long double>
}
};
+template<typename T> struct is_exp_known_type : false_type {}; // default: scalar type is not marked as known
+template<> struct is_exp_known_type<float> : true_type {}; // float is marked as known
+template<> struct is_exp_known_type<double> : true_type {}; // double is marked as known
+#if LDBL_MANT_DIG <= 113 // long double is only marked as known up to a 113-bit mantissa
+template<> struct is_exp_known_type<long double> : true_type {};
+#endif
+
+template <typename ArgType, typename ResultType>
+void matrix_exp_compute(const ArgType& arg, ResultType &result, true_type) // overload for natively supported scalar types (selected by a true_type tag)
+{
+ typedef typename ArgType::PlainObject MatrixType;
+ MatrixType U, V;
+ int squarings; // passed by reference to run() below, then used as the number of squarings to undo
+ matrix_exp_computeUV<MatrixType>::run(arg, U, V, squarings); // Pade approximant is (U+V) / (-U+V)
+ MatrixType numer = U + V;
+ MatrixType denom = -U + V;
+ result = denom.partialPivLu().solve(numer); // result = denom^{-1} * numer via a partial-pivoting LU solve
+ for (int i=0; i<squarings; i++)
+ result *= result; // undo scaling by repeated squaring
+}
+
/* Computes the matrix exponential
*
@@ -349,26 +372,13 @@ struct matrix_exp_computeUV<MatrixType, long double>
* \param result variable in which result will be stored
*/
template <typename ArgType, typename ResultType>
-void matrix_exp_compute(const ArgType& arg, ResultType &result)
+void matrix_exp_compute(const ArgType& arg, ResultType &result, false_type) // default
{
typedef typename ArgType::PlainObject MatrixType;
-#if LDBL_MANT_DIG > 112 // rarely happens
typedef typename traits<MatrixType>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename std::complex<RealScalar> ComplexScalar;
- if (sizeof(RealScalar) > 14) {
- result = arg.matrixFunction(internal::stem_function_exp<ComplexScalar>);
- return;
- }
-#endif
- MatrixType U, V;
- int squarings;
- matrix_exp_computeUV<MatrixType>::run(arg, U, V, squarings); // Pade approximant is (U+V) / (-U+V)
- MatrixType numer = U + V;
- MatrixType denom = -U + V;
- result = denom.partialPivLu().solve(numer);
- for (int i=0; i<squarings; i++)
- result *= result; // undo scaling by repeated squaring
+ result = arg.matrixFunction(internal::stem_function_exp<ComplexScalar>);
}
} // end namespace Eigen::internal
@@ -386,7 +396,6 @@ void matrix_exp_compute(const ArgType& arg, ResultType &result)
template<typename Derived> struct MatrixExponentialReturnValue
: public ReturnByValue<MatrixExponentialReturnValue<Derived> >
{
- typedef typename Derived::Index Index;
public:
/** \brief Constructor.
*
@@ -402,7 +411,7 @@ template<typename Derived> struct MatrixExponentialReturnValue
inline void evalTo(ResultType& result) const
{
const typename internal::nested_eval<Derived, 10>::type tmp(m_src);
- internal::matrix_exp_compute(tmp, result);
+ internal::matrix_exp_compute(tmp, result, internal::is_exp_known_type<typename Derived::RealScalar>());
}
Index rows() const { return m_src.rows(); }
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
index 3f7d77710..cc12ab62b 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_MATRIX_FUNCTION
-#define EIGEN_MATRIX_FUNCTION
+#ifndef EIGEN_MATRIX_FUNCTION_H
+#define EIGEN_MATRIX_FUNCTION_H
#include "StemFunction.h"
@@ -53,7 +53,7 @@ template <typename MatrixType>
typename NumTraits<typename MatrixType::Scalar>::Real matrix_function_compute_mu(const MatrixType& A)
{
typedef typename plain_col_type<MatrixType>::type VectorType;
- typename MatrixType::Index rows = A.rows();
+ Index rows = A.rows();
const MatrixType N = MatrixType::Identity(rows, rows) - A;
VectorType e = VectorType::Ones(rows);
N.template triangularView<Upper>().solveInPlace(e);
@@ -65,7 +65,6 @@ MatrixType MatrixFunctionAtomic<MatrixType>::compute(const MatrixType& A)
{
// TODO: Use that A is upper triangular
typedef typename NumTraits<Scalar>::Real RealScalar;
- typedef typename MatrixType::Index Index;
Index rows = A.rows();
Scalar avgEival = A.trace() / Scalar(RealScalar(rows));
MatrixType Ashifted = A - avgEival * MatrixType::Identity(rows, rows);
@@ -73,10 +72,10 @@ MatrixType MatrixFunctionAtomic<MatrixType>::compute(const MatrixType& A)
MatrixType F = m_f(avgEival, 0) * MatrixType::Identity(rows, rows);
MatrixType P = Ashifted;
MatrixType Fincr;
- for (Index s = 1; s < 1.1 * rows + 10; s++) { // upper limit is fairly arbitrary
+ for (Index s = 1; double(s) < 1.1 * double(rows) + 10.0; s++) { // upper limit is fairly arbitrary
Fincr = m_f(avgEival, static_cast<int>(s)) * P;
F += Fincr;
- P = Scalar(RealScalar(1.0/(s + 1))) * P * Ashifted;
+ P = Scalar(RealScalar(1)/RealScalar(s + 1)) * P * Ashifted;
// test whether Taylor series converged
const RealScalar F_norm = F.cwiseAbs().rowwise().sum().maxCoeff();
@@ -131,7 +130,6 @@ typename ListOfClusters::iterator matrix_function_find_cluster(Index key, ListOf
template <typename EivalsType, typename Cluster>
void matrix_function_partition_eigenvalues(const EivalsType& eivals, std::list<Cluster>& clusters)
{
- typedef typename EivalsType::Index Index;
typedef typename EivalsType::RealScalar RealScalar;
for (Index i=0; i<eivals.rows(); ++i) {
// Find cluster containing i-th ei'val, adding a new cluster if necessary
@@ -179,7 +177,7 @@ void matrix_function_compute_block_start(const VectorType& clusterSize, VectorTy
{
blockStart.resize(clusterSize.rows());
blockStart(0) = 0;
- for (typename VectorType::Index i = 1; i < clusterSize.rows(); i++) {
+ for (Index i = 1; i < clusterSize.rows(); i++) {
blockStart(i) = blockStart(i-1) + clusterSize(i-1);
}
}
@@ -188,7 +186,6 @@ void matrix_function_compute_block_start(const VectorType& clusterSize, VectorTy
template <typename EivalsType, typename ListOfClusters, typename VectorType>
void matrix_function_compute_map(const EivalsType& eivals, const ListOfClusters& clusters, VectorType& eivalToCluster)
{
- typedef typename EivalsType::Index Index;
eivalToCluster.resize(eivals.rows());
Index clusterIndex = 0;
for (typename ListOfClusters::const_iterator cluster = clusters.begin(); cluster != clusters.end(); ++cluster) {
@@ -205,7 +202,6 @@ void matrix_function_compute_map(const EivalsType& eivals, const ListOfClusters&
template <typename DynVectorType, typename VectorType>
void matrix_function_compute_permutation(const DynVectorType& blockStart, const DynVectorType& eivalToCluster, VectorType& permutation)
{
- typedef typename VectorType::Index Index;
DynVectorType indexNextEntry = blockStart;
permutation.resize(eivalToCluster.rows());
for (Index i = 0; i < eivalToCluster.rows(); i++) {
@@ -219,7 +215,6 @@ void matrix_function_compute_permutation(const DynVectorType& blockStart, const
template <typename VectorType, typename MatrixType>
void matrix_function_permute_schur(VectorType& permutation, MatrixType& U, MatrixType& T)
{
- typedef typename VectorType::Index Index;
for (Index i = 0; i < permutation.rows() - 1; i++) {
Index j;
for (j = i; j < permutation.rows(); j++) {
@@ -247,7 +242,7 @@ template <typename MatrixType, typename AtomicType, typename VectorType>
void matrix_function_compute_block_atomic(const MatrixType& T, AtomicType& atomic, const VectorType& blockStart, const VectorType& clusterSize, MatrixType& fT)
{
fT.setZero(T.rows(), T.cols());
- for (typename VectorType::Index i = 0; i < clusterSize.rows(); ++i) {
+ for (Index i = 0; i < clusterSize.rows(); ++i) {
fT.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i))
= atomic.compute(T.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i)));
}
@@ -285,7 +280,6 @@ MatrixType matrix_function_solve_triangular_sylvester(const MatrixType& A, const
eigen_assert(C.rows() == A.rows());
eigen_assert(C.cols() == B.rows());
- typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
Index m = A.rows();
@@ -330,11 +324,8 @@ void matrix_function_compute_above_diagonal(const MatrixType& T, const VectorTyp
{
typedef internal::traits<MatrixType> Traits;
typedef typename MatrixType::Scalar Scalar;
- typedef typename MatrixType::Index Index;
- static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
- static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
static const int Options = MatrixType::Options;
- typedef Matrix<Scalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+ typedef Matrix<Scalar, Dynamic, Dynamic, Options, Traits::RowsAtCompileTime, Traits::ColsAtCompileTime> DynMatrixType;
for (Index k = 1; k < clusterSize.rows(); k++) {
for (Index i = 0; i < clusterSize.rows() - k; i++) {
@@ -428,7 +419,8 @@ struct matrix_function_compute<MatrixType, 1>
typedef internal::traits<MatrixType> Traits;
// compute Schur decomposition of A
- const ComplexSchur<MatrixType> schurOfA(A);
+ const ComplexSchur<MatrixType> schurOfA(A);
+ eigen_assert(schurOfA.info()==Success);
MatrixType T = schurOfA.matrixT();
MatrixType U = schurOfA.matrixU();
@@ -480,7 +472,6 @@ template<typename Derived> class MatrixFunctionReturnValue
{
public:
typedef typename Derived::Scalar Scalar;
- typedef typename Derived::Index Index;
typedef typename internal::stem_function<Scalar>::type StemFunction;
protected:
@@ -505,10 +496,8 @@ template<typename Derived> class MatrixFunctionReturnValue
typedef typename internal::nested_eval<Derived, 10>::type NestedEvalType;
typedef typename internal::remove_all<NestedEvalType>::type NestedEvalTypeClean;
typedef internal::traits<NestedEvalTypeClean> Traits;
- static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
- static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
- typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+ typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, Traits::RowsAtCompileTime, Traits::ColsAtCompileTime> DynMatrixType;
typedef internal::MatrixFunctionAtomic<DynMatrixType> AtomicType;
AtomicType atomic(m_f);
@@ -577,4 +566,4 @@ const MatrixFunctionReturnValue<Derived> MatrixBase<Derived>::cosh() const
} // end namespace Eigen
-#endif // EIGEN_MATRIX_FUNCTION
+#endif // EIGEN_MATRIX_FUNCTION_H
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
index ff8f6e732..e917013e0 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
@@ -62,8 +62,8 @@ void matrix_log_compute_2x2(const MatrixType& A, MatrixType& result)
else
{
// computation in previous branch is inaccurate if A(1,1) \approx A(0,0)
- int unwindingNumber = static_cast<int>(ceil((imag(logA11 - logA00) - RealScalar(EIGEN_PI)) / RealScalar(2*EIGEN_PI)));
- result(0,1) = A(0,1) * (numext::log1p(y/A(0,0)) + Scalar(0,2*EIGEN_PI*unwindingNumber)) / y;
+ RealScalar unwindingNumber = ceil((imag(logA11 - logA00) - RealScalar(EIGEN_PI)) / RealScalar(2*EIGEN_PI));
+ result(0,1) = A(0,1) * (numext::log1p(y/A(0,0)) + Scalar(0,RealScalar(2*EIGEN_PI)*unwindingNumber)) / y;
}
}
@@ -135,7 +135,8 @@ void matrix_log_compute_pade(MatrixType& result, const MatrixType& T, int degree
const int minPadeDegree = 3;
const int maxPadeDegree = 11;
assert(degree >= minPadeDegree && degree <= maxPadeDegree);
-
+ // FIXME this creates float-conversion-warnings if these are enabled.
+ // Either manually convert each value, or disable the warning locally
const RealScalar nodes[][maxPadeDegree] = {
{ 0.1127016653792583114820734600217600L, 0.5000000000000000000000000000000000L, // degree 3
0.8872983346207416885179265399782400L },
@@ -232,12 +233,13 @@ void matrix_log_compute_big(const MatrixType& A, MatrixType& result)
int degree;
MatrixType T = A, sqrtT;
- int maxPadeDegree = matrix_log_max_pade_degree<Scalar>::value;
- const RealScalar maxNormForPade = maxPadeDegree<= 5? 5.3149729967117310e-1L: // single precision
+ const int maxPadeDegree = matrix_log_max_pade_degree<Scalar>::value;
+ const RealScalar maxNormForPade = RealScalar(
+ maxPadeDegree<= 5? 5.3149729967117310e-1L: // single precision
maxPadeDegree<= 7? 2.6429608311114350e-1L: // double precision
maxPadeDegree<= 8? 2.32777776523703892094e-1L: // extended precision
maxPadeDegree<=10? 1.05026503471351080481093652651105e-1L: // double-double
- 1.1880960220216759245467951592883642e-1L; // quadruple precision
+ 1.1880960220216759245467951592883642e-1L); // quadruple precision
while (true) {
RealScalar normTminusI = (T - MatrixType::Identity(T.rows(), T.rows())).cwiseAbs().colwise().sum().maxCoeff();
@@ -254,7 +256,7 @@ void matrix_log_compute_big(const MatrixType& A, MatrixType& result)
}
matrix_log_compute_pade(result, T, degree);
- result *= pow(RealScalar(2), numberOfSquareRoots);
+ result *= pow(RealScalar(2), RealScalar(numberOfSquareRoots)); // TODO replace by bitshift if possible
}
/** \ingroup MatrixFunctions_Module
@@ -324,7 +326,7 @@ public:
/** \brief Compute the matrix logarithm.
*
- * \param[out] result Logarithm of \p A, where \A is as specified in the constructor.
+ * \param[out] result Logarithm of \c A, where \c A is as specified in the constructor.
*/
template <typename ResultType>
inline void evalTo(ResultType& result) const
@@ -332,10 +334,8 @@ public:
typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType;
typedef typename internal::remove_all<DerivedEvalType>::type DerivedEvalTypeClean;
typedef internal::traits<DerivedEvalTypeClean> Traits;
- static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
- static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
- typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+ typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, Traits::RowsAtCompileTime, Traits::ColsAtCompileTime> DynMatrixType;
typedef internal::MatrixLogarithmAtomic<DynMatrixType> AtomicType;
AtomicType atomic;
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
index ebc433d89..d7672d7c9 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
@@ -40,7 +40,6 @@ class MatrixPowerParenthesesReturnValue : public ReturnByValue< MatrixPowerParen
{
public:
typedef typename MatrixType::RealScalar RealScalar;
- typedef typename MatrixType::Index Index;
/**
* \brief Constructor.
@@ -57,8 +56,8 @@ class MatrixPowerParenthesesReturnValue : public ReturnByValue< MatrixPowerParen
* \param[out] result
*/
template<typename ResultType>
- inline void evalTo(ResultType& res) const
- { m_pow.compute(res, m_p); }
+ inline void evalTo(ResultType& result) const
+ { m_pow.compute(result, m_p); }
Index rows() const { return m_pow.rows(); }
Index cols() const { return m_pow.cols(); }
@@ -81,7 +80,7 @@ class MatrixPowerParenthesesReturnValue : public ReturnByValue< MatrixPowerParen
*
* \note Currently this class is only used by MatrixPower. One may
* insist that this be nested into MatrixPower. This class is here to
- * faciliate future development of triangular matrix functions.
+ * facilitate future development of triangular matrix functions.
*/
template<typename MatrixType>
class MatrixPowerAtomic : internal::noncopyable
@@ -94,7 +93,6 @@ class MatrixPowerAtomic : internal::noncopyable
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef std::complex<RealScalar> ComplexScalar;
- typedef typename MatrixType::Index Index;
typedef Block<MatrixType,Dynamic,Dynamic> ResultType;
const MatrixType& m_A;
@@ -162,11 +160,11 @@ template<typename MatrixType>
void MatrixPowerAtomic<MatrixType>::computePade(int degree, const MatrixType& IminusT, ResultType& res) const
{
int i = 2*degree;
- res = (m_p-degree) / (2*i-2) * IminusT;
+ res = (m_p-RealScalar(degree)) / RealScalar(2*i-2) * IminusT;
for (--i; i; --i) {
res = (MatrixType::Identity(IminusT.rows(), IminusT.cols()) + res).template triangularView<Upper>()
- .solve((i==1 ? -m_p : i&1 ? (-m_p-i/2)/(2*i) : (m_p-i/2)/(2*i-2)) * IminusT).eval();
+ .solve((i==1 ? -m_p : i&1 ? (-m_p-RealScalar(i/2))/RealScalar(2*i) : (m_p-RealScalar(i/2))/RealScalar(2*i-2)) * IminusT).eval();
}
res += MatrixType::Identity(IminusT.rows(), IminusT.cols());
}
@@ -196,11 +194,12 @@ void MatrixPowerAtomic<MatrixType>::computeBig(ResultType& res) const
{
using std::ldexp;
const int digits = std::numeric_limits<RealScalar>::digits;
- const RealScalar maxNormForPade = digits <= 24? 4.3386528e-1L // single precision
+ const RealScalar maxNormForPade = RealScalar(
+ digits <= 24? 4.3386528e-1L // single precision
: digits <= 53? 2.789358995219730e-1L // double precision
: digits <= 64? 2.4471944416607995472e-1L // extended precision
: digits <= 106? 1.1016843812851143391275867258512e-1L // double-double
- : 9.134603732914548552537150753385375e-2L; // quadruple precision
+ : 9.134603732914548552537150753385375e-2L); // quadruple precision
MatrixType IminusT, sqrtT, T = m_A.template triangularView<Upper>();
RealScalar normIminusT;
int degree, degree2, numberOfSquareRoots = 0;
@@ -298,8 +297,8 @@ MatrixPowerAtomic<MatrixType>::computeSuperDiag(const ComplexScalar& curr, const
ComplexScalar logCurr = log(curr);
ComplexScalar logPrev = log(prev);
- int unwindingNumber = ceil((numext::imag(logCurr - logPrev) - RealScalar(EIGEN_PI)) / RealScalar(2*EIGEN_PI));
- ComplexScalar w = numext::log1p((curr-prev)/prev)/RealScalar(2) + ComplexScalar(0, EIGEN_PI*unwindingNumber);
+ RealScalar unwindingNumber = ceil((numext::imag(logCurr - logPrev) - RealScalar(EIGEN_PI)) / RealScalar(2*EIGEN_PI));
+ ComplexScalar w = numext::log1p((curr-prev)/prev)/RealScalar(2) + ComplexScalar(0, RealScalar(EIGEN_PI)*unwindingNumber);
return RealScalar(2) * exp(RealScalar(0.5) * p * (logCurr + logPrev)) * sinh(p * w) / (curr - prev);
}
@@ -340,7 +339,6 @@ class MatrixPower : internal::noncopyable
private:
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
- typedef typename MatrixType::Index Index;
public:
/**
@@ -600,7 +598,6 @@ class MatrixPowerReturnValue : public ReturnByValue< MatrixPowerReturnValue<Deri
public:
typedef typename Derived::PlainObject PlainObject;
typedef typename Derived::RealScalar RealScalar;
- typedef typename Derived::Index Index;
/**
* \brief Constructor.
@@ -618,8 +615,8 @@ class MatrixPowerReturnValue : public ReturnByValue< MatrixPowerReturnValue<Deri
* constructor.
*/
template<typename ResultType>
- inline void evalTo(ResultType& res) const
- { MatrixPower<PlainObject>(m_A.eval()).compute(res, m_p); }
+ inline void evalTo(ResultType& result) const
+ { MatrixPower<PlainObject>(m_A.eval()).compute(result, m_p); }
Index rows() const { return m_A.rows(); }
Index cols() const { return m_A.cols(); }
@@ -648,7 +645,6 @@ class MatrixComplexPowerReturnValue : public ReturnByValue< MatrixComplexPowerRe
public:
typedef typename Derived::PlainObject PlainObject;
typedef typename std::complex<typename Derived::RealScalar> ComplexScalar;
- typedef typename Derived::Index Index;
/**
* \brief Constructor.
@@ -669,8 +665,8 @@ class MatrixComplexPowerReturnValue : public ReturnByValue< MatrixComplexPowerRe
* constructor.
*/
template<typename ResultType>
- inline void evalTo(ResultType& res) const
- { res = (m_p * m_A.log()).exp(); }
+ inline void evalTo(ResultType& result) const
+ { result = (m_p * m_A.log()).exp(); }
Index rows() const { return m_A.rows(); }
Index cols() const { return m_A.cols(); }
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h b/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
index afd88ec4d..e363e779d 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
@@ -17,7 +17,7 @@ namespace internal {
// pre: T.block(i,i,2,2) has complex conjugate eigenvalues
// post: sqrtT.block(i,i,2,2) is square root of T.block(i,i,2,2)
template <typename MatrixType, typename ResultType>
-void matrix_sqrt_quasi_triangular_2x2_diagonal_block(const MatrixType& T, typename MatrixType::Index i, ResultType& sqrtT)
+void matrix_sqrt_quasi_triangular_2x2_diagonal_block(const MatrixType& T, Index i, ResultType& sqrtT)
{
// TODO: This case (2-by-2 blocks with complex conjugate eigenvalues) is probably hidden somewhere
// in EigenSolver. If we expose it, we could call it directly from here.
@@ -32,7 +32,7 @@ void matrix_sqrt_quasi_triangular_2x2_diagonal_block(const MatrixType& T, typena
// all blocks of sqrtT to left of and below (i,j) are correct
// post: sqrtT(i,j) has the correct value
template <typename MatrixType, typename ResultType>
-void matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
+void matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(const MatrixType& T, Index i, Index j, ResultType& sqrtT)
{
typedef typename traits<MatrixType>::Scalar Scalar;
Scalar tmp = (sqrtT.row(i).segment(i+1,j-i-1) * sqrtT.col(j).segment(i+1,j-i-1)).value();
@@ -41,7 +41,7 @@ void matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(const MatrixType& T, ty
// similar to compute1x1offDiagonalBlock()
template <typename MatrixType, typename ResultType>
-void matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
+void matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(const MatrixType& T, Index i, Index j, ResultType& sqrtT)
{
typedef typename traits<MatrixType>::Scalar Scalar;
Matrix<Scalar,1,2> rhs = T.template block<1,2>(i,j);
@@ -54,7 +54,7 @@ void matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(const MatrixType& T, ty
// similar to compute1x1offDiagonalBlock()
template <typename MatrixType, typename ResultType>
-void matrix_sqrt_quasi_triangular_2x1_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
+void matrix_sqrt_quasi_triangular_2x1_off_diagonal_block(const MatrixType& T, Index i, Index j, ResultType& sqrtT)
{
typedef typename traits<MatrixType>::Scalar Scalar;
Matrix<Scalar,2,1> rhs = T.template block<2,1>(i,j);
@@ -101,7 +101,7 @@ void matrix_sqrt_quasi_triangular_solve_auxiliary_equation(MatrixType& X, const
// similar to compute1x1offDiagonalBlock()
template <typename MatrixType, typename ResultType>
-void matrix_sqrt_quasi_triangular_2x2_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
+void matrix_sqrt_quasi_triangular_2x2_off_diagonal_block(const MatrixType& T, Index i, Index j, ResultType& sqrtT)
{
typedef typename traits<MatrixType>::Scalar Scalar;
Matrix<Scalar,2,2> A = sqrtT.template block<2,2>(i,i);
@@ -120,7 +120,6 @@ template <typename MatrixType, typename ResultType>
void matrix_sqrt_quasi_triangular_diagonal(const MatrixType& T, ResultType& sqrtT)
{
using std::sqrt;
- typedef typename MatrixType::Index Index;
const Index size = T.rows();
for (Index i = 0; i < size; i++) {
if (i == size - 1 || T.coeff(i+1, i) == 0) {
@@ -139,7 +138,6 @@ void matrix_sqrt_quasi_triangular_diagonal(const MatrixType& T, ResultType& sqrt
template <typename MatrixType, typename ResultType>
void matrix_sqrt_quasi_triangular_off_diagonal(const MatrixType& T, ResultType& sqrtT)
{
- typedef typename MatrixType::Index Index;
const Index size = T.rows();
for (Index j = 1; j < size; j++) {
if (T.coeff(j, j-1) != 0) // if T(j-1:j, j-1:j) is a 2-by-2 block
@@ -206,8 +204,7 @@ template <typename MatrixType, typename ResultType>
void matrix_sqrt_triangular(const MatrixType &arg, ResultType &result)
{
using std::sqrt;
- typedef typename MatrixType::Index Index;
- typedef typename MatrixType::Scalar Scalar;
+ typedef typename MatrixType::Scalar Scalar;
eigen_assert(arg.rows() == arg.cols());
@@ -256,18 +253,19 @@ struct matrix_sqrt_compute
template <typename MatrixType>
struct matrix_sqrt_compute<MatrixType, 0>
{
+ typedef typename MatrixType::PlainObject PlainType;
template <typename ResultType>
static void run(const MatrixType &arg, ResultType &result)
{
eigen_assert(arg.rows() == arg.cols());
// Compute Schur decomposition of arg
- const RealSchur<MatrixType> schurOfA(arg);
- const MatrixType& T = schurOfA.matrixT();
- const MatrixType& U = schurOfA.matrixU();
+ const RealSchur<PlainType> schurOfA(arg);
+ const PlainType& T = schurOfA.matrixT();
+ const PlainType& U = schurOfA.matrixU();
// Compute square root of T
- MatrixType sqrtT = MatrixType::Zero(arg.rows(), arg.cols());
+ PlainType sqrtT = PlainType::Zero(arg.rows(), arg.cols());
matrix_sqrt_quasi_triangular(T, sqrtT);
// Compute square root of arg
@@ -281,18 +279,19 @@ struct matrix_sqrt_compute<MatrixType, 0>
template <typename MatrixType>
struct matrix_sqrt_compute<MatrixType, 1>
{
+ typedef typename MatrixType::PlainObject PlainType;
template <typename ResultType>
static void run(const MatrixType &arg, ResultType &result)
{
eigen_assert(arg.rows() == arg.cols());
// Compute Schur decomposition of arg
- const ComplexSchur<MatrixType> schurOfA(arg);
- const MatrixType& T = schurOfA.matrixT();
- const MatrixType& U = schurOfA.matrixU();
+ const ComplexSchur<PlainType> schurOfA(arg);
+ const PlainType& T = schurOfA.matrixT();
+ const PlainType& U = schurOfA.matrixU();
// Compute square root of T
- MatrixType sqrtT;
+ PlainType sqrtT;
matrix_sqrt_triangular(T, sqrtT);
// Compute square root of arg
@@ -318,7 +317,6 @@ template<typename Derived> class MatrixSquareRootReturnValue
: public ReturnByValue<MatrixSquareRootReturnValue<Derived> >
{
protected:
- typedef typename Derived::Index Index;
typedef typename internal::ref_selector<Derived>::type DerivedNested;
public:
diff --git a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
index 8fe3ed86b..07c5ef014 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
@@ -52,7 +52,7 @@ public:
Parameters()
: factor(Scalar(100.))
, maxfev(1000)
- , xtol(std::sqrt(NumTraits<Scalar>::epsilon()))
+ , xtol(numext::sqrt(NumTraits<Scalar>::epsilon()))
, nb_of_subdiagonals(-1)
, nb_of_superdiagonals(-1)
, epsfcn(Scalar(0.)) {}
@@ -70,7 +70,7 @@ public:
HybridNonLinearSolverSpace::Status hybrj1(
FVectorType &x,
- const Scalar tol = std::sqrt(NumTraits<Scalar>::epsilon())
+ const Scalar tol = numext::sqrt(NumTraits<Scalar>::epsilon())
);
HybridNonLinearSolverSpace::Status solveInit(FVectorType &x);
@@ -79,7 +79,7 @@ public:
HybridNonLinearSolverSpace::Status hybrd1(
FVectorType &x,
- const Scalar tol = std::sqrt(NumTraits<Scalar>::epsilon())
+ const Scalar tol = numext::sqrt(NumTraits<Scalar>::epsilon())
);
HybridNonLinearSolverSpace::Status solveNumericalDiffInit(FVectorType &x);
diff --git a/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h b/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h
index feafd62a8..4f2f560b3 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h
@@ -61,7 +61,7 @@ void qrsolv(
qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj;
wa[k] = temp;
- /* accumulate the tranformation in the row of s. */
+ /* accumulate the transformation in the row of s. */
for (i = k+1; i<n; ++i) {
temp = givens.c() * s(i,k) + givens.s() * sdiag[i];
sdiag[i] = -givens.s() * s(i,k) + givens.c() * sdiag[i];
diff --git a/unsupported/Eigen/src/NonLinearOptimization/r1updt.h b/unsupported/Eigen/src/NonLinearOptimization/r1updt.h
index f28766061..09fc65255 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/r1updt.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/r1updt.h
@@ -22,7 +22,7 @@ void r1updt(
Scalar temp;
JacobiRotation<Scalar> givens;
- // r1updt had a broader usecase, but we dont use it here. And, more
+ // r1updt had a broader usecase, but we don't use it here. And, more
// importantly, we can not test it.
eigen_assert(m==n);
eigen_assert(u.size()==m);
diff --git a/unsupported/Eigen/src/Polynomials/Companion.h b/unsupported/Eigen/src/Polynomials/Companion.h
index b515c2920..59a15b098 100644
--- a/unsupported/Eigen/src/Polynomials/Companion.h
+++ b/unsupported/Eigen/src/Polynomials/Companion.h
@@ -20,12 +20,6 @@ namespace internal {
#ifndef EIGEN_PARSED_BY_DOXYGEN
-template <typename T>
-T radix(){ return 2; }
-
-template <typename T>
-T radix2(){ return radix<T>()*radix<T>(); }
-
template<int Size>
struct decrement_if_fixed_size
{
@@ -75,8 +69,7 @@ class companion
void setPolynomial( const VectorType& poly )
{
const Index deg = poly.size()-1;
- m_monic = -1/poly[deg] * poly.head(deg);
- //m_bl_diag.setIdentity( deg-1 );
+ m_monic = -poly.head(deg)/poly[deg];
m_bl_diag.setOnes(deg-1);
}
@@ -89,13 +82,13 @@ class companion
{
const Index deg = m_monic.size();
const Index deg_1 = deg-1;
- DenseCompanionMatrixType companion(deg,deg);
- companion <<
+ DenseCompanionMatrixType companMat(deg,deg);
+ companMat <<
( LeftBlock(deg,deg_1)
<< LeftBlockFirstRow::Zero(1,deg_1),
BottomLeftBlock::Identity(deg-1,deg-1)*m_bl_diag.asDiagonal() ).finished()
, m_monic;
- return companion;
+ return companMat;
}
@@ -104,20 +97,20 @@ class companion
/** Helper function for the balancing algorithm.
* \returns true if the row and the column, having colNorm and rowNorm
* as norms, are balanced, false otherwise.
- * colB and rowB are repectively the multipliers for
+ * colB and rowB are respectively the multipliers for
* the column and the row in order to balance them.
* */
- bool balanced( Scalar colNorm, Scalar rowNorm,
- bool& isBalanced, Scalar& colB, Scalar& rowB );
+ bool balanced( RealScalar colNorm, RealScalar rowNorm,
+ bool& isBalanced, RealScalar& colB, RealScalar& rowB );
/** Helper function for the balancing algorithm.
* \returns true if the row and the column, having colNorm and rowNorm
* as norms, are balanced, false otherwise.
- * colB and rowB are repectively the multipliers for
+ * colB and rowB are respectively the multipliers for
* the column and the row in order to balance them.
* */
- bool balancedR( Scalar colNorm, Scalar rowNorm,
- bool& isBalanced, Scalar& colB, Scalar& rowB );
+ bool balancedR( RealScalar colNorm, RealScalar rowNorm,
+ bool& isBalanced, RealScalar& colB, RealScalar& rowB );
public:
/**
@@ -139,10 +132,13 @@ class companion
template< typename _Scalar, int _Deg >
inline
-bool companion<_Scalar,_Deg>::balanced( Scalar colNorm, Scalar rowNorm,
- bool& isBalanced, Scalar& colB, Scalar& rowB )
+bool companion<_Scalar,_Deg>::balanced( RealScalar colNorm, RealScalar rowNorm,
+ bool& isBalanced, RealScalar& colB, RealScalar& rowB )
{
- if( Scalar(0) == colNorm || Scalar(0) == rowNorm ){ return true; }
+ if( RealScalar(0) == colNorm || RealScalar(0) == rowNorm
+ || !(numext::isfinite)(colNorm) || !(numext::isfinite)(rowNorm)){
+ return true;
+ }
else
{
//To find the balancing coefficients, if the radix is 2,
@@ -150,53 +146,61 @@ bool companion<_Scalar,_Deg>::balanced( Scalar colNorm, Scalar rowNorm,
// \f$ 2^{2\sigma-1} < rowNorm / colNorm \le 2^{2\sigma+1} \f$
// then the balancing coefficient for the row is \f$ 1/2^{\sigma} \f$
// and the balancing coefficient for the column is \f$ 2^{\sigma} \f$
- rowB = rowNorm / radix<Scalar>();
- colB = Scalar(1);
- const Scalar s = colNorm + rowNorm;
-
- while (colNorm < rowB)
+ const RealScalar radix = RealScalar(2);
+ const RealScalar radix2 = RealScalar(4);
+
+ rowB = rowNorm / radix;
+ colB = RealScalar(1);
+ const RealScalar s = colNorm + rowNorm;
+
+ // Find sigma s.t. rowNorm / 2 <= 2^(2*sigma) * colNorm
+ RealScalar scout = colNorm;
+ while (scout < rowB)
{
- colB *= radix<Scalar>();
- colNorm *= radix2<Scalar>();
+ colB *= radix;
+ scout *= radix2;
}
-
- rowB = rowNorm * radix<Scalar>();
-
- while (colNorm >= rowB)
+
+ // We now have an upper-bound for sigma, try to lower it.
+ // Find sigma s.t. 2^(2*sigma) * colNorm / 2 < rowNorm
+ scout = colNorm * (colB / radix) * colB; // Avoid overflow.
+ while (scout >= rowNorm)
{
- colB /= radix<Scalar>();
- colNorm /= radix2<Scalar>();
+ colB /= radix;
+ scout /= radix2;
}
- //This line is used to avoid insubstantial balancing
- if ((rowNorm + colNorm) < Scalar(0.95) * s * colB)
+ // This line is used to avoid insubstantial balancing.
+ if ((rowNorm + radix * scout) < RealScalar(0.95) * s * colB)
{
isBalanced = false;
- rowB = Scalar(1) / colB;
+ rowB = RealScalar(1) / colB;
return false;
}
- else{
- return true; }
+ else
+ {
+ return true;
+ }
}
}
template< typename _Scalar, int _Deg >
inline
-bool companion<_Scalar,_Deg>::balancedR( Scalar colNorm, Scalar rowNorm,
- bool& isBalanced, Scalar& colB, Scalar& rowB )
+bool companion<_Scalar,_Deg>::balancedR( RealScalar colNorm, RealScalar rowNorm,
+ bool& isBalanced, RealScalar& colB, RealScalar& rowB )
{
- if( Scalar(0) == colNorm || Scalar(0) == rowNorm ){ return true; }
+ if( RealScalar(0) == colNorm || RealScalar(0) == rowNorm ){ return true; }
else
{
/**
* Set the norm of the column and the row to the geometric mean
* of the row and column norm
*/
- const _Scalar q = colNorm/rowNorm;
+ const RealScalar q = colNorm/rowNorm;
if( !isApprox( q, _Scalar(1) ) )
{
rowB = sqrt( colNorm/rowNorm );
- colB = Scalar(1)/rowB;
+ colB = RealScalar(1)/rowB;
isBalanced = false;
return false;
@@ -219,8 +223,8 @@ void companion<_Scalar,_Deg>::balance()
while( !hasConverged )
{
hasConverged = true;
- Scalar colNorm,rowNorm;
- Scalar colB,rowB;
+ RealScalar colNorm,rowNorm;
+ RealScalar colB,rowB;
//First row, first column excluding the diagonal
//==============================================
diff --git a/unsupported/Eigen/src/Polynomials/PolynomialSolver.h b/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
index 03198ec8e..5e0ecbb43 100644
--- a/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
+++ b/unsupported/Eigen/src/Polynomials/PolynomialSolver.h
@@ -99,7 +99,7 @@ class PolynomialSolverBase
*/
inline const RootType& greatestRoot() const
{
- std::greater<Scalar> greater;
+ std::greater<RealScalar> greater;
return selectComplexRoot_withRespectToNorm( greater );
}
@@ -108,7 +108,7 @@ class PolynomialSolverBase
*/
inline const RootType& smallestRoot() const
{
- std::less<Scalar> less;
+ std::less<RealScalar> less;
return selectComplexRoot_withRespectToNorm( less );
}
@@ -126,7 +126,7 @@ class PolynomialSolverBase
for( Index i=0; i<m_roots.size(); ++i )
{
- if( abs( m_roots[i].imag() ) < absImaginaryThreshold )
+ if( abs( m_roots[i].imag() ) <= absImaginaryThreshold )
{
if( !hasArealRoot )
{
@@ -144,10 +144,10 @@ class PolynomialSolverBase
}
}
}
- else
+ else if(!hasArealRoot)
{
if( abs( m_roots[i].imag() ) < abs( m_roots[res].imag() ) ){
- res = i; }
+ res = i;}
}
}
return numext::real_ref(m_roots[res]);
@@ -167,7 +167,7 @@ class PolynomialSolverBase
for( Index i=0; i<m_roots.size(); ++i )
{
- if( abs( m_roots[i].imag() ) < absImaginaryThreshold )
+ if( abs( m_roots[i].imag() ) <= absImaginaryThreshold )
{
if( !hasArealRoot )
{
@@ -213,7 +213,7 @@ class PolynomialSolverBase
bool& hasArealRoot,
const RealScalar& absImaginaryThreshold = NumTraits<Scalar>::dummy_precision() ) const
{
- std::greater<Scalar> greater;
+ std::greater<RealScalar> greater;
return selectRealRoot_withRespectToAbsRealPart( greater, hasArealRoot, absImaginaryThreshold );
}
@@ -236,7 +236,7 @@ class PolynomialSolverBase
bool& hasArealRoot,
const RealScalar& absImaginaryThreshold = NumTraits<Scalar>::dummy_precision() ) const
{
- std::less<Scalar> less;
+ std::less<RealScalar> less;
return selectRealRoot_withRespectToAbsRealPart( less, hasArealRoot, absImaginaryThreshold );
}
@@ -259,7 +259,7 @@ class PolynomialSolverBase
bool& hasArealRoot,
const RealScalar& absImaginaryThreshold = NumTraits<Scalar>::dummy_precision() ) const
{
- std::greater<Scalar> greater;
+ std::greater<RealScalar> greater;
return selectRealRoot_withRespectToRealPart( greater, hasArealRoot, absImaginaryThreshold );
}
@@ -282,7 +282,7 @@ class PolynomialSolverBase
bool& hasArealRoot,
const RealScalar& absImaginaryThreshold = NumTraits<Scalar>::dummy_precision() ) const
{
- std::less<Scalar> less;
+ std::less<RealScalar> less;
return selectRealRoot_withRespectToRealPart( less, hasArealRoot, absImaginaryThreshold );
}
@@ -327,7 +327,7 @@ class PolynomialSolverBase
* However, almost always, correct accuracy is reached even in these cases for 64bit
* (double) floating types and small polynomial degree (<20).
*/
-template< typename _Scalar, int _Deg >
+template<typename _Scalar, int _Deg>
class PolynomialSolver : public PolynomialSolverBase<_Scalar,_Deg>
{
public:
@@ -337,7 +337,10 @@ class PolynomialSolver : public PolynomialSolverBase<_Scalar,_Deg>
EIGEN_POLYNOMIAL_SOLVER_BASE_INHERITED_TYPES( PS_Base )
typedef Matrix<Scalar,_Deg,_Deg> CompanionMatrixType;
- typedef EigenSolver<CompanionMatrixType> EigenSolverType;
+ typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
+ ComplexEigenSolver<CompanionMatrixType>,
+ EigenSolver<CompanionMatrixType> >::type EigenSolverType;
+ typedef typename internal::conditional<NumTraits<Scalar>::IsComplex, Scalar, std::complex<Scalar> >::type ComplexScalar;
public:
/** Computes the complex roots of a new polynomial. */
@@ -352,6 +355,25 @@ class PolynomialSolver : public PolynomialSolverBase<_Scalar,_Deg>
companion.balance();
m_eigenSolver.compute( companion.denseMatrix() );
m_roots = m_eigenSolver.eigenvalues();
+ // cleanup noise in imaginary part of real roots:
+ // if the imaginary part is rather small compared to the real part
+ // and cancelling the imaginary part yields a smaller evaluation,
+ // then it's safe to keep the real part only.
+ RealScalar coarse_prec = RealScalar(std::pow(4,poly.size()+1))*NumTraits<RealScalar>::epsilon();
+ for(Index i = 0; i<m_roots.size(); ++i)
+ {
+ if( internal::isMuchSmallerThan(numext::abs(numext::imag(m_roots[i])),
+ numext::abs(numext::real(m_roots[i])),
+ coarse_prec) )
+ {
+ ComplexScalar as_real_root = ComplexScalar(numext::real(m_roots[i]));
+ if( numext::abs(poly_eval(poly, as_real_root))
+ <= numext::abs(poly_eval(poly, m_roots[i])))
+ {
+ m_roots[i] = as_real_root;
+ }
+ }
+ }
}
else if(poly.size () == 2)
{
diff --git a/unsupported/Eigen/src/Polynomials/PolynomialUtils.h b/unsupported/Eigen/src/Polynomials/PolynomialUtils.h
index 40ba65b7e..394e857ac 100644
--- a/unsupported/Eigen/src/Polynomials/PolynomialUtils.h
+++ b/unsupported/Eigen/src/Polynomials/PolynomialUtils.h
@@ -20,8 +20,8 @@ namespace Eigen {
* e.g. \f$ 1 + 3x^2 \f$ is stored as a vector \f$ [ 1, 0, 3 ] \f$.
* \param[in] x : the value to evaluate the polynomial at.
*
- * <i><b>Note for stability:</b></i>
- * <dd> \f$ |x| \le 1 \f$ </dd>
+ * \note for stability:
+ * \f$ |x| \le 1 \f$
*/
template <typename Polynomials, typename T>
inline
@@ -67,8 +67,8 @@ T poly_eval( const Polynomials& poly, const T& x )
* by degrees i.e. poly[i] is the coefficient of degree i of the polynomial
* e.g. \f$ 1 + 3x^2 \f$ is stored as a vector \f$ [ 1, 0, 3 ] \f$.
*
- * <i><b>Precondition:</b></i>
- * <dd> the leading coefficient of the input polynomial poly must be non zero </dd>
+ * \pre
+ * the leading coefficient of the input polynomial poly must be non zero
*/
template <typename Polynomial>
inline
diff --git a/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h b/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h
index a1f54ed35..6d0370d5b 100644
--- a/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h
+++ b/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h
@@ -41,7 +41,7 @@ public:
/** Sets the relative threshold value used to prune zero coefficients during the decomposition.
*
- * Setting a value greater than zero speeds up computation, and yields to an imcomplete
+ * Setting a value greater than zero speeds up computation, and yields an incomplete
* factorization with fewer non zero coefficients. Such approximate factors are especially
* useful to initialize an iterative solver.
*
@@ -349,4 +349,4 @@ bool SkylineInplaceLU<MatrixType>::solve(const MatrixBase<BDerived> &b, MatrixBa
} // end namespace Eigen
-#endif // EIGEN_SKYLINELU_H
+#endif // EIGEN_SKYLINEINPLACELU_H
diff --git a/unsupported/Eigen/src/Skyline/SkylineMatrix.h b/unsupported/Eigen/src/Skyline/SkylineMatrix.h
index a2a8933ca..7c7eace7f 100644
--- a/unsupported/Eigen/src/Skyline/SkylineMatrix.h
+++ b/unsupported/Eigen/src/Skyline/SkylineMatrix.h
@@ -206,26 +206,26 @@ public:
if (col > row) //upper matrix
{
const Index minOuterIndex = inner - m_data.upperProfile(inner);
- eigen_assert(outer >= minOuterIndex && "you try to acces a coeff that do not exist in the storage");
+ eigen_assert(outer >= minOuterIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner)));
}
if (col < row) //lower matrix
{
const Index minInnerIndex = outer - m_data.lowerProfile(outer);
- eigen_assert(inner >= minInnerIndex && "you try to acces a coeff that do not exist in the storage");
+ eigen_assert(inner >= minInnerIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer)));
}
} else {
if (outer > inner) //upper matrix
{
const Index maxOuterIndex = inner + m_data.upperProfile(inner);
- eigen_assert(outer <= maxOuterIndex && "you try to acces a coeff that do not exist in the storage");
+ eigen_assert(outer <= maxOuterIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.upper(m_colStartIndex[inner] + (outer - inner));
}
if (outer < inner) //lower matrix
{
const Index maxInnerIndex = outer + m_data.lowerProfile(outer);
- eigen_assert(inner <= maxInnerIndex && "you try to acces a coeff that do not exist in the storage");
+ eigen_assert(inner <= maxInnerIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer));
}
}
@@ -300,11 +300,11 @@ public:
if (IsRowMajor) {
const Index minInnerIndex = outer - m_data.lowerProfile(outer);
- eigen_assert(inner >= minInnerIndex && "you try to acces a coeff that do not exist in the storage");
+ eigen_assert(inner >= minInnerIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer)));
} else {
const Index maxInnerIndex = outer + m_data.lowerProfile(outer);
- eigen_assert(inner <= maxInnerIndex && "you try to acces a coeff that do not exist in the storage");
+ eigen_assert(inner <= maxInnerIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer));
}
}
@@ -336,11 +336,11 @@ public:
if (IsRowMajor) {
const Index minOuterIndex = inner - m_data.upperProfile(inner);
- eigen_assert(outer >= minOuterIndex && "you try to acces a coeff that do not exist in the storage");
+ eigen_assert(outer >= minOuterIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner)));
} else {
const Index maxOuterIndex = inner + m_data.upperProfile(inner);
- eigen_assert(outer <= maxOuterIndex && "you try to acces a coeff that do not exist in the storage");
+ eigen_assert(outer <= maxOuterIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.upper(m_colStartIndex[inner] + (outer - inner));
}
}
@@ -859,4 +859,4 @@ protected:
} // end namespace Eigen
-#endif // EIGEN_SkylineMatrix_H
+#endif // EIGEN_SKYLINEMATRIX_H
diff --git a/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h b/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h
index b3a237230..b0d5e1001 100644
--- a/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h
+++ b/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h
@@ -12,7 +12,7 @@
#include "SkylineUtil.h"
-namespace Eigen {
+namespace Eigen {
/** \ingroup Skyline_Module
*
@@ -102,18 +102,18 @@ public:
#endif // not EIGEN_PARSED_BY_DOXYGEN
/** \returns the number of rows. \sa cols(), RowsAtCompileTime */
- inline Index rows() const {
+ inline EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT {
return derived().rows();
}
/** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
- inline Index cols() const {
+ inline EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT {
return derived().cols();
}
/** \returns the number of coefficients, which is \a rows()*cols().
* \sa rows(), cols(), SizeAtCompileTime. */
- inline Index size() const {
+ inline EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT {
return rows() * cols();
}
@@ -209,4 +209,4 @@ protected:
} // end namespace Eigen
-#endif // EIGEN_SkylineMatrixBase_H
+#endif // EIGEN_SKYLINEMATRIXBASE_H
diff --git a/unsupported/Eigen/src/Skyline/SkylineStorage.h b/unsupported/Eigen/src/Skyline/SkylineStorage.h
index 378a8deb4..cc7514f12 100644
--- a/unsupported/Eigen/src/Skyline/SkylineStorage.h
+++ b/unsupported/Eigen/src/Skyline/SkylineStorage.h
@@ -256,4 +256,4 @@ public:
} // end namespace Eigen
-#endif // EIGEN_COMPRESSED_STORAGE_H
+#endif // EIGEN_SKYLINE_STORAGE_H
diff --git a/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h b/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h
index 0e8350a7d..536a0c320 100644
--- a/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h
+++ b/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h
@@ -931,7 +931,7 @@ class BlockSparseMatrix : public SparseMatrixBase<BlockSparseMatrix<_Scalar,_Blo
}
/**
- * \returns the starting position of the block <id> in the array of values
+ * \returns the starting position of the block \p id in the array of values
*/
Index blockPtr(Index id) const
{
diff --git a/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h b/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h
index 037a13f86..42c99e467 100644
--- a/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h
+++ b/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h
@@ -187,7 +187,7 @@ template<typename _Scalar, int _Options, typename _StorageIndex>
/** Does nothing: provided for compatibility with SparseMatrix */
inline void finalize() {}
- /** Suppress all nonzeros which are smaller than \a reference under the tolerence \a epsilon */
+ /** Suppress all nonzeros which are smaller than \a reference under the tolerance \a epsilon */
void prune(Scalar reference, RealScalar epsilon = NumTraits<RealScalar>::dummy_precision())
{
for (Index j=0; j<outerSize(); ++j)
@@ -224,31 +224,43 @@ template<typename _Scalar, int _Options, typename _StorageIndex>
}
}
- /** The class DynamicSparseMatrix is deprectaed */
+ /** The class DynamicSparseMatrix is deprecated */
EIGEN_DEPRECATED inline DynamicSparseMatrix()
: m_innerSize(0), m_data(0)
{
+ #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+ EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+ #endif
eigen_assert(innerSize()==0 && outerSize()==0);
}
- /** The class DynamicSparseMatrix is deprectaed */
+ /** The class DynamicSparseMatrix is deprecated */
EIGEN_DEPRECATED inline DynamicSparseMatrix(Index rows, Index cols)
: m_innerSize(0)
{
+ #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+ EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+ #endif
resize(rows, cols);
}
- /** The class DynamicSparseMatrix is deprectaed */
+ /** The class DynamicSparseMatrix is deprecated */
template<typename OtherDerived>
EIGEN_DEPRECATED explicit inline DynamicSparseMatrix(const SparseMatrixBase<OtherDerived>& other)
: m_innerSize(0)
{
- Base::operator=(other.derived());
+ #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+ EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+ #endif
+ Base::operator=(other.derived());
}
inline DynamicSparseMatrix(const DynamicSparseMatrix& other)
: Base(), m_innerSize(0)
{
+ #ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+ EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+ #endif
*this = other.derived();
}
diff --git a/unsupported/Eigen/src/SparseExtra/MarketIO.h b/unsupported/Eigen/src/SparseExtra/MarketIO.h
index cdc14f86e..dd786d561 100644
--- a/unsupported/Eigen/src/SparseExtra/MarketIO.h
+++ b/unsupported/Eigen/src/SparseExtra/MarketIO.h
@@ -12,38 +12,38 @@
#define EIGEN_SPARSE_MARKET_IO_H
#include <iostream>
+#include <vector>
namespace Eigen {
namespace internal
{
- template <typename Scalar>
- inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, Scalar& value)
+ template <typename Scalar, typename StorageIndex>
+ inline void GetMarketLine (const char* line, StorageIndex& i, StorageIndex& j, Scalar& value)
{
- line >> i >> j >> value;
- i--;
- j--;
- if(i>=0 && j>=0 && i<M && j<N)
- {
- return true;
- }
- else
- return false;
+ std::stringstream sline(line);
+ sline >> i >> j >> value;
}
- template <typename Scalar>
- inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, std::complex<Scalar>& value)
+
+ template<> inline void GetMarketLine (const char* line, int& i, int& j, float& value)
+ { std::sscanf(line, "%d %d %g", &i, &j, &value); }
+
+ template<> inline void GetMarketLine (const char* line, int& i, int& j, double& value)
+ { std::sscanf(line, "%d %d %lg", &i, &j, &value); }
+
+ template<> inline void GetMarketLine (const char* line, int& i, int& j, std::complex<float>& value)
+ { std::sscanf(line, "%d %d %g %g", &i, &j, &numext::real_ref(value), &numext::imag_ref(value)); }
+
+ template<> inline void GetMarketLine (const char* line, int& i, int& j, std::complex<double>& value)
+ { std::sscanf(line, "%d %d %lg %lg", &i, &j, &numext::real_ref(value), &numext::imag_ref(value)); }
+
+ template <typename Scalar, typename StorageIndex>
+ inline void GetMarketLine (const char* line, StorageIndex& i, StorageIndex& j, std::complex<Scalar>& value)
{
+ std::stringstream sline(line);
Scalar valR, valI;
- line >> i >> j >> valR >> valI;
- i--;
- j--;
- if(i>=0 && j>=0 && i<M && j<N)
- {
- value = std::complex<Scalar>(valR, valI);
- return true;
- }
- else
- return false;
+ sline >> i >> j >> valR >> valI;
+ value = std::complex<Scalar>(valR,valI);
}
template <typename RealScalar>
@@ -81,13 +81,13 @@ namespace internal
}
}
- template<typename Scalar>
- inline void PutMatrixElt(Scalar value, int row, int col, std::ofstream& out)
+ template<typename Scalar, typename StorageIndex>
+ inline void PutMatrixElt(Scalar value, StorageIndex row, StorageIndex col, std::ofstream& out)
{
out << row << " "<< col << " " << value << "\n";
}
- template<typename Scalar>
- inline void PutMatrixElt(std::complex<Scalar> value, int row, int col, std::ofstream& out)
+ template<typename Scalar, typename StorageIndex>
+ inline void PutMatrixElt(std::complex<Scalar> value, StorageIndex row, StorageIndex col, std::ofstream& out)
{
out << row << " " << col << " " << value.real() << " " << value.imag() << "\n";
}
@@ -101,14 +101,15 @@ namespace internal
template<typename Scalar>
inline void putVectorElt(std::complex<Scalar> value, std::ofstream& out)
{
- out << value.real << " " << value.imag()<< "\n";
+ out << value.real() << " " << value.imag()<< "\n";
}
-} // end namepsace internal
+} // end namespace internal
inline bool getMarketHeader(const std::string& filename, int& sym, bool& iscomplex, bool& isvector)
{
sym = 0;
+ iscomplex = false;
isvector = false;
std::ifstream in(filename.c_str(),std::ios::in);
if(!in)
@@ -133,17 +134,20 @@ template<typename SparseMatrixType>
bool loadMarket(SparseMatrixType& mat, const std::string& filename)
{
typedef typename SparseMatrixType::Scalar Scalar;
- typedef typename SparseMatrixType::Index Index;
+ typedef typename SparseMatrixType::StorageIndex StorageIndex;
std::ifstream input(filename.c_str(),std::ios::in);
if(!input)
return false;
+
+ char rdbuffer[4096];
+ input.rdbuf()->pubsetbuf(rdbuffer, 4096);
const int maxBuffersize = 2048;
char buffer[maxBuffersize];
bool readsizes = false;
- typedef Triplet<Scalar,Index> T;
+ typedef Triplet<Scalar,StorageIndex> T;
std::vector<T> elements;
Index M(-1), N(-1), NNZ(-1);
@@ -154,33 +158,36 @@ bool loadMarket(SparseMatrixType& mat, const std::string& filename)
//NOTE An appropriate test should be done on the header to get the symmetry
if(buffer[0]=='%')
continue;
-
- std::stringstream line(buffer);
-
+
if(!readsizes)
{
+ std::stringstream line(buffer);
line >> M >> N >> NNZ;
- if(M > 0 && N > 0 && NNZ > 0)
+ if(M > 0 && N > 0)
{
readsizes = true;
- //std::cout << "sizes: " << M << "," << N << "," << NNZ << "\n";
mat.resize(M,N);
mat.reserve(NNZ);
}
}
else
{
- Index i(-1), j(-1);
+ StorageIndex i(-1), j(-1);
Scalar value;
- if( internal::GetMarketLine(line, M, N, i, j, value) )
+ internal::GetMarketLine(buffer, i, j, value);
+
+ i--;
+ j--;
+ if(i>=0 && j>=0 && i<M && j<N)
{
- ++ count;
+ ++count;
elements.push_back(T(i,j,value));
}
- else
+ else
std::cerr << "Invalid read: " << i << "," << j << "\n";
}
}
+
mat.setFromTriplets(elements.begin(), elements.end());
if(count!=NNZ)
std::cerr << count << "!=" << NNZ << "\n";
@@ -225,12 +232,13 @@ template<typename SparseMatrixType>
bool saveMarket(const SparseMatrixType& mat, const std::string& filename, int sym = 0)
{
typedef typename SparseMatrixType::Scalar Scalar;
+ typedef typename SparseMatrixType::RealScalar RealScalar;
std::ofstream out(filename.c_str(),std::ios::out);
if(!out)
return false;
out.flags(std::ios_base::scientific);
- out.precision(64);
+ out.precision(std::numeric_limits<RealScalar>::digits10 + 2);
std::string header;
internal::putMarketHeader<Scalar>(header, sym);
out << header << std::endl;
@@ -241,7 +249,6 @@ bool saveMarket(const SparseMatrixType& mat, const std::string& filename, int sy
{
++ count;
internal::PutMatrixElt(it.value(), it.row()+1, it.col()+1, out);
- // out << it.row()+1 << " " << it.col()+1 << " " << it.value() << "\n";
}
out.close();
return true;
@@ -250,13 +257,14 @@ bool saveMarket(const SparseMatrixType& mat, const std::string& filename, int sy
template<typename VectorType>
bool saveMarketVector (const VectorType& vec, const std::string& filename)
{
- typedef typename VectorType::Scalar Scalar;
+ typedef typename VectorType::Scalar Scalar;
+ typedef typename VectorType::RealScalar RealScalar;
std::ofstream out(filename.c_str(),std::ios::out);
if(!out)
return false;
out.flags(std::ios_base::scientific);
- out.precision(64);
+ out.precision(std::numeric_limits<RealScalar>::digits10 + 2);
if(internal::is_same<Scalar, std::complex<float> >::value || internal::is_same<Scalar, std::complex<double> >::value)
out << "%%MatrixMarket matrix array complex general\n";
else
diff --git a/unsupported/Eigen/src/SparseExtra/RandomSetter.h b/unsupported/Eigen/src/SparseExtra/RandomSetter.h
index ee97299af..985702b5f 100644
--- a/unsupported/Eigen/src/SparseExtra/RandomSetter.h
+++ b/unsupported/Eigen/src/SparseExtra/RandomSetter.h
@@ -10,7 +10,13 @@
#ifndef EIGEN_RANDOMSETTER_H
#define EIGEN_RANDOMSETTER_H
-namespace Eigen {
+#if defined(EIGEN_GOOGLEHASH_SUPPORT)
+// Ensure the ::google namespace exists, required for checking existence of
+// ::google::dense_hash_map and ::google::sparse_hash_map.
+namespace google {}
+#endif
+
+namespace Eigen {
/** Represents a std::map
*
@@ -56,7 +62,26 @@ template<typename Scalar> struct StdUnorderedMapTraits
};
#endif // EIGEN_UNORDERED_MAP_SUPPORT
-#ifdef _DENSE_HASH_MAP_H_
+#if defined(EIGEN_GOOGLEHASH_SUPPORT)
+
+namespace google {
+
+// Namespace work-around, since sometimes dense_hash_map and sparse_hash_map
+// are in the global namespace, and other times they are under ::google.
+using namespace ::google;
+
+template<typename KeyType, typename Scalar>
+struct DenseHashMap {
+ typedef dense_hash_map<KeyType, Scalar> type;
+};
+
+template<typename KeyType, typename Scalar>
+struct SparseHashMap {
+ typedef sparse_hash_map<KeyType, Scalar> type;
+};
+
+} // namespace google
+
/** Represents a google::dense_hash_map
*
* \see RandomSetter
@@ -64,7 +89,7 @@ template<typename Scalar> struct StdUnorderedMapTraits
template<typename Scalar> struct GoogleDenseHashMapTraits
{
typedef int KeyType;
- typedef google::dense_hash_map<KeyType,Scalar> Type;
+ typedef typename google::DenseHashMap<KeyType,Scalar>::type Type;
enum {
IsSorted = 0
};
@@ -72,9 +97,7 @@ template<typename Scalar> struct GoogleDenseHashMapTraits
static void setInvalidKey(Type& map, const KeyType& k)
{ map.set_empty_key(k); }
};
-#endif
-#ifdef _SPARSE_HASH_MAP_H_
/** Represents a google::sparse_hash_map
*
* \see RandomSetter
@@ -82,7 +105,7 @@ template<typename Scalar> struct GoogleDenseHashMapTraits
template<typename Scalar> struct GoogleSparseHashMapTraits
{
typedef int KeyType;
- typedef google::sparse_hash_map<KeyType,Scalar> Type;
+ typedef typename google::SparseHashMap<KeyType,Scalar>::type Type;
enum {
IsSorted = 0
};
@@ -134,18 +157,17 @@ template<typename Scalar> struct GoogleSparseHashMapTraits
* GoogleSparseHashMapTraits, GnuHashMapTraits, and finally StdMapTraits.
*
* For performance and memory consumption reasons it is highly recommended to use one of
- * the Google's hash_map implementation. To enable the support for them, you have two options:
- * - \#include <google/dense_hash_map> yourself \b before Eigen/Sparse header
- * - define EIGEN_GOOGLEHASH_SUPPORT
- * In the later case the inclusion of <google/dense_hash_map> is made for you.
+ * Google's hash_map implementations. To enable the support for them, you must define
+ * EIGEN_GOOGLEHASH_SUPPORT. This will include both <google/dense_hash_map> and
+ * <google/sparse_hash_map> for you.
*
- * \see http://code.google.com/p/google-sparsehash/
+ * \see https://github.com/sparsehash/sparsehash
*/
template<typename SparseMatrixType,
template <typename T> class MapTraits =
-#if defined _DENSE_HASH_MAP_H_
+#if defined(EIGEN_GOOGLEHASH_SUPPORT)
GoogleDenseHashMapTraits
-#elif defined _HASH_MAP
+#elif defined(_HASH_MAP)
GnuHashMapTraits
#else
StdMapTraits
@@ -249,10 +271,10 @@ class RandomSetter
}
}
// prefix sum
- Index count = 0;
+ StorageIndex count = 0;
for (Index j=0; j<mp_target->outerSize(); ++j)
{
- Index tmp = positions[j];
+ StorageIndex tmp = positions[j];
mp_target->outerIndexPtr()[j] = count;
positions[j] = count;
count += tmp;
@@ -281,7 +303,7 @@ class RandomSetter
mp_target->innerIndexPtr()[i+1] = mp_target->innerIndexPtr()[i];
--i;
}
- mp_target->innerIndexPtr()[i+1] = inner;
+ mp_target->innerIndexPtr()[i+1] = internal::convert_index<StorageIndex>(inner);
mp_target->valuePtr()[i+1] = it->second.value;
}
}
diff --git a/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h
new file mode 100644
index 000000000..41d2bf61c
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsArrayAPI.h
@@ -0,0 +1,286 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#ifndef EIGEN_BESSELFUNCTIONS_ARRAYAPI_H
+#define EIGEN_BESSELFUNCTIONS_ARRAYAPI_H
+
+namespace Eigen {
+
+/** \returns an expression of the coefficient-wise i0(\a x) to the given
+ * arrays.
+ *
+ * It returns the modified Bessel function of the first kind of order zero.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of i0(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::bessel_i0()
+ */
+template <typename Derived>
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_i0_op<typename Derived::Scalar>, const Derived>
+bessel_i0(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_i0_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise i0e(\a x) to the given
+ * arrays.
+ *
+ * It returns the exponentially scaled modified Bessel
+ * function of the first kind of order zero.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of i0e(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::bessel_i0e()
+ */
+template <typename Derived>
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_i0e_op<typename Derived::Scalar>, const Derived>
+bessel_i0e(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_i0e_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise i1(\a x) to the given
+ * arrays.
+ *
+ * It returns the modified Bessel function of the first kind of order one.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of i1(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::bessel_i1()
+ */
+template <typename Derived>
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_i1_op<typename Derived::Scalar>, const Derived>
+bessel_i1(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_i1_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise i1e(\a x) to the given
+ * arrays.
+ *
+ * It returns the exponentially scaled modified Bessel
+ * function of the first kind of order one.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of i1e(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::bessel_i1e()
+ */
+template <typename Derived>
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_i1e_op<typename Derived::Scalar>, const Derived>
+bessel_i1e(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_i1e_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise k0(\a x) to the given
+ * arrays.
+ *
+ * It returns the modified Bessel function of the second kind of order zero.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of k0(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::bessel_k0()
+ */
+template <typename Derived>
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_k0_op<typename Derived::Scalar>, const Derived>
+bessel_k0(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_k0_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise k0e(\a x) to the given
+ * arrays.
+ *
+ * It returns the exponentially scaled modified Bessel
+ * function of the second kind of order zero.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of k0e(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::bessel_k0e()
+ */
+template <typename Derived>
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_k0e_op<typename Derived::Scalar>, const Derived>
+bessel_k0e(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_k0e_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise k1(\a x) to the given
+ * arrays.
+ *
+ * It returns the modified Bessel function of the second kind of order one.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of k1(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::bessel_k1()
+ */
+template <typename Derived>
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_k1_op<typename Derived::Scalar>, const Derived>
+bessel_k1(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_k1_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise k1e(\a x) to the given
+ * arrays.
+ *
+ * It returns the exponentially scaled modified Bessel
+ * function of the second kind of order one.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of k1e(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::bessel_k1e()
+ */
+template <typename Derived>
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_k1e_op<typename Derived::Scalar>, const Derived>
+bessel_k1e(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_k1e_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise j0(\a x) to the given
+ * arrays.
+ *
+ * It returns the Bessel function of the first kind of order zero.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of j0(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::bessel_j0()
+ */
+template <typename Derived>
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_j0_op<typename Derived::Scalar>, const Derived>
+bessel_j0(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_j0_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise y0(\a x) to the given
+ * arrays.
+ *
+ * It returns the Bessel function of the second kind of order zero.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of y0(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::bessel_y0()
+ */
+template <typename Derived>
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_y0_op<typename Derived::Scalar>, const Derived>
+bessel_y0(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_y0_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise j1(\a x) to the given
+ * arrays.
+ *
+ * It returns the Bessel function of the first kind of order one.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of j1(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::bessel_j1()
+ */
+template <typename Derived>
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_j1_op<typename Derived::Scalar>, const Derived>
+bessel_j1(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_j1_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+/** \returns an expression of the coefficient-wise y1(\a x) to the given
+ * arrays.
+ *
+ * It returns the Bessel function of the second kind of order one.
+ *
+ * \param x is the argument
+ *
+ * \note This function supports only float and double scalar types. To support
+ * other scalar types, the user has to provide implementations of y1(T) for
+ * any scalar type T to be supported.
+ *
+ * \sa ArrayBase::bessel_y1()
+ */
+template <typename Derived>
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_y1_op<typename Derived::Scalar>, const Derived>
+bessel_y1(const Eigen::ArrayBase<Derived>& x) {
+ return Eigen::CwiseUnaryOp<
+ Eigen::internal::scalar_bessel_y1_op<typename Derived::Scalar>,
+ const Derived>(x.derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_BESSELFUNCTIONS_ARRAYAPI_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h
new file mode 100644
index 000000000..6049cc2fe
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsBFloat16.h
@@ -0,0 +1,68 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BESSELFUNCTIONS_BFLOAT16_H
+#define EIGEN_BESSELFUNCTIONS_BFLOAT16_H
+
+namespace Eigen {
+namespace numext {
+
+// Explicit specializations of the scalar Bessel functions for Eigen::bfloat16.
+// Each one converts the argument to float, calls the float version of the
+// same function and converts the result back to bfloat16.
+#if EIGEN_HAS_C99_MATH
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_i0(const Eigen::bfloat16& x) {
+  const float widened = static_cast<float>(x);
+  return Eigen::bfloat16(Eigen::numext::bessel_i0(widened));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_i0e(const Eigen::bfloat16& x) {
+  const float widened = static_cast<float>(x);
+  return Eigen::bfloat16(Eigen::numext::bessel_i0e(widened));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_i1(const Eigen::bfloat16& x) {
+  const float widened = static_cast<float>(x);
+  return Eigen::bfloat16(Eigen::numext::bessel_i1(widened));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_i1e(const Eigen::bfloat16& x) {
+  const float widened = static_cast<float>(x);
+  return Eigen::bfloat16(Eigen::numext::bessel_i1e(widened));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_j0(const Eigen::bfloat16& x) {
+  const float widened = static_cast<float>(x);
+  return Eigen::bfloat16(Eigen::numext::bessel_j0(widened));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_j1(const Eigen::bfloat16& x) {
+  const float widened = static_cast<float>(x);
+  return Eigen::bfloat16(Eigen::numext::bessel_j1(widened));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_y0(const Eigen::bfloat16& x) {
+  const float widened = static_cast<float>(x);
+  return Eigen::bfloat16(Eigen::numext::bessel_y0(widened));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_y1(const Eigen::bfloat16& x) {
+  const float widened = static_cast<float>(x);
+  return Eigen::bfloat16(Eigen::numext::bessel_y1(widened));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_k0(const Eigen::bfloat16& x) {
+  const float widened = static_cast<float>(x);
+  return Eigen::bfloat16(Eigen::numext::bessel_k0(widened));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_k0e(const Eigen::bfloat16& x) {
+  const float widened = static_cast<float>(x);
+  return Eigen::bfloat16(Eigen::numext::bessel_k0e(widened));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_k1(const Eigen::bfloat16& x) {
+  const float widened = static_cast<float>(x);
+  return Eigen::bfloat16(Eigen::numext::bessel_k1(widened));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bessel_k1e(const Eigen::bfloat16& x) {
+  const float widened = static_cast<float>(x);
+  return Eigen::bfloat16(Eigen::numext::bessel_k1e(widened));
+}
+#endif
+
+}  // end namespace numext
+}  // end namespace Eigen
+
+#endif // EIGEN_BESSELFUNCTIONS_BFLOAT16_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h
new file mode 100644
index 000000000..8606a9f8e
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsFunctors.h
@@ -0,0 +1,357 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BESSELFUNCTIONS_FUNCTORS_H
+#define EIGEN_BESSELFUNCTIONS_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal
+ * \brief Template functor to compute the modified Bessel function of the first
+ * kind of order zero.
+ * operator() evaluates numext::bessel_i0 on one coefficient; packetOp()
+ * forwards a packet to internal::pbessel_i0.
+ * \sa class CwiseUnaryOp, Cwise::bessel_i0()
+ */
+template <typename Scalar>
+struct scalar_bessel_i0_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_i0_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
+ using numext::bessel_i0;
+ return bessel_i0(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+ return internal::pbessel_i0(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_bessel_i0_op<Scalar> > {
+ enum {
+ // On average, a Chebyshev polynomial of order N=20 is computed.
+ // The cost is N multiplications and 2N additions. We also add
+ // the cost of an additional exp over i0e.
+ Cost = 28 * NumTraits<Scalar>::MulCost + 48 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasBessel
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the exponentially scaled modified Bessel
+ * function of the first kind of order zero
+ * operator() evaluates numext::bessel_i0e on one coefficient; packetOp()
+ * forwards a packet to internal::pbessel_i0e.
+ * \sa class CwiseUnaryOp, Cwise::bessel_i0e()
+ */
+template <typename Scalar>
+struct scalar_bessel_i0e_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_i0e_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
+ using numext::bessel_i0e;
+ return bessel_i0e(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+ return internal::pbessel_i0e(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_bessel_i0e_op<Scalar> > {
+ enum {
+ // On average, a Chebyshev polynomial of order N=20 is computed.
+ // The cost is N multiplications and 2N additions.
+ Cost = 20 * NumTraits<Scalar>::MulCost + 40 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasBessel
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the modified Bessel function of the first
+ * kind of order one
+ * operator() evaluates numext::bessel_i1 on one coefficient; packetOp()
+ * forwards a packet to internal::pbessel_i1.
+ * \sa class CwiseUnaryOp, Cwise::bessel_i1()
+ */
+template <typename Scalar>
+struct scalar_bessel_i1_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_i1_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
+ using numext::bessel_i1;
+ return bessel_i1(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+ return internal::pbessel_i1(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_bessel_i1_op<Scalar> > {
+ enum {
+ // On average, a Chebyshev polynomial of order N=20 is computed.
+ // The cost is N multiplications and 2N additions. We also add
+ // the cost of an additional exp over i1e.
+ Cost = 28 * NumTraits<Scalar>::MulCost + 48 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasBessel
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the exponentially scaled modified Bessel
+ * function of the first kind of order one
+ * operator() evaluates numext::bessel_i1e on one coefficient; packetOp()
+ * forwards a packet to internal::pbessel_i1e.
+ * \sa class CwiseUnaryOp, Cwise::bessel_i1e()
+ */
+template <typename Scalar>
+struct scalar_bessel_i1e_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_i1e_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
+ using numext::bessel_i1e;
+ return bessel_i1e(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+ return internal::pbessel_i1e(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_bessel_i1e_op<Scalar> > {
+ enum {
+ // On average, a Chebyshev polynomial of order N=20 is computed.
+ // The cost is N multiplications and 2N additions.
+ Cost = 20 * NumTraits<Scalar>::MulCost + 40 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasBessel
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the Bessel function of the first kind of
+ * order zero
+ * operator() evaluates numext::bessel_j0 on one coefficient; packetOp()
+ * forwards a packet to internal::pbessel_j0.
+ * \sa class CwiseUnaryOp, Cwise::bessel_j0()
+ */
+template <typename Scalar>
+struct scalar_bessel_j0_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_j0_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
+ using numext::bessel_j0;
+ return bessel_j0(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+ return internal::pbessel_j0(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_bessel_j0_op<Scalar> > {
+ enum {
+ // 6 polynomials of order ~N=8 are computed.
+ // The cost is N multiplications and N additions each, along with a
+ // sine, cosine and rsqrt cost.
+ Cost = 63 * NumTraits<Scalar>::MulCost + 48 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasBessel
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the Bessel function of the second kind of
+ * order zero
+ * operator() evaluates numext::bessel_y0 on one coefficient; packetOp()
+ * forwards a packet to internal::pbessel_y0.
+ * \sa class CwiseUnaryOp, Cwise::bessel_y0()
+ */
+template <typename Scalar>
+struct scalar_bessel_y0_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_y0_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
+ using numext::bessel_y0;
+ return bessel_y0(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+ return internal::pbessel_y0(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_bessel_y0_op<Scalar> > {
+ enum {
+ // 6 polynomials of order ~N=8 are computed.
+ // The cost is N multiplications and N additions each, along with a
+ // sine, cosine, rsqrt and j0 cost.
+ Cost = 126 * NumTraits<Scalar>::MulCost + 96 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasBessel
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the Bessel function of the first kind of
+ * order one
+ * operator() evaluates numext::bessel_j1 on one coefficient; packetOp()
+ * forwards a packet to internal::pbessel_j1.
+ * \sa class CwiseUnaryOp, Cwise::bessel_j1()
+ */
+template <typename Scalar>
+struct scalar_bessel_j1_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_j1_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
+ using numext::bessel_j1;
+ return bessel_j1(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+ return internal::pbessel_j1(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_bessel_j1_op<Scalar> > {
+ enum {
+ // 6 polynomials of order ~N=8 are computed.
+ // The cost is N multiplications and N additions each, along with a
+ // sine, cosine and rsqrt cost.
+ Cost = 63 * NumTraits<Scalar>::MulCost + 48 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasBessel
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the Bessel function of the second kind of
+ * order one
+ * operator() evaluates numext::bessel_y1 on one coefficient; packetOp()
+ * forwards a packet to internal::pbessel_y1.
+ * \sa class CwiseUnaryOp, Cwise::bessel_y1()
+ */
+template <typename Scalar>
+struct scalar_bessel_y1_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_y1_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
+ using numext::bessel_y1;
+ return bessel_y1(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+ return internal::pbessel_y1(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_bessel_y1_op<Scalar> > {
+ enum {
+ // 6 polynomials of order ~N=8 are computed.
+ // The cost is N multiplications and N additions each, along with a
+ // sine, cosine, rsqrt and j1 cost.
+ Cost = 126 * NumTraits<Scalar>::MulCost + 96 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasBessel
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the modified Bessel function of the second
+ * kind of order zero
+ * operator() evaluates numext::bessel_k0 on one coefficient; packetOp()
+ * forwards a packet to internal::pbessel_k0.
+ * \sa class CwiseUnaryOp, Cwise::bessel_k0()
+ */
+template <typename Scalar>
+struct scalar_bessel_k0_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_k0_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
+ using numext::bessel_k0;
+ return bessel_k0(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+ return internal::pbessel_k0(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_bessel_k0_op<Scalar> > {
+ enum {
+ // On average, a Chebyshev polynomial of order N=10 is computed.
+ // The cost is N multiplications and 2N additions. In addition we compute
+ // i0, a log, exp and prsqrt and sin and cos.
+ Cost = 68 * NumTraits<Scalar>::MulCost + 88 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasBessel
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the exponentially scaled modified Bessel
+ * function of the second kind of order zero
+ * operator() evaluates numext::bessel_k0e on one coefficient; packetOp()
+ * forwards a packet to internal::pbessel_k0e.
+ * \sa class CwiseUnaryOp, Cwise::bessel_k0e()
+ */
+template <typename Scalar>
+struct scalar_bessel_k0e_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_k0e_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
+ using numext::bessel_k0e;
+ return bessel_k0e(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+ return internal::pbessel_k0e(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_bessel_k0e_op<Scalar> > {
+ enum {
+ // On average, a Chebyshev polynomial of order N=10 is computed.
+ // The cost is N multiplications and 2N additions. In addition we compute
+ // i0, a log, exp and prsqrt and sin and cos.
+ Cost = 68 * NumTraits<Scalar>::MulCost + 88 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasBessel
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the modified Bessel function of the
+ * second kind of order one
+ * operator() evaluates numext::bessel_k1 on one coefficient; packetOp()
+ * forwards a packet to internal::pbessel_k1.
+ * \sa class CwiseUnaryOp, Cwise::bessel_k1()
+ */
+template <typename Scalar>
+struct scalar_bessel_k1_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_k1_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
+ using numext::bessel_k1;
+ return bessel_k1(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+ return internal::pbessel_k1(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_bessel_k1_op<Scalar> > {
+ enum {
+ // On average, a Chebyshev polynomial of order N=10 is computed.
+ // The cost is N multiplications and 2N additions. In addition we compute
+ // i1, a log, exp and prsqrt and sin and cos.
+ Cost = 68 * NumTraits<Scalar>::MulCost + 88 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasBessel
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the exponentially scaled modified Bessel
+ * function of the second kind of order one
+ * operator() evaluates numext::bessel_k1e on one coefficient; packetOp()
+ * forwards a packet to internal::pbessel_k1e.
+ * \sa class CwiseUnaryOp, Cwise::bessel_k1e()
+ */
+template <typename Scalar>
+struct scalar_bessel_k1e_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bessel_k1e_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
+ using numext::bessel_k1e;
+ return bessel_k1e(x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+ return internal::pbessel_k1e(x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_bessel_k1e_op<Scalar> > {
+ enum {
+ // On average, a Chebyshev polynomial of order N=10 is computed.
+ // The cost is N multiplications and 2N additions. In addition we compute
+ // i1, a log, exp and prsqrt and sin and cos.
+ Cost = 68 * NumTraits<Scalar>::MulCost + 88 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasBessel
+ };
+};
+
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_BESSELFUNCTIONS_FUNCTORS_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h
new file mode 100644
index 000000000..8930d1a3c
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsHalf.h
@@ -0,0 +1,66 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BESSELFUNCTIONS_HALF_H
+#define EIGEN_BESSELFUNCTIONS_HALF_H
+
+namespace Eigen {
+namespace numext {
+
+// Explicit specializations of the scalar Bessel functions for Eigen::half.
+// Each one converts the argument to float, calls the float version of the
+// same function and converts the result back to half.
+//
+// All twelve functions are declared with `template <>` so that they are
+// specializations of the generic numext templates rather than unrelated
+// overloads.
+#if EIGEN_HAS_C99_MATH
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_i0(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::bessel_i0(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_i0e(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::bessel_i0e(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_i1(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::bessel_i1(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_i1e(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::bessel_i1e(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_j0(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::bessel_j0(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_j1(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::bessel_j1(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_y0(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::bessel_y0(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_y1(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::bessel_y1(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_k0(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::bessel_k0(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_k0e(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::bessel_k0e(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_k1(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::bessel_k1(static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half bessel_k1e(const Eigen::half& x) {
+  return Eigen::half(Eigen::numext::bessel_k1e(static_cast<float>(x)));
+}
+#endif
+
+}  // end namespace numext
+}  // end namespace Eigen
+
+#endif // EIGEN_BESSELFUNCTIONS_HALF_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h
new file mode 100644
index 000000000..24812be1b
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsImpl.h
@@ -0,0 +1,1959 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BESSEL_FUNCTIONS_H
+#define EIGEN_BESSEL_FUNCTIONS_H
+
+namespace Eigen {
+namespace internal {
+
+// Parts of this code are based on the Cephes Math Library.
+//
+// Cephes Math Library Release 2.8: June, 2000
+// Copyright 1984, 1987, 1992, 2000 by Stephen L. Moshier
+//
+// Permission has been kindly provided by the original author
+// to incorporate the Cephes software into the Eigen codebase:
+//
+// From: Stephen Moshier
+// To: Eugene Brevdo
+// Subject: Re: Permission to wrap several cephes functions in Eigen
+//
+// Hello Eugene,
+//
+// Thank you for writing.
+//
+// If your licensing is similar to BSD, the formal way that has been
+// handled is simply to add a statement to the effect that you are incorporating
+// the Cephes software by permission of the author.
+//
+// Good luck with your project,
+// Steve
+
+
+/****************************************************************************
+ * Implementation of Bessel function, based on Cephes *
+ ****************************************************************************/
+
+// Result-type trait for bessel_i0e: the result has the same type as the
+// argument.
+template <typename ArgType>
+struct bessel_i0e_retval {
+  typedef ArgType type;
+};
+
+// Primary template of the i0e kernel, selected when no specialization exists
+// for ScalarType. The asserted condition depends on T and is always false,
+// so instantiating run() for such a type fails at compile time.
+template <typename T, typename ScalarType = typename unpacket_traits<T>::type>
+struct generic_i0e {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T&) {
+    EIGEN_STATIC_ASSERT((!internal::is_same<T, T>::value),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return ScalarType(0);
+  }
+};
+
+template <typename T>
+struct generic_i0e<T, float> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    /* Single-precision kernel, after Cephes i0ef.c.
+     *
+     * Modified Bessel function of order zero, exponentially scaled:
+     *
+     *   i0e(x) = exp(-|x|) j0( ix ).
+     *
+     * ACCURACY (as quoted by Cephes), relative error:
+     *
+     *   arithmetic   domain     # trials      peak         rms
+     *      IEEE      0,30        100000       3.7e-7      7.0e-8
+     *
+     * See i0f().
+     */
+
+    // Coefficients of the expansion used where |x| <= 8.
+    const float A[] = {-1.30002500998624804212E-8f, 6.04699502254191894932E-8f,
+                       -2.67079385394061173391E-7f, 1.11738753912010371815E-6f,
+                       -4.41673835845875056359E-6f, 1.64484480707288970893E-5f,
+                       -5.75419501008210370398E-5f, 1.88502885095841655729E-4f,
+                       -5.76375574538582365885E-4f, 1.63947561694133579842E-3f,
+                       -4.32430999505057594430E-3f, 1.05464603945949983183E-2f,
+                       -2.37374148058994688156E-2f, 4.93052842396707084878E-2f,
+                       -9.49010970480476444210E-2f, 1.71620901522208775349E-1f,
+                       -3.04682672343198398683E-1f, 6.76795274409476084995E-1f};
+
+    // Coefficients of the expansion used where |x| > 8.
+    const float B[] = {3.39623202570838634515E-9f, 2.26666899049817806459E-8f,
+                       2.04891858946906374183E-7f, 2.89137052083475648297E-6f,
+                       6.88975834691682398426E-5f, 3.36911647825569408990E-3f,
+                       8.04490411014108831608E-1f};
+
+    const T abs_x = pabs(x);
+
+    // |x| <= 8: evaluate the A series at 0.5 * |x| - 2.
+    const T small_arg = pmadd(pset1<T>(0.5f), abs_x, pset1<T>(-2.0f));
+    const T small_branch = internal::pchebevl<T, 18>::run(small_arg, A);
+
+    // |x| > 8: evaluate the B series at 32 / |x| - 2, times prsqrt(|x|).
+    const T large_arg = psub(pdiv(pset1<T>(32.0f), abs_x), pset1<T>(2.0f));
+    const T large_branch =
+        pmul(internal::pchebevl<T, 7>::run(large_arg, B), prsqrt(abs_x));
+
+    // Both branches are computed for every element; pick one per element.
+    // TODO: Perhaps instead check whether all packet elements are in
+    // [-8, 8] and evaluate a branch based off of that. It's possible
+    // in practice most elements are in this region.
+    const T in_small_range = pcmp_le(abs_x, pset1<T>(8.0f));
+    return pselect(in_small_range, small_branch, large_branch);
+  }
+};
+
+template <typename T>
+struct generic_i0e<T, double> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    /* Double-precision kernel, after Cephes i0e.c.
+     *
+     * Modified Bessel function of order zero, exponentially scaled:
+     *
+     *   i0e(x) = exp(-|x|) j0( ix ).
+     *
+     * ACCURACY (as quoted by Cephes), relative error:
+     *
+     *   arithmetic   domain     # trials      peak         rms
+     *      IEEE      0,30        30000       5.4e-16     1.2e-16
+     *
+     * See i0().
+     */
+
+    // Coefficients of the expansion used where |x| <= 8.
+    const double A[] = {-4.41534164647933937950E-18, 3.33079451882223809783E-17,
+                        -2.43127984654795469359E-16, 1.71539128555513303061E-15,
+                        -1.16853328779934516808E-14, 7.67618549860493561688E-14,
+                        -4.85644678311192946090E-13, 2.95505266312963983461E-12,
+                        -1.72682629144155570723E-11, 9.67580903537323691224E-11,
+                        -5.18979560163526290666E-10, 2.65982372468238665035E-9,
+                        -1.30002500998624804212E-8, 6.04699502254191894932E-8,
+                        -2.67079385394061173391E-7, 1.11738753912010371815E-6,
+                        -4.41673835845875056359E-6, 1.64484480707288970893E-5,
+                        -5.75419501008210370398E-5, 1.88502885095841655729E-4,
+                        -5.76375574538582365885E-4, 1.63947561694133579842E-3,
+                        -4.32430999505057594430E-3, 1.05464603945949983183E-2,
+                        -2.37374148058994688156E-2, 4.93052842396707084878E-2,
+                        -9.49010970480476444210E-2, 1.71620901522208775349E-1,
+                        -3.04682672343198398683E-1, 6.76795274409476084995E-1};
+
+    // Coefficients of the expansion used where |x| > 8.
+    const double B[] = {
+        -7.23318048787475395456E-18, -4.83050448594418207126E-18,
+        4.46562142029675999901E-17, 3.46122286769746109310E-17,
+        -2.82762398051658348494E-16, -3.42548561967721913462E-16,
+        1.77256013305652638360E-15, 3.81168066935262242075E-15,
+        -9.55484669882830764870E-15, -4.15056934728722208663E-14,
+        1.54008621752140982691E-14, 3.85277838274214270114E-13,
+        7.18012445138366623367E-13, -1.79417853150680611778E-12,
+        -1.32158118404477131188E-11, -3.14991652796324136454E-11,
+        1.18891471078464383424E-11, 4.94060238822496958910E-10,
+        3.39623202570838634515E-9, 2.26666899049817806459E-8,
+        2.04891858946906374183E-7, 2.89137052083475648297E-6,
+        6.88975834691682398426E-5, 3.36911647825569408990E-3,
+        8.04490411014108831608E-1};
+
+    const T abs_x = pabs(x);
+
+    // |x| <= 8: evaluate the A series at 0.5 * |x| - 2.
+    const T small_arg = pmadd(pset1<T>(0.5), abs_x, pset1<T>(-2.0));
+    const T small_branch = internal::pchebevl<T, 30>::run(small_arg, A);
+
+    // |x| > 8: evaluate the B series at 32 / |x| - 2, times prsqrt(|x|).
+    const T large_arg = psub(pdiv(pset1<T>(32.0), abs_x), pset1<T>(2.0));
+    const T large_branch =
+        pmul(internal::pchebevl<T, 25>::run(large_arg, B), prsqrt(abs_x));
+
+    // Both branches are computed for every element; pick one per element.
+    // TODO: Perhaps instead check whether all packet elements are in
+    // [-8, 8] and evaluate a branch based off of that. It's possible
+    // in practice most elements are in this region.
+    const T in_small_range = pcmp_le(abs_x, pset1<T>(8.0));
+    return pselect(in_small_range, small_branch, large_branch);
+  }
+};
+
+// Entry point for i0e on type T. Forwards to generic_i0e<T>, whose second
+// template argument defaults to unpacket_traits<T>::type.
+template <typename T>
+struct bessel_i0e_impl {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T x) {
+    typedef generic_i0e<T> Kernel;
+    return Kernel::run(x);
+  }
+};
+
+// Result-type trait for bessel_i0: the result has the same type as the
+// argument.
+template <typename ArgType>
+struct bessel_i0_retval {
+  typedef ArgType type;
+};
+
+// i0 kernel built on top of the scaled one: multiplies generic_i0e's result
+// by exp(|x|).
+template <typename T, typename ScalarType = typename unpacket_traits<T>::type>
+struct generic_i0 {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    const T exp_abs_x = pexp(pabs(x));
+    const T scaled_value = generic_i0e<T, ScalarType>::run(x);
+    return pmul(exp_abs_x, scaled_value);
+  }
+};
+
+// Entry point for i0 on type T. Forwards to generic_i0<T>, whose second
+// template argument defaults to unpacket_traits<T>::type.
+template <typename T>
+struct bessel_i0_impl {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T x) {
+    typedef generic_i0<T> Kernel;
+    return Kernel::run(x);
+  }
+};
+
+// Result-type trait for bessel_i1e: the result has the same type as the
+// argument.
+template <typename ArgType>
+struct bessel_i1e_retval {
+  typedef ArgType type;
+};
+
+// Primary template of the i1e kernel, selected when no specialization exists
+// for ScalarType. The asserted condition depends on T and is always false,
+// so instantiating run() for such a type fails at compile time.
+template <typename T, typename ScalarType = typename unpacket_traits<T>::type >
+struct generic_i1e {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T&) {
+    EIGEN_STATIC_ASSERT((!internal::is_same<T, T>::value),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return ScalarType(0);
+  }
+};
+
+template <typename T>
+struct generic_i1e<T, float> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    /* Single-precision kernel, after Cephes i1ef.c.
+     *
+     * Modified Bessel function of order one, exponentially scaled:
+     *
+     *   i1e(x) = -i exp(-|x|) j1( ix ).
+     *
+     * ACCURACY (as quoted by Cephes), relative error:
+     *
+     *   arithmetic   domain     # trials      peak         rms
+     *      IEEE      0, 30       30000       1.5e-6      1.5e-7
+     *
+     * See i1().
+     */
+
+    // Coefficients of the expansion used where |x| <= 8.
+    const float A[] = {9.38153738649577178388E-9f, -4.44505912879632808065E-8f,
+                       2.00329475355213526229E-7f, -8.56872026469545474066E-7f,
+                       3.47025130813767847674E-6f, -1.32731636560394358279E-5f,
+                       4.78156510755005422638E-5f, -1.61760815825896745588E-4f,
+                       5.12285956168575772895E-4f, -1.51357245063125314899E-3f,
+                       4.15642294431288815669E-3f, -1.05640848946261981558E-2f,
+                       2.47264490306265168283E-2f, -5.29459812080949914269E-2f,
+                       1.02643658689847095384E-1f, -1.76416518357834055153E-1f,
+                       2.52587186443633654823E-1f};
+
+    // Coefficients of the expansion used where |x| > 8.
+    const float B[] = {-3.83538038596423702205E-9f, -2.63146884688951950684E-8f,
+                       -2.51223623787020892529E-7f, -3.88256480887769039346E-6f,
+                       -1.10588938762623716291E-4f, -9.76109749136146840777E-3f,
+                       7.78576235018280120474E-1f};
+
+    const T abs_x = pabs(x);
+
+    // |x| <= 8: |x| times the A series evaluated at 0.5 * |x| - 2.
+    const T small_arg = pmadd(pset1<T>(0.5f), abs_x, pset1<T>(-2.0f));
+    const T small_branch =
+        pmul(abs_x, internal::pchebevl<T, 17>::run(small_arg, A));
+
+    // |x| > 8: the B series evaluated at 32 / |x| - 2, times prsqrt(|x|).
+    const T large_arg = psub(pdiv(pset1<T>(32.0f), abs_x), pset1<T>(2.0f));
+    const T large_branch =
+        pmul(internal::pchebevl<T, 7>::run(large_arg, B), prsqrt(abs_x));
+
+    // Both branches are computed for every element; pick one per element.
+    // TODO: Perhaps instead check whether all packet elements are in
+    // [-8, 8] and evaluate a branch based off of that. It's possible
+    // in practice most elements are in this region.
+    const T magnitude =
+        pselect(pcmp_le(abs_x, pset1<T>(8.0f)), small_branch, large_branch);
+
+    // The value above was computed from |x|; negate it where x < 0.
+    const T x_is_negative = pcmp_lt(x, pset1<T>(0.0f));
+    return pselect(x_is_negative, pnegate(magnitude), magnitude);
+  }
+};
+
+template <typename T>
+struct generic_i1e<T, double> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    /* Double-precision kernel, after Cephes i1e.c.
+     *
+     * Modified Bessel function of order one, exponentially scaled:
+     *
+     *   i1e(x) = -i exp(-|x|) j1( ix ).
+     *
+     * ACCURACY (as quoted by Cephes), relative error:
+     *
+     *   arithmetic   domain     # trials      peak         rms
+     *      IEEE      0, 30       30000       2.0e-15     2.0e-16
+     *
+     * See i1().
+     */
+
+    // Coefficients of the expansion used where |x| <= 8.
+    const double A[] = {2.77791411276104639959E-18, -2.11142121435816608115E-17,
+                        1.55363195773620046921E-16, -1.10559694773538630805E-15,
+                        7.60068429473540693410E-15, -5.04218550472791168711E-14,
+                        3.22379336594557470981E-13, -1.98397439776494371520E-12,
+                        1.17361862988909016308E-11, -6.66348972350202774223E-11,
+                        3.62559028155211703701E-10, -1.88724975172282928790E-9,
+                        9.38153738649577178388E-9, -4.44505912879632808065E-8,
+                        2.00329475355213526229E-7, -8.56872026469545474066E-7,
+                        3.47025130813767847674E-6, -1.32731636560394358279E-5,
+                        4.78156510755005422638E-5, -1.61760815825896745588E-4,
+                        5.12285956168575772895E-4, -1.51357245063125314899E-3,
+                        4.15642294431288815669E-3, -1.05640848946261981558E-2,
+                        2.47264490306265168283E-2, -5.29459812080949914269E-2,
+                        1.02643658689847095384E-1, -1.76416518357834055153E-1,
+                        2.52587186443633654823E-1};
+
+    // Coefficients of the expansion used where |x| > 8.
+    const double B[] = {
+        7.51729631084210481353E-18, 4.41434832307170791151E-18,
+        -4.65030536848935832153E-17, -3.20952592199342395980E-17,
+        2.96262899764595013876E-16, 3.30820231092092828324E-16,
+        -1.88035477551078244854E-15, -3.81440307243700780478E-15,
+        1.04202769841288027642E-14, 4.27244001671195135429E-14,
+        -2.10154184277266431302E-14, -4.08355111109219731823E-13,
+        -7.19855177624590851209E-13, 2.03562854414708950722E-12,
+        1.41258074366137813316E-11, 3.25260358301548823856E-11,
+        -1.89749581235054123450E-11, -5.58974346219658380687E-10,
+        -3.83538038596423702205E-9, -2.63146884688951950684E-8,
+        -2.51223623787020892529E-7, -3.88256480887769039346E-6,
+        -1.10588938762623716291E-4, -9.76109749136146840777E-3,
+        7.78576235018280120474E-1};
+
+    const T abs_x = pabs(x);
+
+    // |x| <= 8: |x| times the A series evaluated at 0.5 * |x| - 2.
+    const T small_arg = pmadd(pset1<T>(0.5), abs_x, pset1<T>(-2.0));
+    const T small_branch =
+        pmul(abs_x, internal::pchebevl<T, 29>::run(small_arg, A));
+
+    // |x| > 8: the B series evaluated at 32 / |x| - 2, times prsqrt(|x|).
+    const T large_arg = psub(pdiv(pset1<T>(32.0), abs_x), pset1<T>(2.0));
+    const T large_branch =
+        pmul(internal::pchebevl<T, 25>::run(large_arg, B), prsqrt(abs_x));
+
+    // Both branches are computed for every element; pick one per element.
+    // TODO: Perhaps instead check whether all packet elements are in
+    // [-8, 8] and evaluate a branch based off of that. It's possible
+    // in practice most elements are in this region.
+    const T magnitude =
+        pselect(pcmp_le(abs_x, pset1<T>(8.0)), small_branch, large_branch);
+
+    // The value above was computed from |x|; negate it where x < 0.
+    const T x_is_negative = pcmp_lt(x, pset1<T>(0.0));
+    return pselect(x_is_negative, pnegate(magnitude), magnitude);
+  }
+};
+
+// Entry point for i1e on type T. Forwards to generic_i1e<T>, whose second
+// template argument defaults to unpacket_traits<T>::type.
+template <typename T>
+struct bessel_i1e_impl {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T x) {
+    typedef generic_i1e<T> Kernel;
+    return Kernel::run(x);
+  }
+};
+
+// Result-type trait for bessel_i1: the result has the same type as the
+// argument.
+template <typename ArgType>
+struct bessel_i1_retval {
+  typedef ArgType type;
+};
+
+// i1 kernel built on top of the scaled one: multiplies generic_i1e's result
+// by exp(|x|).
+template <typename T, typename ScalarType = typename unpacket_traits<T>::type>
+struct generic_i1 {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    const T exp_abs_x = pexp(pabs(x));
+    const T scaled_value = generic_i1e<T, ScalarType>::run(x);
+    return pmul(exp_abs_x, scaled_value);
+  }
+};
+
+// Entry point for i1 on type T. Forwards to generic_i1<T>, whose second
+// template argument defaults to unpacket_traits<T>::type.
+template <typename T>
+struct bessel_i1_impl {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T x) {
+    typedef generic_i1<T> Kernel;
+    return Kernel::run(x);
+  }
+};
+
+// Result-type trait for bessel_k0e: the result has the same type as the
+// argument.
+template <typename ArgType>
+struct bessel_k0e_retval {
+  typedef ArgType type;
+};
+
+// Primary template of the k0e kernel, selected when no specialization exists
+// for ScalarType. The asserted condition depends on T and is always false,
+// so instantiating run() for such a type fails at compile time.
+template <typename T, typename ScalarType = typename unpacket_traits<T>::type>
+struct generic_k0e {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T&) {
+    EIGEN_STATIC_ASSERT((!internal::is_same<T, T>::value),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return ScalarType(0);
+  }
+};
+
+// Single-precision k0e, after cephes k0ef.c: modified Bessel function of the
+// third kind, order zero, exponentially scaled.
+//
+// Evaluation, per element of x:
+//   x <= 0      : +infinity
+//   0 < x <= 2  : exp(x) * (Chebyshev_A(x*x - 2) - log(x/2) * i0(x))
+//   x > 2       : Chebyshev_B(8/x - 2) / sqrt(x)
+//
+// Accuracy quoted by cephes (relative error, IEEE arithmetic, domain 0..30,
+// 30000 trials): peak 8.1e-7, rms 7.8e-8.
+template <typename T>
+struct generic_k0e<T, float> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    // Chebyshev coefficients used for x <= 2.
+    const float coef_small[] = {
+        1.90451637722020886025E-9f, 2.53479107902614945675E-7f,
+        2.28621210311945178607E-5f, 1.26461541144692592338E-3f,
+        3.59799365153615016266E-2f, 3.44289899924628486886E-1f,
+        -5.35327393233902768720E-1f};
+    // Chebyshev coefficients used for x > 2.
+    const float coef_large[] = {
+        -1.69753450938905987466E-9f, 8.57403401741422608519E-9f,
+        -4.66048989768794782956E-8f, 2.76681363944501510342E-7f,
+        -1.83175552271911948767E-6f, 1.39498137188764993662E-5f,
+        -1.28495495816278026384E-4f, 1.56988388573005337491E-3f,
+        -3.14481013119645005427E-2f, 2.44030308206595545468E0f};
+    const T pos_inf = pset1<T>(NumTraits<float>::infinity());
+    const T two = pset1<T>(2.0);
+
+    // Branch for x <= 2.
+    const T cheb_small = internal::pchebevl<T, 7>::run(
+        pmadd(x, x, pset1<T>(-2.0)), coef_small);
+    const T i0_x = generic_i0<T, float>::run(x);
+    const T neg_log_half_x = pnegate(plog(pmul(pset1<T>(0.5), x)));
+    const T small_arg =
+        pmul(pexp(x), pmadd(i0_x, neg_log_half_x, cheb_small));
+
+    // Branch for x > 2.
+    const T cheb_large = internal::pchebevl<T, 10>::run(
+        psub(pdiv(pset1<T>(8.0), x), two), coef_large);
+    const T large_arg = pmul(cheb_large, prsqrt(x));
+
+    // Pick the branch per element; non-positive arguments map to +infinity.
+    const T in_domain = pselect(pcmp_le(x, two), small_arg, large_arg);
+    return pselect(pcmp_le(x, pset1<T>(0.0)), pos_inf, in_domain);
+  }
+};
+
+// Double-precision k0e, after cephes k0e.c: modified Bessel function of the
+// third kind, order zero, exponentially scaled.
+//
+// Evaluation, per element of x:
+//   x <= 0      : +infinity
+//   0 < x <= 2  : exp(x) * (Chebyshev_A(x*x - 2) - log(x/2) * i0(x))
+//   x > 2       : Chebyshev_B(8/x - 2) / sqrt(x)
+//
+// Accuracy quoted by cephes (relative error, IEEE arithmetic, domain 0..30,
+// 30000 trials): peak 1.4e-15, rms 1.4e-16.
+template <typename T>
+struct generic_k0e<T, double> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    // Chebyshev coefficients used for x <= 2.
+    const double coef_small[] = {
+        1.37446543561352307156E-16, 4.25981614279661018399E-14,
+        1.03496952576338420167E-11, 1.90451637722020886025E-9,
+        2.53479107902614945675E-7, 2.28621210311945178607E-5,
+        1.26461541144692592338E-3, 3.59799365153615016266E-2,
+        3.44289899924628486886E-1, -5.35327393233902768720E-1};
+    // Chebyshev coefficients used for x > 2.
+    const double coef_large[] = {
+        5.30043377268626276149E-18, -1.64758043015242134646E-17,
+        5.21039150503902756861E-17, -1.67823109680541210385E-16,
+        5.51205597852431940784E-16, -1.84859337734377901440E-15,
+        6.34007647740507060557E-15, -2.22751332699166985548E-14,
+        8.03289077536357521100E-14, -2.98009692317273043925E-13,
+        1.14034058820847496303E-12, -4.51459788337394416547E-12,
+        1.85594911495471785253E-11, -7.95748924447710747776E-11,
+        3.57739728140030116597E-10, -1.69753450938905987466E-9,
+        8.57403401741422608519E-9, -4.66048989768794782956E-8,
+        2.76681363944501510342E-7, -1.83175552271911948767E-6,
+        1.39498137188764993662E-5, -1.28495495816278026384E-4,
+        1.56988388573005337491E-3, -3.14481013119645005427E-2,
+        2.44030308206595545468E0};
+    const T pos_inf = pset1<T>(NumTraits<double>::infinity());
+    const T two = pset1<T>(2.0);
+
+    // Branch for x <= 2.
+    const T cheb_small = internal::pchebevl<T, 10>::run(
+        pmadd(x, x, pset1<T>(-2.0)), coef_small);
+    const T i0_x = generic_i0<T, double>::run(x);
+    const T neg_log_half_x =
+        pmul(pset1<T>(-1.0), plog(pmul(pset1<T>(0.5), x)));
+    const T small_arg =
+        pmul(pexp(x), pmadd(i0_x, neg_log_half_x, cheb_small));
+
+    // Branch for x > 2.
+    const T cheb_large = internal::pchebevl<T, 25>::run(
+        psub(pdiv(pset1<T>(8.0), x), two), coef_large);
+    const T large_arg = pmul(cheb_large, prsqrt(x));
+
+    // Non-positive arguments map to +infinity; they always fall in the
+    // x <= 2 branch, so guarding that branch alone is sufficient.
+    const T guarded_small =
+        pselect(pcmp_le(x, pset1<T>(0.0)), pos_inf, small_arg);
+    return pselect(pcmp_le(x, two), guarded_small, large_arg);
+  }
+};
+
+// Entry point for k0e: hands the argument to generic_k0e<T>, which picks its
+// ScalarType from unpacket_traits<T>.
+template <typename T>
+struct bessel_k0e_impl {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T x) {
+    typedef generic_k0e<T> Kernel;
+    return Kernel::run(x);
+  }
+};
+
+// Exposes T itself as the nested `type`. Presumably consumed as the result
+// type of the bessel_k0 entry point (the use site is not visible here).
+template <typename T>
+struct bessel_k0_retval { typedef T type; };
+
+// Primary template for k0. It is only reached when no specialization exists
+// for ScalarType; instantiating run() then fails the static assertion.
+template <typename T, typename ScalarType = typename unpacket_traits<T>::type>
+struct generic_k0 {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T&) {
+    // The condition is written in terms of T so that it is always false but
+    // still dependent on the template parameter.
+    EIGEN_STATIC_ASSERT(!(internal::is_same<T, T>::value),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return ScalarType(0);
+  }
+};
+
+// Single-precision k0, after cephes k0f.c: modified Bessel function of the
+// third kind, order zero.
+//
+// The argument range is split at 2 and a Chebyshev expansion is used on each
+// side. Per element of x:
+//   x <= 0      : +infinity (the cephes "K0 domain" error value, MAXNUM)
+//   0 < x <= 2  : Chebyshev_A(x*x - 2) - log(x/2) * i0(x)
+//   x > 2       : exp(-x) * Chebyshev_B(8/x - 2) / sqrt(x)
+//
+// Accuracy quoted by cephes (relative error, IEEE arithmetic, domain 0..30,
+// 30000 trials): peak 7.8e-7, rms 8.5e-8.
+template <typename T>
+struct generic_k0<T, float> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    // Chebyshev coefficients used for x <= 2.
+    const float coef_small[] = {
+        1.90451637722020886025E-9f, 2.53479107902614945675E-7f,
+        2.28621210311945178607E-5f, 1.26461541144692592338E-3f,
+        3.59799365153615016266E-2f, 3.44289899924628486886E-1f,
+        -5.35327393233902768720E-1f};
+    // Chebyshev coefficients used for x > 2.
+    const float coef_large[] = {
+        -1.69753450938905987466E-9f, 8.57403401741422608519E-9f,
+        -4.66048989768794782956E-8f, 2.76681363944501510342E-7f,
+        -1.83175552271911948767E-6f, 1.39498137188764993662E-5f,
+        -1.28495495816278026384E-4f, 1.56988388573005337491E-3f,
+        -3.14481013119645005427E-2f, 2.44030308206595545468E0f};
+    const T pos_inf = pset1<T>(NumTraits<float>::infinity());
+    const T two = pset1<T>(2.0);
+
+    // Branch for x <= 2, with non-positive arguments replaced by +infinity.
+    const T cheb_small = internal::pchebevl<T, 7>::run(
+        pmadd(x, x, pset1<T>(-2.0)), coef_small);
+    const T i0_x = generic_i0<T, float>::run(x);
+    const T neg_log_half_x = pnegate(plog(pmul(pset1<T>(0.5), x)));
+    const T small_arg = pselect(pcmp_le(x, pset1<T>(0.0)), pos_inf,
+                                pmadd(i0_x, neg_log_half_x, cheb_small));
+
+    // Branch for x > 2.
+    const T decay = pexp(pnegate(x));
+    const T cheb_large = internal::pchebevl<T, 10>::run(
+        psub(pdiv(pset1<T>(8.0), x), two), coef_large);
+    const T large_arg = pmul(pmul(decay, cheb_large), prsqrt(x));
+
+    return pselect(pcmp_le(x, two), small_arg, large_arg);
+  }
+};
+
+// Double-precision k0 (cephes k0.c): modified Bessel function of the third
+// kind, order zero. This is the unscaled function; the exponentially scaled
+// variant is generic_k0e.
+//
+// The argument range is split at 2 and a Chebyshev expansion is used on each
+// side. Per element of x:
+//   x <= 0      : +infinity
+//   0 < x <= 2  : Chebyshev_A(x*x - 2) - log(x/2) * i0(x)
+//   x > 2       : exp(-x) * Chebyshev_B(8/x - 2) / sqrt(x)
+//
+// NOTE(review): the accuracy row previously attached to this specialization
+// (IEEE, 0..30, 30000 trials, peak 1.4e-15, rms 1.4e-16) is the one quoted
+// for k0e; confirm the figures for k0 against cephes k0.c.
+template <typename T>
+struct generic_k0<T, double> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    // Chebyshev coefficients used for x <= 2.
+    const double A[] = {
+        1.37446543561352307156E-16,
+        4.25981614279661018399E-14,
+        1.03496952576338420167E-11,
+        1.90451637722020886025E-9,
+        2.53479107902614945675E-7,
+        2.28621210311945178607E-5,
+        1.26461541144692592338E-3,
+        3.59799365153615016266E-2,
+        3.44289899924628486886E-1,
+        -5.35327393233902768720E-1};
+    // Chebyshev coefficients used for x > 2.
+    const double B[] = {
+        5.30043377268626276149E-18, -1.64758043015242134646E-17,
+        5.21039150503902756861E-17, -1.67823109680541210385E-16,
+        5.51205597852431940784E-16, -1.84859337734377901440E-15,
+        6.34007647740507060557E-15, -2.22751332699166985548E-14,
+        8.03289077536357521100E-14, -2.98009692317273043925E-13,
+        1.14034058820847496303E-12, -4.51459788337394416547E-12,
+        1.85594911495471785253E-11, -7.95748924447710747776E-11,
+        3.57739728140030116597E-10, -1.69753450938905987466E-9,
+        8.57403401741422608519E-9, -4.66048989768794782956E-8,
+        2.76681363944501510342E-7, -1.83175552271911948767E-6,
+        1.39498137188764993662E-5, -1.28495495816278026384E-4,
+        1.56988388573005337491E-3, -3.14481013119645005427E-2,
+        2.44030308206595545468E0
+    };
+    const T MAXNUM = pset1<T>(NumTraits<double>::infinity());
+    const T two = pset1<T>(2.0);
+
+    // Branch for x <= 2, with non-positive arguments replaced by +infinity.
+    T x_le_two = internal::pchebevl<T, 10>::run(
+        pmadd(x, x, pset1<T>(-2.0)), A);
+    x_le_two = pmadd(
+        generic_i0<T, double>::run(x), pnegate(
+            plog(pmul(pset1<T>(0.5), x))), x_le_two);
+    x_le_two = pselect(pcmp_le(x, pset1<T>(0.0)), MAXNUM, x_le_two);
+
+    // Branch for x > 2. The argument is negated with pnegate(), as in the
+    // float specialization, rather than with the built-in unary minus, so
+    // that the code does not require T to provide operator-.
+    T x_gt_two = pmul(
+        pmul(
+            pexp(pnegate(x)),
+            internal::pchebevl<T, 25>::run(
+                psub(pdiv(pset1<T>(8.0), x), two), B)),
+        prsqrt(x));
+    return pselect(pcmp_le(x, two), x_le_two, x_gt_two);
+  }
+};
+
+// Entry point for k0: hands the argument to generic_k0<T>, which picks its
+// ScalarType from unpacket_traits<T>.
+template <typename T>
+struct bessel_k0_impl {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T x) {
+    typedef generic_k0<T> Kernel;
+    return Kernel::run(x);
+  }
+};
+
+// Exposes T itself as the nested `type`. Presumably consumed as the result
+// type of the bessel_k1e entry point (the use site is not visible here).
+template <typename T>
+struct bessel_k1e_retval { typedef T type; };
+
+// Primary template for k1e. It is only reached when no specialization exists
+// for ScalarType; instantiating run() then fails the static assertion.
+template <typename T, typename ScalarType = typename unpacket_traits<T>::type>
+struct generic_k1e {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T&) {
+    // The condition is written in terms of T so that it is always false but
+    // still dependent on the template parameter.
+    EIGEN_STATIC_ASSERT(!(internal::is_same<T, T>::value),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return ScalarType(0);
+  }
+};
+
+// Single-precision k1e, after cephes k1ef.c: modified Bessel function of the
+// third kind, order one, exponentially scaled, i.e. k1e(x) = exp(x) * k1(x).
+//
+// Evaluation, per element of x:
+//   x <= 0      : +infinity
+//   0 < x <= 2  : exp(x) * (Chebyshev_A(x*x - 2) / x + log(x/2) * i1(x))
+//   x > 2       : Chebyshev_B(8/x - 2) / sqrt(x)
+//
+// Accuracy quoted by cephes (relative error, IEEE arithmetic, domain 0..30,
+// 30000 trials): peak 4.9e-7, rms 6.7e-8.
+template <typename T>
+struct generic_k1e<T, float> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    // Chebyshev coefficients used for x <= 2.
+    const float coef_small[] = {
+        -2.21338763073472585583E-8f, -2.43340614156596823496E-6f,
+        -1.73028895751305206302E-4f, -6.97572385963986435018E-3f,
+        -1.22611180822657148235E-1f, -3.53155960776544875667E-1f,
+        1.52530022733894777053E0f};
+    // Chebyshev coefficients used for x > 2.
+    const float coef_large[] = {
+        2.01504975519703286596E-9f, -1.03457624656780970260E-8f,
+        5.74108412545004946722E-8f, -3.50196060308781257119E-7f,
+        2.40648494783721712015E-6f, -1.93619797416608296024E-5f,
+        1.95215518471351631108E-4f, -2.85781685962277938680E-3f,
+        1.03923736576817238437E-1f, 2.72062619048444266945E0f};
+    const T pos_inf = pset1<T>(NumTraits<float>::infinity());
+    const T two = pset1<T>(2.0);
+
+    // Branch for x <= 2, with non-positive arguments replaced by +infinity.
+    const T cheb_over_x = pdiv(
+        internal::pchebevl<T, 7>::run(pmadd(x, x, pset1<T>(-2.0)),
+                                      coef_small),
+        x);
+    const T i1_x = generic_i1<T, float>::run(x);
+    const T log_half_x = plog(pmul(pset1<T>(0.5), x));
+    const T unscaled = pmadd(i1_x, log_half_x, cheb_over_x);
+    const T small_arg = pselect(pcmp_le(x, pset1<T>(0.0)), pos_inf,
+                                pmul(unscaled, pexp(x)));
+
+    // Branch for x > 2.
+    const T cheb_large = internal::pchebevl<T, 10>::run(
+        psub(pdiv(pset1<T>(8.0), x), two), coef_large);
+    const T large_arg = pmul(cheb_large, prsqrt(x));
+
+    return pselect(pcmp_le(x, two), small_arg, large_arg);
+  }
+};
+
+// Double-precision k1e, after cephes k1e.c: modified Bessel function of the
+// third kind, order one, exponentially scaled, i.e. k1e(x) = exp(x) * k1(x).
+//
+// Evaluation, per element of x:
+//   x <= 0      : +infinity
+//   0 < x <= 2  : exp(x) * (Chebyshev_A(x*x - 2) / x + log(x/2) * i1(x))
+//   x > 2       : Chebyshev_B(8/x - 2) / sqrt(x)
+//
+// Accuracy quoted by cephes (relative error, IEEE arithmetic, domain 0..30,
+// 30000 trials): peak 7.8e-16, rms 1.2e-16.
+template <typename T>
+struct generic_k1e<T, double> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    // Chebyshev coefficients used for x <= 2.
+    const double coef_small[] = {
+        -7.02386347938628759343E-18, -2.42744985051936593393E-15,
+        -6.66690169419932900609E-13, -1.41148839263352776110E-10,
+        -2.21338763073472585583E-8, -2.43340614156596823496E-6,
+        -1.73028895751305206302E-4, -6.97572385963986435018E-3,
+        -1.22611180822657148235E-1, -3.53155960776544875667E-1,
+        1.52530022733894777053E0};
+    // Chebyshev coefficients used for x > 2.
+    const double coef_large[] = {
+        -5.75674448366501715755E-18, 1.79405087314755922667E-17,
+        -5.68946255844285935196E-17, 1.83809354436663880070E-16,
+        -6.05704724837331885336E-16, 2.03870316562433424052E-15,
+        -7.01983709041831346144E-15, 2.47715442448130437068E-14,
+        -8.97670518232499435011E-14, 3.34841966607842919884E-13,
+        -1.28917396095102890680E-12, 5.13963967348173025100E-12,
+        -2.12996783842756842877E-11, 9.21831518760500529508E-11,
+        -4.19035475934189648750E-10, 2.01504975519703286596E-9,
+        -1.03457624656780970260E-8, 5.74108412545004946722E-8,
+        -3.50196060308781257119E-7, 2.40648494783721712015E-6,
+        -1.93619797416608296024E-5, 1.95215518471351631108E-4,
+        -2.85781685962277938680E-3, 1.03923736576817238437E-1,
+        2.72062619048444266945E0};
+    const T pos_inf = pset1<T>(NumTraits<double>::infinity());
+    const T two = pset1<T>(2.0);
+
+    // Branch for x <= 2, with non-positive arguments replaced by +infinity.
+    const T cheb_over_x = pdiv(
+        internal::pchebevl<T, 11>::run(pmadd(x, x, pset1<T>(-2.0)),
+                                       coef_small),
+        x);
+    const T i1_x = generic_i1<T, double>::run(x);
+    const T log_half_x = plog(pmul(pset1<T>(0.5), x));
+    const T unscaled = pmadd(i1_x, log_half_x, cheb_over_x);
+    const T small_arg = pselect(pcmp_le(x, pset1<T>(0.0)), pos_inf,
+                                pmul(unscaled, pexp(x)));
+
+    // Branch for x > 2.
+    const T cheb_large = internal::pchebevl<T, 25>::run(
+        psub(pdiv(pset1<T>(8.0), x), two), coef_large);
+    const T large_arg = pmul(cheb_large, prsqrt(x));
+
+    return pselect(pcmp_le(x, two), small_arg, large_arg);
+  }
+};
+
+// Entry point for k1e: hands the argument to generic_k1e<T>, which picks its
+// ScalarType from unpacket_traits<T>.
+template <typename T>
+struct bessel_k1e_impl {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T x) {
+    typedef generic_k1e<T> Kernel;
+    return Kernel::run(x);
+  }
+};
+
+// Exposes T itself as the nested `type`. Presumably consumed as the result
+// type of the bessel_k1 entry point (the use site is not visible here).
+template <typename T>
+struct bessel_k1_retval { typedef T type; };
+
+// Primary template for k1. It is only reached when no specialization exists
+// for ScalarType; instantiating run() then fails the static assertion.
+template <typename T, typename ScalarType = typename unpacket_traits<T>::type>
+struct generic_k1 {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T&) {
+    // The condition is written in terms of T so that it is always false but
+    // still dependent on the template parameter.
+    EIGEN_STATIC_ASSERT(!(internal::is_same<T, T>::value),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return ScalarType(0);
+  }
+};
+
+// Single-precision k1, after cephes k1f.c: modified Bessel function of the
+// third kind, order one.
+//
+// The argument range is split into [0, 2] and (2, infinity), with a Chebyshev
+// expansion on each side. Per element of x:
+//   x <= 0      : +infinity (the cephes "k1 domain" error value, MAXNUM)
+//   0 < x <= 2  : Chebyshev_A(x*x - 2) / x + log(x/2) * i1(x)
+//   x > 2       : exp(-x) * Chebyshev_B(8/x - 2) / sqrt(x)
+//
+// Accuracy quoted by cephes (relative error, IEEE arithmetic, domain 0..30,
+// 30000 trials): peak 4.6e-7, rms 7.6e-8.
+template <typename T>
+struct generic_k1<T, float> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    // Chebyshev coefficients used for x <= 2.
+    const float coef_small[] = {
+        -2.21338763073472585583E-8f, -2.43340614156596823496E-6f,
+        -1.73028895751305206302E-4f, -6.97572385963986435018E-3f,
+        -1.22611180822657148235E-1f, -3.53155960776544875667E-1f,
+        1.52530022733894777053E0f};
+    // Chebyshev coefficients used for x > 2.
+    const float coef_large[] = {
+        2.01504975519703286596E-9f, -1.03457624656780970260E-8f,
+        5.74108412545004946722E-8f, -3.50196060308781257119E-7f,
+        2.40648494783721712015E-6f, -1.93619797416608296024E-5f,
+        1.95215518471351631108E-4f, -2.85781685962277938680E-3f,
+        1.03923736576817238437E-1f, 2.72062619048444266945E0f};
+    const T pos_inf = pset1<T>(NumTraits<float>::infinity());
+    const T two = pset1<T>(2.0);
+
+    // Branch for x <= 2, with non-positive arguments replaced by +infinity.
+    const T cheb_over_x = pdiv(
+        internal::pchebevl<T, 7>::run(pmadd(x, x, pset1<T>(-2.0)),
+                                      coef_small),
+        x);
+    const T i1_x = generic_i1<T, float>::run(x);
+    const T log_half_x = plog(pmul(pset1<T>(0.5), x));
+    const T small_arg = pselect(pcmp_le(x, pset1<T>(0.0)), pos_inf,
+                                pmadd(i1_x, log_half_x, cheb_over_x));
+
+    // Branch for x > 2.
+    const T decay = pexp(pnegate(x));
+    const T cheb_large = internal::pchebevl<T, 10>::run(
+        psub(pdiv(pset1<T>(8.0), x), two), coef_large);
+    const T large_arg = pmul(decay, pmul(cheb_large, prsqrt(x)));
+
+    return pselect(pcmp_le(x, two), small_arg, large_arg);
+  }
+};
+
+// Double-precision k1 (cephes k1.c): modified Bessel function of the third
+// kind, order one.
+//
+// The argument range is split into [0, 2] and (2, infinity), with a Chebyshev
+// expansion on each side. Per element of x:
+//   x <= 0      : +infinity (the cephes "k1 domain" error value, MAXNUM)
+//   0 < x <= 2  : Chebyshev_A(x*x - 2) / x + log(x/2) * i1(x)
+//   x > 2       : exp(-x) * Chebyshev_B(8/x - 2) / sqrt(x)
+//
+// NOTE(review): the accuracy figures that used to accompany this
+// specialization (peak 4.6e-7, rms 7.6e-8) are identical to the ones quoted
+// for the single-precision k1f; confirm the double-precision figures against
+// cephes k1.c.
+template <typename T>
+struct generic_k1<T, double> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    // Chebyshev coefficients used for x <= 2.
+    const double A[] = {-7.02386347938628759343E-18, -2.42744985051936593393E-15,
+                        -6.66690169419932900609E-13, -1.41148839263352776110E-10,
+                        -2.21338763073472585583E-8, -2.43340614156596823496E-6,
+                        -1.73028895751305206302E-4, -6.97572385963986435018E-3,
+                        -1.22611180822657148235E-1, -3.53155960776544875667E-1,
+                        1.52530022733894777053E0};
+    // Chebyshev coefficients used for x > 2.
+    const double B[] = {-5.75674448366501715755E-18, 1.79405087314755922667E-17,
+                        -5.68946255844285935196E-17, 1.83809354436663880070E-16,
+                        -6.05704724837331885336E-16, 2.03870316562433424052E-15,
+                        -7.01983709041831346144E-15, 2.47715442448130437068E-14,
+                        -8.97670518232499435011E-14, 3.34841966607842919884E-13,
+                        -1.28917396095102890680E-12, 5.13963967348173025100E-12,
+                        -2.12996783842756842877E-11, 9.21831518760500529508E-11,
+                        -4.19035475934189648750E-10, 2.01504975519703286596E-9,
+                        -1.03457624656780970260E-8, 5.74108412545004946722E-8,
+                        -3.50196060308781257119E-7, 2.40648494783721712015E-6,
+                        -1.93619797416608296024E-5, 1.95215518471351631108E-4,
+                        -2.85781685962277938680E-3, 1.03923736576817238437E-1,
+                        2.72062619048444266945E0};
+    const T MAXNUM = pset1<T>(NumTraits<double>::infinity());
+    const T two = pset1<T>(2.0);
+
+    // Branch for x <= 2, with non-positive arguments replaced by +infinity.
+    T x_le_two = pdiv(internal::pchebevl<T, 11>::run(
+        pmadd(x, x, pset1<T>(-2.0)), A), x);
+    x_le_two = pmadd(
+        generic_i1<T, double>::run(x), plog(pmul(pset1<T>(0.5), x)), x_le_two);
+    x_le_two = pselect(pcmp_le(x, pset1<T>(0.0)), MAXNUM, x_le_two);
+
+    // Branch for x > 2. The argument is negated with pnegate(), as in the
+    // float specialization, rather than with the built-in unary minus, so
+    // that the code does not require T to provide operator-.
+    T x_gt_two = pmul(
+        pexp(pnegate(x)),
+        pmul(
+            internal::pchebevl<T, 25>::run(
+                psub(pdiv(pset1<T>(8.0), x), two), B),
+            prsqrt(x)));
+    return pselect(pcmp_le(x, two), x_le_two, x_gt_two);
+  }
+};
+
+// Entry point for k1: hands the argument to generic_k1<T>, which picks its
+// ScalarType from unpacket_traits<T>.
+template <typename T>
+struct bessel_k1_impl {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T x) {
+    typedef generic_k1<T> Kernel;
+    return Kernel::run(x);
+  }
+};
+
+// Exposes T itself as the nested `type`. Presumably consumed as the result
+// type of the bessel_j0 entry point (the use site is not visible here).
+template <typename T>
+struct bessel_j0_retval { typedef T type; };
+
+// Primary template for j0. It is only reached when no specialization exists
+// for ScalarType; instantiating run() then fails the static assertion.
+template <typename T, typename ScalarType = typename unpacket_traits<T>::type>
+struct generic_j0 {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T&) {
+    // The condition is written in terms of T so that it is always false but
+    // still dependent on the template parameter.
+    EIGEN_STATIC_ASSERT(!(internal::is_same<T, T>::value),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return ScalarType(0);
+  }
+};
+
+// Single-precision j0, after cephes j0f.c: Bessel function of order zero.
+//
+// The function is evaluated on y = |x| with w = y*y, split at y = 2:
+//   y < 1e-3    : 1 - w/4
+//   y <= 2      : (w - DR1) * P(w), a polynomial with a zero of the function
+//                 factored out
+//   y > 2       : Modulus(y) * cos(Phase(y)), with
+//                 Modulus(y) = sqrt(1/y) * Q(1/y) and
+//                 Phase(y)   = y + (1/y) * R(1/y^2) - pi/4
+//
+// Accuracy quoted by cephes (absolute error, IEEE arithmetic, 100000 trials):
+//   domain 0..2  : peak 1.3e-7, rms 3.6e-8
+//   domain 2..32 : peak 1.9e-7, rms 5.4e-8
+template <typename T>
+struct generic_j0<T, float> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    // Polynomial P for the small-argument branch.
+    const float JP[] = {-6.068350350393235E-008f, 6.388945720783375E-006f,
+                        -3.969646342510940E-004f, 1.332913422519003E-002f,
+                        -1.729150680240724E-001f};
+    // Polynomial Q of the modulus.
+    const float MO[] = {-6.838999669318810E-002f, 1.864949361379502E-001f,
+                        -2.145007480346739E-001f, 1.197549369473540E-001f,
+                        -3.560281861530129E-003f, -4.969382655296620E-002f,
+                        -3.355424622293709E-006f, 7.978845717621440E-001f};
+    // Polynomial R of the phase.
+    const float PH[] = {3.242077816988247E+001f, -3.630592630518434E+001f,
+                        1.756221482109099E+001f, -4.974978466280903E+000f,
+                        1.001973420681837E+000f, -1.939906941791308E-001f,
+                        6.490598792654666E-002f, -1.249992184872738E-001f};
+    const T first_zero_sq = pset1<T>(5.78318596294678452118f);
+    const T neg_quarter_pi = pset1<T>(-0.7853981633974483096f); /* -pi / 4 */
+
+    const T abs_x = pabs(x);
+    const T abs_x_sq = pmul(abs_x, abs_x);
+
+    // Small-argument branch (|x| <= 2).
+    const T tiny = pmadd(abs_x_sq, pset1<T>(-0.25f), pset1<T>(1.0f));
+    const T poly = pmul(psub(abs_x_sq, first_zero_sq),
+                        internal::ppolevl<T, 4>::run(abs_x_sq, JP));
+    const T small_arg =
+        pselect(pcmp_lt(abs_x, pset1<T>(1.0e-3f)), tiny, poly);
+
+    // Large-argument branch (|x| > 2): modulus times cosine of the phase.
+    const T inv = pdiv(pset1<T>(1.0f), abs_x);
+    const T modulus = pmul(prsqrt(abs_x), internal::ppolevl<T, 7>::run(inv, MO));
+    const T inv_sq = pmul(inv, inv);
+    const T phase_offset =
+        pmadd(inv, internal::ppolevl<T, 7>::run(inv_sq, PH), neg_quarter_pi);
+    const T large_arg = pmul(modulus, pcos(padd(phase_offset, abs_x)));
+
+    return pselect(pcmp_le(abs_x, pset1<T>(2.0)), small_arg, large_arg);
+  }
+};
+
+// Double-precision j0, after cephes j0.c: Bessel function of order zero.
+//
+// The function is evaluated on y = |x| with w = y*y, split at y = 5:
+//   y < 1e-5    : 1 - w/4
+//   y <= 5      : (w - DR1) * (w - DR2) * RP(w) / RQ(w), a rational
+//                 approximation with two zeros of the function factored out
+//   y > 5       : Hankel asymptotic expansion built from the rational
+//                 functions PP/PQ and QP/QQ evaluated at 25/w
+//
+// Accuracy quoted by cephes (absolute error, domain 0..30):
+//   DEC  : 10000 trials, peak 4.4e-17, rms 6.3e-18
+//   IEEE : 60000 trials, peak 4.2e-16, rms 1.1e-16
+template <typename T>
+struct generic_j0<T, double> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    const double PP[] = {7.96936729297347051624E-4, 8.28352392107440799803E-2,
+                         1.23953371646414299388E0, 5.44725003058768775090E0,
+                         8.74716500199817011941E0, 5.30324038235394892183E0,
+                         9.99999999999999997821E-1};
+    const double PQ[] = {9.24408810558863637013E-4, 8.56288474354474431428E-2,
+                         1.25352743901058953537E0, 5.47097740330417105182E0,
+                         8.76190883237069594232E0, 5.30605288235394617618E0,
+                         1.00000000000000000218E0};
+    const double QP[] = {-1.13663838898469149931E-2, -1.28252718670509318512E0,
+                         -1.95539544257735972385E1, -9.32060152123768231369E1,
+                         -1.77681167980488050595E2, -1.47077505154951170175E2,
+                         -5.14105326766599330220E1, -6.05014350600728481186E0};
+    const double QQ[] = {1.00000000000000000000E0, 6.43178256118178023184E1,
+                         8.56430025976980587198E2, 3.88240183605401609683E3,
+                         7.24046774195652478189E3, 5.93072701187316984827E3,
+                         2.06209331660327847417E3, 2.42005740240291393179E2};
+    const double RP[] = {-4.79443220978201773821E9, 1.95617491946556577543E12,
+                         -2.49248344360967716204E14, 9.70862251047306323952E15};
+    const double RQ[] = {1.00000000000000000000E0, 4.99563147152651017219E2,
+                         1.73785401676374683123E5, 4.84409658339962045305E7,
+                         1.11855537045356834862E10, 2.11277520115489217587E12,
+                         3.10518229857422583814E14, 3.18121955943204943306E16,
+                         1.71086294081043136091E18};
+    const T first_zero_sq = pset1<T>(5.78318596294678452118E0);
+    const T second_zero_sq = pset1<T>(3.04712623436620863991E1);
+    const T sqrt_two_over_pi =
+        pset1<T>(7.9788456080286535587989E-1); /* sqrt(2 / pi) */
+    const T neg_quarter_pi = pset1<T>(-0.7853981633974483096); /* -pi / 4 */
+
+    const T abs_x = pabs(x);
+    const T abs_x_sq = pmul(abs_x, abs_x);
+
+    // Small-argument branch (|x| <= 5).
+    const T tiny = pmadd(abs_x_sq, pset1<T>(-0.25), pset1<T>(1.0));
+    const T zeros = pmul(psub(abs_x_sq, first_zero_sq),
+                         psub(abs_x_sq, second_zero_sq));
+    const T rational = pdiv(internal::ppolevl<T, 3>::run(abs_x_sq, RP),
+                            internal::ppolevl<T, 8>::run(abs_x_sq, RQ));
+    const T small_arg = pselect(pcmp_lt(abs_x, pset1<T>(1.0e-5)), tiny,
+                                pmul(zeros, rational));
+
+    // Large-argument branch (|x| > 5).
+    const T s = pdiv(pset1<T>(25.0), abs_x_sq);
+    const T p_ratio = pdiv(internal::ppolevl<T, 6>::run(s, PP),
+                           internal::ppolevl<T, 6>::run(s, PQ));
+    const T q_ratio = pdiv(internal::ppolevl<T, 7>::run(s, QP),
+                           internal::ppolevl<T, 7>::run(s, QQ));
+    const T shifted = padd(abs_x, neg_quarter_pi);
+    const T neg_five_over_y = pdiv(pset1<T>(-5.0), abs_x);
+    const T combined =
+        pmadd(p_ratio, pcos(shifted),
+              pmul(neg_five_over_y, pmul(q_ratio, psin(shifted))));
+    const T large_arg =
+        pmul(combined, pmul(sqrt_two_over_pi, prsqrt(abs_x)));
+
+    return pselect(pcmp_le(abs_x, pset1<T>(5.0)), small_arg, large_arg);
+  }
+};
+
+// Entry point for j0: hands the argument to generic_j0<T>, which picks its
+// ScalarType from unpacket_traits<T>.
+template <typename T>
+struct bessel_j0_impl {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T x) {
+    typedef generic_j0<T> Kernel;
+    return Kernel::run(x);
+  }
+};
+
+// Exposes T itself as the nested `type`. Presumably consumed as the result
+// type of the bessel_y0 entry point (the use site is not visible here).
+template <typename T>
+struct bessel_y0_retval { typedef T type; };
+
+// Primary template for y0. It is only reached when no specialization exists
+// for ScalarType; instantiating run() then fails the static assertion.
+template <typename T, typename ScalarType = typename unpacket_traits<T>::type>
+struct generic_y0 {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T&) {
+    // The condition is written in terms of T so that it is always false but
+    // still dependent on the template parameter.
+    EIGEN_STATIC_ASSERT(!(internal::is_same<T, T>::value),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return ScalarType(0);
+  }
+};
+
+// Single-precision y0, after cephes j0f.c: Bessel function of the second
+// kind, order zero.
+//
+// With w = x*x, per element of x:
+//   x <= 0      : -infinity
+//   0 < x <= 2  : (w - YZ1) * P(w) + (2/pi) * log(x) * j0(x), which requires
+//                 a call to the single-precision j0 kernel
+//   x > 2       : Modulus(x) * sin(Phase(x)), with
+//                 Modulus(x) = sqrt(1/x) * Q(1/x) and
+//                 Phase(x)   = x + (1/x) * S(1/x^2) - pi/4
+//
+// Accuracy quoted by cephes (absolute error when y0(x) < 1, else relative
+// error; IEEE arithmetic, 100000 trials):
+//   domain 0..2  : peak 2.4e-7, rms 3.4e-8
+//   domain 2..32 : peak 1.8e-7, rms 5.3e-8
+template <typename T>
+struct generic_y0<T, float> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    // Polynomial P for the small-argument branch.
+    const float YP[] = {9.454583683980369E-008f, -9.413212653797057E-006f,
+                        5.344486707214273E-004f, -1.584289289821316E-002f,
+                        1.707584643733568E-001f};
+    // Polynomial Q of the modulus.
+    const float MO[] = {-6.838999669318810E-002f, 1.864949361379502E-001f,
+                        -2.145007480346739E-001f, 1.197549369473540E-001f,
+                        -3.560281861530129E-003f, -4.969382655296620E-002f,
+                        -3.355424622293709E-006f, 7.978845717621440E-001f};
+    // Polynomial S of the phase.
+    const float PH[] = {3.242077816988247E+001f, -3.630592630518434E+001f,
+                        1.756221482109099E+001f, -4.974978466280903E+000f,
+                        1.001973420681837E+000f, -1.939906941791308E-001f,
+                        6.490598792654666E-002f, -1.249992184872738E-001f};
+    const T first_zero_sq = pset1<T>(0.43221455686510834878f);
+    const T two_over_pi = pset1<T>(0.636619772367581343075535f); /* 2 / pi */
+    const T neg_quarter_pi = pset1<T>(-0.7853981633974483096f); /* -pi / 4 */
+    const T neg_inf = pset1<T>(-NumTraits<float>::infinity());
+
+    const T x_sq = pmul(x, x);
+
+    // Small-argument branch (x <= 2); non-positive arguments give -infinity.
+    const T log_term =
+        pmul(two_over_pi, pmul(plog(x), generic_j0<T, float>::run(x)));
+    const T poly_term = pmadd(psub(x_sq, first_zero_sq),
+                              internal::ppolevl<T, 4>::run(x_sq, YP), log_term);
+    const T small_arg =
+        pselect(pcmp_le(x, pset1<T>(0.0)), neg_inf, poly_term);
+
+    // Large-argument branch (x > 2): modulus times sine of the phase.
+    const T inv = pdiv(pset1<T>(1.0), x);
+    const T modulus = pmul(prsqrt(x), internal::ppolevl<T, 7>::run(inv, MO));
+    const T inv_sq = pmul(inv, inv);
+    const T phase_offset =
+        pmadd(inv, internal::ppolevl<T, 7>::run(inv_sq, PH), neg_quarter_pi);
+    const T large_arg = pmul(modulus, psin(padd(phase_offset, x)));
+
+    return pselect(pcmp_le(x, pset1<T>(2.0)), small_arg, large_arg);
+  }
+};
+
+// Double-precision y0, after cephes j0.c: Bessel function of the second
+// kind, order zero.
+//
+// With w = x*x, per element of x:
+//   x <= 0      : -infinity
+//   0 < x <= 5  : YP(w) / YQ(w) + (2/pi) * log(x) * j0(x), which requires a
+//                 call to the double-precision j0 kernel
+//   x > 5       : Hankel asymptotic expansion built from the rational
+//                 functions PP/PQ and QP/QQ evaluated at 25/w
+//
+// Accuracy quoted by cephes (absolute error when y0(x) < 1, else relative
+// error; domain 0..30):
+//   DEC  : 9400 trials, peak 7.0e-17, rms 7.9e-18
+//   IEEE : 30000 trials, peak 1.3e-15, rms 1.6e-16
+template <typename T>
+struct generic_y0<T, double> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    const double PP[] = {7.96936729297347051624E-4, 8.28352392107440799803E-2,
+                         1.23953371646414299388E0, 5.44725003058768775090E0,
+                         8.74716500199817011941E0, 5.30324038235394892183E0,
+                         9.99999999999999997821E-1};
+    const double PQ[] = {9.24408810558863637013E-4, 8.56288474354474431428E-2,
+                         1.25352743901058953537E0, 5.47097740330417105182E0,
+                         8.76190883237069594232E0, 5.30605288235394617618E0,
+                         1.00000000000000000218E0};
+    const double QP[] = {-1.13663838898469149931E-2, -1.28252718670509318512E0,
+                         -1.95539544257735972385E1, -9.32060152123768231369E1,
+                         -1.77681167980488050595E2, -1.47077505154951170175E2,
+                         -5.14105326766599330220E1, -6.05014350600728481186E0};
+    const double QQ[] = {1.00000000000000000000E0, 6.43178256118178023184E1,
+                         8.56430025976980587198E2, 3.88240183605401609683E3,
+                         7.24046774195652478189E3, 5.93072701187316984827E3,
+                         2.06209331660327847417E3, 2.42005740240291393179E2};
+    const double YP[] = {1.55924367855235737965E4, -1.46639295903971606143E7,
+                         5.43526477051876500413E9, -9.82136065717911466409E11,
+                         8.75906394395366999549E13, -3.46628303384729719441E15,
+                         4.42733268572569800351E16, -1.84950800436986690637E16};
+    const double YQ[] = {1.00000000000000000000E0, 1.04128353664259848412E3,
+                         6.26107330137134956842E5, 2.68919633393814121987E8,
+                         8.64002487103935000337E10, 2.02979612750105546709E13,
+                         3.17157752842975028269E15, 2.50596256172653059228E17};
+    const T sqrt_two_over_pi =
+        pset1<T>(7.9788456080286535587989E-1); /* sqrt(2 / pi) */
+    const T two_over_pi = pset1<T>(0.636619772367581343075535); /* 2 / pi */
+    const T neg_quarter_pi = pset1<T>(-0.7853981633974483096); /* -pi / 4 */
+    const T neg_inf = pset1<T>(-NumTraits<double>::infinity());
+
+    const T x_sq = pmul(x, x);
+
+    // Small-argument branch (x <= 5); non-positive arguments give -infinity.
+    const T rational = pdiv(internal::ppolevl<T, 7>::run(x_sq, YP),
+                            internal::ppolevl<T, 7>::run(x_sq, YQ));
+    const T with_log = pmadd(pmul(two_over_pi, plog(x)),
+                             generic_j0<T, double>::run(x), rational);
+    const T small_arg =
+        pselect(pcmp_le(x, pset1<T>(0.0)), neg_inf, with_log);
+
+    // Large-argument branch (x > 5).
+    const T s = pdiv(pset1<T>(25.0), x_sq);
+    const T p_ratio = pdiv(internal::ppolevl<T, 6>::run(s, PP),
+                           internal::ppolevl<T, 6>::run(s, PQ));
+    const T q_ratio = pdiv(internal::ppolevl<T, 7>::run(s, QP),
+                           internal::ppolevl<T, 7>::run(s, QQ));
+    const T shifted = padd(x, neg_quarter_pi);
+    const T five_over_x = pdiv(pset1<T>(5.0), x);
+    const T combined =
+        pmadd(p_ratio, psin(shifted),
+              pmul(five_over_x, pmul(q_ratio, pcos(shifted))));
+    const T large_arg = pmul(combined, pmul(sqrt_two_over_pi, prsqrt(x)));
+
+    return pselect(pcmp_le(x, pset1<T>(5.0)), small_arg, large_arg);
+  }
+};
+
+// Entry point for y0: hands the argument to generic_y0<T>, which picks its
+// ScalarType from unpacket_traits<T>.
+template <typename T>
+struct bessel_y0_impl {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T x) {
+    typedef generic_y0<T> Kernel;
+    return Kernel::run(x);
+  }
+};
+
+// Exposes T itself as the nested `type`. Presumably consumed as the result
+// type of the bessel_j1 entry point (the use site is not visible here).
+template <typename T>
+struct bessel_j1_retval { typedef T type; };
+
+// Primary template for j1. It is only reached when no specialization exists
+// for ScalarType; instantiating run() then fails the static assertion.
+template <typename T, typename ScalarType = typename unpacket_traits<T>::type>
+struct generic_j1 {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T&) {
+    // The condition is written in terms of T so that it is always false but
+    // still dependent on the template parameter.
+    EIGEN_STATIC_ASSERT(!(internal::is_same<T, T>::value),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return ScalarType(0);
+  }
+};
+
+// Single-precision j1, after cephes j1f.c: Bessel function of order one.
+//
+// With y = |x| and w = y*y, split at y = 2:
+//   y <= 2      : (w - Z1) * x * P(w), a polynomial with a zero of the
+//                 function factored out
+//   y > 2       : Modulus(y) * cos(Phase(y)), sign-flipped for x < 0, with
+//                 Modulus(y) = sqrt(1/y) * Q(1/y) and
+//                 Phase(y)   = y + (1/y) * R(1/y^2) - 3*pi/4
+//
+// Accuracy quoted by cephes (absolute error, IEEE arithmetic, 100000 trials):
+//   domain 0..2  : peak 1.2e-7, rms 2.5e-8
+//   domain 2..32 : peak 2.0e-7, rms 5.3e-8
+template <typename T>
+struct generic_j1<T, float> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE T run(const T& x) {
+    // Polynomial P for the small-argument branch.
+    const float JP[] = {-4.878788132172128E-009f, 6.009061827883699E-007f,
+                        -4.541343896997497E-005f, 1.937383947804541E-003f,
+                        -3.405537384615824E-002f};
+    // Polynomial Q of the modulus.
+    const float MO1[] = {6.913942741265801E-002f, -2.284801500053359E-001f,
+                         3.138238455499697E-001f, -2.102302420403875E-001f,
+                         5.435364690523026E-003f, 1.493389585089498E-001f,
+                         4.976029650847191E-006f, 7.978845453073848E-001f};
+    // Polynomial R of the phase.
+    const float PH1[] = {-4.497014141919556E+001f, 5.073465654089319E+001f,
+                         -2.485774108720340E+001f, 7.222973196770240E+000f,
+                         -1.544842782180211E+000f, 3.503787691653334E-001f,
+                         -1.637986776941202E-001f, 3.749989509080821E-001f};
+    const T first_zero_sq = pset1<T>(1.46819706421238932572E1f);
+    const T neg_three_quarter_pi =
+        pset1<T>(-2.35619449019234492885f); /* -3*pi/4 */
+
+    const T abs_x = pabs(x);
+    const T abs_x_sq = pmul(abs_x, abs_x);
+
+    // Small-argument branch (|x| <= 2). The factor x, rather than |x|, makes
+    // this branch odd in x.
+    const T small_arg =
+        pmul(psub(abs_x_sq, first_zero_sq),
+             pmul(x, internal::ppolevl<T, 4>::run(abs_x_sq, JP)));
+
+    // Large-argument branch (|x| > 2): modulus times cosine of the phase.
+    const T inv = pdiv(pset1<T>(1.0f), abs_x);
+    const T modulus =
+        pmul(prsqrt(abs_x), internal::ppolevl<T, 7>::run(inv, MO1));
+    const T inv_sq = pmul(inv, inv);
+    const T phase_offset = pmadd(
+        inv, internal::ppolevl<T, 7>::run(inv_sq, PH1), neg_three_quarter_pi);
+    const T even_large = pmul(modulus, pcos(padd(phase_offset, abs_x)));
+    // j1 is an odd function. This implementation differs from cephes to
+    // take this fact in to account. Cephes returns -j1(x) for y > 2 range.
+    const T large_arg = pselect(pcmp_lt(x, pset1<T>(0.0f)),
+                                pnegate(even_large), even_large);
+
+    return pselect(pcmp_le(abs_x, pset1<T>(2.0f)), small_arg, large_arg);
+  }
+};
+
+template <typename T>
+struct generic_j1<T, double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE T run(const T& x) {
+ /* j1.c
+ * Bessel function of order one
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double x, y, j1();
+ *
+ * y = j1( x );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns Bessel function of order one of the argument.
+ *
+ * The domain is divided into the intervals [0, 5] and
+ * (5, infinity). In the first interval a rational function
+ * in x^2 (RP/RQ) is scaled by x (x^2 - Z1) (x^2 - Z2). In the
+ * second, the asymptotic trigonometric representation is
+ * employed using two rational functions (PP/PQ and QP/QQ).
+ *
+ *
+ *
+ * ACCURACY:
+ *
+ * Absolute error:
+ * arithmetic domain # trials peak rms
+ * DEC 0, 30 10000 4.0e-17 1.1e-17
+ * IEEE 0, 30 30000 2.6e-16 1.1e-16
+ *
+ */
+ const double PP[] = {7.62125616208173112003E-4, 7.31397056940917570436E-2,
+ 1.12719608129684925192E0, 5.11207951146807644818E0,
+ 8.42404590141772420927E0, 5.21451598682361504063E0,
+ 1.00000000000000000254E0};
+ const double PQ[] = {5.71323128072548699714E-4, 6.88455908754495404082E-2,
+ 1.10514232634061696926E0, 5.07386386128601488557E0,
+ 8.39985554327604159757E0, 5.20982848682361821619E0,
+ 9.99999999999999997461E-1};
+ const double QP[] = {5.10862594750176621635E-2, 4.98213872951233449420E0,
+ 7.58238284132545283818E1, 3.66779609360150777800E2,
+ 7.10856304998926107277E2, 5.97489612400613639965E2,
+ 2.11688757100572135698E2, 2.52070205858023719784E1};
+ const double QQ[] = {1.00000000000000000000E0, 7.42373277035675149943E1,
+ 1.05644886038262816351E3, 4.98641058337653607651E3,
+ 9.56231892404756170795E3, 7.99704160447350683650E3,
+ 2.82619278517639096600E3, 3.36093607810698293419E2};
+ const double RP[] = {-8.99971225705559398224E8, 4.52228297998194034323E11,
+ -7.27494245221818276015E13, 3.68295732863852883286E15};
+ const double RQ[] = {1.00000000000000000000E0, 6.20836478118054335476E2,
+ 2.56987256757748830383E5, 8.35146791431949253037E7,
+ 2.21511595479792499675E10, 4.74914122079991414898E12,
+ 7.84369607876235854894E14, 8.95222336184627338078E16,
+ 5.32278620332680085395E18};
+ const T Z1 = pset1<T>(1.46819706421238932572E1);
+ const T Z2 = pset1<T>(4.92184563216946036703E1);
+ const T NEG_THPIO4 = pset1<T>(-2.35619449019234492885); /* -3*pi/4 */
+ const T SQ2OPI = pset1<T>(7.9788456080286535587989E-1); /* sqrt(2 / pi) */
+ T y = pabs(x);  // |x|
+ T z = pmul(y, y);  // x^2
+ T y_le_five = pdiv(internal::ppolevl<T, 3>::run(z, RP),
+ internal::ppolevl<T, 8>::run(z, RQ));
+ y_le_five = pmul(pmul(pmul(y_le_five, x), psub(z, Z1)), psub(z, Z2));  // x (z - Z1) (z - Z2) RP(z)/RQ(z)
+ T s = pdiv(pset1<T>(25.0), z);  // (5/x)^2
+ T p = pdiv(
+ internal::ppolevl<T, 6>::run(s, PP),
+ internal::ppolevl<T, 6>::run(s, PQ));
+ T q = pdiv(
+ internal::ppolevl<T, 7>::run(s, QP),
+ internal::ppolevl<T, 7>::run(s, QQ));
+ T yn = padd(y, NEG_THPIO4);  // |x| - 3*pi/4
+ T w = pdiv(pset1<T>(-5.0), y);  // -5/|x|
+ p = pmadd(p, pcos(yn), pmul(w, pmul(q, psin(yn))));  // p cos(yn) - (5/|x|) q sin(yn)
+ T y_gt_five = pmul(p, pmul(SQ2OPI, prsqrt(y)));  // scale by sqrt(2/pi) / sqrt(|x|)
+ // j1 is an odd function. This implementation differs from cephes to
+ // take this fact in to account. Cephes returns -j1(x) for y > 5 range.
+ y_gt_five = pselect(
+ pcmp_lt(x, pset1<T>(0.0)), pnegate(y_gt_five), y_gt_five);
+ return pselect(pcmp_le(y, pset1<T>(5.0)), y_le_five, y_gt_five);  // choose the interval from |x|
+ }
+};
+
+template <typename T>
+struct bessel_j1_impl {  // j1 entry point: thin wrapper over generic_j1<T>
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE T run(const T x) {  // x is taken by value
+ return generic_j1<T>::run(x);  // ScalarType defaults to unpacket_traits<T>::type, selecting the float/double specialization
+ }
+};
+
+template <typename T>
+struct bessel_y1_retval {  // result-type trait for bessel_y1: maps T to T
+ typedef T type;
+};
+
+template <typename T, typename ScalarType = typename unpacket_traits<T>::type>
+struct generic_y1 {  // fallback used when no float/double specialization matches ScalarType
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE T run(const T&) {
+ EIGEN_STATIC_ASSERT((internal::is_same<T, T>::value == false),  // always false, but dependent on T
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return ScalarType(0);  // placeholder return; the assertion above rejects any instantiation of run()
+ }
+};
+
+template <typename T>
+struct generic_y1<T, float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE T run(const T& x) {
+ /* j1f.c
+ * Bessel function of second kind of order one
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * float x, y, y1f();
+ *
+ * y = y1f( x );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns Bessel function of the second kind of order one
+ * of the argument.
+ *
+ * The domain is divided into the intervals [0, 2] and
+ * (2, infinity). In the first interval a rational approximation
+ * R(x) is employed to compute
+ *
+ *
+ * y1(x) = (w - r1^2) x R(x^2) + 2/pi (ln(x) j1(x) - 1/x) .
+ * where w = x^2 and r1^2 is the constant YO1 below.
+ *
+ * Thus a call to j1() is required.
+ *
+ * In the second interval, the modulus and phase are approximated
+ * by polynomials of the form Modulus(x) = sqrt(1/x) Q(1/x)
+ * and Phase(x) = x + 1/x S(1/x^2) - 3pi/4. Then the function is
+ *
+ * y1(x) = Modulus(x) sin( Phase(x) ).
+ *
+ * Arguments x <= 0 return -infinity, matching the double
+ * precision implementation.
+ *
+ * ACCURACY:
+ *
+ * Absolute error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0, 2 100000 2.2e-7 4.6e-8
+ * IEEE 2, 32 100000 1.9e-7 5.3e-8
+ *
+ * (error criterion relative when |y1| > 1).
+ *
+ */
+
+ const float YP[] = {8.061978323326852E-009f, -9.496460629917016E-007f,
+ 6.719543806674249E-005f, -2.641785726447862E-003f,
+ 4.202369946500099E-002f};
+ const float MO1[] = {6.913942741265801E-002f, -2.284801500053359E-001f,
+ 3.138238455499697E-001f, -2.102302420403875E-001f,
+ 5.435364690523026E-003f, 1.493389585089498E-001f,
+ 4.976029650847191E-006f, 7.978845453073848E-001f};
+ const float PH1[] = {-4.497014141919556E+001f, 5.073465654089319E+001f,
+ -2.485774108720340E+001f, 7.222973196770240E+000f,
+ -1.544842782180211E+000f, 3.503787691653334E-001f,
+ -1.637986776941202E-001f, 3.749989509080821E-001f};
+ const T YO1 = pset1<T>(4.66539330185668857532f);
+ const T NEG_THPIO4F = pset1<T>(-2.35619449019234492885f); /* -3*pi/4 */
+ const T TWOOPI = pset1<T>(0.636619772367581343075535f); /* 2/pi */
+ const T NEG_MAXNUM = pset1<T>(-NumTraits<float>::infinity());  // -infinity, despite the name
+
+ T z = pmul(x, x);  // w = x^2
+ T x_le_two = pmul(psub(z, YO1), internal::ppolevl<T, 4>::run(z, YP));
+ x_le_two = pmadd(
+ x_le_two, x,
+ pmul(TWOOPI, pmadd(
+ generic_j1<T, float>::run(x), plog(x),
+ pdiv(pset1<T>(-1.0f), x))));
+ x_le_two = pselect(pcmp_le(x, pset1<T>(0.0f)), NEG_MAXNUM, x_le_two);  // <= so x == 0 gives -inf, not the NaN from j1(0) * log(0)
+
+ T q = pdiv(pset1<T>(1.0f), x);  // 1/x
+ T w = prsqrt(x);  // 1/sqrt(x)
+ T p = pmul(w, internal::ppolevl<T, 7>::run(q, MO1));  // Modulus(x)
+ w = pmul(q, q);  // w is reused: now 1/x^2
+ T xn = pmadd(q, internal::ppolevl<T, 7>::run(w, PH1), NEG_THPIO4F);  // Phase(x) - x
+ T x_gt_two = pmul(p, psin(padd(xn, x)));
+ return pselect(pcmp_le(x, pset1<T>(2.0f)), x_le_two, x_gt_two);
+ }
+};
+
+template <typename T>
+struct generic_y1<T, double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE T run(const T& x) {
+ /* j1.c
+ * Bessel function of second kind of order one
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double x, y, y1();
+ *
+ * y = y1( x );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns Bessel function of the second kind of order one
+ * of the argument.
+ *
+ * The domain is divided into the intervals [0, 5] and
+ * (5, infinity). In the first interval a rational function
+ * in x^2 (YP/YQ) is used, and a call to j1() is required.
+ * In the second, the asymptotic trigonometric representation
+ * is employed using two rational functions (PP/PQ and QP/QQ).
+ *
+ *
+ *
+ * ACCURACY:
+ *
+ * Absolute error:
+ * arithmetic domain # trials peak rms
+ * DEC 0, 30 10000 8.6e-17 1.3e-17
+ * IEEE 0, 30 30000 1.0e-15 1.3e-16
+ *
+ * (error criterion relative when |y1| > 1).
+ *
+ */
+ const double PP[] = {7.62125616208173112003E-4, 7.31397056940917570436E-2,
+ 1.12719608129684925192E0, 5.11207951146807644818E0,
+ 8.42404590141772420927E0, 5.21451598682361504063E0,
+ 1.00000000000000000254E0};
+ const double PQ[] = {5.71323128072548699714E-4, 6.88455908754495404082E-2,
+ 1.10514232634061696926E0, 5.07386386128601488557E0,
+ 8.39985554327604159757E0, 5.20982848682361821619E0,
+ 9.99999999999999997461E-1};
+ const double QP[] = {5.10862594750176621635E-2, 4.98213872951233449420E0,
+ 7.58238284132545283818E1, 3.66779609360150777800E2,
+ 7.10856304998926107277E2, 5.97489612400613639965E2,
+ 2.11688757100572135698E2, 2.52070205858023719784E1};
+ const double QQ[] = {1.00000000000000000000E0, 7.42373277035675149943E1,
+ 1.05644886038262816351E3, 4.98641058337653607651E3,
+ 9.56231892404756170795E3, 7.99704160447350683650E3,
+ 2.82619278517639096600E3, 3.36093607810698293419E2};
+ const double YP[] = {1.26320474790178026440E9, -6.47355876379160291031E11,
+ 1.14509511541823727583E14, -8.12770255501325109621E15,
+ 2.02439475713594898196E17, -7.78877196265950026825E17};
+ const double YQ[] = {1.00000000000000000000E0, 5.94301592346128195359E2,
+ 2.35564092943068577943E5, 7.34811944459721705660E7,
+ 1.87601316108706159478E10, 3.88231277496238566008E12,
+ 6.20557727146953693363E14, 6.87141087355300489866E16,
+ 3.97270608116560655612E18};
+ const T SQ2OPI = pset1<T>(.79788456080286535588);  // sqrt(2 / pi)
+ const T NEG_THPIO4 = pset1<T>(-2.35619449019234492885); /* -3*pi/4 */
+ const T TWOOPI = pset1<T>(0.636619772367581343075535); /* 2/pi */
+ const T NEG_MAXNUM = pset1<T>(-NumTraits<double>::infinity());  // -infinity, despite the name
+
+ T z = pmul(x, x);  // x^2
+ T x_le_five = pdiv(internal::ppolevl<T, 5>::run(z, YP),
+ internal::ppolevl<T, 8>::run(z, YQ));
+ x_le_five = pmadd(
+ x_le_five, x, pmul(
+ TWOOPI, pmadd(generic_j1<T, double>::run(x), plog(x),
+ pdiv(pset1<T>(-1.0), x))));
+
+ x_le_five = pselect(pcmp_le(x, pset1<T>(0.0)), NEG_MAXNUM, x_le_five);  // x <= 0 yields -infinity
+ T s = pdiv(pset1<T>(25.0), z);  // (5/x)^2
+ T p = pdiv(
+ internal::ppolevl<T, 6>::run(s, PP),
+ internal::ppolevl<T, 6>::run(s, PQ));
+ T q = pdiv(
+ internal::ppolevl<T, 7>::run(s, QP),
+ internal::ppolevl<T, 7>::run(s, QQ));
+ T xn = padd(x, NEG_THPIO4);  // x - 3*pi/4
+ T w = pdiv(pset1<T>(5.0), x);  // 5/x
+ p = pmadd(p, psin(xn), pmul(w, pmul(q, pcos(xn))));  // p sin(xn) + (5/x) q cos(xn)
+ T x_gt_five = pmul(p, pmul(SQ2OPI, prsqrt(x)));  // scale by sqrt(2/pi) / sqrt(x)
+ return pselect(pcmp_le(x, pset1<T>(5.0)), x_le_five, x_gt_five);
+ }
+};
+
+template <typename T>
+struct bessel_y1_impl {  // y1 entry point: thin wrapper over generic_y1<T>
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE T run(const T x) {  // x is taken by value
+ return generic_y1<T>::run(x);  // ScalarType defaults to unpacket_traits<T>::type, selecting the float/double specialization
+ }
+};
+
+} // end namespace internal
+
+namespace numext {
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_i0, Scalar)
+ bessel_i0(const Scalar& x) {  // numext entry point for i0(x)
+ return EIGEN_MATHFUNC_IMPL(bessel_i0, Scalar)::run(x);  // implementation is selected by EIGEN_MATHFUNC_IMPL
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_i0e, Scalar)
+ bessel_i0e(const Scalar& x) {  // numext entry point for i0e(x)
+ return EIGEN_MATHFUNC_IMPL(bessel_i0e, Scalar)::run(x);  // implementation is selected by EIGEN_MATHFUNC_IMPL
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_i1, Scalar)
+ bessel_i1(const Scalar& x) {  // numext entry point for i1(x)
+ return EIGEN_MATHFUNC_IMPL(bessel_i1, Scalar)::run(x);  // implementation is selected by EIGEN_MATHFUNC_IMPL
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_i1e, Scalar)
+ bessel_i1e(const Scalar& x) {  // numext entry point for i1e(x)
+ return EIGEN_MATHFUNC_IMPL(bessel_i1e, Scalar)::run(x);  // implementation is selected by EIGEN_MATHFUNC_IMPL
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_k0, Scalar)
+ bessel_k0(const Scalar& x) {  // numext entry point for k0(x)
+ return EIGEN_MATHFUNC_IMPL(bessel_k0, Scalar)::run(x);  // implementation is selected by EIGEN_MATHFUNC_IMPL
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_k0e, Scalar)
+ bessel_k0e(const Scalar& x) {  // numext entry point for k0e(x)
+ return EIGEN_MATHFUNC_IMPL(bessel_k0e, Scalar)::run(x);  // implementation is selected by EIGEN_MATHFUNC_IMPL
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_k1, Scalar)
+ bessel_k1(const Scalar& x) {  // numext entry point for k1(x)
+ return EIGEN_MATHFUNC_IMPL(bessel_k1, Scalar)::run(x);  // implementation is selected by EIGEN_MATHFUNC_IMPL
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_k1e, Scalar)
+ bessel_k1e(const Scalar& x) {  // numext entry point for k1e(x)
+ return EIGEN_MATHFUNC_IMPL(bessel_k1e, Scalar)::run(x);  // implementation is selected by EIGEN_MATHFUNC_IMPL
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_j0, Scalar)
+ bessel_j0(const Scalar& x) {  // numext entry point for j0(x)
+ return EIGEN_MATHFUNC_IMPL(bessel_j0, Scalar)::run(x);  // implementation is selected by EIGEN_MATHFUNC_IMPL
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_y0, Scalar)
+ bessel_y0(const Scalar& x) {  // numext entry point for y0(x)
+ return EIGEN_MATHFUNC_IMPL(bessel_y0, Scalar)::run(x);  // implementation is selected by EIGEN_MATHFUNC_IMPL
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_j1, Scalar)
+ bessel_j1(const Scalar& x) {  // numext entry point for j1(x)
+ return EIGEN_MATHFUNC_IMPL(bessel_j1, Scalar)::run(x);  // implementation is selected by EIGEN_MATHFUNC_IMPL
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(bessel_y1, Scalar)
+ bessel_y1(const Scalar& x) {  // numext entry point for y1(x)
+ return EIGEN_MATHFUNC_IMPL(bessel_y1, Scalar)::run(x);  // implementation is selected by EIGEN_MATHFUNC_IMPL
+}
+
+} // end namespace numext
+
+} // end namespace Eigen
+
+#endif // EIGEN_BESSEL_FUNCTIONS_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h
new file mode 100644
index 000000000..943d10f6a
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/BesselFunctionsPacketMath.h
@@ -0,0 +1,118 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_BESSELFUNCTIONS_PACKETMATH_H
+#define EIGEN_BESSELFUNCTIONS_PACKETMATH_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal \returns the modified Bessel function of the first kind of
+ * order zero i0(\a x) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pbessel_i0(const Packet& x) {
+ return numext::bessel_i0(x);
+}
+
+/** \internal \returns the exponentially scaled modified Bessel function of
+ * the first kind of order zero i0e(\a x) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pbessel_i0e(const Packet& x) {
+ return numext::bessel_i0e(x);
+}
+
+/** \internal \returns the modified Bessel function of the first kind of
+ * order one i1(\a x) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pbessel_i1(const Packet& x) {
+ return numext::bessel_i1(x);
+}
+
+/** \internal \returns the exponentially scaled modified Bessel function of
+ * the first kind of order one i1e(\a x) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pbessel_i1e(const Packet& x) {
+ return numext::bessel_i1e(x);
+}
+
+/** \internal \returns the Bessel function of the first kind of
+ * order zero j0(\a x) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pbessel_j0(const Packet& x) {
+ return numext::bessel_j0(x);
+}
+
+/** \internal \returns the Bessel function of the first kind of
+ * order one j1(\a x) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pbessel_j1(const Packet& x) {
+ return numext::bessel_j1(x);
+}
+
+/** \internal \returns the Bessel function of the second kind of
+ * order zero y0(\a x) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pbessel_y0(const Packet& x) {
+ return numext::bessel_y0(x);
+}
+
+/** \internal \returns the Bessel function of the second kind of
+ * order one y1(\a x) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pbessel_y1(const Packet& x) {
+ return numext::bessel_y1(x);
+}
+
+/** \internal \returns the modified Bessel function of the second kind of
+ * order zero k0(\a x) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pbessel_k0(const Packet& x) {
+ return numext::bessel_k0(x);
+}
+
+/** \internal \returns the exponentially scaled modified Bessel function of
+ * the second kind of order zero k0e(\a x) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pbessel_k0e(const Packet& x) {
+ return numext::bessel_k0e(x);
+}
+
+/** \internal \returns the modified Bessel function of the second kind of
+ * order one k1(\a x) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pbessel_k1(const Packet& x) {
+ return numext::bessel_k1(x);
+}
+
+/** \internal \returns the exponentially scaled modified Bessel function of
+ * the second kind of order one k1e(\a x) (coeff-wise) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pbessel_k1e(const Packet& x) {
+ return numext::bessel_k1e(x);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_BESSELFUNCTIONS_PACKETMATH_H
+
diff --git a/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h b/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h
new file mode 100644
index 000000000..d7b231adb
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/HipVectorCompatibility.h
@@ -0,0 +1,67 @@
+#ifndef HIP_VECTOR_COMPATIBILITY_H
+#define HIP_VECTOR_COMPATIBILITY_H
+
+namespace hip_impl {
+ template <typename, typename, unsigned int> struct Scalar_accessor;  // forward declaration only; the definition is expected to come from elsewhere
+} // end namespace hip_impl
+
+namespace Eigen {
+namespace internal {
+
+#define HIP_SCALAR_ACCESSOR_BUILDER(NAME) /* NAME<Scalar_accessor<T, U, n>> inherits from NAME<T> */ \
+template <typename T, typename U, unsigned int n> \
+struct NAME <hip_impl::Scalar_accessor<T, U, n>> : NAME <T> {};
+
+#define HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(NAME) /* same forwarding for both NAME_impl and NAME_retval */ \
+template <typename T, typename U, unsigned int n> \
+struct NAME##_impl <hip_impl::Scalar_accessor<T, U, n>> : NAME##_impl <T> {}; \
+template <typename T, typename U, unsigned int n> \
+struct NAME##_retval <hip_impl::Scalar_accessor<T, U, n>> : NAME##_retval <T> {};
+
+#define HIP_SCALAR_ACCESSOR_BUILDER_IGAMMA(NAME) /* variant for templates with an extra IgammaComputationMode parameter */ \
+template <typename T, typename U, unsigned int n, IgammaComputationMode mode> \
+struct NAME <hip_impl::Scalar_accessor<T, U, n>, mode> : NAME <T, mode> {};
+
+#if EIGEN_HAS_C99_MATH  // these templates are only forwarded when C99 math is available
+HIP_SCALAR_ACCESSOR_BUILDER(betainc_helper)
+HIP_SCALAR_ACCESSOR_BUILDER(incbeta_cfe)
+
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(erf)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(erfc)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(igammac)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(lgamma)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(ndtri)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(polygamma)
+
+HIP_SCALAR_ACCESSOR_BUILDER_IGAMMA(igamma_generic_impl)
+#endif  // EIGEN_HAS_C99_MATH
+
+HIP_SCALAR_ACCESSOR_BUILDER(digamma_impl_maybe_poly)  // from here on the forwarding is unconditional
+HIP_SCALAR_ACCESSOR_BUILDER(zeta_impl_series)
+
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_i0)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_i0e)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_i1)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_i1e)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_j0)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_j1)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_k0)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_k0e)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_k1)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_k1e)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_y0)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(bessel_y1)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(betainc)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(digamma)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(gamma_sample_der_alpha)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(igamma_der_a)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(igamma)
+HIP_SCALAR_ACCESSOR_BUILDER_RETVAL(zeta)
+
+HIP_SCALAR_ACCESSOR_BUILDER_IGAMMA(igamma_series_impl)
+HIP_SCALAR_ACCESSOR_BUILDER_IGAMMA(igammac_cf_impl)
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // HIP_VECTOR_COMPATIBILITY_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
index ed415db99..691ff4d03 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
@@ -24,7 +24,7 @@ namespace Eigen {
* \sa Eigen::igammac(), Eigen::lgamma()
*/
template<typename Derived,typename ExponentDerived>
-inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
+EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x)
{
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
@@ -33,6 +33,48 @@ igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerive
);
}
+/** \cpp11 \returns an expression of the coefficient-wise igamma_der_a(\a a, \a x) to the given arrays.
+ *
+ * This function computes the coefficient-wise derivative of the incomplete
+ * gamma function with respect to the parameter a.
+ *
+ * \note This function supports only float and double scalar types in C++11
+ * mode. To support other scalar types, or float/double in non-C++11 mode,
+ * the user has to provide implementations of igamma_der_a(T,T) for any
+ * scalar type T to be supported. The functor is instantiated with
+ * Derived::Scalar, the scalar type of \a a.
+ *
+ * \sa Eigen::igamma(), Eigen::lgamma()
+ */
+template <typename Derived, typename ExponentDerived>
+EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_der_a_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
+igamma_der_a(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x) {
+ return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_der_a_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
+ a.derived(),
+ x.derived());
+}
+
+/** \cpp11 \returns an expression of the coefficient-wise gamma_sample_der_alpha(\a alpha, \a sample) to the given arrays.
+ *
+ * This function computes the coefficient-wise derivative of the sample
+ * of a Gamma(alpha, 1) random variable with respect to the parameter alpha.
+ *
+ * \note This function supports only float and double scalar types in C++11
+ * mode. To support other scalar types, or float/double in non-C++11 mode,
+ * the user has to provide implementations of gamma_sample_der_alpha(T,T)
+ * for any scalar type T to be supported. The functor is instantiated with
+ * AlphaDerived::Scalar, the scalar type of \a alpha.
+ *
+ * \sa Eigen::igamma(), Eigen::lgamma()
+ */
+template <typename AlphaDerived, typename SampleDerived>
+EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp<Eigen::internal::scalar_gamma_sample_der_alpha_op<typename AlphaDerived::Scalar>, const AlphaDerived, const SampleDerived>
+gamma_sample_der_alpha(const Eigen::ArrayBase<AlphaDerived>& alpha, const Eigen::ArrayBase<SampleDerived>& sample) {
+ return Eigen::CwiseBinaryOp<Eigen::internal::scalar_gamma_sample_der_alpha_op<typename AlphaDerived::Scalar>, const AlphaDerived, const SampleDerived>(
+ alpha.derived(),
+ sample.derived());
+}
+
/** \cpp11 \returns an expression of the coefficient-wise igammac(\a a, \a x) to the given arrays.
*
* This function computes the coefficient-wise complementary incomplete gamma function.
@@ -44,7 +86,7 @@ igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerive
* \sa Eigen::igamma(), Eigen::lgamma()
*/
template<typename Derived,typename ExponentDerived>
-inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
+EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
igammac(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x)
{
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
@@ -66,7 +108,7 @@ igammac(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDeriv
// * \warning Be careful with the order of the parameters: x.polygamma(n) is equivalent to polygamma(n,x)
// * \sa ArrayBase::polygamma()
template<typename DerivedN,typename DerivedX>
-inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_polygamma_op<typename DerivedX::Scalar>, const DerivedN, const DerivedX>
+EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp<Eigen::internal::scalar_polygamma_op<typename DerivedX::Scalar>, const DerivedN, const DerivedX>
polygamma(const Eigen::ArrayBase<DerivedN>& n, const Eigen::ArrayBase<DerivedX>& x)
{
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_polygamma_op<typename DerivedX::Scalar>, const DerivedN, const DerivedX>(
@@ -86,7 +128,7 @@ polygamma(const Eigen::ArrayBase<DerivedN>& n, const Eigen::ArrayBase<DerivedX>&
* \sa Eigen::betainc(), Eigen::lgamma()
*/
template<typename ArgADerived, typename ArgBDerived, typename ArgXDerived>
-inline const Eigen::CwiseTernaryOp<Eigen::internal::scalar_betainc_op<typename ArgXDerived::Scalar>, const ArgADerived, const ArgBDerived, const ArgXDerived>
+EIGEN_STRONG_INLINE const Eigen::CwiseTernaryOp<Eigen::internal::scalar_betainc_op<typename ArgXDerived::Scalar>, const ArgADerived, const ArgBDerived, const ArgXDerived>
betainc(const Eigen::ArrayBase<ArgADerived>& a, const Eigen::ArrayBase<ArgBDerived>& b, const Eigen::ArrayBase<ArgXDerived>& x)
{
return Eigen::CwiseTernaryOp<Eigen::internal::scalar_betainc_op<typename ArgXDerived::Scalar>, const ArgADerived, const ArgBDerived, const ArgXDerived>(
@@ -101,7 +143,7 @@ betainc(const Eigen::ArrayBase<ArgADerived>& a, const Eigen::ArrayBase<ArgBDeriv
*
* It returns the Riemann zeta function of two arguments \a x and \a q:
*
- * \param x is the exposent, it must be > 1
+ * \param x is the exponent, it must be > 1
* \param q is the shift, it must be > 0
*
* \note This function supports only float and double scalar types. To support other scalar types, the user has
@@ -110,7 +152,7 @@ betainc(const Eigen::ArrayBase<ArgADerived>& a, const Eigen::ArrayBase<ArgBDeriv
* \sa ArrayBase::zeta()
*/
template<typename DerivedX,typename DerivedQ>
-inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_zeta_op<typename DerivedX::Scalar>, const DerivedX, const DerivedQ>
+EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp<Eigen::internal::scalar_zeta_op<typename DerivedX::Scalar>, const DerivedX, const DerivedQ>
zeta(const Eigen::ArrayBase<DerivedX>& x, const Eigen::ArrayBase<DerivedQ>& q)
{
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_zeta_op<typename DerivedX::Scalar>, const DerivedX, const DerivedQ>(
@@ -119,6 +161,7 @@ zeta(const Eigen::ArrayBase<DerivedX>& x, const Eigen::ArrayBase<DerivedQ>& q)
);
}
+
} // end namespace Eigen
#endif // EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h
new file mode 100644
index 000000000..2d94231f0
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsBFloat16.h
@@ -0,0 +1,58 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPECIALFUNCTIONS_BFLOAT16_H
+#define EIGEN_SPECIALFUNCTIONS_BFLOAT16_H
+
+namespace Eigen {
+namespace numext {
+
+#if EIGEN_HAS_C99_MATH  // each specialization below casts bfloat16 to float, calls the float overload, and converts the result back
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 lgamma(const Eigen::bfloat16& a) {
+ return Eigen::bfloat16(Eigen::numext::lgamma(static_cast<float>(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 digamma(const Eigen::bfloat16& a) {
+ return Eigen::bfloat16(Eigen::numext::digamma(static_cast<float>(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 zeta(const Eigen::bfloat16& x, const Eigen::bfloat16& q) {
+ return Eigen::bfloat16(Eigen::numext::zeta(static_cast<float>(x), static_cast<float>(q)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 polygamma(const Eigen::bfloat16& n, const Eigen::bfloat16& x) {
+ return Eigen::bfloat16(Eigen::numext::polygamma(static_cast<float>(n), static_cast<float>(x)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 erf(const Eigen::bfloat16& a) {
+ return Eigen::bfloat16(Eigen::numext::erf(static_cast<float>(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 erfc(const Eigen::bfloat16& a) {
+ return Eigen::bfloat16(Eigen::numext::erfc(static_cast<float>(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 ndtri(const Eigen::bfloat16& a) {
+ return Eigen::bfloat16(Eigen::numext::ndtri(static_cast<float>(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 igamma(const Eigen::bfloat16& a, const Eigen::bfloat16& x) {
+ return Eigen::bfloat16(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 igamma_der_a(const Eigen::bfloat16& a, const Eigen::bfloat16& x) {
+ return Eigen::bfloat16(Eigen::numext::igamma_der_a(static_cast<float>(a), static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 gamma_sample_der_alpha(const Eigen::bfloat16& alpha, const Eigen::bfloat16& sample) {
+ return Eigen::bfloat16(Eigen::numext::gamma_sample_der_alpha(static_cast<float>(alpha), static_cast<float>(sample)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 igammac(const Eigen::bfloat16& a, const Eigen::bfloat16& x) {
+ return Eigen::bfloat16(Eigen::numext::igammac(static_cast<float>(a), static_cast<float>(x)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 betainc(const Eigen::bfloat16& a, const Eigen::bfloat16& b, const Eigen::bfloat16& x) {
+ return Eigen::bfloat16(Eigen::numext::betainc(static_cast<float>(a), static_cast<float>(b), static_cast<float>(x)));
+}
+#endif  // EIGEN_HAS_C99_MATH
+
+} // end namespace numext
+} // end namespace Eigen
+
+#endif // EIGEN_SPECIALFUNCTIONS_BFLOAT16_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
index d8f2363be..abefe99b7 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
@@ -41,6 +41,60 @@ struct functor_traits<scalar_igamma_op<Scalar> > {
};
};
+/** \internal
+ * \brief Template functor to compute the derivative of the incomplete gamma
+ * function igamma_der_a(a, x)
+ *
+ * \sa class CwiseBinaryOp, Cwise::igamma_der_a
+ */
+template <typename Scalar>
+struct scalar_igamma_der_a_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_der_a_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a, const Scalar& x) const {
+ using numext::igamma_der_a;
+ return igamma_der_a(a, x);
+ }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const {
+ return internal::pigamma_der_a(a, x);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_igamma_der_a_op<Scalar> > {
+ enum {
+ // 2x the cost of igamma
+ Cost = 40 * NumTraits<Scalar>::MulCost + 20 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasIGammaDerA
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the derivative of the sample
+ * of a Gamma(alpha, 1) random variable with respect to the parameter alpha
+ * gamma_sample_der_alpha(alpha, sample)
+ *
+ * \sa class CwiseBinaryOp, Cwise::gamma_sample_der_alpha
+ */
+template <typename Scalar>
+struct scalar_gamma_sample_der_alpha_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_gamma_sample_der_alpha_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& alpha, const Scalar& sample) const {
+ using numext::gamma_sample_der_alpha;
+ return gamma_sample_der_alpha(alpha, sample);
+ }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& alpha, const Packet& sample) const {
+ return internal::pgamma_sample_der_alpha(alpha, sample);
+ }
+};
+template <typename Scalar>
+struct functor_traits<scalar_gamma_sample_der_alpha_op<Scalar> > {
+ enum {
+ // 2x the cost of igamma, minus the lgamma cost (the lgamma cancels out)
+ Cost = 30 * NumTraits<Scalar>::MulCost + 15 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasGammaSampleDerAlpha
+ };
+};
/** \internal
* \brief Template functor to compute the complementary incomplete gamma function igammac(a, x)
@@ -101,11 +155,11 @@ struct functor_traits<scalar_betainc_op<Scalar> > {
*/
template<typename Scalar> struct scalar_lgamma_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
using numext::lgamma; return lgamma(a);
}
typedef typename packet_traits<Scalar>::type Packet;
- EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::plgamma(a); }
};
template<typename Scalar>
struct functor_traits<scalar_lgamma_op<Scalar> >
@@ -123,11 +177,11 @@ struct functor_traits<scalar_lgamma_op<Scalar> >
*/
template<typename Scalar> struct scalar_digamma_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
using numext::digamma; return digamma(a);
}
typedef typename packet_traits<Scalar>::type Packet;
- EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::pdigamma(a); }
};
template<typename Scalar>
struct functor_traits<scalar_digamma_op<Scalar> >
@@ -145,11 +199,11 @@ struct functor_traits<scalar_digamma_op<Scalar> >
*/
template<typename Scalar> struct scalar_zeta_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_zeta_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x, const Scalar& q) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& x, const Scalar& q) const {
using numext::zeta; return zeta(x, q);
}
typedef typename packet_traits<Scalar>::type Packet;
- EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); }
};
template<typename Scalar>
struct functor_traits<scalar_zeta_op<Scalar> >
@@ -167,11 +221,11 @@ struct functor_traits<scalar_zeta_op<Scalar> >
*/
template<typename Scalar> struct scalar_polygamma_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_polygamma_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& n, const Scalar& x) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& n, const Scalar& x) const {
using numext::polygamma; return polygamma(n, x);
}
typedef typename packet_traits<Scalar>::type Packet;
- EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); }
};
template<typename Scalar>
struct functor_traits<scalar_polygamma_op<Scalar> >
@@ -184,25 +238,40 @@ struct functor_traits<scalar_polygamma_op<Scalar> >
};
/** \internal
- * \brief Template functor to compute the Gauss error function of a
- * scalar
- * \sa class CwiseUnaryOp, Cwise::erf()
+ * \brief Template functor to compute the error function of a scalar
+ * \sa class CwiseUnaryOp, ArrayBase::erf()
*/
template<typename Scalar> struct scalar_erf_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
- using numext::erf; return erf(a);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar
+ operator()(const Scalar& a) const {
+ return numext::erf(a);
+ }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
+ return perf(x);
}
- typedef typename packet_traits<Scalar>::type Packet;
- EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perf(a); }
};
-template<typename Scalar>
-struct functor_traits<scalar_erf_op<Scalar> >
-{
+template <typename Scalar>
+struct functor_traits<scalar_erf_op<Scalar> > {
enum {
- // Guesstimate
- Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
- PacketAccess = packet_traits<Scalar>::HasErf
+ PacketAccess = packet_traits<Scalar>::HasErf,
+ Cost =
+ (PacketAccess
+#ifdef EIGEN_VECTORIZE_FMA
+ // TODO(rmlarsen): Move the FMA cost model to a central location.
+ // Haswell can issue 2 add/mul/madd per cycle.
+ // 10 pmadd, 2 pmul, 1 div, 2 other
+ ? (2 * NumTraits<Scalar>::AddCost +
+ 7 * NumTraits<Scalar>::MulCost +
+ scalar_div_cost<Scalar, packet_traits<Scalar>::HasDiv>::value)
+#else
+ ? (12 * NumTraits<Scalar>::AddCost +
+ 12 * NumTraits<Scalar>::MulCost +
+ scalar_div_cost<Scalar, packet_traits<Scalar>::HasDiv>::value)
+#endif
+ // Assume for simplicity that this is as expensive as an exp().
+ : (functor_traits<scalar_exp_op<Scalar> >::Cost))
};
};
@@ -213,11 +282,11 @@ struct functor_traits<scalar_erf_op<Scalar> >
*/
template<typename Scalar> struct scalar_erfc_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op)
- EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
using numext::erfc; return erfc(a);
}
typedef typename packet_traits<Scalar>::type Packet;
- EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perfc(a); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::perfc(a); }
};
template<typename Scalar>
struct functor_traits<scalar_erfc_op<Scalar> >
@@ -229,6 +298,31 @@ struct functor_traits<scalar_erfc_op<Scalar> >
};
};
+/** \internal
+ * \brief Template functor to compute the inverse of the normal distribution
+ * function of a scalar (coefficient-wise ndtri)
+ * \sa class CwiseUnaryOp, Cwise::ndtri()
+ */
+template<typename Scalar> struct scalar_ndtri_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_ndtri_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
+ using numext::ndtri; return ndtri(a);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::pndtri(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_ndtri_op<Scalar> >
+{
+ enum {
+ // On average, we are evaluating rational functions with degree N=9 in the
+ // numerator and denominator. This results in 2*N additions and 2*N
+ // multiplications.
+ Cost = 18 * NumTraits<Scalar>::MulCost + 18 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasNdtri
+ };
+};
+
} // end namespace internal
} // end namespace Eigen
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
index 553bcda6a..2a3a53168 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
@@ -30,9 +30,20 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erf(const Eigen::ha
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) {
return Eigen::half(Eigen::numext::erfc(static_cast<float>(a)));
}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ndtri(const Eigen::half& a) {
+ return Eigen::half(Eigen::numext::ndtri(static_cast<float>(a)));
+}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) {
return Eigen::half(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x)));
}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma_der_a(const Eigen::half& a, const Eigen::half& x) {
+ return Eigen::half(Eigen::numext::igamma_der_a(static_cast<float>(a), static_cast<float>(x)));
+}
+template <>
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half gamma_sample_der_alpha(const Eigen::half& alpha, const Eigen::half& sample) {
+ return Eigen::half(Eigen::numext::gamma_sample_der_alpha(static_cast<float>(alpha), static_cast<float>(sample)));
+}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen::half& a, const Eigen::half& x) {
return Eigen::half(Eigen::numext::igammac(static_cast<float>(a), static_cast<float>(x)));
}
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index f524d7137..f1c260e29 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -36,66 +36,6 @@ namespace internal {
// Good luck with your project,
// Steve
-namespace cephes {
-
-/* polevl (modified for Eigen)
- *
- * Evaluate polynomial
- *
- *
- *
- * SYNOPSIS:
- *
- * int N;
- * Scalar x, y, coef[N+1];
- *
- * y = polevl<decltype(x), N>( x, coef);
- *
- *
- *
- * DESCRIPTION:
- *
- * Evaluates polynomial of degree N:
- *
- * 2 N
- * y = C + C x + C x +...+ C x
- * 0 1 2 N
- *
- * Coefficients are stored in reverse order:
- *
- * coef[0] = C , ..., coef[N] = C .
- * N 0
- *
- * The function p1evl() assumes that coef[N] = 1.0 and is
- * omitted from the array. Its calling arguments are
- * otherwise the same as polevl().
- *
- *
- * The Eigen implementation is templatized. For best speed, store
- * coef as a const array (constexpr), e.g.
- *
- * const double coef[] = {1.0, 2.0, 3.0, ...};
- *
- */
-template <typename Scalar, int N>
-struct polevl {
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Scalar run(const Scalar x, const Scalar coef[]) {
- EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- return polevl<Scalar, N - 1>::run(x, coef) * x + coef[N];
- }
-};
-
-template <typename Scalar>
-struct polevl<Scalar, 0> {
- EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Scalar run(const Scalar, const Scalar coef[]) {
- return coef[0];
- }
-};
-
-} // end namespace cephes
/****************************************************************************
* Implementation of lgamma, requires C++11/C99 *
@@ -117,13 +57,27 @@ struct lgamma_retval {
};
#if EIGEN_HAS_C99_MATH
+// Since glibc 2.19
+#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 19) || __GLIBC__>2) \
+ && (defined(_DEFAULT_SOURCE) || defined(_BSD_SOURCE) || defined(_SVID_SOURCE))
+#define EIGEN_HAS_LGAMMA_R
+#endif
+
+// Glibc versions before 2.19
+#if defined(__GLIBC__) && ((__GLIBC__==2 && __GLIBC_MINOR__ < 19) || __GLIBC__<2) \
+ && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE))
+#define EIGEN_HAS_LGAMMA_R
+#endif
+
template <>
struct lgamma_impl<float> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE float run(float x) {
-#if !defined(__CUDA_ARCH__) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
- int signgam;
- return ::lgammaf_r(x, &signgam);
+#if !defined(EIGEN_GPU_COMPILE_PHASE) && defined (EIGEN_HAS_LGAMMA_R) && !defined(__APPLE__)
+ int dummy;
+ return ::lgammaf_r(x, &dummy);
+#elif defined(SYCL_DEVICE_ONLY)
+ return cl::sycl::lgamma(x);
#else
return ::lgammaf(x);
#endif
@@ -134,14 +88,18 @@ template <>
struct lgamma_impl<double> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE double run(double x) {
-#if !defined(__CUDA_ARCH__) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
- int signgam;
- return ::lgamma_r(x, &signgam);
+#if !defined(EIGEN_GPU_COMPILE_PHASE) && defined(EIGEN_HAS_LGAMMA_R) && !defined(__APPLE__)
+ int dummy;
+ return ::lgamma_r(x, &dummy);
+#elif defined(SYCL_DEVICE_ONLY)
+ return cl::sycl::lgamma(x);
#else
return ::lgamma(x);
#endif
}
};
+
+#undef EIGEN_HAS_LGAMMA_R
#endif
/****************************************************************************
@@ -191,7 +149,7 @@ struct digamma_impl_maybe_poly<float> {
float z;
if (s < 1.0e8f) {
z = 1.0f / (s * s);
- return z * cephes::polevl<float, 3>::run(z, A);
+ return z * internal::ppolevl<float, 3>::run(z, A);
} else return 0.0f;
}
};
@@ -213,7 +171,7 @@ struct digamma_impl_maybe_poly<double> {
double z;
if (s < 1.0e17) {
z = 1.0 / (s * s);
- return z * cephes::polevl<double, 6>::run(z, A);
+ return z * internal::ppolevl<double, 6>::run(z, A);
}
else return 0.0;
}
@@ -283,7 +241,7 @@ struct digamma_impl {
Scalar p, q, nz, s, w, y;
bool negative = false;
- const Scalar maxnum = NumTraits<Scalar>::infinity();
+ const Scalar nan = NumTraits<Scalar>::quiet_NaN();
const Scalar m_pi = Scalar(EIGEN_PI);
const Scalar zero = Scalar(0);
@@ -296,7 +254,7 @@ struct digamma_impl {
q = x;
p = numext::floor(q);
if (p == q) {
- return maxnum;
+ return nan;
}
/* Remove the zeros of tan(m_pi x)
* by subtracting the nearest integer from x
@@ -335,13 +293,63 @@ struct digamma_impl {
* Implementation of erf, requires C++11/C99 *
****************************************************************************/
-template <typename Scalar>
+/** \internal \returns the error function of \a a_x (coeff-wise)
+ Doesn't do anything fancy, just a 13/8-degree rational interpolant which
+ is accurate up to a couple of ulp in the range [-4, 4], outside of which
+ fl(erf(x)) = +/-1.
+
+ This implementation works on both scalars and packets (any type T supporting the p* primitives used below).
+*/
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_fast_erf_float(const T& a_x) {
+ // Clamp the inputs to the range [-4, 4] since anything outside
+ // this range is +/-1.0f in single-precision.
+ const T plus_4 = pset1<T>(4.f);
+ const T minus_4 = pset1<T>(-4.f);
+ const T x = pmax(pmin(a_x, plus_4), minus_4);
+ // The monomial coefficients of the numerator polynomial (odd).
+ const T alpha_1 = pset1<T>(-1.60960333262415e-02f);
+ const T alpha_3 = pset1<T>(-2.95459980854025e-03f);
+ const T alpha_5 = pset1<T>(-7.34990630326855e-04f);
+ const T alpha_7 = pset1<T>(-5.69250639462346e-05f);
+ const T alpha_9 = pset1<T>(-2.10102402082508e-06f);
+ const T alpha_11 = pset1<T>(2.77068142495902e-08f);
+ const T alpha_13 = pset1<T>(-2.72614225801306e-10f);
+
+ // The monomial coefficients of the denominator polynomial (even).
+ const T beta_0 = pset1<T>(-1.42647390514189e-02f);
+ const T beta_2 = pset1<T>(-7.37332916720468e-03f);
+ const T beta_4 = pset1<T>(-1.68282697438203e-03f);
+ const T beta_6 = pset1<T>(-2.13374055278905e-04f);
+ const T beta_8 = pset1<T>(-1.45660718464996e-05f);
+
+ // Since the polynomials are odd/even, we need x^2.
+ const T x2 = pmul(x, x);
+
+ // Evaluate the numerator polynomial p (Horner scheme in x^2, then times x).
+ T p = pmadd(x2, alpha_13, alpha_11);
+ p = pmadd(x2, p, alpha_9);
+ p = pmadd(x2, p, alpha_7);
+ p = pmadd(x2, p, alpha_5);
+ p = pmadd(x2, p, alpha_3);
+ p = pmadd(x2, p, alpha_1);
+ p = pmul(x, p);
+
+ // Evaluate the denominator polynomial q (Horner scheme in x^2).
+ T q = pmadd(x2, beta_8, beta_6);
+ q = pmadd(x2, q, beta_4);
+ q = pmadd(x2, q, beta_2);
+ q = pmadd(x2, q, beta_0);
+
+ // Divide the numerator by the denominator.
+ return pdiv(p, q);
+}
+
+template <typename T>
struct erf_impl {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
- EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
- THIS_TYPE_IS_NOT_SUPPORTED);
- return Scalar(0);
+ static EIGEN_STRONG_INLINE T run(const T& x) {
+ return generic_fast_erf_float(x);
}
};
@@ -354,13 +362,25 @@ struct erf_retval {
template <>
struct erf_impl<float> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); }
+ static EIGEN_STRONG_INLINE float run(float x) {
+#if defined(SYCL_DEVICE_ONLY)
+ return cl::sycl::erf(x);
+#else
+ return generic_fast_erf_float(x);
+#endif
+ }
};
template <>
struct erf_impl<double> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); }
+ static EIGEN_STRONG_INLINE double run(double x) {
+#if defined(SYCL_DEVICE_ONLY)
+ return cl::sycl::erf(x);
+#else
+ return ::erf(x);
+#endif
+ }
};
#endif // EIGEN_HAS_C99_MATH
@@ -387,16 +407,270 @@ struct erfc_retval {
template <>
struct erfc_impl<float> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); }
+ static EIGEN_STRONG_INLINE float run(const float x) {
+#if defined(SYCL_DEVICE_ONLY)
+ return cl::sycl::erfc(x);
+#else
+ return ::erfcf(x);
+#endif
+ }
};
template <>
struct erfc_impl<double> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); }
+ static EIGEN_STRONG_INLINE double run(const double x) {
+#if defined(SYCL_DEVICE_ONLY)
+ return cl::sycl::erfc(x);
+#else
+ return ::erfc(x);
+#endif
+ }
+};
+#endif // EIGEN_HAS_C99_MATH
+
+
+/***************************************************************************
+* Implementation of ndtri. *
+****************************************************************************/
+
+/* Inverse of Normal distribution function (modified for Eigen).
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double x, y, ndtri();
+ *
+ * x = ndtri( y );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns the argument, x, for which the area under the
+ * Gaussian probability density function (integrated from
+ * minus infinity to x) is equal to y.
+ *
+ *
+ * For small arguments 0 < y < exp(-2), the program computes
+ * z = sqrt( -2.0 * log(y) ); then the approximation is
+ * x = z - log(z)/z - (1/z) P(1/z) / Q(1/z).
+ * There are two rational functions P/Q, one for 0 < y < exp(-32)
+ * and the other for y up to exp(-2). For larger arguments,
+ * w = y - 0.5, and x/sqrt(2pi) = w + w**3 R(w**2)/S(w**2).
+ *
+ *
+ * ACCURACY:
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * DEC 0.125, 1 5500 9.5e-17 2.1e-17
+ * DEC 6e-39, 0.135 3500 5.7e-17 1.3e-17
+ * IEEE 0.125, 1 20000 7.2e-16 1.3e-16
+ * IEEE 3e-308, 0.135 50000 4.6e-16 9.8e-17
+ *
+ *
+ * ERROR MESSAGES:
+ *
+ * message condition value returned
+ * ndtri domain x <= 0 -infinity (Cephes: -MAXNUM)
+ * ndtri domain x >= 1 +infinity (Cephes: MAXNUM)
+ *
+ */
+ /*
+ Cephes Math Library Release 2.2: June, 1992
+ Copyright 1985, 1987, 1992 by Stephen L. Moshier
+ Direct inquiries to 30 Frost Street, Cambridge, MA 02140
+ */
+
+
+// TODO: Add a cheaper approximation for float.
+
+
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T flipsign(
+ const T& should_flipsign, const T& x) {
+ typedef typename unpacket_traits<T>::type Scalar;
+ const T sign_mask = pset1<T>(Scalar(-0.0));
+ T sign_bit = pand<T>(should_flipsign, sign_mask);
+ return pxor<T>(sign_bit, x);
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double flipsign<double>(
+ const double& should_flipsign, const double& x) {
+ return should_flipsign == 0 ? x : -x;
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float flipsign<float>(
+ const float& should_flipsign, const float& x) {
+ return should_flipsign == 0 ? x : -x;
+}
+
+// We split this computation into two functions so that in the scalar path
+// only one branch is evaluated (due to our template specialization of pselect
+// being an if statement).
+
+template <typename T, typename ScalarType>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_gt_exp_neg_two(const T& b) {
+ const ScalarType p0[] = {
+ ScalarType(-5.99633501014107895267e1),
+ ScalarType(9.80010754185999661536e1),
+ ScalarType(-5.66762857469070293439e1),
+ ScalarType(1.39312609387279679503e1),
+ ScalarType(-1.23916583867381258016e0)
+ };
+ const ScalarType q0[] = {
+ ScalarType(1.0),
+ ScalarType(1.95448858338141759834e0),
+ ScalarType(4.67627912898881538453e0),
+ ScalarType(8.63602421390890590575e1),
+ ScalarType(-2.25462687854119370527e2),
+ ScalarType(2.00260212380060660359e2),
+ ScalarType(-8.20372256168333339912e1),
+ ScalarType(1.59056225126211695515e1),
+ ScalarType(-1.18331621121330003142e0)
+ };
+ const T sqrt2pi = pset1<T>(ScalarType(2.50662827463100050242e0));
+ const T half = pset1<T>(ScalarType(0.5));
+ T c, c2, ndtri_gt_exp_neg_two;
+
+ c = psub(b, half);
+ c2 = pmul(c, c);
+ ndtri_gt_exp_neg_two = pmadd(c, pmul(
+ c2, pdiv(
+ internal::ppolevl<T, 4>::run(c2, p0),
+ internal::ppolevl<T, 8>::run(c2, q0))), c);
+ return pmul(ndtri_gt_exp_neg_two, sqrt2pi);
+}
+
+template <typename T, typename ScalarType>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_lt_exp_neg_two(
+ const T& b, const T& should_flipsign) {
+ /* Approximation for interval z = sqrt(-2 log a ) between 2 and 8
+ * i.e., a between exp(-2) = .135 and exp(-32) = 1.27e-14.
+ */
+ const ScalarType p1[] = {
+ ScalarType(4.05544892305962419923e0),
+ ScalarType(3.15251094599893866154e1),
+ ScalarType(5.71628192246421288162e1),
+ ScalarType(4.40805073893200834700e1),
+ ScalarType(1.46849561928858024014e1),
+ ScalarType(2.18663306850790267539e0),
+ ScalarType(-1.40256079171354495875e-1),
+ ScalarType(-3.50424626827848203418e-2),
+ ScalarType(-8.57456785154685413611e-4)
+ };
+ const ScalarType q1[] = {
+ ScalarType(1.0),
+ ScalarType(1.57799883256466749731e1),
+ ScalarType(4.53907635128879210584e1),
+ ScalarType(4.13172038254672030440e1),
+ ScalarType(1.50425385692907503408e1),
+ ScalarType(2.50464946208309415979e0),
+ ScalarType(-1.42182922854787788574e-1),
+ ScalarType(-3.80806407691578277194e-2),
+ ScalarType(-9.33259480895457427372e-4)
+ };
+ /* Approximation for interval z = sqrt(-2 log a ) between 8 and 64
+ * i.e., a between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890.
+ */
+ const ScalarType p2[] = {
+ ScalarType(3.23774891776946035970e0),
+ ScalarType(6.91522889068984211695e0),
+ ScalarType(3.93881025292474443415e0),
+ ScalarType(1.33303460815807542389e0),
+ ScalarType(2.01485389549179081538e-1),
+ ScalarType(1.23716634817820021358e-2),
+ ScalarType(3.01581553508235416007e-4),
+ ScalarType(2.65806974686737550832e-6),
+ ScalarType(6.23974539184983293730e-9)
+ };
+ const ScalarType q2[] = {
+ ScalarType(1.0),
+ ScalarType(6.02427039364742014255e0),
+ ScalarType(3.67983563856160859403e0),
+ ScalarType(1.37702099489081330271e0),
+ ScalarType(2.16236993594496635890e-1),
+ ScalarType(1.34204006088543189037e-2),
+ ScalarType(3.28014464682127739104e-4),
+ ScalarType(2.89247864745380683936e-6),
+ ScalarType(6.79019408009981274425e-9)
+ };
+ const T eight = pset1<T>(ScalarType(8.0));
+ const T one = pset1<T>(ScalarType(1));
+ const T neg_two = pset1<T>(ScalarType(-2));
+ T x, x0, x1, z;
+
+ x = psqrt(pmul(neg_two, plog(b)));
+ x0 = psub(x, pdiv(plog(x), x));
+ z = pdiv(one, x);
+ x1 = pmul(
+ z, pselect(
+ pcmp_lt(x, eight),
+ pdiv(internal::ppolevl<T, 8>::run(z, p1),
+ internal::ppolevl<T, 8>::run(z, q1)),
+ pdiv(internal::ppolevl<T, 8>::run(z, p2),
+ internal::ppolevl<T, 8>::run(z, q2))));
+ return flipsign(should_flipsign, psub(x0, x1));
+}
+
+template <typename T, typename ScalarType>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T generic_ndtri(const T& a) {
+ const T maxnum = pset1<T>(NumTraits<ScalarType>::infinity());
+ const T neg_maxnum = pset1<T>(-NumTraits<ScalarType>::infinity());
+
+ const T zero = pset1<T>(ScalarType(0));
+ const T one = pset1<T>(ScalarType(1));
+ // exp(-2)
+ const T exp_neg_two = pset1<T>(ScalarType(0.13533528323661269189));
+ T b, ndtri, should_flipsign;
+
+ should_flipsign = pcmp_le(a, psub(one, exp_neg_two));
+ b = pselect(should_flipsign, a, psub(one, a));
+
+ ndtri = pselect(
+ pcmp_lt(exp_neg_two, b),
+ generic_ndtri_gt_exp_neg_two<T, ScalarType>(b),
+ generic_ndtri_lt_exp_neg_two<T, ScalarType>(b, should_flipsign));
+
+ return pselect(
+ pcmp_le(a, zero), neg_maxnum,
+ pselect(pcmp_le(one, a), maxnum, ndtri));
+}
+
+template <typename Scalar>
+struct ndtri_retval {
+ typedef Scalar type;
+};
+
+#if !EIGEN_HAS_C99_MATH
+
+template <typename Scalar>
+struct ndtri_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+# else
+
+template <typename Scalar>
+struct ndtri_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar x) {
+ return generic_ndtri<Scalar, Scalar>(x);
+ }
};
+
#endif // EIGEN_HAS_C99_MATH
+
/**************************************************************************************************************
* Implementation of igammac (complemented incomplete gamma integral), based on Cephes but requires C++11/C99 *
**************************************************************************************************************/
@@ -452,6 +726,228 @@ struct cephes_helper<double> {
}
};
+enum IgammaComputationMode { VALUE, DERIVATIVE, SAMPLE_DERIVATIVE };
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC
+static EIGEN_STRONG_INLINE Scalar main_igamma_term(Scalar a, Scalar x) {
+ /* Compute x**a * exp(-x) / gamma(a), evaluated in log space; yields 0 when the log underflows or is NaN */
+ Scalar logax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
+ if (logax < -numext::log(NumTraits<Scalar>::highest()) ||
+ // Assuming x and a aren't NaN.
+ (numext::isnan)(logax)) {
+ return Scalar(0);
+ }
+ return numext::exp(logax);
+}
+
+template <typename Scalar, IgammaComputationMode mode>
+EIGEN_DEVICE_FUNC
+int igamma_num_iterations() {
+ /* Returns the maximum number of internal iterations for igamma computation.
+ */
+ if (mode == VALUE) {
+ return 2000;
+ }
+
+ if (internal::is_same<Scalar, float>::value) {
+ return 200;
+ } else if (internal::is_same<Scalar, double>::value) {
+ return 500;
+ } else {
+ return 2000;
+ }
+}
+
+template <typename Scalar, IgammaComputationMode mode>
+struct igammac_cf_impl {
+ /* Computes igamc(a, x) or derivative (depending on the mode)
+ * using the continued fraction expansion of the complementary
+ * incomplete Gamma function.
+ *
+ * Preconditions:
+ * a > 0
+ * x >= 1
+ * x >= a
+ */
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar a, Scalar x) {
+ const Scalar zero = 0;
+ const Scalar one = 1;
+ const Scalar two = 2;
+ const Scalar machep = cephes_helper<Scalar>::machep();
+ const Scalar big = cephes_helper<Scalar>::big();
+ const Scalar biginv = cephes_helper<Scalar>::biginv();
+
+ if ((numext::isinf)(x)) {
+ return zero;
+ }
+
+ Scalar ax = main_igamma_term<Scalar>(a, x);
+ // This is independent of mode. If this value is zero,
+ // then the function value is zero. If the function value is zero,
+ // then we are in a neighborhood where the function value evaluates to zero,
+ // so the derivative is zero.
+ if (ax == zero) {
+ return zero;
+ }
+
+ // continued fraction
+ Scalar y = one - a;
+ Scalar z = x + y + one;
+ Scalar c = zero;
+ Scalar pkm2 = one;
+ Scalar qkm2 = x;
+ Scalar pkm1 = x + one;
+ Scalar qkm1 = z * x;
+ Scalar ans = pkm1 / qkm1;
+
+ Scalar dpkm2_da = zero;
+ Scalar dqkm2_da = zero;
+ Scalar dpkm1_da = zero;
+ Scalar dqkm1_da = -x;
+ Scalar dans_da = (dpkm1_da - ans * dqkm1_da) / qkm1;
+
+ for (int i = 0; i < igamma_num_iterations<Scalar, mode>(); i++) {
+ c += one;
+ y += one;
+ z += two;
+
+ Scalar yc = y * c;
+ Scalar pk = pkm1 * z - pkm2 * yc;
+ Scalar qk = qkm1 * z - qkm2 * yc;
+
+ Scalar dpk_da = dpkm1_da * z - pkm1 - dpkm2_da * yc + pkm2 * c;
+ Scalar dqk_da = dqkm1_da * z - qkm1 - dqkm2_da * yc + qkm2 * c;
+
+ if (qk != zero) {
+ Scalar ans_prev = ans;
+ ans = pk / qk;
+
+ Scalar dans_da_prev = dans_da;
+ dans_da = (dpk_da - ans * dqk_da) / qk;
+
+ if (mode == VALUE) {
+ if (numext::abs(ans_prev - ans) <= machep * numext::abs(ans)) {
+ break;
+ }
+ } else {
+ if (numext::abs(dans_da - dans_da_prev) <= machep) {
+ break;
+ }
+ }
+ }
+
+ pkm2 = pkm1;
+ pkm1 = pk;
+ qkm2 = qkm1;
+ qkm1 = qk;
+
+ dpkm2_da = dpkm1_da;
+ dpkm1_da = dpk_da;
+ dqkm2_da = dqkm1_da;
+ dqkm1_da = dqk_da;
+
+ if (numext::abs(pk) > big) {
+ pkm2 *= biginv;
+ pkm1 *= biginv;
+ qkm2 *= biginv;
+ qkm1 *= biginv;
+
+ dpkm2_da *= biginv;
+ dpkm1_da *= biginv;
+ dqkm2_da *= biginv;
+ dqkm1_da *= biginv;
+ }
+ }
+
+ /* Differentiate ax = x**a * exp(-x) / gamma(a) with respect to a */
+ Scalar dlogax_da = numext::log(x) - digamma_impl<Scalar>::run(a);
+ Scalar dax_da = ax * dlogax_da;
+
+ switch (mode) {
+ case VALUE:
+ return ans * ax;
+ case DERIVATIVE:
+ return ans * dax_da + dans_da * ax;
+ case SAMPLE_DERIVATIVE:
+ default: // this is needed to suppress clang warning
+ return -(dans_da + ans * dlogax_da) * x;
+ }
+ }
+};
+
+template <typename Scalar, IgammaComputationMode mode>
+struct igamma_series_impl {
+ /* Computes igam(a, x) or its derivative (depending on the mode)
+ * using the series expansion of the incomplete Gamma function.
+ *
+ * Preconditions:
+ * x > 0
+ * a > 0
+ * !(x > 1 && x > a)
+ */
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar a, Scalar x) {
+ const Scalar zero = 0;
+ const Scalar one = 1;
+ const Scalar machep = cephes_helper<Scalar>::machep();
+
+ Scalar ax = main_igamma_term<Scalar>(a, x);
+
+ // This is independent of mode. If this value is zero,
+ // then the function value is zero. If the function value is zero,
+ // then we are in a neighborhood where the function value evaluates to zero,
+ // so the derivative is zero.
+ if (ax == zero) {
+ return zero;
+ }
+
+ ax /= a;
+
+ /* power series */
+ Scalar r = a;
+ Scalar c = one;
+ Scalar ans = one;
+
+ Scalar dc_da = zero;
+ Scalar dans_da = zero;
+
+ for (int i = 0; i < igamma_num_iterations<Scalar, mode>(); i++) {
+ r += one;
+ Scalar term = x / r;
+ Scalar dterm_da = -x / (r * r);
+ dc_da = term * dc_da + dterm_da * c;
+ dans_da += dc_da;
+ c *= term;
+ ans += c;
+
+ if (mode == VALUE) {
+ if (c <= machep * ans) {
+ break;
+ }
+ } else {
+ if (numext::abs(dc_da) <= machep * numext::abs(dans_da)) {
+ break;
+ }
+ }
+ }
+
+ Scalar dlogax_da = numext::log(x) - digamma_impl<Scalar>::run(a + one);
+ Scalar dax_da = ax * dlogax_da;
+
+ switch (mode) {
+ case VALUE:
+ return ans * ax;
+ case DERIVATIVE:
+ return ans * dax_da + dans_da * ax;
+ case SAMPLE_DERIVATIVE:
+ default: // this is needed to suppress clang warning
+ return -(dans_da + ans * dlogax_da) * x / a;
+ }
+ }
+};
+
#if !EIGEN_HAS_C99_MATH
template <typename Scalar>
@@ -466,8 +962,6 @@ struct igammac_impl {
#else
-template <typename Scalar> struct igamma_impl; // predeclare igamma_impl
-
template <typename Scalar>
struct igammac_impl {
EIGEN_DEVICE_FUNC
@@ -535,93 +1029,15 @@ struct igammac_impl {
return nan;
}
- if ((x < one) || (x < a)) {
- /* The checks above ensure that we meet the preconditions for
- * igamma_impl::Impl(), so call it, rather than igamma_impl::Run().
- * Calling Run() would also work, but in that case the compiler may not be
- * able to prove that igammac_impl::Run and igamma_impl::Run are not
- * mutually recursive. This leads to worse code, particularly on
- * platforms like nvptx, where recursion is allowed only begrudgingly.
- */
- return (one - igamma_impl<Scalar>::Impl(a, x));
- }
-
- return Impl(a, x);
- }
-
- private:
- /* igamma_impl calls igammac_impl::Impl. */
- friend struct igamma_impl<Scalar>;
-
- /* Actually computes igamc(a, x).
- *
- * Preconditions:
- * a > 0
- * x >= 1
- * x >= a
- */
- EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) {
- const Scalar zero = 0;
- const Scalar one = 1;
- const Scalar two = 2;
- const Scalar machep = cephes_helper<Scalar>::machep();
- const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
- const Scalar big = cephes_helper<Scalar>::big();
- const Scalar biginv = cephes_helper<Scalar>::biginv();
- const Scalar inf = NumTraits<Scalar>::infinity();
-
- Scalar ans, ax, c, yc, r, t, y, z;
- Scalar pk, pkm1, pkm2, qk, qkm1, qkm2;
-
- if (x == inf) return zero; // std::isinf crashes on CUDA
-
- /* Compute x**a * exp(-x) / gamma(a) */
- ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
- if (ax < -maxlog) { // underflow
- return zero;
+ if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans
+ return nan;
}
- ax = numext::exp(ax);
- // continued fraction
- y = one - a;
- z = x + y + one;
- c = zero;
- pkm2 = one;
- qkm2 = x;
- pkm1 = x + one;
- qkm1 = z * x;
- ans = pkm1 / qkm1;
-
- while (true) {
- c += one;
- y += one;
- z += two;
- yc = y * c;
- pk = pkm1 * z - pkm2 * yc;
- qk = qkm1 * z - qkm2 * yc;
- if (qk != zero) {
- r = pk / qk;
- t = numext::abs((ans - r) / r);
- ans = r;
- } else {
- t = one;
- }
- pkm2 = pkm1;
- pkm1 = pk;
- qkm2 = qkm1;
- qkm1 = qk;
- if (numext::abs(pk) > big) {
- pkm2 *= biginv;
- pkm1 *= biginv;
- qkm2 *= biginv;
- qkm1 *= biginv;
- }
- if (t <= machep) {
- break;
- }
+ if ((x < one) || (x < a)) {
+ return (one - igamma_series_impl<Scalar, VALUE>::run(a, x));
}
- return (ans * ax);
+ return igammac_cf_impl<Scalar, VALUE>::run(a, x);
}
};
@@ -631,15 +1047,10 @@ struct igammac_impl {
* Implementation of igamma (incomplete gamma integral), based on Cephes but requires C++11/C99 *
************************************************************************************************/
-template <typename Scalar>
-struct igamma_retval {
- typedef Scalar type;
-};
-
#if !EIGEN_HAS_C99_MATH
-template <typename Scalar>
-struct igamma_impl {
+template <typename Scalar, IgammaComputationMode mode>
+struct igamma_generic_impl {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar x) {
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
@@ -650,69 +1061,17 @@ struct igamma_impl {
#else
-template <typename Scalar>
-struct igamma_impl {
+template <typename Scalar, IgammaComputationMode mode>
+struct igamma_generic_impl {
EIGEN_DEVICE_FUNC
static Scalar run(Scalar a, Scalar x) {
- /* igam()
- * Incomplete gamma integral
- *
- *
- *
- * SYNOPSIS:
- *
- * double a, x, y, igam();
- *
- * y = igam( a, x );
- *
- * DESCRIPTION:
- *
- * The function is defined by
- *
- * x
- * -
- * 1 | | -t a-1
- * igam(a,x) = ----- | e t dt.
- * - | |
- * | (a) -
- * 0
- *
- *
- * In this implementation both arguments must be positive.
- * The integral is evaluated by either a power series or
- * continued fraction expansion, depending on the relative
- * values of a and x.
- *
- * ACCURACY (double):
- *
- * Relative error:
- * arithmetic domain # trials peak rms
- * IEEE 0,30 200000 3.6e-14 2.9e-15
- * IEEE 0,100 300000 9.9e-14 1.5e-14
- *
- *
- * ACCURACY (float):
- *
- * Relative error:
- * arithmetic domain # trials peak rms
- * IEEE 0,30 20000 7.8e-6 5.9e-7
- *
- */
- /*
- Cephes Math Library Release 2.2: June, 1992
- Copyright 1985, 1987, 1992 by Stephen L. Moshier
- Direct inquiries to 30 Frost Street, Cambridge, MA 02140
- */
-
-
- /* left tail of incomplete gamma function:
- *
- * inf. k
- * a -x - x
- * x e > ----------
- * - -
- * k=0 | (a+k+1)
+ /* Depending on the mode, returns
+ * - VALUE: incomplete Gamma function igamma(a, x)
+ * - DERIVATIVE: derivative of incomplete Gamma function d/da igamma(a, x)
+ * - SAMPLE_DERIVATIVE: implicit derivative of a Gamma random variable
+ * x ~ Gamma(x | a, 1), dx/da = -1 / Gamma(x | a, 1) * d igamma(a, x) / da
*
+ * Derivatives are implemented by forward-mode differentiation.
*/
const Scalar zero = 0;
const Scalar one = 1;
@@ -724,67 +1083,167 @@ struct igamma_impl {
return nan;
}
+ if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans
+ return nan;
+ }
+
if ((x > one) && (x > a)) {
- /* The checks above ensure that we meet the preconditions for
- * igammac_impl::Impl(), so call it, rather than igammac_impl::Run().
- * Calling Run() would also work, but in that case the compiler may not be
- * able to prove that igammac_impl::Run and igamma_impl::Run are not
- * mutually recursive. This leads to worse code, particularly on
- * platforms like nvptx, where recursion is allowed only begrudgingly.
- */
- return (one - igammac_impl<Scalar>::Impl(a, x));
+ Scalar ret = igammac_cf_impl<Scalar, mode>::run(a, x);
+ if (mode == VALUE) {
+ return one - ret;
+ } else {
+ return -ret;
+ }
}
- return Impl(a, x);
+ return igamma_series_impl<Scalar, mode>::run(a, x);
}
+};
+
+#endif // EIGEN_HAS_C99_MATH
- private:
- /* igammac_impl calls igamma_impl::Impl. */
- friend struct igammac_impl<Scalar>;
+template <typename Scalar>
+struct igamma_retval {
+ typedef Scalar type;
+};
- /* Actually computes igam(a, x).
+template <typename Scalar>
+struct igamma_impl : igamma_generic_impl<Scalar, VALUE> {
+ /* igam()
+ * Incomplete gamma integral.
+ *
+ * The CDF of Gamma(a, 1) random variable at the point x.
+ *
+ * Accuracy estimation. For each a in [10^-2, 10^-1...10^3] we sample
+ * 50 Gamma random variables x ~ Gamma(x | a, 1), a total of 300 points.
+ * The ground truth is computed by mpmath. Mean absolute error:
+ * float: 1.26713e-05
+ * double: 2.33606e-12
+ *
+ * Cephes documentation below.
+ *
+ * SYNOPSIS:
+ *
+ * double a, x, y, igam();
+ *
+ * y = igam( a, x );
+ *
+ * DESCRIPTION:
+ *
+ * The function is defined by
+ *
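+ * igam(a,x) = \frac{1}{\Gamma(a)} \int_0^x e^{-t} t^{a-1} \, dt.
+ *
+ * (LaTeX notation: the integral of e^{-t} t^{a-1} over t from 0 to x,
+ * divided by the gamma function \Gamma(a).)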
+ *
+ *
+ * In this implementation both arguments must be positive.
+ * The integral is evaluated by either a power series or
+ * continued fraction expansion, depending on the relative
+ * values of a and x.
+ *
+ * ACCURACY (double):
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,30 200000 3.6e-14 2.9e-15
+ * IEEE 0,100 300000 9.9e-14 1.5e-14
+ *
+ *
+ * ACCURACY (float):
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,30 20000 7.8e-6 5.9e-7
*
- * Preconditions:
- * x > 0
- * a > 0
- * !(x > 1 && x > a)
*/
- EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) {
- const Scalar zero = 0;
- const Scalar one = 1;
- const Scalar machep = cephes_helper<Scalar>::machep();
- const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
+ /*
+ Cephes Math Library Release 2.2: June, 1992
+ Copyright 1985, 1987, 1992 by Stephen L. Moshier
+ Direct inquiries to 30 Frost Street, Cambridge, MA 02140
+ */
- Scalar ans, ax, c, r;
+ /* left tail of incomplete gamma function:
+ *
+ * x^a e^{-x} \sum_{k=0}^{\infty} \frac{x^k}{\Gamma(a+k+1)}
+ *
+ * (LaTeX notation; \Gamma is the gamma function.)
+ *
+ */
+};
- /* Compute x**a * exp(-x) / gamma(a) */
- ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
- if (ax < -maxlog) {
- // underflow
- return zero;
- }
- ax = numext::exp(ax);
+template <typename Scalar>
+struct igamma_der_a_retval : igamma_retval<Scalar> {};
- /* power series */
- r = a;
- c = one;
- ans = one;
+template <typename Scalar>
+struct igamma_der_a_impl : igamma_generic_impl<Scalar, DERIVATIVE> {
+ /* Derivative of the incomplete Gamma function with respect to a.
+ *
+ * Computes d/da igamma(a, x) by forward differentiation of the igamma code.
+ *
+ * Accuracy estimation. For each a in [10^-2, 10^-1...10^3] we sample
+ * 50 Gamma random variables x ~ Gamma(x | a, 1), a total of 300 points.
+ * The ground truth is computed by mpmath. Mean absolute error:
+ * float: 6.17992e-07
+ * double: 4.60453e-12
+ *
+ * Reference:
+ * R. Moore. "Algorithm AS 187: Derivatives of the incomplete gamma
+ * integral". Journal of the Royal Statistical Society. 1982
+ */
+};
- while (true) {
- r += one;
- c *= x/r;
- ans += c;
- if (c/ans <= machep) {
- break;
- }
- }
+template <typename Scalar>
+struct gamma_sample_der_alpha_retval : igamma_retval<Scalar> {};
- return (ans * ax / a);
- }
+template <typename Scalar>
+struct gamma_sample_der_alpha_impl
+ : igamma_generic_impl<Scalar, SAMPLE_DERIVATIVE> {
+ /* Derivative of a Gamma random variable sample with respect to alpha.
+ *
+ * Consider a sample of a Gamma random variable with the concentration
+ * parameter alpha: sample ~ Gamma(alpha, 1). The reparameterization
+ * derivative that we want to compute is dsample / dalpha =
+ * d igammainv(alpha, u) / dalpha, where u = igamma(alpha, sample).
+ * However, this formula is numerically unstable and expensive, so instead
+ * we use implicit differentiation:
+ *
+ * igamma(alpha, sample) = u, where u ~ Uniform(0, 1).
+ * Apply d / dalpha to both sides:
+ * d igamma(alpha, sample) / dalpha
+ * + d igamma(alpha, sample) / dsample * dsample/dalpha = 0
+ * d igamma(alpha, sample) / dalpha
+ * + Gamma(sample | alpha, 1) dsample / dalpha = 0
+ * dsample/dalpha = - (d igamma(alpha, sample) / dalpha)
+ * / Gamma(sample | alpha, 1)
+ *
+ * Here Gamma(sample | alpha, 1) is the PDF of the Gamma distribution
+ * (note that the derivative of the CDF w.r.t. sample is the PDF).
+ * See the reference below for more details.
+ *
+ * The derivative of igamma(alpha, sample) is computed by forward
+ * differentiation of the igamma code. Division by the Gamma PDF is performed
+ * in the same code, increasing the accuracy and speed due to cancellation
+ * of some terms.
+ *
+ * Accuracy estimation. For each alpha in [10^-2, 10^-1...10^3] we sample
+ * 50 Gamma random variables sample ~ Gamma(sample | alpha, 1), a total of 300
+ * points. The ground truth is computed by mpmath. Mean absolute error:
+ * float: 2.1686e-06
+ * double: 1.4774e-12
+ *
+ * Reference:
+ * M. Figurnov, S. Mohamed, A. Mnih "Implicit Reparameterization Gradients".
+ * 2018
+ */
};
-#endif // EIGEN_HAS_C99_MATH
-
/*****************************************************************************
* Implementation of Riemann zeta function of two arguments, based on Cephes *
*****************************************************************************/
@@ -944,7 +1403,12 @@ struct zeta_impl {
{
if(q == numext::floor(q))
{
- return maxnum;
+ if (x == numext::floor(x) && long(x) % 2 == 0) {
+ return maxnum;
+ }
+ else {
+ return nan;
+ }
}
p = x;
r = numext::floor(p);
@@ -1020,11 +1484,11 @@ struct polygamma_impl {
Scalar nplus = n + one;
const Scalar nan = NumTraits<Scalar>::quiet_NaN();
- // Check that n is an integer
- if (numext::floor(n) != n) {
+ // Check that n is a non-negative integer
+ if (numext::floor(n) != n || n < zero) {
return nan;
}
- // Just return the digamma function for n = 1
+ // Just return the digamma function for n = 0
else if (n == zero) {
return digamma_impl<Scalar>::run(x);
}
@@ -1392,7 +1856,7 @@ struct betainc_helper<double> {
if ((a + b) < maxgam && numext::abs(u) < maxlog) {
t = gamma(a + b) / (gamma(a) * gamma(b));
s = s * t * pow(x, a);
- } else {
+ }
*/
t = lgamma_impl<double>::run(a + b) - lgamma_impl<double>::run(a) -
lgamma_impl<double>::run(b) + u + numext::log(s);
@@ -1540,12 +2004,30 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar)
}
template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(ndtri, Scalar)
+ ndtri(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(ndtri, Scalar)::run(x);
+}
+
+template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar)
igamma(const Scalar& a, const Scalar& x) {
return EIGEN_MATHFUNC_IMPL(igamma, Scalar)::run(a, x);
}
template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma_der_a, Scalar)
+ igamma_der_a(const Scalar& a, const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(igamma_der_a, Scalar)::run(a, x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(gamma_sample_der_alpha, Scalar)
+ gamma_sample_der_alpha(const Scalar& a, const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(gamma_sample_der_alpha, Scalar)::run(a, x);
+}
+
+template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igammac, Scalar)
igammac(const Scalar& a, const Scalar& x) {
return EIGEN_MATHFUNC_IMPL(igammac, Scalar)::run(a, x);
@@ -1558,8 +2040,6 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(betainc, Scalar)
}
} // end namespace numext
-
-
} // end namespace Eigen
#endif // EIGEN_SPECIAL_FUNCTIONS_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
index 46d60d323..2bb017921 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
@@ -38,10 +38,32 @@ Packet perf(const Packet& a) { using numext::erf; return erf(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
+/** \internal \returns the ndtri(\a a) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pndtri(const Packet& a) {
+ typedef typename unpacket_traits<Packet>::type ScalarType;
+ using internal::generic_ndtri; return generic_ndtri<Packet, ScalarType>(a);
+}
+
/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); }
+/** \internal \returns the derivative of the incomplete gamma function
+ * igamma_der_a(\a a, \a x) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigamma_der_a(const Packet& a, const Packet& x) {
+ using numext::igamma_der_a; return igamma_der_a(a, x);
+}
+
+/** \internal \returns the derivative of a sample of a
+ * Gamma(alpha, 1) random variable with respect to the parameter alpha,
+ * gamma_sample_der_alpha(\a alpha, \a sample) */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pgamma_sample_der_alpha(const Packet& alpha, const Packet& sample) {
+ using numext::gamma_sample_der_alpha; return gamma_sample_der_alpha(alpha, sample);
+}
+
/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); }
@@ -55,4 +77,3 @@ Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext
} // end namespace Eigen
#endif // EIGEN_SPECIALFUNCTIONS_PACKETMATH_H
-
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h
new file mode 100644
index 000000000..2d7669209
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/AVX/BesselFunctions.h
@@ -0,0 +1,46 @@
+#ifndef EIGEN_AVX_BESSELFUNCTIONS_H
+#define EIGEN_AVX_BESSELFUNCTIONS_H
+
+namespace Eigen {
+namespace internal {
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_i0)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_i0)
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_i0e)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_i0e)
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_i1)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_i1)
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_i1e)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_i1e)
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_j0)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_j0)
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_j1)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_j1)
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_k0)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_k0)
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_k0e)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_k0e)
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_k1)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_k1)
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_k1e)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_k1e)
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_y0)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_y0)
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, pbessel_y1)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pbessel_y1)
+
+} // namespace internal
+} // namespace Eigen
+
+#endif // EIGEN_AVX_BESSELFUNCTIONS_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h
new file mode 100644
index 000000000..35e62a8ac
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/AVX/SpecialFunctions.h
@@ -0,0 +1,16 @@
+#ifndef EIGEN_AVX_SPECIALFUNCTIONS_H
+#define EIGEN_AVX_SPECIALFUNCTIONS_H
+
+namespace Eigen {
+namespace internal {
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, perf)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, perf)
+
+F16_PACKET_FUNCTION(Packet8f, Packet8h, pndtri)
+BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pndtri)
+
+} // namespace internal
+} // namespace Eigen
+
+#endif // EIGEN_AVX_SPECIALFUNCTIONS_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h
new file mode 100644
index 000000000..7dd3c3e5b
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h
@@ -0,0 +1,46 @@
+#ifndef EIGEN_AVX512_BESSELFUNCTIONS_H
+#define EIGEN_AVX512_BESSELFUNCTIONS_H
+
+namespace Eigen {
+namespace internal {
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_i0)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_i0)
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_i0e)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_i0e)
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_i1)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_i1)
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_i1e)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_i1e)
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_j0)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_j0)
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_j1)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_j1)
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_k0)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_k0)
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_k0e)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_k0e)
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_k1)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_k1)
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_k1e)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_k1e)
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_y0)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_y0)
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_y1)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_y1)
+
+} // namespace internal
+} // namespace Eigen
+
+#endif // EIGEN_AVX512_BESSELFUNCTIONS_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h
new file mode 100644
index 000000000..79878f2b6
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/SpecialFunctions.h
@@ -0,0 +1,16 @@
+#ifndef EIGEN_AVX512_SPECIALFUNCTIONS_H
+#define EIGEN_AVX512_SPECIALFUNCTIONS_H
+
+namespace Eigen {
+namespace internal {
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, perf)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, perf)
+
+F16_PACKET_FUNCTION(Packet16f, Packet16h, pndtri)
+BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pndtri)
+
+} // namespace internal
+} // namespace Eigen
+
+#endif // EIGEN_AVX512_SPECIALFUNCTIONS_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
deleted file mode 100644
index ec4fa8448..000000000
--- a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
+++ /dev/null
@@ -1,165 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CUDA_SPECIALFUNCTIONS_H
-#define EIGEN_CUDA_SPECIALFUNCTIONS_H
-
-namespace Eigen {
-
-namespace internal {
-
-// Make sure this is only available when targeting a GPU: we don't want to
-// introduce conflicts between these packet_traits definitions and the ones
-// we'll use on the host side (SSE, AVX, ...)
-#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 plgamma<float4>(const float4& a)
-{
- return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 plgamma<double2>(const double2& a)
-{
- using numext::lgamma;
- return make_double2(lgamma(a.x), lgamma(a.y));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 pdigamma<float4>(const float4& a)
-{
- using numext::digamma;
- return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 pdigamma<double2>(const double2& a)
-{
- using numext::digamma;
- return make_double2(digamma(a.x), digamma(a.y));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 pzeta<float4>(const float4& x, const float4& q)
-{
- using numext::zeta;
- return make_float4(zeta(x.x, q.x), zeta(x.y, q.y), zeta(x.z, q.z), zeta(x.w, q.w));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 pzeta<double2>(const double2& x, const double2& q)
-{
- using numext::zeta;
- return make_double2(zeta(x.x, q.x), zeta(x.y, q.y));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 ppolygamma<float4>(const float4& n, const float4& x)
-{
- using numext::polygamma;
- return make_float4(polygamma(n.x, x.x), polygamma(n.y, x.y), polygamma(n.z, x.z), polygamma(n.w, x.w));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 ppolygamma<double2>(const double2& n, const double2& x)
-{
- using numext::polygamma;
- return make_double2(polygamma(n.x, x.x), polygamma(n.y, x.y));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 perf<float4>(const float4& a)
-{
- return make_float4(erff(a.x), erff(a.y), erff(a.z), erff(a.w));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 perf<double2>(const double2& a)
-{
- using numext::erf;
- return make_double2(erf(a.x), erf(a.y));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 perfc<float4>(const float4& a)
-{
- using numext::erfc;
- return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 perfc<double2>(const double2& a)
-{
- using numext::erfc;
- return make_double2(erfc(a.x), erfc(a.y));
-}
-
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 pigamma<float4>(const float4& a, const float4& x)
-{
- using numext::igamma;
- return make_float4(
- igamma(a.x, x.x),
- igamma(a.y, x.y),
- igamma(a.z, x.z),
- igamma(a.w, x.w));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 pigamma<double2>(const double2& a, const double2& x)
-{
- using numext::igamma;
- return make_double2(igamma(a.x, x.x), igamma(a.y, x.y));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 pigammac<float4>(const float4& a, const float4& x)
-{
- using numext::igammac;
- return make_float4(
- igammac(a.x, x.x),
- igammac(a.y, x.y),
- igammac(a.z, x.z),
- igammac(a.w, x.w));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 pigammac<double2>(const double2& a, const double2& x)
-{
- using numext::igammac;
- return make_double2(igammac(a.x, x.x), igammac(a.y, x.y));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 pbetainc<float4>(const float4& a, const float4& b, const float4& x)
-{
- using numext::betainc;
- return make_float4(
- betainc(a.x, b.x, x.x),
- betainc(a.y, b.y, x.y),
- betainc(a.z, b.z, x.z),
- betainc(a.w, b.w, x.w));
-}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 pbetainc<double2>(const double2& a, const double2& b, const double2& x)
-{
- using numext::betainc;
- return make_double2(betainc(a.x, b.x, x.x), betainc(a.y, b.y, x.y));
-}
-
-#endif
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_CUDA_SPECIALFUNCTIONS_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h
new file mode 100644
index 000000000..dd3bf4dd1
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/GPU/SpecialFunctions.h
@@ -0,0 +1,369 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_GPU_SPECIALFUNCTIONS_H
+#define EIGEN_GPU_SPECIALFUNCTIONS_H
+
+namespace Eigen {
+
+namespace internal {
+
+// Make sure this is only available when targeting a GPU: we don't want to
+// introduce conflicts between these packet_traits definitions and the ones
+// we'll use on the host side (SSE, AVX, ...)
+#if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 plgamma<float4>(const float4& a)
+{
+ return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 plgamma<double2>(const double2& a)
+{
+ using numext::lgamma;
+ return make_double2(lgamma(a.x), lgamma(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pdigamma<float4>(const float4& a)
+{
+ using numext::digamma;
+ return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pdigamma<double2>(const double2& a)
+{
+ using numext::digamma;
+ return make_double2(digamma(a.x), digamma(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pzeta<float4>(const float4& x, const float4& q)
+{
+ using numext::zeta;
+ return make_float4(zeta(x.x, q.x), zeta(x.y, q.y), zeta(x.z, q.z), zeta(x.w, q.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pzeta<double2>(const double2& x, const double2& q)
+{
+ using numext::zeta;
+ return make_double2(zeta(x.x, q.x), zeta(x.y, q.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 ppolygamma<float4>(const float4& n, const float4& x)
+{
+ using numext::polygamma;
+ return make_float4(polygamma(n.x, x.x), polygamma(n.y, x.y), polygamma(n.z, x.z), polygamma(n.w, x.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 ppolygamma<double2>(const double2& n, const double2& x)
+{
+ using numext::polygamma;
+ return make_double2(polygamma(n.x, x.x), polygamma(n.y, x.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 perf<float4>(const float4& a)
+{
+ return make_float4(erff(a.x), erff(a.y), erff(a.z), erff(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 perf<double2>(const double2& a)
+{
+ using numext::erf;
+ return make_double2(erf(a.x), erf(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 perfc<float4>(const float4& a)
+{
+ using numext::erfc;
+ return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 perfc<double2>(const double2& a)
+{
+ using numext::erfc;
+ return make_double2(erfc(a.x), erfc(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pndtri<float4>(const float4& a)
+{
+ using numext::ndtri;
+ return make_float4(ndtri(a.x), ndtri(a.y), ndtri(a.z), ndtri(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pndtri<double2>(const double2& a)
+{
+ using numext::ndtri;
+ return make_double2(ndtri(a.x), ndtri(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pigamma<float4>(const float4& a, const float4& x)
+{
+ using numext::igamma;
+ return make_float4(
+ igamma(a.x, x.x),
+ igamma(a.y, x.y),
+ igamma(a.z, x.z),
+ igamma(a.w, x.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pigamma<double2>(const double2& a, const double2& x)
+{
+ using numext::igamma;
+ return make_double2(igamma(a.x, x.x), igamma(a.y, x.y));
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pigamma_der_a<float4>(
+ const float4& a, const float4& x) {
+ using numext::igamma_der_a;
+ return make_float4(igamma_der_a(a.x, x.x), igamma_der_a(a.y, x.y),
+ igamma_der_a(a.z, x.z), igamma_der_a(a.w, x.w));
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pigamma_der_a<double2>(const double2& a, const double2& x) {
+ using numext::igamma_der_a;
+ return make_double2(igamma_der_a(a.x, x.x), igamma_der_a(a.y, x.y));
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pgamma_sample_der_alpha<float4>(
+ const float4& alpha, const float4& sample) {
+ using numext::gamma_sample_der_alpha;
+ return make_float4(
+ gamma_sample_der_alpha(alpha.x, sample.x),
+ gamma_sample_der_alpha(alpha.y, sample.y),
+ gamma_sample_der_alpha(alpha.z, sample.z),
+ gamma_sample_der_alpha(alpha.w, sample.w));
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pgamma_sample_der_alpha<double2>(const double2& alpha, const double2& sample) {
+ using numext::gamma_sample_der_alpha;
+ return make_double2(
+ gamma_sample_der_alpha(alpha.x, sample.x),
+ gamma_sample_der_alpha(alpha.y, sample.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pigammac<float4>(const float4& a, const float4& x)
+{
+ using numext::igammac;
+ return make_float4(
+ igammac(a.x, x.x),
+ igammac(a.y, x.y),
+ igammac(a.z, x.z),
+ igammac(a.w, x.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pigammac<double2>(const double2& a, const double2& x)
+{
+ using numext::igammac;
+ return make_double2(igammac(a.x, x.x), igammac(a.y, x.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pbetainc<float4>(const float4& a, const float4& b, const float4& x)
+{
+ using numext::betainc;
+ return make_float4(
+ betainc(a.x, b.x, x.x),
+ betainc(a.y, b.y, x.y),
+ betainc(a.z, b.z, x.z),
+ betainc(a.w, b.w, x.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pbetainc<double2>(const double2& a, const double2& b, const double2& x)
+{
+ using numext::betainc;
+ return make_double2(betainc(a.x, b.x, x.x), betainc(a.y, b.y, x.y));
+}
+
+template <> // float4 overload: calls numext::bessel_i0e once per lane (x, y, z, w) and repacks the four results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_i0e<float4>(const float4& x) {
+ using numext::bessel_i0e; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_float4(bessel_i0e(x.x), bessel_i0e(x.y), bessel_i0e(x.z), bessel_i0e(x.w));
+}
+
+template <> // double2 overload: calls numext::bessel_i0e once per lane (x, y) and repacks the two results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pbessel_i0e<double2>(const double2& x) {
+ using numext::bessel_i0e; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_double2(bessel_i0e(x.x), bessel_i0e(x.y));
+}
+
+template <> // float4 overload: calls numext::bessel_i0 once per lane (x, y, z, w) and repacks the four results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_i0<float4>(const float4& x) {
+ using numext::bessel_i0; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_float4(bessel_i0(x.x), bessel_i0(x.y), bessel_i0(x.z), bessel_i0(x.w));
+}
+
+template <> // double2 overload: calls numext::bessel_i0 once per lane (x, y) and repacks the two results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pbessel_i0<double2>(const double2& x) {
+ using numext::bessel_i0; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_double2(bessel_i0(x.x), bessel_i0(x.y));
+}
+
+template <> // float4 overload: calls numext::bessel_i1e once per lane (x, y, z, w) and repacks the four results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_i1e<float4>(const float4& x) {
+ using numext::bessel_i1e; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_float4(bessel_i1e(x.x), bessel_i1e(x.y), bessel_i1e(x.z), bessel_i1e(x.w));
+}
+
+template <> // double2 overload: calls numext::bessel_i1e once per lane (x, y) and repacks the two results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pbessel_i1e<double2>(const double2& x) {
+ using numext::bessel_i1e; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_double2(bessel_i1e(x.x), bessel_i1e(x.y));
+}
+
+template <> // float4 overload: calls numext::bessel_i1 once per lane (x, y, z, w) and repacks the four results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_i1<float4>(const float4& x) {
+ using numext::bessel_i1; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_float4(bessel_i1(x.x), bessel_i1(x.y), bessel_i1(x.z), bessel_i1(x.w));
+}
+
+template <> // double2 overload: calls numext::bessel_i1 once per lane (x, y) and repacks the two results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pbessel_i1<double2>(const double2& x) {
+ using numext::bessel_i1; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_double2(bessel_i1(x.x), bessel_i1(x.y));
+}
+
+template <> // float4 overload: calls numext::bessel_k0e once per lane (x, y, z, w) and repacks the four results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_k0e<float4>(const float4& x) {
+ using numext::bessel_k0e; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_float4(bessel_k0e(x.x), bessel_k0e(x.y), bessel_k0e(x.z), bessel_k0e(x.w));
+}
+
+template <> // double2 overload: calls numext::bessel_k0e once per lane (x, y) and repacks the two results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pbessel_k0e<double2>(const double2& x) {
+ using numext::bessel_k0e; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_double2(bessel_k0e(x.x), bessel_k0e(x.y));
+}
+
+template <> // float4 overload: calls numext::bessel_k0 once per lane (x, y, z, w) and repacks the four results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_k0<float4>(const float4& x) {
+ using numext::bessel_k0; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_float4(bessel_k0(x.x), bessel_k0(x.y), bessel_k0(x.z), bessel_k0(x.w));
+}
+
+template <> // double2 overload: calls numext::bessel_k0 once per lane (x, y) and repacks the two results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pbessel_k0<double2>(const double2& x) {
+ using numext::bessel_k0; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_double2(bessel_k0(x.x), bessel_k0(x.y));
+}
+
+template <> // float4 overload: calls numext::bessel_k1e once per lane (x, y, z, w) and repacks the four results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_k1e<float4>(const float4& x) {
+ using numext::bessel_k1e; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_float4(bessel_k1e(x.x), bessel_k1e(x.y), bessel_k1e(x.z), bessel_k1e(x.w));
+}
+
+template <> // double2 overload: calls numext::bessel_k1e once per lane (x, y) and repacks the two results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pbessel_k1e<double2>(const double2& x) {
+ using numext::bessel_k1e; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_double2(bessel_k1e(x.x), bessel_k1e(x.y));
+}
+
+template <> // float4 overload: calls numext::bessel_k1 once per lane (x, y, z, w) and repacks the four results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_k1<float4>(const float4& x) {
+ using numext::bessel_k1; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_float4(bessel_k1(x.x), bessel_k1(x.y), bessel_k1(x.z), bessel_k1(x.w));
+}
+
+template <> // double2 overload: calls numext::bessel_k1 once per lane (x, y) and repacks the two results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pbessel_k1<double2>(const double2& x) {
+ using numext::bessel_k1; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_double2(bessel_k1(x.x), bessel_k1(x.y));
+}
+
+template <> // float4 overload: calls numext::bessel_j0 once per lane (x, y, z, w) and repacks the four results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_j0<float4>(const float4& x) {
+ using numext::bessel_j0; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_float4(bessel_j0(x.x), bessel_j0(x.y), bessel_j0(x.z), bessel_j0(x.w));
+}
+
+template <> // double2 overload: calls numext::bessel_j0 once per lane (x, y) and repacks the two results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pbessel_j0<double2>(const double2& x) {
+ using numext::bessel_j0; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_double2(bessel_j0(x.x), bessel_j0(x.y));
+}
+
+template <> // float4 overload: calls numext::bessel_j1 once per lane (x, y, z, w) and repacks the four results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_j1<float4>(const float4& x) {
+ using numext::bessel_j1; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_float4(bessel_j1(x.x), bessel_j1(x.y), bessel_j1(x.z), bessel_j1(x.w));
+}
+
+template <> // double2 overload: calls numext::bessel_j1 once per lane (x, y) and repacks the two results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pbessel_j1<double2>(const double2& x) {
+ using numext::bessel_j1; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_double2(bessel_j1(x.x), bessel_j1(x.y));
+}
+
+template <> // float4 overload: calls numext::bessel_y0 once per lane (x, y, z, w) and repacks the four results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_y0<float4>(const float4& x) {
+ using numext::bessel_y0; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_float4(bessel_y0(x.x), bessel_y0(x.y), bessel_y0(x.z), bessel_y0(x.w));
+}
+
+template <> // double2 overload: calls numext::bessel_y0 once per lane (x, y) and repacks the two results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pbessel_y0<double2>(const double2& x) {
+ using numext::bessel_y0; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_double2(bessel_y0(x.x), bessel_y0(x.y));
+}
+
+template <> // float4 overload: calls numext::bessel_y1 once per lane (x, y, z, w) and repacks the four results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pbessel_y1<float4>(const float4& x) {
+ using numext::bessel_y1; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_float4(bessel_y1(x.x), bessel_y1(x.y), bessel_y1(x.z), bessel_y1(x.w));
+}
+
+template <> // double2 overload: calls numext::bessel_y1 once per lane (x, y) and repacks the two results.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
+pbessel_y1<double2>(const double2& x) {
+ using numext::bessel_y1; // brings the numext routine into scope for the unqualified per-lane calls below
+ return make_double2(bessel_y1(x.x), bessel_y1(x.y));
+}
+
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_GPU_SPECIALFUNCTIONS_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h
new file mode 100644
index 000000000..67433b057
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/NEON/BesselFunctions.h
@@ -0,0 +1,54 @@
+#ifndef EIGEN_NEON_BESSELFUNCTIONS_H
+#define EIGEN_NEON_BESSELFUNCTIONS_H
+
+namespace Eigen {
+namespace internal {
+
+#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+// NEON_HALF_TO_FLOAT_FUNCTIONS(METHOD): defines METHOD<Packet8hf> and METHOD<Packet4hf> by converting to float32, calling METHOD<Packet4f>, and converting back.
+#define NEON_HALF_TO_FLOAT_FUNCTIONS(METHOD) \
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+Packet8hf METHOD<Packet8hf>(const Packet8hf& x) { \
+ const Packet4f lo = METHOD<Packet4f>(vcvt_f32_f16(vget_low_f16(x))); /* low half of x, widened to f32 and evaluated */ \
+ const Packet4f hi = METHOD<Packet4f>(vcvt_f32_f16(vget_high_f16(x))); /* high half of x, widened to f32 and evaluated */ \
+ return vcombine_f16(vcvt_f16_f32(lo), vcvt_f16_f32(hi)); /* narrow both halves back to f16 and rejoin them */ \
+} \
+ \
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+Packet4hf METHOD<Packet4hf>(const Packet4hf& x) { \
+ return vcvt_f16_f32(METHOD<Packet4f>(vcvt_f32_f16(x))); /* single f32 round trip, no split needed */ \
+}
+// Instantiate the half-precision overloads for each Bessel packet function listed below.
+NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_i0)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_i0e)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_i1)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_i1e)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_j0)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_j1)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_k0)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_k0e)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_k1)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_k1e)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_y0)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pbessel_y1)
+// The helper macro is removed right after use so it does not outlive this guarded region.
+#undef NEON_HALF_TO_FLOAT_FUNCTIONS
+#endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_i0) // One BF16_PACKET_FUNCTION invocation per Bessel packet function; placed after the fp16 #endif, so not conditional on that guard.
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_i0e) // NOTE(review): macro is defined outside this file; presumably it defines the Packet4bf overload via Packet4f - confirm.
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_i1)
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_i1e)
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_j0)
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_j1)
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_k0)
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_k0e)
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_k1)
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_k1e)
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_y0)
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pbessel_y1)
+
+} // namespace internal
+} // namespace Eigen
+
+#endif // EIGEN_NEON_BESSELFUNCTIONS_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h
new file mode 100644
index 000000000..ec9295197
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/NEON/SpecialFunctions.h
@@ -0,0 +1,34 @@
+#ifndef EIGEN_NEON_SPECIALFUNCTIONS_H
+#define EIGEN_NEON_SPECIALFUNCTIONS_H
+
+namespace Eigen {
+namespace internal {
+
+#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+// NEON_HALF_TO_FLOAT_FUNCTIONS(METHOD): defines METHOD<Packet8hf> and METHOD<Packet4hf> by converting to float32, calling METHOD<Packet4f>, and converting back.
+#define NEON_HALF_TO_FLOAT_FUNCTIONS(METHOD) \
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+Packet8hf METHOD<Packet8hf>(const Packet8hf& x) { \
+ const Packet4f lo = METHOD<Packet4f>(vcvt_f32_f16(vget_low_f16(x))); /* low half of x, widened to f32 and evaluated */ \
+ const Packet4f hi = METHOD<Packet4f>(vcvt_f32_f16(vget_high_f16(x))); /* high half of x, widened to f32 and evaluated */ \
+ return vcombine_f16(vcvt_f16_f32(lo), vcvt_f16_f32(hi)); /* narrow both halves back to f16 and rejoin them */ \
+} \
+ \
+template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+Packet4hf METHOD<Packet4hf>(const Packet4hf& x) { \
+ return vcvt_f16_f32(METHOD<Packet4f>(vcvt_f32_f16(x))); /* single f32 round trip, no split needed */ \
+}
+// Instantiate the half-precision overloads for perf and pndtri.
+NEON_HALF_TO_FLOAT_FUNCTIONS(perf)
+NEON_HALF_TO_FLOAT_FUNCTIONS(pndtri)
+// The helper macro is removed right after use so it does not outlive this guarded region.
+#undef NEON_HALF_TO_FLOAT_FUNCTIONS
+#endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
+
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, perf) // Placed after the fp16 #endif, so not conditional on that guard.
+BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pndtri) // NOTE(review): macro is defined outside this file; presumably it defines the Packet4bf overload via Packet4f - confirm.
+
+} // namespace internal
+} // namespace Eigen
+
+#endif // EIGEN_NEON_SPECIALFUNCTIONS_H
diff --git a/unsupported/Eigen/src/Splines/Spline.h b/unsupported/Eigen/src/Splines/Spline.h
index 627f6e482..79edd52ce 100644
--- a/unsupported/Eigen/src/Splines/Spline.h
+++ b/unsupported/Eigen/src/Splines/Spline.h
@@ -191,7 +191,7 @@ namespace Eigen
DenseIndex span(Scalar u) const;
/**
- * \brief Computes the spang within the provided knot vector in which u is falling.
+ * \brief Computes the span within the provided knot vector in which u is falling.
**/
static DenseIndex Span(typename SplineTraits<Spline>::Scalar u, DenseIndex degree, const typename SplineTraits<Spline>::KnotVectorType& knots);
@@ -249,15 +249,13 @@ namespace Eigen
DenseIndex degree,
const typename Spline<_Scalar, _Dim, _Degree>::KnotVectorType& knots)
{
- typedef typename Spline<_Scalar, _Dim, _Degree>::BasisVectorType BasisVectorType;
-
const DenseIndex p = degree;
const DenseIndex i = Spline::Span(u, degree, knots);
const KnotVectorType& U = knots;
BasisVectorType left(p+1); left(0) = Scalar(0);
- BasisVectorType right(p+1); right(0) = Scalar(0);
+ BasisVectorType right(p+1); right(0) = Scalar(0);
VectorBlock<BasisVectorType,Degree>(left,1,p) = u - VectorBlock<const KnotVectorType,Degree>(U,i+1-p,p).reverse();
VectorBlock<BasisVectorType,Degree>(right,1,p) = VectorBlock<const KnotVectorType,Degree>(U,i+1,p) - u;
@@ -380,9 +378,6 @@ namespace Eigen
typedef Spline<_Scalar, _Dim, _Degree> SplineType;
enum { Order = SplineTraits<SplineType>::OrderAtCompileTime };
- typedef typename SplineTraits<SplineType>::Scalar Scalar;
- typedef typename SplineTraits<SplineType>::BasisVectorType BasisVectorType;
-
const DenseIndex span = SplineType::Span(u, p, U);
const DenseIndex n = (std::min)(p, order);
diff --git a/unsupported/Eigen/src/Splines/SplineFitting.h b/unsupported/Eigen/src/Splines/SplineFitting.h
index c761a9b3d..9f6e8afa0 100644
--- a/unsupported/Eigen/src/Splines/SplineFitting.h
+++ b/unsupported/Eigen/src/Splines/SplineFitting.h
@@ -17,8 +17,8 @@
#include "SplineFwd.h"
-#include <Eigen/LU>
-#include <Eigen/QR>
+#include "../../../../Eigen/LU"
+#include "../../../../Eigen/QR"
namespace Eigen
{
@@ -181,7 +181,7 @@ namespace Eigen
* \ingroup Splines_Module
*
* \param[in] pts The data points to which a spline should be fit.
- * \param[out] chord_lengths The resulting chord lenggth vector.
+ * \param[out] chord_lengths The resulting chord length vector.
*
* \sa Les Piegl and Wayne Tiller, The NURBS book (2nd ed.), 1997, 9.2.1 Global Curve Interpolation to Point Data
**/
@@ -385,7 +385,7 @@ namespace Eigen
{
const DenseIndex span = SplineType::Span(parameters[i], degree, knots);
- if (derivativeIndices[derivativeIndex] == i)
+ if (derivativeIndex < derivativeIndices.size() && derivativeIndices[derivativeIndex] == i)
{
A.block(row, span - degree, 2, degree + 1)
= SplineType::BasisFunctionDerivatives(parameters[i], 1, degree, knots);
@@ -395,8 +395,9 @@ namespace Eigen
}
else
{
- A.row(row++).segment(span - degree, degree + 1)
+ A.row(row).segment(span - degree, degree + 1)
= SplineType::BasisFunctions(parameters[i], degree, knots);
+ b.col(row++) = points.col(i);
}
}
b.col(0) = points.col(0);
diff --git a/unsupported/Eigen/src/Splines/SplineFwd.h b/unsupported/Eigen/src/Splines/SplineFwd.h
index 0a95fbf3e..00d6b4921 100644
--- a/unsupported/Eigen/src/Splines/SplineFwd.h
+++ b/unsupported/Eigen/src/Splines/SplineFwd.h
@@ -10,7 +10,7 @@
#ifndef EIGEN_SPLINES_FWD_H
#define EIGEN_SPLINES_FWD_H
-#include <Eigen/Core>
+#include "../../../../Eigen/Core"
namespace Eigen
{