aboutsummaryrefslogtreecommitdiff
path: root/Eigen/src/Core/MathFunctions.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/MathFunctions.h')
-rw-r--r--Eigen/src/Core/MathFunctions.h954
1 files changed, 790 insertions, 164 deletions
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index a648aa0fa..61b78f4f2 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -10,10 +11,11 @@
#ifndef EIGEN_MATHFUNCTIONS_H
#define EIGEN_MATHFUNCTIONS_H
-// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html
// TODO this should better be moved to NumTraits
-#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L
-
+// Source: WolframAlpha
+#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L
+#define EIGEN_LOG2E 1.442695040888963407359924681001892137426645954152985934135449406931109219L
+#define EIGEN_LN2 0.693147180559945309417232121458176568075500134360255254120680009493393621L
namespace Eigen {
@@ -97,7 +99,7 @@ struct real_default_impl<Scalar,true>
template<typename Scalar> struct real_impl : real_default_impl<Scalar> {};
-#ifdef __CUDA_ARCH__
+#if defined(EIGEN_GPU_COMPILE_PHASE)
template<typename T>
struct real_impl<std::complex<T> >
{
@@ -145,7 +147,7 @@ struct imag_default_impl<Scalar,true>
template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {};
-#ifdef __CUDA_ARCH__
+#if defined(EIGEN_GPU_COMPILE_PHASE)
template<typename T>
struct imag_impl<std::complex<T> >
{
@@ -213,12 +215,12 @@ struct imag_ref_default_impl
template<typename Scalar>
struct imag_ref_default_impl<Scalar, false>
{
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static inline Scalar run(Scalar&)
{
return Scalar(0);
}
- EIGEN_DEVICE_FUNC
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR
static inline const Scalar run(const Scalar&)
{
return Scalar(0);
@@ -239,7 +241,7 @@ struct imag_ref_retval
****************************************************************************/
template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
-struct conj_impl
+struct conj_default_impl
{
EIGEN_DEVICE_FUNC
static inline Scalar run(const Scalar& x)
@@ -249,7 +251,7 @@ struct conj_impl
};
template<typename Scalar>
-struct conj_impl<Scalar,true>
+struct conj_default_impl<Scalar,true>
{
EIGEN_DEVICE_FUNC
static inline Scalar run(const Scalar& x)
@@ -259,6 +261,9 @@ struct conj_impl<Scalar,true>
}
};
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct conj_impl : conj_default_impl<Scalar, IsComplex> {};
+
template<typename Scalar>
struct conj_retval
{
@@ -287,7 +292,7 @@ struct abs2_impl_default<Scalar, true> // IsComplex
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
- return real(x)*real(x) + imag(x)*imag(x);
+ return x.real()*x.real() + x.imag()*x.imag();
}
};
@@ -309,18 +314,80 @@ struct abs2_retval
};
/****************************************************************************
+* Implementation of sqrt/rsqrt *
+****************************************************************************/
+
+template<typename Scalar>
+struct sqrt_impl
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_ALWAYS_INLINE Scalar run(const Scalar& x)
+ {
+ EIGEN_USING_STD(sqrt);
+ return sqrt(x);
+ }
+};
+
+// Complex sqrt defined in MathFunctionsImpl.h.
+template<typename T> EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& a_x);
+
+// Custom implementation is faster than `std::sqrt`, works on
+// GPU, and correctly handles special cases (unlike MSVC).
+template<typename T>
+struct sqrt_impl<std::complex<T> >
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x)
+ {
+ return complex_sqrt<T>(x);
+ }
+};
+
+template<typename Scalar>
+struct sqrt_retval
+{
+ typedef Scalar type;
+};
+
+// Default implementation relies on numext::sqrt, at bottom of file.
+template<typename T>
+struct rsqrt_impl;
+
+// Complex rsqrt defined in MathFunctionsImpl.h.
+template<typename T> EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& a_x);
+
+template<typename T>
+struct rsqrt_impl<std::complex<T> >
+{
+ EIGEN_DEVICE_FUNC
+ static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x)
+ {
+ return complex_rsqrt<T>(x);
+ }
+};
+
+template<typename Scalar>
+struct rsqrt_retval
+{
+ typedef Scalar type;
+};
+
+/****************************************************************************
* Implementation of norm1 *
****************************************************************************/
template<typename Scalar, bool IsComplex>
-struct norm1_default_impl
+struct norm1_default_impl;
+
+template<typename Scalar>
+struct norm1_default_impl<Scalar,true>
{
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC
static inline RealScalar run(const Scalar& x)
{
- EIGEN_USING_STD_MATH(abs);
- return abs(real(x)) + abs(imag(x));
+ EIGEN_USING_STD(abs);
+ return abs(x.real()) + abs(x.imag());
}
};
@@ -330,7 +397,7 @@ struct norm1_default_impl<Scalar, false>
EIGEN_DEVICE_FUNC
static inline Scalar run(const Scalar& x)
{
- EIGEN_USING_STD_MATH(abs);
+ EIGEN_USING_STD(abs);
return abs(x);
}
};
@@ -348,31 +415,7 @@ struct norm1_retval
* Implementation of hypot *
****************************************************************************/
-template<typename Scalar>
-struct hypot_impl
-{
- typedef typename NumTraits<Scalar>::Real RealScalar;
- static inline RealScalar run(const Scalar& x, const Scalar& y)
- {
- EIGEN_USING_STD_MATH(abs);
- EIGEN_USING_STD_MATH(sqrt);
- RealScalar _x = abs(x);
- RealScalar _y = abs(y);
- Scalar p, qp;
- if(_x>_y)
- {
- p = _x;
- qp = _y / p;
- }
- else
- {
- p = _y;
- qp = _x / p;
- }
- if(p==RealScalar(0)) return RealScalar(0);
- return p * sqrt(RealScalar(1) + qp*qp);
- }
-};
+template<typename Scalar> struct hypot_impl;
template<typename Scalar>
struct hypot_retval
@@ -384,7 +427,7 @@ struct hypot_retval
* Implementation of cast *
****************************************************************************/
-template<typename OldType, typename NewType>
+template<typename OldType, typename NewType, typename EnableIf = void>
struct cast_impl
{
EIGEN_DEVICE_FUNC
@@ -394,6 +437,22 @@ struct cast_impl
}
};
+// Casting from S -> Complex<T> leads to an implicit conversion from S to T,
+// generating warnings on clang. Here we explicitly cast the real component.
+template<typename OldType, typename NewType>
+struct cast_impl<OldType, NewType,
+ typename internal::enable_if<
+ !NumTraits<OldType>::IsComplex && NumTraits<NewType>::IsComplex
+ >::type>
+{
+ EIGEN_DEVICE_FUNC
+ static inline NewType run(const OldType& x)
+ {
+ typedef typename NumTraits<NewType>::Real NewReal;
+ return static_cast<NewType>(static_cast<NewReal>(x));
+ }
+};
+
// here, for once, we're plainly returning NewType: we don't want cast to do weird things.
template<typename OldType, typename NewType>
@@ -407,29 +466,59 @@ inline NewType cast(const OldType& x)
* Implementation of round *
****************************************************************************/
+template<typename Scalar>
+struct round_impl
+{
+ EIGEN_DEVICE_FUNC
+ static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
#if EIGEN_HAS_CXX11_MATH
- template<typename Scalar>
- struct round_impl {
- static inline Scalar run(const Scalar& x)
- {
- EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
- using std::round;
- return round(x);
- }
- };
+ EIGEN_USING_STD(round);
+#endif
+ return Scalar(round(x));
+ }
+};
+
+#if !EIGEN_HAS_CXX11_MATH
+#if EIGEN_HAS_C99_MATH
+// Use ::roundf for float.
+template<>
+struct round_impl<float> {
+ EIGEN_DEVICE_FUNC
+ static inline float run(const float& x)
+ {
+ return ::roundf(x);
+ }
+};
#else
- template<typename Scalar>
- struct round_impl
+template<typename Scalar>
+struct round_using_floor_ceil_impl
+{
+ EIGEN_DEVICE_FUNC
+ static inline Scalar run(const Scalar& x)
{
- static inline Scalar run(const Scalar& x)
- {
- EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
- EIGEN_USING_STD_MATH(floor);
- EIGEN_USING_STD_MATH(ceil);
- return (x > Scalar(0)) ? floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5));
+ EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
+ // Without C99 round/roundf, resort to floor/ceil.
+ EIGEN_USING_STD(floor);
+ EIGEN_USING_STD(ceil);
+ // If not enough precision to resolve a decimal at all, return the input.
+ // Otherwise, adding 0.5 can trigger an increment by 1.
+ const Scalar limit = Scalar(1ull << (NumTraits<Scalar>::digits() - 1));
+ if (x >= limit || x <= -limit) {
+ return x;
}
- };
-#endif
+ return (x > Scalar(0)) ? Scalar(floor(x + Scalar(0.5))) : Scalar(ceil(x - Scalar(0.5)));
+ }
+};
+
+template<>
+struct round_impl<float> : round_using_floor_ceil_impl<float> {};
+
+template<>
+struct round_impl<double> : round_using_floor_ceil_impl<double> {};
+#endif // EIGEN_HAS_C99_MATH
+#endif // !EIGEN_HAS_CXX11_MATH
template<typename Scalar>
struct round_retval
@@ -438,43 +527,112 @@ struct round_retval
};
/****************************************************************************
-* Implementation of arg *
+* Implementation of rint *
****************************************************************************/
+template<typename Scalar>
+struct rint_impl {
+ EIGEN_DEVICE_FUNC
+ static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL)
#if EIGEN_HAS_CXX11_MATH
- template<typename Scalar>
- struct arg_impl {
- static inline Scalar run(const Scalar& x)
- {
- EIGEN_USING_STD_MATH(arg);
- return arg(x);
- }
- };
-#else
- template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
- struct arg_default_impl
+ EIGEN_USING_STD(rint);
+#endif
+ return rint(x);
+ }
+};
+
+#if !EIGEN_HAS_CXX11_MATH
+template<>
+struct rint_impl<double> {
+ EIGEN_DEVICE_FUNC
+ static inline double run(const double& x)
{
- typedef typename NumTraits<Scalar>::Real RealScalar;
- EIGEN_DEVICE_FUNC
- static inline RealScalar run(const Scalar& x)
- {
- return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0); }
- };
+ return ::rint(x);
+ }
+};
+template<>
+struct rint_impl<float> {
+ EIGEN_DEVICE_FUNC
+ static inline float run(const float& x)
+ {
+ return ::rintf(x);
+ }
+};
+#endif
- template<typename Scalar>
- struct arg_default_impl<Scalar,true>
+template<typename Scalar>
+struct rint_retval
+{
+ typedef Scalar type;
+};
+
+/****************************************************************************
+* Implementation of arg *
+****************************************************************************/
+
+// Visual Studio 2017 has a bug where arg(float) returns 0 for negative inputs.
+// This seems to be fixed in VS 2019.
+#if EIGEN_HAS_CXX11_MATH && (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920)
+// std::arg is only defined for types of std::complex, or integer types or float/double/long double
+template<typename Scalar,
+ bool HasStdImpl = NumTraits<Scalar>::IsComplex || is_integral<Scalar>::value
+ || is_same<Scalar, float>::value || is_same<Scalar, double>::value
+ || is_same<Scalar, long double>::value >
+struct arg_default_impl;
+
+template<typename Scalar>
+struct arg_default_impl<Scalar, true> {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
{
- typedef typename NumTraits<Scalar>::Real RealScalar;
- EIGEN_DEVICE_FUNC
- static inline RealScalar run(const Scalar& x)
- {
- EIGEN_USING_STD_MATH(arg);
- return arg(x);
- }
- };
+ #if defined(EIGEN_HIP_DEVICE_COMPILE)
+ // HIP does not seem to have a native device side implementation for the math routine "arg"
+ using std::arg;
+ #else
+ EIGEN_USING_STD(arg);
+ #endif
+ return static_cast<RealScalar>(arg(x));
+ }
+};
- template<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {};
+// Must be non-complex floating-point type (e.g. half/bfloat16).
+template<typename Scalar>
+struct arg_default_impl<Scalar, false> {
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ return (x < Scalar(0)) ? RealScalar(EIGEN_PI) : RealScalar(0);
+ }
+};
+#else
+template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct arg_default_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ return (x < RealScalar(0)) ? RealScalar(EIGEN_PI) : RealScalar(0);
+ }
+};
+
+template<typename Scalar>
+struct arg_default_impl<Scalar,true>
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ EIGEN_DEVICE_FUNC
+ static inline RealScalar run(const Scalar& x)
+ {
+ EIGEN_USING_STD(arg);
+ return arg(x);
+ }
+};
#endif
+template<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {};
template<typename Scalar>
struct arg_retval
@@ -483,6 +641,80 @@ struct arg_retval
};
/****************************************************************************
+* Implementation of expm1 *
+****************************************************************************/
+
+// This implementation is based on GSL Math's expm1.
+namespace std_fallback {
+ // fallback expm1 implementation in case there is no expm1(Scalar) function in namespace of Scalar,
+ // or that there is no suitable std::expm1 function available. Implementation
+ // attributed to Kahan. See: http://www.plunk.org/~hatch/rightway.php.
+ template<typename Scalar>
+ EIGEN_DEVICE_FUNC inline Scalar expm1(const Scalar& x) {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+
+ EIGEN_USING_STD(exp);
+ Scalar u = exp(x);
+ if (numext::equal_strict(u, Scalar(1))) {
+ return x;
+ }
+ Scalar um1 = u - RealScalar(1);
+ if (numext::equal_strict(um1, Scalar(-1))) {
+ return RealScalar(-1);
+ }
+
+ EIGEN_USING_STD(log);
+ Scalar logu = log(u);
+ return numext::equal_strict(u, logu) ? u : (u - RealScalar(1)) * x / logu;
+ }
+}
+
+template<typename Scalar>
+struct expm1_impl {
+ EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+ #if EIGEN_HAS_CXX11_MATH
+ using std::expm1;
+ #else
+ using std_fallback::expm1;
+ #endif
+ return expm1(x);
+ }
+};
+
+template<typename Scalar>
+struct expm1_retval
+{
+ typedef Scalar type;
+};
+
+/****************************************************************************
+* Implementation of log *
+****************************************************************************/
+
+// Complex log defined in MathFunctionsImpl.h.
+template<typename T> EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z);
+
+template<typename Scalar>
+struct log_impl {
+ EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)
+ {
+ EIGEN_USING_STD(log);
+ return static_cast<Scalar>(log(x));
+ }
+};
+
+template<typename Scalar>
+struct log_impl<std::complex<Scalar> > {
+ EIGEN_DEVICE_FUNC static inline std::complex<Scalar> run(const std::complex<Scalar>& z)
+ {
+ return complex_log(z);
+ }
+};
+
+/****************************************************************************
* Implementation of log1p *
****************************************************************************/
@@ -493,25 +725,38 @@ namespace std_fallback {
EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) {
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
typedef typename NumTraits<Scalar>::Real RealScalar;
- EIGEN_USING_STD_MATH(log);
+ EIGEN_USING_STD(log);
Scalar x1p = RealScalar(1) + x;
- return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
+ Scalar log_1p = log_impl<Scalar>::run(x1p);
+ const bool is_small = numext::equal_strict(x1p, Scalar(1));
+ const bool is_inf = numext::equal_strict(x1p, log_1p);
+ return (is_small || is_inf) ? x : x * (log_1p / (x1p - RealScalar(1)));
}
}
template<typename Scalar>
struct log1p_impl {
- static inline Scalar run(const Scalar& x)
+ EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
#if EIGEN_HAS_CXX11_MATH
using std::log1p;
- #endif
+ #else
using std_fallback::log1p;
+ #endif
return log1p(x);
}
};
+// Specialization for complex types that are not supported by std::log1p.
+template <typename RealScalar>
+struct log1p_impl<std::complex<RealScalar> > {
+ EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(
+ const std::complex<RealScalar>& x) {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
+ return std_fallback::log1p(x);
+ }
+};
template<typename Scalar>
struct log1p_retval
@@ -530,7 +775,7 @@ struct pow_impl
typedef typename ScalarBinaryOpTraits<ScalarX,ScalarY,internal::scalar_pow_op<ScalarX,ScalarY> >::ReturnType result_type;
static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y)
{
- EIGEN_USING_STD_MATH(pow);
+ EIGEN_USING_STD(pow);
return pow(x, y);
}
};
@@ -640,21 +885,28 @@ template<typename Scalar>
struct random_default_impl<Scalar, false, true>
{
static inline Scalar run(const Scalar& x, const Scalar& y)
- {
- typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
- if(y<x)
+ {
+ if (y <= x)
return x;
- // the following difference might overflow on a 32 bits system,
- // but since y>=x the result converted to an unsigned long is still correct.
- std::size_t range = ScalarX(y)-ScalarX(x);
- std::size_t offset = 0;
- // rejection sampling
- std::size_t divisor = 1;
- std::size_t multiplier = 1;
- if(range<RAND_MAX) divisor = (std::size_t(RAND_MAX)+1)/(range+1);
- else multiplier = 1 + range/(std::size_t(RAND_MAX)+1);
+ // ScalarU is the unsigned counterpart of Scalar, possibly Scalar itself.
+ typedef typename make_unsigned<Scalar>::type ScalarU;
+ // ScalarX is the widest of ScalarU and unsigned int.
+ // We'll deal only with ScalarX and unsigned int below thus avoiding signed
+ // types and arithmetic and signed overflows (which are undefined behavior).
+ typedef typename conditional<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned>::type ScalarX;
+ // The following difference doesn't overflow, provided our integer types are two's
+ // complement and have the same number of padding bits in signed and unsigned variants.
+ // This is the case in most modern implementations of C++.
+ ScalarX range = ScalarX(y) - ScalarX(x);
+ ScalarX offset = 0;
+ ScalarX divisor = 1;
+ ScalarX multiplier = 1;
+ const unsigned rand_max = RAND_MAX;
+ if (range <= rand_max) divisor = (rand_max + 1) / (range + 1);
+ else multiplier = 1 + range / (rand_max + 1);
+ // Rejection sampling.
do {
- offset = (std::size_t(std::rand()) * multiplier) / divisor;
+ offset = (unsigned(std::rand()) * multiplier) / divisor;
} while (offset > range);
return Scalar(ScalarX(x) + offset);
}
@@ -679,8 +931,8 @@ struct random_default_impl<Scalar, true, false>
{
static inline Scalar run(const Scalar& x, const Scalar& y)
{
- return Scalar(random(real(x), real(y)),
- random(imag(x), imag(y)));
+ return Scalar(random(x.real(), y.real()),
+ random(x.imag(), y.imag()));
}
static inline Scalar run()
{
@@ -701,7 +953,7 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
return EIGEN_MATHFUNC_IMPL(random, Scalar)::run();
}
-// Implementatin of is* functions
+// Implementation of is* functions
// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang.
#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG)
@@ -730,7 +982,7 @@ EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
isfinite_impl(const T& x)
{
- #ifdef __CUDA_ARCH__
+ #if defined(EIGEN_GPU_COMPILE_PHASE)
return (::isfinite)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isfinite;
@@ -745,7 +997,7 @@ EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
isinf_impl(const T& x)
{
- #ifdef __CUDA_ARCH__
+ #if defined(EIGEN_GPU_COMPILE_PHASE)
return (::isinf)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isinf;
@@ -760,7 +1012,7 @@ EIGEN_DEVICE_FUNC
typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type
isnan_impl(const T& x)
{
- #ifdef __CUDA_ARCH__
+ #if defined(EIGEN_GPU_COMPILE_PHASE)
return (::isnan)(x);
#elif EIGEN_USE_STD_FPCLASSIFY
using std::isnan;
@@ -817,7 +1069,6 @@ template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x)
template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
template<typename T> T generic_fast_tanh_float(const T& a_x);
-
} // end namespace internal
/****************************************************************************
@@ -826,12 +1077,12 @@ template<typename T> T generic_fast_tanh_float(const T& a_x);
namespace numext {
-#ifndef __CUDA_ARCH__
+#if (!defined(EIGEN_GPUCC) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
{
- EIGEN_USING_STD_MATH(min);
+ EIGEN_USING_STD(min)
return min EIGEN_NOT_A_MACRO (x,y);
}
@@ -839,7 +1090,7 @@ template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
{
- EIGEN_USING_STD_MATH(max);
+ EIGEN_USING_STD(max)
return max EIGEN_NOT_A_MACRO (x,y);
}
#else
@@ -855,6 +1106,24 @@ EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)
{
return fminf(x, y);
}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE double mini(const double& x, const double& y)
+{
+ return fmin(x, y);
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE long double mini(const long double& x, const long double& y)
+{
+#if defined(EIGEN_HIPCC)
+ // no "fminl" on HIP yet
+ return (x < y) ? x : y;
+#else
+ return fminl(x, y);
+#endif
+}
+
template<typename T>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
@@ -867,6 +1136,92 @@ EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y)
{
return fmaxf(x, y);
}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y)
+{
+ return fmax(x, y);
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y)
+{
+#if defined(EIGEN_HIPCC)
+ // no "fmaxl" on HIP yet
+ return (x > y) ? x : y;
+#else
+ return fmaxl(x, y);
+#endif
+}
+#endif
+
+#if defined(SYCL_DEVICE_ONLY)
+
+
+#define SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \
+ SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_char) \
+ SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_short) \
+ SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_int) \
+ SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_long)
+#define SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \
+ SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_char) \
+ SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_short) \
+ SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_int) \
+ SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_long)
+#define SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \
+ SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_uchar) \
+ SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_ushort) \
+ SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_uint) \
+ SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_ulong)
+#define SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \
+ SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_uchar) \
+ SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_ushort) \
+ SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_uint) \
+ SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_ulong)
+#define SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(NAME, FUNC) \
+ SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \
+ SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY(NAME, FUNC)
+#define SYCL_SPECIALIZE_INTEGER_TYPES_UNARY(NAME, FUNC) \
+ SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \
+ SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY(NAME, FUNC)
+#define SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(NAME, FUNC) \
+ SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_float) \
+ SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC,cl::sycl::cl_double)
+#define SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(NAME, FUNC) \
+ SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_float) \
+ SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC,cl::sycl::cl_double)
+#define SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(NAME, FUNC, RET_TYPE) \
+ SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_float) \
+ SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_double)
+
+#define SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE) \
+template<> \
+ EIGEN_DEVICE_FUNC \
+ EIGEN_ALWAYS_INLINE RET_TYPE NAME(const ARG_TYPE& x) { \
+ return cl::sycl::FUNC(x); \
+ }
+
+#define SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, TYPE) \
+ SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, TYPE, TYPE)
+
+#define SYCL_SPECIALIZE_GEN1_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE1, ARG_TYPE2) \
+ template<> \
+ EIGEN_DEVICE_FUNC \
+ EIGEN_ALWAYS_INLINE RET_TYPE NAME(const ARG_TYPE1& x, const ARG_TYPE2& y) { \
+ return cl::sycl::FUNC(x, y); \
+ }
+
+#define SYCL_SPECIALIZE_GEN2_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE) \
+ SYCL_SPECIALIZE_GEN1_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE, ARG_TYPE)
+
+#define SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, TYPE) \
+ SYCL_SPECIALIZE_GEN2_BINARY_FUNC(NAME, FUNC, TYPE, TYPE)
+
+SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(mini, min)
+SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(mini, fmin)
+SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(maxi, max)
+SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(maxi, fmax)
+
#endif
@@ -933,6 +1288,37 @@ inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
}
+EIGEN_DEVICE_FUNC
+inline bool abs2(bool x) { return x; }
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE T absdiff(const T& x, const T& y)
+{
+ return x > y ? x - y : y - x;
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE float absdiff(const float& x, const float& y)
+{
+ return fabsf(x - y);
+}
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE double absdiff(const double& x, const double& y)
+{
+ return fabs(x - y);
+}
+
+#if !defined(EIGEN_GPUCC)
+// HIP and CUDA do not support long double.
+template<>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE long double absdiff(const long double& x, const long double& y) {
+ return fabsl(x - y);
+}
+#endif
+
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
@@ -947,6 +1333,10 @@ inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar&
return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
}
+#if defined(SYCL_DEVICE_ONLY)
+ SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(hypot, hypot)
+#endif
+
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
@@ -954,7 +1344,11 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
}
-#ifdef __CUDACC__
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log1p, log1p)
+#endif
+
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float log1p(const float &x) { return ::log1pf(x); }
@@ -969,10 +1363,27 @@ inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const Scala
return internal::pow_impl<ScalarX,ScalarY>::run(x, y);
}
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(pow, pow)
+#endif
+
template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); }
template<typename T> EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); }
template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); }
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isnan, isnan, bool)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isinf, isinf, bool)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isfinite, isfinite, bool)
+#endif
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(rint, Scalar) rint(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(rint, Scalar)::run(x);
+}
+
template<typename Scalar>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
@@ -980,15 +1391,23 @@ inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x);
}
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round)
+#endif
+
template<typename T>
EIGEN_DEVICE_FUNC
T (floor)(const T& x)
{
- EIGEN_USING_STD_MATH(floor);
+ EIGEN_USING_STD(floor)
return floor(x);
}
-#ifdef __CUDACC__
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(floor, floor)
+#endif
+
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float floor(const float &x) { return ::floorf(x); }
@@ -1000,11 +1419,15 @@ template<typename T>
EIGEN_DEVICE_FUNC
T (ceil)(const T& x)
{
- EIGEN_USING_STD_MATH(ceil);
+ EIGEN_USING_STD(ceil);
return ceil(x);
}
-#ifdef __CUDACC__
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil)
+#endif
+
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float ceil(const float &x) { return ::ceilf(x); }
@@ -1030,28 +1453,49 @@ inline int log2(int x)
/** \returns the square root of \a x.
*
- * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
+ * It is essentially equivalent to
+ * \code using std::sqrt; return sqrt(x); \endcode
* but slightly faster for float/double and some compilers (e.g., gcc), thanks to
* specializations when SSE is enabled.
*
* It's usage is justified in performance critical functions, like norm/normalize.
*/
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE EIGEN_MATHFUNC_RETVAL(sqrt, Scalar) sqrt(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(sqrt, Scalar)::run(x);
+}
+
+// Boolean specialization, avoids implicit float to bool conversion (-Wimplicit-conversion-floating-point-to-bool).
+template<>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC
+bool sqrt<bool>(const bool &x) { return x; }
+
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sqrt, sqrt)
+#endif
+
+/** \returns the reciprocal square root of \a x. **/
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-T sqrt(const T &x)
+T rsqrt(const T& x)
{
- EIGEN_USING_STD_MATH(sqrt);
- return sqrt(x);
+ return internal::rsqrt_impl<T>::run(x);
}
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T log(const T &x) {
- EIGEN_USING_STD_MATH(log);
- return log(x);
+ return internal::log_impl<T>::run(x);
}
-#ifdef __CUDACC__
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log, log)
+#endif
+
+
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float log(const float &x) { return ::logf(x); }
@@ -1063,7 +1507,7 @@ template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
abs(const T &x) {
- EIGEN_USING_STD_MATH(abs);
+ EIGEN_USING_STD(abs);
return abs(x);
}
@@ -1074,12 +1518,12 @@ abs(const T &x) {
return x;
}
-#if defined(__SYCL_DEVICE_ONLY__)
-EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); }
-EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); }
-#endif // defined(__SYCL_DEVICE_ONLY__)
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_INTEGER_TYPES_UNARY(abs, abs)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(abs, fabs)
+#endif
-#ifdef __CUDACC__
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float abs(const float &x) { return ::fabsf(x); }
@@ -1100,26 +1544,69 @@ double abs(const std::complex<double>& x) {
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T exp(const T &x) {
- EIGEN_USING_STD_MATH(exp);
+ EIGEN_USING_STD(exp);
return exp(x);
}
-#ifdef __CUDACC__
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp, exp)
+#endif
+
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float exp(const float &x) { return ::expf(x); }
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double exp(const double &x) { return ::exp(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+std::complex<float> exp(const std::complex<float>& x) {
+ float com = ::expf(x.real());
+ float res_real = com * ::cosf(x.imag());
+ float res_imag = com * ::sinf(x.imag());
+ return std::complex<float>(res_real, res_imag);
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+std::complex<double> exp(const std::complex<double>& x) {
+ double com = ::exp(x.real());
+ double res_real = com * ::cos(x.imag());
+ double res_imag = com * ::sin(x.imag());
+ return std::complex<double>(res_real, res_imag);
+}
+#endif
+
+template<typename Scalar>
+EIGEN_DEVICE_FUNC
+inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x)
+{
+ return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x);
+}
+
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(expm1, expm1)
+#endif
+
+#if defined(EIGEN_GPUCC)
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float expm1(const float &x) { return ::expm1f(x); }
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double expm1(const double &x) { return ::expm1(x); }
#endif
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T cos(const T &x) {
- EIGEN_USING_STD_MATH(cos);
+ EIGEN_USING_STD(cos);
return cos(x);
}
-#ifdef __CUDACC__
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cos,cos)
+#endif
+
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float cos(const float &x) { return ::cosf(x); }
@@ -1130,11 +1617,15 @@ double cos(const double &x) { return ::cos(x); }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T sin(const T &x) {
- EIGEN_USING_STD_MATH(sin);
+ EIGEN_USING_STD(sin);
return sin(x);
}
-#ifdef __CUDACC__
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sin, sin)
+#endif
+
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float sin(const float &x) { return ::sinf(x); }
@@ -1145,11 +1636,15 @@ double sin(const double &x) { return ::sin(x); }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T tan(const T &x) {
- EIGEN_USING_STD_MATH(tan);
+ EIGEN_USING_STD(tan);
return tan(x);
}
-#ifdef __CUDACC__
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tan, tan)
+#endif
+
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float tan(const float &x) { return ::tanf(x); }
@@ -1160,11 +1655,25 @@ double tan(const double &x) { return ::tan(x); }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T acos(const T &x) {
- EIGEN_USING_STD_MATH(acos);
+ EIGEN_USING_STD(acos);
return acos(x);
}
-#ifdef __CUDACC__
+#if EIGEN_HAS_CXX11_MATH
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T acosh(const T &x) {
+ EIGEN_USING_STD(acosh);
+ return static_cast<T>(acosh(x));
+}
+#endif
+
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acos, acos)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acosh, acosh)
+#endif
+
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float acos(const float &x) { return ::acosf(x); }
@@ -1175,11 +1684,25 @@ double acos(const double &x) { return ::acos(x); }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T asin(const T &x) {
- EIGEN_USING_STD_MATH(asin);
+ EIGEN_USING_STD(asin);
return asin(x);
}
-#ifdef __CUDACC__
+#if EIGEN_HAS_CXX11_MATH
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T asinh(const T &x) {
+ EIGEN_USING_STD(asinh);
+ return static_cast<T>(asinh(x));
+}
+#endif
+
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asin, asin)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asinh, asinh)
+#endif
+
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float asin(const float &x) { return ::asinf(x); }
@@ -1190,11 +1713,25 @@ double asin(const double &x) { return ::asin(x); }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T atan(const T &x) {
- EIGEN_USING_STD_MATH(atan);
- return atan(x);
+ EIGEN_USING_STD(atan);
+ return static_cast<T>(atan(x));
+}
+
+#if EIGEN_HAS_CXX11_MATH
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T atanh(const T &x) {
+ EIGEN_USING_STD(atanh);
+ return static_cast<T>(atanh(x));
}
+#endif
+
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atan, atan)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atanh, atanh)
+#endif
-#ifdef __CUDACC__
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float atan(const float &x) { return ::atanf(x); }
@@ -1206,11 +1743,15 @@ double atan(const double &x) { return ::atan(x); }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T cosh(const T &x) {
- EIGEN_USING_STD_MATH(cosh);
- return cosh(x);
+ EIGEN_USING_STD(cosh);
+ return static_cast<T>(cosh(x));
}
-#ifdef __CUDACC__
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cosh, cosh)
+#endif
+
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float cosh(const float &x) { return ::coshf(x); }
@@ -1221,11 +1762,15 @@ double cosh(const double &x) { return ::cosh(x); }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T sinh(const T &x) {
- EIGEN_USING_STD_MATH(sinh);
- return sinh(x);
+ EIGEN_USING_STD(sinh);
+ return static_cast<T>(sinh(x));
}
-#ifdef __CUDACC__
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sinh, sinh)
+#endif
+
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float sinh(const float &x) { return ::sinhf(x); }
@@ -1236,16 +1781,20 @@ double sinh(const double &x) { return ::sinh(x); }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T tanh(const T &x) {
- EIGEN_USING_STD_MATH(tanh);
+ EIGEN_USING_STD(tanh);
return tanh(x);
}
-#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH
+#if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY)
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float tanh(float x) { return internal::generic_fast_tanh_float(x); }
#endif
-#ifdef __CUDACC__
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tanh, tanh)
+#endif
+
+#if defined(EIGEN_GPUCC)
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float tanh(const float &x) { return ::tanhf(x); }
@@ -1256,11 +1805,15 @@ double tanh(const double &x) { return ::tanh(x); }
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T fmod(const T& a, const T& b) {
- EIGEN_USING_STD_MATH(fmod);
+ EIGEN_USING_STD(fmod);
return fmod(a, b);
}
-#ifdef __CUDACC__
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(fmod, fmod)
+#endif
+
+#if defined(EIGEN_GPUCC)
template <>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float fmod(const float& a, const float& b) {
@@ -1274,6 +1827,23 @@ double fmod(const double& a, const double& b) {
}
#endif
+#if defined(SYCL_DEVICE_ONLY)
+#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY
+#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY
+#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY
+#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY
+#undef SYCL_SPECIALIZE_INTEGER_TYPES_BINARY
+#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY
+#undef SYCL_SPECIALIZE_FLOATING_TYPES_BINARY
+#undef SYCL_SPECIALIZE_FLOATING_TYPES_UNARY
+#undef SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE
+#undef SYCL_SPECIALIZE_GEN_UNARY_FUNC
+#undef SYCL_SPECIALIZE_UNARY_FUNC
+#undef SYCL_SPECIALIZE_GEN1_BINARY_FUNC
+#undef SYCL_SPECIALIZE_GEN2_BINARY_FUNC
+#undef SYCL_SPECIALIZE_BINARY_FUNC
+#endif
+
} // end namespace numext
namespace internal {
@@ -1397,18 +1967,23 @@ template<> struct random_impl<bool>
{
return random<int>(0,1)==0 ? false : true;
}
+
+ static inline bool run(const bool& a, const bool& b)
+ {
+ return random<int>(a, b)==0 ? false : true;
+ }
};
template<> struct scalar_fuzzy_impl<bool>
{
typedef bool RealScalar;
-
+
template<typename OtherScalar> EIGEN_DEVICE_FUNC
static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&)
{
return !x;
}
-
+
EIGEN_DEVICE_FUNC
static inline bool isApprox(bool x, bool y, bool)
{
@@ -1420,10 +1995,61 @@ template<> struct scalar_fuzzy_impl<bool>
{
return (!x) || y;
}
-
+
+};
+
+} // end namespace internal
+
+// Default implementations that rely on other numext implementations
+namespace internal {
+
+// Specialization for complex types that are not supported by std::expm1.
+template <typename RealScalar>
+struct expm1_impl<std::complex<RealScalar> > {
+ EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(
+ const std::complex<RealScalar>& x) {
+ EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
+ RealScalar xr = x.real();
+ RealScalar xi = x.imag();
+ // expm1(z) = exp(z) - 1
+ // = exp(x + i * y) - 1
+ // = exp(x) * (cos(y) + i * sin(y)) - 1
+ // = exp(x) * cos(y) - 1 + i * exp(x) * sin(y)
+ // Imag(expm1(z)) = exp(x) * sin(y)
+ // Real(expm1(z)) = exp(x) * cos(y) - 1
+ // = exp(x) * cos(y) - 1.
+ // = expm1(x) + exp(x) * (cos(y) - 1)
+ // = expm1(x) + exp(x) * (2 * sin(y / 2) ** 2)
+ RealScalar erm1 = numext::expm1<RealScalar>(xr);
+ RealScalar er = erm1 + RealScalar(1.);
+ RealScalar sin2 = numext::sin(xi / RealScalar(2.));
+ sin2 = sin2 * sin2;
+ RealScalar s = numext::sin(xi);
+ RealScalar real_part = erm1 - RealScalar(2.) * er * sin2;
+ return std::complex<RealScalar>(real_part, er * s);
+ }
+};
+
+template<typename T>
+struct rsqrt_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_ALWAYS_INLINE T run(const T& x) {
+ return T(1)/numext::sqrt(x);
+ }
+};
+
+#if defined(EIGEN_GPU_COMPILE_PHASE)
+template<typename T>
+struct conj_impl<std::complex<T>, true>
+{
+ EIGEN_DEVICE_FUNC
+ static inline std::complex<T> run(const std::complex<T>& x)
+ {
+ return std::complex<T>(numext::real(x), -numext::imag(x));
+ }
};
+#endif
-
} // end namespace internal
} // end namespace Eigen