diff options
Diffstat (limited to 'Eigen/src/Core/MathFunctions.h')
-rw-r--r-- | Eigen/src/Core/MathFunctions.h | 954 |
1 files changed, 790 insertions, 164 deletions
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index a648aa0fa..61b78f4f2 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -10,10 +11,11 @@ #ifndef EIGEN_MATHFUNCTIONS_H #define EIGEN_MATHFUNCTIONS_H -// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html // TODO this should better be moved to NumTraits -#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L - +// Source: WolframAlpha +#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L +#define EIGEN_LOG2E 1.442695040888963407359924681001892137426645954152985934135449406931109219L +#define EIGEN_LN2 0.693147180559945309417232121458176568075500134360255254120680009493393621L namespace Eigen { @@ -97,7 +99,7 @@ struct real_default_impl<Scalar,true> template<typename Scalar> struct real_impl : real_default_impl<Scalar> {}; -#ifdef __CUDA_ARCH__ +#if defined(EIGEN_GPU_COMPILE_PHASE) template<typename T> struct real_impl<std::complex<T> > { @@ -145,7 +147,7 @@ struct imag_default_impl<Scalar,true> template<typename Scalar> struct imag_impl : imag_default_impl<Scalar> {}; -#ifdef __CUDA_ARCH__ +#if defined(EIGEN_GPU_COMPILE_PHASE) template<typename T> struct imag_impl<std::complex<T> > { @@ -213,12 +215,12 @@ struct imag_ref_default_impl template<typename Scalar> struct imag_ref_default_impl<Scalar, false> { - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline Scalar run(Scalar&) { return Scalar(0); } - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR static inline const Scalar run(const Scalar&) { return Scalar(0); @@ -239,7 +241,7 @@ struct imag_ref_retval ****************************************************************************/ template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex> -struct conj_impl +struct conj_default_impl { EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) @@ -249,7 +251,7 @@ struct conj_impl }; template<typename Scalar> -struct conj_impl<Scalar,true> +struct conj_default_impl<Scalar,true> { EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) @@ -259,6 +261,9 @@ struct conj_impl<Scalar,true> } }; +template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex> +struct conj_impl : conj_default_impl<Scalar, IsComplex> {}; + template<typename Scalar> struct conj_retval { @@ -287,7 +292,7 @@ struct abs2_impl_default<Scalar, true> // IsComplex EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { - return real(x)*real(x) + imag(x)*imag(x); + return x.real()*x.real() + x.imag()*x.imag(); } }; @@ -309,18 +314,80 @@ struct abs2_retval }; /**************************************************************************** +* Implementation of sqrt/rsqrt * +****************************************************************************/ + +template<typename Scalar> +struct sqrt_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_ALWAYS_INLINE Scalar run(const Scalar& x) + { + EIGEN_USING_STD(sqrt); + return sqrt(x); + } +}; + +// Complex sqrt defined in MathFunctionsImpl.h. +template<typename T> EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& a_x); + +// Custom implementation is faster than `std::sqrt`, works on +// GPU, and correctly handles special cases (unlike MSVC). +template<typename T> +struct sqrt_impl<std::complex<T> > +{ + EIGEN_DEVICE_FUNC + static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x) + { + return complex_sqrt<T>(x); + } +}; + +template<typename Scalar> +struct sqrt_retval +{ + typedef Scalar type; +}; + +// Default implementation relies on numext::sqrt, at bottom of file. +template<typename T> +struct rsqrt_impl; + +// Complex rsqrt defined in MathFunctionsImpl.h. +template<typename T> EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& a_x); + +template<typename T> +struct rsqrt_impl<std::complex<T> > +{ + EIGEN_DEVICE_FUNC + static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& x) + { + return complex_rsqrt<T>(x); + } +}; + +template<typename Scalar> +struct rsqrt_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** * Implementation of norm1 * ****************************************************************************/ template<typename Scalar, bool IsComplex> -struct norm1_default_impl +struct norm1_default_impl; + +template<typename Scalar> +struct norm1_default_impl<Scalar,true> { typedef typename NumTraits<Scalar>::Real RealScalar; EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { - EIGEN_USING_STD_MATH(abs); - return abs(real(x)) + abs(imag(x)); + EIGEN_USING_STD(abs); + return abs(x.real()) + abs(x.imag()); } }; @@ -330,7 +397,7 @@ struct norm1_default_impl<Scalar, false> EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { - EIGEN_USING_STD_MATH(abs); + EIGEN_USING_STD(abs); return abs(x); } }; @@ -348,31 +415,7 @@ struct norm1_retval * Implementation of hypot * ****************************************************************************/ -template<typename Scalar> -struct hypot_impl -{ - typedef typename NumTraits<Scalar>::Real RealScalar; - static inline RealScalar run(const Scalar& x, const Scalar& y) - { - EIGEN_USING_STD_MATH(abs); - EIGEN_USING_STD_MATH(sqrt); - RealScalar _x = abs(x); - RealScalar _y = abs(y); - Scalar p, qp; - if(_x>_y) - { - p = _x; - qp = _y / p; - } - else - { - p = _y; - qp = _x / p; - } - if(p==RealScalar(0)) return RealScalar(0); - return p * sqrt(RealScalar(1) + qp*qp); - } -}; +template<typename Scalar> struct hypot_impl; template<typename Scalar> struct hypot_retval @@ -384,7 +427,7 @@ struct hypot_retval * Implementation of cast * ****************************************************************************/ -template<typename OldType, typename NewType> +template<typename OldType, typename NewType, typename EnableIf = void> struct cast_impl { EIGEN_DEVICE_FUNC @@ -394,6 +437,22 @@ struct cast_impl } }; +// Casting from S -> Complex<T> leads to an implicit conversion from S to T, +// generating warnings on clang. Here we explicitly cast the real component. +template<typename OldType, typename NewType> +struct cast_impl<OldType, NewType, + typename internal::enable_if< + !NumTraits<OldType>::IsComplex && NumTraits<NewType>::IsComplex + >::type> +{ + EIGEN_DEVICE_FUNC + static inline NewType run(const OldType& x) + { + typedef typename NumTraits<NewType>::Real NewReal; + return static_cast<NewType>(static_cast<NewReal>(x)); + } +}; + // here, for once, we're plainly returning NewType: we don't want cast to do weird things. template<typename OldType, typename NewType> @@ -407,29 +466,59 @@ inline NewType cast(const OldType& x) * Implementation of round * ****************************************************************************/ +template<typename Scalar> +struct round_impl +{ + EIGEN_DEVICE_FUNC + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) #if EIGEN_HAS_CXX11_MATH - template<typename Scalar> - struct round_impl { - static inline Scalar run(const Scalar& x) - { - EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) - using std::round; - return round(x); - } - }; + EIGEN_USING_STD(round); +#endif + return Scalar(round(x)); + } +}; + +#if !EIGEN_HAS_CXX11_MATH +#if EIGEN_HAS_C99_MATH +// Use ::roundf for float. +template<> +struct round_impl<float> { + EIGEN_DEVICE_FUNC + static inline float run(const float& x) + { + return ::roundf(x); + } +}; #else - template<typename Scalar> - struct round_impl +template<typename Scalar> +struct round_using_floor_ceil_impl +{ + EIGEN_DEVICE_FUNC + static inline Scalar run(const Scalar& x) { - static inline Scalar run(const Scalar& x) - { - EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) - EIGEN_USING_STD_MATH(floor); - EIGEN_USING_STD_MATH(ceil); - return (x > Scalar(0)) ? floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5)); + EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) + // Without C99 round/roundf, resort to floor/ceil. + EIGEN_USING_STD(floor); + EIGEN_USING_STD(ceil); + // If not enough precision to resolve a decimal at all, return the input. + // Otherwise, adding 0.5 can trigger an increment by 1. + const Scalar limit = Scalar(1ull << (NumTraits<Scalar>::digits() - 1)); + if (x >= limit || x <= -limit) { + return x; } - }; -#endif + return (x > Scalar(0)) ? Scalar(floor(x + Scalar(0.5))) : Scalar(ceil(x - Scalar(0.5))); + } +}; + +template<> +struct round_impl<float> : round_using_floor_ceil_impl<float> {}; + +template<> +struct round_impl<double> : round_using_floor_ceil_impl<double> {}; +#endif // EIGEN_HAS_C99_MATH +#endif // !EIGEN_HAS_CXX11_MATH template<typename Scalar> struct round_retval @@ -438,43 +527,112 @@ struct round_retval }; /**************************************************************************** -* Implementation of arg * +* Implementation of rint * ****************************************************************************/ +template<typename Scalar> +struct rint_impl { + EIGEN_DEVICE_FUNC + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) #if EIGEN_HAS_CXX11_MATH - template<typename Scalar> - struct arg_impl { - static inline Scalar run(const Scalar& x) - { - EIGEN_USING_STD_MATH(arg); - return arg(x); - } - }; -#else - template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex> - struct arg_default_impl + EIGEN_USING_STD(rint); +#endif + return rint(x); + } +}; + +#if !EIGEN_HAS_CXX11_MATH +template<> +struct rint_impl<double> { + EIGEN_DEVICE_FUNC + static inline double run(const double& x) { - typedef typename NumTraits<Scalar>::Real RealScalar; - EIGEN_DEVICE_FUNC - static inline RealScalar run(const Scalar& x) - { - return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0); } - }; + return ::rint(x); + } +}; +template<> +struct rint_impl<float> { + EIGEN_DEVICE_FUNC + static inline float run(const float& x) + { + return ::rintf(x); + } +}; +#endif - template<typename Scalar> - struct arg_default_impl<Scalar,true> +template<typename Scalar> +struct rint_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of arg * +****************************************************************************/ + +// Visual Studio 2017 has a bug where arg(float) returns 0 for negative inputs. +// This seems to be fixed in VS 2019. +#if EIGEN_HAS_CXX11_MATH && (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920) +// std::arg is only defined for types of std::complex, or integer types or float/double/long double +template<typename Scalar, + bool HasStdImpl = NumTraits<Scalar>::IsComplex || is_integral<Scalar>::value + || is_same<Scalar, float>::value || is_same<Scalar, double>::value + || is_same<Scalar, long double>::value > +struct arg_default_impl; + +template<typename Scalar> +struct arg_default_impl<Scalar, true> { + typedef typename NumTraits<Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) { - typedef typename NumTraits<Scalar>::Real RealScalar; - EIGEN_DEVICE_FUNC - static inline RealScalar run(const Scalar& x) - { - EIGEN_USING_STD_MATH(arg); - return arg(x); - } - }; + #if defined(EIGEN_HIP_DEVICE_COMPILE) + // HIP does not seem to have a native device side implementation for the math routine "arg" + using std::arg; + #else + EIGEN_USING_STD(arg); + #endif + return static_cast<RealScalar>(arg(x)); + } +}; - template<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {}; +// Must be non-complex floating-point type (e.g. half/bfloat16). +template<typename Scalar> +struct arg_default_impl<Scalar, false> { + typedef typename NumTraits<Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return (x < Scalar(0)) ? RealScalar(EIGEN_PI) : RealScalar(0); + } +}; +#else +template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex> +struct arg_default_impl +{ + typedef typename NumTraits<Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return (x < RealScalar(0)) ? RealScalar(EIGEN_PI) : RealScalar(0); + } +}; + +template<typename Scalar> +struct arg_default_impl<Scalar,true> +{ + typedef typename NumTraits<Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + EIGEN_USING_STD(arg); + return arg(x); + } +}; #endif +template<typename Scalar> struct arg_impl : arg_default_impl<Scalar> {}; template<typename Scalar> struct arg_retval @@ -483,6 +641,80 @@ struct arg_retval }; /**************************************************************************** +* Implementation of expm1 * +****************************************************************************/ + +// This implementation is based on GSL Math's expm1. +namespace std_fallback { + // fallback expm1 implementation in case there is no expm1(Scalar) function in namespace of Scalar, + // or that there is no suitable std::expm1 function available. Implementation + // attributed to Kahan. See: http://www.plunk.org/~hatch/rightway.php. + template<typename Scalar> + EIGEN_DEVICE_FUNC inline Scalar expm1(const Scalar& x) { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + typedef typename NumTraits<Scalar>::Real RealScalar; + + EIGEN_USING_STD(exp); + Scalar u = exp(x); + if (numext::equal_strict(u, Scalar(1))) { + return x; + } + Scalar um1 = u - RealScalar(1); + if (numext::equal_strict(um1, Scalar(-1))) { + return RealScalar(-1); + } + + EIGEN_USING_STD(log); + Scalar logu = log(u); + return numext::equal_strict(u, logu) ? u : (u - RealScalar(1)) * x / logu; + } +} + +template<typename Scalar> +struct expm1_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + #if EIGEN_HAS_CXX11_MATH + using std::expm1; + #else + using std_fallback::expm1; + #endif + return expm1(x); + } +}; + +template<typename Scalar> +struct expm1_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of log * +****************************************************************************/ + +// Complex log defined in MathFunctionsImpl.h. +template<typename T> EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z); + +template<typename Scalar> +struct log_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) + { + EIGEN_USING_STD(log); + return static_cast<Scalar>(log(x)); + } +}; + +template<typename Scalar> +struct log_impl<std::complex<Scalar> > { + EIGEN_DEVICE_FUNC static inline std::complex<Scalar> run(const std::complex<Scalar>& z) + { + return complex_log(z); + } +}; + +/**************************************************************************** * Implementation of log1p * ****************************************************************************/ @@ -493,25 +725,38 @@ namespace std_fallback { EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) typedef typename NumTraits<Scalar>::Real RealScalar; - EIGEN_USING_STD_MATH(log); + EIGEN_USING_STD(log); Scalar x1p = RealScalar(1) + x; - return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); + Scalar log_1p = log_impl<Scalar>::run(x1p); + const bool is_small = numext::equal_strict(x1p, Scalar(1)); + const bool is_inf = numext::equal_strict(x1p, log_1p); + return (is_small || is_inf) ? x : x * (log_1p / (x1p - RealScalar(1))); } } template<typename Scalar> struct log1p_impl { - static inline Scalar run(const Scalar& x) + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) #if EIGEN_HAS_CXX11_MATH using std::log1p; - #endif + #else using std_fallback::log1p; + #endif return log1p(x); } }; +// Specialization for complex types that are not supported by std::log1p. +template <typename RealScalar> +struct log1p_impl<std::complex<RealScalar> > { + EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run( + const std::complex<RealScalar>& x) { + EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar) + return std_fallback::log1p(x); + } +}; template<typename Scalar> struct log1p_retval @@ -530,7 +775,7 @@ struct pow_impl typedef typename ScalarBinaryOpTraits<ScalarX,ScalarY,internal::scalar_pow_op<ScalarX,ScalarY> >::ReturnType result_type; static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y) { - EIGEN_USING_STD_MATH(pow); + EIGEN_USING_STD(pow); return pow(x, y); } }; @@ -640,21 +885,28 @@ template<typename Scalar> struct random_default_impl<Scalar, false, true> { static inline Scalar run(const Scalar& x, const Scalar& y) - { - typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; - if(y<x) + { + if (y <= x) return x; - // the following difference might overflow on a 32 bits system, - // but since y>=x the result converted to an unsigned long is still correct. - std::size_t range = ScalarX(y)-ScalarX(x); - std::size_t offset = 0; - // rejection sampling - std::size_t divisor = 1; - std::size_t multiplier = 1; - if(range<RAND_MAX) divisor = (std::size_t(RAND_MAX)+1)/(range+1); - else multiplier = 1 + range/(std::size_t(RAND_MAX)+1); + // ScalarU is the unsigned counterpart of Scalar, possibly Scalar itself. + typedef typename make_unsigned<Scalar>::type ScalarU; + // ScalarX is the widest of ScalarU and unsigned int. + // We'll deal only with ScalarX and unsigned int below thus avoiding signed + // types and arithmetic and signed overflows (which are undefined behavior). + typedef typename conditional<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned>::type ScalarX; + // The following difference doesn't overflow, provided our integer types are two's + // complement and have the same number of padding bits in signed and unsigned variants. + // This is the case in most modern implementations of C++. + ScalarX range = ScalarX(y) - ScalarX(x); + ScalarX offset = 0; + ScalarX divisor = 1; + ScalarX multiplier = 1; + const unsigned rand_max = RAND_MAX; + if (range <= rand_max) divisor = (rand_max + 1) / (range + 1); + else multiplier = 1 + range / (rand_max + 1); + // Rejection sampling. do { - offset = (std::size_t(std::rand()) * multiplier) / divisor; + offset = (unsigned(std::rand()) * multiplier) / divisor; } while (offset > range); return Scalar(ScalarX(x) + offset); } @@ -679,8 +931,8 @@ struct random_default_impl<Scalar, true, false> { static inline Scalar run(const Scalar& x, const Scalar& y) { - return Scalar(random(real(x), real(y)), - random(imag(x), imag(y))); + return Scalar(random(x.real(), y.real()), + random(x.imag(), y.imag())); } static inline Scalar run() { @@ -701,7 +953,7 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); } -// Implementatin of is* functions +// Implementation of is* functions // std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang. #if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG) @@ -730,7 +982,7 @@ EIGEN_DEVICE_FUNC typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type isfinite_impl(const T& x) { - #ifdef __CUDA_ARCH__ + #if defined(EIGEN_GPU_COMPILE_PHASE) return (::isfinite)(x); #elif EIGEN_USE_STD_FPCLASSIFY using std::isfinite; @@ -745,7 +997,7 @@ EIGEN_DEVICE_FUNC typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type isinf_impl(const T& x) { - #ifdef __CUDA_ARCH__ + #if defined(EIGEN_GPU_COMPILE_PHASE) return (::isinf)(x); #elif EIGEN_USE_STD_FPCLASSIFY using std::isinf; @@ -760,7 +1012,7 @@ EIGEN_DEVICE_FUNC typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>::IsComplex),bool>::type isnan_impl(const T& x) { - #ifdef __CUDA_ARCH__ + #if defined(EIGEN_GPU_COMPILE_PHASE) return (::isnan)(x); #elif EIGEN_USE_STD_FPCLASSIFY using std::isnan; @@ -817,7 +1069,6 @@ template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x) template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x); template<typename T> T generic_fast_tanh_float(const T& a_x); - } // end namespace internal /**************************************************************************** @@ -826,12 +1077,12 @@ template<typename T> T generic_fast_tanh_float(const T& a_x); namespace numext { -#ifndef __CUDA_ARCH__ +#if (!defined(EIGEN_GPUCC) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC)) template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) { - EIGEN_USING_STD_MATH(min); + EIGEN_USING_STD(min) return min EIGEN_NOT_A_MACRO (x,y); } @@ -839,7 +1090,7 @@ template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) { - EIGEN_USING_STD_MATH(max); + EIGEN_USING_STD(max) return max EIGEN_NOT_A_MACRO (x,y); } #else @@ -855,6 +1106,24 @@ EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) { return fminf(x, y); } +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE double mini(const double& x, const double& y) +{ + return fmin(x, y); +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE long double mini(const long double& x, const long double& y) +{ +#if defined(EIGEN_HIPCC) + // no "fminl" on HIP yet + return (x < y) ? x : y; +#else + return fminl(x, y); +#endif +} + template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) @@ -867,6 +1136,92 @@ EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) { return fmaxf(x, y); } +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y) +{ + return fmax(x, y); +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y) +{ +#if defined(EIGEN_HIPCC) + // no "fmaxl" on HIP yet + return (x > y) ? x : y; +#else + return fmaxl(x, y); +#endif +} +#endif + +#if defined(SYCL_DEVICE_ONLY) + + +#define SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_char) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_short) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_int) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_long) +#define SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_char) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_short) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_int) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_long) +#define SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_uchar) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_ushort) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_uint) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_ulong) +#define SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_uchar) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_ushort) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_uint) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_ulong) +#define SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(NAME, FUNC) \ + SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \ + SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) +#define SYCL_SPECIALIZE_INTEGER_TYPES_UNARY(NAME, FUNC) \ + SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \ + SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) +#define SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(NAME, FUNC) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_float) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC,cl::sycl::cl_double) +#define SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(NAME, FUNC) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_float) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC,cl::sycl::cl_double) +#define SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(NAME, FUNC, RET_TYPE) \ + SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_float) \ + SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_double) + +#define SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE) \ +template<> \ + EIGEN_DEVICE_FUNC \ + EIGEN_ALWAYS_INLINE RET_TYPE NAME(const ARG_TYPE& x) { \ + return cl::sycl::FUNC(x); \ + } + +#define SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, TYPE) \ + SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, TYPE, TYPE) + +#define SYCL_SPECIALIZE_GEN1_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE1, ARG_TYPE2) \ + template<> \ + EIGEN_DEVICE_FUNC \ + EIGEN_ALWAYS_INLINE RET_TYPE NAME(const ARG_TYPE1& x, const ARG_TYPE2& y) { \ + return cl::sycl::FUNC(x, y); \ + } + +#define SYCL_SPECIALIZE_GEN2_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE) \ + SYCL_SPECIALIZE_GEN1_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE, ARG_TYPE) + +#define SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, TYPE) \ + SYCL_SPECIALIZE_GEN2_BINARY_FUNC(NAME, FUNC, TYPE, TYPE) + +SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(mini, min) +SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(mini, fmin) +SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(maxi, max) +SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(maxi, fmax) + #endif @@ -933,6 +1288,37 @@ inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x); } +EIGEN_DEVICE_FUNC +inline bool abs2(bool x) { return x; } + +template<typename T> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T absdiff(const T& x, const T& y) +{ + return x > y ? x - y : y - x; +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE float absdiff(const float& x, const float& y) +{ + return fabsf(x - y); +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE double absdiff(const double& x, const double& y) +{ + return fabs(x - y); +} + +#if !defined(EIGEN_GPUCC) +// HIP and CUDA do not support long double. +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE long double absdiff(const long double& x, const long double& y) { + return fabsl(x - y); +} +#endif + template<typename Scalar> EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x) @@ -947,6 +1333,10 @@ inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y); } +#if defined(SYCL_DEVICE_ONLY) + SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(hypot, hypot) +#endif + template<typename Scalar> EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) @@ -954,7 +1344,11 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } -#ifdef __CUDACC__ +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log1p, log1p) +#endif + +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log1p(const float &x) { return ::log1pf(x); } @@ -969,10 +1363,27 @@ inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const Scala return internal::pow_impl<ScalarX,ScalarY>::run(x, y); } +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(pow, pow) +#endif + template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } template<typename T> EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isnan, isnan, bool) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isinf, isinf, bool) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isfinite, isfinite, bool) +#endif + +template<typename Scalar> +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(rint, Scalar) rint(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(rint, Scalar)::run(x); +} + template<typename Scalar> EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) @@ -980,15 +1391,23 @@ inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x); } +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round) +#endif + template<typename T> EIGEN_DEVICE_FUNC T (floor)(const T& x) { - EIGEN_USING_STD_MATH(floor); + EIGEN_USING_STD(floor) return floor(x); } -#ifdef __CUDACC__ +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(floor, floor) +#endif + +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float floor(const float &x) { return ::floorf(x); } @@ -1000,11 +1419,15 @@ template<typename T> EIGEN_DEVICE_FUNC T (ceil)(const T& x) { - EIGEN_USING_STD_MATH(ceil); + EIGEN_USING_STD(ceil); return ceil(x); } -#ifdef __CUDACC__ +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil) +#endif + +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float ceil(const float &x) { return ::ceilf(x); } @@ -1030,28 +1453,49 @@ inline int log2(int x) /** \returns the square root of \a x. * - * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode, + * It is essentially equivalent to + * \code using std::sqrt; return sqrt(x); \endcode * but slightly faster for float/double and some compilers (e.g., gcc), thanks to * specializations when SSE is enabled. * * It's usage is justified in performance critical functions, like norm/normalize. */ +template<typename Scalar> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE EIGEN_MATHFUNC_RETVAL(sqrt, Scalar) sqrt(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(sqrt, Scalar)::run(x); +} + +// Boolean specialization, avoids implicit float to bool conversion (-Wimplicit-conversion-floating-point-to-bool). +template<> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC +bool sqrt<bool>(const bool &x) { return x; } + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sqrt, sqrt) +#endif + +/** \returns the reciprocal square root of \a x. **/ template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -T sqrt(const T &x) +T rsqrt(const T& x) { - EIGEN_USING_STD_MATH(sqrt); - return sqrt(x); + return internal::rsqrt_impl<T>::run(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T log(const T &x) { - EIGEN_USING_STD_MATH(log); - return log(x); + return internal::log_impl<T>::run(x); } -#ifdef __CUDACC__ +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log, log) +#endif + + +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log(const float &x) { return ::logf(x); } @@ -1063,7 +1507,7 @@ template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type abs(const T &x) { - EIGEN_USING_STD_MATH(abs); + EIGEN_USING_STD(abs); return abs(x); } @@ -1074,12 +1518,12 @@ abs(const T &x) { return x; } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); } -EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_INTEGER_TYPES_UNARY(abs, abs) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(abs, fabs) +#endif -#ifdef __CUDACC__ +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float abs(const float &x) { return ::fabsf(x); } @@ -1100,26 +1544,69 @@ double abs(const std::complex<double>& x) { template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T exp(const T &x) { - EIGEN_USING_STD_MATH(exp); + EIGEN_USING_STD(exp); return exp(x); } -#ifdef __CUDACC__ +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp, exp) +#endif + +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float exp(const float &x) { return ::expf(x); } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp(const double &x) { return ::exp(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +std::complex<float> exp(const std::complex<float>& x) { + float com = ::expf(x.real()); + float res_real = com * ::cosf(x.imag()); + float res_imag = com * ::sinf(x.imag()); + return std::complex<float>(res_real, res_imag); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +std::complex<double> exp(const std::complex<double>& x) { + double com = ::exp(x.real()); + double res_real = com * ::cos(x.imag()); + double res_imag = com * ::sin(x.imag()); + return std::complex<double>(res_real, res_imag); +} +#endif + +template<typename Scalar> +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(expm1, expm1) +#endif + +#if defined(EIGEN_GPUCC) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float expm1(const float &x) { return ::expm1f(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double expm1(const double &x) { return ::expm1(x); } #endif template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cos(const T &x) { - EIGEN_USING_STD_MATH(cos); + EIGEN_USING_STD(cos); return cos(x); } -#ifdef __CUDACC__ +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cos,cos) +#endif + +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cos(const float &x) { return ::cosf(x); } @@ -1130,11 +1617,15 @@ double cos(const double &x) { return ::cos(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T sin(const T &x) { - EIGEN_USING_STD_MATH(sin); + EIGEN_USING_STD(sin); return sin(x); } -#ifdef __CUDACC__ +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sin, sin) +#endif + +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sin(const float &x) { return ::sinf(x); } @@ -1145,11 +1636,15 @@ double sin(const double &x) { return ::sin(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T tan(const T &x) { - EIGEN_USING_STD_MATH(tan); + EIGEN_USING_STD(tan); return tan(x); } -#ifdef __CUDACC__ +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tan, tan) +#endif + +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tan(const float &x) { return ::tanf(x); } @@ -1160,11 +1655,25 @@ double tan(const double &x) { return ::tan(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T acos(const T &x) { - EIGEN_USING_STD_MATH(acos); + EIGEN_USING_STD(acos); return acos(x); } -#ifdef __CUDACC__ +#if EIGEN_HAS_CXX11_MATH +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T acosh(const T &x) { + EIGEN_USING_STD(acosh); + return static_cast<T>(acosh(x)); +} +#endif + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acos, acos) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acosh, acosh) +#endif + +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float acos(const float &x) { return ::acosf(x); } @@ -1175,11 +1684,25 @@ double acos(const double &x) { return ::acos(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T asin(const T &x) { - EIGEN_USING_STD_MATH(asin); + EIGEN_USING_STD(asin); return asin(x); } -#ifdef __CUDACC__ +#if EIGEN_HAS_CXX11_MATH +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T asinh(const T &x) { + EIGEN_USING_STD(asinh); + return static_cast<T>(asinh(x)); +} +#endif + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asin, asin) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asinh, asinh) +#endif + +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float asin(const float &x) { return ::asinf(x); } @@ -1190,11 +1713,25 @@ double asin(const double &x) { return ::asin(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T atan(const T &x) { - EIGEN_USING_STD_MATH(atan); - return atan(x); + EIGEN_USING_STD(atan); + return static_cast<T>(atan(x)); +} + +#if EIGEN_HAS_CXX11_MATH +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T atanh(const T &x) { + EIGEN_USING_STD(atanh); + return static_cast<T>(atanh(x)); } +#endif + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atan, atan) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atanh, atanh) +#endif -#ifdef __CUDACC__ +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float atan(const float &x) { return ::atanf(x); } @@ -1206,11 +1743,15 @@ double atan(const double &x) { return ::atan(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cosh(const T &x) { - EIGEN_USING_STD_MATH(cosh); - return cosh(x); + EIGEN_USING_STD(cosh); + return static_cast<T>(cosh(x)); } -#ifdef __CUDACC__ +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cosh, cosh) +#endif + +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cosh(const float &x) { return ::coshf(x); } @@ -1221,11 +1762,15 @@ double cosh(const double &x) { return ::cosh(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T sinh(const T &x) { - EIGEN_USING_STD_MATH(sinh); - return sinh(x); + EIGEN_USING_STD(sinh); + return static_cast<T>(sinh(x)); } -#ifdef __CUDACC__ +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sinh, sinh) +#endif + +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sinh(const float &x) { return ::sinhf(x); } @@ -1236,16 +1781,20 @@ double sinh(const double &x) { return ::sinh(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T tanh(const T &x) { - EIGEN_USING_STD_MATH(tanh); + EIGEN_USING_STD(tanh); return tanh(x); } -#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH +#if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY) EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::generic_fast_tanh_float(x); } #endif -#ifdef __CUDACC__ +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tanh, tanh) +#endif + +#if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(const float &x) { return ::tanhf(x); } @@ -1256,11 +1805,15 @@ double tanh(const double &x) { return ::tanh(x); } template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T fmod(const T& a, const T& b) { - EIGEN_USING_STD_MATH(fmod); + EIGEN_USING_STD(fmod); return fmod(a, b); } -#ifdef __CUDACC__ +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(fmod, fmod) +#endif + +#if defined(EIGEN_GPUCC) template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float fmod(const float& a, const float& b) { @@ -1274,6 +1827,23 @@ double fmod(const double& a, const double& b) { } #endif +#if defined(SYCL_DEVICE_ONLY) +#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY +#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY +#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY +#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY +#undef SYCL_SPECIALIZE_INTEGER_TYPES_BINARY +#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY +#undef SYCL_SPECIALIZE_FLOATING_TYPES_BINARY +#undef SYCL_SPECIALIZE_FLOATING_TYPES_UNARY +#undef SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE +#undef SYCL_SPECIALIZE_GEN_UNARY_FUNC +#undef SYCL_SPECIALIZE_UNARY_FUNC +#undef SYCL_SPECIALIZE_GEN1_BINARY_FUNC +#undef SYCL_SPECIALIZE_GEN2_BINARY_FUNC +#undef SYCL_SPECIALIZE_BINARY_FUNC +#endif + } // end namespace numext namespace internal { @@ -1397,18 +1967,23 @@ template<> struct random_impl<bool> { return random<int>(0,1)==0 ? false : true; } + + static inline bool run(const bool& a, const bool& b) + { + return random<int>(a, b)==0 ? false : true; + } }; template<> struct scalar_fuzzy_impl<bool> { typedef bool RealScalar; - + template<typename OtherScalar> EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) { return !x; } - + EIGEN_DEVICE_FUNC static inline bool isApprox(bool x, bool y, bool) { @@ -1420,10 +1995,61 @@ template<> struct scalar_fuzzy_impl<bool> { return (!x) || y; } - + +}; + +} // end namespace internal + +// Default implementations that rely on other numext implementations +namespace internal { + +// Specialization for complex types that are not supported by std::expm1. +template <typename RealScalar> +struct expm1_impl<std::complex<RealScalar> > { + EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run( + const std::complex<RealScalar>& x) { + EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar) + RealScalar xr = x.real(); + RealScalar xi = x.imag(); + // expm1(z) = exp(z) - 1 + // = exp(x + i * y) - 1 + // = exp(x) * (cos(y) + i * sin(y)) - 1 + // = exp(x) * cos(y) - 1 + i * exp(x) * sin(y) + // Imag(expm1(z)) = exp(x) * sin(y) + // Real(expm1(z)) = exp(x) * cos(y) - 1 + // = exp(x) * cos(y) - 1. + // = expm1(x) + exp(x) * (cos(y) - 1) + // = expm1(x) + exp(x) * (2 * sin(y / 2) ** 2) + RealScalar erm1 = numext::expm1<RealScalar>(xr); + RealScalar er = erm1 + RealScalar(1.); + RealScalar sin2 = numext::sin(xi / RealScalar(2.)); + sin2 = sin2 * sin2; + RealScalar s = numext::sin(xi); + RealScalar real_part = erm1 - RealScalar(2.) * er * sin2; + return std::complex<RealScalar>(real_part, er * s); + } +}; + +template<typename T> +struct rsqrt_impl { + EIGEN_DEVICE_FUNC + static EIGEN_ALWAYS_INLINE T run(const T& x) { + return T(1)/numext::sqrt(x); + } +}; + +#if defined(EIGEN_GPU_COMPILE_PHASE) +template<typename T> +struct conj_impl<std::complex<T>, true> +{ + EIGEN_DEVICE_FUNC + static inline std::complex<T> run(const std::complex<T>& x) + { + return std::complex<T>(numext::real(x), -numext::imag(x)); + } }; +#endif - } // end namespace internal } // end namespace Eigen |