diff options
Diffstat (limited to 'Eigen/src/Core/arch/SYCL')
-rw-r--r-- | Eigen/src/Core/arch/SYCL/InteropHeaders.h | 232 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SYCL/MathFunctions.h | 301 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SYCL/PacketMath.h | 670 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SYCL/SyclMemoryModel.h | 694 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SYCL/TypeCasting.h | 85 |
5 files changed, 1982 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h new file mode 100644 index 000000000..10856ff5e --- /dev/null +++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h @@ -0,0 +1,232 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * InteropHeaders.h + * + * \brief: + * InteropHeaders + * + *****************************************************************/ + +#ifndef EIGEN_INTEROP_HEADERS_SYCL_H +#define EIGEN_INTEROP_HEADERS_SYCL_H + +namespace Eigen { + +#if !defined(EIGEN_DONT_VECTORIZE_SYCL) + +namespace internal { + +template <int has_blend, int lengths> +struct sycl_packet_traits : default_packet_traits { + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = lengths, + HasHalfPacket = 0, + HasDiv = 1, + HasLog = 1, + HasExp = 1, + HasSqrt = 1, + HasRsqrt = 1, + HasSin = 1, + HasCos = 1, + HasTan = 1, + HasASin = 1, + HasACos = 1, + HasATan = 1, + HasSinh = 1, + HasCosh = 1, + HasTanh = 1, + HasLGamma = 0, + HasDiGamma = 0, + HasZeta = 0, + HasPolygamma = 0, + HasErf = 0, + HasErfc = 0, + HasNdtri = 0, + HasIGamma = 0, + HasIGammac = 0, + HasBetaInc = 0, + HasBlend = has_blend, + // This flag is used to indicate whether packet comparison is supported. + // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true. + HasCmp = 1, + HasMax = 1, + HasMin = 1, + HasMul = 1, + HasAdd = 1, + HasFloor = 1, + HasRound = 1, + HasRint = 1, + HasLog1p = 1, + HasExpm1 = 1, + HasCeil = 1, + }; +}; + +#ifdef SYCL_DEVICE_ONLY +#define SYCL_PACKET_TRAITS(packet_type, has_blend, unpacket_type, lengths) \ + template <> \ + struct packet_traits<unpacket_type> \ + : sycl_packet_traits<has_blend, lengths> { \ + typedef packet_type type; \ + typedef packet_type half; \ + }; + +SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, float, 4) +SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, const float, 4) +SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, double, 2) +SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, const double, 2) +#undef SYCL_PACKET_TRAITS + +// Make sure this is only available when targeting a GPU: we don't want to +// introduce conflicts between these packet_traits definitions and the ones +// we'll use on the host side (SSE, AVX, ...) +#define SYCL_ARITHMETIC(packet_type) \ + template <> \ + struct is_arithmetic<packet_type> { \ + enum { value = true }; \ + }; +SYCL_ARITHMETIC(cl::sycl::cl_float4) +SYCL_ARITHMETIC(cl::sycl::cl_double2) +#undef SYCL_ARITHMETIC + +#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths) \ + template <> \ + struct unpacket_traits<packet_type> { \ + typedef unpacket_type type; \ + enum { size = lengths, vectorizable = true, alignment = Aligned16 }; \ + typedef packet_type half; \ + }; +SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4) +SYCL_UNPACKET_TRAITS(cl::sycl::cl_double2, double, 2) + +#undef SYCL_UNPACKET_TRAITS +#endif + +} // end namespace internal + +#endif + +namespace TensorSycl { +namespace internal { + +template <typename PacketReturnType, int PacketSize> +struct PacketWrapper; +// This function should never get called on the device +#ifndef SYCL_DEVICE_ONLY +template <typename PacketReturnType, int PacketSize> +struct PacketWrapper { + typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type + Scalar; + template <typename Index> + EIGEN_DEVICE_FUNC static Scalar scalarize(Index, PacketReturnType &) { + eigen_assert(false && "THERE IS NO PACKETIZE VERSION FOR THE CHOSEN TYPE"); + abort(); + } + EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(Scalar in, + Scalar) { + return ::Eigen::internal::template plset<PacketReturnType>(in); + } + EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType, Scalar *) { + eigen_assert(false && "THERE IS NO PACKETIZE VERSION FOR THE CHOSEN TYPE"); + abort(); + } +}; + +#elif defined(SYCL_DEVICE_ONLY) +template <typename PacketReturnType> +struct PacketWrapper<PacketReturnType, 4> { + typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type + Scalar; + template <typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index index, PacketReturnType &in) { + switch (index) { + case 0: + return in.x(); + case 1: + return in.y(); + case 2: + return in.z(); + case 3: + return in.w(); + default: + //INDEX MUST BE BETWEEN 0 and 3.There is no abort function in SYCL kernel. so we cannot use abort here. + // The code will never reach here + __builtin_unreachable(); + } + __builtin_unreachable(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type( + Scalar in, Scalar other) { + return PacketReturnType(in, other, other, other); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) { + lhs = PacketReturnType(rhs[0], rhs[1], rhs[2], rhs[3]); + } +}; + +template <typename PacketReturnType> +struct PacketWrapper<PacketReturnType, 1> { + typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type + Scalar; + template <typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index, PacketReturnType &in) { + return in; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type(Scalar in, + Scalar) { + return PacketReturnType(in); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) { + lhs = rhs[0]; + } +}; + +template <typename PacketReturnType> +struct PacketWrapper<PacketReturnType, 2> { + typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type + Scalar; + template <typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index index, PacketReturnType &in) { + switch (index) { + case 0: + return in.x(); + case 1: + return in.y(); + default: + //INDEX MUST BE BETWEEN 0 and 1.There is no abort function in SYCL kernel. so we cannot use abort here. + // The code will never reach here + __builtin_unreachable(); + } + __builtin_unreachable(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type( + Scalar in, Scalar other) { + return PacketReturnType(in, other); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) { + lhs = PacketReturnType(rhs[0], rhs[1]); + } +}; + +#endif + +} // end namespace internal +} // end namespace TensorSycl +} // end namespace Eigen + +#endif // EIGEN_INTEROP_HEADERS_SYCL_H diff --git a/Eigen/src/Core/arch/SYCL/MathFunctions.h b/Eigen/src/Core/arch/SYCL/MathFunctions.h new file mode 100644 index 000000000..2ab0f2a76 --- /dev/null +++ b/Eigen/src/Core/arch/SYCL/MathFunctions.h @@ -0,0 +1,301 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * MathFunctions.h + * + * \brief: + * MathFunctions + * + *****************************************************************/ + +#ifndef EIGEN_MATH_FUNCTIONS_SYCL_H +#define EIGEN_MATH_FUNCTIONS_SYCL_H +namespace Eigen { + +namespace internal { + +// Make sure this is only available when targeting a GPU: we don't want to +// introduce conflicts between these packet_traits definitions and the ones +// we'll use on the host side (SSE, AVX, ...) +#if defined(SYCL_DEVICE_ONLY) +#define SYCL_PLOG(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::log(a); \ + } + +SYCL_PLOG(cl::sycl::cl_float4) +SYCL_PLOG(cl::sycl::cl_double2) +#undef SYCL_PLOG + +#define SYCL_PLOG1P(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog1p<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::log1p(a); \ + } + +SYCL_PLOG1P(cl::sycl::cl_float4) +SYCL_PLOG1P(cl::sycl::cl_double2) +#undef SYCL_PLOG1P + +#define SYCL_PLOG10(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog10<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::log10(a); \ + } + +SYCL_PLOG10(cl::sycl::cl_float4) +SYCL_PLOG10(cl::sycl::cl_double2) +#undef SYCL_PLOG10 + +#define SYCL_PEXP(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pexp<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::exp(a); \ + } + +SYCL_PEXP(cl::sycl::cl_float4) +SYCL_PEXP(cl::sycl::cl_float) +SYCL_PEXP(cl::sycl::cl_double2) +#undef SYCL_PEXP + +#define SYCL_PEXPM1(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pexpm1<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::expm1(a); \ + } + +SYCL_PEXPM1(cl::sycl::cl_float4) +SYCL_PEXPM1(cl::sycl::cl_double2) +#undef SYCL_PEXPM1 + +#define SYCL_PSQRT(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psqrt<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::sqrt(a); \ + } + +SYCL_PSQRT(cl::sycl::cl_float4) +SYCL_PSQRT(cl::sycl::cl_double2) +#undef SYCL_PSQRT + +#define SYCL_PRSQRT(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type prsqrt<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::rsqrt(a); \ + } + +SYCL_PRSQRT(cl::sycl::cl_float4) +SYCL_PRSQRT(cl::sycl::cl_double2) +#undef SYCL_PRSQRT + +/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ +#define SYCL_PSIN(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psin<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::sin(a); \ + } + +SYCL_PSIN(cl::sycl::cl_float4) +SYCL_PSIN(cl::sycl::cl_double2) +#undef SYCL_PSIN + +/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ +#define SYCL_PCOS(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pcos<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::cos(a); \ + } + +SYCL_PCOS(cl::sycl::cl_float4) +SYCL_PCOS(cl::sycl::cl_double2) +#undef SYCL_PCOS + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ +#define SYCL_PTAN(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ptan<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::tan(a); \ + } + +SYCL_PTAN(cl::sycl::cl_float4) +SYCL_PTAN(cl::sycl::cl_double2) +#undef SYCL_PTAN + +/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ +#define SYCL_PASIN(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pasin<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::asin(a); \ + } + +SYCL_PASIN(cl::sycl::cl_float4) +SYCL_PASIN(cl::sycl::cl_double2) +#undef SYCL_PASIN + +/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ +#define SYCL_PACOS(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pacos<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::acos(a); \ + } + +SYCL_PACOS(cl::sycl::cl_float4) +SYCL_PACOS(cl::sycl::cl_double2) +#undef SYCL_PACOS + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ +#define SYCL_PATAN(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type patan<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::atan(a); \ + } + +SYCL_PATAN(cl::sycl::cl_float4) +SYCL_PATAN(cl::sycl::cl_double2) +#undef SYCL_PATAN + +/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ +#define SYCL_PSINH(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psinh<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::sinh(a); \ + } + +SYCL_PSINH(cl::sycl::cl_float4) +SYCL_PSINH(cl::sycl::cl_double2) +#undef SYCL_PSINH + +/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ +#define SYCL_PCOSH(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pcosh<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::cosh(a); \ + } + +SYCL_PCOSH(cl::sycl::cl_float4) +SYCL_PCOSH(cl::sycl::cl_double2) +#undef SYCL_PCOSH + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ +#define SYCL_PTANH(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ptanh<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::tanh(a); \ + } + +SYCL_PTANH(cl::sycl::cl_float4) +SYCL_PTANH(cl::sycl::cl_double2) +#undef SYCL_PTANH + +#define SYCL_PCEIL(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pceil<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::ceil(a); \ + } + +SYCL_PCEIL(cl::sycl::cl_float4) +SYCL_PCEIL(cl::sycl::cl_double2) +#undef SYCL_PCEIL + +#define SYCL_PROUND(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pround<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::round(a); \ + } + +SYCL_PROUND(cl::sycl::cl_float4) +SYCL_PROUND(cl::sycl::cl_double2) +#undef SYCL_PROUND + +#define SYCL_PRINT(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type print<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::rint(a); \ + } + +SYCL_PRINT(cl::sycl::cl_float4) +SYCL_PRINT(cl::sycl::cl_double2) +#undef SYCL_PRINT + +#define SYCL_FLOOR(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pfloor<packet_type>( \ + const packet_type& a) { \ + return cl::sycl::floor(a); \ + } + +SYCL_FLOOR(cl::sycl::cl_float4) +SYCL_FLOOR(cl::sycl::cl_double2) +#undef SYCL_FLOOR + +#define SYCL_PMIN(packet_type, expr) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pmin<packet_type>( \ + const packet_type& a, const packet_type& b) { \ + return expr; \ + } + +SYCL_PMIN(cl::sycl::cl_float4, cl::sycl::fmin(a, b)) +SYCL_PMIN(cl::sycl::cl_double2, cl::sycl::fmin(a, b)) +#undef SYCL_PMIN + +#define SYCL_PMAX(packet_type, expr) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pmax<packet_type>( \ + const packet_type& a, const packet_type& b) { \ + return expr; \ + } + +SYCL_PMAX(cl::sycl::cl_float4, cl::sycl::fmax(a, b)) +SYCL_PMAX(cl::sycl::cl_double2, cl::sycl::fmax(a, b)) +#undef SYCL_PMAX + +#define SYCL_PLDEXP(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pldexp( \ + const packet_type& a, const packet_type& exponent) { \ + return cl::sycl::ldexp( \ + a, exponent.template convert<cl::sycl::cl_int, \ + cl::sycl::rounding_mode::automatic>()); \ + } + +SYCL_PLDEXP(cl::sycl::cl_float4) +SYCL_PLDEXP(cl::sycl::cl_double2) +#undef SYCL_PLDEXP + +#endif +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATH_FUNCTIONS_SYCL_H diff --git a/Eigen/src/Core/arch/SYCL/PacketMath.h b/Eigen/src/Core/arch/SYCL/PacketMath.h new file mode 100644 index 000000000..87badc076 --- /dev/null +++ b/Eigen/src/Core/arch/SYCL/PacketMath.h @@ -0,0 +1,670 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * PacketMath.h + * + * \brief: + * PacketMath + * + *****************************************************************/ + +#ifndef EIGEN_PACKET_MATH_SYCL_H +#define EIGEN_PACKET_MATH_SYCL_H +#include <type_traits> +namespace Eigen { + +namespace internal { +#ifdef SYCL_DEVICE_ONLY + +#define SYCL_PLOADT_RO(address_space_target) \ + template <typename packet_type, int Alignment> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro( \ + typename cl::sycl::multi_ptr< \ + const typename unpacket_traits<packet_type>::type, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + from) { \ + typedef typename unpacket_traits<packet_type>::type scalar; \ + typedef cl::sycl::multi_ptr< \ + scalar, cl::sycl::access::address_space::address_space_target> \ + multi_ptr; \ + auto res = packet_type( \ + static_cast<typename unpacket_traits<packet_type>::type>(0)); \ + res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from))); \ + return res; \ + } + +SYCL_PLOADT_RO(global_space) +SYCL_PLOADT_RO(local_space) +#undef SYCL_PLOADT_RO +#endif + +template <typename packet_type, int Alignment, typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type +ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess< + cl::sycl::access::mode::read_write, T>& from) { + return ploadt_ro<packet_type, Alignment>(from.get_pointer()); +} + +#ifdef SYCL_DEVICE_ONLY +#define SYCL_PLOAD(address_space_target, Alignment, AlignedType) \ + template <typename packet_type> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \ + typename cl::sycl::multi_ptr< \ + const typename unpacket_traits<packet_type>::type, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + from) { \ + return ploadt_ro<packet_type, Alignment>(from); \ + } + +// global space +SYCL_PLOAD(global_space, Unaligned, u) +SYCL_PLOAD(global_space, Aligned, ) +// local space +SYCL_PLOAD(local_space, Unaligned, u) +SYCL_PLOAD(local_space, Aligned, ) + +#undef SYCL_PLOAD +#endif + +#define SYCL_PLOAD(Alignment, AlignedType) \ + template <typename packet_type> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \ + const Eigen::TensorSycl::internal::RangeAccess< \ + cl::sycl::access::mode::read_write, \ + typename unpacket_traits<packet_type>::type> \ + from) { \ + return ploadt_ro<packet_type, Alignment>(from); \ + } +SYCL_PLOAD(Unaligned, u) +SYCL_PLOAD(Aligned, ) +#undef SYCL_PLOAD + +#ifdef SYCL_DEVICE_ONLY +/** \internal \returns a packet version of \a *from. + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +#define SYCL_PLOADT(address_space_target) \ + template <typename packet_type, int Alignment> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt( \ + typename cl::sycl::multi_ptr< \ + const typename unpacket_traits<packet_type>::type, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + from) { \ + if (Alignment >= unpacket_traits<packet_type>::alignment) \ + return pload<packet_type>(from); \ + else \ + return ploadu<packet_type>(from); \ + } + +// global space +SYCL_PLOADT(global_space) +// local space +SYCL_PLOADT(local_space) +#undef SYCL_PLOADT +#endif + +template <typename packet_type, int Alignment> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type +ploadt(const Eigen::TensorSycl::internal::RangeAccess< + cl::sycl::access::mode::read_write, + typename unpacket_traits<packet_type>::type>& from) { + return ploadt<packet_type, Alignment>(from.get_pointer()); +} +#ifdef SYCL_DEVICE_ONLY + +// private_space +#define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type \ + ploadt_ro<packet_type, Alignment>( \ + const typename unpacket_traits<packet_type>::type* from) { \ + typedef typename unpacket_traits<packet_type>::type scalar; \ + auto res = packet_type(static_cast<scalar>(0)); \ + res.template load<cl::sycl::access::address_space::private_space>( \ + 0, const_cast<scalar*>(from)); \ + return res; \ + } + +SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned) +SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned) +SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned) +SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned) + +#define SYCL_PLOAD_SPECIAL(packet_type, alignment_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##alignment_type( \ + const typename unpacket_traits<packet_type>::type* from) { \ + typedef typename unpacket_traits<packet_type>::type scalar; \ + auto res = packet_type(static_cast<scalar>(0)); \ + res.template load<cl::sycl::access::address_space::private_space>( \ + 0, const_cast<scalar*>(from)); \ + return res; \ + } +SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, ) +SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, ) +SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u) +SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u) + +#undef SYCL_PLOAD_SPECIAL + +#define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \ + typename cl::sycl::multi_ptr< \ + scalar, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + to, \ + const packet_type& from) { \ + typedef cl::sycl::multi_ptr< \ + scalar, cl::sycl::access::address_space::address_space_target> \ + multi_ptr; \ + from.store(0, multi_ptr(to)); \ + } + +// global space +SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, ) +SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u) +SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, ) +SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u) +SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, ) +SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u) +SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, ) +SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, u) + +SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, ) +SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u) +SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, ) +SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u) +#undef SYCL_PSTORE + +#define SYCL_PSTORE_T(address_space_target) \ + template <typename scalar, typename packet_type, int Alignment> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret( \ + typename cl::sycl::multi_ptr< \ + scalar, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + to, \ + const packet_type& from) { \ + if (Alignment) \ + pstore(to, from); \ + else \ + pstoreu(to, from); \ + } + +SYCL_PSTORE_T(global_space) + +SYCL_PSTORE_T(local_space) + +#undef SYCL_PSTORE_T + +#define SYCL_PSET1(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \ + const typename unpacket_traits<packet_type>::type& from) { \ + return packet_type(from); \ + } + +// global space +SYCL_PSET1(cl::sycl::cl_float4) +SYCL_PSET1(cl::sycl::cl_double2) + +#undef SYCL_PSET1 + +template <typename packet_type> +struct get_base_packet { + template <typename sycl_multi_pointer> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type + get_ploaddup(sycl_multi_pointer) {} + + template <typename sycl_multi_pointer> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type + get_pgather(sycl_multi_pointer, Index) {} +}; + +template <> +struct get_base_packet<cl::sycl::cl_float4> { + template <typename sycl_multi_pointer> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup( + sycl_multi_pointer from) { + return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]); + } + template <typename sycl_multi_pointer> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather( + sycl_multi_pointer from, Index stride) { + return cl::sycl::cl_float4(from[0 * stride], from[1 * stride], + from[2 * stride], from[3 * stride]); + } + + template <typename sycl_multi_pointer> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter( + sycl_multi_pointer to, const cl::sycl::cl_float4& from, Index stride) { + auto tmp = stride; + to[0] = from.x(); + to[tmp] = from.y(); + to[tmp += stride] = from.z(); + to[tmp += stride] = from.w(); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset( + const float& a) { + return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1), + static_cast<float>(a + 2), + static_cast<float>(a + 3)); + } +}; + +template <> +struct get_base_packet<cl::sycl::cl_double2> { + template <typename sycl_multi_pointer> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 + get_ploaddup(const sycl_multi_pointer from) { + return cl::sycl::cl_double2(from[0], from[0]); + } + + template <typename sycl_multi_pointer, typename Index> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather( + const sycl_multi_pointer from, Index stride) { + return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]); + } + + template <typename sycl_multi_pointer> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter( + sycl_multi_pointer to, const cl::sycl::cl_double2& from, Index stride) { + to[0] = from.x(); + to[stride] = from.y(); + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset( + const double& a) { + return cl::sycl::cl_double2(static_cast<double>(a), + static_cast<double>(a + 1)); + } +}; + +#define SYCL_PLOAD_DUP(address_space_target) \ + template <typename packet_type> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup( \ + typename cl::sycl::multi_ptr< \ + const typename unpacket_traits<packet_type>::type, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + from) { \ + return get_base_packet<packet_type>::get_ploaddup(from); \ + } + +// global space +SYCL_PLOAD_DUP(global_space) +// local_space +SYCL_PLOAD_DUP(local_space) +#undef SYCL_PLOAD_DUP + +#define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \ + const typename unpacket_traits<packet_type>::type* from) { \ + return get_base_packet<packet_type>::get_ploaddup(from); \ + } + +SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4) +SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2) + +#undef SYCL_PLOAD_DUP_SPECILIZE + +#define SYCL_PLSET(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \ + const typename unpacket_traits<packet_type>::type& a) { \ + return get_base_packet<packet_type>::set_plset(a); \ + } + +SYCL_PLSET(cl::sycl::cl_float4) +SYCL_PLSET(cl::sycl::cl_double2) + +#undef SYCL_PLSET + +#define SYCL_PGATHER(address_space_target) \ + template <typename Scalar, typename packet_type> \ + EIGEN_DEVICE_FUNC inline packet_type pgather( \ + typename cl::sycl::multi_ptr< \ + const typename unpacket_traits<packet_type>::type, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + from, \ + Index stride) { \ + return get_base_packet<packet_type>::get_pgather(from, stride); \ + } + +// global space +SYCL_PGATHER(global_space) +// local space +SYCL_PGATHER(local_space) + +#undef SYCL_PGATHER + +#define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \ + pgather<scalar, packet_type>( \ + const typename unpacket_traits<packet_type>::type* from, Index stride) { \ + return get_base_packet<packet_type>::get_pgather(from, stride); \ + } + +SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4) +SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2) + +#undef SYCL_PGATHER_SPECILIZE + +#define SYCL_PSCATTER(address_space_target) \ + template <typename Scalar, typename packet_type> \ + EIGEN_DEVICE_FUNC inline void pscatter( \ + typename cl::sycl::multi_ptr< \ + typename unpacket_traits<packet_type>::type, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + to, \ + const packet_type& from, Index stride) { \ + get_base_packet<packet_type>::set_pscatter(to, from, stride); \ + } + +// global space +SYCL_PSCATTER(global_space) +// local space +SYCL_PSCATTER(local_space) + +#undef SYCL_PSCATTER + +#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \ + typename unpacket_traits<packet_type>::type * to, \ + const packet_type& from, Index stride) { \ + get_base_packet<packet_type>::set_pscatter(to, from, stride); \ + } + +SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4) +SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2) + +#undef SYCL_PSCATTER_SPECILIZE + +#define SYCL_PMAD(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( \ + const packet_type& a, const packet_type& b, const packet_type& c) { \ + return cl::sycl::mad(a, b, c); \ + } + +SYCL_PMAD(cl::sycl::cl_float4) +SYCL_PMAD(cl::sycl::cl_double2) +#undef SYCL_PMAD + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>( + const cl::sycl::cl_float4& a) { + return a.x(); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>( + const cl::sycl::cl_double2& a) { + return a.x(); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>( + const cl::sycl::cl_float4& a) { + return a.x() + a.y() + a.z() + a.w(); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>( + const cl::sycl::cl_double2& a) { + return a.x() + a.y(); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>( + const cl::sycl::cl_float4& a) { + return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), + cl::sycl::fmax(a.z(), a.w())); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>( + const cl::sycl::cl_double2& a) { + return cl::sycl::fmax(a.x(), a.y()); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>( + const cl::sycl::cl_float4& a) { + return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), + cl::sycl::fmin(a.z(), a.w())); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>( + const cl::sycl::cl_double2& a) { + return cl::sycl::fmin(a.x(), a.y()); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>( + const cl::sycl::cl_float4& a) { + return a.x() * a.y() * a.z() * a.w(); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>( + const cl::sycl::cl_double2& a) { + return a.x() * a.y(); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 +pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) { + return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), + cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w())); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 +pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) { + return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y())); +} + +template <typename Packet> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet &a, + const Packet &b) { + return ((a <= b) + .template convert<typename unpacket_traits<Packet>::type, + cl::sycl::rounding_mode::automatic>()); +} + +template <typename Packet> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet &a, + const Packet &b) { + return ((a < b) + .template convert<typename unpacket_traits<Packet>::type, + cl::sycl::rounding_mode::automatic>()); +} + +template <typename Packet> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet &a, + const Packet &b) { + return ((a == b) + .template convert<typename unpacket_traits<Packet>::type, + cl::sycl::rounding_mode::automatic>()); +} + +#define SYCL_PCMP(OP, TYPE) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE &a, \ + const TYPE &b) { \ + return sycl_pcmp_##OP<TYPE>(a, b); \ + } + +SYCL_PCMP(le, cl::sycl::cl_float4) +SYCL_PCMP(lt, cl::sycl::cl_float4) +SYCL_PCMP(eq, cl::sycl::cl_float4) +SYCL_PCMP(le, cl::sycl::cl_double2) +SYCL_PCMP(lt, cl::sycl::cl_double2) +SYCL_PCMP(eq, cl::sycl::cl_double2) +#undef SYCL_PCMP + +template <typename T> struct convert_to_integer; + +template <> struct convert_to_integer<float> { + using type = std::int32_t; + using packet_type = cl::sycl::cl_int4; +}; +template <> struct convert_to_integer<double> { + using type = std::int64_t; + using packet_type = cl::sycl::cl_long2; +}; + +template <typename PacketIn> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename convert_to_integer< + typename unpacket_traits<PacketIn>::type>::packet_type +vector_as_int(const PacketIn &p) { + return ( + p.template convert<typename convert_to_integer< + typename unpacket_traits<PacketIn>::type>::type, + cl::sycl::rounding_mode::automatic>()); +} + +template <typename packetOut, typename PacketIn> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packetOut +convert_vector(const PacketIn &p) { + return (p.template convert<typename unpacket_traits<packetOut>::type, + cl::sycl::rounding_mode::automatic>()); +} + +#define SYCL_PAND(TYPE) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pand<TYPE>(const TYPE &a, \ + const TYPE &b) { \ + return convert_vector<TYPE>(vector_as_int(a) & vector_as_int(b)); \ + } +SYCL_PAND(cl::sycl::cl_float4) +SYCL_PAND(cl::sycl::cl_double2) +#undef SYCL_PAND + +#define SYCL_POR(TYPE) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por<TYPE>(const TYPE &a, \ + const TYPE &b) { \ + return convert_vector<TYPE>(vector_as_int(a) | vector_as_int(b)); \ + } + +SYCL_POR(cl::sycl::cl_float4) +SYCL_POR(cl::sycl::cl_double2) +#undef SYCL_POR + +#define SYCL_PXOR(TYPE) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pxor<TYPE>(const TYPE &a, \ + const TYPE &b) { \ + return convert_vector<TYPE>(vector_as_int(a) ^ vector_as_int(b)); \ + } + +SYCL_PXOR(cl::sycl::cl_float4) +SYCL_PXOR(cl::sycl::cl_double2) +#undef SYCL_PXOR + +#define SYCL_PANDNOT(TYPE) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pandnot<TYPE>(const TYPE &a, \ + const TYPE &b) { \ + return convert_vector<TYPE>(vector_as_int(a) & (~vector_as_int(b))); \ + } +SYCL_PANDNOT(cl::sycl::cl_float4) +SYCL_PANDNOT(cl::sycl::cl_double2) +#undef SYCL_PANDNOT + +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose( + PacketBlock<cl::sycl::cl_float4, 4>& kernel) { + float tmp = kernel.packet[0].y(); + kernel.packet[0].y() = kernel.packet[1].x(); + kernel.packet[1].x() = tmp; + + tmp = kernel.packet[0].z(); + kernel.packet[0].z() = kernel.packet[2].x(); + kernel.packet[2].x() = tmp; + + tmp = kernel.packet[0].w(); + kernel.packet[0].w() = kernel.packet[3].x(); + kernel.packet[3].x() = tmp; + + tmp = kernel.packet[1].z(); + kernel.packet[1].z() = kernel.packet[2].y(); + kernel.packet[2].y() = tmp; + + tmp = kernel.packet[1].w(); + kernel.packet[1].w() = kernel.packet[3].y(); + kernel.packet[3].y() = tmp; + + tmp = kernel.packet[2].w(); + kernel.packet[2].w() = kernel.packet[3].z(); + kernel.packet[3].z() = tmp; +} + +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose( + PacketBlock<cl::sycl::cl_double2, 2>& kernel) { + double tmp = kernel.packet[0].y(); + kernel.packet[0].y() = kernel.packet[1].x(); + kernel.packet[1].x() = tmp; +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend( + const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket, + const cl::sycl::cl_float4& thenPacket, + const cl::sycl::cl_float4& elsePacket) { + cl::sycl::cl_int4 condition( + ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, + ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1); + return cl::sycl::select(thenPacket, elsePacket, condition); +} + +template <> +inline cl::sycl::cl_double2 pblend( + const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket, + const cl::sycl::cl_double2& thenPacket, + const cl::sycl::cl_double2& elsePacket) { + cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1, + ifPacket.select[1] ? 0 : -1); + return cl::sycl::select(thenPacket, elsePacket, condition); +} +#endif // SYCL_DEVICE_ONLY + +#define SYCL_PSTORE(alignment) \ + template <typename packet_type> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \ + const Eigen::TensorSycl::internal::RangeAccess< \ + cl::sycl::access::mode::read_write, \ + typename unpacket_traits<packet_type>::type>& to, \ + const packet_type& from) { \ + pstore##alignment(to.get_pointer(), from); \ + } + +// global space +SYCL_PSTORE() +SYCL_PSTORE(u) + +#undef SYCL_PSTORE + +template <typename scalar, typename packet_type, int Alignment> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret( + Eigen::TensorSycl::internal::RangeAccess< + cl::sycl::access::mode::read_write, + typename unpacket_traits<packet_type>::type> + to, + const packet_type& from) { + pstoret<scalar, packet_type, Alignment>(to.get_pointer(), from); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PACKET_MATH_SYCL_H diff --git a/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h b/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h new file mode 100644 index 000000000..f81e59db5 --- /dev/null +++ b/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h @@ -0,0 +1,694 @@ +/*************************************************************************** + * Copyright (C) 2017 Codeplay Software Limited + * This Source Code Form is subject to the terms of the Mozilla + * Public License v. 2.0. If a copy of the MPL was not distributed + * with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * + * SyclMemoryModel.h + * + * Description: + * Interface for SYCL buffers to behave as a non-dereferenceable pointer + * Interface for Placeholder accessor to behave as a pointer on both host + * and device + * + * Authors: + * + * Ruyman Reyes Codeplay Software Ltd. + * Mehdi Goli Codeplay Software Ltd. + * Vanya Yaneva Codeplay Software Ltd. + * + **************************************************************************/ + +#if defined(EIGEN_USE_SYCL) && \ + !defined(EIGEN_CXX11_TENSOR_TENSOR_SYCL_STORAGE_MEMORY_H) +#define EIGEN_CXX11_TENSOR_TENSOR_SYCL_STORAGE_MEMORY_H + +#include <CL/sycl.hpp> +#ifdef EIGEN_EXCEPTIONS +#include <stdexcept> +#endif +#include <cstddef> +#include <queue> +#include <set> +#include <unordered_map> + +namespace Eigen { +namespace TensorSycl { +namespace internal { + +using sycl_acc_target = cl::sycl::access::target; +using sycl_acc_mode = cl::sycl::access::mode; + +/** + * Default values for template arguments + */ +using buffer_data_type_t = uint8_t; +const sycl_acc_target default_acc_target = sycl_acc_target::global_buffer; +const sycl_acc_mode default_acc_mode = sycl_acc_mode::read_write; + +/** + * PointerMapper + * Associates fake pointers with buffers. + * + */ +class PointerMapper { + public: + using base_ptr_t = std::intptr_t; + + /* Structure of a virtual pointer + * + * |================================================| + * | POINTER ADDRESS | + * |================================================| + */ + struct virtual_pointer_t { + /* Type for the pointers + */ + base_ptr_t m_contents; + + /** Conversions from virtual_pointer_t to + * void * should just reinterpret_cast the integer number + */ + operator void *() const { return reinterpret_cast<void *>(m_contents); } + + /** + * Convert back to the integer number. + */ + operator base_ptr_t() const { return m_contents; } + + /** + * Add a certain value to the pointer to create a + * new pointer to that offset + */ + virtual_pointer_t operator+(size_t off) { return m_contents + off; } + + /* Numerical order for sorting pointers in containers. */ + bool operator<(virtual_pointer_t rhs) const { + return (static_cast<base_ptr_t>(m_contents) < + static_cast<base_ptr_t>(rhs.m_contents)); + } + + bool operator>(virtual_pointer_t rhs) const { + return (static_cast<base_ptr_t>(m_contents) > + static_cast<base_ptr_t>(rhs.m_contents)); + } + + /** + * Numerical order for sorting pointers in containers + */ + bool operator==(virtual_pointer_t rhs) const { + return (static_cast<base_ptr_t>(m_contents) == + static_cast<base_ptr_t>(rhs.m_contents)); + } + + /** + * Simple forward to the equality overload. + */ + bool operator!=(virtual_pointer_t rhs) const { + return !(this->operator==(rhs)); + } + + /** + * Converts a void * into a virtual pointer structure. + * Note that this will only work if the void * was + * already a virtual_pointer_t, but we have no way of + * checking + */ + virtual_pointer_t(const void *ptr) + : m_contents(reinterpret_cast<base_ptr_t>(ptr)){}; + + /** + * Creates a virtual_pointer_t from the given integer + * number + */ + virtual_pointer_t(base_ptr_t u) : m_contents(u){}; + }; + + /* Definition of a null pointer + */ + const virtual_pointer_t null_virtual_ptr = nullptr; + + /** + * Whether if a pointer is null or not. + * A pointer is nullptr if the value is of null_virtual_ptr + */ + static inline bool is_nullptr(virtual_pointer_t ptr) { + return (static_cast<void *>(ptr) == nullptr); + } + + /* basic type for all buffers + */ + using buffer_t = cl::sycl::buffer_mem; + + /** + * Node that stores information about a device allocation. + * Nodes are sorted by size to organise a free list of nodes + * that can be recovered. + */ + struct pMapNode_t { + buffer_t m_buffer; + size_t m_size; + bool m_free; + + pMapNode_t(buffer_t b, size_t size, bool f) + : m_buffer{b}, m_size{size}, m_free{f} { + m_buffer.set_final_data(nullptr); + } + + bool operator<=(const pMapNode_t &rhs) { return (m_size <= rhs.m_size); } + }; + + /** Storage of the pointer / buffer tree + */ + using pointerMap_t = std::map<virtual_pointer_t, pMapNode_t>; + + /** + * Obtain the insertion point in the pointer map for + * a pointer of the given size. + * \param requiredSize Size attemted to reclaim + */ + typename pointerMap_t::iterator get_insertion_point(size_t requiredSize) { + typename pointerMap_t::iterator retVal; + bool reuse = false; + if (!m_freeList.empty()) { + // try to re-use an existing block + for (auto freeElem : m_freeList) { + if (freeElem->second.m_size >= requiredSize) { + retVal = freeElem; + reuse = true; + // Element is not going to be free anymore + m_freeList.erase(freeElem); + break; + } + } + } + if (!reuse) { + retVal = std::prev(m_pointerMap.end()); + } + return retVal; + } + + /** + * Returns an iterator to the node that stores the information + * of the given virtual pointer from the given pointer map structure. + * If pointer is not found, throws std::out_of_range. + * If the pointer map structure is empty, throws std::out_of_range + * + * \param pMap the pointerMap_t structure storing all the pointers + * \param virtual_pointer_ptr The virtual pointer to obtain the node of + * \throws std::out:of_range if the pointer is not found or pMap is empty + */ + typename pointerMap_t::iterator get_node(const virtual_pointer_t ptr) { + if (this->count() == 0) { + m_pointerMap.clear(); + EIGEN_THROW_X(std::out_of_range("There are no pointers allocated\n")); + + } + if (is_nullptr(ptr)) { + m_pointerMap.clear(); + EIGEN_THROW_X(std::out_of_range("Cannot access null pointer\n")); + } + // The previous element to the lower bound is the node that + // holds this memory address + auto node = m_pointerMap.lower_bound(ptr); + // If the value of the pointer is not the one of the node + // then we return the previous one + if (node == std::end(m_pointerMap)) { + --node; + } else if (node->first != ptr) { + if (node == std::begin(m_pointerMap)) { + m_pointerMap.clear(); + EIGEN_THROW_X( + std::out_of_range("The pointer is not registered in the map\n")); + + } + --node; + } + + return node; + } + + /* get_buffer. + * Returns a buffer from the map using the pointer address + */ + template <typename buffer_data_type = buffer_data_type_t> + cl::sycl::buffer<buffer_data_type, 1> get_buffer( + const virtual_pointer_t ptr) { + using sycl_buffer_t = cl::sycl::buffer<buffer_data_type, 1>; + + // get_node() returns a `buffer_mem`, so we need to cast it to a `buffer<>`. + // We can do this without the `buffer_mem` being a pointer, as we + // only declare member variables in the base class (`buffer_mem`) and not in + // the child class (`buffer<>). + auto node = get_node(ptr); + eigen_assert(node->first == ptr || node->first < ptr); + eigen_assert(ptr < static_cast<virtual_pointer_t>(node->second.m_size + + node->first)); + return *(static_cast<sycl_buffer_t *>(&node->second.m_buffer)); + } + + /** + * @brief Returns an accessor to the buffer of the given virtual pointer + * @param accessMode + * @param accessTarget + * @param ptr The virtual pointer + */ + template <sycl_acc_mode access_mode = default_acc_mode, + sycl_acc_target access_target = default_acc_target, + typename buffer_data_type = buffer_data_type_t> + cl::sycl::accessor<buffer_data_type, 1, access_mode, access_target> + get_access(const virtual_pointer_t ptr) { + auto buf = get_buffer<buffer_data_type>(ptr); + return buf.template get_access<access_mode, access_target>(); + } + + /** + * @brief Returns an accessor to the buffer of the given virtual pointer + * in the given command group scope + * @param accessMode + * @param accessTarget + * @param ptr The virtual pointer + * @param cgh Reference to the command group scope + */ + template <sycl_acc_mode access_mode = default_acc_mode, + sycl_acc_target access_target = default_acc_target, + typename buffer_data_type = buffer_data_type_t> + cl::sycl::accessor<buffer_data_type, 1, access_mode, access_target> + get_access(const virtual_pointer_t ptr, cl::sycl::handler &cgh) { + auto buf = get_buffer<buffer_data_type>(ptr); + return buf.template get_access<access_mode, access_target>(cgh); + } + + /* + * Returns the offset from the base address of this pointer. + */ + inline std::ptrdiff_t get_offset(const virtual_pointer_t ptr) { + // The previous element to the lower bound is the node that + // holds this memory address + auto node = get_node(ptr); + auto start = node->first; + eigen_assert(start == ptr || start < ptr); + eigen_assert(ptr < start + node->second.m_size); + return (ptr - start); + } + + /* + * Returns the number of elements by which the given pointer is offset from + * the base address. + */ + template <typename buffer_data_type> + inline size_t get_element_offset(const virtual_pointer_t ptr) { + return get_offset(ptr) / sizeof(buffer_data_type); + } + + /** + * Constructs the PointerMapper structure. + */ + PointerMapper(base_ptr_t baseAddress = 4096) + : m_pointerMap{}, m_freeList{}, m_baseAddress{baseAddress} { + if (m_baseAddress == 0) { + EIGEN_THROW_X(std::invalid_argument("Base address cannot be zero\n")); + } + }; + + /** + * PointerMapper cannot be copied or moved + */ + PointerMapper(const PointerMapper &) = delete; + + /** + * Empty the pointer list + */ + inline void clear() { + m_freeList.clear(); + m_pointerMap.clear(); + } + + /* add_pointer. + * Adds an existing pointer to the map and returns the virtual pointer id. + */ + inline virtual_pointer_t add_pointer(const buffer_t &b) { + return add_pointer_impl(b); + } + + /* add_pointer. + * Adds a pointer to the map and returns the virtual pointer id. + */ + inline virtual_pointer_t add_pointer(buffer_t &&b) { + return add_pointer_impl(b); + } + + /** + * @brief Fuses the given node with the previous nodes in the + * pointer map if they are free + * + * @param node A reference to the free node to be fused + */ + void fuse_forward(typename pointerMap_t::iterator &node) { + while (node != std::prev(m_pointerMap.end())) { + // if following node is free + // remove it and extend the current node with its size + auto fwd_node = std::next(node); + if (!fwd_node->second.m_free) { + break; + } + auto fwd_size = fwd_node->second.m_size; + m_freeList.erase(fwd_node); + m_pointerMap.erase(fwd_node); + + node->second.m_size += fwd_size; + } + } + + /** + * @brief Fuses the given node with the following nodes in the + * pointer map if they are free + * + * @param node A reference to the free node to be fused + */ + void fuse_backward(typename pointerMap_t::iterator &node) { + while (node != m_pointerMap.begin()) { + // if previous node is free, extend it + // with the size of the current one + auto prev_node = std::prev(node); + if (!prev_node->second.m_free) { + break; + } + prev_node->second.m_size += node->second.m_size; + + // remove the current node + m_freeList.erase(node); + m_pointerMap.erase(node); + + // point to the previous node + node = prev_node; + } + } + + /* remove_pointer. + * Removes the given pointer from the map. + * The pointer is allowed to be reused only if ReUse if true. + */ + template <bool ReUse = true> + void remove_pointer(const virtual_pointer_t ptr) { + if (is_nullptr(ptr)) { + return; + } + auto node = this->get_node(ptr); + + node->second.m_free = true; + m_freeList.emplace(node); + + // Fuse the node + // with free nodes before and after it + fuse_forward(node); + fuse_backward(node); + + // If after fusing the node is the last one + // simply remove it (since it is free) + if (node == std::prev(m_pointerMap.end())) { + m_freeList.erase(node); + m_pointerMap.erase(node); + } + } + + /* count. + * Return the number of active pointers (i.e, pointers that + * have been malloc but not freed). + */ + size_t count() const { return (m_pointerMap.size() - m_freeList.size()); } + + private: + /* add_pointer_impl. + * Adds a pointer to the map and returns the virtual pointer id. + * BufferT is either a const buffer_t& or a buffer_t&&. + */ + template <class BufferT> + virtual_pointer_t add_pointer_impl(BufferT b) { + virtual_pointer_t retVal = nullptr; + size_t bufSize = b.get_count(); + pMapNode_t p{b, bufSize, false}; + // If this is the first pointer: + if (m_pointerMap.empty()) { + virtual_pointer_t initialVal{m_baseAddress}; + m_pointerMap.emplace(initialVal, p); + return initialVal; + } + + auto lastElemIter = get_insertion_point(bufSize); + // We are recovering an existing free node + if (lastElemIter->second.m_free) { + lastElemIter->second.m_buffer = b; + lastElemIter->second.m_free = false; + + // If the recovered node is bigger than the inserted one + // add a new free node with the remaining space + if (lastElemIter->second.m_size > bufSize) { + // create a new node with the remaining space + auto remainingSize = lastElemIter->second.m_size - bufSize; + pMapNode_t p2{b, remainingSize, true}; + + // update size of the current node + lastElemIter->second.m_size = bufSize; + + // add the new free node + auto newFreePtr = lastElemIter->first + bufSize; + auto freeNode = m_pointerMap.emplace(newFreePtr, p2).first; + m_freeList.emplace(freeNode); + } + + retVal = lastElemIter->first; + } else { + size_t lastSize = lastElemIter->second.m_size; + retVal = lastElemIter->first + lastSize; + m_pointerMap.emplace(retVal, p); + } + return retVal; + } + + /** + * Compare two iterators to pointer map entries according to + * the size of the allocation on the device. + */ + struct SortBySize { + bool operator()(typename pointerMap_t::iterator a, + typename pointerMap_t::iterator b) const { + return ((a->first < b->first) && (a->second <= b->second)) || + ((a->first < b->first) && (b->second <= a->second)); + } + }; + + /* Maps the pointer addresses to buffer and size pairs. + */ + pointerMap_t m_pointerMap; + + /* List of free nodes available for re-using + */ + std::set<typename pointerMap_t::iterator, SortBySize> m_freeList; + + /* Base address used when issuing the first virtual pointer, allows users + * to specify alignment. Cannot be zero. */ + std::intptr_t m_baseAddress; +}; + +/* remove_pointer. + * Removes the given pointer from the map. + * The pointer is allowed to be reused only if ReUse if true. + */ +template <> +inline void PointerMapper::remove_pointer<false>(const virtual_pointer_t ptr) { + if (is_nullptr(ptr)) { + return; + } + m_pointerMap.erase(this->get_node(ptr)); +} + +/** + * Malloc-like interface to the pointer-mapper. + * Given a size, creates a byte-typed buffer and returns a + * fake pointer to keep track of it. + * \param size Size in bytes of the desired allocation + * \throw cl::sycl::exception if error while creating the buffer + */ +inline void *SYCLmalloc(size_t size, PointerMapper &pMap) { + if (size == 0) { + return nullptr; + } + // Create a generic buffer of the given size + using buffer_t = cl::sycl::buffer<buffer_data_type_t, 1>; + auto thePointer = pMap.add_pointer(buffer_t(cl::sycl::range<1>{size})); + // Store the buffer on the global list + return static_cast<void *>(thePointer); +} + +/** + * Free-like interface to the pointer mapper. + * Given a fake-pointer created with the virtual-pointer malloc, + * destroys the buffer and remove it from the list. + * If ReUse is false, the pointer is not added to the freeList, + * it should be false only for sub-buffers. + */ +template <bool ReUse = true, typename PointerMapper> +inline void SYCLfree(void *ptr, PointerMapper &pMap) { + pMap.template remove_pointer<ReUse>(ptr); +} + +/** + * Clear all the memory allocated by SYCL. + */ +template <typename PointerMapper> +inline void SYCLfreeAll(PointerMapper &pMap) { + pMap.clear(); +} + +template <cl::sycl::access::mode AcMd, typename T> +struct RangeAccess { + static const auto global_access = cl::sycl::access::target::global_buffer; + static const auto is_place_holder = cl::sycl::access::placeholder::true_t; + typedef T scalar_t; + typedef scalar_t &ref_t; + typedef typename cl::sycl::global_ptr<scalar_t>::pointer_t ptr_t; + + // the accessor type does not necessarily the same as T + typedef cl::sycl::accessor<scalar_t, 1, AcMd, global_access, is_place_holder> + accessor; + + typedef RangeAccess<AcMd, T> self_t; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RangeAccess(accessor access, + size_t offset, + std::intptr_t virtual_ptr) + : access_(access), offset_(offset), virtual_ptr_(virtual_ptr) {} + + RangeAccess(cl::sycl::buffer<scalar_t, 1> buff = + cl::sycl::buffer<scalar_t, 1>(cl::sycl::range<1>(1))) + : access_{accessor{buff}}, offset_(0), virtual_ptr_(-1) {} + + // This should be only used for null constructor on the host side + RangeAccess(std::nullptr_t) : RangeAccess() {} + // This template parameter must be removed and scalar_t should be replaced + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptr_t get_pointer() const { + return (access_.get_pointer().get() + offset_); + } + template <typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t &operator+=(Index offset) { + offset_ += (offset); + return *this; + } + template <typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t operator+(Index offset) const { + return self_t(access_, offset_ + offset, virtual_ptr_); + } + template <typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t operator-(Index offset) const { + return self_t(access_, offset_ - offset, virtual_ptr_); + } + template <typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t &operator-=(Index offset) { + offset_ -= offset; + return *this; + } + + // THIS IS FOR NULL COMPARISON ONLY + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend bool operator==( + const RangeAccess &lhs, std::nullptr_t) { + return ((lhs.virtual_ptr_ == -1)); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend bool operator!=( + const RangeAccess &lhs, std::nullptr_t i) { + return !(lhs == i); + } + + // THIS IS FOR NULL COMPARISON ONLY + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend bool operator==( + std::nullptr_t, const RangeAccess &rhs) { + return ((rhs.virtual_ptr_ == -1)); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend bool operator!=( + std::nullptr_t i, const RangeAccess &rhs) { + return !(i == rhs); + } + // Prefix operator (Increment and return value) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t &operator++() { + offset_++; + return (*this); + } + + // Postfix operator (Return value and increment) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t operator++(int i) { + EIGEN_UNUSED_VARIABLE(i); + self_t temp_iterator(*this); + offset_++; + return temp_iterator; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t get_size() const { + return (access_.get_count() - offset_); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t get_offset() const { + return offset_; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_offset(std::ptrdiff_t offset) { + offset_ = offset; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ref_t operator*() const { + return *get_pointer(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ref_t operator*() { + return *get_pointer(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptr_t operator->() = delete; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ref_t operator[](int x) { + return *(get_pointer() + x); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ref_t operator[](int x) const { + return *(get_pointer() + x); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_t *get_virtual_pointer() const { + return reinterpret_cast<scalar_t *>(virtual_ptr_ + + (offset_ * sizeof(scalar_t))); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit operator bool() const { + return (virtual_ptr_ != -1); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator RangeAccess<AcMd, const T>() { + return RangeAccess<AcMd, const T>(access_, offset_, virtual_ptr_); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + operator RangeAccess<AcMd, const T>() const { + return RangeAccess<AcMd, const T>(access_, offset_, virtual_ptr_); + } + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind( + cl::sycl::handler &cgh) const { + cgh.require(access_); + } + + private: + accessor access_; + size_t offset_; + std::intptr_t virtual_ptr_; // the location of the buffer in the map +}; + +template <cl::sycl::access::mode AcMd, typename T> +struct RangeAccess<AcMd, const T> : RangeAccess<AcMd, T> { + typedef RangeAccess<AcMd, T> Base; + using Base::Base; +}; + +} // namespace internal +} // namespace TensorSycl +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_SYCL_STORAGE_MEMORY_H diff --git a/Eigen/src/Core/arch/SYCL/TypeCasting.h b/Eigen/src/Core/arch/SYCL/TypeCasting.h new file mode 100644 index 000000000..9208ab21d --- /dev/null +++ b/Eigen/src/Core/arch/SYCL/TypeCasting.h @@ -0,0 +1,85 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TypeCasting.h + * + * \brief: + * TypeCasting + * + *****************************************************************/ + +#ifndef EIGEN_TYPE_CASTING_SYCL_H +#define EIGEN_TYPE_CASTING_SYCL_H + +namespace Eigen { + +namespace internal { +#ifdef SYCL_DEVICE_ONLY +template <> +struct type_casting_traits<float, int> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4 +pcast<cl::sycl::cl_float4, cl::sycl::cl_int4>(const cl::sycl::cl_float4& a) { + return a + .template convert<cl::sycl::cl_int, cl::sycl::rounding_mode::automatic>(); +} + +template <> +struct type_casting_traits<int, float> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; +}; + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 +pcast<cl::sycl::cl_int4, cl::sycl::cl_float4>(const cl::sycl::cl_int4& a) { + return a.template convert<cl::sycl::cl_float, + cl::sycl::rounding_mode::automatic>(); +} + +template <> +struct type_casting_traits<double, float> { + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; +}; + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 +pcast<cl::sycl::cl_double2, cl::sycl::cl_float4>( + const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) { + auto a1 = a.template convert<cl::sycl::cl_float, + cl::sycl::rounding_mode::automatic>(); + auto b1 = b.template convert<cl::sycl::cl_float, + cl::sycl::rounding_mode::automatic>(); + return cl::sycl::float4(a1.x(), a1.y(), b1.x(), b1.y()); +} + +template <> +struct type_casting_traits<float, double> { + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; +}; + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 +pcast<cl::sycl::cl_float4, cl::sycl::cl_double2>(const cl::sycl::cl_float4& a) { + // Simply discard the second half of the input + return cl::sycl::cl_double2(a.x(), a.y()); +} + +#endif +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TYPE_CASTING_SYCL_H |