aboutsummaryrefslogtreecommitdiff
path: root/Eigen/src/Core/arch/SYCL
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/arch/SYCL')
-rw-r--r--Eigen/src/Core/arch/SYCL/InteropHeaders.h232
-rw-r--r--Eigen/src/Core/arch/SYCL/MathFunctions.h301
-rw-r--r--Eigen/src/Core/arch/SYCL/PacketMath.h670
-rw-r--r--Eigen/src/Core/arch/SYCL/SyclMemoryModel.h694
-rw-r--r--Eigen/src/Core/arch/SYCL/TypeCasting.h85
5 files changed, 1982 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
new file mode 100644
index 000000000..10856ff5e
--- /dev/null
+++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
@@ -0,0 +1,232 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * InteropHeaders.h
+ *
+ * \brief:
+ * InteropHeaders
+ *
+ *****************************************************************/
+
+#ifndef EIGEN_INTEROP_HEADERS_SYCL_H
+#define EIGEN_INTEROP_HEADERS_SYCL_H
+
+namespace Eigen {
+
+#if !defined(EIGEN_DONT_VECTORIZE_SYCL)
+
+namespace internal {
+
+template <int has_blend, int lengths>
+struct sycl_packet_traits : default_packet_traits {
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = lengths,
+ HasHalfPacket = 0,
+ HasDiv = 1,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasSin = 1,
+ HasCos = 1,
+ HasTan = 1,
+ HasASin = 1,
+ HasACos = 1,
+ HasATan = 1,
+ HasSinh = 1,
+ HasCosh = 1,
+ HasTanh = 1,
+ HasLGamma = 0,
+ HasDiGamma = 0,
+ HasZeta = 0,
+ HasPolygamma = 0,
+ HasErf = 0,
+ HasErfc = 0,
+ HasNdtri = 0,
+ HasIGamma = 0,
+ HasIGammac = 0,
+ HasBetaInc = 0,
+ HasBlend = has_blend,
+ // This flag is used to indicate whether packet comparison is supported.
+ // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
+ HasCmp = 1,
+ HasMax = 1,
+ HasMin = 1,
+ HasMul = 1,
+ HasAdd = 1,
+ HasFloor = 1,
+ HasRound = 1,
+ HasRint = 1,
+ HasLog1p = 1,
+ HasExpm1 = 1,
+ HasCeil = 1,
+ };
+};
+
+#ifdef SYCL_DEVICE_ONLY
+#define SYCL_PACKET_TRAITS(packet_type, has_blend, unpacket_type, lengths) \
+ template <> \
+ struct packet_traits<unpacket_type> \
+ : sycl_packet_traits<has_blend, lengths> { \
+ typedef packet_type type; \
+ typedef packet_type half; \
+ };
+
+SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, float, 4)
+SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, const float, 4)
+SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, double, 2)
+SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, const double, 2)
+#undef SYCL_PACKET_TRAITS
+
+// Make sure this is only available when targeting a GPU: we don't want to
+// introduce conflicts between these packet_traits definitions and the ones
+// we'll use on the host side (SSE, AVX, ...)
+#define SYCL_ARITHMETIC(packet_type) \
+ template <> \
+ struct is_arithmetic<packet_type> { \
+ enum { value = true }; \
+ };
+SYCL_ARITHMETIC(cl::sycl::cl_float4)
+SYCL_ARITHMETIC(cl::sycl::cl_double2)
+#undef SYCL_ARITHMETIC
+
+#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths) \
+ template <> \
+ struct unpacket_traits<packet_type> { \
+ typedef unpacket_type type; \
+ enum { size = lengths, vectorizable = true, alignment = Aligned16 }; \
+ typedef packet_type half; \
+ };
+SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4)
+SYCL_UNPACKET_TRAITS(cl::sycl::cl_double2, double, 2)
+
+#undef SYCL_UNPACKET_TRAITS
+#endif
+
+} // end namespace internal
+
+#endif
+
+namespace TensorSycl {
+namespace internal {
+
+template <typename PacketReturnType, int PacketSize>
+struct PacketWrapper;
+// This function should never get called on the device
+#ifndef SYCL_DEVICE_ONLY
+template <typename PacketReturnType, int PacketSize>
+struct PacketWrapper {
+ typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
+ Scalar;
+ template <typename Index>
+ EIGEN_DEVICE_FUNC static Scalar scalarize(Index, PacketReturnType &) {
+ eigen_assert(false && "THERE IS NO PACKETIZE VERSION FOR THE CHOSEN TYPE");
+ abort();
+ }
+ EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(Scalar in,
+ Scalar) {
+ return ::Eigen::internal::template plset<PacketReturnType>(in);
+ }
+ EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType, Scalar *) {
+ eigen_assert(false && "THERE IS NO PACKETIZE VERSION FOR THE CHOSEN TYPE");
+ abort();
+ }
+};
+
+#elif defined(SYCL_DEVICE_ONLY)
+template <typename PacketReturnType>
+struct PacketWrapper<PacketReturnType, 4> {
+ typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
+ Scalar;
+ template <typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index index, PacketReturnType &in) {
+ switch (index) {
+ case 0:
+ return in.x();
+ case 1:
+ return in.y();
+ case 2:
+ return in.z();
+ case 3:
+ return in.w();
+ default:
+ //INDEX MUST BE BETWEEN 0 and 3.There is no abort function in SYCL kernel. so we cannot use abort here.
+ // The code will never reach here
+ __builtin_unreachable();
+ }
+ __builtin_unreachable();
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type(
+ Scalar in, Scalar other) {
+ return PacketReturnType(in, other, other, other);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
+ lhs = PacketReturnType(rhs[0], rhs[1], rhs[2], rhs[3]);
+ }
+};
+
+template <typename PacketReturnType>
+struct PacketWrapper<PacketReturnType, 1> {
+ typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
+ Scalar;
+ template <typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index, PacketReturnType &in) {
+ return in;
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type(Scalar in,
+ Scalar) {
+ return PacketReturnType(in);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
+ lhs = rhs[0];
+ }
+};
+
+template <typename PacketReturnType>
+struct PacketWrapper<PacketReturnType, 2> {
+ typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
+ Scalar;
+ template <typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index index, PacketReturnType &in) {
+ switch (index) {
+ case 0:
+ return in.x();
+ case 1:
+ return in.y();
+ default:
+ //INDEX MUST BE BETWEEN 0 and 1.There is no abort function in SYCL kernel. so we cannot use abort here.
+ // The code will never reach here
+ __builtin_unreachable();
+ }
+ __builtin_unreachable();
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type(
+ Scalar in, Scalar other) {
+ return PacketReturnType(in, other);
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
+ lhs = PacketReturnType(rhs[0], rhs[1]);
+ }
+};
+
+#endif
+
+} // end namespace internal
+} // end namespace TensorSycl
+} // end namespace Eigen
+
+#endif // EIGEN_INTEROP_HEADERS_SYCL_H
diff --git a/Eigen/src/Core/arch/SYCL/MathFunctions.h b/Eigen/src/Core/arch/SYCL/MathFunctions.h
new file mode 100644
index 000000000..2ab0f2a76
--- /dev/null
+++ b/Eigen/src/Core/arch/SYCL/MathFunctions.h
@@ -0,0 +1,301 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * MathFunctions.h
+ *
+ * \brief:
+ * MathFunctions
+ *
+ *****************************************************************/
+
+#ifndef EIGEN_MATH_FUNCTIONS_SYCL_H
+#define EIGEN_MATH_FUNCTIONS_SYCL_H
+namespace Eigen {
+
+namespace internal {
+
+// Make sure this is only available when targeting a GPU: we don't want to
+// introduce conflicts between these packet_traits definitions and the ones
+// we'll use on the host side (SSE, AVX, ...)
+#if defined(SYCL_DEVICE_ONLY)
+#define SYCL_PLOG(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::log(a); \
+ }
+
+SYCL_PLOG(cl::sycl::cl_float4)
+SYCL_PLOG(cl::sycl::cl_double2)
+#undef SYCL_PLOG
+
+#define SYCL_PLOG1P(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog1p<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::log1p(a); \
+ }
+
+SYCL_PLOG1P(cl::sycl::cl_float4)
+SYCL_PLOG1P(cl::sycl::cl_double2)
+#undef SYCL_PLOG1P
+
+#define SYCL_PLOG10(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog10<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::log10(a); \
+ }
+
+SYCL_PLOG10(cl::sycl::cl_float4)
+SYCL_PLOG10(cl::sycl::cl_double2)
+#undef SYCL_PLOG10
+
+#define SYCL_PEXP(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pexp<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::exp(a); \
+ }
+
+SYCL_PEXP(cl::sycl::cl_float4)
+SYCL_PEXP(cl::sycl::cl_float)
+SYCL_PEXP(cl::sycl::cl_double2)
+#undef SYCL_PEXP
+
+#define SYCL_PEXPM1(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pexpm1<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::expm1(a); \
+ }
+
+SYCL_PEXPM1(cl::sycl::cl_float4)
+SYCL_PEXPM1(cl::sycl::cl_double2)
+#undef SYCL_PEXPM1
+
+#define SYCL_PSQRT(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psqrt<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::sqrt(a); \
+ }
+
+SYCL_PSQRT(cl::sycl::cl_float4)
+SYCL_PSQRT(cl::sycl::cl_double2)
+#undef SYCL_PSQRT
+
+#define SYCL_PRSQRT(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type prsqrt<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::rsqrt(a); \
+ }
+
+SYCL_PRSQRT(cl::sycl::cl_float4)
+SYCL_PRSQRT(cl::sycl::cl_double2)
+#undef SYCL_PRSQRT
+
+/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
+#define SYCL_PSIN(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psin<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::sin(a); \
+ }
+
+SYCL_PSIN(cl::sycl::cl_float4)
+SYCL_PSIN(cl::sycl::cl_double2)
+#undef SYCL_PSIN
+
+/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
+#define SYCL_PCOS(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pcos<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::cos(a); \
+ }
+
+SYCL_PCOS(cl::sycl::cl_float4)
+SYCL_PCOS(cl::sycl::cl_double2)
+#undef SYCL_PCOS
+
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
+#define SYCL_PTAN(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ptan<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::tan(a); \
+ }
+
+SYCL_PTAN(cl::sycl::cl_float4)
+SYCL_PTAN(cl::sycl::cl_double2)
+#undef SYCL_PTAN
+
+/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
+#define SYCL_PASIN(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pasin<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::asin(a); \
+ }
+
+SYCL_PASIN(cl::sycl::cl_float4)
+SYCL_PASIN(cl::sycl::cl_double2)
+#undef SYCL_PASIN
+
+/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
+#define SYCL_PACOS(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pacos<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::acos(a); \
+ }
+
+SYCL_PACOS(cl::sycl::cl_float4)
+SYCL_PACOS(cl::sycl::cl_double2)
+#undef SYCL_PACOS
+
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
+#define SYCL_PATAN(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type patan<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::atan(a); \
+ }
+
+SYCL_PATAN(cl::sycl::cl_float4)
+SYCL_PATAN(cl::sycl::cl_double2)
+#undef SYCL_PATAN
+
+/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
+#define SYCL_PSINH(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psinh<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::sinh(a); \
+ }
+
+SYCL_PSINH(cl::sycl::cl_float4)
+SYCL_PSINH(cl::sycl::cl_double2)
+#undef SYCL_PSINH
+
+/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
+#define SYCL_PCOSH(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pcosh<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::cosh(a); \
+ }
+
+SYCL_PCOSH(cl::sycl::cl_float4)
+SYCL_PCOSH(cl::sycl::cl_double2)
+#undef SYCL_PCOSH
+
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
+#define SYCL_PTANH(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ptanh<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::tanh(a); \
+ }
+
+SYCL_PTANH(cl::sycl::cl_float4)
+SYCL_PTANH(cl::sycl::cl_double2)
+#undef SYCL_PTANH
+
+#define SYCL_PCEIL(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pceil<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::ceil(a); \
+ }
+
+SYCL_PCEIL(cl::sycl::cl_float4)
+SYCL_PCEIL(cl::sycl::cl_double2)
+#undef SYCL_PCEIL
+
+#define SYCL_PROUND(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pround<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::round(a); \
+ }
+
+SYCL_PROUND(cl::sycl::cl_float4)
+SYCL_PROUND(cl::sycl::cl_double2)
+#undef SYCL_PROUND
+
+#define SYCL_PRINT(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type print<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::rint(a); \
+ }
+
+SYCL_PRINT(cl::sycl::cl_float4)
+SYCL_PRINT(cl::sycl::cl_double2)
+#undef SYCL_PRINT
+
+#define SYCL_FLOOR(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pfloor<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::floor(a); \
+ }
+
+SYCL_FLOOR(cl::sycl::cl_float4)
+SYCL_FLOOR(cl::sycl::cl_double2)
+#undef SYCL_FLOOR
+
+#define SYCL_PMIN(packet_type, expr) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pmin<packet_type>( \
+ const packet_type& a, const packet_type& b) { \
+ return expr; \
+ }
+
+SYCL_PMIN(cl::sycl::cl_float4, cl::sycl::fmin(a, b))
+SYCL_PMIN(cl::sycl::cl_double2, cl::sycl::fmin(a, b))
+#undef SYCL_PMIN
+
+#define SYCL_PMAX(packet_type, expr) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pmax<packet_type>( \
+ const packet_type& a, const packet_type& b) { \
+ return expr; \
+ }
+
+SYCL_PMAX(cl::sycl::cl_float4, cl::sycl::fmax(a, b))
+SYCL_PMAX(cl::sycl::cl_double2, cl::sycl::fmax(a, b))
+#undef SYCL_PMAX
+
+#define SYCL_PLDEXP(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pldexp( \
+ const packet_type& a, const packet_type& exponent) { \
+ return cl::sycl::ldexp( \
+ a, exponent.template convert<cl::sycl::cl_int, \
+ cl::sycl::rounding_mode::automatic>()); \
+ }
+
+SYCL_PLDEXP(cl::sycl::cl_float4)
+SYCL_PLDEXP(cl::sycl::cl_double2)
+#undef SYCL_PLDEXP
+
+#endif
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATH_FUNCTIONS_SYCL_H
diff --git a/Eigen/src/Core/arch/SYCL/PacketMath.h b/Eigen/src/Core/arch/SYCL/PacketMath.h
new file mode 100644
index 000000000..87badc076
--- /dev/null
+++ b/Eigen/src/Core/arch/SYCL/PacketMath.h
@@ -0,0 +1,670 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * PacketMath.h
+ *
+ * \brief:
+ * PacketMath
+ *
+ *****************************************************************/
+
+#ifndef EIGEN_PACKET_MATH_SYCL_H
+#define EIGEN_PACKET_MATH_SYCL_H
+#include <type_traits>
+namespace Eigen {
+
+namespace internal {
+#ifdef SYCL_DEVICE_ONLY
+
+#define SYCL_PLOADT_RO(address_space_target) \
+ template <typename packet_type, int Alignment> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro( \
+ typename cl::sycl::multi_ptr< \
+ const typename unpacket_traits<packet_type>::type, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ from) { \
+ typedef typename unpacket_traits<packet_type>::type scalar; \
+ typedef cl::sycl::multi_ptr< \
+ scalar, cl::sycl::access::address_space::address_space_target> \
+ multi_ptr; \
+ auto res = packet_type( \
+ static_cast<typename unpacket_traits<packet_type>::type>(0)); \
+ res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from))); \
+ return res; \
+ }
+
+SYCL_PLOADT_RO(global_space)
+SYCL_PLOADT_RO(local_space)
+#undef SYCL_PLOADT_RO
+#endif
+
+template <typename packet_type, int Alignment, typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
+ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess<
+ cl::sycl::access::mode::read_write, T>& from) {
+ return ploadt_ro<packet_type, Alignment>(from.get_pointer());
+}
+
+#ifdef SYCL_DEVICE_ONLY
+#define SYCL_PLOAD(address_space_target, Alignment, AlignedType) \
+ template <typename packet_type> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
+ typename cl::sycl::multi_ptr< \
+ const typename unpacket_traits<packet_type>::type, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ from) { \
+ return ploadt_ro<packet_type, Alignment>(from); \
+ }
+
+// global space
+SYCL_PLOAD(global_space, Unaligned, u)
+SYCL_PLOAD(global_space, Aligned, )
+// local space
+SYCL_PLOAD(local_space, Unaligned, u)
+SYCL_PLOAD(local_space, Aligned, )
+
+#undef SYCL_PLOAD
+#endif
+
+#define SYCL_PLOAD(Alignment, AlignedType) \
+ template <typename packet_type> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
+ const Eigen::TensorSycl::internal::RangeAccess< \
+ cl::sycl::access::mode::read_write, \
+ typename unpacket_traits<packet_type>::type> \
+ from) { \
+ return ploadt_ro<packet_type, Alignment>(from); \
+ }
+SYCL_PLOAD(Unaligned, u)
+SYCL_PLOAD(Aligned, )
+#undef SYCL_PLOAD
+
+#ifdef SYCL_DEVICE_ONLY
+/** \internal \returns a packet version of \a *from.
+ * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
+#define SYCL_PLOADT(address_space_target) \
+ template <typename packet_type, int Alignment> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt( \
+ typename cl::sycl::multi_ptr< \
+ const typename unpacket_traits<packet_type>::type, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ from) { \
+ if (Alignment >= unpacket_traits<packet_type>::alignment) \
+ return pload<packet_type>(from); \
+ else \
+ return ploadu<packet_type>(from); \
+ }
+
+// global space
+SYCL_PLOADT(global_space)
+// local space
+SYCL_PLOADT(local_space)
+#undef SYCL_PLOADT
+#endif
+
+template <typename packet_type, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
+ploadt(const Eigen::TensorSycl::internal::RangeAccess<
+ cl::sycl::access::mode::read_write,
+ typename unpacket_traits<packet_type>::type>& from) {
+ return ploadt<packet_type, Alignment>(from.get_pointer());
+}
+#ifdef SYCL_DEVICE_ONLY
+
+// private_space
+#define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type \
+ ploadt_ro<packet_type, Alignment>( \
+ const typename unpacket_traits<packet_type>::type* from) { \
+ typedef typename unpacket_traits<packet_type>::type scalar; \
+ auto res = packet_type(static_cast<scalar>(0)); \
+ res.template load<cl::sycl::access::address_space::private_space>( \
+ 0, const_cast<scalar*>(from)); \
+ return res; \
+ }
+
+SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned)
+SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned)
+SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned)
+SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned)
+
+#define SYCL_PLOAD_SPECIAL(packet_type, alignment_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##alignment_type( \
+ const typename unpacket_traits<packet_type>::type* from) { \
+ typedef typename unpacket_traits<packet_type>::type scalar; \
+ auto res = packet_type(static_cast<scalar>(0)); \
+ res.template load<cl::sycl::access::address_space::private_space>( \
+ 0, const_cast<scalar*>(from)); \
+ return res; \
+ }
+SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, )
+SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, )
+SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)
+SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)
+
+#undef SYCL_PLOAD_SPECIAL
+
+#define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
+ typename cl::sycl::multi_ptr< \
+ scalar, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ to, \
+ const packet_type& from) { \
+ typedef cl::sycl::multi_ptr< \
+ scalar, cl::sycl::access::address_space::address_space_target> \
+ multi_ptr; \
+ from.store(0, multi_ptr(to)); \
+ }
+
+// global space
+SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, )
+SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u)
+SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, )
+SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u)
+SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, )
+SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u)
+SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, )
+SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, u)
+
+SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, )
+SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u)
+SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, )
+SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u)
+#undef SYCL_PSTORE
+
+#define SYCL_PSTORE_T(address_space_target) \
+ template <typename scalar, typename packet_type, int Alignment> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret( \
+ typename cl::sycl::multi_ptr< \
+ scalar, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ to, \
+ const packet_type& from) { \
+ if (Alignment) \
+ pstore(to, from); \
+ else \
+ pstoreu(to, from); \
+ }
+
+SYCL_PSTORE_T(global_space)
+
+SYCL_PSTORE_T(local_space)
+
+#undef SYCL_PSTORE_T
+
+#define SYCL_PSET1(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \
+ const typename unpacket_traits<packet_type>::type& from) { \
+ return packet_type(from); \
+ }
+
+// global space
+SYCL_PSET1(cl::sycl::cl_float4)
+SYCL_PSET1(cl::sycl::cl_double2)
+
+#undef SYCL_PSET1
+
+template <typename packet_type>
+struct get_base_packet {
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
+ get_ploaddup(sycl_multi_pointer) {}
+
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
+ get_pgather(sycl_multi_pointer, Index) {}
+};
+
+template <>
+struct get_base_packet<cl::sycl::cl_float4> {
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(
+ sycl_multi_pointer from) {
+ return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
+ }
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(
+ sycl_multi_pointer from, Index stride) {
+ return cl::sycl::cl_float4(from[0 * stride], from[1 * stride],
+ from[2 * stride], from[3 * stride]);
+ }
+
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
+ sycl_multi_pointer to, const cl::sycl::cl_float4& from, Index stride) {
+ auto tmp = stride;
+ to[0] = from.x();
+ to[tmp] = from.y();
+ to[tmp += stride] = from.z();
+ to[tmp += stride] = from.w();
+ }
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(
+ const float& a) {
+ return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1),
+ static_cast<float>(a + 2),
+ static_cast<float>(a + 3));
+ }
+};
+
+template <>
+struct get_base_packet<cl::sycl::cl_double2> {
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2
+ get_ploaddup(const sycl_multi_pointer from) {
+ return cl::sycl::cl_double2(from[0], from[0]);
+ }
+
+ template <typename sycl_multi_pointer, typename Index>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(
+ const sycl_multi_pointer from, Index stride) {
+ return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
+ }
+
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
+ sycl_multi_pointer to, const cl::sycl::cl_double2& from, Index stride) {
+ to[0] = from.x();
+ to[stride] = from.y();
+ }
+
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(
+ const double& a) {
+ return cl::sycl::cl_double2(static_cast<double>(a),
+ static_cast<double>(a + 1));
+ }
+};
+
+#define SYCL_PLOAD_DUP(address_space_target) \
+ template <typename packet_type> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup( \
+ typename cl::sycl::multi_ptr< \
+ const typename unpacket_traits<packet_type>::type, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ from) { \
+ return get_base_packet<packet_type>::get_ploaddup(from); \
+ }
+
+// global space
+SYCL_PLOAD_DUP(global_space)
+// local_space
+SYCL_PLOAD_DUP(local_space)
+#undef SYCL_PLOAD_DUP
+
+#define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
+ const typename unpacket_traits<packet_type>::type* from) { \
+ return get_base_packet<packet_type>::get_ploaddup(from); \
+ }
+
+SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
+SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
+
+#undef SYCL_PLOAD_DUP_SPECILIZE
+
+#define SYCL_PLSET(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \
+ const typename unpacket_traits<packet_type>::type& a) { \
+ return get_base_packet<packet_type>::set_plset(a); \
+ }
+
+SYCL_PLSET(cl::sycl::cl_float4)
+SYCL_PLSET(cl::sycl::cl_double2)
+
+#undef SYCL_PLSET
+
+#define SYCL_PGATHER(address_space_target) \
+ template <typename Scalar, typename packet_type> \
+ EIGEN_DEVICE_FUNC inline packet_type pgather( \
+ typename cl::sycl::multi_ptr< \
+ const typename unpacket_traits<packet_type>::type, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ from, \
+ Index stride) { \
+ return get_base_packet<packet_type>::get_pgather(from, stride); \
+ }
+
+// global space
+SYCL_PGATHER(global_space)
+// local space
+SYCL_PGATHER(local_space)
+
+#undef SYCL_PGATHER
+
+#define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
+ pgather<scalar, packet_type>( \
+ const typename unpacket_traits<packet_type>::type* from, Index stride) { \
+ return get_base_packet<packet_type>::get_pgather(from, stride); \
+ }
+
+SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
+SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
+
+#undef SYCL_PGATHER_SPECILIZE
+
+#define SYCL_PSCATTER(address_space_target) \
+ template <typename Scalar, typename packet_type> \
+ EIGEN_DEVICE_FUNC inline void pscatter( \
+ typename cl::sycl::multi_ptr< \
+ typename unpacket_traits<packet_type>::type, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ to, \
+ const packet_type& from, Index stride) { \
+ get_base_packet<packet_type>::set_pscatter(to, from, stride); \
+ }
+
+// global space
+SYCL_PSCATTER(global_space)
+// local space
+SYCL_PSCATTER(local_space)
+
+#undef SYCL_PSCATTER
+
+#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
+ typename unpacket_traits<packet_type>::type * to, \
+ const packet_type& from, Index stride) { \
+ get_base_packet<packet_type>::set_pscatter(to, from, stride); \
+ }
+
+SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
+SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
+
+#undef SYCL_PSCATTER_SPECILIZE
+
+#define SYCL_PMAD(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( \
+ const packet_type& a, const packet_type& b, const packet_type& c) { \
+ return cl::sycl::mad(a, b, c); \
+ }
+
+SYCL_PMAD(cl::sycl::cl_float4)
+SYCL_PMAD(cl::sycl::cl_double2)
+#undef SYCL_PMAD
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(
+ const cl::sycl::cl_float4& a) {
+ return a.x();
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(
+ const cl::sycl::cl_double2& a) {
+ return a.x();
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(
+ const cl::sycl::cl_float4& a) {
+ return a.x() + a.y() + a.z() + a.w();
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(
+ const cl::sycl::cl_double2& a) {
+ return a.x() + a.y();
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(
+ const cl::sycl::cl_float4& a) {
+ return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()),
+ cl::sycl::fmax(a.z(), a.w()));
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(
+ const cl::sycl::cl_double2& a) {
+ return cl::sycl::fmax(a.x(), a.y());
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(
+ const cl::sycl::cl_float4& a) {
+ return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()),
+ cl::sycl::fmin(a.z(), a.w()));
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(
+ const cl::sycl::cl_double2& a) {
+ return cl::sycl::fmin(a.x(), a.y());
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(
+ const cl::sycl::cl_float4& a) {
+ return a.x() * a.y() * a.z() * a.w();
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(
+ const cl::sycl::cl_double2& a) {
+ return a.x() * a.y();
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
+pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
+ return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()),
+ cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
+pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
+ return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
+}
+
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet &a,
+ const Packet &b) {
+ return ((a <= b)
+ .template convert<typename unpacket_traits<Packet>::type,
+ cl::sycl::rounding_mode::automatic>());
+}
+
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet &a,
+ const Packet &b) {
+ return ((a < b)
+ .template convert<typename unpacket_traits<Packet>::type,
+ cl::sycl::rounding_mode::automatic>());
+}
+
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet &a,
+ const Packet &b) {
+ return ((a == b)
+ .template convert<typename unpacket_traits<Packet>::type,
+ cl::sycl::rounding_mode::automatic>());
+}
+
+#define SYCL_PCMP(OP, TYPE) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE &a, \
+ const TYPE &b) { \
+ return sycl_pcmp_##OP<TYPE>(a, b); \
+ }
+
+SYCL_PCMP(le, cl::sycl::cl_float4)
+SYCL_PCMP(lt, cl::sycl::cl_float4)
+SYCL_PCMP(eq, cl::sycl::cl_float4)
+SYCL_PCMP(le, cl::sycl::cl_double2)
+SYCL_PCMP(lt, cl::sycl::cl_double2)
+SYCL_PCMP(eq, cl::sycl::cl_double2)
+#undef SYCL_PCMP
+
+template <typename T> struct convert_to_integer;
+
+template <> struct convert_to_integer<float> {
+ using type = std::int32_t;
+ using packet_type = cl::sycl::cl_int4;
+};
+template <> struct convert_to_integer<double> {
+ using type = std::int64_t;
+ using packet_type = cl::sycl::cl_long2;
+};
+
+template <typename PacketIn>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename convert_to_integer<
+ typename unpacket_traits<PacketIn>::type>::packet_type
+vector_as_int(const PacketIn &p) {
+ return (
+ p.template convert<typename convert_to_integer<
+ typename unpacket_traits<PacketIn>::type>::type,
+ cl::sycl::rounding_mode::automatic>());
+}
+
+template <typename packetOut, typename PacketIn>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packetOut
+convert_vector(const PacketIn &p) {
+ return (p.template convert<typename unpacket_traits<packetOut>::type,
+ cl::sycl::rounding_mode::automatic>());
+}
+
+#define SYCL_PAND(TYPE) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pand<TYPE>(const TYPE &a, \
+ const TYPE &b) { \
+ return convert_vector<TYPE>(vector_as_int(a) & vector_as_int(b)); \
+ }
+SYCL_PAND(cl::sycl::cl_float4)
+SYCL_PAND(cl::sycl::cl_double2)
+#undef SYCL_PAND
+
+#define SYCL_POR(TYPE) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por<TYPE>(const TYPE &a, \
+ const TYPE &b) { \
+ return convert_vector<TYPE>(vector_as_int(a) | vector_as_int(b)); \
+ }
+
+SYCL_POR(cl::sycl::cl_float4)
+SYCL_POR(cl::sycl::cl_double2)
+#undef SYCL_POR
+
+#define SYCL_PXOR(TYPE) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pxor<TYPE>(const TYPE &a, \
+ const TYPE &b) { \
+ return convert_vector<TYPE>(vector_as_int(a) ^ vector_as_int(b)); \
+ }
+
+SYCL_PXOR(cl::sycl::cl_float4)
+SYCL_PXOR(cl::sycl::cl_double2)
+#undef SYCL_PXOR
+
+#define SYCL_PANDNOT(TYPE) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pandnot<TYPE>(const TYPE &a, \
+ const TYPE &b) { \
+ return convert_vector<TYPE>(vector_as_int(a) & (~vector_as_int(b))); \
+ }
+SYCL_PANDNOT(cl::sycl::cl_float4)
+SYCL_PANDNOT(cl::sycl::cl_double2)
+#undef SYCL_PANDNOT
+
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
+ PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
+ float tmp = kernel.packet[0].y();
+ kernel.packet[0].y() = kernel.packet[1].x();
+ kernel.packet[1].x() = tmp;
+
+ tmp = kernel.packet[0].z();
+ kernel.packet[0].z() = kernel.packet[2].x();
+ kernel.packet[2].x() = tmp;
+
+ tmp = kernel.packet[0].w();
+ kernel.packet[0].w() = kernel.packet[3].x();
+ kernel.packet[3].x() = tmp;
+
+ tmp = kernel.packet[1].z();
+ kernel.packet[1].z() = kernel.packet[2].y();
+ kernel.packet[2].y() = tmp;
+
+ tmp = kernel.packet[1].w();
+ kernel.packet[1].w() = kernel.packet[3].y();
+ kernel.packet[3].y() = tmp;
+
+ tmp = kernel.packet[2].w();
+ kernel.packet[2].w() = kernel.packet[3].z();
+ kernel.packet[3].z() = tmp;
+}
+
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
+ PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
+ double tmp = kernel.packet[0].y();
+ kernel.packet[0].y() = kernel.packet[1].x();
+ kernel.packet[1].x() = tmp;
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
+ const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
+ const cl::sycl::cl_float4& thenPacket,
+ const cl::sycl::cl_float4& elsePacket) {
+ cl::sycl::cl_int4 condition(
+ ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1,
+ ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1);
+ return cl::sycl::select(thenPacket, elsePacket, condition);
+}
+
+template <>
+inline cl::sycl::cl_double2 pblend(
+ const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
+ const cl::sycl::cl_double2& thenPacket,
+ const cl::sycl::cl_double2& elsePacket) {
+ cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
+ ifPacket.select[1] ? 0 : -1);
+ return cl::sycl::select(thenPacket, elsePacket, condition);
+}
+#endif // SYCL_DEVICE_ONLY
+
+#define SYCL_PSTORE(alignment) \
+ template <typename packet_type> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
+ const Eigen::TensorSycl::internal::RangeAccess< \
+ cl::sycl::access::mode::read_write, \
+ typename unpacket_traits<packet_type>::type>& to, \
+ const packet_type& from) { \
+ pstore##alignment(to.get_pointer(), from); \
+ }
+
+// global space
+SYCL_PSTORE()
+SYCL_PSTORE(u)
+
+#undef SYCL_PSTORE
+
+template <typename scalar, typename packet_type, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(
+ Eigen::TensorSycl::internal::RangeAccess<
+ cl::sycl::access::mode::read_write,
+ typename unpacket_traits<packet_type>::type>
+ to,
+ const packet_type& from) {
+ pstoret<scalar, packet_type, Alignment>(to.get_pointer(), from);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PACKET_MATH_SYCL_H
diff --git a/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h b/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h
new file mode 100644
index 000000000..f81e59db5
--- /dev/null
+++ b/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h
@@ -0,0 +1,694 @@
+/***************************************************************************
+ * Copyright (C) 2017 Codeplay Software Limited
+ * This Source Code Form is subject to the terms of the Mozilla
+ * Public License v. 2.0. If a copy of the MPL was not distributed
+ * with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ *
+ * SyclMemoryModel.h
+ *
+ * Description:
+ * Interface for SYCL buffers to behave as a non-dereferenceable pointer
+ * Interface for Placeholder accessor to behave as a pointer on both host
+ * and device
+ *
+ * Authors:
+ *
+ * Ruyman Reyes Codeplay Software Ltd.
+ * Mehdi Goli Codeplay Software Ltd.
+ * Vanya Yaneva Codeplay Software Ltd.
+ *
+ **************************************************************************/
+
+#if defined(EIGEN_USE_SYCL) && \
+ !defined(EIGEN_CXX11_TENSOR_TENSOR_SYCL_STORAGE_MEMORY_H)
+#define EIGEN_CXX11_TENSOR_TENSOR_SYCL_STORAGE_MEMORY_H
+
+#include <CL/sycl.hpp>
+#ifdef EIGEN_EXCEPTIONS
+#include <stdexcept>
+#endif
+#include <cstddef>
+#include <queue>
+#include <set>
+#include <unordered_map>
+
+namespace Eigen {
+namespace TensorSycl {
+namespace internal {
+
+using sycl_acc_target = cl::sycl::access::target;
+using sycl_acc_mode = cl::sycl::access::mode;
+
+/**
+ * Default values for template arguments
+ */
+using buffer_data_type_t = uint8_t;
+const sycl_acc_target default_acc_target = sycl_acc_target::global_buffer;
+const sycl_acc_mode default_acc_mode = sycl_acc_mode::read_write;
+
+/**
+ * PointerMapper
+ * Associates fake pointers with buffers.
+ *
+ */
+class PointerMapper {
+ public:
+ using base_ptr_t = std::intptr_t;
+
+ /* Structure of a virtual pointer
+ *
+ * |================================================|
+ * | POINTER ADDRESS |
+ * |================================================|
+ */
+ struct virtual_pointer_t {
+ /* Type for the pointers
+ */
+ base_ptr_t m_contents;
+
+ /** Conversions from virtual_pointer_t to
+ * void * should just reinterpret_cast the integer number
+ */
+ operator void *() const { return reinterpret_cast<void *>(m_contents); }
+
+ /**
+ * Convert back to the integer number.
+ */
+ operator base_ptr_t() const { return m_contents; }
+
+ /**
+ * Add a certain value to the pointer to create a
+ * new pointer to that offset
+ */
+ virtual_pointer_t operator+(size_t off) { return m_contents + off; }
+
+ /* Numerical order for sorting pointers in containers. */
+ bool operator<(virtual_pointer_t rhs) const {
+ return (static_cast<base_ptr_t>(m_contents) <
+ static_cast<base_ptr_t>(rhs.m_contents));
+ }
+
+ bool operator>(virtual_pointer_t rhs) const {
+ return (static_cast<base_ptr_t>(m_contents) >
+ static_cast<base_ptr_t>(rhs.m_contents));
+ }
+
+ /**
+ * Numerical order for sorting pointers in containers
+ */
+ bool operator==(virtual_pointer_t rhs) const {
+ return (static_cast<base_ptr_t>(m_contents) ==
+ static_cast<base_ptr_t>(rhs.m_contents));
+ }
+
+ /**
+ * Simple forward to the equality overload.
+ */
+ bool operator!=(virtual_pointer_t rhs) const {
+ return !(this->operator==(rhs));
+ }
+
+ /**
+ * Converts a void * into a virtual pointer structure.
+ * Note that this will only work if the void * was
+ * already a virtual_pointer_t, but we have no way of
+ * checking
+ */
+ virtual_pointer_t(const void *ptr)
+ : m_contents(reinterpret_cast<base_ptr_t>(ptr)){};
+
+ /**
+ * Creates a virtual_pointer_t from the given integer
+ * number
+ */
+ virtual_pointer_t(base_ptr_t u) : m_contents(u){};
+ };
+
+ /* Definition of a null pointer
+ */
+ const virtual_pointer_t null_virtual_ptr = nullptr;
+
+ /**
+ * Whether if a pointer is null or not.
+ * A pointer is nullptr if the value is of null_virtual_ptr
+ */
+ static inline bool is_nullptr(virtual_pointer_t ptr) {
+ return (static_cast<void *>(ptr) == nullptr);
+ }
+
+ /* basic type for all buffers
+ */
+ using buffer_t = cl::sycl::buffer_mem;
+
+ /**
+ * Node that stores information about a device allocation.
+ * Nodes are sorted by size to organise a free list of nodes
+ * that can be recovered.
+ */
+ struct pMapNode_t {
+ buffer_t m_buffer;
+ size_t m_size;
+ bool m_free;
+
+ pMapNode_t(buffer_t b, size_t size, bool f)
+ : m_buffer{b}, m_size{size}, m_free{f} {
+ m_buffer.set_final_data(nullptr);
+ }
+
+ bool operator<=(const pMapNode_t &rhs) { return (m_size <= rhs.m_size); }
+ };
+
+ /** Storage of the pointer / buffer tree
+ */
+ using pointerMap_t = std::map<virtual_pointer_t, pMapNode_t>;
+
+ /**
+ * Obtain the insertion point in the pointer map for
+ * a pointer of the given size.
+ * \param requiredSize Size attemted to reclaim
+ */
+ typename pointerMap_t::iterator get_insertion_point(size_t requiredSize) {
+ typename pointerMap_t::iterator retVal;
+ bool reuse = false;
+ if (!m_freeList.empty()) {
+ // try to re-use an existing block
+ for (auto freeElem : m_freeList) {
+ if (freeElem->second.m_size >= requiredSize) {
+ retVal = freeElem;
+ reuse = true;
+ // Element is not going to be free anymore
+ m_freeList.erase(freeElem);
+ break;
+ }
+ }
+ }
+ if (!reuse) {
+ retVal = std::prev(m_pointerMap.end());
+ }
+ return retVal;
+ }
+
+ /**
+ * Returns an iterator to the node that stores the information
+ * of the given virtual pointer from the given pointer map structure.
+ * If pointer is not found, throws std::out_of_range.
+ * If the pointer map structure is empty, throws std::out_of_range
+ *
+ * \param pMap the pointerMap_t structure storing all the pointers
+ * \param virtual_pointer_ptr The virtual pointer to obtain the node of
+ * \throws std::out:of_range if the pointer is not found or pMap is empty
+ */
+ typename pointerMap_t::iterator get_node(const virtual_pointer_t ptr) {
+ if (this->count() == 0) {
+ m_pointerMap.clear();
+ EIGEN_THROW_X(std::out_of_range("There are no pointers allocated\n"));
+
+ }
+ if (is_nullptr(ptr)) {
+ m_pointerMap.clear();
+ EIGEN_THROW_X(std::out_of_range("Cannot access null pointer\n"));
+ }
+ // The previous element to the lower bound is the node that
+ // holds this memory address
+ auto node = m_pointerMap.lower_bound(ptr);
+ // If the value of the pointer is not the one of the node
+ // then we return the previous one
+ if (node == std::end(m_pointerMap)) {
+ --node;
+ } else if (node->first != ptr) {
+ if (node == std::begin(m_pointerMap)) {
+ m_pointerMap.clear();
+ EIGEN_THROW_X(
+ std::out_of_range("The pointer is not registered in the map\n"));
+
+ }
+ --node;
+ }
+
+ return node;
+ }
+
+ /* get_buffer.
+ * Returns a buffer from the map using the pointer address
+ */
+ template <typename buffer_data_type = buffer_data_type_t>
+ cl::sycl::buffer<buffer_data_type, 1> get_buffer(
+ const virtual_pointer_t ptr) {
+ using sycl_buffer_t = cl::sycl::buffer<buffer_data_type, 1>;
+
+ // get_node() returns a `buffer_mem`, so we need to cast it to a `buffer<>`.
+ // We can do this without the `buffer_mem` being a pointer, as we
+ // only declare member variables in the base class (`buffer_mem`) and not in
+ // the child class (`buffer<>).
+ auto node = get_node(ptr);
+ eigen_assert(node->first == ptr || node->first < ptr);
+ eigen_assert(ptr < static_cast<virtual_pointer_t>(node->second.m_size +
+ node->first));
+ return *(static_cast<sycl_buffer_t *>(&node->second.m_buffer));
+ }
+
+ /**
+ * @brief Returns an accessor to the buffer of the given virtual pointer
+ * @param accessMode
+ * @param accessTarget
+ * @param ptr The virtual pointer
+ */
+ template <sycl_acc_mode access_mode = default_acc_mode,
+ sycl_acc_target access_target = default_acc_target,
+ typename buffer_data_type = buffer_data_type_t>
+ cl::sycl::accessor<buffer_data_type, 1, access_mode, access_target>
+ get_access(const virtual_pointer_t ptr) {
+ auto buf = get_buffer<buffer_data_type>(ptr);
+ return buf.template get_access<access_mode, access_target>();
+ }
+
+ /**
+ * @brief Returns an accessor to the buffer of the given virtual pointer
+ * in the given command group scope
+ * @param accessMode
+ * @param accessTarget
+ * @param ptr The virtual pointer
+ * @param cgh Reference to the command group scope
+ */
+ template <sycl_acc_mode access_mode = default_acc_mode,
+ sycl_acc_target access_target = default_acc_target,
+ typename buffer_data_type = buffer_data_type_t>
+ cl::sycl::accessor<buffer_data_type, 1, access_mode, access_target>
+ get_access(const virtual_pointer_t ptr, cl::sycl::handler &cgh) {
+ auto buf = get_buffer<buffer_data_type>(ptr);
+ return buf.template get_access<access_mode, access_target>(cgh);
+ }
+
+ /*
+ * Returns the offset from the base address of this pointer.
+ */
+ inline std::ptrdiff_t get_offset(const virtual_pointer_t ptr) {
+ // The previous element to the lower bound is the node that
+ // holds this memory address
+ auto node = get_node(ptr);
+ auto start = node->first;
+ eigen_assert(start == ptr || start < ptr);
+ eigen_assert(ptr < start + node->second.m_size);
+ return (ptr - start);
+ }
+
+ /*
+ * Returns the number of elements by which the given pointer is offset from
+ * the base address.
+ */
+ template <typename buffer_data_type>
+ inline size_t get_element_offset(const virtual_pointer_t ptr) {
+ return get_offset(ptr) / sizeof(buffer_data_type);
+ }
+
+ /**
+ * Constructs the PointerMapper structure.
+ */
+ PointerMapper(base_ptr_t baseAddress = 4096)
+ : m_pointerMap{}, m_freeList{}, m_baseAddress{baseAddress} {
+ if (m_baseAddress == 0) {
+ EIGEN_THROW_X(std::invalid_argument("Base address cannot be zero\n"));
+ }
+ };
+
+ /**
+ * PointerMapper cannot be copied or moved
+ */
+ PointerMapper(const PointerMapper &) = delete;
+
+ /**
+ * Empty the pointer list
+ */
+ inline void clear() {
+ m_freeList.clear();
+ m_pointerMap.clear();
+ }
+
+ /* add_pointer.
+ * Adds an existing pointer to the map and returns the virtual pointer id.
+ */
+ inline virtual_pointer_t add_pointer(const buffer_t &b) {
+ return add_pointer_impl(b);
+ }
+
+ /* add_pointer.
+ * Adds a pointer to the map and returns the virtual pointer id.
+ */
+ inline virtual_pointer_t add_pointer(buffer_t &&b) {
+ return add_pointer_impl(b);
+ }
+
+ /**
+ * @brief Fuses the given node with the previous nodes in the
+ * pointer map if they are free
+ *
+ * @param node A reference to the free node to be fused
+ */
+ void fuse_forward(typename pointerMap_t::iterator &node) {
+ while (node != std::prev(m_pointerMap.end())) {
+ // if following node is free
+ // remove it and extend the current node with its size
+ auto fwd_node = std::next(node);
+ if (!fwd_node->second.m_free) {
+ break;
+ }
+ auto fwd_size = fwd_node->second.m_size;
+ m_freeList.erase(fwd_node);
+ m_pointerMap.erase(fwd_node);
+
+ node->second.m_size += fwd_size;
+ }
+ }
+
+ /**
+ * @brief Fuses the given node with the following nodes in the
+ * pointer map if they are free
+ *
+ * @param node A reference to the free node to be fused
+ */
+ void fuse_backward(typename pointerMap_t::iterator &node) {
+ while (node != m_pointerMap.begin()) {
+ // if previous node is free, extend it
+ // with the size of the current one
+ auto prev_node = std::prev(node);
+ if (!prev_node->second.m_free) {
+ break;
+ }
+ prev_node->second.m_size += node->second.m_size;
+
+ // remove the current node
+ m_freeList.erase(node);
+ m_pointerMap.erase(node);
+
+ // point to the previous node
+ node = prev_node;
+ }
+ }
+
+ /* remove_pointer.
+ * Removes the given pointer from the map.
+ * The pointer is allowed to be reused only if ReUse if true.
+ */
+ template <bool ReUse = true>
+ void remove_pointer(const virtual_pointer_t ptr) {
+ if (is_nullptr(ptr)) {
+ return;
+ }
+ auto node = this->get_node(ptr);
+
+ node->second.m_free = true;
+ m_freeList.emplace(node);
+
+ // Fuse the node
+ // with free nodes before and after it
+ fuse_forward(node);
+ fuse_backward(node);
+
+ // If after fusing the node is the last one
+ // simply remove it (since it is free)
+ if (node == std::prev(m_pointerMap.end())) {
+ m_freeList.erase(node);
+ m_pointerMap.erase(node);
+ }
+ }
+
+ /* count.
+ * Return the number of active pointers (i.e, pointers that
+ * have been malloc but not freed).
+ */
+ size_t count() const { return (m_pointerMap.size() - m_freeList.size()); }
+
+ private:
+ /* add_pointer_impl.
+ * Adds a pointer to the map and returns the virtual pointer id.
+ * BufferT is either a const buffer_t& or a buffer_t&&.
+ */
+ template <class BufferT>
+ virtual_pointer_t add_pointer_impl(BufferT b) {
+ virtual_pointer_t retVal = nullptr;
+ size_t bufSize = b.get_count();
+ pMapNode_t p{b, bufSize, false};
+ // If this is the first pointer:
+ if (m_pointerMap.empty()) {
+ virtual_pointer_t initialVal{m_baseAddress};
+ m_pointerMap.emplace(initialVal, p);
+ return initialVal;
+ }
+
+ auto lastElemIter = get_insertion_point(bufSize);
+ // We are recovering an existing free node
+ if (lastElemIter->second.m_free) {
+ lastElemIter->second.m_buffer = b;
+ lastElemIter->second.m_free = false;
+
+ // If the recovered node is bigger than the inserted one
+ // add a new free node with the remaining space
+ if (lastElemIter->second.m_size > bufSize) {
+ // create a new node with the remaining space
+ auto remainingSize = lastElemIter->second.m_size - bufSize;
+ pMapNode_t p2{b, remainingSize, true};
+
+ // update size of the current node
+ lastElemIter->second.m_size = bufSize;
+
+ // add the new free node
+ auto newFreePtr = lastElemIter->first + bufSize;
+ auto freeNode = m_pointerMap.emplace(newFreePtr, p2).first;
+ m_freeList.emplace(freeNode);
+ }
+
+ retVal = lastElemIter->first;
+ } else {
+ size_t lastSize = lastElemIter->second.m_size;
+ retVal = lastElemIter->first + lastSize;
+ m_pointerMap.emplace(retVal, p);
+ }
+ return retVal;
+ }
+
+ /**
+ * Compare two iterators to pointer map entries according to
+ * the size of the allocation on the device.
+ */
+ struct SortBySize {
+ bool operator()(typename pointerMap_t::iterator a,
+ typename pointerMap_t::iterator b) const {
+ return ((a->first < b->first) && (a->second <= b->second)) ||
+ ((a->first < b->first) && (b->second <= a->second));
+ }
+ };
+
+ /* Maps the pointer addresses to buffer and size pairs.
+ */
+ pointerMap_t m_pointerMap;
+
+ /* List of free nodes available for re-using
+ */
+ std::set<typename pointerMap_t::iterator, SortBySize> m_freeList;
+
+ /* Base address used when issuing the first virtual pointer, allows users
+ * to specify alignment. Cannot be zero. */
+ std::intptr_t m_baseAddress;
+};
+
+/* remove_pointer.
+ * Removes the given pointer from the map.
+ * The pointer is allowed to be reused only if ReUse if true.
+ */
+template <>
+inline void PointerMapper::remove_pointer<false>(const virtual_pointer_t ptr) {
+ if (is_nullptr(ptr)) {
+ return;
+ }
+ m_pointerMap.erase(this->get_node(ptr));
+}
+
+/**
+ * Malloc-like interface to the pointer-mapper.
+ * Given a size, creates a byte-typed buffer and returns a
+ * fake pointer to keep track of it.
+ * \param size Size in bytes of the desired allocation
+ * \throw cl::sycl::exception if error while creating the buffer
+ */
+inline void *SYCLmalloc(size_t size, PointerMapper &pMap) {
+ if (size == 0) {
+ return nullptr;
+ }
+ // Create a generic buffer of the given size
+ using buffer_t = cl::sycl::buffer<buffer_data_type_t, 1>;
+ auto thePointer = pMap.add_pointer(buffer_t(cl::sycl::range<1>{size}));
+ // Store the buffer on the global list
+ return static_cast<void *>(thePointer);
+}
+
+/**
+ * Free-like interface to the pointer mapper.
+ * Given a fake-pointer created with the virtual-pointer malloc,
+ * destroys the buffer and remove it from the list.
+ * If ReUse is false, the pointer is not added to the freeList,
+ * it should be false only for sub-buffers.
+ */
+template <bool ReUse = true, typename PointerMapper>
+inline void SYCLfree(void *ptr, PointerMapper &pMap) {
+ pMap.template remove_pointer<ReUse>(ptr);
+}
+
+/**
+ * Clear all the memory allocated by SYCL.
+ */
+template <typename PointerMapper>
+inline void SYCLfreeAll(PointerMapper &pMap) {
+ pMap.clear();
+}
+
+template <cl::sycl::access::mode AcMd, typename T>
+struct RangeAccess {
+ static const auto global_access = cl::sycl::access::target::global_buffer;
+ static const auto is_place_holder = cl::sycl::access::placeholder::true_t;
+ typedef T scalar_t;
+ typedef scalar_t &ref_t;
+ typedef typename cl::sycl::global_ptr<scalar_t>::pointer_t ptr_t;
+
+ // the accessor type does not necessarily the same as T
+ typedef cl::sycl::accessor<scalar_t, 1, AcMd, global_access, is_place_holder>
+ accessor;
+
+ typedef RangeAccess<AcMd, T> self_t;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RangeAccess(accessor access,
+ size_t offset,
+ std::intptr_t virtual_ptr)
+ : access_(access), offset_(offset), virtual_ptr_(virtual_ptr) {}
+
+ RangeAccess(cl::sycl::buffer<scalar_t, 1> buff =
+ cl::sycl::buffer<scalar_t, 1>(cl::sycl::range<1>(1)))
+ : access_{accessor{buff}}, offset_(0), virtual_ptr_(-1) {}
+
+ // This should be only used for null constructor on the host side
+ RangeAccess(std::nullptr_t) : RangeAccess() {}
+ // This template parameter must be removed and scalar_t should be replaced
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptr_t get_pointer() const {
+ return (access_.get_pointer().get() + offset_);
+ }
+ template <typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t &operator+=(Index offset) {
+ offset_ += (offset);
+ return *this;
+ }
+ template <typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t operator+(Index offset) const {
+ return self_t(access_, offset_ + offset, virtual_ptr_);
+ }
+ template <typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t operator-(Index offset) const {
+ return self_t(access_, offset_ - offset, virtual_ptr_);
+ }
+ template <typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t &operator-=(Index offset) {
+ offset_ -= offset;
+ return *this;
+ }
+
+ // THIS IS FOR NULL COMPARISON ONLY
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend bool operator==(
+ const RangeAccess &lhs, std::nullptr_t) {
+ return ((lhs.virtual_ptr_ == -1));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend bool operator!=(
+ const RangeAccess &lhs, std::nullptr_t i) {
+ return !(lhs == i);
+ }
+
+ // THIS IS FOR NULL COMPARISON ONLY
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend bool operator==(
+ std::nullptr_t, const RangeAccess &rhs) {
+ return ((rhs.virtual_ptr_ == -1));
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend bool operator!=(
+ std::nullptr_t i, const RangeAccess &rhs) {
+ return !(i == rhs);
+ }
+ // Prefix operator (Increment and return value)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t &operator++() {
+ offset_++;
+ return (*this);
+ }
+
+ // Postfix operator (Return value and increment)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE self_t operator++(int i) {
+ EIGEN_UNUSED_VARIABLE(i);
+ self_t temp_iterator(*this);
+ offset_++;
+ return temp_iterator;
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t get_size() const {
+ return (access_.get_count() - offset_);
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t get_offset() const {
+ return offset_;
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_offset(std::ptrdiff_t offset) {
+ offset_ = offset;
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ref_t operator*() const {
+ return *get_pointer();
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ref_t operator*() {
+ return *get_pointer();
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptr_t operator->() = delete;
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ref_t operator[](int x) {
+ return *(get_pointer() + x);
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ref_t operator[](int x) const {
+ return *(get_pointer() + x);
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_t *get_virtual_pointer() const {
+ return reinterpret_cast<scalar_t *>(virtual_ptr_ +
+ (offset_ * sizeof(scalar_t)));
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit operator bool() const {
+ return (virtual_ptr_ != -1);
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator RangeAccess<AcMd, const T>() {
+ return RangeAccess<AcMd, const T>(access_, offset_, virtual_ptr_);
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ operator RangeAccess<AcMd, const T>() const {
+ return RangeAccess<AcMd, const T>(access_, offset_, virtual_ptr_);
+ }
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(
+ cl::sycl::handler &cgh) const {
+ cgh.require(access_);
+ }
+
+ private:
+ accessor access_;
+ size_t offset_;
+ std::intptr_t virtual_ptr_; // the location of the buffer in the map
+};
+
+template <cl::sycl::access::mode AcMd, typename T>
+struct RangeAccess<AcMd, const T> : RangeAccess<AcMd, T> {
+ typedef RangeAccess<AcMd, T> Base;
+ using Base::Base;
+};
+
+} // namespace internal
+} // namespace TensorSycl
+} // namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_SYCL_STORAGE_MEMORY_H
diff --git a/Eigen/src/Core/arch/SYCL/TypeCasting.h b/Eigen/src/Core/arch/SYCL/TypeCasting.h
new file mode 100644
index 000000000..9208ab21d
--- /dev/null
+++ b/Eigen/src/Core/arch/SYCL/TypeCasting.h
@@ -0,0 +1,85 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TypeCasting.h
+ *
+ * \brief:
+ * TypeCasting
+ *
+ *****************************************************************/
+
+#ifndef EIGEN_TYPE_CASTING_SYCL_H
+#define EIGEN_TYPE_CASTING_SYCL_H
+
+namespace Eigen {
+
+namespace internal {
+#ifdef SYCL_DEVICE_ONLY
+template <>
+struct type_casting_traits<float, int> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4
+pcast<cl::sycl::cl_float4, cl::sycl::cl_int4>(const cl::sycl::cl_float4& a) {
+ return a
+ .template convert<cl::sycl::cl_int, cl::sycl::rounding_mode::automatic>();
+}
+
+template <>
+struct type_casting_traits<int, float> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
+};
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
+pcast<cl::sycl::cl_int4, cl::sycl::cl_float4>(const cl::sycl::cl_int4& a) {
+ return a.template convert<cl::sycl::cl_float,
+ cl::sycl::rounding_mode::automatic>();
+}
+
+template <>
+struct type_casting_traits<double, float> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
+};
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
+pcast<cl::sycl::cl_double2, cl::sycl::cl_float4>(
+ const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) {
+ auto a1 = a.template convert<cl::sycl::cl_float,
+ cl::sycl::rounding_mode::automatic>();
+ auto b1 = b.template convert<cl::sycl::cl_float,
+ cl::sycl::rounding_mode::automatic>();
+ return cl::sycl::float4(a1.x(), a1.y(), b1.x(), b1.y());
+}
+
+template <>
+struct type_casting_traits<float, double> {
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
+};
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
+pcast<cl::sycl::cl_float4, cl::sycl::cl_double2>(const cl::sycl::cl_float4& a) {
+ // Simply discard the second half of the input
+ return cl::sycl::cl_double2(a.x(), a.y());
+}
+
+#endif
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TYPE_CASTING_SYCL_H