1 files changed, 86 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index 52b803d7f..246ebe44e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -12,7 +12,7 @@
 
 namespace Eigen {
 
-// MakePointer class is used as a container of the adress space of the pointer
+// MakePointer class is used as a container of the address space of the pointer
 // on the host and on the device. From the host side it generates the T* pointer
 // and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to
 // T* m_data on the host. It is always called on the device.
@@ -20,8 +20,35 @@ namespace Eigen {
 // map_allocator.
 template<typename T> struct MakePointer {
   typedef T* Type;
+  typedef const T* ConstType;
 };
 
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T* constCast(const T* data) {
+  return const_cast<T*>(data);
+}
+
+// The StorageMemory class is a container of the device specific pointer
+// used for refering to a Pointer on TensorEvaluator class. While the TensorExpression
+// is a device-agnostic type and need MakePointer class for type conversion,
+// the TensorEvaluator class can be specialized for a device, hence it is possible
+// to construct different types of temproray storage memory in TensorEvaluator
+// for different devices by specializing the following StorageMemory class.
+template<typename T, typename device> struct StorageMemory: MakePointer <T> {};
+
+namespace internal{
+template<typename A, typename B> struct Pointer_type_promotion {
+  static const bool val=false;
+};
+template<typename A> struct Pointer_type_promotion<A, A> {
+  static const bool val = true;
+};
+template<typename A, typename B> struct TypeConversion {
+  typedef A* type;
+};
+}
+
+
 template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap;
 template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor;
 template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize;
@@ -37,7 +64,7 @@ template<typename Op, typename Dims, typename XprType, template <class> class Ma
 template<typename XprType> class TensorIndexTupleOp;
 template<typename ReduceOp, typename Dims, typename XprType> class TensorTupleReducerOp;
 template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp;
-template<typename Dimensions, typename LeftXprType, typename RightXprType> class TensorContractionOp;
+template<typename Dimensions, typename LeftXprType, typename RightXprType, typename OutputKernelType> class TensorContractionOp;
 template<typename TargetType, typename XprType> class TensorConversionOp;
 template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp;
 template<typename FFT, typename XprType, int FFTDataType, int FFTDirection> class TensorFFTOp;
@@ -58,21 +85,50 @@ template<typename Strides, typename XprType> class TensorInflationOp;
 template<typename Generator, typename XprType> class TensorGeneratorOp;
 template<typename LeftXprType, typename RightXprType> class TensorAssignOp;
 template<typename Op, typename XprType> class TensorScanOp;
+template<typename Dims, typename XprType> class TensorTraceOp;
 
 template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp;
 template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp;
 
 template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp;
-template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorForcedEvalOp;
+template<typename XprType> class TensorForcedEvalOp;
 
 template<typename ExpressionType, typename DeviceType> class TensorDevice;
+template<typename ExpressionType, typename DeviceType, typename DoneCallback> class TensorAsyncDevice;
 template<typename Derived, typename Device> struct TensorEvaluator;
 
+struct NoOpOutputKernel;
+
 struct DefaultDevice;
 struct ThreadPoolDevice;
 struct GpuDevice;
 struct SyclDevice;
 
+#ifdef EIGEN_USE_SYCL
+
+template <typename T> struct MakeSYCLPointer {
+  typedef Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T> Type;
+};
+
+template <typename T>
+EIGEN_STRONG_INLINE const Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T>&
+constCast(const Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T>& data) {
+  return data;
+}
+
+template <typename T>
+struct StorageMemory<T, SyclDevice> : MakeSYCLPointer<T> {};
+template <typename T>
+struct StorageMemory<T, const SyclDevice> : StorageMemory<T, SyclDevice> {};
+
+namespace TensorSycl {
+namespace internal{
+template <typename Evaluator, typename Op> class GenericNondeterministicReducer;
+}
+}
+#endif
+
+
 enum FFTResultType {
   RealPart = 0,
   ImagPart = 1,
@@ -98,10 +154,36 @@ struct IsVectorizable<GpuDevice, Expression> {
                             TensorEvaluator<Expression, GpuDevice>::IsAligned;
 };
 
+// Tiled evaluation strategy.
+enum TiledEvaluation {
+  Off = 0,    // tiled evaluation is not supported
+  On = 1,     // still work in progress (see TensorBlock.h)
+};
+
+template <typename Device, typename Expression>
+struct IsTileable {
+  // Check that block evaluation is supported and it's a preferred option (at
+  // least one sub-expression has much faster block evaluation, e.g.
+  // broadcasting).
+  static const bool BlockAccess =
+      TensorEvaluator<Expression, Device>::BlockAccess &&
+      TensorEvaluator<Expression, Device>::PreferBlockAccess;
+
+  static const TiledEvaluation value =
+      BlockAccess ? TiledEvaluation::On : TiledEvaluation::Off;
+};
+
 template <typename Expression, typename Device,
-          bool Vectorizable = IsVectorizable<Device, Expression>::value>
+          bool Vectorizable      = IsVectorizable<Device, Expression>::value,
+          TiledEvaluation Tiling = IsTileable<Device, Expression>::value>
 class TensorExecutor;
 
+template <typename Expression, typename Device, typename DoneCallback,
+          bool Vectorizable = IsVectorizable<Device, Expression>::value,
+          TiledEvaluation Tiling = IsTileable<Device, Expression>::value>
+class TensorAsyncExecutor;
+
+
 }  // end namespace internal
 
 }  // end namespace Eigen