aboutsummaryrefslogtreecommitdiff
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h90
1 files changed, 86 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index 52b803d7f..246ebe44e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -12,7 +12,7 @@
namespace Eigen {
-// MakePointer class is used as a container of the adress space of the pointer
+// MakePointer class is used as a container of the address space of the pointer
// on the host and on the device. From the host side it generates the T* pointer
// and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to
// T* m_data on the host. It is always called on the device.
@@ -20,8 +20,35 @@ namespace Eigen {
// map_allocator.
template<typename T> struct MakePointer {
typedef T* Type;
+ typedef const T* ConstType;
};
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T* constCast(const T* data) {
+ return const_cast<T*>(data);
+}
+
+// The StorageMemory class is a container of the device specific pointer
+// used for refering to a Pointer on TensorEvaluator class. While the TensorExpression
+// is a device-agnostic type and need MakePointer class for type conversion,
+// the TensorEvaluator class can be specialized for a device, hence it is possible
+// to construct different types of temproray storage memory in TensorEvaluator
+// for different devices by specializing the following StorageMemory class.
+template<typename T, typename device> struct StorageMemory: MakePointer <T> {};
+
+namespace internal{
+template<typename A, typename B> struct Pointer_type_promotion {
+ static const bool val=false;
+};
+template<typename A> struct Pointer_type_promotion<A, A> {
+ static const bool val = true;
+};
+template<typename A, typename B> struct TypeConversion {
+ typedef A* type;
+};
+}
+
+
template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap;
template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor;
template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize;
@@ -37,7 +64,7 @@ template<typename Op, typename Dims, typename XprType, template <class> class Ma
template<typename XprType> class TensorIndexTupleOp;
template<typename ReduceOp, typename Dims, typename XprType> class TensorTupleReducerOp;
template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp;
-template<typename Dimensions, typename LeftXprType, typename RightXprType> class TensorContractionOp;
+template<typename Dimensions, typename LeftXprType, typename RightXprType, typename OutputKernelType> class TensorContractionOp;
template<typename TargetType, typename XprType> class TensorConversionOp;
template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp;
template<typename FFT, typename XprType, int FFTDataType, int FFTDirection> class TensorFFTOp;
@@ -58,21 +85,50 @@ template<typename Strides, typename XprType> class TensorInflationOp;
template<typename Generator, typename XprType> class TensorGeneratorOp;
template<typename LeftXprType, typename RightXprType> class TensorAssignOp;
template<typename Op, typename XprType> class TensorScanOp;
+template<typename Dims, typename XprType> class TensorTraceOp;
template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp;
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp;
template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp;
-template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorForcedEvalOp;
+template<typename XprType> class TensorForcedEvalOp;
template<typename ExpressionType, typename DeviceType> class TensorDevice;
+template<typename ExpressionType, typename DeviceType, typename DoneCallback> class TensorAsyncDevice;
template<typename Derived, typename Device> struct TensorEvaluator;
+struct NoOpOutputKernel;
+
struct DefaultDevice;
struct ThreadPoolDevice;
struct GpuDevice;
struct SyclDevice;
+#ifdef EIGEN_USE_SYCL
+
+template <typename T> struct MakeSYCLPointer {
+ typedef Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T> Type;
+};
+
+template <typename T>
+EIGEN_STRONG_INLINE const Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T>&
+constCast(const Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T>& data) {
+ return data;
+}
+
+template <typename T>
+struct StorageMemory<T, SyclDevice> : MakeSYCLPointer<T> {};
+template <typename T>
+struct StorageMemory<T, const SyclDevice> : StorageMemory<T, SyclDevice> {};
+
+namespace TensorSycl {
+namespace internal{
+template <typename Evaluator, typename Op> class GenericNondeterministicReducer;
+}
+}
+#endif
+
+
enum FFTResultType {
RealPart = 0,
ImagPart = 1,
@@ -98,10 +154,36 @@ struct IsVectorizable<GpuDevice, Expression> {
TensorEvaluator<Expression, GpuDevice>::IsAligned;
};
+// Tiled evaluation strategy.
+enum TiledEvaluation {
+ Off = 0, // tiled evaluation is not supported
+ On = 1, // still work in progress (see TensorBlock.h)
+};
+
+template <typename Device, typename Expression>
+struct IsTileable {
+ // Check that block evaluation is supported and it's a preferred option (at
+ // least one sub-expression has much faster block evaluation, e.g.
+ // broadcasting).
+ static const bool BlockAccess =
+ TensorEvaluator<Expression, Device>::BlockAccess &&
+ TensorEvaluator<Expression, Device>::PreferBlockAccess;
+
+ static const TiledEvaluation value =
+ BlockAccess ? TiledEvaluation::On : TiledEvaluation::Off;
+};
+
template <typename Expression, typename Device,
- bool Vectorizable = IsVectorizable<Device, Expression>::value>
+ bool Vectorizable = IsVectorizable<Device, Expression>::value,
+ TiledEvaluation Tiling = IsTileable<Device, Expression>::value>
class TensorExecutor;
+template <typename Expression, typename Device, typename DoneCallback,
+ bool Vectorizable = IsVectorizable<Device, Expression>::value,
+ TiledEvaluation Tiling = IsTileable<Device, Expression>::value>
+class TensorAsyncExecutor;
+
+
} // end namespace internal
} // end namespace Eigen