diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h | 90 |
1 files changed, 86 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 52b803d7f..246ebe44e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -12,7 +12,7 @@ namespace Eigen { -// MakePointer class is used as a container of the adress space of the pointer +// MakePointer class is used as a container of the address space of the pointer // on the host and on the device. From the host side it generates the T* pointer // and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to // T* m_data on the host. It is always called on the device. @@ -20,8 +20,35 @@ namespace Eigen { // map_allocator. template<typename T> struct MakePointer { typedef T* Type; + typedef const T* ConstType; }; +template <typename T> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T* constCast(const T* data) { + return const_cast<T*>(data); +} + +// The StorageMemory class is a container of the device specific pointer +// used for refering to a Pointer on TensorEvaluator class. While the TensorExpression +// is a device-agnostic type and need MakePointer class for type conversion, +// the TensorEvaluator class can be specialized for a device, hence it is possible +// to construct different types of temproray storage memory in TensorEvaluator +// for different devices by specializing the following StorageMemory class. +template<typename T, typename device> struct StorageMemory: MakePointer <T> {}; + +namespace internal{ +template<typename A, typename B> struct Pointer_type_promotion { + static const bool val=false; +}; +template<typename A> struct Pointer_type_promotion<A, A> { + static const bool val = true; +}; +template<typename A, typename B> struct TypeConversion { + typedef A* type; +}; +} + + template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap; template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor; template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize; @@ -37,7 +64,7 @@ template<typename Op, typename Dims, typename XprType, template <class> class Ma template<typename XprType> class TensorIndexTupleOp; template<typename ReduceOp, typename Dims, typename XprType> class TensorTupleReducerOp; template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp; -template<typename Dimensions, typename LeftXprType, typename RightXprType> class TensorContractionOp; +template<typename Dimensions, typename LeftXprType, typename RightXprType, typename OutputKernelType> class TensorContractionOp; template<typename TargetType, typename XprType> class TensorConversionOp; template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp; template<typename FFT, typename XprType, int FFTDataType, int FFTDirection> class TensorFFTOp; @@ -58,21 +85,50 @@ template<typename Strides, typename XprType> class TensorInflationOp; template<typename Generator, typename XprType> class TensorGeneratorOp; template<typename LeftXprType, typename RightXprType> class TensorAssignOp; template<typename Op, typename XprType> class TensorScanOp; +template<typename Dims, typename XprType> class TensorTraceOp; template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp; template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp; template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorEvalToOp; -template<typename XprType, template <class> class MakePointer_ = MakePointer> class TensorForcedEvalOp; +template<typename XprType> class TensorForcedEvalOp; template<typename ExpressionType, typename DeviceType> class TensorDevice; +template<typename ExpressionType, typename DeviceType, typename DoneCallback> class TensorAsyncDevice; template<typename Derived, typename Device> struct TensorEvaluator; +struct NoOpOutputKernel; + struct DefaultDevice; struct ThreadPoolDevice; struct GpuDevice; struct SyclDevice; +#ifdef EIGEN_USE_SYCL + +template <typename T> struct MakeSYCLPointer { + typedef Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T> Type; +}; + +template <typename T> +EIGEN_STRONG_INLINE const Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T>& +constCast(const Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T>& data) { + return data; +} + +template <typename T> +struct StorageMemory<T, SyclDevice> : MakeSYCLPointer<T> {}; +template <typename T> +struct StorageMemory<T, const SyclDevice> : StorageMemory<T, SyclDevice> {}; + +namespace TensorSycl { +namespace internal{ +template <typename Evaluator, typename Op> class GenericNondeterministicReducer; +} +} +#endif + + enum FFTResultType { RealPart = 0, ImagPart = 1, @@ -98,10 +154,36 @@ struct IsVectorizable<GpuDevice, Expression> { TensorEvaluator<Expression, GpuDevice>::IsAligned; }; +// Tiled evaluation strategy. +enum TiledEvaluation { + Off = 0, // tiled evaluation is not supported + On = 1, // still work in progress (see TensorBlock.h) +}; + +template <typename Device, typename Expression> +struct IsTileable { + // Check that block evaluation is supported and it's a preferred option (at + // least one sub-expression has much faster block evaluation, e.g. + // broadcasting). + static const bool BlockAccess = + TensorEvaluator<Expression, Device>::BlockAccess && + TensorEvaluator<Expression, Device>::PreferBlockAccess; + + static const TiledEvaluation value = + BlockAccess ? TiledEvaluation::On : TiledEvaluation::Off; +}; + template <typename Expression, typename Device, - bool Vectorizable = IsVectorizable<Device, Expression>::value> + bool Vectorizable = IsVectorizable<Device, Expression>::value, + TiledEvaluation Tiling = IsTileable<Device, Expression>::value> class TensorExecutor; +template <typename Expression, typename Device, typename DoneCallback, + bool Vectorizable = IsVectorizable<Device, Expression>::value, + TiledEvaluation Tiling = IsTileable<Device, Expression>::value> +class TensorAsyncExecutor; + + } // end namespace internal } // end namespace Eigen |