diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h | 269 |
1 files changed, 223 insertions, 46 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index 860a6949a..09d2da9a8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -32,6 +32,7 @@ struct traits<TensorConversionOp<TargetType, XprType> > static const int NumDimensions = traits<XprType>::NumDimensions; static const int Layout = traits<XprType>::Layout; enum { Flags = 0 }; + typedef typename TypeConversion<Scalar, typename traits<XprType>::PointerType>::type PointerType; }; template<typename TargetType, typename XprType> @@ -50,7 +51,10 @@ struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorCo template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio> -struct PacketConverter { +struct PacketConverter; + +template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> +struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 1> { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {} @@ -108,7 +112,33 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> { }; template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> -struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> { +struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 8, 1> { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template<int LoadMode, typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; + + SrcPacket src1 = m_impl.template packet<LoadMode>(index); + SrcPacket src2 = m_impl.template packet<LoadMode>(index + 1 * SrcPacketSize); + SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize); + SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize); + SrcPacket src5 = m_impl.template packet<LoadMode>(index + 4 * SrcPacketSize); + SrcPacket src6 = m_impl.template packet<LoadMode>(index + 5 * SrcPacketSize); + SrcPacket src7 = m_impl.template packet<LoadMode>(index + 6 * SrcPacketSize); + SrcPacket src8 = m_impl.template packet<LoadMode>(index + 7 * SrcPacketSize); + TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4, src5, src6, src7, src8); + return result; + } + + private: + const TensorEvaluator& m_impl; +}; + +template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int TgtCoeffRatio> +struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, TgtCoeffRatio> { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {} @@ -128,6 +158,7 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> { typedef typename internal::unpacket_traits<TgtPacket>::type TgtType; internal::scalar_cast_op<SrcType, TgtType> converter; EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize]; + EIGEN_UNROLL_LOOP for (int i = 0; i < TgtPacketSize; ++i) { values[i] = converter(m_impl.coeff(index+i)); } @@ -163,19 +194,114 @@ class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprT typename XprType::Nested m_xpr; }; -template <bool SameType, typename Eval, typename Scalar> struct ConversionSubExprEval { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) { +template <bool SameType, typename Eval, typename EvalPointerType> struct ConversionSubExprEval { + static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType) { impl.evalSubExprsIfNeeded(NULL); return true; } }; -template <typename Eval, typename Scalar> struct ConversionSubExprEval<true, Eval, Scalar> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) { +template <typename Eval, typename EvalPointerType> struct ConversionSubExprEval<true, Eval, EvalPointerType> { + static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType data) { return impl.evalSubExprsIfNeeded(data); } }; +#ifdef EIGEN_USE_THREADS +template <bool SameType, typename Eval, typename EvalPointerType, + typename EvalSubExprsCallback> +struct ConversionSubExprEvalAsync { + static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType, EvalSubExprsCallback done) { + impl.evalSubExprsIfNeededAsync(nullptr, std::move(done)); + } +}; + +template <typename Eval, typename EvalPointerType, + typename EvalSubExprsCallback> +struct ConversionSubExprEvalAsync<true, Eval, EvalPointerType, + EvalSubExprsCallback> { + static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType data, EvalSubExprsCallback done) { + impl.evalSubExprsIfNeededAsync(data, std::move(done)); + } +}; +#endif + +namespace internal { + +template <typename SrcType, typename TargetType, bool IsSameT> +struct CoeffConv { + template <typename ArgType, typename Device> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl, Index index) { + internal::scalar_cast_op<SrcType, TargetType> converter; + return converter(impl.coeff(index)); + } +}; + +template <typename SrcType, typename TargetType> +struct CoeffConv<SrcType, TargetType, true> { + template <typename ArgType, typename Device> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl, Index index) { + return impl.coeff(index); + } +}; + +template <typename SrcPacket, typename TargetPacket, int LoadMode, bool ActuallyVectorize, bool IsSameT> +struct PacketConv { + typedef typename internal::unpacket_traits<SrcPacket>::type SrcType; + typedef typename internal::unpacket_traits<TargetPacket>::type TargetType; + + static const int PacketSize = internal::unpacket_traits<TargetPacket>::size; + + template <typename ArgType, typename Device> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) { + internal::scalar_cast_op<SrcType, TargetType> converter; + EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize]; + EIGEN_UNROLL_LOOP + for (int i = 0; i < PacketSize; ++i) { + values[i] = converter(impl.coeff(index+i)); + } + TargetPacket rslt = internal::pload<TargetPacket>(values); + return rslt; + } +}; + +template <typename SrcPacket, typename TargetPacket, int LoadMode, bool IsSameT> +struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, IsSameT> { + typedef typename internal::unpacket_traits<SrcPacket>::type SrcType; + typedef typename internal::unpacket_traits<TargetPacket>::type TargetType; + + template <typename ArgType, typename Device> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) { + const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio; + const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio; + PacketConverter<TensorEvaluator<ArgType, Device>, SrcPacket, TargetPacket, + SrcCoeffRatio, TgtCoeffRatio> converter(impl); + return converter.template packet<LoadMode>(index); + } +}; + +template <typename SrcPacket, typename TargetPacket, int LoadMode> +struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/false, /*IsSameT=*/true> { + typedef typename internal::unpacket_traits<TargetPacket>::type TargetType; + static const int PacketSize = internal::unpacket_traits<TargetPacket>::size; + + template <typename ArgType, typename Device> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) { + EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index+i); + return internal::pload<TargetPacket>(values); + } +}; + +template <typename SrcPacket, typename TargetPacket, int LoadMode> +struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/true, /*IsSameT=*/true> { + template <typename ArgType, typename Device> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) { + return impl.template packet<LoadMode>(index); + } +}; + +} // namespace internal // Eval as rvalue template<typename TargetType, typename ArgType, typename Device> @@ -189,44 +315,98 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device> typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<SrcType, Device>::type PacketSourceType; - static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; + static const int PacketSize = PacketType<CoeffReturnType, Device>::size; + static const bool IsSameType = internal::is_same<TargetType, SrcType>::value; + typedef StorageMemory<CoeffReturnType, Device> Storage; + typedef typename Storage::Type EvaluatorPointerType; enum { - IsAligned = false, - PacketAccess = true, - Layout = TensorEvaluator<ArgType, Device>::Layout, - RawAccess = false + IsAligned = false, + PacketAccess = + #ifndef EIGEN_USE_SYCL + true, + #else + TensorEvaluator<ArgType, Device>::PacketAccess & + internal::type_casting_traits<SrcType, TargetType>::VectorizedCast, + #endif + BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, + PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess, + Layout = TensorEvaluator<ArgType, Device>::Layout, + RawAccess = false + }; + + static const int NumDims = internal::array_size<Dimensions>::value; + + //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// + typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc; + typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; + + typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock + ArgTensorBlock; + + struct TensorConversionOpBlockFactory { + template <typename ArgXprType> + struct XprType { + typedef TensorConversionOp<TargetType, const ArgXprType> type; + }; + + template <typename ArgXprType> + typename XprType<ArgXprType>::type expr(const ArgXprType& expr) const { + return typename XprType<ArgXprType>::type(expr); + } }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + typedef internal::TensorUnaryExprBlock<TensorConversionOpBlockFactory, + ArgTensorBlock> + TensorBlock; + //===--------------------------------------------------------------------===// + + EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { - return ConversionSubExprEval<internal::is_same<TargetType, SrcType>::value, TensorEvaluator<ArgType, Device>, Scalar>::run(m_impl, data); + return ConversionSubExprEval<IsSameType, TensorEvaluator<ArgType, Device>, EvaluatorPointerType>::run(m_impl, data); + } + +#ifdef EIGEN_USE_THREADS + template <typename EvalSubExprsCallback> + EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( + EvaluatorPointerType data, EvalSubExprsCallback done) { + ConversionSubExprEvalAsync<IsSameType, TensorEvaluator<ArgType, Device>, + EvaluatorPointerType, + EvalSubExprsCallback>::run(m_impl, data, std::move(done)); } +#endif - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() + EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - internal::scalar_cast_op<SrcType, TargetType> converter; - return converter(m_impl.coeff(index)); + return internal::CoeffConv<SrcType, TargetType, IsSameType>::run(m_impl,index); } template<int LoadMode> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const - { - const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess & - internal::type_casting_traits<SrcType, TargetType>::VectorizedCast; - return PacketConv<LoadMode, Vectorizable>::run(m_impl, index); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType + packet(Index index) const { + // If we are not going to do the cast, we just need to check that base + // TensorEvaluator has packet access. Otherwise we also need to make sure, + // that we have an implementation of vectorized cast. + const bool Vectorizable = + IsSameType + ? TensorEvaluator<ArgType, Device>::PacketAccess + : int(TensorEvaluator<ArgType, Device>::PacketAccess) & + int(internal::type_casting_traits<SrcType, TargetType>::VectorizedCast); + + return internal::PacketConv<PacketSourceType, PacketReturnType, LoadMode, + Vectorizable, IsSameType>::run(m_impl, index); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost @@ -244,33 +424,30 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device> } } - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + internal::TensorBlockResourceRequirements getResourceRequirements() const { + return m_impl.getResourceRequirements(); + } - protected: - template <int LoadMode, bool ActuallyVectorize> - struct PacketConv { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) { - internal::scalar_cast_op<SrcType, TargetType> converter; - EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; - for (int i = 0; i < PacketSize; ++i) { - values[i] = converter(impl.coeff(index+i)); - } - PacketReturnType rslt = internal::pload<PacketReturnType>(values); - return rslt; - } - }; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + block(TensorBlockDesc& desc, TensorBlockScratch& scratch, + bool /*root_of_expr_ast*/ = false) const { + return TensorBlock(m_impl.block(desc, scratch), + TensorConversionOpBlockFactory()); + } - template <int LoadMode> - struct PacketConv<LoadMode, true> { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) { - const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio; - const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio; - PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType, - SrcCoeffRatio, TgtCoeffRatio> converter(impl); - return converter.template packet<LoadMode>(index); - } - }; + EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; } + + /// required by sycl in order to extract the sycl accessor + const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } +#ifdef EIGEN_USE_SYCL + // binding placeholder accessors to a command group handler for SYCL + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { + m_impl.bind(cgh); + } +#endif + protected: TensorEvaluator<ArgType, Device> m_impl; }; |