aboutsummaryrefslogtreecommitdiff
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h269
1 files changed, 223 insertions, 46 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
index 860a6949a..09d2da9a8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
@@ -32,6 +32,7 @@ struct traits<TensorConversionOp<TargetType, XprType> >
static const int NumDimensions = traits<XprType>::NumDimensions;
static const int Layout = traits<XprType>::Layout;
enum { Flags = 0 };
+ typedef typename TypeConversion<Scalar, typename traits<XprType>::PointerType>::type PointerType;
};
template<typename TargetType, typename XprType>
@@ -50,7 +51,10 @@ struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorCo
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
-struct PacketConverter {
+struct PacketConverter;
+
+template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
+struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 1> {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
PacketConverter(const TensorEvaluator& impl)
: m_impl(impl) {}
@@ -108,7 +112,33 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
};
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
-struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
+struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 8, 1> {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ PacketConverter(const TensorEvaluator& impl)
+ : m_impl(impl) {}
+
+ template<int LoadMode, typename Index>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
+ const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
+
+ SrcPacket src1 = m_impl.template packet<LoadMode>(index);
+ SrcPacket src2 = m_impl.template packet<LoadMode>(index + 1 * SrcPacketSize);
+ SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
+ SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
+ SrcPacket src5 = m_impl.template packet<LoadMode>(index + 4 * SrcPacketSize);
+ SrcPacket src6 = m_impl.template packet<LoadMode>(index + 5 * SrcPacketSize);
+ SrcPacket src7 = m_impl.template packet<LoadMode>(index + 6 * SrcPacketSize);
+ SrcPacket src8 = m_impl.template packet<LoadMode>(index + 7 * SrcPacketSize);
+ TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4, src5, src6, src7, src8);
+ return result;
+ }
+
+ private:
+ const TensorEvaluator& m_impl;
+};
+
+template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int TgtCoeffRatio>
+struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, TgtCoeffRatio> {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
PacketConverter(const TensorEvaluator& impl)
: m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}
@@ -128,6 +158,7 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
internal::scalar_cast_op<SrcType, TgtType> converter;
EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
+ EIGEN_UNROLL_LOOP
for (int i = 0; i < TgtPacketSize; ++i) {
values[i] = converter(m_impl.coeff(index+i));
}
@@ -163,19 +194,114 @@ class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprT
typename XprType::Nested m_xpr;
};
-template <bool SameType, typename Eval, typename Scalar> struct ConversionSubExprEval {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) {
+template <bool SameType, typename Eval, typename EvalPointerType> struct ConversionSubExprEval {
+ static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType) {
impl.evalSubExprsIfNeeded(NULL);
return true;
}
};
-template <typename Eval, typename Scalar> struct ConversionSubExprEval<true, Eval, Scalar> {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) {
+template <typename Eval, typename EvalPointerType> struct ConversionSubExprEval<true, Eval, EvalPointerType> {
+ static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType data) {
return impl.evalSubExprsIfNeeded(data);
}
};
+#ifdef EIGEN_USE_THREADS
+template <bool SameType, typename Eval, typename EvalPointerType,
+ typename EvalSubExprsCallback>
+struct ConversionSubExprEvalAsync {
+ static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType, EvalSubExprsCallback done) {
+ impl.evalSubExprsIfNeededAsync(nullptr, std::move(done));
+ }
+};
+
+template <typename Eval, typename EvalPointerType,
+ typename EvalSubExprsCallback>
+struct ConversionSubExprEvalAsync<true, Eval, EvalPointerType,
+ EvalSubExprsCallback> {
+ static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType data, EvalSubExprsCallback done) {
+ impl.evalSubExprsIfNeededAsync(data, std::move(done));
+ }
+};
+#endif
+
+namespace internal {
+
+template <typename SrcType, typename TargetType, bool IsSameT>
+struct CoeffConv {
+ template <typename ArgType, typename Device>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ internal::scalar_cast_op<SrcType, TargetType> converter;
+ return converter(impl.coeff(index));
+ }
+};
+
+template <typename SrcType, typename TargetType>
+struct CoeffConv<SrcType, TargetType, true> {
+ template <typename ArgType, typename Device>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ return impl.coeff(index);
+ }
+};
+
+template <typename SrcPacket, typename TargetPacket, int LoadMode, bool ActuallyVectorize, bool IsSameT>
+struct PacketConv {
+ typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
+ typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
+
+ static const int PacketSize = internal::unpacket_traits<TargetPacket>::size;
+
+ template <typename ArgType, typename Device>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ internal::scalar_cast_op<SrcType, TargetType> converter;
+ EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
+ EIGEN_UNROLL_LOOP
+ for (int i = 0; i < PacketSize; ++i) {
+ values[i] = converter(impl.coeff(index+i));
+ }
+ TargetPacket rslt = internal::pload<TargetPacket>(values);
+ return rslt;
+ }
+};
+
+template <typename SrcPacket, typename TargetPacket, int LoadMode, bool IsSameT>
+struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, IsSameT> {
+ typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
+ typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
+
+ template <typename ArgType, typename Device>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
+ const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
+ PacketConverter<TensorEvaluator<ArgType, Device>, SrcPacket, TargetPacket,
+ SrcCoeffRatio, TgtCoeffRatio> converter(impl);
+ return converter.template packet<LoadMode>(index);
+ }
+};
+
+template <typename SrcPacket, typename TargetPacket, int LoadMode>
+struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/false, /*IsSameT=*/true> {
+ typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
+ static const int PacketSize = internal::unpacket_traits<TargetPacket>::size;
+
+ template <typename ArgType, typename Device>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
+ for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index+i);
+ return internal::pload<TargetPacket>(values);
+ }
+};
+
+template <typename SrcPacket, typename TargetPacket, int LoadMode>
+struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/true, /*IsSameT=*/true> {
+ template <typename ArgType, typename Device>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ return impl.template packet<LoadMode>(index);
+ }
+};
+
+} // namespace internal
// Eval as rvalue
template<typename TargetType, typename ArgType, typename Device>
@@ -189,44 +315,98 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef typename PacketType<SrcType, Device>::type PacketSourceType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+ static const bool IsSameType = internal::is_same<TargetType, SrcType>::value;
+ typedef StorageMemory<CoeffReturnType, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
enum {
- IsAligned = false,
- PacketAccess = true,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- RawAccess = false
+ IsAligned = false,
+ PacketAccess =
+ #ifndef EIGEN_USE_SYCL
+ true,
+ #else
+ TensorEvaluator<ArgType, Device>::PacketAccess &
+ internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
+ #endif
+ BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
+ PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
+ Layout = TensorEvaluator<ArgType, Device>::Layout,
+ RawAccess = false
+ };
+
+ static const int NumDims = internal::array_size<Dimensions>::value;
+
+ //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+ typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+ typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+ typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
+ ArgTensorBlock;
+
+ struct TensorConversionOpBlockFactory {
+ template <typename ArgXprType>
+ struct XprType {
+ typedef TensorConversionOp<TargetType, const ArgXprType> type;
+ };
+
+ template <typename ArgXprType>
+ typename XprType<ArgXprType>::type expr(const ArgXprType& expr) const {
+ return typename XprType<ArgXprType>::type(expr);
+ }
};
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ typedef internal::TensorUnaryExprBlock<TensorConversionOpBlockFactory,
+ ArgTensorBlock>
+ TensorBlock;
+ //===--------------------------------------------------------------------===//
+
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device)
{
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data)
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data)
{
- return ConversionSubExprEval<internal::is_same<TargetType, SrcType>::value, TensorEvaluator<ArgType, Device>, Scalar>::run(m_impl, data);
+ return ConversionSubExprEval<IsSameType, TensorEvaluator<ArgType, Device>, EvaluatorPointerType>::run(m_impl, data);
+ }
+
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType data, EvalSubExprsCallback done) {
+ ConversionSubExprEvalAsync<IsSameType, TensorEvaluator<ArgType, Device>,
+ EvaluatorPointerType,
+ EvalSubExprsCallback>::run(m_impl, data, std::move(done));
}
+#endif
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
+ EIGEN_STRONG_INLINE void cleanup()
{
m_impl.cleanup();
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
{
- internal::scalar_cast_op<SrcType, TargetType> converter;
- return converter(m_impl.coeff(index));
+ return internal::CoeffConv<SrcType, TargetType, IsSameType>::run(m_impl,index);
}
template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess &
- internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
- return PacketConv<LoadMode, Vectorizable>::run(m_impl, index);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
+ packet(Index index) const {
+ // If we are not going to do the cast, we just need to check that base
+ // TensorEvaluator has packet access. Otherwise we also need to make sure,
+ // that we have an implementation of vectorized cast.
+ const bool Vectorizable =
+ IsSameType
+ ? TensorEvaluator<ArgType, Device>::PacketAccess
+ : int(TensorEvaluator<ArgType, Device>::PacketAccess) &
+ int(internal::type_casting_traits<SrcType, TargetType>::VectorizedCast);
+
+ return internal::PacketConv<PacketSourceType, PacketReturnType, LoadMode,
+ Vectorizable, IsSameType>::run(m_impl, index);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
@@ -244,33 +424,30 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
}
}
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ internal::TensorBlockResourceRequirements getResourceRequirements() const {
+ return m_impl.getResourceRequirements();
+ }
- protected:
- template <int LoadMode, bool ActuallyVectorize>
- struct PacketConv {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
- internal::scalar_cast_op<SrcType, TargetType> converter;
- EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
- for (int i = 0; i < PacketSize; ++i) {
- values[i] = converter(impl.coeff(index+i));
- }
- PacketReturnType rslt = internal::pload<PacketReturnType>(values);
- return rslt;
- }
- };
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+ block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+ bool /*root_of_expr_ast*/ = false) const {
+ return TensorBlock(m_impl.block(desc, scratch),
+ TensorConversionOpBlockFactory());
+ }
- template <int LoadMode>
- struct PacketConv<LoadMode, true> {
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
- const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
- const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
- PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
- SrcCoeffRatio, TgtCoeffRatio> converter(impl);
- return converter.template packet<LoadMode>(index);
- }
- };
+ EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
+
+ /// required by sycl in order to extract the sycl accessor
+ const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+#ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_impl.bind(cgh);
+ }
+#endif
+ protected:
TensorEvaluator<ArgType, Device> m_impl;
};