path: root/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h')
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h | 630
1 file changed, 490 insertions(+), 140 deletions(-)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 834ce07df..3aff7fa01 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -32,44 +32,72 @@ struct TensorEvaluator
typedef typename Derived::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef typename Derived::Dimensions Dimensions;
+ typedef Derived XprType;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+ typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
+ typedef StorageMemory<Scalar, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
// NumDimensions is -1 for variable dim tensors
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
internal::traits<Derived>::NumDimensions : 0;
enum {
- IsAligned = Derived::IsAligned,
- PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
- Layout = Derived::Layout,
- CoordAccess = NumCoords > 0,
- RawAccess = true
+ IsAligned = Derived::IsAligned,
+ PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
+ BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
+ PreferBlockAccess = false,
+ Layout = Derived::Layout,
+ CoordAccess = NumCoords > 0,
+ RawAccess = true
};
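For orientation, a minimal sketch of how an executor might dispatch on the trait flags above (C++17 `if constexpr` for brevity; Eigen itself selects the path via template machinery, and `Eval` is a hypothetical evaluator type):

    // Illustrative only, not part of the patch.
    template <typename Eval>
    void evaluateAll(Eval& eval, Eigen::Index size) {
      if constexpr (Eval::PacketAccess) {
        Eigen::Index i = 0;
        // Vectorized path: one packet of coefficients per iteration.
        for (; i + Eval::PacketSize <= size; i += Eval::PacketSize)
          eval.template packet<Eigen::Unaligned>(i);
        for (; i < size; ++i) eval.coeff(i);  // scalar tail
      } else {
        for (Eigen::Index i = 0; i < size; ++i) eval.coeff(i);  // scalar fallback
      }
    }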
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
- : m_data(const_cast<typename internal::traits<Derived>::template MakePointer<Scalar>::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m)
+ typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
+
+ //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+ typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
+ typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+ typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
+ Layout, Index>
+ TensorBlock;
+ //===--------------------------------------------------------------------===//
+
+ EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
+ : m_data(device.get((const_cast<TensorPointerType>(m.data())))),
+ m_dims(m.dimensions()),
+ m_device(device)
{ }
- // Used for accessor extraction in SYCL Managed TensorMap:
- const Derived& derived() const { return m_impl; }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) {
- if (dest) {
- m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize());
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
+ if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
+ m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
return false;
}
return true;
}
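The return value is a contract: `false` means the result was already copied into `dest`, so the caller can skip per-coefficient evaluation. A sketch of the caller side, with `evalToLoop` as an assumed helper name:

    // Sketch of the calling convention (assumed names, not part of the patch).
    if (evaluator.evalSubExprsIfNeeded(dest)) {
      evalToLoop(evaluator, dest);  // result not in dest yet: evaluate coefficients
    }
    evaluator.cleanup();            // always paired with evalSubExprsIfNeeded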
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType dest, EvalSubExprsCallback done) {
+ // TODO(ezhulenev): ThreadPoolDevice memcpy is a blocking operation.
+ done(evalSubExprsIfNeeded(dest));
+ }
+#endif // EIGEN_USE_THREADS
+
+ EIGEN_STRONG_INLINE void cleanup() {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
- eigen_assert(m_data);
+ eigen_assert(m_data != NULL);
return m_data[index];
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
- eigen_assert(m_data);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) {
+ eigen_assert(m_data != NULL);
return m_data[index];
}
@@ -79,6 +107,18 @@ struct TensorEvaluator
return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
}
+ // Return a packet starting at `index`, where `umask` specifies which elements
+ // have to be loaded. The type/size of the mask depends on PacketReturnType;
+ // e.g. for Packet16f, `umask` is of type uint16_t, and if a bit is 1 the
+ // corresponding float element is loaded, otherwise 0 is loaded.
+ // The function is templatized to enable SFINAE.
+ template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
+ partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
+ {
+ return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
+ }
+
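A concrete instance of the mask convention described above: loading a ragged tail with Packet16f (sketch; assumes an AVX-512 build where `masked_load_available` is true):

    // Sketch: masked tail load, assuming PacketReturnType is Packet16f (16 floats).
    Index tail = size % 16;                                     // leftover elements
    uint16_t umask = static_cast<uint16_t>((1u << tail) - 1u);  // low `tail` bits set
    Packet16f p =
        evaluator.template partialPacket<Packet16f>(size - tail, umask);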
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketReturnType& x)
{
@@ -86,7 +126,7 @@ struct TensorEvaluator
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
- eigen_assert(m_data);
+ eigen_assert(m_data != NULL);
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
return m_data[m_dims.IndexOfColMajor(coords)];
} else {
@@ -94,8 +134,9 @@ struct TensorEvaluator
}
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array<DenseIndex, NumCoords>& coords) {
- eigen_assert(m_data);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType&
+ coeffRef(const array<DenseIndex, NumCoords>& coords) {
+ eigen_assert(m_data != NULL);
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
return m_data[m_dims.IndexOfColMajor(coords)];
} else {
@@ -105,19 +146,50 @@ struct TensorEvaluator
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
- internal::unpacket_traits<PacketReturnType>::size);
+ PacketType<CoeffReturnType, Device>::size);
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ internal::TensorBlockResourceRequirements getResourceRequirements() const {
+ return internal::TensorBlockResourceRequirements::any();
}
- EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<Scalar>::Type data() const { return m_data; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+ block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+ bool /*root_of_expr_ast*/ = false) const {
+ assert(m_data != NULL);
+ return TensorBlock::materialize(m_data, m_dims, desc, scratch);
+ }
+
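From the caller's side, the block protocol pairs a descriptor with a scratch allocator (a sketch; names follow TensorBlock.h, and `desc_dims`/`device` are assumed to be in scope):

    // Illustrative use of the block interface, not part of the patch.
    TensorBlockDesc desc(/*offset=*/0, /*dimensions=*/desc_dims);
    TensorBlockScratch scratch(device);
    TensorBlock block = evaluator.block(desc, scratch);
    // block.expr() can now be assigned into a destination buffer; scratch
    // memory is reclaimed when `scratch` goes out of scope.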
+ template<typename TensorBlock>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
+ const TensorBlockDesc& desc, const TensorBlock& block) {
+ assert(m_data != NULL);
+
+ typedef typename TensorBlock::XprType TensorBlockExpr;
+ typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
+ Index>
+ TensorBlockAssign;
- /// required by sycl in order to construct sycl buffer from raw pointer
- const Device& device() const{return m_device;}
+ TensorBlockAssign::Run(
+ TensorBlockAssign::target(desc.dimensions(),
+ internal::strides<Layout>(m_dims), m_data,
+ desc.offset()),
+ block.expr());
+ }
+
+ EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
+#ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_data.bind(cgh);
+ }
+#endif
protected:
- typename internal::traits<Derived>::template MakePointer<Scalar>::Type m_data;
+ EvaluatorPointerType m_data;
Dimensions m_dims;
- const Device& m_device;
- const Derived& m_impl;
+ const Device EIGEN_DEVICE_REF m_device;
};
namespace {
@@ -126,7 +198,7 @@ T loadConstant(const T* address) {
return *address;
}
// Use the texture cache on CUDA devices whenever possible
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float loadConstant(const float* address) {
return __ldg(address);
@@ -140,6 +212,13 @@ Eigen::half loadConstant(const Eigen::half* address) {
return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
}
#endif
+#ifdef EIGEN_USE_SYCL
+// Overload of loadConstant for SYCL: reads go through the range accessor.
+template <cl::sycl::access::mode AcMd, typename T>
+T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) {
+ return *address;
+}
+#endif
}
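The dispatch above is extensible: any scalar with a device-side read-only load path can add an overload in this namespace. A hypothetical sketch for `int` on the same CUDA architectures:

    // Hypothetical overload, not in the patch: route int loads through __ldg too.
    #if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
    template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
    int loadConstant(const int* address) {
      return __ldg(address);  // read-only access via the texture cache
    }
    #endif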
@@ -152,40 +231,64 @@ struct TensorEvaluator<const Derived, Device>
typedef typename Derived::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef typename Derived::Dimensions Dimensions;
+ typedef const Derived XprType;
+ typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType;
+ typedef StorageMemory<const Scalar, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
+
+ typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
// NumDimensions is -1 for variable dim tensors
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
internal::traits<Derived>::NumDimensions : 0;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
- IsAligned = Derived::IsAligned,
- PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
- Layout = Derived::Layout,
- CoordAccess = NumCoords > 0,
- RawAccess = true
+ IsAligned = Derived::IsAligned,
+ PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
+ BlockAccess = internal::is_arithmetic<ScalarNoConst>::value,
+ PreferBlockAccess = false,
+ Layout = Derived::Layout,
+ CoordAccess = NumCoords > 0,
+ RawAccess = true
};
- // Used for accessor extraction in SYCL Managed TensorMap:
- const Derived& derived() const { return m_impl; }
+ //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+ typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
+ typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+ typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
+ Layout, Index>
+ TensorBlock;
+ //===--------------------------------------------------------------------===//
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
- : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m)
+ EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
+ : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
{ }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
- m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar));
+ m_device.memcpy((void*)(m_device.get(data)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
return false;
}
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType dest, EvalSubExprsCallback done) {
+ // TODO(ezhulenev): ThreadPoolDevice memcpy is a blocking operation.
+ done(evalSubExprsIfNeeded(dest));
+ }
+#endif // EIGEN_USE_THREADS
+
+ EIGEN_STRONG_INLINE void cleanup() { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
- eigen_assert(m_data);
+ eigen_assert(m_data != NULL);
return loadConstant(m_data+index);
}
@@ -195,8 +298,20 @@ struct TensorEvaluator<const Derived, Device>
return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
}
+ // Return a packet starting at `index`, where `umask` specifies which elements
+ // have to be loaded. The type/size of the mask depends on PacketReturnType;
+ // e.g. for Packet16f, `umask` is of type uint16_t, and if a bit is 1 the
+ // corresponding float element is loaded, otherwise 0 is loaded.
+ // The function is templatized to enable SFINAE.
+ template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ typename internal::enable_if<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>::type
+ partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
+ {
+ return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
- eigen_assert(m_data);
+ eigen_assert(m_data != NULL);
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
: m_dims.IndexOfRowMajor(coords);
return loadConstant(m_data+index);
@@ -204,19 +319,32 @@ struct TensorEvaluator<const Derived, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
- internal::unpacket_traits<PacketReturnType>::size);
+ PacketType<CoeffReturnType, Device>::size);
}
- EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<const Scalar>::Type data() const { return m_data; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ internal::TensorBlockResourceRequirements getResourceRequirements() const {
+ return internal::TensorBlockResourceRequirements::any();
+ }
- /// added for sycl in order to construct the buffer from the sycl device
- const Device& device() const{return m_device;}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+ block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+ bool /*root_of_expr_ast*/ = false) const {
+ assert(m_data != NULL);
+ return TensorBlock::materialize(m_data, m_dims, desc, scratch);
+ }
+ EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
+#ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_data.bind(cgh);
+ }
+#endif
protected:
- typename internal::traits<Derived>::template MakePointer<const Scalar>::Type m_data;
+ EvaluatorPointerType m_data;
Dimensions m_dims;
- const Device& m_device;
- const Derived& m_impl;
+ const Device EIGEN_DEVICE_REF m_device;
};
@@ -229,15 +357,6 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
{
typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;
- enum {
- IsAligned = true,
- PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
- };
-
- EIGEN_DEVICE_FUNC
TensorEvaluator(const XprType& op, const Device& device)
: m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
{ }
@@ -246,13 +365,42 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
+ typedef StorageMemory<CoeffReturnType, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
+
+ enum {
+ IsAligned = true,
+ PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
+ #ifdef EIGEN_USE_SYCL
+ && (PacketType<CoeffReturnType, Device>::size > 1)
+ #endif
+ ,
+ BlockAccess = false,
+ PreferBlockAccess = false,
+ Layout = TensorEvaluator<ArgType, Device>::Layout,
+ CoordAccess = false, // to be implemented
+ RawAccess = false
+ };
+
+ //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+ typedef internal::TensorBlockNotImplemented TensorBlock;
+ //===--------------------------------------------------------------------===//
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }
+
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType, EvalSubExprsCallback done) {
+ done(true);
+ }
+#endif // EIGEN_USE_THREADS
+
+ EIGEN_STRONG_INLINE void cleanup() { }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
@@ -268,16 +416,17 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
costPerCoeff(bool vectorized) const {
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
- internal::unpacket_traits<PacketReturnType>::size);
+ PacketType<CoeffReturnType, Device>::size);
}
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
-
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; }
- /// required by sycl in order to extract the accessor
- NullaryOp functor() const { return m_functor; }
+ EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
+#ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_argImpl.bind(cgh);
+ }
+#endif
private:
const NullaryOp m_functor;
@@ -295,32 +444,60 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;
enum {
- IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess & internal::functor_traits<UnaryOp>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
+ IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
+ PacketAccess = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
+ int(internal::functor_traits<UnaryOp>::PacketAccess),
+ BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
+ PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
+ Layout = TensorEvaluator<ArgType, Device>::Layout,
+ CoordAccess = false, // to be implemented
+ RawAccess = false
};
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
- : m_functor(op.functor()),
+ TensorEvaluator(const XprType& op, const Device& device)
+ : m_device(device),
+ m_functor(op.functor()),
m_argImpl(op.nestedExpression(), device)
{ }
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
+ typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
+ typedef StorageMemory<CoeffReturnType, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
+ static const int NumDims = internal::array_size<Dimensions>::value;
+
+ //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+ typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+ typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+ typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
+ ArgTensorBlock;
+
+ typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock>
+ TensorBlock;
+ //===--------------------------------------------------------------------===//
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_argImpl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType, EvalSubExprsCallback done) {
+ m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
+ }
+#endif // EIGEN_USE_THREADS
+
+ EIGEN_STRONG_INLINE void cleanup() {
m_argImpl.cleanup();
}
@@ -341,15 +518,31 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
}
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ internal::TensorBlockResourceRequirements getResourceRequirements() const {
+ static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
+ return m_argImpl.getResourceRequirements().addCostPerCoeff(
+ {0, 0, functor_cost / PacketSize});
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+ block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+ bool /*root_of_expr_ast*/ = false) const {
+ return TensorBlock(m_argImpl.block(desc, scratch), m_functor);
+ }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<ArgType, Device> & impl() const { return m_argImpl; }
- /// added for sycl in order to construct the buffer from sycl device
- UnaryOp functor() const { return m_functor; }
+ EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
+
+#ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_argImpl.bind(cgh);
+ }
+#endif
private:
+ const Device EIGEN_DEVICE_REF m_device;
const UnaryOp m_functor;
TensorEvaluator<ArgType, Device> m_argImpl;
};
@@ -363,16 +556,23 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;
enum {
- IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & TensorEvaluator<RightArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess &
- internal::functor_traits<BinaryOp>::PacketAccess,
- Layout = TensorEvaluator<LeftArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
+ IsAligned = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
+ int(TensorEvaluator<RightArgType, Device>::IsAligned),
+ PacketAccess = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
+ int(TensorEvaluator<RightArgType, Device>::PacketAccess) &
+ int(internal::functor_traits<BinaryOp>::PacketAccess),
+ BlockAccess = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
+ int(TensorEvaluator<RightArgType, Device>::BlockAccess),
+ PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
+ int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
+ Layout = TensorEvaluator<LeftArgType, Device>::Layout,
+ CoordAccess = false, // to be implemented
+ RawAccess = false
};
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
- : m_functor(op.functor()),
+ TensorEvaluator(const XprType& op, const Device& device)
+ : m_device(device),
+ m_functor(op.functor()),
m_leftImpl(op.lhsExpression(), device),
m_rightImpl(op.rhsExpression(), device)
{
@@ -384,8 +584,27 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
+ typedef StorageMemory<CoeffReturnType, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
+
+ static const int NumDims = internal::array_size<
+ typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
+
+ //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+ typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+ typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+ typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlock
+ LeftTensorBlock;
+ typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
+ RightTensorBlock;
+
+ typedef internal::TensorCwiseBinaryBlock<BinaryOp, LeftTensorBlock,
+ RightTensorBlock>
+ TensorBlock;
+ //===--------------------------------------------------------------------===//
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
{
@@ -393,12 +612,25 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
return m_leftImpl.dimensions();
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_leftImpl.evalSubExprsIfNeeded(NULL);
m_rightImpl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType, EvalSubExprsCallback done) {
+ // TODO(ezhulenev): Evaluate the two expressions in parallel?
+ m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
+ m_rightImpl.evalSubExprsIfNeededAsync(nullptr,
+ [done](bool) { done(true); });
+ });
+ }
+#endif // EIGEN_USE_THREADS
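The nested lambdas serialize the two sub-evaluations (the right expression starts only after the left completes) and fire `done(true)` last. A sketch of a caller blocking on the chain with Eigen's `Barrier` from the ThreadPool module (assumed setup):

    // Illustrative caller, not part of the patch.
    Eigen::Barrier barrier(1);
    evaluator.evalSubExprsIfNeededAsync(nullptr, [&barrier](bool /*need_assign*/) {
      barrier.Notify();  // the whole continuation chain has completed
    });
    barrier.Wait();      // block until done(...) fires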
+
+ EIGEN_STRONG_INLINE void cleanup() {
m_leftImpl.cleanup();
m_rightImpl.cleanup();
}
@@ -421,15 +653,34 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
}
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
- /// required by sycl in order to extract the accessor
- BinaryOp functor() const { return m_functor; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ internal::TensorBlockResourceRequirements getResourceRequirements() const {
+ static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
+ return internal::TensorBlockResourceRequirements::merge(
+ m_leftImpl.getResourceRequirements(),
+ m_rightImpl.getResourceRequirements())
+ .addCostPerCoeff({0, 0, functor_cost / PacketSize});
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+ block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+ bool /*root_of_expr_ast*/ = false) const {
+ desc.DropDestinationBuffer();
+ return TensorBlock(m_leftImpl.block(desc, scratch),
+ m_rightImpl.block(desc, scratch), m_functor);
+ }
+
+ EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
+ #ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_leftImpl.bind(cgh);
+ m_rightImpl.bind(cgh);
+ }
+ #endif
private:
+ const Device EIGEN_DEVICE_REF m_device;
const BinaryOp m_functor;
TensorEvaluator<LeftArgType, Device> m_leftImpl;
TensorEvaluator<RightArgType, Device> m_rightImpl;
@@ -444,14 +695,20 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
enum {
IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned & TensorEvaluator<Arg2Type, Device>::IsAligned & TensorEvaluator<Arg3Type, Device>::IsAligned,
- PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess & TensorEvaluator<Arg2Type, Device>::PacketAccess & TensorEvaluator<Arg3Type, Device>::PacketAccess &
- internal::functor_traits<TernaryOp>::PacketAccess,
- Layout = TensorEvaluator<Arg1Type, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
+ PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess &&
+ TensorEvaluator<Arg2Type, Device>::PacketAccess &&
+ TensorEvaluator<Arg3Type, Device>::PacketAccess &&
+ internal::functor_traits<TernaryOp>::PacketAccess,
+ BlockAccess = false,
+ PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
+ TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
+ TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
+ Layout = TensorEvaluator<Arg1Type, Device>::Layout,
+ CoordAccess = false, // to be implemented
+ RawAccess = false
};
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
+ TensorEvaluator(const XprType& op, const Device& device)
: m_functor(op.functor()),
m_arg1Impl(op.arg1Expression(), device),
m_arg2Impl(op.arg2Expression(), device),
@@ -479,8 +736,14 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
typedef typename XprType::Scalar Scalar;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
+ typedef StorageMemory<CoeffReturnType, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
+
+ //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+ typedef internal::TensorBlockNotImplemented TensorBlock;
+ //===--------------------------------------------------------------------===//
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
{
@@ -488,13 +751,13 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
return m_arg1Impl.dimensions();
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_arg1Impl.evalSubExprsIfNeeded(NULL);
m_arg2Impl.evalSubExprsIfNeeded(NULL);
m_arg3Impl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_arg1Impl.cleanup();
m_arg2Impl.cleanup();
m_arg3Impl.cleanup();
@@ -521,14 +784,16 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
}
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<Arg1Type, Device> & arg1Impl() const { return m_arg1Impl; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<Arg2Type, Device>& arg2Impl() const { return m_arg2Impl; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<Arg3Type, Device>& arg3Impl() const { return m_arg3Impl; }
+#ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_arg1Impl.bind(cgh);
+ m_arg2Impl.bind(cgh);
+ m_arg3Impl.bind(cgh);
+ }
+#endif
private:
const TernaryOp m_functor;
@@ -547,15 +812,23 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
typedef typename XprType::Scalar Scalar;
enum {
- IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned & TensorEvaluator<ElseArgType, Device>::IsAligned,
- PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess &
- internal::packet_traits<Scalar>::HasBlend,
- Layout = TensorEvaluator<IfArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
+ IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned &
+ TensorEvaluator<ElseArgType, Device>::IsAligned,
+ PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess &
+ TensorEvaluator<ElseArgType, Device>::PacketAccess &
+ PacketType<Scalar, Device>::HasBlend,
+ BlockAccess = TensorEvaluator<IfArgType, Device>::BlockAccess &&
+ TensorEvaluator<ThenArgType, Device>::BlockAccess &&
+ TensorEvaluator<ElseArgType, Device>::BlockAccess,
+ PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
+ TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
+ TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
+ Layout = TensorEvaluator<IfArgType, Device>::Layout,
+ CoordAccess = false, // to be implemented
+ RawAccess = false
};
- EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
+ TensorEvaluator(const XprType& op, const Device& device)
: m_condImpl(op.ifExpression(), device),
m_thenImpl(op.thenExpression(), device),
m_elseImpl(op.elseExpression(), device)
@@ -569,8 +842,42 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
typedef typename XprType::Index Index;
typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
+ typedef StorageMemory<CoeffReturnType, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
+
+ static const int NumDims = internal::array_size<Dimensions>::value;
+
+ //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+ typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+ typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+ typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlock
+ IfArgTensorBlock;
+ typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlock
+ ThenArgTensorBlock;
+ typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlock
+ ElseArgTensorBlock;
+
+ struct TensorSelectOpBlockFactory {
+ template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
+ struct XprType {
+ typedef TensorSelectOp<const IfArgXprType, const ThenArgXprType, const ElseArgXprType> type;
+ };
+
+ template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
+ typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type expr(
+ const IfArgXprType& if_expr, const ThenArgXprType& then_expr, const ElseArgXprType& else_expr) const {
+ return typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type(if_expr, then_expr, else_expr);
+ }
+ };
+
+ typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory,
+ IfArgTensorBlock, ThenArgTensorBlock,
+ ElseArgTensorBlock>
+ TensorBlock;
+ //===--------------------------------------------------------------------===//
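The factory is what lets the generic ternary block rebuild a select expression over block-level sub-expressions. A hypothetical helper showing how it is meant to be used (assumed names, sketch only):

    // Sketch: stitch three evaluated blocks back into a TensorSelectOp.
    template <typename IfB, typename ThenB, typename ElseB>
    auto makeSelectBlockExpr(const TensorSelectOpBlockFactory& factory,
                             const IfB& i, const ThenB& t, const ElseB& e)
        -> decltype(factory.expr(i.expr(), t.expr(), e.expr())) {
      return factory.expr(i.expr(), t.expr(), e.expr());
    }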
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
{
@@ -578,13 +885,26 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
return m_condImpl.dimensions();
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_condImpl.evalSubExprsIfNeeded(NULL);
m_thenImpl.evalSubExprsIfNeeded(NULL);
m_elseImpl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType, EvalSubExprsCallback done) {
+ m_condImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
+ m_thenImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
+ m_elseImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
+ });
+ });
+ }
+#endif // EIGEN_USE_THREADS
+
+ EIGEN_STRONG_INLINE void cleanup() {
m_condImpl.cleanup();
m_thenImpl.cleanup();
m_elseImpl.cleanup();
@@ -597,13 +917,15 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
template<int LoadMode>
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
{
- internal::Selector<PacketSize> select;
- for (Index i = 0; i < PacketSize; ++i) {
- select.select[i] = m_condImpl.coeff(index+i);
- }
- return internal::pblend(select,
- m_thenImpl.template packet<LoadMode>(index),
- m_elseImpl.template packet<LoadMode>(index));
+ internal::Selector<PacketSize> select;
+ EIGEN_UNROLL_LOOP
+ for (Index i = 0; i < PacketSize; ++i) {
+   select.select[i] = m_condImpl.coeff(index+i);
+ }
+ return internal::pblend(select,
+                         m_thenImpl.template packet<LoadMode>(index),
+                         m_elseImpl.template packet<LoadMode>(index));
}
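`pblend` does the per-lane selection; a scalar model of its semantics (illustrative, not the SIMD implementation):

    // Lane i takes the `then` value when the selector is non-zero, else the
    // `else` value; `result`, `then_vals`, `else_vals` are assumed arrays.
    for (int i = 0; i < PacketSize; ++i)
      result[i] = select.select[i] ? then_vals[i] : else_vals[i];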
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
@@ -613,14 +935,42 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
.cwiseMax(m_elseImpl.costPerCoeff(vectorized));
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return NULL; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<IfArgType, Device> & cond_impl() const { return m_condImpl; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<ThenArgType, Device>& then_impl() const { return m_thenImpl; }
- /// required by sycl in order to extract the accessor
- const TensorEvaluator<ElseArgType, Device>& else_impl() const { return m_elseImpl; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ internal::TensorBlockResourceRequirements getResourceRequirements() const {
+ auto then_req = m_thenImpl.getResourceRequirements();
+ auto else_req = m_elseImpl.getResourceRequirements();
+
+ auto merged_req =
+ internal::TensorBlockResourceRequirements::merge(then_req, else_req);
+ merged_req.cost_per_coeff =
+ then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);
+
+ return internal::TensorBlockResourceRequirements::merge(
+ m_condImpl.getResourceRequirements(), merged_req);
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+ block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+ bool /*root_of_expr_ast*/ = false) const {
+ // It's unsafe to pass destination buffer to underlying expressions, because
+ // output might be aliased with one of the inputs.
+ desc.DropDestinationBuffer();
+
+ return TensorBlock(
+ m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch),
+ m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory());
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }
+
+#ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_condImpl.bind(cgh);
+ m_thenImpl.bind(cgh);
+ m_elseImpl.bind(cgh);
+ }
+#endif
private:
TensorEvaluator<IfArgType, Device> m_condImpl;
TensorEvaluator<ThenArgType, Device> m_thenImpl;