aboutsummaryrefslogtreecommitdiff
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h265
1 files changed, 221 insertions, 44 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
index 14e392e36..586ce68ab 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
@@ -31,6 +31,7 @@ struct traits<TensorReverseOp<ReverseDimensions,
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
+ typedef typename XprTraits::PointerType PointerType;
};
template<typename ReverseDimensions, typename XprType>
@@ -53,15 +54,16 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
XprType>, WriteAccessors>
{
public:
- typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorReverseOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorReverseOp>::StorageKind
- StorageKind;
- typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp(
+ typedef TensorBase<TensorReverseOp<ReverseDimensions, XprType>, WriteAccessors>Base;
+ typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar;
+ typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename Eigen::internal::nested<TensorReverseOp>::type Nested;
+ typedef typename Eigen::internal::traits<TensorReverseOp>::StorageKind
+ StorageKind;
+ typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index;
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp(
const XprType& expr, const ReverseDimensions& reverse_dims)
: m_xpr(expr), m_reverse_dims(reverse_dims) { }
@@ -72,24 +74,8 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
const typename internal::remove_all<typename XprType::Nested>::type&
expression() const { return m_xpr; }
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorReverseOp& operator = (const TensorReverseOp& other)
- {
- typedef TensorAssignOp<TensorReverseOp, const TensorReverseOp> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
+ EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReverseOp)
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorReverseOp& operator = (const OtherDerived& other)
- {
- typedef TensorAssignOp<TensorReverseOp, const OtherDerived> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
protected:
typename XprType::Nested m_xpr;
@@ -107,19 +93,38 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+ typedef StorageMemory<CoeffReturnType, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
enum {
- IsAligned = false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
+ IsAligned = false,
+ PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
+ BlockAccess = NumDims > 0,
+ PreferBlockAccess = true,
+ Layout = TensorEvaluator<ArgType, Device>::Layout,
+ CoordAccess = false, // to be implemented
+ RawAccess = false
};
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
- const Device& device)
- : m_impl(op.expression(), device), m_reverse(op.reverse())
+ typedef internal::TensorIntDivisor<Index> IndexDivisor;
+
+ //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+ typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+ typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+ typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
+ ArgTensorBlock;
+
+ typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
+ Layout, Index>
+ TensorBlock;
+ //===--------------------------------------------------------------------===//
+
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ : m_impl(op.expression(), device),
+ m_reverse(op.reverse()),
+ m_device(device)
{
// Reversing a scalar isn't supported yet. It would be a no-op anyway.
EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -130,11 +135,13 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
m_strides[0] = 1;
for (int i = 1; i < NumDims; ++i) {
m_strides[i] = m_strides[i-1] * m_dimensions[i-1];
+ if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
}
} else {
m_strides[NumDims-1] = 1;
for (int i = NumDims - 2; i >= 0; --i) {
m_strides[i] = m_strides[i+1] * m_dimensions[i+1];
+ if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
}
}
}
@@ -142,11 +149,20 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+
+#ifdef EIGEN_USE_THREADS
+ template <typename EvalSubExprsCallback>
+ EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
+ EvaluatorPointerType, EvalSubExprsCallback done) {
+ m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
+ }
+#endif // EIGEN_USE_THREADS
+
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
@@ -155,8 +171,9 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
eigen_assert(index < dimensions().TotalSize());
Index inputIndex = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+ EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) {
- Index idx = index / m_strides[i];
+ Index idx = index / m_fastStrides[i];
index -= idx * m_strides[i];
if (m_reverse[i]) {
idx = m_dimensions[i] - idx - 1;
@@ -169,8 +186,9 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
inputIndex += index;
}
} else {
+ EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) {
- Index idx = index / m_strides[i];
+ Index idx = index / m_fastStrides[i];
index -= idx * m_strides[i];
if (m_reverse[i]) {
idx = m_dimensions[i] - idx - 1;
@@ -202,6 +220,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
// local structure.
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
values[PacketSize];
+ EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
values[i] = coeff(index+i);
}
@@ -209,6 +228,130 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
return rslt;
}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ internal::TensorBlockResourceRequirements getResourceRequirements() const {
+ const size_t target_size = m_device.lastLevelCacheSize();
+ // Block evaluation reads underlying memory in reverse order, and default
+ // cost model does not properly catch this in bytes stored/loaded.
+ return internal::TensorBlockResourceRequirements::skewed<Scalar>(
+ target_size)
+ .addCostPerCoeff({0, 0, 24});
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+ block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+ bool /*root_of_expr_ast*/ = false) const {
+ // TODO(ezhulenev): If underlying tensor expression supports and prefers
+ // block evaluation we must use it. Currently we use coeff and packet
+ // access into the underlying tensor expression.
+ // static const bool useBlockAccessForArgType =
+ // TensorEvaluator<ArgType, Device>::BlockAccess &&
+ // TensorEvaluator<ArgType, Device>::PreferBlockAccess;
+
+ static const bool isColMajor =
+ static_cast<int>(Layout) == static_cast<int>(ColMajor);
+
+ static const Index inner_dim_idx = isColMajor ? 0 : NumDims - 1;
+ const bool inner_dim_reversed = m_reverse[inner_dim_idx];
+
+ // Offset in the output block.
+ Index block_offset = 0;
+
+ // Offset in the input Tensor.
+ Index input_offset = reverseIndex(desc.offset());
+
+ // Initialize output block iterator state. Dimension in this array are
+ // always in inner_most -> outer_most order (col major layout).
+ array<BlockIteratorState, NumDims> it;
+ for (int i = 0; i < NumDims; ++i) {
+ const int dim = isColMajor ? i : NumDims - 1 - i;
+ it[i].size = desc.dimension(dim);
+ it[i].count = 0;
+ it[i].reverse = m_reverse[dim];
+
+ it[i].block_stride =
+ i == 0 ? 1 : (it[i - 1].size * it[i - 1].block_stride);
+ it[i].block_span = it[i].block_stride * (it[i].size - 1);
+
+ it[i].input_stride = m_strides[dim];
+ it[i].input_span = it[i].input_stride * (it[i].size - 1);
+
+ if (it[i].reverse) {
+ it[i].input_stride = -1 * it[i].input_stride;
+ it[i].input_span = -1 * it[i].input_span;
+ }
+ }
+
+ // If multiple inner dimensions have the same reverse flag, check if we can
+ // merge them into a single virtual inner dimension.
+ int effective_inner_dim = 0;
+ for (int i = 1; i < NumDims; ++i) {
+ if (it[i].reverse != it[effective_inner_dim].reverse) break;
+ if (it[i].block_stride != it[effective_inner_dim].size) break;
+ if (it[i].block_stride != numext::abs(it[i].input_stride)) break;
+
+ it[i].size = it[effective_inner_dim].size * it[i].size;
+
+ it[i].block_stride = 1;
+ it[i].input_stride = (inner_dim_reversed ? -1 : 1);
+
+ it[i].block_span = it[i].block_stride * (it[i].size - 1);
+ it[i].input_span = it[i].input_stride * (it[i].size - 1);
+
+ effective_inner_dim = i;
+ }
+
+ eigen_assert(it[effective_inner_dim].block_stride == 1);
+ eigen_assert(it[effective_inner_dim].input_stride ==
+ (inner_dim_reversed ? -1 : 1));
+
+ const Index inner_dim_size = it[effective_inner_dim].size;
+
+ // Prepare storage for the materialized reverse result.
+ const typename TensorBlock::Storage block_storage =
+ TensorBlock::prepareStorage(desc, scratch);
+ CoeffReturnType* block_buffer = block_storage.data();
+
+ while (it[NumDims - 1].count < it[NumDims - 1].size) {
+ // Copy inner-most dimension data from reversed location in input.
+ Index dst = block_offset;
+ Index src = input_offset;
+
+ // NOTE(ezhulenev): Adding vectorized path with internal::preverse showed
+ // worse results in benchmarks than a simple coefficient loop.
+ if (inner_dim_reversed) {
+ for (Index i = 0; i < inner_dim_size; ++i) {
+ block_buffer[dst] = m_impl.coeff(src);
+ ++dst;
+ --src;
+ }
+ } else {
+ for (Index i = 0; i < inner_dim_size; ++i) {
+ block_buffer[dst] = m_impl.coeff(src);
+ ++dst;
+ ++src;
+ }
+ }
+
+ // For the 1d tensor we need to generate only one inner-most dimension.
+ if ((NumDims - effective_inner_dim) == 1) break;
+
+ // Update offset.
+ for (Index i = effective_inner_dim + 1; i < NumDims; ++i) {
+ if (++it[i].count < it[i].size) {
+ block_offset += it[i].block_stride;
+ input_offset += it[i].input_stride;
+ break;
+ }
+ if (i != NumDims - 1) it[i].count = 0;
+ block_offset -= it[i].block_span;
+ input_offset -= it[i].input_span;
+ }
+ }
+
+ return block_storage.AsTensorMaterializedBlock();
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
2 * TensorOpCost::MulCost<Index>() +
@@ -222,13 +365,42 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
}
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
+ EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }
+
+#ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_impl.bind(cgh);
+ }
+#endif
protected:
Dimensions m_dimensions;
array<Index, NumDims> m_strides;
+ array<IndexDivisor, NumDims> m_fastStrides;
TensorEvaluator<ArgType, Device> m_impl;
ReverseDimensions m_reverse;
+ const Device EIGEN_DEVICE_REF m_device;
+
+ private:
+ struct BlockIteratorState {
+ BlockIteratorState()
+ : size(0),
+ count(0),
+ reverse(false),
+ block_stride(0),
+ block_span(0),
+ input_stride(0),
+ input_span(0) {}
+
+ Index size;
+ Index count;
+ bool reverse;
+ Index block_stride;
+ Index block_span;
+ Index input_stride;
+ Index input_span;
+ };
};
// Eval as lvalue
@@ -247,18 +419,23 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
+ BlockAccess = false,
+ PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
- const Device& device)
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: Base(op, device) {}
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+
+ //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+ typedef internal::TensorBlockNotImplemented TensorBlock;
+ //===--------------------------------------------------------------------===//
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions() const { return this->m_dimensions; }
@@ -275,11 +452,11 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
// This code is pilfered from TensorMorphing.h
EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
+ EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
this->coeffRef(index+i) = values[i];
}
}
-
};