Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h')
-rw-r--r--   unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h   278
1 file changed, 206 insertions(+), 72 deletions(-)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index 1ba7ef170..376457341 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -32,12 +32,13 @@ struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions - 1;
static const int Layout = XprTraits::Layout;
+ typedef typename XprTraits::PointerType PointerType;
};
template<DenseIndex DimId, typename XprType>
struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
{
- typedef const TensorChippingOp<DimId, XprType>& type;
+ typedef const TensorChippingOp<DimId, XprType> EIGEN_DEVICE_REF type;
};
template<DenseIndex DimId, typename XprType>
@@ -50,6 +51,7 @@ template <DenseIndex DimId>
struct DimensionId
{
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) {
+ EIGEN_UNUSED_VARIABLE(dim);
eigen_assert(dim == DimId);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
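
The point of the added EIGEN_UNUSED_VARIABLE(dim) is that eigen_assert compiles to nothing in release builds, which would leave `dim` unreferenced and trigger unused-parameter warnings. A minimal stand-alone sketch of the same pattern (the names below are illustrative, not Eigen's):

#include <cassert>

#define UNUSED_VARIABLE(x) (void)(x)  // stand-in for EIGEN_UNUSED_VARIABLE

template <int DimId>
struct StaticDimensionId {  // illustrative stand-in for internal::DimensionId<DimId>
  explicit StaticDimensionId(int dim) {
    UNUSED_VARIABLE(dim);   // without this, `dim` is unused once NDEBUG removes the assert
    assert(dim == DimId);
  }
  int actualDim() const { return DimId; }  // the dimension is a compile-time constant
};
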
@@ -78,44 +80,28 @@ template<DenseIndex DimId, typename XprType>
class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
{
public:
- typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;
+ typedef TensorBase<TensorChippingOp<DimId, XprType> > Base;
+ typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar;
+ typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+ typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested;
+ typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind;
+ typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim)
+ : m_xpr(expr), m_offset(offset), m_dim(dim) {
+ }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim)
- : m_xpr(expr), m_offset(offset), m_dim(dim) {
- }
+ EIGEN_DEVICE_FUNC
+ const Index offset() const { return m_offset; }
+ EIGEN_DEVICE_FUNC
+ const Index dim() const { return m_dim.actualDim(); }
- EIGEN_DEVICE_FUNC
- const Index offset() const { return m_offset; }
- EIGEN_DEVICE_FUNC
- const Index dim() const { return m_dim.actualDim(); }
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<typename XprType::Nested>::type&
+ expression() const { return m_xpr; }
- EIGEN_DEVICE_FUNC
- const typename internal::remove_all<typename XprType::Nested>::type&
- expression() const { return m_xpr; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorChippingOp& operator = (const TensorChippingOp& other)
- {
- typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
-
- template<typename OtherDerived>
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE TensorChippingOp& operator = (const OtherDerived& other)
- {
- typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign;
- Assign assign(*this, other);
- internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
- return *this;
- }
+ EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorChippingOp)
protected:
typename XprType::Nested m_xpr;
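
For reference, offset() and dim() describe which slice the chip takes: chipping fixes one dimension at a given offset and drops it, yielding an expression of rank NumDimensions - 1. A small usage sketch against the public Tensor API (the sizes are only illustrative):

#include <unsupported/Eigen/CXX11/Tensor>

void chip_usage() {
  Eigen::Tensor<float, 3> input(2, 3, 5);
  input.setRandom();

  // Fix dimension 1 at offset 2: the result behaves like a rank-2 tensor of shape 2x5.
  Eigen::Tensor<float, 2> slice = input.chip(/*offset=*/2, /*dim=*/1);

  // Chips of a non-const tensor are also assignable; that is what the
  // EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS line above provides.
  input.chip(/*offset=*/0, /*dim=*/1) = slice;
}
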
@@ -136,20 +122,49 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
-
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+ typedef StorageMemory<CoeffReturnType, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
enum {
// Alignment can't be guaranteed at compile time since it depends on the
// slice offsets.
- IsAligned = false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
+ IsAligned = false,
+ Layout = TensorEvaluator<ArgType, Device>::Layout,
+ PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
+ BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
+ // Chipping of outer-most dimension is a trivial operation, because we can
+ // read and write directly from the underlying tensor using a single offset.
+ IsOuterChipping = (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) ||
+ (static_cast<int>(Layout) == RowMajor && DimId == 0),
+ // Chipping inner-most dimension.
+ IsInnerChipping = (static_cast<int>(Layout) == ColMajor && DimId == 0) ||
+ (static_cast<int>(Layout) == RowMajor && DimId == NumInputDims - 1),
+ // Prefer block access if the underlying expression prefers it, otherwise
+ // only if chipping is not trivial.
+ PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess ||
+ !IsOuterChipping,
+ CoordAccess = false, // to be implemented
+ RawAccess = false
};
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
+
+ //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+ typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+ typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+ typedef internal::TensorBlockDescriptor<NumInputDims, Index>
+ ArgTensorBlockDesc;
+ typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
+ ArgTensorBlock;
+
+ typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
+ Layout, Index>
+ TensorBlock;
+ //===--------------------------------------------------------------------===//
+
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
{
EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
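
The IsInnerChipping / IsOuterChipping flags above only encode where the chipped dimension sits in memory. As a worked example, take a column-major 4x3x2 tensor (strides 1, 4, 12): chipping dimension 2 at offset k selects the contiguous range [12k, 12k + 12), so outer chipping reduces to a single base offset, while chipping dimension 0 at offset k selects every 4th coefficient starting at k. A plain-C++ sketch of that index math (not Eigen internals; sizes hard-coded for illustration):

#include <cassert>

// Column-major 4x3x2 tensor: strides are {1, 4, 12}.
inline int linearIndex(int i0, int i1, int i2) { return i0 + 4 * i1 + 12 * i2; }

// Outer chip (dimension 2 fixed at k): the whole slice is one contiguous block.
inline int outerChipSrc(int outIndex, int k) { return outIndex + 12 * k; }

// Inner chip (dimension 0 fixed at k): consecutive output coefficients are 4 apart.
inline int innerChipSrc(int outIndex, int k) { return outIndex * 4 + k; }

inline void chipIndexMathCheck() {
  // Output coefficient (3, 2) of the outer chip at k = 1 comes from input (3, 2, 1).
  assert(outerChipSrc(3 + 4 * 2, 1) == linearIndex(3, 2, 1));
  // Output coefficient (2, 1) of the inner chip at k = 3 comes from input (3, 2, 1).
  assert(innerChipSrc(2 + 3 * 1, 3) == linearIndex(3, 2, 1));
}
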
@@ -185,12 +200,12 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
@@ -205,21 +220,20 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
- if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
+ if (isInnerChipping()) {
// m_stride is equal to 1, so let's avoid the integer division.
eigen_assert(m_stride == 1);
Index inputIndex = index * m_inputStride + m_inputOffset;
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+ EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
values[i] = m_impl.coeff(inputIndex);
inputIndex += m_inputStride;
}
PacketReturnType rslt = internal::pload<PacketReturnType>(values);
return rslt;
- } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
- // m_stride is aways greater than index, so let's avoid the integer division.
+ } else if (isOuterChipping()) {
+ // m_stride is always greater than index, so let's avoid the integer division.
eigen_assert(m_stride > index);
return m_impl.template packet<LoadMode>(index + m_inputOffset);
} else {
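
The inner-chipping branch of packet() above gathers PacketSize strided scalars into an aligned temporary and loads them with pload. The same gather-then-load pattern in isolation (a sketch using Eigen's packet primitives; the helper name is made up):

#include <Eigen/Core>

// Gather unpacket_traits<Packet>::size scalars spaced `stride` apart and load
// them as a single packet -- the pattern the inner-chipping fast path relies on.
template <typename Packet>
Packet stridedPacketLoad(
    const typename Eigen::internal::unpacket_traits<Packet>::type* src,
    Eigen::Index stride) {
  typedef typename Eigen::internal::unpacket_traits<Packet>::type Scalar;
  const int size = Eigen::internal::unpacket_traits<Packet>::size;
  EIGEN_ALIGN_MAX Scalar values[Eigen::internal::unpacket_traits<Packet>::size];
  for (int i = 0; i < size; ++i) values[i] = src[i * stride];
  return Eigen::internal::pload<Packet>(values);
}
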
@@ -231,6 +245,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
} else {
// Cross the stride boundary. Fallback to slow path.
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+ EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
values[i] = coeff(index);
++index;
@@ -263,29 +278,100 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
TensorOpCost(0, 0, cost, vectorized, PacketSize);
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const {
- CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data());
- if (((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumDims) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) &&
- result) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ internal::TensorBlockResourceRequirements getResourceRequirements() const {
+ const size_t target_size = m_device.lastLevelCacheSize();
+ return internal::TensorBlockResourceRequirements::merge(
+ internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
+ m_impl.getResourceRequirements());
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+ block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+ bool root_of_expr_ast = false) const {
+ const Index chip_dim = m_dim.actualDim();
+
+ DSizes<Index, NumInputDims> input_block_dims;
+ for (int i = 0; i < NumInputDims; ++i) {
+ input_block_dims[i]
+ = i < chip_dim ? desc.dimension(i)
+ : i > chip_dim ? desc.dimension(i - 1)
+ : 1;
+ }
+
+ ArgTensorBlockDesc arg_desc(srcCoeff(desc.offset()), input_block_dims);
+
+ // Try to reuse destination buffer for materializing argument block.
+ if (desc.HasDestinationBuffer()) {
+ DSizes<Index, NumInputDims> arg_destination_strides;
+ for (int i = 0; i < NumInputDims; ++i) {
+ arg_destination_strides[i]
+ = i < chip_dim ? desc.destination().strides()[i]
+ : i > chip_dim ? desc.destination().strides()[i - 1]
+ : 0; // for dimensions of size `1` stride should never be used.
+ }
+
+ arg_desc.template AddDestinationBuffer<Layout>(
+ desc.destination().template data<ScalarNoConst>(),
+ arg_destination_strides);
+ }
+
+ ArgTensorBlock arg_block = m_impl.block(arg_desc, scratch, root_of_expr_ast);
+ if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();
+
+ if (arg_block.data() != NULL) {
+ // Forward argument block buffer if possible.
+ return TensorBlock(arg_block.kind(), arg_block.data(),
+ desc.dimensions());
+
+ } else {
+ // Assign argument block expression to a buffer.
+
+ // Prepare storage for the materialized chipping result.
+ const typename TensorBlock::Storage block_storage =
+ TensorBlock::prepareStorage(desc, scratch);
+
+ typedef internal::TensorBlockAssignment<
+ ScalarNoConst, NumInputDims, typename ArgTensorBlock::XprType, Index>
+ TensorBlockAssignment;
+
+ TensorBlockAssignment::Run(
+ TensorBlockAssignment::target(
+ arg_desc.dimensions(),
+ internal::strides<Layout>(arg_desc.dimensions()),
+ block_storage.data()),
+ arg_block.expr());
+
+ return block_storage.AsTensorMaterializedBlock();
+ }
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
+ typename Storage::Type result = constCast(m_impl.data());
+ if (isOuterChipping() && result) {
return result + m_inputOffset;
} else {
return NULL;
}
}
+#ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+ m_impl.bind(cgh);
+ }
+#endif
protected:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
{
Index inputIndex;
- if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
+ if (isInnerChipping()) {
// m_stride is equal to 1, so let's avoid the integer division.
eigen_assert(m_stride == 1);
inputIndex = index * m_inputStride + m_inputOffset;
- } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims-1) ||
- (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
- // m_stride is aways greater than index, so let's avoid the integer division.
+ } else if (isOuterChipping()) {
+ // m_stride is always greater than index, so let's avoid the integer
+ // division.
eigen_assert(m_stride > index);
inputIndex = index + m_inputOffset;
} else {
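
block() above rebuilds the input block shape by re-inserting the chipped dimension with extent 1 (the `i < chip_dim ? ... : i > chip_dim ? ... : 1` ternaries). A standalone sketch of that mapping, with std::array standing in for DSizes:

#include <array>

// NumDims is the rank of the chipped (output) expression; the input has rank
// NumDims + 1. The chipped dimension re-enters with extent 1.
template <int NumDims>
std::array<int, NumDims + 1> expandChippedDims(
    const std::array<int, NumDims>& out_dims, int chip_dim) {
  std::array<int, NumDims + 1> in_dims;
  for (int i = 0; i < NumDims + 1; ++i) {
    in_dims[i] = i < chip_dim ? out_dims[i]
               : i > chip_dim ? out_dims[i - 1]
               : 1;
  }
  return in_dims;
}
// Example: out_dims = {4, 5}, chip_dim = 1  ->  in_dims = {4, 1, 5}.
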
@@ -297,13 +383,25 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
return inputIndex;
}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isInnerChipping() const {
+ return IsInnerChipping ||
+ (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == 0) ||
+ (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == NumInputDims - 1);
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isOuterChipping() const {
+ return IsOuterChipping ||
+ (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == NumInputDims-1) ||
+ (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == 0);
+ }
+
Dimensions m_dimensions;
Index m_stride;
Index m_inputOffset;
Index m_inputStride;
TensorEvaluator<ArgType, Device> m_impl;
const internal::DimensionId<DimId> m_dim;
- const Device& m_device;
+ const Device EIGEN_DEVICE_REF m_device;
};
@@ -321,15 +419,21 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
- static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
enum {
- IsAligned = false,
- PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
- RawAccess = false
+ IsAligned = false,
+ PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
+ BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
+ Layout = TensorEvaluator<ArgType, Device>::Layout,
+ RawAccess = false
};
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+ typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+ //===--------------------------------------------------------------------===//
+
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: Base(op, device)
{ }
@@ -343,20 +447,19 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == 0) ||
- (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) {
+ if (this->isInnerChipping()) {
// m_stride is equal to 1, so let's avoid the integer division.
eigen_assert(this->m_stride == 1);
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
+ EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
this->m_impl.coeffRef(inputIndex) = values[i];
inputIndex += this->m_inputStride;
}
- } else if ((static_cast<int>(this->Layout) == static_cast<int>(ColMajor) && this->m_dim.actualDim() == NumInputDims-1) ||
- (static_cast<int>(this->Layout) == static_cast<int>(RowMajor) && this->m_dim.actualDim() == 0)) {
- // m_stride is aways greater than index, so let's avoid the integer division.
+ } else if (this->isOuterChipping()) {
+ // m_stride is always greater than index, so let's avoid the integer division.
eigen_assert(this->m_stride > index);
this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
} else {
@@ -369,6 +472,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
// Cross stride boundary. Fallback to slow path.
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
+ EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) {
this->coeffRef(index) = values[i];
++index;
@@ -376,6 +480,36 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
}
}
}
+
+ template <typename TensorBlock>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
+ const TensorBlockDesc& desc, const TensorBlock& block) {
+ assert(this->m_impl.data() != NULL);
+
+ const Index chip_dim = this->m_dim.actualDim();
+
+ DSizes<Index, NumInputDims> input_block_dims;
+ for (int i = 0; i < NumInputDims; ++i) {
+ input_block_dims[i] = i < chip_dim ? desc.dimension(i)
+ : i > chip_dim ? desc.dimension(i - 1)
+ : 1;
+ }
+
+ typedef TensorReshapingOp<const DSizes<Index, NumInputDims>,
+ const typename TensorBlock::XprType>
+ TensorBlockExpr;
+
+ typedef internal::TensorBlockAssignment<Scalar, NumInputDims,
+ TensorBlockExpr, Index>
+ TensorBlockAssign;
+
+ TensorBlockAssign::Run(
+ TensorBlockAssign::target(
+ input_block_dims,
+ internal::strides<Layout>(this->m_impl.dimensions()),
+ this->m_impl.data(), this->srcCoeff(desc.offset())),
+ block.expr().reshape(input_block_dims));
+ }
};
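
Conceptually, writeBlock() stores a rank-NumDims block through the chip by treating it as a rank-NumInputDims block with a unit extent in the chipped dimension and assigning it straight into the underlying tensor's buffer. A tensor-level sketch of the same idea using the public API (the shapes and the chipped dimension chosen here are only illustrative):

#include <unsupported/Eigen/CXX11/Tensor>

void writeThroughChip(Eigen::Tensor<float, 3>& dst,          // assumed shape 4x3x5
                      const Eigen::Tensor<float, 2>& value,  // assumed shape 4x5
                      Eigen::Index offset) {                 // offset along dimension 1
  // Direct form, handled by the writer evaluator in this file:
  dst.chip(offset, /*dim=*/1) = value;

  // Equivalent reshape-based form, mirroring what writeBlock() does with
  // TensorBlockAssignment: give the value a unit extent at the chipped position
  // and store it into the corresponding region of the parent tensor.
  Eigen::array<Eigen::Index, 3> offsets = {0, offset, 0};
  Eigen::array<Eigen::Index, 3> extents = {4, 1, 5};
  dst.slice(offsets, extents) = value.reshape(extents);
}
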