aboutsummaryrefslogtreecommitdiff
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h151
1 files changed, 134 insertions, 17 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
index eb1d4934e..174bf0683 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
@@ -12,7 +12,7 @@
namespace Eigen {
-/** \class TensorGenerator
+/** \class TensorGeneratorOp
* \ingroup CXX11_Tensor_Module
*
* \brief Tensor generator class.
@@ -31,6 +31,7 @@ struct traits<TensorGeneratorOp<Generator, XprType> > : public traits<XprType>
typedef typename remove_reference<Nested>::type _Nested;
static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout;
+ typedef typename XprTraits::PointerType PointerType;
};
template<typename Generator, typename XprType>
@@ -87,40 +88,58 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
+ typedef StorageMemory<CoeffReturnType, Device> Storage;
+ typedef typename Storage::Type EvaluatorPointerType;
enum {
- IsAligned = false,
- PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
- BlockAccess = false,
- Layout = TensorEvaluator<ArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- RawAccess = false
+ IsAligned = false,
+ PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
+ BlockAccess = true,
+ PreferBlockAccess = true,
+ Layout = TensorEvaluator<ArgType, Device>::Layout,
+ CoordAccess = false, // to be implemented
+ RawAccess = false
};
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_generator(op.generator())
+ typedef internal::TensorIntDivisor<Index> IndexDivisor;
+
+ //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
+ typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
+ typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
+
+ typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
+ Layout, Index>
+ TensorBlock;
+ //===--------------------------------------------------------------------===//
+
+ EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+ : m_device(device), m_generator(op.generator())
{
- TensorEvaluator<ArgType, Device> impl(op.expression(), device);
- m_dimensions = impl.dimensions();
+ TensorEvaluator<ArgType, Device> argImpl(op.expression(), device);
+ m_dimensions = argImpl.dimensions();
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
m_strides[0] = 1;
+ EIGEN_UNROLL_LOOP
for (int i = 1; i < NumDims; ++i) {
m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
+ if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
}
} else {
m_strides[NumDims - 1] = 1;
+ EIGEN_UNROLL_LOOP
for (int i = NumDims - 2; i >= 0; --i) {
m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
+ if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
}
}
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+ EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
return true;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ EIGEN_STRONG_INLINE void cleanup() {
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
@@ -133,7 +152,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
- const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+ const int packetSize = PacketType<CoeffReturnType, Device>::size;
EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+packetSize-1 < dimensions().TotalSize());
@@ -145,6 +164,97 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
return rslt;
}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ internal::TensorBlockResourceRequirements getResourceRequirements() const {
+ const size_t target_size = m_device.firstLevelCacheSize();
+ // TODO(ezhulenev): Generator should have a cost.
+ return internal::TensorBlockResourceRequirements::skewed<Scalar>(
+ target_size);
+ }
+
+ struct BlockIteratorState {
+ Index stride;
+ Index span;
+ Index size;
+ Index count;
+ };
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
+ block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
+ bool /*root_of_expr_ast*/ = false) const {
+ static const bool is_col_major =
+ static_cast<int>(Layout) == static_cast<int>(ColMajor);
+
+ // Compute spatial coordinates for the first block element.
+ array<Index, NumDims> coords;
+ extract_coordinates(desc.offset(), coords);
+ array<Index, NumDims> initial_coords = coords;
+
+ // Offset in the output block buffer.
+ Index offset = 0;
+
+ // Initialize output block iterator state. Dimension in this array are
+ // always in inner_most -> outer_most order (col major layout).
+ array<BlockIteratorState, NumDims> it;
+ for (int i = 0; i < NumDims; ++i) {
+ const int dim = is_col_major ? i : NumDims - 1 - i;
+ it[i].size = desc.dimension(dim);
+ it[i].stride = i == 0 ? 1 : (it[i - 1].size * it[i - 1].stride);
+ it[i].span = it[i].stride * (it[i].size - 1);
+ it[i].count = 0;
+ }
+ eigen_assert(it[0].stride == 1);
+
+ // Prepare storage for the materialized generator result.
+ const typename TensorBlock::Storage block_storage =
+ TensorBlock::prepareStorage(desc, scratch);
+
+ CoeffReturnType* block_buffer = block_storage.data();
+
+ static const int packet_size = PacketType<CoeffReturnType, Device>::size;
+
+ static const int inner_dim = is_col_major ? 0 : NumDims - 1;
+ const Index inner_dim_size = it[0].size;
+ const Index inner_dim_vectorized = inner_dim_size - packet_size;
+
+ while (it[NumDims - 1].count < it[NumDims - 1].size) {
+ Index i = 0;
+ // Generate data for the vectorized part of the inner-most dimension.
+ for (; i <= inner_dim_vectorized; i += packet_size) {
+ for (Index j = 0; j < packet_size; ++j) {
+ array<Index, NumDims> j_coords = coords; // Break loop dependence.
+ j_coords[inner_dim] += j;
+ *(block_buffer + offset + i + j) = m_generator(j_coords);
+ }
+ coords[inner_dim] += packet_size;
+ }
+ // Finalize non-vectorized part of the inner-most dimension.
+ for (; i < inner_dim_size; ++i) {
+ *(block_buffer + offset + i) = m_generator(coords);
+ coords[inner_dim]++;
+ }
+ coords[inner_dim] = initial_coords[inner_dim];
+
+ // For the 1d tensor we need to generate only one inner-most dimension.
+ if (NumDims == 1) break;
+
+ // Update offset.
+ for (i = 1; i < NumDims; ++i) {
+ if (++it[i].count < it[i].size) {
+ offset += it[i].stride;
+ coords[is_col_major ? i : NumDims - 1 - i]++;
+ break;
+ }
+ if (i != NumDims - 1) it[i].count = 0;
+ coords[is_col_major ? i : NumDims - 1 - i] =
+ initial_coords[is_col_major ? i : NumDims - 1 - i];
+ offset -= it[i].span;
+ }
+ }
+
+ return block_storage.AsTensorMaterializedBlock();
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
costPerCoeff(bool) const {
// TODO(rmlarsen): This is just a placeholder. Define interface to make
@@ -153,21 +263,26 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
TensorOpCost::MulCost<Scalar>());
}
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
+ EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
+
+#ifdef EIGEN_USE_SYCL
+ // binding placeholder accessors to a command group handler for SYCL
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler&) const {}
+#endif
protected:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void extract_coordinates(Index index, array<Index, NumDims>& coords) const {
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = index / m_strides[i];
+ const Index idx = index / m_fast_strides[i];
index -= idx * m_strides[i];
coords[i] = idx;
}
coords[0] = index;
} else {
for (int i = 0; i < NumDims - 1; ++i) {
- const Index idx = index / m_strides[i];
+ const Index idx = index / m_fast_strides[i];
index -= idx * m_strides[i];
coords[i] = idx;
}
@@ -175,8 +290,10 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
}
}
+ const Device EIGEN_DEVICE_REF m_device;
Dimensions m_dimensions;
array<Index, NumDims> m_strides;
+ array<IndexDivisor, NumDims> m_fast_strides;
Generator m_generator;
};