aboutsummaryrefslogtreecommitdiff
path: root/Eigen/src/Core/AssignEvaluator.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/AssignEvaluator.h')
-rw-r--r--Eigen/src/Core/AssignEvaluator.h179
1 files changed, 127 insertions, 52 deletions
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index b0ec7b7ca..7d76f0c25 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -17,29 +17,29 @@ namespace Eigen {
// This implementation is based on Assign.h
namespace internal {
-
+
/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling *
***************************************************************************/
// copy_using_evaluator_traits is based on assign_traits
-template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
+template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1>
struct copy_using_evaluator_traits
{
typedef typename DstEvaluator::XprType Dst;
typedef typename Dst::Scalar DstScalar;
-
+
enum {
DstFlags = DstEvaluator::Flags,
SrcFlags = SrcEvaluator::Flags
};
-
+
public:
enum {
DstAlignment = DstEvaluator::Alignment,
SrcAlignment = SrcEvaluator::Alignment,
- DstHasDirectAccess = DstFlags & DirectAccessBit,
+ DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
};
@@ -51,13 +51,15 @@ private:
InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
: int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
: int(Dst::MaxRowsAtCompileTime),
+ RestrictedInnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(InnerSize,MaxPacketSize),
+ RestrictedLinearSize = EIGEN_SIZE_MIN_PREFER_FIXED(Dst::SizeAtCompileTime,MaxPacketSize),
OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
MaxSizeAtCompileTime = Dst::SizeAtCompileTime
};
// TODO distinguish between linear traversal and inner-traversals
- typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type LinearPacketType;
- typedef typename find_best_packet<DstScalar,InnerSize>::type InnerPacketType;
+ typedef typename find_best_packet<DstScalar,RestrictedLinearSize>::type LinearPacketType;
+ typedef typename find_best_packet<DstScalar,RestrictedInnerSize>::type InnerPacketType;
enum {
LinearPacketSize = unpacket_traits<LinearPacketType>::size,
@@ -83,7 +85,7 @@ private:
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
&& (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
- MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
+ MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
&& (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
@@ -97,7 +99,8 @@ private:
public:
enum {
- Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal)
+ Traversal = int(Dst::SizeAtCompileTime) == 0 ? int(AllAtOnceTraversal) // If compile-size is zero, traversing will fail at compile-time.
+ : (int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize)) ? int(LinearVectorizedTraversal)
: int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
@@ -135,7 +138,7 @@ public:
? int(CompleteUnrolling)
: int(NoUnrolling) )
: int(Traversal) == int(LinearTraversal)
- ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
+ ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
: int(NoUnrolling) )
#if EIGEN_UNALIGNED_VECTORIZE
: int(Traversal) == int(SliceVectorizedTraversal)
@@ -172,6 +175,8 @@ public:
EIGEN_DEBUG_VAR(MaySliceVectorize)
std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
+ EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost)
+ EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime)
EIGEN_DEBUG_VAR(UnrollingLimit)
EIGEN_DEBUG_VAR(MayUnrollCompletely)
EIGEN_DEBUG_VAR(MayUnrollInner)
@@ -195,7 +200,7 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
// FIXME: this is not very clean, perhaps this information should be provided by the kernel?
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
-
+
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime
@@ -261,7 +266,7 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
typedef typename DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
-
+
enum {
outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime,
@@ -313,6 +318,22 @@ template<typename Kernel,
struct dense_assignment_loop;
/************************
+***** Special Cases *****
+************************/
+
+// Zero-sized assignment is a no-op.
+template<typename Kernel, int Unrolling>
+struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>
+{
+ EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& /*kernel*/)
+ {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ EIGEN_STATIC_ASSERT(int(DstXprType::SizeAtCompileTime) == 0,
+ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
+ }
+};
+
+/************************
*** Default traversal ***
************************/
@@ -426,10 +447,10 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
{
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
-
+
enum { size = DstXprType::SizeAtCompileTime,
packetSize =unpacket_traits<PacketType>::size,
- alignedSize = (size/packetSize)*packetSize };
+ alignedSize = (int(size)/packetSize)*packetSize };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
@@ -530,7 +551,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
const Scalar *dst_ptr = kernel.dstDataPtr();
if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
{
- // the pointer is not aligend-on scalar, so alignment is not possible
+ // the pointer is not aligned-on scalar, so alignment is not possible
return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
}
const Index packetAlignedMask = packetSize - 1;
@@ -568,14 +589,15 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
typedef typename Kernel::PacketType PacketType;
- enum { size = DstXprType::InnerSizeAtCompileTime,
+ enum { innerSize = DstXprType::InnerSizeAtCompileTime,
packetSize =unpacket_traits<PacketType>::size,
- vectorizableSize = (size/packetSize)*packetSize };
+ vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize),
+ size = DstXprType::SizeAtCompileTime };
for(Index outer = 0; outer < kernel.outerSize(); ++outer)
{
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
- copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
+ copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, innerSize>::run(kernel, outer);
}
}
};
@@ -599,73 +621,74 @@ protected:
typedef typename DstEvaluatorTypeT::XprType DstXprType;
typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
public:
-
+
typedef DstEvaluatorTypeT DstEvaluatorType;
typedef SrcEvaluatorTypeT SrcEvaluatorType;
typedef typename DstEvaluatorType::Scalar Scalar;
typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
typedef typename AssignmentTraits::PacketType PacketType;
-
-
- EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
+
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
: m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
{
#ifdef EIGEN_DEBUG_ASSIGN
AssignmentTraits::debug();
#endif
}
-
- EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); }
- EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); }
- EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); }
- EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); }
- EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); }
- EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }
-
- EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
- EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }
-
+
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_dstExpr.size(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index innerSize() const EIGEN_NOEXCEPT { return m_dstExpr.innerSize(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerSize() const EIGEN_NOEXCEPT { return m_dstExpr.outerSize(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dstExpr.rows(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_dstExpr.cols(); }
+ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT { return m_dstExpr.outerStride(); }
+
+ EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() EIGEN_NOEXCEPT { return m_dst; }
+ EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; }
+
/// Assign src(row,col) to dst(row,col) through the assignment functor.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
{
m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
}
-
+
/// \sa assignCoeff(Index,Index)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
{
m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
}
-
+
/// \sa assignCoeff(Index,Index)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
{
- Index row = rowIndexByOuterInner(outer, inner);
- Index col = colIndexByOuterInner(outer, inner);
+ Index row = rowIndexByOuterInner(outer, inner);
+ Index col = colIndexByOuterInner(outer, inner);
assignCoeff(row, col);
}
-
-
+
+
template<int StoreMode, int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
{
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
}
-
+
template<int StoreMode, int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
{
m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
}
-
+
template<int StoreMode, int LoadMode, typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
{
- Index row = rowIndexByOuterInner(outer, inner);
+ Index row = rowIndexByOuterInner(outer, inner);
Index col = colIndexByOuterInner(outer, inner);
assignPacket<StoreMode,LoadMode,PacketType>(row, col);
}
-
+
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
{
typedef typename DstEvaluatorType::ExpressionTraits Traits;
@@ -688,7 +711,7 @@ public:
{
return m_dstExpr.data();
}
-
+
protected:
DstEvaluatorType& m_dst;
const SrcEvaluatorType& m_src;
@@ -697,6 +720,27 @@ protected:
DstXprType& m_dstExpr;
};
+// Special kernel used when computing small products whose operands have dynamic dimensions. It ensures that the
+// PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used
+// when computing the product.
+
+template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
+class restricted_packet_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn>
+{
+protected:
+ typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;
+ public:
+ typedef typename Base::Scalar Scalar;
+ typedef typename Base::DstXprType DstXprType;
+ typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
+ typedef typename AssignmentTraits::PacketType PacketType;
+
+ EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
+ : Base(dst, src, func, dstExpr)
+ {
+ }
+ };
+
/***************************************************************************
* Part 5 : Entry point for dense rectangular assignment
***************************************************************************/
@@ -734,13 +778,23 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType
resize_if_allowed(dst, src, func);
DstEvaluatorType dstEvaluator(dst);
-
+
typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
dense_assignment_loop<Kernel>::run(kernel);
}
+// Specialization for filling the destination with a constant value.
+#ifndef EIGEN_GPU_COMPILE_PHASE
+template<typename DstXprType>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const Eigen::CwiseNullaryOp<Eigen::internal::scalar_constant_op<typename DstXprType::Scalar>, DstXprType>& src, const internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>& func)
+{
+ resize_if_allowed(dst, src, func);
+ std::fill_n(dst.data(), dst.size(), src.functor()());
+}
+#endif
+
template<typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
{
@@ -756,13 +810,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType
// AssignmentKind must define a Kind typedef.
template<typename DstShape, typename SrcShape> struct AssignmentKind;
-// Assignement kind defined in this file:
+// Assignment kind defined in this file:
struct Dense2Dense {};
struct EigenBase2EigenBase {};
template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
-
+
// This is the main assignment class
template< typename DstXprType, typename SrcXprType, typename Functor,
typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
@@ -787,7 +841,7 @@ void call_assignment(const Dst& dst, const Src& src)
{
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
-
+
// Deal with "assume-aliasing"
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -827,14 +881,35 @@ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
ActualDstType actualDst(dst);
-
+
// TODO check whether this is the right place to perform these checks:
EIGEN_STATIC_ASSERT_LVALUE(Dst)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
-
+
Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
}
+
+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
+{
+ typedef evaluator<Dst> DstEvaluatorType;
+ typedef evaluator<Src> SrcEvaluatorType;
+ typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Func> Kernel;
+
+ EIGEN_STATIC_ASSERT_LVALUE(Dst)
+ EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
+
+ SrcEvaluatorType srcEvaluator(src);
+ resize_if_allowed(dst, src, func);
+
+ DstEvaluatorType dstEvaluator(dst);
+ Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
+
+ dense_assignment_loop<Kernel>::run(kernel);
+}
+
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias(Dst& dst, const Src& src)
@@ -875,7 +950,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
#ifndef EIGEN_NO_DEBUG
internal::check_for_aliasing(dst, src);
#endif
-
+
call_dense_assignment_loop(dst, src, func);
}
};
@@ -899,7 +974,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
src.evalTo(dst);
}
- // NOTE The following two functions are templated to avoid their instanciation if not needed
+ // NOTE The following two functions are templated to avoid their instantiation if not needed
// This is needed because some expressions supports evalTo only and/or have 'void' as scalar type.
template<typename SrcScalarType>
EIGEN_DEVICE_FUNC