Diffstat (limited to 'Eigen/src/Core/PartialReduxEvaluator.h')
-rw-r--r--  Eigen/src/Core/PartialReduxEvaluator.h  232
1 files changed, 232 insertions, 0 deletions
diff --git a/Eigen/src/Core/PartialReduxEvaluator.h b/Eigen/src/Core/PartialReduxEvaluator.h
new file mode 100644
index 000000000..29abf35b9
--- /dev/null
+++ b/Eigen/src/Core/PartialReduxEvaluator.h
@@ -0,0 +1,232 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2011-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PARTIALREDUX_H
+#define EIGEN_PARTIALREDUX_H
+
+namespace Eigen {
+
+namespace internal {
+
+
+/***************************************************************************
+*
+* This file provides evaluators for partial reductions.
+* There are two modes:
+*
+* - scalar path: simply calls the respective function on the column or row.
+*   -> nothing special here; all the tricky parts are handled by the return
+*   types of VectorwiseOp's members, which embed the functor calling the
+*   respective DenseBase member function.
+*
+* - vectorized path: implements packet-wise reductions followed by
+* some (optional) processing of the outcome, e.g., division by n for mean.
+*
+* For the vectorized path, observe that the packet size and outer unrolling
+* are both decided by the assignment logic. So all we have to do is decide
+* on the inner unrolling.
+*
+* For the unrolling, we can reuse "internal::redux_vec_unroller" from Redux.h,
+* but we need to be careful to specify the correct increment.
+*
+***************************************************************************/
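+
+// For illustration only (user-side code, not part of this file): an expression
+// such as
+//   Eigen::RowVectorXf s = m.colwise().sum();   // m is some Eigen::MatrixXf
+// builds a PartialReduxExpr that is evaluated either by calling sum() on each
+// column (scalar path), or by the packet-wise reduction implemented below
+// (vectorized path).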
+
+
+/* Logic deciding the unrolling strategy for the vectorized path */
+template<typename Func, typename Evaluator>
+struct packetwise_redux_traits
+{
+ enum {
+ OuterSize = int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime,
+ Cost = OuterSize == Dynamic ? HugeCost
+ : OuterSize * Evaluator::CoeffReadCost + (OuterSize-1) * functor_traits<Func>::Cost,
+ Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? CompleteUnrolling : NoUnrolling
+ };
+
+};
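+
+// For instance, with made-up but representative numbers: a fixed-size panel
+// with OuterSize==4, CoeffReadCost==1 and functor_traits<Func>::Cost==1 yields
+//   Cost = 4*1 + 3*1 = 7 <= EIGEN_UNROLLING_LIMIT,
+// hence CompleteUnrolling; a Dynamic OuterSize yields HugeCost, hence NoUnrolling.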
+
+/* Value to be returned when size==0; by default return 0 */
+template<typename PacketType,typename Func>
+EIGEN_DEVICE_FUNC
+PacketType packetwise_redux_empty_value(const Func& ) { return pset1<PacketType>(0); }
+
+/* For products the default is 1 */
+template<typename PacketType,typename Scalar>
+EIGEN_DEVICE_FUNC
+PacketType packetwise_redux_empty_value(const scalar_product_op<Scalar,Scalar>& ) { return pset1<PacketType>(1); }
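+
+// These defaults mirror the empty-reduction conventions of the scalar path,
+// e.g. (user-side): MatrixXf(0,4).colwise().sum() is a row vector of zeros,
+// while MatrixXf(0,4).colwise().prod() is a row vector of ones.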
+
+/* Perform the actual reduction */
+template<typename Func, typename Evaluator,
+ int Unrolling = packetwise_redux_traits<Func, Evaluator>::Unrolling
+>
+struct packetwise_redux_impl;
+
+/* Perform the actual reduction with unrolling */
+template<typename Func, typename Evaluator>
+struct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling>
+{
+ typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
+ typedef typename Evaluator::Scalar Scalar;
+
+ template<typename PacketType>
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
+ PacketType run(const Evaluator &eval, const Func& func, Index /*size*/)
+ {
+ return redux_vec_unroller<Func, Evaluator, 0, packetwise_redux_traits<Func, Evaluator>::OuterSize>::template run<PacketType>(eval,func);
+ }
+};
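+
+// Note: redux_vec_unroller (defined in Redux.h) recursively splits the range
+// [0,OuterSize) in two halves at compile time, yielding a balanced tree of
+// packetOp calls with no runtime loop.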
+
+/* Add a specialization of redux_vec_unroller for size==0 at compile time.
+ * This specialization is not required for general reductions, which is
+ * why it is defined here.
+ */
+template<typename Func, typename Evaluator, int Start>
+struct redux_vec_unroller<Func, Evaluator, Start, 0>
+{
+ template<typename PacketType>
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE PacketType run(const Evaluator &, const Func& f)
+ {
+ return packetwise_redux_empty_value<PacketType>(f);
+ }
+};
+
+/* Perform the actual reduction for dynamic sizes */
+template<typename Func, typename Evaluator>
+struct packetwise_redux_impl<Func, Evaluator, NoUnrolling>
+{
+ typedef typename Evaluator::Scalar Scalar;
+ typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
+
+ template<typename PacketType>
+ EIGEN_DEVICE_FUNC
+ static PacketType run(const Evaluator &eval, const Func& func, Index size)
+ {
+ if(size==0)
+ return packetwise_redux_empty_value<PacketType>(func);
+
+ const Index size4 = (size-1)&(~3);
+ PacketType p = eval.template packetByOuterInner<Unaligned,PacketType>(0,0);
+ Index i = 1;
+ // This loop is optimized for instruction pipelining:
+ // - each iteration issues two independent packetOp calls on disjoint inputs,
+ // - thanks to branch prediction and out-of-order execution, instructions
+ //   from consecutive iterations are also independent.
+ for(; i<size4; i+=4)
+ p = func.packetOp(p,
+ func.packetOp(
+ func.packetOp(eval.template packetByOuterInner<Unaligned,PacketType>(i+0,0),eval.template packetByOuterInner<Unaligned,PacketType>(i+1,0)),
+ func.packetOp(eval.template packetByOuterInner<Unaligned,PacketType>(i+2,0),eval.template packetByOuterInner<Unaligned,PacketType>(i+3,0))));
+ for(; i<size; ++i)
+ p = func.packetOp(p, eval.template packetByOuterInner<Unaligned,PacketType>(i,0));
+ return p;
+ }
+};
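+
+// To see why the 4x unrolled loop above pipelines well, each iteration is
+// equivalent to (writing op for func.packetOp and x[k] for the k-th packet):
+//   p = op(p, op(op(x[i+0], x[i+1]), op(x[i+2], x[i+3])));
+// The three inner op's do not depend on the accumulator p, so consecutive
+// iterations can overlap in flight.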
+
+template< typename ArgType, typename MemberOp, int Direction>
+struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
+ : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >
+{
+ typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
+ typedef typename internal::nested_eval<ArgType,1>::type ArgTypeNested;
+ typedef typename internal::add_const_on_value_type<ArgTypeNested>::type ConstArgTypeNested;
+ typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
+ typedef typename ArgType::Scalar InputScalar;
+ typedef typename XprType::Scalar Scalar;
+ enum {
+ TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime)
+ };
+ typedef typename MemberOp::template Cost<int(TraversalSize)> CostOpType;
+ enum {
+ CoeffReadCost = TraversalSize==Dynamic ? HugeCost
+ : TraversalSize==0 ? 1
+ : int(TraversalSize) * int(evaluator<ArgType>::CoeffReadCost) + int(CostOpType::value),
+
+ _ArgFlags = evaluator<ArgType>::Flags,
+
+ _Vectorizable = bool(int(_ArgFlags)&PacketAccessBit)
+ && bool(MemberOp::Vectorizable)
+ && (Direction==int(Vertical) ? bool(_ArgFlags&RowMajorBit) : (_ArgFlags&RowMajorBit)==0)
+ && (TraversalSize!=0),
+
+ Flags = (traits<XprType>::Flags&RowMajorBit)
+ | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit)))
+ | (_Vectorizable ? PacketAccessBit : 0)
+ | LinearAccessBit,
+
+ Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
+ };
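+
+ // Note on _Vectorizable: a Vertical (column-wise) reduction is vectorized
+ // only for a row-major argument, so that a single packet load gathers one
+ // coefficient from each of PacketSize consecutive columns (one reduction per
+ // packet lane); symmetrically, Horizontal requires a column-major argument.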
+
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr)
+ : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())
+ {
+ EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : (TraversalSize==0 ? 1 : int(CostOpType::value)));
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+ }
+
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Scalar coeff(Index i, Index j) const
+ {
+ return coeff(Direction==Vertical ? j : i);
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Scalar coeff(Index index) const
+ {
+ return m_functor(m_arg.template subVector<DirectionType(Direction)>(index));
+ }
+
+ template<int LoadMode,typename PacketType>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ PacketType packet(Index i, Index j) const
+ {
+ return packet<LoadMode,PacketType>(Direction==Vertical ? j : i);
+ }
+
+ template<int LoadMode,typename PacketType>
+ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+ PacketType packet(Index idx) const
+ {
+ enum { PacketSize = internal::unpacket_traits<PacketType>::size };
+ typedef Block<const ArgTypeNestedCleaned,
+ Direction==Vertical ? int(ArgType::RowsAtCompileTime) : int(PacketSize),
+ Direction==Vertical ? int(PacketSize) : int(ArgType::ColsAtCompileTime),
+ true /* InnerPanel */> PanelType;
+
+ PanelType panel(m_arg,
+ Direction==Vertical ? 0 : idx,
+ Direction==Vertical ? idx : 0,
+ Direction==Vertical ? m_arg.rows() : Index(PacketSize),
+ Direction==Vertical ? Index(PacketSize) : m_arg.cols());
+
+ // FIXME
+ // See bug 1612: currently, if PacketSize==1 (e.g., std::complex<double> with
+ // 128-bit registers), the storage order of the panel gets reversed, and
+ // methods like packetByOuterInner no longer make sense in this context.
+ // So let's just bypass "vectorization" in this case:
+ if(PacketSize==1)
+ return internal::pset1<PacketType>(coeff(idx));
+
+ typedef typename internal::redux_evaluator<PanelType> PanelEvaluator;
+ PanelEvaluator panel_eval(panel);
+ typedef typename MemberOp::BinaryOp BinaryOp;
+ PacketType p = internal::packetwise_redux_impl<BinaryOp,PanelEvaluator>::template run<PacketType>(panel_eval,m_functor.binaryFunc(),m_arg.outerSize());
+ return p;
+ }
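+
+ // Summary of the packet path above: packet(idx) reduces a PacketSize-wide
+ // panel (PacketSize columns for a Vertical reduction, PacketSize rows
+ // otherwise) in a single pass, accumulating PacketSize partial reductions
+ // simultaneously, one per packet lane.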
+
+protected:
+ ConstArgTypeNested m_arg;
+ const MemberOp m_functor;
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PARTIALREDUX_H