diff options
Diffstat (limited to 'Eigen/src/Core/Transpose.h')
-rw-r--r-- | Eigen/src/Core/Transpose.h | 115 |
1 files changed, 88 insertions, 27 deletions
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 79b767bcc..2bc658f40 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -11,7 +11,7 @@ #ifndef EIGEN_TRANSPOSE_H #define EIGEN_TRANSPOSE_H -namespace Eigen { +namespace Eigen { namespace internal { template<typename MatrixType> @@ -61,24 +61,27 @@ template<typename MatrixType> class Transpose typedef typename internal::remove_all<MatrixType>::type NestedExpression; EIGEN_DEVICE_FUNC - explicit inline Transpose(MatrixType& matrix) : m_matrix(matrix) {} + explicit EIGEN_STRONG_INLINE Transpose(MatrixType& matrix) : m_matrix(matrix) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose) - EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.cols(); } - EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR + Index rows() const EIGEN_NOEXCEPT { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR + Index cols() const EIGEN_NOEXCEPT { return m_matrix.rows(); } /** \returns the nested expression */ - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename internal::remove_all<MatrixTypeNested>::type& nestedExpression() const { return m_matrix; } /** \returns the nested expression */ - EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::remove_reference<MatrixTypeNested>::type& nestedExpression() { return m_matrix; } /** \internal */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index nrows, Index ncols) { m_matrix.resize(ncols,nrows); } @@ -122,8 +125,10 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense> EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl) - EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); } - EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index innerStride() const { return derived().nestedExpression().innerStride(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index outerStride() const { return derived().nestedExpression().outerStride(); } typedef typename internal::conditional< internal::is_lvalue<MatrixType>::value, @@ -131,21 +136,25 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense> const Scalar >::type ScalarWithConstIfNotLvalue; - EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); } - EIGEN_DEVICE_FUNC inline const Scalar* data() const { return derived().nestedExpression().data(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar* data() const { return derived().nestedExpression().data(); } // FIXME: shall we keep the const version of coeffRef? - EIGEN_DEVICE_FUNC - inline const Scalar& coeffRef(Index rowId, Index colId) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar& coeffRef(Index rowId, Index colId) const { return derived().nestedExpression().coeffRef(colId, rowId); } - EIGEN_DEVICE_FUNC - inline const Scalar& coeffRef(Index index) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar& coeffRef(Index index) const { return derived().nestedExpression().coeffRef(index); } + protected: + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TransposeImpl) }; /** \returns an expression of the transpose of *this. @@ -168,7 +177,8 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense> * * \sa transposeInPlace(), adjoint() */ template<typename Derived> -inline Transpose<Derived> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Transpose<Derived> DenseBase<Derived>::transpose() { return TransposeReturnType(derived()); @@ -180,7 +190,8 @@ DenseBase<Derived>::transpose() * * \sa transposeInPlace(), adjoint() */ template<typename Derived> -inline typename DenseBase<Derived>::ConstTransposeReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +typename DenseBase<Derived>::ConstTransposeReturnType DenseBase<Derived>::transpose() const { return ConstTransposeReturnType(derived()); @@ -206,7 +217,7 @@ DenseBase<Derived>::transpose() const * * \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */ template<typename Derived> -inline const typename MatrixBase<Derived>::AdjointReturnType +EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::AdjointReturnType MatrixBase<Derived>::adjoint() const { return AdjointReturnType(this->transpose()); @@ -228,11 +239,10 @@ struct inplace_transpose_selector; template<typename MatrixType> struct inplace_transpose_selector<MatrixType,true,false> { // square matrix static void run(MatrixType& m) { - m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose()); + m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose().template triangularView<StrictlyUpper>()); } }; -// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only. template<typename MatrixType> struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize static void run(MatrixType& m) { @@ -249,16 +259,66 @@ struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x Packet } }; + +template <typename MatrixType, Index Alignment> +void BlockedInPlaceTranspose(MatrixType& m) { + typedef typename MatrixType::Scalar Scalar; + typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet; + const Index PacketSize = internal::packet_traits<Scalar>::size; + eigen_assert(m.rows() == m.cols()); + int row_start = 0; + for (; row_start + PacketSize <= m.rows(); row_start += PacketSize) { + for (int col_start = row_start; col_start + PacketSize <= m.cols(); col_start += PacketSize) { + PacketBlock<Packet> A; + if (row_start == col_start) { + for (Index i=0; i<PacketSize; ++i) + A.packet[i] = m.template packetByOuterInner<Alignment>(row_start + i,col_start); + internal::ptranspose(A); + for (Index i=0; i<PacketSize; ++i) + m.template writePacket<Alignment>(m.rowIndexByOuterInner(row_start + i, col_start), m.colIndexByOuterInner(row_start + i,col_start), A.packet[i]); + } else { + PacketBlock<Packet> B; + for (Index i=0; i<PacketSize; ++i) { + A.packet[i] = m.template packetByOuterInner<Alignment>(row_start + i,col_start); + B.packet[i] = m.template packetByOuterInner<Alignment>(col_start + i, row_start); + } + internal::ptranspose(A); + internal::ptranspose(B); + for (Index i=0; i<PacketSize; ++i) { + m.template writePacket<Alignment>(m.rowIndexByOuterInner(row_start + i, col_start), m.colIndexByOuterInner(row_start + i,col_start), B.packet[i]); + m.template writePacket<Alignment>(m.rowIndexByOuterInner(col_start + i, row_start), m.colIndexByOuterInner(col_start + i,row_start), A.packet[i]); + } + } + } + } + for (Index row = row_start; row < m.rows(); ++row) { + m.matrix().row(row).head(row).swap( + m.matrix().col(row).head(row).transpose()); + } +} + template<typename MatrixType,bool MatchPacketSize> -struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square matrix +struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square or dynamic matrix static void run(MatrixType& m) { - if (m.rows()==m.cols()) - m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose()); - else + typedef typename MatrixType::Scalar Scalar; + if (m.rows() == m.cols()) { + const Index PacketSize = internal::packet_traits<Scalar>::size; + if (!NumTraits<Scalar>::IsComplex && m.rows() >= PacketSize) { + if ((m.rows() % PacketSize) == 0) + BlockedInPlaceTranspose<MatrixType,internal::evaluator<MatrixType>::Alignment>(m); + else + BlockedInPlaceTranspose<MatrixType,Unaligned>(m); + } + else { + m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose().template triangularView<StrictlyUpper>()); + } + } else { m = m.transpose().eval(); + } } }; + } // end namespace internal /** This is the "in place" version of transpose(): it replaces \c *this by its own transpose. @@ -276,12 +336,12 @@ struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non squ * Notice however that this method is only useful if you want to replace a matrix by its own transpose. * If you just need the transpose of a matrix, use transpose(). * - * \note if the matrix is not square, then \c *this must be a resizable matrix. + * \note if the matrix is not square, then \c *this must be a resizable matrix. * This excludes (non-square) fixed-size matrices, block-expressions and maps. * * \sa transpose(), adjoint(), adjointInPlace() */ template<typename Derived> -inline void DenseBase<Derived>::transposeInPlace() +EIGEN_DEVICE_FUNC inline void DenseBase<Derived>::transposeInPlace() { eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic)) && "transposeInPlace() called on a non-square non-resizable matrix"); @@ -312,7 +372,7 @@ inline void DenseBase<Derived>::transposeInPlace() * * \sa transpose(), adjoint(), transposeInPlace() */ template<typename Derived> -inline void MatrixBase<Derived>::adjointInPlace() +EIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::adjointInPlace() { derived() = adjoint().eval(); } @@ -391,7 +451,8 @@ struct checkTransposeAliasing_impl<Derived, OtherDerived, false> template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src) { - internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src); + if((!Dst::IsVectorAtCompileTime) && dst.rows()>1 && dst.cols()>1) + internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src); } } // end namespace internal |