diff options
Diffstat (limited to 'Eigen/src/Core/products/TriangularMatrixMatrix.h')
-rw-r--r-- | Eigen/src/Core/products/TriangularMatrixMatrix.h | 95 |
1 files changed, 63 insertions, 32 deletions
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index 6ec5a8a0b..f0c60507a 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -45,22 +45,24 @@ template <typename Scalar, typename Index, int Mode, bool LhsIsTriangular, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs, - int ResStorageOrder, int Version = Specialized> + int ResStorageOrder, int ResInnerStride, + int Version = Specialized> struct product_triangular_matrix_matrix; template <typename Scalar, typename Index, int Mode, bool LhsIsTriangular, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs, int Version> + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,RowMajor,Version> + RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,Version> { static EIGEN_STRONG_INLINE void run( Index rows, Index cols, Index depth, const Scalar* lhs, Index lhsStride, const Scalar* rhs, Index rhsStride, - Scalar* res, Index resStride, + Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking) { product_triangular_matrix_matrix<Scalar, Index, @@ -70,18 +72,19 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular, ConjugateRhs, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, - ColMajor> - ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking); + ColMajor, ResInnerStride> + ::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking); } }; // implements col-major += alpha * op(triangular) * op(general) template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs, int Version> + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,ColMajor,Version> + RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version> { typedef gebp_traits<Scalar,Scalar> Traits; @@ -95,20 +98,21 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true, Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* res, Index resStride, + Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking); }; template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs, int Version> + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run( + RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run( Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* _res, Index resStride, + Scalar* _res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking) { // strip zeros @@ -119,10 +123,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper; typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper; - typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper; + typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper; LhsMapper lhs(_lhs,lhsStride); RhsMapper rhs(_rhs,rhsStride); - ResMapper res(_res, resStride); + ResMapper res(_res, resStride, resIncr); Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction @@ -137,7 +141,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); - Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert())); + // To work around an "error: member reference base type 'Matrix<...> + // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is + // not a structure or union" compilation error in nvcc (tested V8.0.61), + // create a dummy internal::constructor_without_unaligned_array_assert + // object to pass to the Matrix constructor. + internal::constructor_without_unaligned_array_assert a; + Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); @@ -145,7 +155,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, triangularBuffer.diagonal().setOnes(); gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; - gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; + gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs; gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs; for(Index k2=IsLower ? depth : 0; @@ -216,7 +226,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, for(Index i2=start; i2<end; i2+=mc) { const Index actual_mc = (std::min)(i2+mc,end)-i2; - gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>() + gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder,false>() (blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc); gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, @@ -229,10 +239,11 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, // implements col-major += alpha * op(general) * op(triangular) template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs, int Version> + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> struct product_triangular_matrix_matrix<Scalar,Index,Mode,false, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,ColMajor,Version> + RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version> { typedef gebp_traits<Scalar,Scalar> Traits; enum { @@ -245,20 +256,21 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false, Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* res, Index resStride, + Scalar* res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking); }; template <typename Scalar, typename Index, int Mode, int LhsStorageOrder, bool ConjugateLhs, - int RhsStorageOrder, bool ConjugateRhs, int Version> + int RhsStorageOrder, bool ConjugateRhs, + int ResInnerStride, int Version> EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false, LhsStorageOrder,ConjugateLhs, - RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run( + RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run( Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride, const Scalar* _rhs, Index rhsStride, - Scalar* _res, Index resStride, + Scalar* _res, Index resIncr, Index resStride, const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking) { const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar); @@ -270,10 +282,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false, typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper; typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper; - typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper; + typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper; LhsMapper lhs(_lhs,lhsStride); RhsMapper rhs(_rhs,rhsStride); - ResMapper res(_res, resStride); + ResMapper res(_res, resStride, resIncr); Index kc = blocking.kc(); // cache block size along the K direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction @@ -284,7 +296,8 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false, ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); - Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert())); + internal::constructor_without_unaligned_array_assert a; + Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); @@ -292,7 +305,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false, triangularBuffer.diagonal().setOnes(); gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; - gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; + gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs; gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs; gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel; @@ -393,7 +406,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false> { template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha) { - typedef typename Dest::Scalar Scalar; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar Scalar; typedef internal::blas_traits<Lhs> LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; @@ -405,8 +420,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false> typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs); typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) - * RhsBlasTraits::extractScalarFactor(a_rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs); + Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha; typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; @@ -423,14 +439,29 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false> Mode, LhsIsTriangular, (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, (internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, - (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor> + (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor, Dest::InnerStrideAtCompileTime> ::run( stripedRows, stripedCols, stripedDepth, // sizes &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info - &dst.coeffRef(0,0), dst.outerStride(), // result info + &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info actualAlpha, blocking ); + + // Apply correction if the diagonal is unit and a scalar factor was nested: + if ((Mode&UnitDiag)==UnitDiag) + { + if (LhsIsTriangular && lhs_alpha!=LhsScalar(1)) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize); + } + else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1)) + { + Index diagSize = (std::min)(rhs.rows(),rhs.cols()); + dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize); + } + } } }; |