diff options
Diffstat (limited to 'Eigen/src/SparseLU')
-rw-r--r-- | Eigen/src/SparseLU/SparseLU.h | 176 | ||||
-rw-r--r-- | Eigen/src/SparseLU/SparseLU_Memory.h | 2 | ||||
-rw-r--r-- | Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h | 78 | ||||
-rw-r--r-- | Eigen/src/SparseLU/SparseLU_column_dfs.h | 4 | ||||
-rw-r--r-- | Eigen/src/SparseLU/SparseLU_gemm_kernel.h | 2 | ||||
-rw-r--r-- | Eigen/src/SparseLU/SparseLU_panel_bmod.h | 2 |
6 files changed, 243 insertions, 21 deletions
diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index f883ab383..0c8d8939b 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -18,6 +18,63 @@ template <typename _MatrixType, typename _OrderingType = COLAMDOrdering<typename template <typename MappedSparseMatrixType> struct SparseLUMatrixLReturnType; template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixUReturnType; +template <bool Conjugate,class SparseLUType> +class SparseLUTransposeView : public SparseSolverBase<SparseLUTransposeView<Conjugate,SparseLUType> > +{ +protected: + typedef SparseSolverBase<SparseLUTransposeView<Conjugate,SparseLUType> > APIBase; + using APIBase::m_isInitialized; +public: + typedef typename SparseLUType::Scalar Scalar; + typedef typename SparseLUType::StorageIndex StorageIndex; + typedef typename SparseLUType::MatrixType MatrixType; + typedef typename SparseLUType::OrderingType OrderingType; + + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + + SparseLUTransposeView() : m_sparseLU(NULL) {} + SparseLUTransposeView(const SparseLUTransposeView& view) { + this->m_sparseLU = view.m_sparseLU; + } + void setIsInitialized(const bool isInitialized) {this->m_isInitialized = isInitialized;} + void setSparseLU(SparseLUType* sparseLU) {m_sparseLU = sparseLU;} + using APIBase::_solve_impl; + template<typename Rhs, typename Dest> + bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const + { + Dest& X(X_base.derived()); + eigen_assert(m_sparseLU->info() == Success && "The matrix should be factorized first"); + EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + + + // this ugly const_cast_derived() helps to detect aliasing when applying the permutations + for(Index j = 0; j < B.cols(); ++j){ + X.col(j) = m_sparseLU->colsPermutation() * B.const_cast_derived().col(j); + } + //Forward substitution with transposed or adjoint of U + m_sparseLU->matrixU().template solveTransposedInPlace<Conjugate>(X); + + //Backward substitution with transposed or adjoint of L + m_sparseLU->matrixL().template solveTransposedInPlace<Conjugate>(X); + + // Permute back the solution + for (Index j = 0; j < B.cols(); ++j) + X.col(j) = m_sparseLU->rowsPermutation().transpose() * X.col(j); + return true; + } + inline Index rows() const { return m_sparseLU->rows(); } + inline Index cols() const { return m_sparseLU->cols(); } + +private: + SparseLUType *m_sparseLU; + SparseLUTransposeView& operator=(const SparseLUTransposeView&); +}; + + /** \ingroup SparseLU_Module * \class SparseLU * @@ -26,7 +83,7 @@ template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixURetu * This class implements the supernodal LU factorization for general matrices. * It uses the main techniques from the sequential SuperLU package * (http://crd-legacy.lbl.gov/~xiaoye/SuperLU/). It handles transparently real - * and complex arithmetics with single and double precision, depending on the + * and complex arithmetic with single and double precision, depending on the * scalar type of your input matrix. * The code has been optimized to provide BLAS-3 operations during supernode-panel updates. * It benefits directly from the built-in high-performant Eigen BLAS routines. @@ -43,8 +100,8 @@ template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixURetu * Simple example with key steps * \code * VectorXd x(n), b(n); - * SparseMatrix<double, ColMajor> A; - * SparseLU<SparseMatrix<scalar, ColMajor>, COLAMDOrdering<Index> > solver; + * SparseMatrix<double> A; + * SparseLU<SparseMatrix<double>, COLAMDOrdering<int> > solver; * // fill A and b; * // Compute the ordering permutation vector from the structural pattern of A * solver.analyzePattern(A); @@ -97,6 +154,7 @@ class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, }; public: + SparseLU():m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) { initperfvalues(); @@ -128,6 +186,45 @@ class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, //Factorize factorize(matrix); } + + /** \returns an expression of the transposed of the factored matrix. + * + * A typical usage is to solve for the transposed problem A^T x = b: + * \code + * solver.compute(A); + * x = solver.transpose().solve(b); + * \endcode + * + * \sa adjoint(), solve() + */ + const SparseLUTransposeView<false,SparseLU<_MatrixType,_OrderingType> > transpose() + { + SparseLUTransposeView<false, SparseLU<_MatrixType,_OrderingType> > transposeView; + transposeView.setSparseLU(this); + transposeView.setIsInitialized(this->m_isInitialized); + return transposeView; + } + + + /** \returns an expression of the adjoint of the factored matrix + * + * A typical usage is to solve for the adjoint problem A' x = b: + * \code + * solver.compute(A); + * x = solver.adjoint().solve(b); + * \endcode + * + * For real scalar types, this function is equivalent to transpose(). + * + * \sa transpose(), solve() + */ + const SparseLUTransposeView<true, SparseLU<_MatrixType,_OrderingType> > adjoint() + { + SparseLUTransposeView<true, SparseLU<_MatrixType,_OrderingType> > adjointView; + adjointView.setSparseLU(this); + adjointView.setIsInitialized(this->m_isInitialized); + return adjointView; + } inline Index rows() const { return m_mat.rows(); } inline Index cols() const { return m_mat.cols(); } @@ -193,7 +290,7 @@ class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, /** \brief Reports whether previous computation was successful. * - * \returns \c Success if computation was succesful, + * \returns \c Success if computation was successful, * \c NumericalIssue if the LU factorization reports a problem, zero diagonal for instance * \c InvalidInput if the input matrix is invalid * @@ -355,6 +452,9 @@ class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, return (m_detPermR * m_detPermC) > 0 ? det : -det; } + Index nnzL() const { return m_nnzL; }; + Index nnzU() const { return m_nnzU; }; + protected: // Functions void initperfvalues() @@ -391,7 +491,6 @@ class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, private: // Disable copy constructor SparseLU (const SparseLU& ); - }; // End class SparseLU @@ -499,11 +598,8 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix) eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices"); - typedef typename IndexVector::Scalar StorageIndex; - m_isInitialized = true; - // Apply the column permutation computed in analyzepattern() // m_mat = matrix * m_perm_c.inverse(); m_mat = matrix; @@ -587,7 +683,6 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix) // (a) a relaxed supernode at the bottom of the etree, or // (b) panel_size contiguous columns, <panel_size> defined by the user Index jcol; - IndexVector panel_histo(n); Index pivrow; // Pivotal row number in the original row matrix Index nseg1; // Number of segments in U-column above panel row jcol Index nseg; // Number of segments in each U-column @@ -706,13 +801,19 @@ struct SparseLUMatrixLReturnType : internal::no_assignment_operator typedef typename MappedSupernodalType::Scalar Scalar; explicit SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL) { } - Index rows() { return m_mapL.rows(); } - Index cols() { return m_mapL.cols(); } + Index rows() const { return m_mapL.rows(); } + Index cols() const { return m_mapL.cols(); } template<typename Dest> void solveInPlace( MatrixBase<Dest> &X) const { m_mapL.solveInPlace(X); } + template<bool Conjugate, typename Dest> + void solveTransposedInPlace( MatrixBase<Dest> &X) const + { + m_mapL.template solveTransposedInPlace<Conjugate>(X); + } + const MappedSupernodalType& m_mapL; }; @@ -723,8 +824,8 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU) : m_mapL(mapL),m_mapU(mapU) { } - Index rows() { return m_mapL.rows(); } - Index cols() { return m_mapL.cols(); } + Index rows() const { return m_mapL.rows(); } + Index cols() const { return m_mapL.cols(); } template<typename Dest> void solveInPlace(MatrixBase<Dest> &X) const { @@ -747,8 +848,9 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator } else { + // FIXME: the following lines should use Block expressions and not Map! Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) ); - Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); + Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X.coeffRef(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); U = A.template triangularView<Upper>().solve(U); } @@ -766,6 +868,52 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator } } // End For U-solve } + + template<bool Conjugate, typename Dest> void solveTransposedInPlace(MatrixBase<Dest> &X) const + { + using numext::conj; + Index nrhs = X.cols(); + Index n = X.rows(); + // Forward solve with U + for (Index k = 0; k <= m_mapL.nsuper(); k++) + { + Index fsupc = m_mapL.supToCol()[k]; + Index lda = m_mapL.colIndexPtr()[fsupc+1] - m_mapL.colIndexPtr()[fsupc]; // leading dimension + Index nsupc = m_mapL.supToCol()[k+1] - fsupc; + Index luptr = m_mapL.colIndexPtr()[fsupc]; + + for (Index j = 0; j < nrhs; ++j) + { + for (Index jcol = fsupc; jcol < fsupc + nsupc; jcol++) + { + typename MatrixUType::InnerIterator it(m_mapU, jcol); + for ( ; it; ++it) + { + Index irow = it.index(); + X(jcol, j) -= X(irow, j) * (Conjugate? conj(it.value()): it.value()); + } + } + } + if (nsupc == 1) + { + for (Index j = 0; j < nrhs; j++) + { + X(fsupc, j) /= (Conjugate? conj(m_mapL.valuePtr()[luptr]) : m_mapL.valuePtr()[luptr]); + } + } + else + { + Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) ); + Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); + if(Conjugate) + U = A.adjoint().template triangularView<Lower>().solve(U); + else + U = A.transpose().template triangularView<Lower>().solve(U); + } + }// End For U-solve + } + + const MatrixLType& m_mapL; const MatrixUType& m_mapU; }; diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index 4dc42e87b..349bfd585 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -51,7 +51,7 @@ inline Index LUTempSpace(Index&m, Index& w) /** - * Expand the existing storage to accomodate more fill-ins + * Expand the existing storage to accommodate more fill-ins * \param vec Valid pointer to the vector to allocate or expand * \param[in,out] length At input, contain the current length of the vector that is to be increased. At output, length of the newly allocated vector * \param[in] nbElts Current number of elements in the factors diff --git a/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h index 721e1883b..0be293d17 100644 --- a/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +++ b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h @@ -75,12 +75,12 @@ class MappedSuperNodalMatrix /** * Number of rows */ - Index rows() { return m_row; } + Index rows() const { return m_row; } /** * Number of columns */ - Index cols() { return m_col; } + Index cols() const { return m_col; } /** * Return the array of nonzero values packed by column @@ -156,6 +156,9 @@ class MappedSuperNodalMatrix class InnerIterator; template<typename Dest> void solveInPlace( MatrixBase<Dest>&X) const; + template<bool Conjugate, typename Dest> + void solveTransposedInPlace( MatrixBase<Dest>&X) const; + @@ -294,6 +297,77 @@ void MappedSuperNodalMatrix<Scalar,Index_>::solveInPlace( MatrixBase<Dest>&X) co } } +template<typename Scalar, typename Index_> +template<bool Conjugate, typename Dest> +void MappedSuperNodalMatrix<Scalar,Index_>::solveTransposedInPlace( MatrixBase<Dest>&X) const +{ + using numext::conj; + Index n = int(X.rows()); + Index nrhs = Index(X.cols()); + const Scalar * Lval = valuePtr(); // Nonzero values + Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor> work(n, nrhs); // working vector + work.setZero(); + for (Index k = nsuper(); k >= 0; k--) + { + Index fsupc = supToCol()[k]; // First column of the current supernode + Index istart = rowIndexPtr()[fsupc]; // Pointer index to the subscript of the current column + Index nsupr = rowIndexPtr()[fsupc+1] - istart; // Number of rows in the current supernode + Index nsupc = supToCol()[k+1] - fsupc; // Number of columns in the current supernode + Index nrow = nsupr - nsupc; // Number of rows in the non-diagonal part of the supernode + Index irow; //Current index row + + if (nsupc == 1 ) + { + for (Index j = 0; j < nrhs; j++) + { + InnerIterator it(*this, fsupc); + ++it; // Skip the diagonal element + for (; it; ++it) + { + irow = it.row(); + X(fsupc,j) -= X(irow, j) * (Conjugate?conj(it.value()):it.value()); + } + } + } + else + { + // The supernode has more than one column + Index luptr = colIndexPtr()[fsupc]; + Index lda = colIndexPtr()[fsupc+1] - luptr; + + //Begin Gather + for (Index j = 0; j < nrhs; j++) + { + Index iptr = istart + nsupc; + for (Index i = 0; i < nrow; i++) + { + irow = rowIndex()[iptr]; + work.topRows(nrow)(i,j)= X(irow,j); // Gather operation + iptr++; + } + } + + // Matrix-vector product with transposed submatrix + Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) ); + Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); + if(Conjugate) + U = U - A.adjoint() * work.topRows(nrow); + else + U = U - A.transpose() * work.topRows(nrow); + + // Triangular solve (of transposed diagonal block) + new (&A) Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > ( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(lda) ); + if(Conjugate) + U = A.adjoint().template triangularView<UnitUpper>().solve(U); + else + U = A.transpose().template triangularView<UnitUpper>().solve(U); + + } + + } +} + + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h index c98b30e32..5a2c941b4 100644 --- a/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -151,7 +151,7 @@ Index SparseLUImpl<Scalar,StorageIndex>::column_dfs(const Index m, const Index j StorageIndex ito = glu.xlsub(fsupc+1); glu.xlsub(jcolm1) = ito; StorageIndex istop = ito + jptr - jm1ptr; - xprune(jcolm1) = istop; // intialize xprune(jcol-1) + xprune(jcolm1) = istop; // initialize xprune(jcol-1) glu.xlsub(jcol) = istop; for (StorageIndex ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito) @@ -166,7 +166,7 @@ Index SparseLUImpl<Scalar,StorageIndex>::column_dfs(const Index m, const Index j // Tidy up the pointers before exit glu.xsup(nsuper+1) = jcolp1; glu.supno(jcolp1) = nsuper; - xprune(jcol) = StorageIndex(nextl); // Intialize upper bound for pruning + xprune(jcol) = StorageIndex(nextl); // Initialize upper bound for pruning glu.xlsub(jcolp1) = StorageIndex(nextl); return 0; diff --git a/Eigen/src/SparseLU/SparseLU_gemm_kernel.h b/Eigen/src/SparseLU/SparseLU_gemm_kernel.h index 95ba7413f..e37c2fe0d 100644 --- a/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +++ b/Eigen/src/SparseLU/SparseLU_gemm_kernel.h @@ -215,7 +215,7 @@ void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\ pstore(C0+i+(I)*PacketSize, c0); - // agressive vectorization and peeling + // aggressive vectorization and peeling for(Index i=0; i<actual_b_end1; i+=PacketSize*8) { EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL2"); diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 822cf32c3..f052001c8 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -38,7 +38,7 @@ namespace internal { * \brief Performs numeric block updates (sup-panel) in topological order. * * Before entering this routine, the original nonzeros in the panel - * were already copied i nto the spa[m,w] + * were already copied into the spa[m,w] * * \param m number of rows in the matrix * \param w Panel size |