diff options
Diffstat (limited to 'internal/ceres/covariance_impl.cc')
-rw-r--r-- | internal/ceres/covariance_impl.cc | 92 |
1 files changed, 58 insertions, 34 deletions
diff --git a/internal/ceres/covariance_impl.cc b/internal/ceres/covariance_impl.cc index 1befde7..19d545c 100644 --- a/internal/ceres/covariance_impl.cc +++ b/internal/ceres/covariance_impl.cc @@ -38,6 +38,7 @@ #include <utility> #include <vector> #include "Eigen/SVD" +#include "ceres/compressed_col_sparse_matrix_utils.h" #include "ceres/compressed_row_sparse_matrix.h" #include "ceres/covariance.h" #include "ceres/crs_matrix.h" @@ -48,7 +49,6 @@ #include "ceres/suitesparse.h" #include "ceres/wall_time.h" #include "glog/logging.h" -#include "SuiteSparseQR.hpp" namespace ceres { namespace internal { @@ -56,6 +56,7 @@ namespace { // Per thread storage for SuiteSparse. #ifndef CERES_NO_SUITESPARSE + struct PerThreadContext { explicit PerThreadContext(int num_rows) : solution(NULL), @@ -81,6 +82,7 @@ struct PerThreadContext { cholmod_dense* rhs; SuiteSparse ss; }; + #endif } // namespace @@ -604,16 +606,8 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() { const int num_cols = jacobian.num_cols; const int num_nonzeros = jacobian.values.size(); - // UF_long is deprecated but SuiteSparse_long is only available in - // newer versions of SuiteSparse. -#if (SUITESPARSE_VERSION < 4002) - vector<UF_long> transpose_rows(num_cols + 1, 0); - vector<UF_long> transpose_cols(num_nonzeros, 0); -#else vector<SuiteSparse_long> transpose_rows(num_cols + 1, 0); vector<SuiteSparse_long> transpose_cols(num_nonzeros, 0); -#endif - vector<double> transpose_values(num_nonzeros, 0); for (int idx = 0; idx < num_nonzeros; ++idx) { @@ -658,23 +652,49 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() { cholmod_common cc; cholmod_l_start(&cc); - SuiteSparseQR_factorization<double>* factor = - SuiteSparseQR_factorize<double>(SPQR_ORDERING_BESTAMD, - SPQR_DEFAULT_TOL, - &cholmod_jacobian, - &cc); + cholmod_sparse* R = NULL; + SuiteSparse_long* permutation = NULL; + + // Compute a Q-less QR factorization of the Jacobian. Since we are + // only interested in inverting J'J = R'R, we do not need Q. This + // saves memory and gives us R as a permuted compressed column + // sparse matrix. + // + // TODO(sameeragarwal): Currently the symbolic factorization and the + // numeric factorization is done at the same time, and this does not + // explicitly account for the block column and row structure in the + // matrix. When using AMD, we have observed in the past that + // computing the ordering with the block matrix is significantly + // more efficient, both in runtime as well as the quality of + // ordering computed. So, it maybe worth doing that analysis + // separately. + const SuiteSparse_long rank = + SuiteSparseQR<double>(SPQR_ORDERING_BESTAMD, + SPQR_DEFAULT_TOL, + cholmod_jacobian.ncol, + &cholmod_jacobian, + &R, + &permutation, + &cc); event_logger.AddEvent("Numeric Factorization"); + CHECK_NOTNULL(permutation); + CHECK_NOTNULL(R); - const int rank = cc.SPQR_istat[4]; if (rank < cholmod_jacobian.ncol) { LOG(WARNING) << "Jacobian matrix is rank deficient." << "Number of columns: " << cholmod_jacobian.ncol << " rank: " << rank; - SuiteSparseQR_free(&factor, &cc); + delete []permutation; + cholmod_l_free_sparse(&R, &cc); cholmod_l_finish(&cc); return false; } + vector<int> inverse_permutation(num_cols); + for (SuiteSparse_long i = 0; i < num_cols; ++i) { + inverse_permutation[permutation[i]] = i; + } + const int* rows = covariance_matrix_->rows(); const int* cols = covariance_matrix_->cols(); double* values = covariance_matrix_->mutable_values(); @@ -688,35 +708,39 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() { // // Since the covariance matrix is symmetric, the i^th row and column // are equal. + const int num_threads = options_.num_threads; + scoped_array<double> workspace(new double[num_threads * num_cols]); - cholmod_dense* rhs = cholmod_l_zeros(num_cols, 1, CHOLMOD_REAL, &cc); - double* rhs_x = reinterpret_cast<double*>(rhs->x); - +#pragma omp parallel for num_threads(num_threads) schedule(dynamic) for (int r = 0; r < num_cols; ++r) { - int row_begin = rows[r]; - int row_end = rows[r + 1]; + const int row_begin = rows[r]; + const int row_end = rows[r + 1]; if (row_end == row_begin) { continue; } - rhs_x[r] = 1.0; - - cholmod_dense* y1 = SuiteSparseQR_solve<double>(SPQR_RTX_EQUALS_ETB, factor, rhs, &cc); - cholmod_dense* solution = SuiteSparseQR_solve<double>(SPQR_RETX_EQUALS_B, factor, y1, &cc); +# ifdef CERES_USE_OPENMP + int thread_id = omp_get_thread_num(); +# else + int thread_id = 0; +# endif - double* solution_x = reinterpret_cast<double*>(solution->x); + double* solution = workspace.get() + thread_id * num_cols; + SolveRTRWithSparseRHS<SuiteSparse_long>( + num_cols, + static_cast<SuiteSparse_long*>(R->i), + static_cast<SuiteSparse_long*>(R->p), + static_cast<double*>(R->x), + inverse_permutation[r], + solution); for (int idx = row_begin; idx < row_end; ++idx) { - const int c = cols[idx]; - values[idx] = solution_x[c]; + const int c = cols[idx]; + values[idx] = solution[inverse_permutation[c]]; } - - cholmod_l_free_dense(&y1, &cc); - cholmod_l_free_dense(&solution, &cc); - rhs_x[r] = 0.0; } - cholmod_l_free_dense(&rhs, &cc); - SuiteSparseQR_free(&factor, &cc); + delete []permutation; + cholmod_l_free_sparse(&R, &cc); cholmod_l_finish(&cc); event_logger.AddEvent("Inversion"); return true; |