aboutsummaryrefslogtreecommitdiff
path: root/internal/ceres/covariance_impl.cc
diff options
context:
space:
mode:
Diffstat (limited to 'internal/ceres/covariance_impl.cc')
-rw-r--r--internal/ceres/covariance_impl.cc92
1 files changed, 58 insertions, 34 deletions
diff --git a/internal/ceres/covariance_impl.cc b/internal/ceres/covariance_impl.cc
index 1befde7..19d545c 100644
--- a/internal/ceres/covariance_impl.cc
+++ b/internal/ceres/covariance_impl.cc
@@ -38,6 +38,7 @@
#include <utility>
#include <vector>
#include "Eigen/SVD"
+#include "ceres/compressed_col_sparse_matrix_utils.h"
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/covariance.h"
#include "ceres/crs_matrix.h"
@@ -48,7 +49,6 @@
#include "ceres/suitesparse.h"
#include "ceres/wall_time.h"
#include "glog/logging.h"
-#include "SuiteSparseQR.hpp"
namespace ceres {
namespace internal {
@@ -56,6 +56,7 @@ namespace {
// Per thread storage for SuiteSparse.
#ifndef CERES_NO_SUITESPARSE
+
struct PerThreadContext {
explicit PerThreadContext(int num_rows)
: solution(NULL),
@@ -81,6 +82,7 @@ struct PerThreadContext {
cholmod_dense* rhs;
SuiteSparse ss;
};
+
#endif
} // namespace
@@ -604,16 +606,8 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() {
const int num_cols = jacobian.num_cols;
const int num_nonzeros = jacobian.values.size();
- // UF_long is deprecated but SuiteSparse_long is only available in
- // newer versions of SuiteSparse.
-#if (SUITESPARSE_VERSION < 4002)
- vector<UF_long> transpose_rows(num_cols + 1, 0);
- vector<UF_long> transpose_cols(num_nonzeros, 0);
-#else
vector<SuiteSparse_long> transpose_rows(num_cols + 1, 0);
vector<SuiteSparse_long> transpose_cols(num_nonzeros, 0);
-#endif
-
vector<double> transpose_values(num_nonzeros, 0);
for (int idx = 0; idx < num_nonzeros; ++idx) {
@@ -658,23 +652,49 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() {
cholmod_common cc;
cholmod_l_start(&cc);
- SuiteSparseQR_factorization<double>* factor =
- SuiteSparseQR_factorize<double>(SPQR_ORDERING_BESTAMD,
- SPQR_DEFAULT_TOL,
- &cholmod_jacobian,
- &cc);
+ cholmod_sparse* R = NULL;
+ SuiteSparse_long* permutation = NULL;
+
+ // Compute a Q-less QR factorization of the Jacobian. Since we are
+ // only interested in inverting J'J = R'R, we do not need Q. This
+ // saves memory and gives us R as a permuted compressed column
+ // sparse matrix.
+ //
+ // TODO(sameeragarwal): Currently the symbolic factorization and the
+ // numeric factorization is done at the same time, and this does not
+ // explicitly account for the block column and row structure in the
+ // matrix. When using AMD, we have observed in the past that
+ // computing the ordering with the block matrix is significantly
+ // more efficient, both in runtime as well as the quality of
+ // ordering computed. So, it maybe worth doing that analysis
+ // separately.
+ const SuiteSparse_long rank =
+ SuiteSparseQR<double>(SPQR_ORDERING_BESTAMD,
+ SPQR_DEFAULT_TOL,
+ cholmod_jacobian.ncol,
+ &cholmod_jacobian,
+ &R,
+ &permutation,
+ &cc);
event_logger.AddEvent("Numeric Factorization");
+ CHECK_NOTNULL(permutation);
+ CHECK_NOTNULL(R);
- const int rank = cc.SPQR_istat[4];
if (rank < cholmod_jacobian.ncol) {
LOG(WARNING) << "Jacobian matrix is rank deficient."
<< "Number of columns: " << cholmod_jacobian.ncol
<< " rank: " << rank;
- SuiteSparseQR_free(&factor, &cc);
+ delete []permutation;
+ cholmod_l_free_sparse(&R, &cc);
cholmod_l_finish(&cc);
return false;
}
+ vector<int> inverse_permutation(num_cols);
+ for (SuiteSparse_long i = 0; i < num_cols; ++i) {
+ inverse_permutation[permutation[i]] = i;
+ }
+
const int* rows = covariance_matrix_->rows();
const int* cols = covariance_matrix_->cols();
double* values = covariance_matrix_->mutable_values();
@@ -688,35 +708,39 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() {
//
// Since the covariance matrix is symmetric, the i^th row and column
// are equal.
+ const int num_threads = options_.num_threads;
+ scoped_array<double> workspace(new double[num_threads * num_cols]);
- cholmod_dense* rhs = cholmod_l_zeros(num_cols, 1, CHOLMOD_REAL, &cc);
- double* rhs_x = reinterpret_cast<double*>(rhs->x);
-
+#pragma omp parallel for num_threads(num_threads) schedule(dynamic)
for (int r = 0; r < num_cols; ++r) {
- int row_begin = rows[r];
- int row_end = rows[r + 1];
+ const int row_begin = rows[r];
+ const int row_end = rows[r + 1];
if (row_end == row_begin) {
continue;
}
- rhs_x[r] = 1.0;
-
- cholmod_dense* y1 = SuiteSparseQR_solve<double>(SPQR_RTX_EQUALS_ETB, factor, rhs, &cc);
- cholmod_dense* solution = SuiteSparseQR_solve<double>(SPQR_RETX_EQUALS_B, factor, y1, &cc);
+# ifdef CERES_USE_OPENMP
+ int thread_id = omp_get_thread_num();
+# else
+ int thread_id = 0;
+# endif
- double* solution_x = reinterpret_cast<double*>(solution->x);
+ double* solution = workspace.get() + thread_id * num_cols;
+ SolveRTRWithSparseRHS<SuiteSparse_long>(
+ num_cols,
+ static_cast<SuiteSparse_long*>(R->i),
+ static_cast<SuiteSparse_long*>(R->p),
+ static_cast<double*>(R->x),
+ inverse_permutation[r],
+ solution);
for (int idx = row_begin; idx < row_end; ++idx) {
- const int c = cols[idx];
- values[idx] = solution_x[c];
+ const int c = cols[idx];
+ values[idx] = solution[inverse_permutation[c]];
}
-
- cholmod_l_free_dense(&y1, &cc);
- cholmod_l_free_dense(&solution, &cc);
- rhs_x[r] = 0.0;
}
- cholmod_l_free_dense(&rhs, &cc);
- SuiteSparseQR_free(&factor, &cc);
+ delete []permutation;
+ cholmod_l_free_sparse(&R, &cc);
cholmod_l_finish(&cc);
event_logger.AddEvent("Inversion");
return true;