From 399f7d09e0c45af54b77b4ab9508d6f23759b927 Mon Sep 17 00:00:00 2001 From: Scott Ettinger Date: Mon, 9 Sep 2013 12:54:43 -0700 Subject: Update Ceres to the latest version Bug: 10673139 Bug: 10621282 Change-Id: Ib740a6e0e29049cc203da9f083b0d4f5734a2741 --- internal/ceres/CMakeLists.txt | 54 +-- internal/ceres/blas.cc | 78 ++++ internal/ceres/blas.h | 379 +------------------ internal/ceres/block_jacobi_preconditioner.cc | 2 +- internal/ceres/block_sparse_matrix.cc | 2 +- internal/ceres/c_api.cc | 5 +- .../ceres/compressed_col_sparse_matrix_utils.h | 75 ++++ .../compressed_col_sparse_matrix_utils_test.cc | 87 +++++ internal/ceres/covariance_impl.cc | 92 +++-- internal/ceres/covariance_test.cc | 7 + internal/ceres/cxsparse.h | 5 + internal/ceres/dense_normal_cholesky_solver.cc | 69 +++- internal/ceres/dense_normal_cholesky_solver.h | 12 + internal/ceres/dense_qr_solver.cc | 81 +++- internal/ceres/dense_qr_solver.h | 14 + internal/ceres/implicit_schur_complement.cc | 2 +- internal/ceres/implicit_schur_complement_test.cc | 4 +- .../ceres/iterative_schur_complement_solver.cc | 23 +- .../iterative_schur_complement_solver_test.cc | 15 +- internal/ceres/lapack.cc | 157 ++++++++ internal/ceres/lapack.h | 88 +++++ internal/ceres/line_search.cc | 2 +- internal/ceres/line_search.h | 2 +- internal/ceres/linear_solver.h | 6 +- internal/ceres/miniglog/glog/logging.cc | 39 -- internal/ceres/miniglog/glog/logging.h | 274 +++++++------- internal/ceres/partitioned_matrix_view.cc | 2 +- internal/ceres/preconditioner.h | 4 +- internal/ceres/program_evaluator.h | 2 +- internal/ceres/residual_block.cc | 3 +- internal/ceres/schur_complement_solver.cc | 46 +-- internal/ceres/schur_complement_solver.h | 4 - internal/ceres/schur_complement_solver_test.cc | 58 ++- internal/ceres/schur_eliminator_impl.h | 3 +- internal/ceres/schur_eliminator_test.cc | 6 +- internal/ceres/schur_jacobi_preconditioner.cc | 2 +- internal/ceres/small_blas.h | 406 +++++++++++++++++++++ internal/ceres/small_blas_test.cc | 303 +++++++++++++++ internal/ceres/solver.cc | 14 +- internal/ceres/solver_impl.cc | 85 ++++- internal/ceres/solver_impl.h | 7 + internal/ceres/solver_impl_test.cc | 51 ++- internal/ceres/sparse_normal_cholesky_solver.cc | 22 +- internal/ceres/sparse_normal_cholesky_solver.h | 4 - internal/ceres/suitesparse.h | 14 + internal/ceres/system_test.cc | 26 +- internal/ceres/trust_region_minimizer.cc | 51 ++- internal/ceres/types.cc | 37 +- internal/ceres/unsymmetric_linear_solver_test.cc | 30 +- internal/ceres/visibility.cc | 2 +- .../ceres/visibility_based_preconditioner_test.cc | 2 +- 51 files changed, 2007 insertions(+), 751 deletions(-) create mode 100644 internal/ceres/blas.cc create mode 100644 internal/ceres/lapack.cc create mode 100644 internal/ceres/lapack.h delete mode 100644 internal/ceres/miniglog/glog/logging.cc create mode 100644 internal/ceres/small_blas.h create mode 100644 internal/ceres/small_blas_test.cc (limited to 'internal/ceres') diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt index 9e2e1ae..610e816 100644 --- a/internal/ceres/CMakeLists.txt +++ b/internal/ceres/CMakeLists.txt @@ -30,6 +30,7 @@ SET(CERES_INTERNAL_SRC array_utils.cc + blas.cc block_evaluate_preparer.cc block_jacobi_preconditioner.cc block_jacobian_writer.cc @@ -64,6 +65,7 @@ SET(CERES_INTERNAL_SRC incomplete_lq_factorization.cc iterative_schur_complement_solver.cc levenberg_marquardt_strategy.cc + lapack.cc line_search.cc line_search_direction.cc line_search_minimizer.cc @@ -130,20 +132,14 @@ ELSE (SCHUR_SPECIALIZATIONS) ENDIF (SCHUR_SPECIALIZATIONS) # For Android, use the internal Glog implementation. -IF (BUILD_ANDROID) - ADD_LIBRARY(miniglog STATIC - miniglog/glog/logging.cc) - - # The Android logging library that defines e.g. __android_log_print is - # creatively named "log". - TARGET_LINK_LIBRARIES(miniglog log) - +IF (MINIGLOG) + ADD_LIBRARY(miniglog STATIC miniglog/glog/logging.cc) INSTALL(TARGETS miniglog EXPORT CeresExport RUNTIME DESTINATION bin LIBRARY DESTINATION lib${LIB_SUFFIX} ARCHIVE DESTINATION lib${LIB_SUFFIX}) -ENDIF (BUILD_ANDROID) +ENDIF (MINIGLOG) SET(CERES_LIBRARY_DEPENDENCIES ${GLOG_LIB}) @@ -166,18 +162,20 @@ IF (SUITESPARSE_FOUND) LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${METIS_LIB}) ENDIF (EXISTS ${METIS_LIB}) - LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${LAPACK_LIB}) - - IF (EXISTS ${BLAS_LIB}) - LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${BLAS_LIB}) - ENDIF (EXISTS ${BLAS_LIB}) - IF (TBB_FOUND) LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${TBB_LIB}) LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${TBB_MALLOC_LIB}) ENDIF (TBB_FOUND) ENDIF (SUITESPARSE_FOUND) +IF (CXSPARSE_FOUND) + LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${CXSPARSE_LIB}) +ENDIF (CXSPARSE_FOUND) + +IF (BLAS_AND_LAPACK_FOUND) + LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${LAPACK_LIBRARIES}) + LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${BLAS_LIBRARIES}) +ENDIF (BLAS_AND_LAPACK_FOUND) IF (CXSPARSE_FOUND) LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${CXSPARSE_LIB}) @@ -194,7 +192,11 @@ SET(CERES_LIBRARY_SOURCE ${CERES_INTERNAL_HDRS} ${CERES_INTERNAL_SCHUR_FILES}) -ADD_LIBRARY(ceres STATIC ${CERES_LIBRARY_SOURCE}) +ADD_LIBRARY(ceres ${CERES_LIBRARY_SOURCE}) +SET_TARGET_PROPERTIES(ceres PROPERTIES + VERSION ${CERES_VERSION} + SOVERSION ${CERES_VERSION_MAJOR} +) TARGET_LINK_LIBRARIES(ceres ${CERES_LIBRARY_DEPENDENCIES}) INSTALL(TARGETS ceres @@ -203,23 +205,6 @@ INSTALL(TARGETS ceres LIBRARY DESTINATION lib${LIB_SUFFIX} ARCHIVE DESTINATION lib${LIB_SUFFIX}) -# Don't build a DLL on MSVC. Supporting Ceres as a DLL on Windows involves -# nontrivial changes that we haven't made yet. -IF (NOT MSVC AND NOT BUILD_ANDROID AND BUILD_SHARED) - ADD_LIBRARY(ceres_shared SHARED ${CERES_LIBRARY_SOURCE}) - TARGET_LINK_LIBRARIES(ceres_shared ${CERES_LIBRARY_DEPENDENCIES}) - SET_TARGET_PROPERTIES(ceres_shared PROPERTIES - VERSION ${CERES_VERSION} - SOVERSION ${CERES_ABI_VERSION}) - - INSTALL(TARGETS ceres_shared - EXPORT CeresExport - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib${LIB_SUFFIX} - ARCHIVE DESTINATION lib${LIB_SUFFIX}) - -ENDIF (NOT MSVC AND NOT BUILD_ANDROID AND BUILD_SHARED) - IF (BUILD_TESTING AND GFLAGS) ADD_LIBRARY(gtest gmock_gtest_all.cc gmock_main.cc) ADD_LIBRARY(test_util @@ -228,6 +213,7 @@ IF (BUILD_TESTING AND GFLAGS) test_util.cc) TARGET_LINK_LIBRARIES(gtest ${GFLAGS_LIB} ${GLOG_LIB}) + TARGET_LINK_LIBRARIES(test_util ceres gtest ${GLOG_LIB}) MACRO (CERES_TEST NAME) ADD_EXECUTABLE(${NAME}_test ${NAME}_test.cc) @@ -242,7 +228,6 @@ IF (BUILD_TESTING AND GFLAGS) CERES_TEST(autodiff) CERES_TEST(autodiff_cost_function) CERES_TEST(autodiff_local_parameterization) - CERES_TEST(blas) CERES_TEST(block_random_access_crs_matrix) CERES_TEST(block_random_access_dense_matrix) CERES_TEST(block_random_access_sparse_matrix) @@ -285,6 +270,7 @@ IF (BUILD_TESTING AND GFLAGS) CERES_TEST(runtime_numeric_diff_cost_function) CERES_TEST(schur_complement_solver) CERES_TEST(schur_eliminator) + CERES_TEST(small_blas) CERES_TEST(solver_impl) # TODO(sameeragarwal): This test should ultimately be made diff --git a/internal/ceres/blas.cc b/internal/ceres/blas.cc new file mode 100644 index 0000000..f79b1eb --- /dev/null +++ b/internal/ceres/blas.cc @@ -0,0 +1,78 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2013 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) + +#include "ceres/blas.h" +#include "glog/logging.h" + +extern "C" void dsyrk_(char* uplo, + char* trans, + int* n, + int* k, + double* alpha, + double* a, + int* lda, + double* beta, + double* c, + int* ldc); + +namespace ceres { +namespace internal { + +void BLAS::SymmetricRankKUpdate(int num_rows, + int num_cols, + const double* a, + bool transpose, + double alpha, + double beta, + double* c) { +#ifdef CERES_NO_LAPACK + LOG(FATAL) << "Ceres was built without a BLAS library."; +#else + char uplo = 'L'; + char trans = transpose ? 'T' : 'N'; + int n = transpose ? num_cols : num_rows; + int k = transpose ? num_rows : num_cols; + int lda = k; + int ldc = n; + dsyrk_(&uplo, + &trans, + &n, + &k, + &alpha, + const_cast(a), + &lda, + &beta, + c, + &ldc); +#endif +} + +} // namespace internal +} // namespace ceres diff --git a/internal/ceres/blas.h b/internal/ceres/blas.h index 9629b3d..2ab6663 100644 --- a/internal/ceres/blas.h +++ b/internal/ceres/blas.h @@ -28,377 +28,28 @@ // // Author: sameeragarwal@google.com (Sameer Agarwal) // -// Simple blas functions for use in the Schur Eliminator. These are -// fairly basic implementations which already yield a significant -// speedup in the eliminator performance. +// Wrapper functions around BLAS functions. #ifndef CERES_INTERNAL_BLAS_H_ #define CERES_INTERNAL_BLAS_H_ -#include "ceres/internal/eigen.h" -#include "glog/logging.h" - namespace ceres { namespace internal { -// Remove the ".noalias()" annotation from the matrix matrix -// mutliplies to produce a correct build with the Android NDK, -// including versions 6, 7, 8, and 8b, when built with STLPort and the -// non-standalone toolchain (i.e. ndk-build). This appears to be a -// compiler bug; if the workaround is not in place, the line -// -// block.noalias() -= A * B; -// -// gets compiled to -// -// block.noalias() += A * B; -// -// which breaks schur elimination. Introducing a temporary by removing the -// .noalias() annotation causes the issue to disappear. Tracking this -// issue down was tricky, since the test suite doesn't run when built with -// the non-standalone toolchain. -// -// TODO(keir): Make a reproduction case for this and send it upstream. -#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG -#define CERES_MAYBE_NOALIAS -#else -#define CERES_MAYBE_NOALIAS .noalias() -#endif - -// The following three macros are used to share code and reduce -// template junk across the various GEMM variants. -#define CERES_GEMM_BEGIN(name) \ - template \ - inline void name(const double* A, \ - const int num_row_a, \ - const int num_col_a, \ - const double* B, \ - const int num_row_b, \ - const int num_col_b, \ - double* C, \ - const int start_row_c, \ - const int start_col_c, \ - const int row_stride_c, \ - const int col_stride_c) - -#define CERES_GEMM_NAIVE_HEADER \ - DCHECK_GT(num_row_a, 0); \ - DCHECK_GT(num_col_a, 0); \ - DCHECK_GT(num_row_b, 0); \ - DCHECK_GT(num_col_b, 0); \ - DCHECK_GE(start_row_c, 0); \ - DCHECK_GE(start_col_c, 0); \ - DCHECK_GT(row_stride_c, 0); \ - DCHECK_GT(col_stride_c, 0); \ - DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); \ - DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); \ - DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b)); \ - DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b)); \ - const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); \ - const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); \ - const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b); \ - const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b); - -#define CERES_GEMM_EIGEN_HEADER \ - const typename EigenTypes::ConstMatrixRef \ - Aref(A, num_row_a, num_col_a); \ - const typename EigenTypes::ConstMatrixRef \ - Bref(B, num_row_b, num_col_b); \ - MatrixRef Cref(C, row_stride_c, col_stride_c); \ - -#define CERES_CALL_GEMM(name) \ - name( \ - A, num_row_a, num_col_a, \ - B, num_row_b, num_col_b, \ - C, start_row_c, start_col_c, row_stride_c, col_stride_c); - - -// For the matrix-matrix functions below, there are three variants for -// each functionality. Foo, FooNaive and FooEigen. Foo is the one to -// be called by the user. FooNaive is a basic loop based -// implementation and FooEigen uses Eigen's implementation. Foo -// chooses between FooNaive and FooEigen depending on how many of the -// template arguments are fixed at compile time. Currently, FooEigen -// is called if all matrix dimensions are compile time -// constants. FooNaive is called otherwise. This leads to the best -// performance currently. -// -// The MatrixMatrixMultiply variants compute: -// -// C op A * B; -// -// The MatrixTransposeMatrixMultiply variants compute: -// -// C op A' * B -// -// where op can be +=, -=, or =. -// -// The template parameters (kRowA, kColA, kRowB, kColB) allow -// specialization of the loop at compile time. If this information is -// not available, then Eigen::Dynamic should be used as the template -// argument. -// -// kOperation = 1 -> C += A * B -// kOperation = -1 -> C -= A * B -// kOperation = 0 -> C = A * B -// -// The functions can write into matrices C which are larger than the -// matrix A * B. This is done by specifying the true size of C via -// row_stride_c and col_stride_c, and then indicating where A * B -// should be written into by start_row_c and start_col_c. -// -// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c = -// 4 and start_col_c = 5, then if A = 3x2 and B = 2x4, we get -// -// ------------ -// ------------ -// ------------ -// ------------ -// -----xxxx--- -// -----xxxx--- -// -----xxxx--- -// ------------ -// ------------ -// ------------ -// -CERES_GEMM_BEGIN(MatrixMatrixMultiplyEigen) { - CERES_GEMM_EIGEN_HEADER - Eigen::Block - block(Cref, start_row_c, start_col_c, num_row_a, num_col_b); - - if (kOperation > 0) { - block CERES_MAYBE_NOALIAS += Aref * Bref; - } else if (kOperation < 0) { - block CERES_MAYBE_NOALIAS -= Aref * Bref; - } else { - block CERES_MAYBE_NOALIAS = Aref * Bref; - } -} - -CERES_GEMM_BEGIN(MatrixMatrixMultiplyNaive) { - CERES_GEMM_NAIVE_HEADER - DCHECK_EQ(NUM_COL_A, NUM_ROW_B); - - const int NUM_ROW_C = NUM_ROW_A; - const int NUM_COL_C = NUM_COL_B; - DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c); - DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c); - - for (int row = 0; row < NUM_ROW_C; ++row) { - for (int col = 0; col < NUM_COL_C; ++col) { - double tmp = 0.0; - for (int k = 0; k < NUM_COL_A; ++k) { - tmp += A[row * NUM_COL_A + k] * B[k * NUM_COL_B + col]; - } - - const int index = (row + start_row_c) * col_stride_c + start_col_c + col; - if (kOperation > 0) { - C[index] += tmp; - } else if (kOperation < 0) { - C[index] -= tmp; - } else { - C[index] = tmp; - } - } - } -} - -CERES_GEMM_BEGIN(MatrixMatrixMultiply) { -#ifdef CERES_NO_CUSTOM_BLAS - - CERES_CALL_GEMM(MatrixMatrixMultiplyEigen) - return; - -#else - - if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic && - kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) { - CERES_CALL_GEMM(MatrixMatrixMultiplyEigen) - } else { - CERES_CALL_GEMM(MatrixMatrixMultiplyNaive) - } - -#endif -} - -CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyEigen) { - CERES_GEMM_EIGEN_HEADER - Eigen::Block block(Cref, - start_row_c, start_col_c, - num_col_a, num_col_b); - if (kOperation > 0) { - block CERES_MAYBE_NOALIAS += Aref.transpose() * Bref; - } else if (kOperation < 0) { - block CERES_MAYBE_NOALIAS -= Aref.transpose() * Bref; - } else { - block CERES_MAYBE_NOALIAS = Aref.transpose() * Bref; - } -} - -CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyNaive) { - CERES_GEMM_NAIVE_HEADER - DCHECK_EQ(NUM_ROW_A, NUM_ROW_B); - - const int NUM_ROW_C = NUM_COL_A; - const int NUM_COL_C = NUM_COL_B; - DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c); - DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c); - - for (int row = 0; row < NUM_ROW_C; ++row) { - for (int col = 0; col < NUM_COL_C; ++col) { - double tmp = 0.0; - for (int k = 0; k < NUM_ROW_A; ++k) { - tmp += A[k * NUM_COL_A + row] * B[k * NUM_COL_B + col]; - } - - const int index = (row + start_row_c) * col_stride_c + start_col_c + col; - if (kOperation > 0) { - C[index]+= tmp; - } else if (kOperation < 0) { - C[index]-= tmp; - } else { - C[index]= tmp; - } - } - } -} - -CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiply) { -#ifdef CERES_NO_CUSTOM_BLAS - - CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen) - return; - -#else - - if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic && - kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) { - CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen) - } else { - CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyNaive) - } - -#endif -} - -// Matrix-Vector multiplication -// -// c op A * b; -// -// where op can be +=, -=, or =. -// -// The template parameters (kRowA, kColA) allow specialization of the -// loop at compile time. If this information is not available, then -// Eigen::Dynamic should be used as the template argument. -// -// kOperation = 1 -> c += A' * b -// kOperation = -1 -> c -= A' * b -// kOperation = 0 -> c = A' * b -template -inline void MatrixVectorMultiply(const double* A, - const int num_row_a, - const int num_col_a, - const double* b, - double* c) { -#ifdef CERES_NO_CUSTOM_BLAS - const typename EigenTypes::ConstMatrixRef - Aref(A, num_row_a, num_col_a); - const typename EigenTypes::ConstVectorRef bref(b, num_col_a); - typename EigenTypes::VectorRef cref(c, num_row_a); - - // lazyProduct works better than .noalias() for matrix-vector - // products. - if (kOperation > 0) { - cref += Aref.lazyProduct(bref); - } else if (kOperation < 0) { - cref -= Aref.lazyProduct(bref); - } else { - cref = Aref.lazyProduct(bref); - } -#else - - DCHECK_GT(num_row_a, 0); - DCHECK_GT(num_col_a, 0); - DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); - DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); - - const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); - const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); - - for (int row = 0; row < NUM_ROW_A; ++row) { - double tmp = 0.0; - for (int col = 0; col < NUM_COL_A; ++col) { - tmp += A[row * NUM_COL_A + col] * b[col]; - } - - if (kOperation > 0) { - c[row] += tmp; - } else if (kOperation < 0) { - c[row] -= tmp; - } else { - c[row] = tmp; - } - } -#endif // CERES_NO_CUSTOM_BLAS -} - -// Similar to MatrixVectorMultiply, except that A is transposed, i.e., -// -// c op A' * b; -template -inline void MatrixTransposeVectorMultiply(const double* A, - const int num_row_a, - const int num_col_a, - const double* b, - double* c) { -#ifdef CERES_NO_CUSTOM_BLAS - const typename EigenTypes::ConstMatrixRef - Aref(A, num_row_a, num_col_a); - const typename EigenTypes::ConstVectorRef bref(b, num_row_a); - typename EigenTypes::VectorRef cref(c, num_col_a); - - // lazyProduct works better than .noalias() for matrix-vector - // products. - if (kOperation > 0) { - cref += Aref.transpose().lazyProduct(bref); - } else if (kOperation < 0) { - cref -= Aref.transpose().lazyProduct(bref); - } else { - cref = Aref.transpose().lazyProduct(bref); - } -#else - - DCHECK_GT(num_row_a, 0); - DCHECK_GT(num_col_a, 0); - DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); - DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); - - const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); - const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); - - for (int row = 0; row < NUM_COL_A; ++row) { - double tmp = 0.0; - for (int col = 0; col < NUM_ROW_A; ++col) { - tmp += A[col * NUM_COL_A + row] * b[col]; - } - - if (kOperation > 0) { - c[row] += tmp; - } else if (kOperation < 0) { - c[row] -= tmp; - } else { - c[row] = tmp; - } - } -#endif // CERES_NO_CUSTOM_BLAS -} - - -#undef CERES_MAYBE_NOALIAS -#undef CERES_GEMM_BEGIN -#undef CERES_GEMM_EIGEN_HEADER -#undef CERES_GEMM_NAIVE_HEADER -#undef CERES_CALL_GEMM +class BLAS { + public: + // transpose = true : c = alpha * a'a + beta * c; + // transpose = false : c = alpha * aa' + beta * c; + // + // Assumes column major matrices. + static void SymmetricRankKUpdate(int num_rows, + int num_cols, + const double* a, + bool transpose, + double alpha, + double beta, + double* c); +}; } // namespace internal } // namespace ceres diff --git a/internal/ceres/block_jacobi_preconditioner.cc b/internal/ceres/block_jacobi_preconditioner.cc index 749e0b6..29974d4 100644 --- a/internal/ceres/block_jacobi_preconditioner.cc +++ b/internal/ceres/block_jacobi_preconditioner.cc @@ -111,7 +111,7 @@ bool BlockJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A, } block = block.selfadjointView() - .ldlt() + .llt() .solve(Matrix::Identity(size, size)); } return true; diff --git a/internal/ceres/block_sparse_matrix.cc b/internal/ceres/block_sparse_matrix.cc index fdd762c..a487262 100644 --- a/internal/ceres/block_sparse_matrix.cc +++ b/internal/ceres/block_sparse_matrix.cc @@ -33,9 +33,9 @@ #include #include #include -#include "ceres/blas.h" #include "ceres/block_structure.h" #include "ceres/internal/eigen.h" +#include "ceres/small_blas.h" #include "ceres/triplet_sparse_matrix.h" #include "glog/logging.h" diff --git a/internal/ceres/c_api.cc b/internal/ceres/c_api.cc index 02bc129..1fd01c9 100644 --- a/internal/ceres/c_api.cc +++ b/internal/ceres/c_api.cc @@ -49,7 +49,8 @@ using ceres::Problem; void ceres_init() { // This is not ideal, but it's not clear what to do if there is no gflags and // no access to command line arguments. - google::InitGoogleLogging(""); + char message[] = ""; + google::InitGoogleLogging(message); } ceres_problem_t* ceres_create_problem() { @@ -172,7 +173,7 @@ ceres_residual_block_id_t* ceres_problem_add_residual_block( void ceres_solve(ceres_problem_t* c_problem) { Problem* problem = reinterpret_cast(c_problem); - + // TODO(keir): Obviously, this way of setting options won't scale or last. // Instead, figure out a way to specify some of the options without // duplicating everything. diff --git a/internal/ceres/compressed_col_sparse_matrix_utils.h b/internal/ceres/compressed_col_sparse_matrix_utils.h index afabf1c..c8de2a1 100644 --- a/internal/ceres/compressed_col_sparse_matrix_utils.h +++ b/internal/ceres/compressed_col_sparse_matrix_utils.h @@ -61,6 +61,81 @@ void BlockOrderingToScalarOrdering(const vector& blocks, const vector& block_ordering, vector* scalar_ordering); +// Solve the linear system +// +// R * solution = rhs +// +// Where R is an upper triangular compressed column sparse matrix. +template +void SolveUpperTriangularInPlace(IntegerType num_cols, + const IntegerType* rows, + const IntegerType* cols, + const double* values, + double* rhs_and_solution) { + for (IntegerType c = num_cols - 1; c >= 0; --c) { + rhs_and_solution[c] /= values[cols[c + 1] - 1]; + for (IntegerType idx = cols[c]; idx < cols[c + 1] - 1; ++idx) { + const IntegerType r = rows[idx]; + const double v = values[idx]; + rhs_and_solution[r] -= v * rhs_and_solution[c]; + } + } +} + +// Solve the linear system +// +// R' * solution = rhs +// +// Where R is an upper triangular compressed column sparse matrix. +template +void SolveUpperTriangularTransposeInPlace(IntegerType num_cols, + const IntegerType* rows, + const IntegerType* cols, + const double* values, + double* rhs_and_solution) { + for (IntegerType c = 0; c < num_cols; ++c) { + for (IntegerType idx = cols[c]; idx < cols[c + 1] - 1; ++idx) { + const IntegerType r = rows[idx]; + const double v = values[idx]; + rhs_and_solution[c] -= v * rhs_and_solution[r]; + } + rhs_and_solution[c] = rhs_and_solution[c] / values[cols[c + 1] - 1]; + } +} + +// Given a upper triangular matrix R in compressed column form, solve +// the linear system, +// +// R'R x = b +// +// Where b is all zeros except for rhs_nonzero_index, where it is +// equal to one. +// +// The function exploits this knowledge to reduce the number of +// floating point operations. +template +void SolveRTRWithSparseRHS(IntegerType num_cols, + const IntegerType* rows, + const IntegerType* cols, + const double* values, + const int rhs_nonzero_index, + double* solution) { + fill(solution, solution + num_cols, 0.0); + solution[rhs_nonzero_index] = 1.0 / values[cols[rhs_nonzero_index + 1] - 1]; + + for (IntegerType c = rhs_nonzero_index + 1; c < num_cols; ++c) { + for (IntegerType idx = cols[c]; idx < cols[c + 1] - 1; ++idx) { + const IntegerType r = rows[idx]; + if (r < rhs_nonzero_index) continue; + const double v = values[idx]; + solution[c] -= v * solution[r]; + } + solution[c] = solution[c] / values[cols[c + 1] - 1]; + } + + SolveUpperTriangularInPlace(num_cols, rows, cols, values, solution); +} + } // namespace internal } // namespace ceres diff --git a/internal/ceres/compressed_col_sparse_matrix_utils_test.cc b/internal/ceres/compressed_col_sparse_matrix_utils_test.cc index e810837..3faf06c 100644 --- a/internal/ceres/compressed_col_sparse_matrix_utils_test.cc +++ b/internal/ceres/compressed_col_sparse_matrix_utils_test.cc @@ -193,5 +193,92 @@ TEST(_, ScalarMatrixToBlockMatrix) { ss.Free(ccsm.release()); } +class SolveUpperTriangularTest : public ::testing::Test { + protected: + void SetUp() { + cols.resize(5); + rows.resize(7); + values.resize(7); + + cols[0] = 0; + rows[0] = 0; + values[0] = 0.50754; + + cols[1] = 1; + rows[1] = 1; + values[1] = 0.80483; + + cols[2] = 2; + rows[2] = 1; + values[2] = 0.14120; + rows[3] = 2; + values[3] = 0.3; + + cols[3] = 4; + rows[4] = 0; + values[4] = 0.77696; + rows[5] = 1; + values[5] = 0.41860; + rows[6] = 3; + values[6] = 0.88979; + + cols[4] = 7; + } + + vector cols; + vector rows; + vector values; +}; + +TEST_F(SolveUpperTriangularTest, SolveInPlace) { + double rhs_and_solution[] = {1.0, 1.0, 2.0, 2.0}; + const double expected[] = { -1.4706, -1.0962, 6.6667, 2.2477}; + + SolveUpperTriangularInPlace(cols.size() - 1, + &rows[0], + &cols[0], + &values[0], + rhs_and_solution); + + for (int i = 0; i < 4; ++i) { + EXPECT_NEAR(rhs_and_solution[i], expected[i], 1e-4) << i; + } +} + +TEST_F(SolveUpperTriangularTest, TransposeSolveInPlace) { + double rhs_and_solution[] = {1.0, 1.0, 2.0, 2.0}; + double expected[] = {1.970288, 1.242498, 6.081864, -0.057255}; + + SolveUpperTriangularTransposeInPlace(cols.size() - 1, + &rows[0], + &cols[0], + &values[0], + rhs_and_solution); + + for (int i = 0; i < 4; ++i) { + EXPECT_NEAR(rhs_and_solution[i], expected[i], 1e-4) << i; + } +} + +TEST_F(SolveUpperTriangularTest, RTRSolveWithSparseRHS) { + double solution[4]; + double expected[] = { 6.8420e+00, 1.0057e+00, -1.4907e-16, -1.9335e+00, + 1.0057e+00, 2.2275e+00, -1.9493e+00, -6.5693e-01, + -1.4907e-16, -1.9493e+00, 1.1111e+01, 9.7381e-17, + -1.9335e+00, -6.5693e-01, 9.7381e-17, 1.2631e+00 }; + + for (int i = 0; i < 4; ++i) { + SolveRTRWithSparseRHS(cols.size() - 1, + &rows[0], + &cols[0], + &values[0], + i, + solution); + for (int j = 0; j < 4; ++j) { + EXPECT_NEAR(solution[j], expected[4 * i + j], 1e-3) << i; + } + } +} + } // namespace internal } // namespace ceres diff --git a/internal/ceres/covariance_impl.cc b/internal/ceres/covariance_impl.cc index 1befde7..19d545c 100644 --- a/internal/ceres/covariance_impl.cc +++ b/internal/ceres/covariance_impl.cc @@ -38,6 +38,7 @@ #include #include #include "Eigen/SVD" +#include "ceres/compressed_col_sparse_matrix_utils.h" #include "ceres/compressed_row_sparse_matrix.h" #include "ceres/covariance.h" #include "ceres/crs_matrix.h" @@ -48,7 +49,6 @@ #include "ceres/suitesparse.h" #include "ceres/wall_time.h" #include "glog/logging.h" -#include "SuiteSparseQR.hpp" namespace ceres { namespace internal { @@ -56,6 +56,7 @@ namespace { // Per thread storage for SuiteSparse. #ifndef CERES_NO_SUITESPARSE + struct PerThreadContext { explicit PerThreadContext(int num_rows) : solution(NULL), @@ -81,6 +82,7 @@ struct PerThreadContext { cholmod_dense* rhs; SuiteSparse ss; }; + #endif } // namespace @@ -604,16 +606,8 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() { const int num_cols = jacobian.num_cols; const int num_nonzeros = jacobian.values.size(); - // UF_long is deprecated but SuiteSparse_long is only available in - // newer versions of SuiteSparse. -#if (SUITESPARSE_VERSION < 4002) - vector transpose_rows(num_cols + 1, 0); - vector transpose_cols(num_nonzeros, 0); -#else vector transpose_rows(num_cols + 1, 0); vector transpose_cols(num_nonzeros, 0); -#endif - vector transpose_values(num_nonzeros, 0); for (int idx = 0; idx < num_nonzeros; ++idx) { @@ -658,23 +652,49 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() { cholmod_common cc; cholmod_l_start(&cc); - SuiteSparseQR_factorization* factor = - SuiteSparseQR_factorize(SPQR_ORDERING_BESTAMD, - SPQR_DEFAULT_TOL, - &cholmod_jacobian, - &cc); + cholmod_sparse* R = NULL; + SuiteSparse_long* permutation = NULL; + + // Compute a Q-less QR factorization of the Jacobian. Since we are + // only interested in inverting J'J = R'R, we do not need Q. This + // saves memory and gives us R as a permuted compressed column + // sparse matrix. + // + // TODO(sameeragarwal): Currently the symbolic factorization and the + // numeric factorization is done at the same time, and this does not + // explicitly account for the block column and row structure in the + // matrix. When using AMD, we have observed in the past that + // computing the ordering with the block matrix is significantly + // more efficient, both in runtime as well as the quality of + // ordering computed. So, it maybe worth doing that analysis + // separately. + const SuiteSparse_long rank = + SuiteSparseQR(SPQR_ORDERING_BESTAMD, + SPQR_DEFAULT_TOL, + cholmod_jacobian.ncol, + &cholmod_jacobian, + &R, + &permutation, + &cc); event_logger.AddEvent("Numeric Factorization"); + CHECK_NOTNULL(permutation); + CHECK_NOTNULL(R); - const int rank = cc.SPQR_istat[4]; if (rank < cholmod_jacobian.ncol) { LOG(WARNING) << "Jacobian matrix is rank deficient." << "Number of columns: " << cholmod_jacobian.ncol << " rank: " << rank; - SuiteSparseQR_free(&factor, &cc); + delete []permutation; + cholmod_l_free_sparse(&R, &cc); cholmod_l_finish(&cc); return false; } + vector inverse_permutation(num_cols); + for (SuiteSparse_long i = 0; i < num_cols; ++i) { + inverse_permutation[permutation[i]] = i; + } + const int* rows = covariance_matrix_->rows(); const int* cols = covariance_matrix_->cols(); double* values = covariance_matrix_->mutable_values(); @@ -688,35 +708,39 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() { // // Since the covariance matrix is symmetric, the i^th row and column // are equal. + const int num_threads = options_.num_threads; + scoped_array workspace(new double[num_threads * num_cols]); - cholmod_dense* rhs = cholmod_l_zeros(num_cols, 1, CHOLMOD_REAL, &cc); - double* rhs_x = reinterpret_cast(rhs->x); - +#pragma omp parallel for num_threads(num_threads) schedule(dynamic) for (int r = 0; r < num_cols; ++r) { - int row_begin = rows[r]; - int row_end = rows[r + 1]; + const int row_begin = rows[r]; + const int row_end = rows[r + 1]; if (row_end == row_begin) { continue; } - rhs_x[r] = 1.0; - - cholmod_dense* y1 = SuiteSparseQR_solve(SPQR_RTX_EQUALS_ETB, factor, rhs, &cc); - cholmod_dense* solution = SuiteSparseQR_solve(SPQR_RETX_EQUALS_B, factor, y1, &cc); +# ifdef CERES_USE_OPENMP + int thread_id = omp_get_thread_num(); +# else + int thread_id = 0; +# endif - double* solution_x = reinterpret_cast(solution->x); + double* solution = workspace.get() + thread_id * num_cols; + SolveRTRWithSparseRHS( + num_cols, + static_cast(R->i), + static_cast(R->p), + static_cast(R->x), + inverse_permutation[r], + solution); for (int idx = row_begin; idx < row_end; ++idx) { - const int c = cols[idx]; - values[idx] = solution_x[c]; + const int c = cols[idx]; + values[idx] = solution[inverse_permutation[c]]; } - - cholmod_l_free_dense(&y1, &cc); - cholmod_l_free_dense(&solution, &cc); - rhs_x[r] = 0.0; } - cholmod_l_free_dense(&rhs, &cc); - SuiteSparseQR_free(&factor, &cc); + delete []permutation; + cholmod_l_free_sparse(&R, &cc); cholmod_l_finish(&cc); event_logger.AddEvent("Inversion"); return true; diff --git a/internal/ceres/covariance_test.cc b/internal/ceres/covariance_test.cc index e7d25a1..f3a5051 100644 --- a/internal/ceres/covariance_test.cc +++ b/internal/ceres/covariance_test.cc @@ -499,6 +499,9 @@ TEST_F(CovarianceTest, ConstantParameterBlock) { #ifndef CERES_NO_SUITESPARSE options.algorithm_type = SPARSE_CHOLESKY; ComputeAndCompareCovarianceBlocks(options, expected_covariance); + + options.algorithm_type = SPARSE_QR; + ComputeAndCompareCovarianceBlocks(options, expected_covariance); #endif options.algorithm_type = DENSE_SVD; @@ -552,6 +555,9 @@ TEST_F(CovarianceTest, LocalParameterization) { #ifndef CERES_NO_SUITESPARSE options.algorithm_type = SPARSE_CHOLESKY; ComputeAndCompareCovarianceBlocks(options, expected_covariance); + + options.algorithm_type = SPARSE_QR; + ComputeAndCompareCovarianceBlocks(options, expected_covariance); #endif options.algorithm_type = DENSE_SVD; @@ -776,6 +782,7 @@ class LargeScaleCovarianceTest : public ::testing::Test { TEST_F(LargeScaleCovarianceTest, Parallel) { ComputeAndCompare(SPARSE_CHOLESKY, 4); + ComputeAndCompare(SPARSE_QR, 4); } #endif // !defined(CERES_NO_SUITESPARSE) && defined(CERES_USE_OPENMP) diff --git a/internal/ceres/cxsparse.h b/internal/ceres/cxsparse.h index 6004301..cd87908 100644 --- a/internal/ceres/cxsparse.h +++ b/internal/ceres/cxsparse.h @@ -125,6 +125,11 @@ class CXSparse { } // namespace internal } // namespace ceres +#else // CERES_NO_CXSPARSE + +class CXSparse {}; +typedef void cs_dis; + #endif // CERES_NO_CXSPARSE #endif // CERES_INTERNAL_CXSPARSE_H_ diff --git a/internal/ceres/dense_normal_cholesky_solver.cc b/internal/ceres/dense_normal_cholesky_solver.cc index 96f5511..fbf3cbe 100644 --- a/internal/ceres/dense_normal_cholesky_solver.cc +++ b/internal/ceres/dense_normal_cholesky_solver.cc @@ -33,9 +33,11 @@ #include #include "Eigen/Dense" +#include "ceres/blas.h" #include "ceres/dense_sparse_matrix.h" #include "ceres/internal/eigen.h" #include "ceres/internal/scoped_ptr.h" +#include "ceres/lapack.h" #include "ceres/linear_solver.h" #include "ceres/types.h" #include "ceres/wall_time.h" @@ -52,6 +54,18 @@ LinearSolver::Summary DenseNormalCholeskySolver::SolveImpl( const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double* x) { + if (options_.dense_linear_algebra_library_type == EIGEN) { + return SolveUsingEigen(A, b, per_solve_options, x); + } else { + return SolveUsingLAPACK(A, b, per_solve_options, x); + } +} + +LinearSolver::Summary DenseNormalCholeskySolver::SolveUsingEigen( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x) { EventLogger event_logger("DenseNormalCholeskySolver::Solve"); const int num_rows = A->num_rows(); @@ -62,6 +76,7 @@ LinearSolver::Summary DenseNormalCholeskySolver::SolveImpl( lhs.setZero(); event_logger.AddEvent("Setup"); + // lhs += A'A // // Using rankUpdate instead of GEMM, exposes the fact that its the @@ -76,16 +91,66 @@ LinearSolver::Summary DenseNormalCholeskySolver::SolveImpl( ConstVectorRef D(per_solve_options.D, num_cols); lhs += D.array().square().matrix().asDiagonal(); } + event_logger.AddEvent("Product"); LinearSolver::Summary summary; summary.num_iterations = 1; summary.termination_type = TOLERANCE; VectorRef(x, num_cols) = - lhs.selfadjointView().ldlt().solve(rhs); + lhs.selfadjointView().llt().solve(rhs); event_logger.AddEvent("Solve"); - return summary; } +LinearSolver::Summary DenseNormalCholeskySolver::SolveUsingLAPACK( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x) { + EventLogger event_logger("DenseNormalCholeskySolver::Solve"); + + if (per_solve_options.D != NULL) { + // Temporarily append a diagonal block to the A matrix, but undo + // it before returning the matrix to the user. + A->AppendDiagonal(per_solve_options.D); + } + + const int num_cols = A->num_cols(); + Matrix lhs(num_cols, num_cols); + event_logger.AddEvent("Setup"); + + // lhs = A'A + // + // Note: This is a bit delicate, it assumes that the stride on this + // matrix is the same as the number of rows. + BLAS::SymmetricRankKUpdate(A->num_rows(), + num_cols, + A->values(), + true, + 1.0, + 0.0, + lhs.data()); + + if (per_solve_options.D != NULL) { + // Undo the modifications to the matrix A. + A->RemoveDiagonal(); + } + + // TODO(sameeragarwal): Replace this with a gemv call for true blasness. + // rhs = A'b + VectorRef(x, num_cols) = + A->matrix().transpose() * ConstVectorRef(b, A->num_rows()); + event_logger.AddEvent("Product"); + + const int info = LAPACK::SolveInPlaceUsingCholesky(num_cols, lhs.data(), x); + event_logger.AddEvent("Solve"); + + LinearSolver::Summary summary; + summary.num_iterations = 1; + summary.termination_type = info == 0 ? TOLERANCE : FAILURE; + + event_logger.AddEvent("TearDown"); + return summary; +} } // namespace internal } // namespace ceres diff --git a/internal/ceres/dense_normal_cholesky_solver.h b/internal/ceres/dense_normal_cholesky_solver.h index de47740..e35053f 100644 --- a/internal/ceres/dense_normal_cholesky_solver.h +++ b/internal/ceres/dense_normal_cholesky_solver.h @@ -85,6 +85,18 @@ class DenseNormalCholeskySolver: public DenseSparseMatrixSolver { const LinearSolver::PerSolveOptions& per_solve_options, double* x); + LinearSolver::Summary SolveUsingLAPACK( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x); + + LinearSolver::Summary SolveUsingEigen( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x); + const LinearSolver::Options options_; CERES_DISALLOW_COPY_AND_ASSIGN(DenseNormalCholeskySolver); }; diff --git a/internal/ceres/dense_qr_solver.cc b/internal/ceres/dense_qr_solver.cc index 1fb9709..d76d58b 100644 --- a/internal/ceres/dense_qr_solver.cc +++ b/internal/ceres/dense_qr_solver.cc @@ -30,12 +30,13 @@ #include "ceres/dense_qr_solver.h" -#include +#include #include "Eigen/Dense" #include "ceres/dense_sparse_matrix.h" #include "ceres/internal/eigen.h" #include "ceres/internal/scoped_ptr.h" +#include "ceres/lapack.h" #include "ceres/linear_solver.h" #include "ceres/types.h" #include "ceres/wall_time.h" @@ -44,13 +45,87 @@ namespace ceres { namespace internal { DenseQRSolver::DenseQRSolver(const LinearSolver::Options& options) - : options_(options) {} + : options_(options) { + work_.resize(1); +} LinearSolver::Summary DenseQRSolver::SolveImpl( DenseSparseMatrix* A, const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double* x) { + if (options_.dense_linear_algebra_library_type == EIGEN) { + return SolveUsingEigen(A, b, per_solve_options, x); + } else { + return SolveUsingLAPACK(A, b, per_solve_options, x); + } +} +LinearSolver::Summary DenseQRSolver::SolveUsingLAPACK( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x) { + EventLogger event_logger("DenseQRSolver::Solve"); + + const int num_rows = A->num_rows(); + const int num_cols = A->num_cols(); + + if (per_solve_options.D != NULL) { + // Temporarily append a diagonal block to the A matrix, but undo + // it before returning the matrix to the user. + A->AppendDiagonal(per_solve_options.D); + } + + // TODO(sameeragarwal): Since we are copying anyways, the diagonal + // can be appended to the matrix instead of doing it on A. + lhs_ = A->matrix(); + + if (per_solve_options.D != NULL) { + // Undo the modifications to the matrix A. + A->RemoveDiagonal(); + } + + // rhs = [b;0] to account for the additional rows in the lhs. + if (rhs_.rows() != lhs_.rows()) { + rhs_.resize(lhs_.rows()); + } + rhs_.setZero(); + rhs_.head(num_rows) = ConstVectorRef(b, num_rows); + + if (work_.rows() == 1) { + const int work_size = + LAPACK::EstimateWorkSizeForQR(lhs_.rows(), lhs_.cols()); + VLOG(3) << "Working memory for Dense QR factorization: " + << work_size * sizeof(double); + work_.resize(work_size); + } + + const int info = LAPACK::SolveUsingQR(lhs_.rows(), + lhs_.cols(), + lhs_.data(), + work_.rows(), + work_.data(), + rhs_.data()); + event_logger.AddEvent("Solve"); + + LinearSolver::Summary summary; + summary.num_iterations = 1; + if (info == 0) { + VectorRef(x, num_cols) = rhs_.head(num_cols); + summary.termination_type = TOLERANCE; + } else { + summary.termination_type = FAILURE; + } + + event_logger.AddEvent("TearDown"); + return summary; +} + +LinearSolver::Summary DenseQRSolver::SolveUsingEigen( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x) { EventLogger event_logger("DenseQRSolver::Solve"); const int num_rows = A->num_rows(); @@ -73,7 +148,7 @@ LinearSolver::Summary DenseQRSolver::SolveImpl( event_logger.AddEvent("Setup"); // Solve the system. - VectorRef(x, num_cols) = A->matrix().colPivHouseholderQr().solve(rhs_); + VectorRef(x, num_cols) = A->matrix().householderQr().solve(rhs_); event_logger.AddEvent("Solve"); if (per_solve_options.D != NULL) { diff --git a/internal/ceres/dense_qr_solver.h b/internal/ceres/dense_qr_solver.h index f78fa72..e745c63 100644 --- a/internal/ceres/dense_qr_solver.h +++ b/internal/ceres/dense_qr_solver.h @@ -90,8 +90,22 @@ class DenseQRSolver: public DenseSparseMatrixSolver { const LinearSolver::PerSolveOptions& per_solve_options, double* x); + LinearSolver::Summary SolveUsingEigen( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x); + + LinearSolver::Summary SolveUsingLAPACK( + DenseSparseMatrix* A, + const double* b, + const LinearSolver::PerSolveOptions& per_solve_options, + double* x); + const LinearSolver::Options options_; + ColMajorMatrix lhs_; Vector rhs_; + Vector work_; CERES_DISALLOW_COPY_AND_ASSIGN(DenseQRSolver); }; diff --git a/internal/ceres/implicit_schur_complement.cc b/internal/ceres/implicit_schur_complement.cc index 7c934fb..32722bb 100644 --- a/internal/ceres/implicit_schur_complement.cc +++ b/internal/ceres/implicit_schur_complement.cc @@ -161,7 +161,7 @@ void ImplicitSchurComplement::AddDiagonalAndInvert( m = m .selfadjointView() - .ldlt() + .llt() .solve(Matrix::Identity(row_block_size, row_block_size)); } } diff --git a/internal/ceres/implicit_schur_complement_test.cc b/internal/ceres/implicit_schur_complement_test.cc index bd36672..1694273 100644 --- a/internal/ceres/implicit_schur_complement_test.cc +++ b/internal/ceres/implicit_schur_complement_test.cc @@ -109,7 +109,7 @@ class ImplicitSchurComplementTest : public ::testing::Test { solution->setZero(); VectorRef schur_solution(solution->data() + num_cols_ - num_schur_rows, num_schur_rows); - schur_solution = lhs->selfadjointView().ldlt().solve(*rhs); + schur_solution = lhs->selfadjointView().llt().solve(*rhs); eliminator->BackSubstitute(A_.get(), b_.get(), D, schur_solution.data(), solution->data()); } @@ -156,7 +156,7 @@ class ImplicitSchurComplementTest : public ::testing::Test { // Reference solution to the f_block. const Vector reference_f_sol = - lhs.selfadjointView().ldlt().solve(rhs); + lhs.selfadjointView().llt().solve(rhs); // Backsubstituted solution from the implicit schur solver using the // reference solution to the f_block. diff --git a/internal/ceres/iterative_schur_complement_solver.cc b/internal/ceres/iterative_schur_complement_solver.cc index d39d7db..1aac565 100644 --- a/internal/ceres/iterative_schur_complement_solver.cc +++ b/internal/ceres/iterative_schur_complement_solver.cc @@ -78,6 +78,17 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl( } schur_complement_->Init(*A, per_solve_options.D, b); + const int num_schur_complement_blocks = + A->block_structure()->cols.size() - options_.elimination_groups[0]; + if (num_schur_complement_blocks == 0) { + VLOG(2) << "No parameter blocks left in the schur complement."; + LinearSolver::Summary cg_summary; + cg_summary.num_iterations = 0; + cg_summary.termination_type = TOLERANCE; + schur_complement_->BackSubstitute(NULL, x); + return cg_summary; + } + // Initialize the solution to the Schur complement system to zero. // // TODO(sameeragarwal): There maybe a better initialization than an @@ -97,8 +108,8 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl( Preconditioner::Options preconditioner_options; preconditioner_options.type = options_.preconditioner_type; - preconditioner_options.sparse_linear_algebra_library = - options_.sparse_linear_algebra_library; + preconditioner_options.sparse_linear_algebra_library_type = + options_.sparse_linear_algebra_library_type; preconditioner_options.num_threads = options_.num_threads; preconditioner_options.row_block_size = options_.row_block_size; preconditioner_options.e_block_size = options_.e_block_size; @@ -116,16 +127,16 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl( case SCHUR_JACOBI: if (preconditioner_.get() == NULL) { preconditioner_.reset( - new SchurJacobiPreconditioner( - *A->block_structure(), preconditioner_options)); + new SchurJacobiPreconditioner(*A->block_structure(), + preconditioner_options)); } break; case CLUSTER_JACOBI: case CLUSTER_TRIDIAGONAL: if (preconditioner_.get() == NULL) { preconditioner_.reset( - new VisibilityBasedPreconditioner( - *A->block_structure(), preconditioner_options)); + new VisibilityBasedPreconditioner(*A->block_structure(), + preconditioner_options)); } break; default: diff --git a/internal/ceres/iterative_schur_complement_solver_test.cc b/internal/ceres/iterative_schur_complement_solver_test.cc index 86e7825..db45741 100644 --- a/internal/ceres/iterative_schur_complement_solver_test.cc +++ b/internal/ceres/iterative_schur_complement_solver_test.cc @@ -58,9 +58,9 @@ const double kEpsilon = 1e-14; class IterativeSchurComplementSolverTest : public ::testing::Test { protected : - virtual void SetUp() { + void SetUpProblem(int problem_id) { scoped_ptr problem( - CreateLinearLeastSquaresProblemFromId(2)); + CreateLinearLeastSquaresProblemFromId(problem_id)); CHECK_NOTNULL(problem.get()); A_.reset(down_cast(problem->A.release())); @@ -90,7 +90,9 @@ class IterativeSchurComplementSolverTest : public ::testing::Test { qr->Solve(&dense_A, b_.get(), per_solve_options, reference_solution.data()); options.elimination_groups.push_back(num_eliminate_blocks_); + options.elimination_groups.push_back(0); options.max_num_iterations = num_cols_; + options.preconditioner_type = SCHUR_JACOBI; IterativeSchurComplementSolver isc(options); Vector isc_sol(num_cols_); @@ -114,7 +116,14 @@ class IterativeSchurComplementSolverTest : public ::testing::Test { scoped_array D_; }; -TEST_F(IterativeSchurComplementSolverTest, SolverTest) { +TEST_F(IterativeSchurComplementSolverTest, NormalProblem) { + SetUpProblem(2); + EXPECT_TRUE(TestSolver(NULL)); + EXPECT_TRUE(TestSolver(D_.get())); +} + +TEST_F(IterativeSchurComplementSolverTest, ProblemWithNoFBlocks) { + SetUpProblem(3); EXPECT_TRUE(TestSolver(NULL)); EXPECT_TRUE(TestSolver(D_.get())); } diff --git a/internal/ceres/lapack.cc b/internal/ceres/lapack.cc new file mode 100644 index 0000000..73bfa69 --- /dev/null +++ b/internal/ceres/lapack.cc @@ -0,0 +1,157 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2013 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) + +#include "ceres/lapack.h" +#include "glog/logging.h" + +// C interface to the LAPACK Cholesky factorization and triangular solve. +extern "C" void dpotrf_(char* uplo, + int* n, + double* a, + int* lda, + int* info); + +extern "C" void dpotrs_(char* uplo, + int* n, + int* nrhs, + double* a, + int* lda, + double* b, + int* ldb, + int* info); + +extern "C" void dgels_(char* uplo, + int* m, + int* n, + int* nrhs, + double* a, + int* lda, + double* b, + int* ldb, + double* work, + int* lwork, + int* info); + + +namespace ceres { +namespace internal { + +int LAPACK::SolveInPlaceUsingCholesky(int num_rows, + const double* in_lhs, + double* rhs_and_solution) { +#ifdef CERES_NO_LAPACK + LOG(FATAL) << "Ceres was built without a BLAS library."; + return -1; +#else + char uplo = 'L'; + int n = num_rows; + int info = 0; + int nrhs = 1; + double* lhs = const_cast(in_lhs); + + dpotrf_(&uplo, &n, lhs, &n, &info); + if (info != 0) { + LOG(INFO) << "Cholesky factorization (dpotrf) failed: " << info; + return info; + } + + dpotrs_(&uplo, &n, &nrhs, lhs, &n, rhs_and_solution, &n, &info); + if (info != 0) { + LOG(INFO) << "Triangular solve (dpotrs) failed: " << info; + } + + return info; +#endif +}; + +int LAPACK::EstimateWorkSizeForQR(int num_rows, int num_cols) { +#ifdef CERES_NO_LAPACK + LOG(FATAL) << "Ceres was built without a LAPACK library."; + return -1; +#else + char trans = 'N'; + int nrhs = 1; + int lwork = -1; + double work; + int info = 0; + dgels_(&trans, + &num_rows, + &num_cols, + &nrhs, + NULL, + &num_rows, + NULL, + &num_rows, + &work, + &lwork, + &info); + + CHECK_EQ(info, 0); + return work; +#endif +} + +int LAPACK::SolveUsingQR(int num_rows, + int num_cols, + const double* in_lhs, + int work_size, + double* work, + double* rhs_and_solution) { +#ifdef CERES_NO_LAPACK + LOG(FATAL) << "Ceres was built without a LAPACK library."; + return -1; +#else + char trans = 'N'; + int m = num_rows; + int n = num_cols; + int nrhs = 1; + int lda = num_rows; + int ldb = num_rows; + int info = 0; + double* lhs = const_cast(in_lhs); + + dgels_(&trans, + &m, + &n, + &nrhs, + lhs, + &lda, + rhs_and_solution, + &ldb, + work, + &work_size, + &info); + + return info; +#endif +} + +} // namespace internal +} // namespace ceres diff --git a/internal/ceres/lapack.h b/internal/ceres/lapack.h new file mode 100644 index 0000000..4f3a88c --- /dev/null +++ b/internal/ceres/lapack.h @@ -0,0 +1,88 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2013 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) + +#ifndef CERES_INTERNAL_LAPACK_H_ +#define CERES_INTERNAL_LAPACK_H_ + +namespace ceres { +namespace internal { + +class LAPACK { + public: + // Solve + // + // lhs * solution = rhs + // + // using a Cholesky factorization. Here + // lhs is a symmetric positive definite matrix. It is assumed to be + // column major and only the lower triangular part of the matrix is + // referenced. + // + // This function uses the LAPACK dpotrf and dpotrs routines. + // + // The return value is zero if the solve is successful. + static int SolveInPlaceUsingCholesky(int num_rows, + const double* lhs, + double* rhs_and_solution); + + // The SolveUsingQR function requires a buffer for its temporary + // computation. This function given the size of the lhs matrix will + // return the size of the buffer needed. + static int EstimateWorkSizeForQR(int num_rows, int num_cols); + + // Solve + // + // lhs * solution = rhs + // + // using a dense QR factorization. lhs is an arbitrary (possibly + // rectangular) matrix with full column rank. + // + // work is an array of size work_size that this routine uses for its + // temporary storage. The optimal size of this array can be obtained + // by calling EstimateWorkSizeForQR. + // + // When calling, rhs_and_solution contains the rhs, and upon return + // the first num_col entries are the solution. + // + // This function uses the LAPACK dgels routine. + // + // The return value is zero if the solve is successful. + static int SolveUsingQR(int num_rows, + int num_cols, + const double* lhs, + int work_size, + double* work, + double* rhs_and_solution); +}; + +} // namespace internal +} // namespace ceres + +#endif // CERES_INTERNAL_LAPACK_H_ diff --git a/internal/ceres/line_search.cc b/internal/ceres/line_search.cc index 39618b5..8323896 100644 --- a/internal/ceres/line_search.cc +++ b/internal/ceres/line_search.cc @@ -112,7 +112,7 @@ void LineSearchFunction::Init(const Vector& position, direction_ = direction; } -bool LineSearchFunction::Evaluate(const double x, double* f, double* g) { +bool LineSearchFunction::Evaluate(double x, double* f, double* g) { scaled_direction_ = x * direction_; if (!evaluator_->Plus(position_.data(), scaled_direction_.data(), diff --git a/internal/ceres/line_search.h b/internal/ceres/line_search.h index e4836b2..5f24e9f 100644 --- a/internal/ceres/line_search.h +++ b/internal/ceres/line_search.h @@ -231,7 +231,7 @@ class LineSearchFunction : public LineSearch::Function { explicit LineSearchFunction(Evaluator* evaluator); virtual ~LineSearchFunction() {} void Init(const Vector& position, const Vector& direction); - virtual bool Evaluate(const double x, double* f, double* g); + virtual bool Evaluate(double x, double* f, double* g); double DirectionInfinityNorm() const; private: diff --git a/internal/ceres/linear_solver.h b/internal/ceres/linear_solver.h index 67bebe0..22691b3 100644 --- a/internal/ceres/linear_solver.h +++ b/internal/ceres/linear_solver.h @@ -74,7 +74,8 @@ class LinearSolver { Options() : type(SPARSE_NORMAL_CHOLESKY), preconditioner_type(JACOBI), - sparse_linear_algebra_library(SUITE_SPARSE), + dense_linear_algebra_library_type(EIGEN), + sparse_linear_algebra_library_type(SUITE_SPARSE), use_postordering(false), min_num_iterations(1), max_num_iterations(1), @@ -89,7 +90,8 @@ class LinearSolver { PreconditionerType preconditioner_type; - SparseLinearAlgebraLibraryType sparse_linear_algebra_library; + DenseLinearAlgebraLibraryType dense_linear_algebra_library_type; + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type; // See solver.h for information about this flag. bool use_postordering; diff --git a/internal/ceres/miniglog/glog/logging.cc b/internal/ceres/miniglog/glog/logging.cc deleted file mode 100644 index 32a78ce..0000000 --- a/internal/ceres/miniglog/glog/logging.cc +++ /dev/null @@ -1,39 +0,0 @@ -// Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2012 Google Inc. All rights reserved. -// http://code.google.com/p/ceres-solver/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Google Inc. nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -// Author: keir@google.com (Keir Mierle) - -#include "glog/logging.h" - -namespace google { - -// This is the set of log sinks. This must be in a separate library to ensure -// that there is only one instance of this across the entire program. -std::set log_sinks_global; - -} // namespace ceres diff --git a/internal/ceres/miniglog/glog/logging.h b/internal/ceres/miniglog/glog/logging.h index 1fc137b..bab3191 100644 --- a/internal/ceres/miniglog/glog/logging.h +++ b/internal/ceres/miniglog/glog/logging.h @@ -1,100 +1,74 @@ -// Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2011, 2012 Google Inc. All rights reserved. -// http://code.google.com/p/ceres-solver/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// * Neither the name of Google Inc. nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// +// Copyright 2011 Google Inc. All Rights Reserved. // Author: settinger@google.com (Scott Ettinger) -// keir@google.com (Keir Mierle) -// -// Simplified Glog style logging with Android support. Supported macros in -// decreasing severity level per line: + +// Simplified Google3 style logging with Android support. +// Supported macros are : LOG(INFO), LOG(WARNING), LOG(ERROR), LOG(FATAL), +// and VLOG(n). // -// VLOG(2), VLOG(N) -// VLOG(1), -// LOG(INFO), VLOG(0), LG -// LOG(WARNING), -// LOG(ERROR), -// LOG(FATAL), +// Portions of this code are taken from the GLOG package. This code +// is only a small subset of the GLOG functionality. And like GLOG, +// higher levels are more verbose. // -// With VLOG(n), the output is directed to one of the 5 Android log levels: +// Notable differences from GLOG : // -// 2 - Verbose -// 1 - Debug -// 0 - Info -// -1 - Warning -// -2 - Error -// -3 - Fatal +// 1. lack of support for displaying unprintable characters and lack +// of stack trace information upon failure of the CHECK macros. +// 2. All output is tagged with the string "native". +// 3. While there is no runtime flag filtering logs (-v, -vmodule), the +// compile time define MAX_LOG_LEVEL can be used to silence any +// logging above the given level. // -// Any logging of level 2 and above is directed to the Verbose level. All -// Android log output is tagged with the string "native". +// -------------------------------- Usage ------------------------------------ +// Basic usage : +// LOG() acts as a c++ stream to the Android logcat output. +// e.g. LOG(INFO) << "Value of counter = " << counter; // -// If the symbol ANDROID is not defined, all output goes to std::cerr. -// This allows code to be built on a different system for debug. +// Valid severity levels include INFO, WARNING, ERROR, FATAL. +// The various severity levels are routed to the corresponding Android logcat +// output. +// LOG(FATAL) outputs to the log and then terminates. // -// Portions of this code are taken from the GLOG package. This code is only a -// small subset of the GLOG functionality. Notable differences from GLOG -// behavior include lack of support for displaying unprintable characters and -// lack of stack trace information upon failure of the CHECK macros. On -// non-Android systems, log output goes to std::cerr and is not written to a -// file. +// VLOG() can also be used. +// VLOG(n) output is directed to the Android logcat levels as follows : +// >=2 - Verbose +// 1 - Debug +// 0 - Info +// -1 - Warning +// -2 - Error +// <=-3 - Fatal +// Note that VLOG(FATAL) will terminate the program. // -// CHECK macros are defined to test for conditions within code. Any CHECK that -// fails will log the failure and terminate the application. +// CHECK macros are defined to test for conditions within code. Any CHECK +// that fails will log the failure and terminate the application. // e.g. CHECK_GE(3, 2) will pass while CHECK_GE(3, 4) will fail after logging // "Check failed 3 >= 4". +// The following CHECK macros are defined : // -// The following CHECK macros are defined: -// -// CHECK(condition) - fails if condition is false and logs condition. -// CHECK_NOTNULL(variable) - fails if the variable is NULL. +// CHECK(condition) - fails if condition is false and logs condition. +// CHECK_NOTNULL(variable) - fails if the variable is NULL. // // The following binary check macros are also defined : -// -// Macro Operator equivalent -// -------------------- ------------------- -// CHECK_EQ(val1, val2) val1 == val2 -// CHECK_NE(val1, val2) val1 != val2 -// CHECK_GT(val1, val2) val1 > val2 -// CHECK_GE(val1, val2) val1 >= val2 -// CHECK_LT(val1, val2) val1 < val2 -// CHECK_LE(val1, val2) val1 <= val2 +// Macro operator applied +// ------------------------------------------ +// CHECK_EQ(val1, val2) val1 == val2 +// CHECK_NE(val1, val2) val1 != val2 +// CHECK_GT(val1, val2) val1 > val2 +// CHECK_GE(val1, val2) val1 >= val2 +// CHECK_LT(val1, val2) val1 < val2 +// CHECK_LE(val1, val2) val1 <= val2 // // Debug only versions of all of the check macros are also defined. These // macros generate no code in a release build, but avoid unused variable // warnings / errors. -// -// To use the debug only versions, prepend a D to the normal check macros, e.g. -// DCHECK_EQ(a, b). +// To use the debug only versions, Prepend a D to the normal check macros. +// e.g. DCHECK_EQ(a, b); -#ifndef CERCES_INTERNAL_MINIGLOG_GLOG_LOGGING_H_ -#define CERCES_INTERNAL_MINIGLOG_GLOG_LOGGING_H_ +#ifndef MOBILE_BASE_LOGGING_H_ +#define MOBILE_BASE_LOGGING_H_ -#ifdef ANDROID +// Definitions for building on an Android system. #include -#endif // ANDROID +#include #include #include @@ -120,29 +94,26 @@ const int WARNING = ::WARNING; const int ERROR = ::ERROR; const int FATAL = ::FATAL; -// Sink class used for integration with mock and test functions. If sinks are -// added, all log output is also sent to each sink through the send function. -// In this implementation, WaitTillSent() is called immediately after the send. +#ifdef ENABLE_LOG_SINKS + +// Sink class used for integration with mock and test functions. +// If sinks are added, all log output is also sent to each sink through +// the send function. In this implementation, WaitTillSent() is called +// immediately after the send. // This implementation is not thread safe. class LogSink { public: virtual ~LogSink() {} - virtual void send(LogSeverity severity, - const char* full_filename, - const char* base_filename, - int line, + virtual void send(LogSeverity severity, const char* full_filename, + const char* base_filename, int line, const struct tm* tm_time, - const char* message, - size_t message_len) = 0; + const char* message, size_t message_len) = 0; virtual void WaitTillSent() = 0; }; -// Global set of log sinks. The actual object is defined in logging.cc. -extern std::set log_sinks_global; - -inline void InitGoogleLogging(char *argv) { - // Do nothing; this is ignored. -} +// Global set of log sinks. +// TODO(settinger): Move this into a .cc file. +static std::set log_sinks_global; // Note: the Log sink functions are not thread safe. inline void AddLogSink(LogSink *sink) { @@ -153,16 +124,19 @@ inline void RemoveLogSink(LogSink *sink) { log_sinks_global.erase(sink); } +#endif // #ifdef ENABLE_LOG_SINKS + +inline void InitGoogleLogging(char *argv) {} + } // namespace google // ---------------------------- Logger Class -------------------------------- // Class created for each use of the logging macros. // The logger acts as a stream and routes the final stream contents to the -// Android logcat output at the proper filter level. If ANDROID is not -// defined, output is directed to std::cerr. This class should not +// Android logcat output at the proper filter level. This class should not // be directly instantiated in code, rather it should be invoked through the -// use of the log macros LG, LOG, or VLOG. +// use of the log macros LOG, or VLOG. class MessageLogger { public: MessageLogger(const char *file, int line, const char *tag, int severity) @@ -174,14 +148,17 @@ class MessageLogger { // Output the contents of the stream to the proper channel on destruction. ~MessageLogger() { +#ifdef MAX_LOG_LEVEL + if (severity_ > MAX_LOG_LEVEL && severity_ > FATAL) { + return; + } +#endif stream_ << "\n"; - -#ifdef ANDROID static const int android_log_levels[] = { ANDROID_LOG_FATAL, // LOG(FATAL) ANDROID_LOG_ERROR, // LOG(ERROR) ANDROID_LOG_WARN, // LOG(WARNING) - ANDROID_LOG_INFO, // LOG(INFO), LG, VLOG(0) + ANDROID_LOG_INFO, // LOG(INFO), VLOG(0) ANDROID_LOG_DEBUG, // VLOG(1) ANDROID_LOG_VERBOSE, // VLOG(2) .. VLOG(N) }; @@ -193,22 +170,22 @@ class MessageLogger { int android_log_level = android_log_levels[android_level_index]; // Output the log string the Android log at the appropriate level. - __android_log_print(android_log_level, tag_.c_str(), stream_.str().c_str()); + __android_log_write(android_log_level, tag_.c_str(), stream_.str().c_str()); // Indicate termination if needed. if (severity_ == FATAL) { - __android_log_print(ANDROID_LOG_FATAL, + __android_log_write(ANDROID_LOG_FATAL, tag_.c_str(), "terminating.\n"); } -#else - // If not building on Android, log all output to std::cerr. - std::cerr << stream_.str(); -#endif // ANDROID + +#ifdef ENABLE_LOG_SINKS LogToSinks(severity_); WaitForSinks(); +#endif // #ifdef ENABLE_LOG_SINKS + // Android logging at level FATAL does not terminate execution, so abort() // is still required to stop the program. if (severity_ == FATAL) { @@ -220,41 +197,41 @@ class MessageLogger { std::stringstream &stream() { return stream_; } private: +#ifdef ENABLE_LOG_SINKS + void LogToSinks(int severity) { time_t rawtime; - struct tm* timeinfo; + struct tm * timeinfo; - time (&rawtime); - timeinfo = localtime(&rawtime); - std::set::iterator iter; + time ( &rawtime ); + timeinfo = localtime ( &rawtime ); + std::set::iterator iter; // Send the log message to all sinks. for (iter = google::log_sinks_global.begin(); - iter != google::log_sinks_global.end(); ++iter) { + iter != google::log_sinks_global.end(); ++iter) (*iter)->send(severity, file_.c_str(), filename_only_.c_str(), line_, timeinfo, stream_.str().c_str(), stream_.str().size()); - } } void WaitForSinks() { - // TODO(settinger): Add locks for thread safety. + // TODO(settinger): add locks for thread safety. std::set::iterator iter; - // Call WaitTillSent() for all sinks. for (iter = google::log_sinks_global.begin(); - iter != google::log_sinks_global.end(); ++iter) { + iter != google::log_sinks_global.end(); ++iter) (*iter)->WaitTillSent(); - } } +#endif // #ifdef ENABLE_LOG_SINKS + void StripBasename(const std::string &full_path, std::string *filename) { // TODO(settinger): add support for OS with different path separators. const char kSeparator = '/'; size_t pos = full_path.rfind(kSeparator); - if (pos != std::string::npos) { + if (pos != std::string::npos) *filename = full_path.substr(pos + 1, std::string::npos); - } else { + else *filename = full_path; - } } std::string file_; @@ -267,20 +244,6 @@ class MessageLogger { // ---------------------- Logging Macro definitions -------------------------- -#define LG MessageLogger((char *)__FILE__, __LINE__, "native", \ - INFO).stream() - -#define LOG(n) MessageLogger((char *)__FILE__, __LINE__, "native", \ - n).stream() - -#define VLOG(n) MessageLogger((char *)__FILE__, __LINE__, "native", \ - n).stream() - -// Currently, VLOG is always on. -#define VLOG_IS_ON(x) true - -// ---------------------------- CHECK helpers -------------------------------- - // This class is used to explicitly ignore values in the conditional // logging macros. This avoids compiler warnings like "value computed // is not used" and "statement has no effect". @@ -294,7 +257,40 @@ class LoggerVoidify { // Log only if condition is met. Otherwise evaluates to void. #define LOG_IF(severity, condition) \ - condition ? (void) 0 : LoggerVoidify() & LOG(severity) + !(condition) ? (void) 0 : LoggerVoidify() & \ + MessageLogger((char *)__FILE__, __LINE__, "native", severity).stream() + +// Log only if condition is NOT met. Otherwise evaluates to void. +#define LOG_IF_FALSE(severity, condition) LOG_IF(severity, !(condition)) + +// LG is a convenient shortcut for LOG(INFO). Its use is in new +// google3 code is discouraged and the following shortcut exists for +// backward compatibility with existing code. +#ifdef MAX_LOG_LEVEL +#define LOG(n) LOG_IF(n, n <= MAX_LOG_LEVEL) +#define VLOG(n) LOG_IF(n, n <= MAX_LOG_LEVEL) +#define LG LOG_IF(INFO, INFO <= MAX_LOG_LEVEL) +#else +#define LOG(n) MessageLogger((char *)__FILE__, __LINE__, "native", n).stream() +#define VLOG(n) MessageLogger((char *)__FILE__, __LINE__, "native", n).stream() +#define LG MessageLogger((char *)__FILE__, __LINE__, "native", INFO).stream() +#endif + +// Currently, VLOG is always on for levels below MAX_LOG_LEVEL. +#ifndef MAX_LOG_LEVEL +#define VLOG_IS_ON(x) (1) +#else +#define VLOG_IS_ON(x) (x <= MAX_LOG_LEVEL) +#endif + +#ifndef NDEBUG +#define DLOG LOG +#else +#define DLOG(severity) true ? (void) 0 : LoggerVoidify() & \ + MessageLogger((char *)__FILE__, __LINE__, "native", severity).stream() +#endif + +// ---------------------------- CHECK helpers -------------------------------- // Log a message and terminate. template @@ -306,16 +302,16 @@ void LogMessageFatal(const char *file, int line, const T &message) { // ---------------------------- CHECK macros --------------------------------- // Check for a given boolean condition. -#define CHECK(condition) LOG_IF(FATAL, condition) \ +#define CHECK(condition) LOG_IF_FALSE(FATAL, condition) \ << "Check failed: " #condition " " #ifndef NDEBUG // Debug only version of CHECK -#define DCHECK(condition) LOG_IF(FATAL, condition) \ +#define DCHECK(condition) LOG_IF_FALSE(FATAL, condition) \ << "Check failed: " #condition " " #else // Optimized version - generates no code. -#define DCHECK(condition) if (false) LOG_IF(FATAL, condition) \ +#define DCHECK(condition) if (false) LOG_IF_FALSE(FATAL, condition) \ << "Check failed: " #condition " " #endif // NDEBUG @@ -323,7 +319,7 @@ void LogMessageFatal(const char *file, int line, const T &message) { // Generic binary operator check macro. This should not be directly invoked, // instead use the binary comparison macros defined below. -#define CHECK_OP(val1, val2, op) LOG_IF(FATAL, (val1 op val2)) \ +#define CHECK_OP(val1, val2, op) LOG_IF_FALSE(FATAL, (val1 op val2)) \ << "Check failed: " #val1 " " #op " " #val2 " " // Check_op macro definitions @@ -388,4 +384,8 @@ T& CheckNotNull(const char *file, int line, const char *names, T& t) { CheckNotNull(__FILE__, __LINE__, "'" #val "' Must be non NULL", (val)) #endif // NDEBUG -#endif // CERCES_INTERNAL_MINIGLOG_GLOG_LOGGING_H_ +inline void PrintAndroid(const char *msg) { + __android_log_write(ANDROID_LOG_VERBOSE, "native", msg); +} + +#endif // MOBILE_BASE_LOGGING_H_ diff --git a/internal/ceres/partitioned_matrix_view.cc b/internal/ceres/partitioned_matrix_view.cc index 5dad438..59eaff8 100644 --- a/internal/ceres/partitioned_matrix_view.cc +++ b/internal/ceres/partitioned_matrix_view.cc @@ -35,10 +35,10 @@ #include #include #include -#include "ceres/blas.h" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" #include "ceres/internal/eigen.h" +#include "ceres/small_blas.h" #include "glog/logging.h" namespace ceres { diff --git a/internal/ceres/preconditioner.h b/internal/ceres/preconditioner.h index cb0a381..af64e3c 100644 --- a/internal/ceres/preconditioner.h +++ b/internal/ceres/preconditioner.h @@ -48,7 +48,7 @@ class Preconditioner : public LinearOperator { struct Options { Options() : type(JACOBI), - sparse_linear_algebra_library(SUITE_SPARSE), + sparse_linear_algebra_library_type(SUITE_SPARSE), num_threads(1), row_block_size(Eigen::Dynamic), e_block_size(Eigen::Dynamic), @@ -57,7 +57,7 @@ class Preconditioner : public LinearOperator { PreconditionerType type; - SparseLinearAlgebraLibraryType sparse_linear_algebra_library; + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type; // If possible, how many threads the preconditioner can use. int num_threads; diff --git a/internal/ceres/program_evaluator.h b/internal/ceres/program_evaluator.h index 19c7541..8aa2a39 100644 --- a/internal/ceres/program_evaluator.h +++ b/internal/ceres/program_evaluator.h @@ -86,13 +86,13 @@ #include #include #include -#include "ceres/blas.h" #include "ceres/execution_summary.h" #include "ceres/internal/eigen.h" #include "ceres/internal/scoped_ptr.h" #include "ceres/parameter_block.h" #include "ceres/program.h" #include "ceres/residual_block.h" +#include "ceres/small_blas.h" namespace ceres { namespace internal { diff --git a/internal/ceres/residual_block.cc b/internal/ceres/residual_block.cc index 649f3f7..621082a 100644 --- a/internal/ceres/residual_block.cc +++ b/internal/ceres/residual_block.cc @@ -34,8 +34,6 @@ #include #include #include - -#include "ceres/blas.h" #include "ceres/corrector.h" #include "ceres/parameter_block.h" #include "ceres/residual_block_utils.h" @@ -44,6 +42,7 @@ #include "ceres/internal/fixed_array.h" #include "ceres/local_parameterization.h" #include "ceres/loss_function.h" +#include "ceres/small_blas.h" using Eigen::Dynamic; diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc index 09f61d7..b192aa1 100644 --- a/internal/ceres/schur_complement_solver.cc +++ b/internal/ceres/schur_complement_solver.cc @@ -33,20 +33,18 @@ #include #include -#ifndef CERES_NO_CXSPARSE -#include "cs.h" -#endif // CERES_NO_CXSPARSE - #include "Eigen/Dense" #include "ceres/block_random_access_dense_matrix.h" #include "ceres/block_random_access_matrix.h" #include "ceres/block_random_access_sparse_matrix.h" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" +#include "ceres/cxsparse.h" #include "ceres/detect_structure.h" #include "ceres/internal/eigen.h" #include "ceres/internal/port.h" #include "ceres/internal/scoped_ptr.h" +#include "ceres/lapack.h" #include "ceres/linear_solver.h" #include "ceres/schur_complement_solver.h" #include "ceres/suitesparse.h" @@ -130,29 +128,31 @@ bool DenseSchurComplementSolver::SolveReducedLinearSystem(double* solution) { return true; } - // TODO(sameeragarwal): Add proper error handling; this completely ignores - // the quality of the solution to the solve. - VectorRef(solution, num_rows) = - ConstMatrixRef(m->values(), num_rows, num_rows) - .selfadjointView() - .ldlt() - .solve(ConstVectorRef(rhs(), num_rows)); + if (options().dense_linear_algebra_library_type == EIGEN) { + // TODO(sameeragarwal): Add proper error handling; this completely ignores + // the quality of the solution to the solve. + VectorRef(solution, num_rows) = + ConstMatrixRef(m->values(), num_rows, num_rows) + .selfadjointView() + .llt() + .solve(ConstVectorRef(rhs(), num_rows)); + return true; + } - return true; + VectorRef(solution, num_rows) = ConstVectorRef(rhs(), num_rows); + const int info = LAPACK::SolveInPlaceUsingCholesky(num_rows, + m->values(), + solution); + return (info == 0); } #if !defined(CERES_NO_SUITESPARSE) || !defined(CERES_NO_CXSPARE) SparseSchurComplementSolver::SparseSchurComplementSolver( const LinearSolver::Options& options) - : SchurComplementSolver(options) { -#ifndef CERES_NO_SUITESPARSE - factor_ = NULL; -#endif // CERES_NO_SUITESPARSE - -#ifndef CERES_NO_CXSPARSE - cxsparse_factor_ = NULL; -#endif // CERES_NO_CXSPARSE + : SchurComplementSolver(options), + factor_(NULL), + cxsparse_factor_(NULL) { } SparseSchurComplementSolver::~SparseSchurComplementSolver() { @@ -243,18 +243,18 @@ void SparseSchurComplementSolver::InitStorage( } bool SparseSchurComplementSolver::SolveReducedLinearSystem(double* solution) { - switch (options().sparse_linear_algebra_library) { + switch (options().sparse_linear_algebra_library_type) { case SUITE_SPARSE: return SolveReducedLinearSystemUsingSuiteSparse(solution); case CX_SPARSE: return SolveReducedLinearSystemUsingCXSparse(solution); default: LOG(FATAL) << "Unknown sparse linear algebra library : " - << options().sparse_linear_algebra_library; + << options().sparse_linear_algebra_library_type; } LOG(FATAL) << "Unknown sparse linear algebra library : " - << options().sparse_linear_algebra_library; + << options().sparse_linear_algebra_library_type; return false; } diff --git a/internal/ceres/schur_complement_solver.h b/internal/ceres/schur_complement_solver.h index 9525e37..b5a1c74 100644 --- a/internal/ceres/schur_complement_solver.h +++ b/internal/ceres/schur_complement_solver.h @@ -167,18 +167,14 @@ class SparseSchurComplementSolver : public SchurComplementSolver { // Size of the blocks in the Schur complement. vector blocks_; -#ifndef CERES_NO_SUITESPARSE SuiteSparse ss_; // Symbolic factorization of the reduced linear system. Precomputed // once and reused in subsequent calls. cholmod_factor* factor_; -#endif // CERES_NO_SUITESPARSE -#ifndef CERES_NO_CXSPARSE CXSparse cxsparse_; // Cached factorization cs_dis* cxsparse_factor_; -#endif // CERES_NO_CXSPARSE CERES_DISALLOW_COPY_AND_ASSIGN(SparseSchurComplementSolver); }; diff --git a/internal/ceres/schur_complement_solver_test.cc b/internal/ceres/schur_complement_solver_test.cc index 206d4b5..d91c162 100644 --- a/internal/ceres/schur_complement_solver_test.cc +++ b/internal/ceres/schur_complement_solver_test.cc @@ -87,7 +87,8 @@ class SchurComplementSolverTest : public ::testing::Test { int problem_id, bool regularization, ceres::LinearSolverType linear_solver_type, - ceres::SparseLinearAlgebraLibraryType sparse_linear_algebra_library, + ceres::DenseLinearAlgebraLibraryType dense_linear_algebra_library_type, + ceres::SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type, bool use_postordering) { SetUpFromProblemId(problem_id); LinearSolver::Options options; @@ -95,7 +96,10 @@ class SchurComplementSolverTest : public ::testing::Test { options.elimination_groups.push_back( A->block_structure()->cols.size() - num_eliminate_blocks); options.type = linear_solver_type; - options.sparse_linear_algebra_library = sparse_linear_algebra_library; + options.dense_linear_algebra_library_type = + dense_linear_algebra_library_type; + options.sparse_linear_algebra_library_type = + sparse_linear_algebra_library_type; options.use_postordering = use_postordering; scoped_ptr solver(LinearSolver::Create(options)); @@ -131,53 +135,67 @@ class SchurComplementSolverTest : public ::testing::Test { scoped_array sol_d; }; -TEST_F(SchurComplementSolverTest, DenseSchurWithSmallProblem) { - ComputeAndCompareSolutions(2, false, DENSE_SCHUR, SUITE_SPARSE, true); - ComputeAndCompareSolutions(2, true, DENSE_SCHUR, SUITE_SPARSE, true); +TEST_F(SchurComplementSolverTest, EigenBasedDenseSchurWithSmallProblem) { + ComputeAndCompareSolutions(2, false, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true); + ComputeAndCompareSolutions(2, true, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true); } -TEST_F(SchurComplementSolverTest, DenseSchurWithLargeProblem) { - ComputeAndCompareSolutions(3, false, DENSE_SCHUR, SUITE_SPARSE, true); - ComputeAndCompareSolutions(3, true, DENSE_SCHUR, SUITE_SPARSE, true); +TEST_F(SchurComplementSolverTest, EigenBasedDenseSchurWithLargeProblem) { + ComputeAndCompareSolutions(3, false, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true); + ComputeAndCompareSolutions(3, true, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true); } +#ifndef CERES_NO_LAPACK +TEST_F(SchurComplementSolverTest, LAPACKBasedDenseSchurWithSmallProblem) { + ComputeAndCompareSolutions(2, false, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true); + ComputeAndCompareSolutions(2, true, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true); +} + +TEST_F(SchurComplementSolverTest, LAPACKBasedDenseSchurWithLargeProblem) { + ComputeAndCompareSolutions(3, false, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true); + ComputeAndCompareSolutions(3, true, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true); +} +#endif + #ifndef CERES_NO_SUITESPARSE TEST_F(SchurComplementSolverTest, SparseSchurWithSuiteSparseSmallProblemNoPostOrdering) { - ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, SUITE_SPARSE, false); - ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, SUITE_SPARSE, false); + ComputeAndCompareSolutions( + 2, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false); + ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false); } TEST_F(SchurComplementSolverTest, SparseSchurWithSuiteSparseSmallProblemPostOrdering) { - ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, SUITE_SPARSE, true); - ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, SUITE_SPARSE, true); + ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true); + ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true); } TEST_F(SchurComplementSolverTest, SparseSchurWithSuiteSparseLargeProblemNoPostOrdering) { - ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, SUITE_SPARSE, false); - ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, SUITE_SPARSE, false); + ComputeAndCompareSolutions( + 3, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false); + ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false); } TEST_F(SchurComplementSolverTest, SparseSchurWithSuiteSparseLargeProblemPostOrdering) { - ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, SUITE_SPARSE, true); - ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, SUITE_SPARSE, true); + ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true); + ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true); } #endif // CERES_NO_SUITESPARSE #ifndef CERES_NO_CXSPARSE TEST_F(SchurComplementSolverTest, SparseSchurWithSuiteSparseSmallProblem) { - ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, SUITE_SPARSE, true); - ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, SUITE_SPARSE, true); + ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, EIGEN, CX_SPARSE, true); + ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, EIGEN, CX_SPARSE, true); } TEST_F(SchurComplementSolverTest, SparseSchurWithSuiteSparseLargeProblem) { - ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, SUITE_SPARSE, true); - ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, SUITE_SPARSE, true); + ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, EIGEN, CX_SPARSE, true); + ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, EIGEN, CX_SPARSE, true); } #endif // CERES_NO_CXSPARSE diff --git a/internal/ceres/schur_eliminator_impl.h b/internal/ceres/schur_eliminator_impl.h index f072c88..c09b7fb 100644 --- a/internal/ceres/schur_eliminator_impl.h +++ b/internal/ceres/schur_eliminator_impl.h @@ -51,8 +51,6 @@ #include #include - -#include "ceres/blas.h" #include "ceres/block_random_access_matrix.h" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" @@ -61,6 +59,7 @@ #include "ceres/internal/scoped_ptr.h" #include "ceres/map_util.h" #include "ceres/schur_eliminator.h" +#include "ceres/small_blas.h" #include "ceres/stl_util.h" #include "Eigen/Dense" #include "glog/logging.h" diff --git a/internal/ceres/schur_eliminator_test.cc b/internal/ceres/schur_eliminator_test.cc index a7e96ae..bed8f3a 100644 --- a/internal/ceres/schur_eliminator_test.cc +++ b/internal/ceres/schur_eliminator_test.cc @@ -112,7 +112,7 @@ class SchurEliminatorTest : public ::testing::Test { P.block(row, row, block_size, block_size) = P .block(row, row, block_size, block_size) - .ldlt() + .llt() .solve(Matrix::Identity(block_size, block_size)); row += block_size; } @@ -121,7 +121,7 @@ class SchurEliminatorTest : public ::testing::Test { .triangularView() = R - Q.transpose() * P * Q; rhs_expected = g.tail(schur_size) - Q.transpose() * P * g.head(num_eliminate_cols); - sol_expected = H.ldlt().solve(g); + sol_expected = H.llt().solve(g); } void EliminateSolveAndCompare(const VectorRef& diagonal, @@ -160,7 +160,7 @@ class SchurEliminatorTest : public ::testing::Test { Vector reduced_sol = lhs_ref .selfadjointView() - .ldlt() + .llt() .solve(rhs); // Solution to the linear least squares problem. diff --git a/internal/ceres/schur_jacobi_preconditioner.cc b/internal/ceres/schur_jacobi_preconditioner.cc index aa840c5..338df71 100644 --- a/internal/ceres/schur_jacobi_preconditioner.cc +++ b/internal/ceres/schur_jacobi_preconditioner.cc @@ -128,7 +128,7 @@ void SchurJacobiPreconditioner::RightMultiply(const double* x, VectorRef(y, block_size) = block .selfadjointView() - .ldlt() + .llt() .solve(ConstVectorRef(x, block_size)); x += block_size; diff --git a/internal/ceres/small_blas.h b/internal/ceres/small_blas.h new file mode 100644 index 0000000..e14e664 --- /dev/null +++ b/internal/ceres/small_blas.h @@ -0,0 +1,406 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2013 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) +// +// Simple blas functions for use in the Schur Eliminator. These are +// fairly basic implementations which already yield a significant +// speedup in the eliminator performance. + +#ifndef CERES_INTERNAL_SMALL_BLAS_H_ +#define CERES_INTERNAL_SMALL_BLAS_H_ + +#include "ceres/internal/eigen.h" +#include "glog/logging.h" + +namespace ceres { +namespace internal { + +// Remove the ".noalias()" annotation from the matrix matrix +// mutliplies to produce a correct build with the Android NDK, +// including versions 6, 7, 8, and 8b, when built with STLPort and the +// non-standalone toolchain (i.e. ndk-build). This appears to be a +// compiler bug; if the workaround is not in place, the line +// +// block.noalias() -= A * B; +// +// gets compiled to +// +// block.noalias() += A * B; +// +// which breaks schur elimination. Introducing a temporary by removing the +// .noalias() annotation causes the issue to disappear. Tracking this +// issue down was tricky, since the test suite doesn't run when built with +// the non-standalone toolchain. +// +// TODO(keir): Make a reproduction case for this and send it upstream. +#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG +#define CERES_MAYBE_NOALIAS +#else +#define CERES_MAYBE_NOALIAS .noalias() +#endif + +// The following three macros are used to share code and reduce +// template junk across the various GEMM variants. +#define CERES_GEMM_BEGIN(name) \ + template \ + inline void name(const double* A, \ + const int num_row_a, \ + const int num_col_a, \ + const double* B, \ + const int num_row_b, \ + const int num_col_b, \ + double* C, \ + const int start_row_c, \ + const int start_col_c, \ + const int row_stride_c, \ + const int col_stride_c) + +#define CERES_GEMM_NAIVE_HEADER \ + DCHECK_GT(num_row_a, 0); \ + DCHECK_GT(num_col_a, 0); \ + DCHECK_GT(num_row_b, 0); \ + DCHECK_GT(num_col_b, 0); \ + DCHECK_GE(start_row_c, 0); \ + DCHECK_GE(start_col_c, 0); \ + DCHECK_GT(row_stride_c, 0); \ + DCHECK_GT(col_stride_c, 0); \ + DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); \ + DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); \ + DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b)); \ + DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b)); \ + const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); \ + const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); \ + const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b); \ + const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b); + +#define CERES_GEMM_EIGEN_HEADER \ + const typename EigenTypes::ConstMatrixRef \ + Aref(A, num_row_a, num_col_a); \ + const typename EigenTypes::ConstMatrixRef \ + Bref(B, num_row_b, num_col_b); \ + MatrixRef Cref(C, row_stride_c, col_stride_c); \ + +#define CERES_CALL_GEMM(name) \ + name( \ + A, num_row_a, num_col_a, \ + B, num_row_b, num_col_b, \ + C, start_row_c, start_col_c, row_stride_c, col_stride_c); + + +// For the matrix-matrix functions below, there are three variants for +// each functionality. Foo, FooNaive and FooEigen. Foo is the one to +// be called by the user. FooNaive is a basic loop based +// implementation and FooEigen uses Eigen's implementation. Foo +// chooses between FooNaive and FooEigen depending on how many of the +// template arguments are fixed at compile time. Currently, FooEigen +// is called if all matrix dimensions are compile time +// constants. FooNaive is called otherwise. This leads to the best +// performance currently. +// +// The MatrixMatrixMultiply variants compute: +// +// C op A * B; +// +// The MatrixTransposeMatrixMultiply variants compute: +// +// C op A' * B +// +// where op can be +=, -=, or =. +// +// The template parameters (kRowA, kColA, kRowB, kColB) allow +// specialization of the loop at compile time. If this information is +// not available, then Eigen::Dynamic should be used as the template +// argument. +// +// kOperation = 1 -> C += A * B +// kOperation = -1 -> C -= A * B +// kOperation = 0 -> C = A * B +// +// The functions can write into matrices C which are larger than the +// matrix A * B. This is done by specifying the true size of C via +// row_stride_c and col_stride_c, and then indicating where A * B +// should be written into by start_row_c and start_col_c. +// +// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c = +// 4 and start_col_c = 5, then if A = 3x2 and B = 2x4, we get +// +// ------------ +// ------------ +// ------------ +// ------------ +// -----xxxx--- +// -----xxxx--- +// -----xxxx--- +// ------------ +// ------------ +// ------------ +// +CERES_GEMM_BEGIN(MatrixMatrixMultiplyEigen) { + CERES_GEMM_EIGEN_HEADER + Eigen::Block + block(Cref, start_row_c, start_col_c, num_row_a, num_col_b); + + if (kOperation > 0) { + block CERES_MAYBE_NOALIAS += Aref * Bref; + } else if (kOperation < 0) { + block CERES_MAYBE_NOALIAS -= Aref * Bref; + } else { + block CERES_MAYBE_NOALIAS = Aref * Bref; + } +} + +CERES_GEMM_BEGIN(MatrixMatrixMultiplyNaive) { + CERES_GEMM_NAIVE_HEADER + DCHECK_EQ(NUM_COL_A, NUM_ROW_B); + + const int NUM_ROW_C = NUM_ROW_A; + const int NUM_COL_C = NUM_COL_B; + DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c); + DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c); + + for (int row = 0; row < NUM_ROW_C; ++row) { + for (int col = 0; col < NUM_COL_C; ++col) { + double tmp = 0.0; + for (int k = 0; k < NUM_COL_A; ++k) { + tmp += A[row * NUM_COL_A + k] * B[k * NUM_COL_B + col]; + } + + const int index = (row + start_row_c) * col_stride_c + start_col_c + col; + if (kOperation > 0) { + C[index] += tmp; + } else if (kOperation < 0) { + C[index] -= tmp; + } else { + C[index] = tmp; + } + } + } +} + +CERES_GEMM_BEGIN(MatrixMatrixMultiply) { +#ifdef CERES_NO_CUSTOM_BLAS + + CERES_CALL_GEMM(MatrixMatrixMultiplyEigen) + return; + +#else + + if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic && + kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) { + CERES_CALL_GEMM(MatrixMatrixMultiplyEigen) + } else { + CERES_CALL_GEMM(MatrixMatrixMultiplyNaive) + } + +#endif +} + +CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyEigen) { + CERES_GEMM_EIGEN_HEADER + Eigen::Block block(Cref, + start_row_c, start_col_c, + num_col_a, num_col_b); + if (kOperation > 0) { + block CERES_MAYBE_NOALIAS += Aref.transpose() * Bref; + } else if (kOperation < 0) { + block CERES_MAYBE_NOALIAS -= Aref.transpose() * Bref; + } else { + block CERES_MAYBE_NOALIAS = Aref.transpose() * Bref; + } +} + +CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyNaive) { + CERES_GEMM_NAIVE_HEADER + DCHECK_EQ(NUM_ROW_A, NUM_ROW_B); + + const int NUM_ROW_C = NUM_COL_A; + const int NUM_COL_C = NUM_COL_B; + DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c); + DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c); + + for (int row = 0; row < NUM_ROW_C; ++row) { + for (int col = 0; col < NUM_COL_C; ++col) { + double tmp = 0.0; + for (int k = 0; k < NUM_ROW_A; ++k) { + tmp += A[k * NUM_COL_A + row] * B[k * NUM_COL_B + col]; + } + + const int index = (row + start_row_c) * col_stride_c + start_col_c + col; + if (kOperation > 0) { + C[index]+= tmp; + } else if (kOperation < 0) { + C[index]-= tmp; + } else { + C[index]= tmp; + } + } + } +} + +CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiply) { +#ifdef CERES_NO_CUSTOM_BLAS + + CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen) + return; + +#else + + if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic && + kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) { + CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen) + } else { + CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyNaive) + } + +#endif +} + +// Matrix-Vector multiplication +// +// c op A * b; +// +// where op can be +=, -=, or =. +// +// The template parameters (kRowA, kColA) allow specialization of the +// loop at compile time. If this information is not available, then +// Eigen::Dynamic should be used as the template argument. +// +// kOperation = 1 -> c += A' * b +// kOperation = -1 -> c -= A' * b +// kOperation = 0 -> c = A' * b +template +inline void MatrixVectorMultiply(const double* A, + const int num_row_a, + const int num_col_a, + const double* b, + double* c) { +#ifdef CERES_NO_CUSTOM_BLAS + const typename EigenTypes::ConstMatrixRef + Aref(A, num_row_a, num_col_a); + const typename EigenTypes::ConstVectorRef bref(b, num_col_a); + typename EigenTypes::VectorRef cref(c, num_row_a); + + // lazyProduct works better than .noalias() for matrix-vector + // products. + if (kOperation > 0) { + cref += Aref.lazyProduct(bref); + } else if (kOperation < 0) { + cref -= Aref.lazyProduct(bref); + } else { + cref = Aref.lazyProduct(bref); + } +#else + + DCHECK_GT(num_row_a, 0); + DCHECK_GT(num_col_a, 0); + DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); + DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); + + const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); + const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); + + for (int row = 0; row < NUM_ROW_A; ++row) { + double tmp = 0.0; + for (int col = 0; col < NUM_COL_A; ++col) { + tmp += A[row * NUM_COL_A + col] * b[col]; + } + + if (kOperation > 0) { + c[row] += tmp; + } else if (kOperation < 0) { + c[row] -= tmp; + } else { + c[row] = tmp; + } + } +#endif // CERES_NO_CUSTOM_BLAS +} + +// Similar to MatrixVectorMultiply, except that A is transposed, i.e., +// +// c op A' * b; +template +inline void MatrixTransposeVectorMultiply(const double* A, + const int num_row_a, + const int num_col_a, + const double* b, + double* c) { +#ifdef CERES_NO_CUSTOM_BLAS + const typename EigenTypes::ConstMatrixRef + Aref(A, num_row_a, num_col_a); + const typename EigenTypes::ConstVectorRef bref(b, num_row_a); + typename EigenTypes::VectorRef cref(c, num_col_a); + + // lazyProduct works better than .noalias() for matrix-vector + // products. + if (kOperation > 0) { + cref += Aref.transpose().lazyProduct(bref); + } else if (kOperation < 0) { + cref -= Aref.transpose().lazyProduct(bref); + } else { + cref = Aref.transpose().lazyProduct(bref); + } +#else + + DCHECK_GT(num_row_a, 0); + DCHECK_GT(num_col_a, 0); + DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); + DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); + + const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); + const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); + + for (int row = 0; row < NUM_COL_A; ++row) { + double tmp = 0.0; + for (int col = 0; col < NUM_ROW_A; ++col) { + tmp += A[col * NUM_COL_A + row] * b[col]; + } + + if (kOperation > 0) { + c[row] += tmp; + } else if (kOperation < 0) { + c[row] -= tmp; + } else { + c[row] = tmp; + } + } +#endif // CERES_NO_CUSTOM_BLAS +} + + +#undef CERES_MAYBE_NOALIAS +#undef CERES_GEMM_BEGIN +#undef CERES_GEMM_EIGEN_HEADER +#undef CERES_GEMM_NAIVE_HEADER +#undef CERES_CALL_GEMM + +} // namespace internal +} // namespace ceres + +#endif // CERES_INTERNAL_SMALL_BLAS_H_ diff --git a/internal/ceres/small_blas_test.cc b/internal/ceres/small_blas_test.cc new file mode 100644 index 0000000..b8b5bc5 --- /dev/null +++ b/internal/ceres/small_blas_test.cc @@ -0,0 +1,303 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2013 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: keir@google.com (Keir Mierle) + +#include "ceres/small_blas.h" + +#include "gtest/gtest.h" +#include "ceres/internal/eigen.h" + +namespace ceres { +namespace internal { + +TEST(BLAS, MatrixMatrixMultiply) { + const double kTolerance = 1e-16; + const int kRowA = 3; + const int kColA = 5; + Matrix A(kRowA, kColA); + A.setOnes(); + + const int kRowB = 5; + const int kColB = 7; + Matrix B(kRowB, kColB); + B.setOnes(); + + for (int row_stride_c = kRowA; row_stride_c < 3 * kRowA; ++row_stride_c) { + for (int col_stride_c = kColB; col_stride_c < 3 * kColB; ++col_stride_c) { + Matrix C(row_stride_c, col_stride_c); + C.setOnes(); + + Matrix C_plus = C; + Matrix C_minus = C; + Matrix C_assign = C; + + Matrix C_plus_ref = C; + Matrix C_minus_ref = C; + Matrix C_assign_ref = C; + for (int start_row_c = 0; start_row_c + kRowA < row_stride_c; ++start_row_c) { + for (int start_col_c = 0; start_col_c + kColB < col_stride_c; ++start_col_c) { + C_plus_ref.block(start_row_c, start_col_c, kRowA, kColB) += + A * B; + + MatrixMatrixMultiply( + A.data(), kRowA, kColA, + B.data(), kRowB, kColB, + C_plus.data(), start_row_c, start_col_c, row_stride_c, col_stride_c); + + EXPECT_NEAR((C_plus_ref - C_plus).norm(), 0.0, kTolerance) + << "C += A * B \n" + << "row_stride_c : " << row_stride_c << "\n" + << "col_stride_c : " << col_stride_c << "\n" + << "start_row_c : " << start_row_c << "\n" + << "start_col_c : " << start_col_c << "\n" + << "Cref : \n" << C_plus_ref << "\n" + << "C: \n" << C_plus; + + + C_minus_ref.block(start_row_c, start_col_c, kRowA, kColB) -= + A * B; + + MatrixMatrixMultiply( + A.data(), kRowA, kColA, + B.data(), kRowB, kColB, + C_minus.data(), start_row_c, start_col_c, row_stride_c, col_stride_c); + + EXPECT_NEAR((C_minus_ref - C_minus).norm(), 0.0, kTolerance) + << "C -= A * B \n" + << "row_stride_c : " << row_stride_c << "\n" + << "col_stride_c : " << col_stride_c << "\n" + << "start_row_c : " << start_row_c << "\n" + << "start_col_c : " << start_col_c << "\n" + << "Cref : \n" << C_minus_ref << "\n" + << "C: \n" << C_minus; + + C_assign_ref.block(start_row_c, start_col_c, kRowA, kColB) = + A * B; + + MatrixMatrixMultiply( + A.data(), kRowA, kColA, + B.data(), kRowB, kColB, + C_assign.data(), start_row_c, start_col_c, row_stride_c, col_stride_c); + + EXPECT_NEAR((C_assign_ref - C_assign).norm(), 0.0, kTolerance) + << "C = A * B \n" + << "row_stride_c : " << row_stride_c << "\n" + << "col_stride_c : " << col_stride_c << "\n" + << "start_row_c : " << start_row_c << "\n" + << "start_col_c : " << start_col_c << "\n" + << "Cref : \n" << C_assign_ref << "\n" + << "C: \n" << C_assign; + } + } + } + } +} + +TEST(BLAS, MatrixTransposeMatrixMultiply) { + const double kTolerance = 1e-16; + const int kRowA = 5; + const int kColA = 3; + Matrix A(kRowA, kColA); + A.setOnes(); + + const int kRowB = 5; + const int kColB = 7; + Matrix B(kRowB, kColB); + B.setOnes(); + + for (int row_stride_c = kColA; row_stride_c < 3 * kColA; ++row_stride_c) { + for (int col_stride_c = kColB; col_stride_c < 3 * kColB; ++col_stride_c) { + Matrix C(row_stride_c, col_stride_c); + C.setOnes(); + + Matrix C_plus = C; + Matrix C_minus = C; + Matrix C_assign = C; + + Matrix C_plus_ref = C; + Matrix C_minus_ref = C; + Matrix C_assign_ref = C; + for (int start_row_c = 0; start_row_c + kColA < row_stride_c; ++start_row_c) { + for (int start_col_c = 0; start_col_c + kColB < col_stride_c; ++start_col_c) { + C_plus_ref.block(start_row_c, start_col_c, kColA, kColB) += + A.transpose() * B; + + MatrixTransposeMatrixMultiply( + A.data(), kRowA, kColA, + B.data(), kRowB, kColB, + C_plus.data(), start_row_c, start_col_c, row_stride_c, col_stride_c); + + EXPECT_NEAR((C_plus_ref - C_plus).norm(), 0.0, kTolerance) + << "C += A' * B \n" + << "row_stride_c : " << row_stride_c << "\n" + << "col_stride_c : " << col_stride_c << "\n" + << "start_row_c : " << start_row_c << "\n" + << "start_col_c : " << start_col_c << "\n" + << "Cref : \n" << C_plus_ref << "\n" + << "C: \n" << C_plus; + + C_minus_ref.block(start_row_c, start_col_c, kColA, kColB) -= + A.transpose() * B; + + MatrixTransposeMatrixMultiply( + A.data(), kRowA, kColA, + B.data(), kRowB, kColB, + C_minus.data(), start_row_c, start_col_c, row_stride_c, col_stride_c); + + EXPECT_NEAR((C_minus_ref - C_minus).norm(), 0.0, kTolerance) + << "C -= A' * B \n" + << "row_stride_c : " << row_stride_c << "\n" + << "col_stride_c : " << col_stride_c << "\n" + << "start_row_c : " << start_row_c << "\n" + << "start_col_c : " << start_col_c << "\n" + << "Cref : \n" << C_minus_ref << "\n" + << "C: \n" << C_minus; + + C_assign_ref.block(start_row_c, start_col_c, kColA, kColB) = + A.transpose() * B; + + MatrixTransposeMatrixMultiply( + A.data(), kRowA, kColA, + B.data(), kRowB, kColB, + C_assign.data(), start_row_c, start_col_c, row_stride_c, col_stride_c); + + EXPECT_NEAR((C_assign_ref - C_assign).norm(), 0.0, kTolerance) + << "C = A' * B \n" + << "row_stride_c : " << row_stride_c << "\n" + << "col_stride_c : " << col_stride_c << "\n" + << "start_row_c : " << start_row_c << "\n" + << "start_col_c : " << start_col_c << "\n" + << "Cref : \n" << C_assign_ref << "\n" + << "C: \n" << C_assign; + } + } + } + } +} + +TEST(BLAS, MatrixVectorMultiply) { + const double kTolerance = 1e-16; + const int kRowA = 5; + const int kColA = 3; + Matrix A(kRowA, kColA); + A.setOnes(); + + Vector b(kColA); + b.setOnes(); + + Vector c(kRowA); + c.setOnes(); + + Vector c_plus = c; + Vector c_minus = c; + Vector c_assign = c; + + Vector c_plus_ref = c; + Vector c_minus_ref = c; + Vector c_assign_ref = c; + + c_plus_ref += A * b; + MatrixVectorMultiply(A.data(), kRowA, kColA, + b.data(), + c_plus.data()); + EXPECT_NEAR((c_plus_ref - c_plus).norm(), 0.0, kTolerance) + << "c += A * b \n" + << "c_ref : \n" << c_plus_ref << "\n" + << "c: \n" << c_plus; + + c_minus_ref -= A * b; + MatrixVectorMultiply(A.data(), kRowA, kColA, + b.data(), + c_minus.data()); + EXPECT_NEAR((c_minus_ref - c_minus).norm(), 0.0, kTolerance) + << "c += A * b \n" + << "c_ref : \n" << c_minus_ref << "\n" + << "c: \n" << c_minus; + + c_assign_ref = A * b; + MatrixVectorMultiply(A.data(), kRowA, kColA, + b.data(), + c_assign.data()); + EXPECT_NEAR((c_assign_ref - c_assign).norm(), 0.0, kTolerance) + << "c += A * b \n" + << "c_ref : \n" << c_assign_ref << "\n" + << "c: \n" << c_assign; +} + +TEST(BLAS, MatrixTransposeVectorMultiply) { + const double kTolerance = 1e-16; + const int kRowA = 5; + const int kColA = 3; + Matrix A(kRowA, kColA); + A.setRandom(); + + Vector b(kRowA); + b.setRandom(); + + Vector c(kColA); + c.setOnes(); + + Vector c_plus = c; + Vector c_minus = c; + Vector c_assign = c; + + Vector c_plus_ref = c; + Vector c_minus_ref = c; + Vector c_assign_ref = c; + + c_plus_ref += A.transpose() * b; + MatrixTransposeVectorMultiply(A.data(), kRowA, kColA, + b.data(), + c_plus.data()); + EXPECT_NEAR((c_plus_ref - c_plus).norm(), 0.0, kTolerance) + << "c += A' * b \n" + << "c_ref : \n" << c_plus_ref << "\n" + << "c: \n" << c_plus; + + c_minus_ref -= A.transpose() * b; + MatrixTransposeVectorMultiply(A.data(), kRowA, kColA, + b.data(), + c_minus.data()); + EXPECT_NEAR((c_minus_ref - c_minus).norm(), 0.0, kTolerance) + << "c += A' * b \n" + << "c_ref : \n" << c_minus_ref << "\n" + << "c: \n" << c_minus; + + c_assign_ref = A.transpose() * b; + MatrixTransposeVectorMultiply(A.data(), kRowA, kColA, + b.data(), + c_assign.data()); + EXPECT_NEAR((c_assign_ref - c_assign).norm(), 0.0, kTolerance) + << "c += A' * b \n" + << "c_ref : \n" << c_assign_ref << "\n" + << "c: \n" << c_assign; +} + +} // namespace internal +} // namespace ceres diff --git a/internal/ceres/solver.cc b/internal/ceres/solver.cc index 5d8447d..3b67746 100644 --- a/internal/ceres/solver.cc +++ b/internal/ceres/solver.cc @@ -120,7 +120,8 @@ Solver::Summary::Summary() inner_iterations_used(false), preconditioner_type(IDENTITY), trust_region_strategy_type(LEVENBERG_MARQUARDT), - sparse_linear_algebra_library(SUITE_SPARSE), + dense_linear_algebra_library_type(EIGEN), + sparse_linear_algebra_library_type(SUITE_SPARSE), line_search_direction_type(LBFGS), line_search_type(ARMIJO) { } @@ -192,6 +193,15 @@ string Solver::Summary::FullReport() const { // TRUST_SEARCH HEADER StringAppendF(&report, "\nMinimizer %19s\n", "TRUST_REGION"); + + if (linear_solver_type_used == DENSE_NORMAL_CHOLESKY || + linear_solver_type_used == DENSE_SCHUR || + linear_solver_type_used == DENSE_QR) { + StringAppendF(&report, "\nDense linear algebra library %15s\n", + DenseLinearAlgebraLibraryTypeToString( + dense_linear_algebra_library_type)); + } + if (linear_solver_type_used == SPARSE_NORMAL_CHOLESKY || linear_solver_type_used == SPARSE_SCHUR || (linear_solver_type_used == ITERATIVE_SCHUR && @@ -199,7 +209,7 @@ string Solver::Summary::FullReport() const { preconditioner_type == CLUSTER_TRIDIAGONAL))) { StringAppendF(&report, "\nSparse linear algebra library %15s\n", SparseLinearAlgebraLibraryTypeToString( - sparse_linear_algebra_library)); + sparse_linear_algebra_library_type)); } StringAppendF(&report, "Trust region strategy %19s", diff --git a/internal/ceres/solver_impl.cc b/internal/ceres/solver_impl.cc index d6ef731..83faa05 100644 --- a/internal/ceres/solver_impl.cc +++ b/internal/ceres/solver_impl.cc @@ -317,6 +317,16 @@ void SolverImpl::LineSearchMinimize( void SolverImpl::Solve(const Solver::Options& options, ProblemImpl* problem_impl, Solver::Summary* summary) { + VLOG(2) << "Initial problem: " + << problem_impl->NumParameterBlocks() + << " parameter blocks, " + << problem_impl->NumParameters() + << " parameters, " + << problem_impl->NumResidualBlocks() + << " residual blocks, " + << problem_impl->NumResiduals() + << " residuals."; + if (options.minimizer_type == TRUST_REGION) { TrustRegionSolve(options, problem_impl, summary); } else { @@ -506,8 +516,10 @@ void SolverImpl::TrustRegionSolve(const Solver::Options& original_options, original_options.num_linear_solver_threads; summary->num_linear_solver_threads_used = options.num_linear_solver_threads; - summary->sparse_linear_algebra_library = - options.sparse_linear_algebra_library; + summary->dense_linear_algebra_library_type = + options.dense_linear_algebra_library_type; + summary->sparse_linear_algebra_library_type = + options.sparse_linear_algebra_library_type; summary->trust_region_strategy_type = options.trust_region_strategy_type; summary->dogleg_type = options.dogleg_type; @@ -1067,6 +1079,16 @@ Program* SolverImpl::CreateReducedProgram(Solver::Options* options, return NULL; } + VLOG(2) << "Reduced problem: " + << transformed_program->NumParameterBlocks() + << " parameter blocks, " + << transformed_program->NumParameters() + << " parameters, " + << transformed_program->NumResidualBlocks() + << " residual blocks, " + << transformed_program->NumResiduals() + << " residuals."; + if (transformed_program->NumParameterBlocks() == 0) { LOG(WARNING) << "No varying parameter blocks to optimize; " << "bailing early."; @@ -1092,7 +1114,7 @@ Program* SolverImpl::CreateReducedProgram(Solver::Options* options, if (IsSchurType(options->linear_solver_type)) { if (!ReorderProgramForSchurTypeLinearSolver( options->linear_solver_type, - options->sparse_linear_algebra_library, + options->sparse_linear_algebra_library_type, problem_impl->parameter_map(), linear_solver_ordering, transformed_program.get(), @@ -1104,7 +1126,7 @@ Program* SolverImpl::CreateReducedProgram(Solver::Options* options, if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY) { if (!ReorderProgramForSparseNormalCholesky( - options->sparse_linear_algebra_library, + options->sparse_linear_algebra_library_type, linear_solver_ordering, transformed_program.get(), error)) { @@ -1134,9 +1156,32 @@ LinearSolver* SolverImpl::CreateLinearSolver(Solver::Options* options, } } +#ifdef CERES_NO_LAPACK + if (options->linear_solver_type == DENSE_NORMAL_CHOLESKY && + options->dense_linear_algebra_library_type == LAPACK) { + *error = "Can't use DENSE_NORMAL_CHOLESKY with LAPACK because " + "LAPACK was not enabled when Ceres was built."; + return NULL; + } + + if (options->linear_solver_type == DENSE_QR && + options->dense_linear_algebra_library_type == LAPACK) { + *error = "Can't use DENSE_QR with LAPACK because " + "LAPACK was not enabled when Ceres was built."; + return NULL; + } + + if (options->linear_solver_type == DENSE_SCHUR && + options->dense_linear_algebra_library_type == LAPACK) { + *error = "Can't use DENSE_SCHUR with LAPACK because " + "LAPACK was not enabled when Ceres was built."; + return NULL; + } +#endif + #ifdef CERES_NO_SUITESPARSE if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY && - options->sparse_linear_algebra_library == SUITE_SPARSE) { + options->sparse_linear_algebra_library_type == SUITE_SPARSE) { *error = "Can't use SPARSE_NORMAL_CHOLESKY with SUITESPARSE because " "SuiteSparse was not enabled when Ceres was built."; return NULL; @@ -1157,7 +1202,7 @@ LinearSolver* SolverImpl::CreateLinearSolver(Solver::Options* options, #ifdef CERES_NO_CXSPARSE if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY && - options->sparse_linear_algebra_library == CX_SPARSE) { + options->sparse_linear_algebra_library_type == CX_SPARSE) { *error = "Can't use SPARSE_NORMAL_CHOLESKY with CXSPARSE because " "CXSparse was not enabled when Ceres was built."; return NULL; @@ -1194,8 +1239,10 @@ LinearSolver* SolverImpl::CreateLinearSolver(Solver::Options* options, options->max_linear_solver_iterations; linear_solver_options.type = options->linear_solver_type; linear_solver_options.preconditioner_type = options->preconditioner_type; - linear_solver_options.sparse_linear_algebra_library = - options->sparse_linear_algebra_library; + linear_solver_options.sparse_linear_algebra_library_type = + options->sparse_linear_algebra_library_type; + linear_solver_options.dense_linear_algebra_library_type = + options->dense_linear_algebra_library_type; linear_solver_options.use_postordering = options->use_postordering; // Ignore user's postordering preferences and force it to be true if @@ -1204,7 +1251,7 @@ LinearSolver* SolverImpl::CreateLinearSolver(Solver::Options* options, // done. #if !defined(CERES_NO_SUITESPARSE) && defined(CERES_NO_CAMD) if (IsSchurType(linear_solver_options.type) && - linear_solver_options.sparse_linear_algebra_library == SUITE_SPARSE) { + options->sparse_linear_algebra_library_type == SUITE_SPARSE) { linear_solver_options.use_postordering = true; } #endif @@ -1590,6 +1637,11 @@ bool SolverImpl::ReorderProgramForSchurTypeLinearSolver( parameter_blocks[i]->mutable_user_state())); } + // Renumber the entries of constraints to be contiguous integers + // as camd requires that the group ids be in the range [0, + // parameter_blocks.size() - 1]. + SolverImpl::CompactifyArray(&constraints); + // Set the offsets and index for CreateJacobianSparsityTranspose. program->SetParameterOffsetsAndIndex(); // Compute a block sparse presentation of J'. @@ -1713,5 +1765,20 @@ bool SolverImpl::ReorderProgramForSparseNormalCholesky( return true; } +void SolverImpl::CompactifyArray(vector* array_ptr) { + vector& array = *array_ptr; + const set unique_group_ids(array.begin(), array.end()); + map group_id_map; + for (set::const_iterator it = unique_group_ids.begin(); + it != unique_group_ids.end(); + ++it) { + InsertOrDie(&group_id_map, *it, group_id_map.size()); + } + + for (int i = 0; i < array.size(); ++i) { + array[i] = group_id_map[array[i]]; + } +} + } // namespace internal } // namespace ceres diff --git a/internal/ceres/solver_impl.h b/internal/ceres/solver_impl.h index ebfb813..2b7ca3e 100644 --- a/internal/ceres/solver_impl.h +++ b/internal/ceres/solver_impl.h @@ -208,6 +208,13 @@ class SolverImpl { ParameterBlockOrdering* parameter_block_ordering, Program* program, string* error); + + // array contains a list of (possibly repeating) non-negative + // integers. Let us assume that we have constructed another array + // `p` by sorting and uniqueing the entries of array. + // CompactifyArray replaces each entry in "array" with its position + // in `p`. + static void CompactifyArray(vector* array); }; } // namespace internal diff --git a/internal/ceres/solver_impl_test.cc b/internal/ceres/solver_impl_test.cc index d81858c..583ef4e 100644 --- a/internal/ceres/solver_impl_test.cc +++ b/internal/ceres/solver_impl_test.cc @@ -592,7 +592,7 @@ TEST(SolverImpl, CreateLinearSolverNormalOperation) { #ifndef CERES_NO_SUITESPARSE options.linear_solver_type = SPARSE_NORMAL_CHOLESKY; - options.sparse_linear_algebra_library = SUITE_SPARSE; + options.sparse_linear_algebra_library_type = SUITE_SPARSE; solver.reset(SolverImpl::CreateLinearSolver(&options, &error)); EXPECT_EQ(options.linear_solver_type, SPARSE_NORMAL_CHOLESKY); EXPECT_TRUE(solver.get() != NULL); @@ -600,7 +600,7 @@ TEST(SolverImpl, CreateLinearSolverNormalOperation) { #ifndef CERES_NO_CXSPARSE options.linear_solver_type = SPARSE_NORMAL_CHOLESKY; - options.sparse_linear_algebra_library = CX_SPARSE; + options.sparse_linear_algebra_library_type = CX_SPARSE; solver.reset(SolverImpl::CreateLinearSolver(&options, &error)); EXPECT_EQ(options.linear_solver_type, SPARSE_NORMAL_CHOLESKY); EXPECT_TRUE(solver.get() != NULL); @@ -991,5 +991,52 @@ TEST(SolverImpl, ReallocationInCreateJacobianBlockSparsityTranspose) { EXPECT_EQ((expected_dense_jacobian - actual_dense_jacobian).norm(), 0.0); } +TEST(CompactifyArray, ContiguousEntries) { + vector array; + array.push_back(0); + array.push_back(1); + vector expected = array; + SolverImpl::CompactifyArray(&array); + EXPECT_EQ(array, expected); + array.clear(); + + array.push_back(1); + array.push_back(0); + expected = array; + SolverImpl::CompactifyArray(&array); + EXPECT_EQ(array, expected); +} + +TEST(CompactifyArray, NonContiguousEntries) { + vector array; + array.push_back(0); + array.push_back(2); + vector expected; + expected.push_back(0); + expected.push_back(1); + SolverImpl::CompactifyArray(&array); + EXPECT_EQ(array, expected); +} + +TEST(CompactifyArray, NonContiguousRepeatingEntries) { + vector array; + array.push_back(3); + array.push_back(1); + array.push_back(0); + array.push_back(0); + array.push_back(0); + array.push_back(5); + vector expected; + expected.push_back(2); + expected.push_back(1); + expected.push_back(0); + expected.push_back(0); + expected.push_back(0); + expected.push_back(3); + + SolverImpl::CompactifyArray(&array); + EXPECT_EQ(array, expected); +} + } // namespace internal } // namespace ceres diff --git a/internal/ceres/sparse_normal_cholesky_solver.cc b/internal/ceres/sparse_normal_cholesky_solver.cc index 9601142..f1a5237 100644 --- a/internal/ceres/sparse_normal_cholesky_solver.cc +++ b/internal/ceres/sparse_normal_cholesky_solver.cc @@ -36,11 +36,8 @@ #include #include -#ifndef CERES_NO_CXSPARSE -#include "cs.h" -#endif - #include "ceres/compressed_row_sparse_matrix.h" +#include "ceres/cxsparse.h" #include "ceres/internal/eigen.h" #include "ceres/internal/scoped_ptr.h" #include "ceres/linear_solver.h" @@ -54,14 +51,9 @@ namespace internal { SparseNormalCholeskySolver::SparseNormalCholeskySolver( const LinearSolver::Options& options) - : options_(options) { -#ifndef CERES_NO_SUITESPARSE - factor_ = NULL; -#endif - -#ifndef CERES_NO_CXSPARSE - cxsparse_factor_ = NULL; -#endif // CERES_NO_CXSPARSE + : factor_(NULL), + cxsparse_factor_(NULL), + options_(options) { } SparseNormalCholeskySolver::~SparseNormalCholeskySolver() { @@ -85,18 +77,18 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImpl( const double* b, const LinearSolver::PerSolveOptions& per_solve_options, double * x) { - switch (options_.sparse_linear_algebra_library) { + switch (options_.sparse_linear_algebra_library_type) { case SUITE_SPARSE: return SolveImplUsingSuiteSparse(A, b, per_solve_options, x); case CX_SPARSE: return SolveImplUsingCXSparse(A, b, per_solve_options, x); default: LOG(FATAL) << "Unknown sparse linear algebra library : " - << options_.sparse_linear_algebra_library; + << options_.sparse_linear_algebra_library_type; } LOG(FATAL) << "Unknown sparse linear algebra library : " - << options_.sparse_linear_algebra_library; + << options_.sparse_linear_algebra_library_type; return LinearSolver::Summary(); } diff --git a/internal/ceres/sparse_normal_cholesky_solver.h b/internal/ceres/sparse_normal_cholesky_solver.h index ebb32e6..61111b4 100644 --- a/internal/ceres/sparse_normal_cholesky_solver.h +++ b/internal/ceres/sparse_normal_cholesky_solver.h @@ -73,17 +73,13 @@ class SparseNormalCholeskySolver : public CompressedRowSparseMatrixSolver { const LinearSolver::PerSolveOptions& options, double* x); -#ifndef CERES_NO_SUITESPARSE SuiteSparse ss_; // Cached factorization cholmod_factor* factor_; -#endif // CERES_NO_SUITESPARSE -#ifndef CERES_NO_CXSPARSE CXSparse cxsparse_; // Cached factorization cs_dis* cxsparse_factor_; -#endif // CERES_NO_CXSPARSE const LinearSolver::Options options_; CERES_DISALLOW_COPY_AND_ASSIGN(SparseNormalCholeskySolver); diff --git a/internal/ceres/suitesparse.h b/internal/ceres/suitesparse.h index 8a5b0a8..16f298e 100644 --- a/internal/ceres/suitesparse.h +++ b/internal/ceres/suitesparse.h @@ -43,6 +43,7 @@ #include "ceres/internal/port.h" #include "cholmod.h" #include "glog/logging.h" +#include "SuiteSparseQR.hpp" // Before SuiteSparse version 4.2.0, cholmod_camd was only enabled // if SuiteSparse was compiled with Metis support. This makes @@ -58,6 +59,14 @@ #define CERES_NO_CAMD #endif +// UF_long is deprecated but SuiteSparse_long is only available in +// newer versions of SuiteSparse. So for older versions of +// SuiteSparse, we define SuiteSparse_long to be the same as UF_long, +// which is what recent versions of SuiteSparse do anyways. +#ifndef SuiteSparse_long +#define SuiteSparse_long UF_long +#endif + namespace ceres { namespace internal { @@ -261,6 +270,11 @@ class SuiteSparse { } // namespace internal } // namespace ceres +#else // CERES_NO_SUITESPARSE + +class SuiteSparse {}; +typedef void cholmod_factor; + #endif // CERES_NO_SUITESPARSE #endif // CERES_INTERNAL_SUITESPARSE_H_ diff --git a/internal/ceres/system_test.cc b/internal/ceres/system_test.cc index 095b51e..7b0e02d 100644 --- a/internal/ceres/system_test.cc +++ b/internal/ceres/system_test.cc @@ -64,21 +64,21 @@ const bool kUserOrdering = false; // Struct used for configuring the solver. struct SolverConfig { SolverConfig(LinearSolverType linear_solver_type, - SparseLinearAlgebraLibraryType sparse_linear_algebra_library, + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type, bool use_automatic_ordering) : linear_solver_type(linear_solver_type), - sparse_linear_algebra_library(sparse_linear_algebra_library), + sparse_linear_algebra_library_type(sparse_linear_algebra_library_type), use_automatic_ordering(use_automatic_ordering), preconditioner_type(IDENTITY), num_threads(1) { } SolverConfig(LinearSolverType linear_solver_type, - SparseLinearAlgebraLibraryType sparse_linear_algebra_library, + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type, bool use_automatic_ordering, PreconditionerType preconditioner_type) : linear_solver_type(linear_solver_type), - sparse_linear_algebra_library(sparse_linear_algebra_library), + sparse_linear_algebra_library_type(sparse_linear_algebra_library_type), use_automatic_ordering(use_automatic_ordering), preconditioner_type(preconditioner_type), num_threads(1) { @@ -88,14 +88,14 @@ struct SolverConfig { return StringPrintf( "(%s, %s, %s, %s, %d)", LinearSolverTypeToString(linear_solver_type), - SparseLinearAlgebraLibraryTypeToString(sparse_linear_algebra_library), + SparseLinearAlgebraLibraryTypeToString(sparse_linear_algebra_library_type), use_automatic_ordering ? "AUTOMATIC" : "USER", PreconditionerTypeToString(preconditioner_type), num_threads); } LinearSolverType linear_solver_type; - SparseLinearAlgebraLibraryType sparse_linear_algebra_library; + SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type; bool use_automatic_ordering; PreconditionerType preconditioner_type; int num_threads; @@ -130,8 +130,8 @@ void RunSolversAndCheckTheyMatch(const vector& configurations, Solver::Options& options = *(system_test_problem->mutable_solver_options()); options.linear_solver_type = config.linear_solver_type; - options.sparse_linear_algebra_library = - config.sparse_linear_algebra_library; + options.sparse_linear_algebra_library_type = + config.sparse_linear_algebra_library_type; options.preconditioner_type = config.preconditioner_type; options.num_threads = config.num_threads; options.num_linear_solver_threads = config.num_threads; @@ -281,9 +281,9 @@ class PowellsFunction { TEST(SystemTest, PowellsFunction) { vector configs; -#define CONFIGURE(linear_solver, sparse_linear_algebra_library, ordering) \ - configs.push_back(SolverConfig(linear_solver, \ - sparse_linear_algebra_library, \ +#define CONFIGURE(linear_solver, sparse_linear_algebra_library_type, ordering) \ + configs.push_back(SolverConfig(linear_solver, \ + sparse_linear_algebra_library_type, \ ordering)) CONFIGURE(DENSE_QR, SUITE_SPARSE, kAutomaticOrdering); @@ -485,9 +485,9 @@ class BundleAdjustmentProblem { TEST(SystemTest, BundleAdjustmentProblem) { vector configs; -#define CONFIGURE(linear_solver, sparse_linear_algebra_library, ordering, preconditioner) \ +#define CONFIGURE(linear_solver, sparse_linear_algebra_library_type, ordering, preconditioner) \ configs.push_back(SolverConfig(linear_solver, \ - sparse_linear_algebra_library, \ + sparse_linear_algebra_library_type, \ ordering, \ preconditioner)) diff --git a/internal/ceres/trust_region_minimizer.cc b/internal/ceres/trust_region_minimizer.cc index d6ae0ab..03d6c8e 100644 --- a/internal/ceres/trust_region_minimizer.cc +++ b/internal/ceres/trust_region_minimizer.cc @@ -177,6 +177,7 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options, int num_consecutive_invalid_steps = 0; bool inner_iterations_are_enabled = options.inner_iteration_minimizer != NULL; while (true) { + bool inner_iterations_were_useful = false; if (!RunCallbacks(options.callbacks, iteration_summary, summary)) { return; } @@ -240,8 +241,8 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options, // = -f'J * step - step' * J' * J * step / 2 model_residuals.setZero(); jacobian->RightMultiply(trust_region_step.data(), model_residuals.data()); - model_cost_change = -(residuals.dot(model_residuals) + - model_residuals.squaredNorm() / 2.0); + model_cost_change = + - model_residuals.dot(residuals + model_residuals / 2.0); if (model_cost_change < 0.0) { VLOG(1) << "Invalid step: current_cost: " << cost @@ -330,11 +331,13 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options, << " x_plus_delta_cost: " << x_plus_delta_cost << " new_cost: " << new_cost; const double inner_iteration_relative_progress = - (x_plus_delta_cost - new_cost) / x_plus_delta_cost; + 1.0 - new_cost / x_plus_delta_cost; inner_iterations_are_enabled = (inner_iteration_relative_progress > options.inner_iteration_tolerance); + inner_iterations_were_useful = new_cost < cost; + // Disable inner iterations once the relative improvement // drops below tolerance. if (!inner_iterations_are_enabled) { @@ -398,13 +401,51 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options, ? max(relative_decrease, historical_relative_decrease) : relative_decrease; + // Normally, the quality of a trust region step is measured by + // the ratio + // + // cost_change + // r = ----------------- + // model_cost_change + // + // All the change in the nonlinear objective is due to the trust + // region step so this ratio is a good measure of the quality of + // the trust region radius. However, when inner iterations are + // being used, cost_change includes the contribution of the + // inner iterations and its not fair to credit it all to the + // trust region algorithm. So we change the ratio to be + // + // cost_change + // r = ------------------------------------------------ + // (model_cost_change + inner_iteration_cost_change) + // + // In most cases this is fine, but it can be the case that the + // change in solution quality due to inner iterations is so large + // and the trust region step is so bad, that this ratio can become + // quite small. + // + // This can cause the trust region loop to reject this step. To + // get around this, we expicitly check if the inner iterations + // led to a net decrease in the objective function value. If + // they did, we accept the step even if the trust region ratio + // is small. + // + // Notice that we do not just check that cost_change is positive + // which is a weaker condition and would render the + // min_relative_decrease threshold useless. Instead, we keep + // track of inner_iterations_were_useful, which is true only + // when inner iterations lead to a net decrease in the cost. iteration_summary.step_is_successful = - iteration_summary.relative_decrease > options_.min_relative_decrease; + (inner_iterations_were_useful || + iteration_summary.relative_decrease > + options_.min_relative_decrease); if (iteration_summary.step_is_successful) { accumulated_candidate_model_cost_change += model_cost_change; accumulated_reference_model_cost_change += model_cost_change; - if (relative_decrease <= options_.min_relative_decrease) { + + if (!inner_iterations_were_useful && + relative_decrease <= options_.min_relative_decrease) { iteration_summary.step_is_nonmonotonic = true; VLOG(2) << "Non-monotonic step! " << " relative_decrease: " << relative_decrease diff --git a/internal/ceres/types.cc b/internal/ceres/types.cc index 164185e..a97f1a5 100644 --- a/internal/ceres/types.cc +++ b/internal/ceres/types.cc @@ -101,7 +101,6 @@ const char* SparseLinearAlgebraLibraryTypeToString( } } - bool StringToSparseLinearAlgebraLibraryType( string value, SparseLinearAlgebraLibraryType* type) { @@ -111,6 +110,25 @@ bool StringToSparseLinearAlgebraLibraryType( return false; } +const char* DenseLinearAlgebraLibraryTypeToString( + DenseLinearAlgebraLibraryType type) { + switch (type) { + CASESTR(EIGEN); + CASESTR(LAPACK); + default: + return "UNKNOWN"; + } +} + +bool StringToDenseLinearAlgebraLibraryType( + string value, + DenseLinearAlgebraLibraryType* type) { + UpperCase(&value); + STRENUM(EIGEN); + STRENUM(LAPACK); + return false; +} + const char* TrustRegionStrategyTypeToString(TrustRegionStrategyType type) { switch (type) { CASESTR(LEVENBERG_MARQUARDT); @@ -318,4 +336,21 @@ bool IsSparseLinearAlgebraLibraryTypeAvailable( return false; } +bool IsDenseLinearAlgebraLibraryTypeAvailable( + DenseLinearAlgebraLibraryType type) { + if (type == EIGEN) { + return true; + } + if (type == LAPACK) { +#ifdef CERES_NO_LAPACK + return false; +#else + return true; +#endif + } + + LOG(WARNING) << "Unknown dense linear algebra library " << type; + return false; +} + } // namespace ceres diff --git a/internal/ceres/unsymmetric_linear_solver_test.cc b/internal/ceres/unsymmetric_linear_solver_test.cc index 232f34c..af9dffe 100644 --- a/internal/ceres/unsymmetric_linear_solver_test.cc +++ b/internal/ceres/unsymmetric_linear_solver_test.cc @@ -118,23 +118,41 @@ class UnsymmetricLinearSolverTest : public ::testing::Test { scoped_array sol_regularized_; }; -TEST_F(UnsymmetricLinearSolverTest, DenseQR) { +TEST_F(UnsymmetricLinearSolverTest, EigenDenseQR) { LinearSolver::Options options; options.type = DENSE_QR; + options.dense_linear_algebra_library_type = EIGEN; TestSolver(options); } -TEST_F(UnsymmetricLinearSolverTest, DenseNormalCholesky) { +TEST_F(UnsymmetricLinearSolverTest, EigenDenseNormalCholesky) { LinearSolver::Options options; + options.dense_linear_algebra_library_type = EIGEN; options.type = DENSE_NORMAL_CHOLESKY; TestSolver(options); } +#ifndef CERES_NO_LAPACK +TEST_F(UnsymmetricLinearSolverTest, LAPACKDenseQR) { + LinearSolver::Options options; + options.type = DENSE_QR; + options.dense_linear_algebra_library_type = LAPACK; + TestSolver(options); +} + +TEST_F(UnsymmetricLinearSolverTest, LAPACKDenseNormalCholesky) { + LinearSolver::Options options; + options.dense_linear_algebra_library_type = LAPACK; + options.type = DENSE_NORMAL_CHOLESKY; + TestSolver(options); +} +#endif + #ifndef CERES_NO_SUITESPARSE TEST_F(UnsymmetricLinearSolverTest, SparseNormalCholeskyUsingSuiteSparsePreOrdering) { LinearSolver::Options options; - options.sparse_linear_algebra_library = SUITE_SPARSE; + options.sparse_linear_algebra_library_type = SUITE_SPARSE; options.type = SPARSE_NORMAL_CHOLESKY; options.use_postordering = false; TestSolver(options); @@ -143,7 +161,7 @@ TEST_F(UnsymmetricLinearSolverTest, TEST_F(UnsymmetricLinearSolverTest, SparseNormalCholeskyUsingSuiteSparsePostOrdering) { LinearSolver::Options options; - options.sparse_linear_algebra_library = SUITE_SPARSE; + options.sparse_linear_algebra_library_type = SUITE_SPARSE; options.type = SPARSE_NORMAL_CHOLESKY; options.use_postordering = true; TestSolver(options); @@ -154,7 +172,7 @@ TEST_F(UnsymmetricLinearSolverTest, TEST_F(UnsymmetricLinearSolverTest, SparseNormalCholeskyUsingCXSparsePreOrdering) { LinearSolver::Options options; - options.sparse_linear_algebra_library = CX_SPARSE; + options.sparse_linear_algebra_library_type = CX_SPARSE; options.type = SPARSE_NORMAL_CHOLESKY; options.use_postordering = false; TestSolver(options); @@ -163,7 +181,7 @@ TEST_F(UnsymmetricLinearSolverTest, TEST_F(UnsymmetricLinearSolverTest, SparseNormalCholeskyUsingCXSparsePostOrdering) { LinearSolver::Options options; - options.sparse_linear_algebra_library = CX_SPARSE; + options.sparse_linear_algebra_library_type = CX_SPARSE; options.type = SPARSE_NORMAL_CHOLESKY; options.use_postordering = true; TestSolver(options); diff --git a/internal/ceres/visibility.cc b/internal/ceres/visibility.cc index fcd793c..acfa45b 100644 --- a/internal/ceres/visibility.cc +++ b/internal/ceres/visibility.cc @@ -153,4 +153,4 @@ Graph* CreateSchurComplementGraph(const vector >& visibility) { } // namespace internal } // namespace ceres -#endif +#endif // CERES_NO_SUITESPARSE diff --git a/internal/ceres/visibility_based_preconditioner_test.cc b/internal/ceres/visibility_based_preconditioner_test.cc index 53d10e1..2edbb18 100644 --- a/internal/ceres/visibility_based_preconditioner_test.cc +++ b/internal/ceres/visibility_based_preconditioner_test.cc @@ -279,7 +279,7 @@ namespace internal { // preconditioner_->RightMultiply(x.data(), y.data()); // z = full_schur_complement // .selfadjointView() -// .ldlt().solve(x); +// .llt().solve(x); // double max_relative_difference = // ((y - z).array() / z.array()).matrix().lpNorm(); // EXPECT_NEAR(max_relative_difference, 0.0, kTolerance); -- cgit v1.2.3