From 399f7d09e0c45af54b77b4ab9508d6f23759b927 Mon Sep 17 00:00:00 2001
From: Scott Ettinger <settinger@google.com>
Date: Mon, 9 Sep 2013 12:54:43 -0700
Subject: Update Ceres to the latest version Bug: 10673139 Bug: 10621282

Change-Id: Ib740a6e0e29049cc203da9f083b0d4f5734a2741
---
 internal/ceres/CMakeLists.txt                      |  54 +--
 internal/ceres/blas.cc                             |  78 ++++
 internal/ceres/blas.h                              | 379 +------------------
 internal/ceres/block_jacobi_preconditioner.cc      |   2 +-
 internal/ceres/block_sparse_matrix.cc              |   2 +-
 internal/ceres/c_api.cc                            |   5 +-
 .../ceres/compressed_col_sparse_matrix_utils.h     |  75 ++++
 .../compressed_col_sparse_matrix_utils_test.cc     |  87 +++++
 internal/ceres/covariance_impl.cc                  |  92 +++--
 internal/ceres/covariance_test.cc                  |   7 +
 internal/ceres/cxsparse.h                          |   5 +
 internal/ceres/dense_normal_cholesky_solver.cc     |  69 +++-
 internal/ceres/dense_normal_cholesky_solver.h      |  12 +
 internal/ceres/dense_qr_solver.cc                  |  81 +++-
 internal/ceres/dense_qr_solver.h                   |  14 +
 internal/ceres/implicit_schur_complement.cc        |   2 +-
 internal/ceres/implicit_schur_complement_test.cc   |   4 +-
 .../ceres/iterative_schur_complement_solver.cc     |  23 +-
 .../iterative_schur_complement_solver_test.cc      |  15 +-
 internal/ceres/lapack.cc                           | 157 ++++++++
 internal/ceres/lapack.h                            |  88 +++++
 internal/ceres/line_search.cc                      |   2 +-
 internal/ceres/line_search.h                       |   2 +-
 internal/ceres/linear_solver.h                     |   6 +-
 internal/ceres/miniglog/glog/logging.cc            |  39 --
 internal/ceres/miniglog/glog/logging.h             | 274 +++++++-------
 internal/ceres/partitioned_matrix_view.cc          |   2 +-
 internal/ceres/preconditioner.h                    |   4 +-
 internal/ceres/program_evaluator.h                 |   2 +-
 internal/ceres/residual_block.cc                   |   3 +-
 internal/ceres/schur_complement_solver.cc          |  46 +--
 internal/ceres/schur_complement_solver.h           |   4 -
 internal/ceres/schur_complement_solver_test.cc     |  58 ++-
 internal/ceres/schur_eliminator_impl.h             |   3 +-
 internal/ceres/schur_eliminator_test.cc            |   6 +-
 internal/ceres/schur_jacobi_preconditioner.cc      |   2 +-
 internal/ceres/small_blas.h                        | 406 +++++++++++++++++++++
 internal/ceres/small_blas_test.cc                  | 303 +++++++++++++++
 internal/ceres/solver.cc                           |  14 +-
 internal/ceres/solver_impl.cc                      |  85 ++++-
 internal/ceres/solver_impl.h                       |   7 +
 internal/ceres/solver_impl_test.cc                 |  51 ++-
 internal/ceres/sparse_normal_cholesky_solver.cc    |  22 +-
 internal/ceres/sparse_normal_cholesky_solver.h     |   4 -
 internal/ceres/suitesparse.h                       |  14 +
 internal/ceres/system_test.cc                      |  26 +-
 internal/ceres/trust_region_minimizer.cc           |  51 ++-
 internal/ceres/types.cc                            |  37 +-
 internal/ceres/unsymmetric_linear_solver_test.cc   |  30 +-
 internal/ceres/visibility.cc                       |   2 +-
 .../ceres/visibility_based_preconditioner_test.cc  |   2 +-
 51 files changed, 2007 insertions(+), 751 deletions(-)
 create mode 100644 internal/ceres/blas.cc
 create mode 100644 internal/ceres/lapack.cc
 create mode 100644 internal/ceres/lapack.h
 delete mode 100644 internal/ceres/miniglog/glog/logging.cc
 create mode 100644 internal/ceres/small_blas.h
 create mode 100644 internal/ceres/small_blas_test.cc

(limited to 'internal/ceres')

diff --git a/internal/ceres/CMakeLists.txt b/internal/ceres/CMakeLists.txt
index 9e2e1ae..610e816 100644
--- a/internal/ceres/CMakeLists.txt
+++ b/internal/ceres/CMakeLists.txt
@@ -30,6 +30,7 @@
 
 SET(CERES_INTERNAL_SRC
     array_utils.cc
+    blas.cc
     block_evaluate_preparer.cc
     block_jacobi_preconditioner.cc
     block_jacobian_writer.cc
@@ -64,6 +65,7 @@ SET(CERES_INTERNAL_SRC
     incomplete_lq_factorization.cc
     iterative_schur_complement_solver.cc
     levenberg_marquardt_strategy.cc
+    lapack.cc
     line_search.cc
     line_search_direction.cc
     line_search_minimizer.cc
@@ -130,20 +132,14 @@ ELSE (SCHUR_SPECIALIZATIONS)
 ENDIF (SCHUR_SPECIALIZATIONS)
 
 # For Android, use the internal Glog implementation.
-IF (BUILD_ANDROID)
-  ADD_LIBRARY(miniglog STATIC
-              miniglog/glog/logging.cc)
-
-  # The Android logging library that defines e.g. __android_log_print is
-  # creatively named "log".
-  TARGET_LINK_LIBRARIES(miniglog log)
-
+IF (MINIGLOG)
+  ADD_LIBRARY(miniglog STATIC miniglog/glog/logging.cc)
   INSTALL(TARGETS miniglog
           EXPORT  CeresExport
           RUNTIME DESTINATION bin
           LIBRARY DESTINATION lib${LIB_SUFFIX}
           ARCHIVE DESTINATION lib${LIB_SUFFIX})
-ENDIF (BUILD_ANDROID)
+ENDIF (MINIGLOG)
 
 SET(CERES_LIBRARY_DEPENDENCIES ${GLOG_LIB})
 
@@ -166,18 +162,20 @@ IF (SUITESPARSE_FOUND)
     LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${METIS_LIB})
   ENDIF (EXISTS ${METIS_LIB})
 
-  LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${LAPACK_LIB})
-
-  IF (EXISTS ${BLAS_LIB})
-    LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${BLAS_LIB})
-  ENDIF (EXISTS ${BLAS_LIB})
-
   IF (TBB_FOUND)
     LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${TBB_LIB})
     LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${TBB_MALLOC_LIB})
   ENDIF (TBB_FOUND)
 ENDIF (SUITESPARSE_FOUND)
 
+IF (CXSPARSE_FOUND)
+  LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${CXSPARSE_LIB})
+ENDIF (CXSPARSE_FOUND)
+
+IF (BLAS_AND_LAPACK_FOUND)
+  LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${LAPACK_LIBRARIES})
+  LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${BLAS_LIBRARIES})
+ENDIF (BLAS_AND_LAPACK_FOUND)
 
 IF (CXSPARSE_FOUND)
   LIST(APPEND CERES_LIBRARY_DEPENDENCIES ${CXSPARSE_LIB})
@@ -194,7 +192,11 @@ SET(CERES_LIBRARY_SOURCE
     ${CERES_INTERNAL_HDRS}
     ${CERES_INTERNAL_SCHUR_FILES})
 
-ADD_LIBRARY(ceres STATIC ${CERES_LIBRARY_SOURCE})
+ADD_LIBRARY(ceres ${CERES_LIBRARY_SOURCE})
+SET_TARGET_PROPERTIES(ceres PROPERTIES
+  VERSION ${CERES_VERSION}
+  SOVERSION ${CERES_VERSION_MAJOR}
+)
 TARGET_LINK_LIBRARIES(ceres ${CERES_LIBRARY_DEPENDENCIES})
 
 INSTALL(TARGETS ceres
@@ -203,23 +205,6 @@ INSTALL(TARGETS ceres
         LIBRARY DESTINATION lib${LIB_SUFFIX}
         ARCHIVE DESTINATION lib${LIB_SUFFIX})
 
-# Don't build a DLL on MSVC. Supporting Ceres as a DLL on Windows involves
-# nontrivial changes that we haven't made yet.
-IF (NOT MSVC AND NOT BUILD_ANDROID AND BUILD_SHARED)
-  ADD_LIBRARY(ceres_shared SHARED ${CERES_LIBRARY_SOURCE})
-  TARGET_LINK_LIBRARIES(ceres_shared ${CERES_LIBRARY_DEPENDENCIES})
-  SET_TARGET_PROPERTIES(ceres_shared PROPERTIES
-                        VERSION ${CERES_VERSION}
-                        SOVERSION ${CERES_ABI_VERSION})
-
-  INSTALL(TARGETS ceres_shared
-          EXPORT  CeresExport
-          RUNTIME DESTINATION bin
-          LIBRARY DESTINATION lib${LIB_SUFFIX}
-          ARCHIVE DESTINATION lib${LIB_SUFFIX})
-
-ENDIF (NOT MSVC AND NOT BUILD_ANDROID AND BUILD_SHARED)
-
 IF (BUILD_TESTING AND GFLAGS)
   ADD_LIBRARY(gtest gmock_gtest_all.cc gmock_main.cc)
   ADD_LIBRARY(test_util
@@ -228,6 +213,7 @@ IF (BUILD_TESTING AND GFLAGS)
               test_util.cc)
 
   TARGET_LINK_LIBRARIES(gtest ${GFLAGS_LIB} ${GLOG_LIB})
+  TARGET_LINK_LIBRARIES(test_util ceres gtest ${GLOG_LIB})
 
   MACRO (CERES_TEST NAME)
     ADD_EXECUTABLE(${NAME}_test ${NAME}_test.cc)
@@ -242,7 +228,6 @@ IF (BUILD_TESTING AND GFLAGS)
   CERES_TEST(autodiff)
   CERES_TEST(autodiff_cost_function)
   CERES_TEST(autodiff_local_parameterization)
-  CERES_TEST(blas)
   CERES_TEST(block_random_access_crs_matrix)
   CERES_TEST(block_random_access_dense_matrix)
   CERES_TEST(block_random_access_sparse_matrix)
@@ -285,6 +270,7 @@ IF (BUILD_TESTING AND GFLAGS)
   CERES_TEST(runtime_numeric_diff_cost_function)
   CERES_TEST(schur_complement_solver)
   CERES_TEST(schur_eliminator)
+  CERES_TEST(small_blas)
   CERES_TEST(solver_impl)
 
   # TODO(sameeragarwal): This test should ultimately be made
diff --git a/internal/ceres/blas.cc b/internal/ceres/blas.cc
new file mode 100644
index 0000000..f79b1eb
--- /dev/null
+++ b/internal/ceres/blas.cc
@@ -0,0 +1,78 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+
+#include "ceres/blas.h"
+#include "glog/logging.h"
+
+extern "C" void dsyrk_(char* uplo,
+                       char* trans,
+                       int* n,
+                       int* k,
+                       double* alpha,
+                       double* a,
+                       int* lda,
+                       double* beta,
+                       double* c,
+                       int* ldc);
+
+namespace ceres {
+namespace internal {
+
+void BLAS::SymmetricRankKUpdate(int num_rows,
+                                int num_cols,
+                                const double* a,
+                                bool transpose,
+                                double alpha,
+                                double beta,
+                                double* c) {
+#ifdef CERES_NO_LAPACK
+  LOG(FATAL) << "Ceres was built without a BLAS library.";
+#else
+  char uplo = 'L';
+  char trans = transpose ? 'T' : 'N';
+  int n = transpose ? num_cols : num_rows;
+  int k = transpose ? num_rows : num_cols;
+  int lda = k;
+  int ldc = n;
+  dsyrk_(&uplo,
+         &trans,
+         &n,
+         &k,
+         &alpha,
+         const_cast<double*>(a),
+         &lda,
+         &beta,
+         c,
+         &ldc);
+#endif
+}
+
+}  // namespace internal
+}  // namespace ceres
diff --git a/internal/ceres/blas.h b/internal/ceres/blas.h
index 9629b3d..2ab6663 100644
--- a/internal/ceres/blas.h
+++ b/internal/ceres/blas.h
@@ -28,377 +28,28 @@
 //
 // Author: sameeragarwal@google.com (Sameer Agarwal)
 //
-// Simple blas functions for use in the Schur Eliminator. These are
-// fairly basic implementations which already yield a significant
-// speedup in the eliminator performance.
+// Wrapper functions around BLAS functions.
 
 #ifndef CERES_INTERNAL_BLAS_H_
 #define CERES_INTERNAL_BLAS_H_
 
-#include "ceres/internal/eigen.h"
-#include "glog/logging.h"
-
 namespace ceres {
 namespace internal {
 
-// Remove the ".noalias()" annotation from the matrix matrix
-// mutliplies to produce a correct build with the Android NDK,
-// including versions 6, 7, 8, and 8b, when built with STLPort and the
-// non-standalone toolchain (i.e. ndk-build). This appears to be a
-// compiler bug; if the workaround is not in place, the line
-//
-//   block.noalias() -= A * B;
-//
-// gets compiled to
-//
-//   block.noalias() += A * B;
-//
-// which breaks schur elimination. Introducing a temporary by removing the
-// .noalias() annotation causes the issue to disappear. Tracking this
-// issue down was tricky, since the test suite doesn't run when built with
-// the non-standalone toolchain.
-//
-// TODO(keir): Make a reproduction case for this and send it upstream.
-#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
-#define CERES_MAYBE_NOALIAS
-#else
-#define CERES_MAYBE_NOALIAS .noalias()
-#endif
-
-// The following three macros are used to share code and reduce
-// template junk across the various GEMM variants.
-#define CERES_GEMM_BEGIN(name)                                          \
-  template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>  \
-  inline void name(const double* A,                                     \
-                   const int num_row_a,                                 \
-                   const int num_col_a,                                 \
-                   const double* B,                                     \
-                   const int num_row_b,                                 \
-                   const int num_col_b,                                 \
-                   double* C,                                           \
-                   const int start_row_c,                               \
-                   const int start_col_c,                               \
-                   const int row_stride_c,                              \
-                   const int col_stride_c)
-
-#define CERES_GEMM_NAIVE_HEADER                                         \
-  DCHECK_GT(num_row_a, 0);                                              \
-  DCHECK_GT(num_col_a, 0);                                              \
-  DCHECK_GT(num_row_b, 0);                                              \
-  DCHECK_GT(num_col_b, 0);                                              \
-  DCHECK_GE(start_row_c, 0);                                            \
-  DCHECK_GE(start_col_c, 0);                                            \
-  DCHECK_GT(row_stride_c, 0);                                           \
-  DCHECK_GT(col_stride_c, 0);                                           \
-  DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));            \
-  DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));            \
-  DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b));            \
-  DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b));            \
-  const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);  \
-  const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);  \
-  const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b);  \
-  const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b);
-
-#define CERES_GEMM_EIGEN_HEADER                                         \
-  const typename EigenTypes<kRowA, kColA>::ConstMatrixRef               \
-  Aref(A, num_row_a, num_col_a);                                        \
-  const typename EigenTypes<kRowB, kColB>::ConstMatrixRef               \
-  Bref(B, num_row_b, num_col_b);                                        \
-  MatrixRef Cref(C, row_stride_c, col_stride_c);                        \
-
-#define CERES_CALL_GEMM(name)                                           \
-  name<kRowA, kColA, kRowB, kColB, kOperation>(                         \
-      A, num_row_a, num_col_a,                                          \
-      B, num_row_b, num_col_b,                                          \
-      C, start_row_c, start_col_c, row_stride_c, col_stride_c);
-
-
-// For the matrix-matrix functions below, there are three variants for
-// each functionality. Foo, FooNaive and FooEigen. Foo is the one to
-// be called by the user. FooNaive is a basic loop based
-// implementation and FooEigen uses Eigen's implementation. Foo
-// chooses between FooNaive and FooEigen depending on how many of the
-// template arguments are fixed at compile time. Currently, FooEigen
-// is called if all matrix dimensions are compile time
-// constants. FooNaive is called otherwise. This leads to the best
-// performance currently.
-//
-// The MatrixMatrixMultiply variants compute:
-//
-//   C op A * B;
-//
-// The MatrixTransposeMatrixMultiply variants compute:
-//
-//   C op A' * B
-//
-// where op can be +=, -=, or =.
-//
-// The template parameters (kRowA, kColA, kRowB, kColB) allow
-// specialization of the loop at compile time. If this information is
-// not available, then Eigen::Dynamic should be used as the template
-// argument.
-//
-//   kOperation =  1  -> C += A * B
-//   kOperation = -1  -> C -= A * B
-//   kOperation =  0  -> C  = A * B
-//
-// The functions can write into matrices C which are larger than the
-// matrix A * B. This is done by specifying the true size of C via
-// row_stride_c and col_stride_c, and then indicating where A * B
-// should be written into by start_row_c and start_col_c.
-//
-// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c =
-// 4 and start_col_c = 5, then if A = 3x2 and B = 2x4, we get
-//
-//   ------------
-//   ------------
-//   ------------
-//   ------------
-//   -----xxxx---
-//   -----xxxx---
-//   -----xxxx---
-//   ------------
-//   ------------
-//   ------------
-//
-CERES_GEMM_BEGIN(MatrixMatrixMultiplyEigen) {
-  CERES_GEMM_EIGEN_HEADER
-  Eigen::Block<MatrixRef, kRowA, kColB>
-    block(Cref, start_row_c, start_col_c, num_row_a, num_col_b);
-
-  if (kOperation > 0) {
-    block CERES_MAYBE_NOALIAS += Aref * Bref;
-  } else if (kOperation < 0) {
-    block CERES_MAYBE_NOALIAS -= Aref * Bref;
-  } else {
-    block CERES_MAYBE_NOALIAS = Aref * Bref;
-  }
-}
-
-CERES_GEMM_BEGIN(MatrixMatrixMultiplyNaive) {
-  CERES_GEMM_NAIVE_HEADER
-  DCHECK_EQ(NUM_COL_A, NUM_ROW_B);
-
-  const int NUM_ROW_C = NUM_ROW_A;
-  const int NUM_COL_C = NUM_COL_B;
-  DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c);
-  DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c);
-
-  for (int row = 0; row < NUM_ROW_C; ++row) {
-    for (int col = 0; col < NUM_COL_C; ++col) {
-      double tmp = 0.0;
-      for (int k = 0; k < NUM_COL_A; ++k) {
-        tmp += A[row * NUM_COL_A + k] * B[k * NUM_COL_B + col];
-      }
-
-      const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
-      if (kOperation > 0) {
-        C[index] += tmp;
-      } else if (kOperation < 0) {
-        C[index] -= tmp;
-      } else {
-        C[index] = tmp;
-      }
-    }
-  }
-}
-
-CERES_GEMM_BEGIN(MatrixMatrixMultiply) {
-#ifdef CERES_NO_CUSTOM_BLAS
-
-  CERES_CALL_GEMM(MatrixMatrixMultiplyEigen)
-  return;
-
-#else
-
-  if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
-      kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
-    CERES_CALL_GEMM(MatrixMatrixMultiplyEigen)
-  } else {
-    CERES_CALL_GEMM(MatrixMatrixMultiplyNaive)
-  }
-
-#endif
-}
-
-CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyEigen) {
-  CERES_GEMM_EIGEN_HEADER
-  Eigen::Block<MatrixRef, kColA, kColB> block(Cref,
-                                              start_row_c, start_col_c,
-                                              num_col_a, num_col_b);
-  if (kOperation > 0) {
-    block CERES_MAYBE_NOALIAS += Aref.transpose() * Bref;
-  } else if (kOperation < 0) {
-    block CERES_MAYBE_NOALIAS -= Aref.transpose() * Bref;
-  } else {
-    block CERES_MAYBE_NOALIAS = Aref.transpose() * Bref;
-  }
-}
-
-CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyNaive) {
-  CERES_GEMM_NAIVE_HEADER
-  DCHECK_EQ(NUM_ROW_A, NUM_ROW_B);
-
-  const int NUM_ROW_C = NUM_COL_A;
-  const int NUM_COL_C = NUM_COL_B;
-  DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c);
-  DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c);
-
-  for (int row = 0; row < NUM_ROW_C; ++row) {
-    for (int col = 0; col < NUM_COL_C; ++col) {
-      double tmp = 0.0;
-      for (int k = 0; k < NUM_ROW_A; ++k) {
-        tmp += A[k * NUM_COL_A + row] * B[k * NUM_COL_B + col];
-      }
-
-      const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
-      if (kOperation > 0) {
-        C[index]+= tmp;
-      } else if (kOperation < 0) {
-        C[index]-= tmp;
-      } else {
-        C[index]= tmp;
-      }
-    }
-  }
-}
-
-CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiply) {
-#ifdef CERES_NO_CUSTOM_BLAS
-
-  CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen)
-  return;
-
-#else
-
-  if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
-      kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
-    CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen)
-  } else {
-    CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyNaive)
-  }
-
-#endif
-}
-
-// Matrix-Vector multiplication
-//
-// c op A * b;
-//
-// where op can be +=, -=, or =.
-//
-// The template parameters (kRowA, kColA) allow specialization of the
-// loop at compile time. If this information is not available, then
-// Eigen::Dynamic should be used as the template argument.
-//
-// kOperation =  1  -> c += A' * b
-// kOperation = -1  -> c -= A' * b
-// kOperation =  0  -> c  = A' * b
-template<int kRowA, int kColA, int kOperation>
-inline void MatrixVectorMultiply(const double* A,
-                                 const int num_row_a,
-                                 const int num_col_a,
-                                 const double* b,
-                                 double* c) {
-#ifdef CERES_NO_CUSTOM_BLAS
-  const typename EigenTypes<kRowA, kColA>::ConstMatrixRef
-      Aref(A, num_row_a, num_col_a);
-  const typename EigenTypes<kColA>::ConstVectorRef bref(b, num_col_a);
-  typename EigenTypes<kRowA>::VectorRef cref(c, num_row_a);
-
-  // lazyProduct works better than .noalias() for matrix-vector
-  // products.
-  if (kOperation > 0) {
-    cref += Aref.lazyProduct(bref);
-  } else if (kOperation < 0) {
-    cref -= Aref.lazyProduct(bref);
-  } else {
-    cref = Aref.lazyProduct(bref);
-  }
-#else
-
-  DCHECK_GT(num_row_a, 0);
-  DCHECK_GT(num_col_a, 0);
-  DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
-  DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
-
-  const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
-  const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
-
-  for (int row = 0; row < NUM_ROW_A; ++row) {
-    double tmp = 0.0;
-    for (int col = 0; col < NUM_COL_A; ++col) {
-      tmp += A[row * NUM_COL_A + col] * b[col];
-    }
-
-    if (kOperation > 0) {
-      c[row] += tmp;
-    } else if (kOperation < 0) {
-      c[row] -= tmp;
-    } else {
-      c[row] = tmp;
-    }
-  }
-#endif  // CERES_NO_CUSTOM_BLAS
-}
-
-// Similar to MatrixVectorMultiply, except that A is transposed, i.e.,
-//
-// c op A' * b;
-template<int kRowA, int kColA, int kOperation>
-inline void MatrixTransposeVectorMultiply(const double* A,
-                                          const int num_row_a,
-                                          const int num_col_a,
-                                          const double* b,
-                                          double* c) {
-#ifdef CERES_NO_CUSTOM_BLAS
-  const typename EigenTypes<kRowA, kColA>::ConstMatrixRef
-      Aref(A, num_row_a, num_col_a);
-  const typename EigenTypes<kRowA>::ConstVectorRef bref(b, num_row_a);
-  typename EigenTypes<kColA>::VectorRef cref(c, num_col_a);
-
-  // lazyProduct works better than .noalias() for matrix-vector
-  // products.
-  if (kOperation > 0) {
-    cref += Aref.transpose().lazyProduct(bref);
-  } else if (kOperation < 0) {
-    cref -= Aref.transpose().lazyProduct(bref);
-  } else {
-    cref = Aref.transpose().lazyProduct(bref);
-  }
-#else
-
-  DCHECK_GT(num_row_a, 0);
-  DCHECK_GT(num_col_a, 0);
-  DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
-  DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
-
-  const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
-  const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
-
-  for (int row = 0; row < NUM_COL_A; ++row) {
-    double tmp = 0.0;
-    for (int col = 0; col < NUM_ROW_A; ++col) {
-      tmp += A[col * NUM_COL_A + row] * b[col];
-    }
-
-    if (kOperation > 0) {
-      c[row] += tmp;
-    } else if (kOperation < 0) {
-      c[row] -= tmp;
-    } else {
-      c[row] = tmp;
-    }
-  }
-#endif  // CERES_NO_CUSTOM_BLAS
-}
-
-
-#undef CERES_MAYBE_NOALIAS
-#undef CERES_GEMM_BEGIN
-#undef CERES_GEMM_EIGEN_HEADER
-#undef CERES_GEMM_NAIVE_HEADER
-#undef CERES_CALL_GEMM
+class BLAS {
+ public:
+  // transpose = true  : c = alpha * a'a + beta * c;
+  // transpose = false : c = alpha * aa' + beta * c;
+  //
+  // Assumes column major matrices.
+  static void SymmetricRankKUpdate(int num_rows,
+                                   int num_cols,
+                                   const double* a,
+                                   bool transpose,
+                                   double alpha,
+                                   double beta,
+                                   double* c);
+};
 
 }  // namespace internal
 }  // namespace ceres
diff --git a/internal/ceres/block_jacobi_preconditioner.cc b/internal/ceres/block_jacobi_preconditioner.cc
index 749e0b6..29974d4 100644
--- a/internal/ceres/block_jacobi_preconditioner.cc
+++ b/internal/ceres/block_jacobi_preconditioner.cc
@@ -111,7 +111,7 @@ bool BlockJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
     }
 
     block = block.selfadjointView<Eigen::Upper>()
-                 .ldlt()
+                 .llt()
                  .solve(Matrix::Identity(size, size));
   }
   return true;
diff --git a/internal/ceres/block_sparse_matrix.cc b/internal/ceres/block_sparse_matrix.cc
index fdd762c..a487262 100644
--- a/internal/ceres/block_sparse_matrix.cc
+++ b/internal/ceres/block_sparse_matrix.cc
@@ -33,9 +33,9 @@
 #include <cstddef>
 #include <algorithm>
 #include <vector>
-#include "ceres/blas.h"
 #include "ceres/block_structure.h"
 #include "ceres/internal/eigen.h"
+#include "ceres/small_blas.h"
 #include "ceres/triplet_sparse_matrix.h"
 #include "glog/logging.h"
 
diff --git a/internal/ceres/c_api.cc b/internal/ceres/c_api.cc
index 02bc129..1fd01c9 100644
--- a/internal/ceres/c_api.cc
+++ b/internal/ceres/c_api.cc
@@ -49,7 +49,8 @@ using ceres::Problem;
 void ceres_init() {
   // This is not ideal, but it's not clear what to do if there is no gflags and
   // no access to command line arguments.
-  google::InitGoogleLogging("<unknown>");
+  char message[] = "<unknown>";
+  google::InitGoogleLogging(message);
 }
 
 ceres_problem_t* ceres_create_problem() {
@@ -172,7 +173,7 @@ ceres_residual_block_id_t* ceres_problem_add_residual_block(
 
 void ceres_solve(ceres_problem_t* c_problem) {
   Problem* problem = reinterpret_cast<Problem*>(c_problem);
-  
+
   // TODO(keir): Obviously, this way of setting options won't scale or last.
   // Instead, figure out a way to specify some of the options without
   // duplicating everything.
diff --git a/internal/ceres/compressed_col_sparse_matrix_utils.h b/internal/ceres/compressed_col_sparse_matrix_utils.h
index afabf1c..c8de2a1 100644
--- a/internal/ceres/compressed_col_sparse_matrix_utils.h
+++ b/internal/ceres/compressed_col_sparse_matrix_utils.h
@@ -61,6 +61,81 @@ void BlockOrderingToScalarOrdering(const vector<int>& blocks,
                                    const vector<int>& block_ordering,
                                    vector<int>* scalar_ordering);
 
+// Solve the linear system
+//
+//   R * solution = rhs
+//
+// Where R is an upper triangular compressed column sparse matrix.
+template <typename IntegerType>
+void SolveUpperTriangularInPlace(IntegerType num_cols,
+                                 const IntegerType* rows,
+                                 const IntegerType* cols,
+                                 const double* values,
+                                 double* rhs_and_solution) {
+  for (IntegerType c = num_cols - 1; c >= 0; --c) {
+    rhs_and_solution[c] /= values[cols[c + 1] - 1];
+    for (IntegerType idx = cols[c]; idx < cols[c + 1] - 1; ++idx) {
+      const IntegerType r = rows[idx];
+      const double v = values[idx];
+      rhs_and_solution[r] -= v * rhs_and_solution[c];
+    }
+  }
+}
+
+// Solve the linear system
+//
+//   R' * solution = rhs
+//
+// Where R is an upper triangular compressed column sparse matrix.
+template <typename IntegerType>
+void SolveUpperTriangularTransposeInPlace(IntegerType num_cols,
+                                          const IntegerType* rows,
+                                          const IntegerType* cols,
+                                          const double* values,
+                                          double* rhs_and_solution) {
+  for (IntegerType c = 0; c < num_cols; ++c) {
+    for (IntegerType idx = cols[c]; idx < cols[c + 1] - 1; ++idx) {
+      const IntegerType r = rows[idx];
+      const double v = values[idx];
+      rhs_and_solution[c] -= v * rhs_and_solution[r];
+    }
+    rhs_and_solution[c] =  rhs_and_solution[c] / values[cols[c + 1] - 1];
+  }
+}
+
+// Given a upper triangular matrix R in compressed column form, solve
+// the linear system,
+//
+//  R'R x = b
+//
+// Where b is all zeros except for rhs_nonzero_index, where it is
+// equal to one.
+//
+// The function exploits this knowledge to reduce the number of
+// floating point operations.
+template <typename IntegerType>
+void SolveRTRWithSparseRHS(IntegerType num_cols,
+                           const IntegerType* rows,
+                           const IntegerType* cols,
+                           const double* values,
+                           const int rhs_nonzero_index,
+                           double* solution) {
+  fill(solution, solution + num_cols, 0.0);
+  solution[rhs_nonzero_index] = 1.0 / values[cols[rhs_nonzero_index + 1] - 1];
+
+  for (IntegerType c = rhs_nonzero_index + 1; c < num_cols; ++c) {
+    for (IntegerType idx = cols[c]; idx < cols[c + 1] - 1; ++idx) {
+      const IntegerType r = rows[idx];
+      if (r < rhs_nonzero_index) continue;
+      const double v = values[idx];
+      solution[c] -= v * solution[r];
+    }
+    solution[c] =  solution[c] / values[cols[c + 1] - 1];
+  }
+
+  SolveUpperTriangularInPlace(num_cols, rows, cols, values, solution);
+}
+
 }  // namespace internal
 }  // namespace ceres
 
diff --git a/internal/ceres/compressed_col_sparse_matrix_utils_test.cc b/internal/ceres/compressed_col_sparse_matrix_utils_test.cc
index e810837..3faf06c 100644
--- a/internal/ceres/compressed_col_sparse_matrix_utils_test.cc
+++ b/internal/ceres/compressed_col_sparse_matrix_utils_test.cc
@@ -193,5 +193,92 @@ TEST(_, ScalarMatrixToBlockMatrix) {
   ss.Free(ccsm.release());
 }
 
+class SolveUpperTriangularTest : public ::testing::Test {
+ protected:
+  void SetUp() {
+    cols.resize(5);
+    rows.resize(7);
+    values.resize(7);
+
+    cols[0] = 0;
+    rows[0] = 0;
+    values[0] = 0.50754;
+
+    cols[1] = 1;
+    rows[1] = 1;
+    values[1] = 0.80483;
+
+    cols[2] = 2;
+    rows[2] = 1;
+    values[2] = 0.14120;
+    rows[3] = 2;
+    values[3] = 0.3;
+
+    cols[3] = 4;
+    rows[4] = 0;
+    values[4] = 0.77696;
+    rows[5] = 1;
+    values[5] = 0.41860;
+    rows[6] = 3;
+    values[6] = 0.88979;
+
+    cols[4] = 7;
+  }
+
+  vector<int> cols;
+  vector<int> rows;
+  vector<double> values;
+};
+
+TEST_F(SolveUpperTriangularTest, SolveInPlace) {
+  double rhs_and_solution[] = {1.0, 1.0, 2.0, 2.0};
+  const double expected[] = { -1.4706, -1.0962, 6.6667, 2.2477};
+
+  SolveUpperTriangularInPlace<int>(cols.size() - 1,
+                                   &rows[0],
+                                   &cols[0],
+                                   &values[0],
+                                   rhs_and_solution);
+
+  for (int i = 0; i < 4; ++i) {
+    EXPECT_NEAR(rhs_and_solution[i], expected[i], 1e-4) << i;
+  }
+}
+
+TEST_F(SolveUpperTriangularTest, TransposeSolveInPlace) {
+  double rhs_and_solution[] = {1.0, 1.0, 2.0, 2.0};
+  double expected[] = {1.970288,  1.242498,  6.081864, -0.057255};
+
+  SolveUpperTriangularTransposeInPlace<int>(cols.size() - 1,
+                                            &rows[0],
+                                            &cols[0],
+                                            &values[0],
+                                            rhs_and_solution);
+
+  for (int i = 0; i < 4; ++i) {
+    EXPECT_NEAR(rhs_and_solution[i], expected[i], 1e-4) << i;
+  }
+}
+
+TEST_F(SolveUpperTriangularTest, RTRSolveWithSparseRHS) {
+  double solution[4];
+  double expected[] = { 6.8420e+00,   1.0057e+00,  -1.4907e-16,  -1.9335e+00,
+                        1.0057e+00,   2.2275e+00,  -1.9493e+00,  -6.5693e-01,
+                       -1.4907e-16,  -1.9493e+00,   1.1111e+01,   9.7381e-17,
+                       -1.9335e+00,  -6.5693e-01,   9.7381e-17,   1.2631e+00 };
+
+  for (int i = 0; i < 4; ++i) {
+    SolveRTRWithSparseRHS<int>(cols.size() - 1,
+                               &rows[0],
+                               &cols[0],
+                               &values[0],
+                               i,
+                               solution);
+    for (int j = 0; j < 4; ++j) {
+      EXPECT_NEAR(solution[j], expected[4 * i + j], 1e-3) << i;
+    }
+  }
+}
+
 }  // namespace internal
 }  // namespace ceres
diff --git a/internal/ceres/covariance_impl.cc b/internal/ceres/covariance_impl.cc
index 1befde7..19d545c 100644
--- a/internal/ceres/covariance_impl.cc
+++ b/internal/ceres/covariance_impl.cc
@@ -38,6 +38,7 @@
 #include <utility>
 #include <vector>
 #include "Eigen/SVD"
+#include "ceres/compressed_col_sparse_matrix_utils.h"
 #include "ceres/compressed_row_sparse_matrix.h"
 #include "ceres/covariance.h"
 #include "ceres/crs_matrix.h"
@@ -48,7 +49,6 @@
 #include "ceres/suitesparse.h"
 #include "ceres/wall_time.h"
 #include "glog/logging.h"
-#include "SuiteSparseQR.hpp"
 
 namespace ceres {
 namespace internal {
@@ -56,6 +56,7 @@ namespace {
 
 // Per thread storage for SuiteSparse.
 #ifndef CERES_NO_SUITESPARSE
+
 struct PerThreadContext {
   explicit PerThreadContext(int num_rows)
       : solution(NULL),
@@ -81,6 +82,7 @@ struct PerThreadContext {
   cholmod_dense* rhs;
   SuiteSparse ss;
 };
+
 #endif
 
 }  // namespace
@@ -604,16 +606,8 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() {
   const int num_cols = jacobian.num_cols;
   const int num_nonzeros = jacobian.values.size();
 
-  // UF_long is deprecated but SuiteSparse_long is only available in
-  // newer versions of SuiteSparse.
-#if (SUITESPARSE_VERSION < 4002)
-  vector<UF_long> transpose_rows(num_cols + 1, 0);
-  vector<UF_long> transpose_cols(num_nonzeros, 0);
-#else
   vector<SuiteSparse_long> transpose_rows(num_cols + 1, 0);
   vector<SuiteSparse_long> transpose_cols(num_nonzeros, 0);
-#endif
-
   vector<double> transpose_values(num_nonzeros, 0);
 
   for (int idx = 0; idx < num_nonzeros; ++idx) {
@@ -658,23 +652,49 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() {
   cholmod_common cc;
   cholmod_l_start(&cc);
 
-  SuiteSparseQR_factorization<double>* factor =
-      SuiteSparseQR_factorize<double>(SPQR_ORDERING_BESTAMD,
-                                      SPQR_DEFAULT_TOL,
-                                      &cholmod_jacobian,
-                                      &cc);
+  cholmod_sparse* R = NULL;
+  SuiteSparse_long* permutation = NULL;
+
+  // Compute a Q-less QR factorization of the Jacobian. Since we are
+  // only interested in inverting J'J = R'R, we do not need Q. This
+  // saves memory and gives us R as a permuted compressed column
+  // sparse matrix.
+  //
+  // TODO(sameeragarwal): Currently the symbolic factorization and the
+  // numeric factorization is done at the same time, and this does not
+  // explicitly account for the block column and row structure in the
+  // matrix. When using AMD, we have observed in the past that
+  // computing the ordering with the block matrix is significantly
+  // more efficient, both in runtime as well as the quality of
+  // ordering computed. So, it maybe worth doing that analysis
+  // separately.
+  const SuiteSparse_long rank =
+      SuiteSparseQR<double>(SPQR_ORDERING_BESTAMD,
+                            SPQR_DEFAULT_TOL,
+                            cholmod_jacobian.ncol,
+                            &cholmod_jacobian,
+                            &R,
+                            &permutation,
+                            &cc);
   event_logger.AddEvent("Numeric Factorization");
+  CHECK_NOTNULL(permutation);
+  CHECK_NOTNULL(R);
 
-  const int rank = cc.SPQR_istat[4];
   if (rank < cholmod_jacobian.ncol) {
     LOG(WARNING) << "Jacobian matrix is rank deficient."
                  << "Number of columns: " << cholmod_jacobian.ncol
                  << " rank: " << rank;
-    SuiteSparseQR_free(&factor, &cc);
+    delete []permutation;
+    cholmod_l_free_sparse(&R, &cc);
     cholmod_l_finish(&cc);
     return false;
   }
 
+  vector<int> inverse_permutation(num_cols);
+  for (SuiteSparse_long i = 0; i < num_cols; ++i) {
+    inverse_permutation[permutation[i]] = i;
+  }
+
   const int* rows = covariance_matrix_->rows();
   const int* cols = covariance_matrix_->cols();
   double* values = covariance_matrix_->mutable_values();
@@ -688,35 +708,39 @@ bool CovarianceImpl::ComputeCovarianceValuesUsingSparseQR() {
   //
   // Since the covariance matrix is symmetric, the i^th row and column
   // are equal.
+  const int num_threads = options_.num_threads;
+  scoped_array<double> workspace(new double[num_threads * num_cols]);
 
-  cholmod_dense* rhs = cholmod_l_zeros(num_cols, 1, CHOLMOD_REAL, &cc);
-  double* rhs_x = reinterpret_cast<double*>(rhs->x);
-
+#pragma omp parallel for num_threads(num_threads) schedule(dynamic)
   for (int r = 0; r < num_cols; ++r) {
-    int row_begin = rows[r];
-    int row_end = rows[r + 1];
+    const int row_begin = rows[r];
+    const int row_end = rows[r + 1];
     if (row_end == row_begin) {
       continue;
     }
 
-    rhs_x[r] = 1.0;
-
-    cholmod_dense* y1 = SuiteSparseQR_solve<double>(SPQR_RTX_EQUALS_ETB, factor, rhs, &cc);
-    cholmod_dense* solution = SuiteSparseQR_solve<double>(SPQR_RETX_EQUALS_B, factor, y1, &cc);
+#  ifdef CERES_USE_OPENMP
+    int thread_id = omp_get_thread_num();
+#  else
+    int thread_id = 0;
+#  endif
 
-    double* solution_x = reinterpret_cast<double*>(solution->x);
+    double* solution = workspace.get() + thread_id * num_cols;
+    SolveRTRWithSparseRHS<SuiteSparse_long>(
+        num_cols,
+        static_cast<SuiteSparse_long*>(R->i),
+        static_cast<SuiteSparse_long*>(R->p),
+        static_cast<double*>(R->x),
+        inverse_permutation[r],
+        solution);
     for (int idx = row_begin; idx < row_end; ++idx) {
-      const int c = cols[idx];
-      values[idx] = solution_x[c];
+     const int c = cols[idx];
+     values[idx] = solution[inverse_permutation[c]];
     }
-
-    cholmod_l_free_dense(&y1, &cc);
-    cholmod_l_free_dense(&solution, &cc);
-    rhs_x[r] = 0.0;
   }
 
-  cholmod_l_free_dense(&rhs, &cc);
-  SuiteSparseQR_free(&factor, &cc);
+  delete []permutation;
+  cholmod_l_free_sparse(&R, &cc);
   cholmod_l_finish(&cc);
   event_logger.AddEvent("Inversion");
   return true;
diff --git a/internal/ceres/covariance_test.cc b/internal/ceres/covariance_test.cc
index e7d25a1..f3a5051 100644
--- a/internal/ceres/covariance_test.cc
+++ b/internal/ceres/covariance_test.cc
@@ -499,6 +499,9 @@ TEST_F(CovarianceTest, ConstantParameterBlock) {
 #ifndef CERES_NO_SUITESPARSE
   options.algorithm_type = SPARSE_CHOLESKY;
   ComputeAndCompareCovarianceBlocks(options, expected_covariance);
+
+  options.algorithm_type = SPARSE_QR;
+  ComputeAndCompareCovarianceBlocks(options, expected_covariance);
 #endif
 
   options.algorithm_type = DENSE_SVD;
@@ -552,6 +555,9 @@ TEST_F(CovarianceTest, LocalParameterization) {
 #ifndef CERES_NO_SUITESPARSE
   options.algorithm_type = SPARSE_CHOLESKY;
   ComputeAndCompareCovarianceBlocks(options, expected_covariance);
+
+  options.algorithm_type = SPARSE_QR;
+  ComputeAndCompareCovarianceBlocks(options, expected_covariance);
 #endif
 
   options.algorithm_type = DENSE_SVD;
@@ -776,6 +782,7 @@ class LargeScaleCovarianceTest : public ::testing::Test {
 
 TEST_F(LargeScaleCovarianceTest, Parallel) {
   ComputeAndCompare(SPARSE_CHOLESKY, 4);
+  ComputeAndCompare(SPARSE_QR, 4);
 }
 
 #endif  // !defined(CERES_NO_SUITESPARSE) && defined(CERES_USE_OPENMP)
diff --git a/internal/ceres/cxsparse.h b/internal/ceres/cxsparse.h
index 6004301..cd87908 100644
--- a/internal/ceres/cxsparse.h
+++ b/internal/ceres/cxsparse.h
@@ -125,6 +125,11 @@ class CXSparse {
 }  // namespace internal
 }  // namespace ceres
 
+#else  // CERES_NO_CXSPARSE
+
+class CXSparse {};
+typedef void cs_dis;
+
 #endif  // CERES_NO_CXSPARSE
 
 #endif  // CERES_INTERNAL_CXSPARSE_H_
diff --git a/internal/ceres/dense_normal_cholesky_solver.cc b/internal/ceres/dense_normal_cholesky_solver.cc
index 96f5511..fbf3cbe 100644
--- a/internal/ceres/dense_normal_cholesky_solver.cc
+++ b/internal/ceres/dense_normal_cholesky_solver.cc
@@ -33,9 +33,11 @@
 #include <cstddef>
 
 #include "Eigen/Dense"
+#include "ceres/blas.h"
 #include "ceres/dense_sparse_matrix.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
+#include "ceres/lapack.h"
 #include "ceres/linear_solver.h"
 #include "ceres/types.h"
 #include "ceres/wall_time.h"
@@ -52,6 +54,18 @@ LinearSolver::Summary DenseNormalCholeskySolver::SolveImpl(
     const double* b,
     const LinearSolver::PerSolveOptions& per_solve_options,
     double* x) {
+  if (options_.dense_linear_algebra_library_type == EIGEN) {
+    return SolveUsingEigen(A, b, per_solve_options, x);
+  } else {
+    return SolveUsingLAPACK(A, b, per_solve_options, x);
+  }
+}
+
+LinearSolver::Summary DenseNormalCholeskySolver::SolveUsingEigen(
+    DenseSparseMatrix* A,
+    const double* b,
+    const LinearSolver::PerSolveOptions& per_solve_options,
+    double* x) {
   EventLogger event_logger("DenseNormalCholeskySolver::Solve");
 
   const int num_rows = A->num_rows();
@@ -62,6 +76,7 @@ LinearSolver::Summary DenseNormalCholeskySolver::SolveImpl(
   lhs.setZero();
 
   event_logger.AddEvent("Setup");
+
   //   lhs += A'A
   //
   // Using rankUpdate instead of GEMM, exposes the fact that its the
@@ -76,16 +91,66 @@ LinearSolver::Summary DenseNormalCholeskySolver::SolveImpl(
     ConstVectorRef D(per_solve_options.D, num_cols);
     lhs += D.array().square().matrix().asDiagonal();
   }
+  event_logger.AddEvent("Product");
 
   LinearSolver::Summary summary;
   summary.num_iterations = 1;
   summary.termination_type = TOLERANCE;
   VectorRef(x, num_cols) =
-      lhs.selfadjointView<Eigen::Upper>().ldlt().solve(rhs);
+      lhs.selfadjointView<Eigen::Upper>().llt().solve(rhs);
   event_logger.AddEvent("Solve");
-
   return summary;
 }
 
+LinearSolver::Summary DenseNormalCholeskySolver::SolveUsingLAPACK(
+    DenseSparseMatrix* A,
+    const double* b,
+    const LinearSolver::PerSolveOptions& per_solve_options,
+    double* x) {
+  EventLogger event_logger("DenseNormalCholeskySolver::Solve");
+
+  if (per_solve_options.D != NULL) {
+    // Temporarily append a diagonal block to the A matrix, but undo
+    // it before returning the matrix to the user.
+    A->AppendDiagonal(per_solve_options.D);
+  }
+
+  const int num_cols = A->num_cols();
+  Matrix lhs(num_cols, num_cols);
+  event_logger.AddEvent("Setup");
+
+  // lhs = A'A
+  //
+  // Note: This is a bit delicate, it assumes that the stride on this
+  // matrix is the same as the number of rows.
+  BLAS::SymmetricRankKUpdate(A->num_rows(),
+                             num_cols,
+                             A->values(),
+                             true,
+                             1.0,
+                             0.0,
+                             lhs.data());
+
+  if (per_solve_options.D != NULL) {
+    // Undo the modifications to the matrix A.
+    A->RemoveDiagonal();
+  }
+
+  // TODO(sameeragarwal): Replace this with a gemv call for true blasness.
+  //   rhs = A'b
+  VectorRef(x, num_cols) =
+      A->matrix().transpose() * ConstVectorRef(b, A->num_rows());
+  event_logger.AddEvent("Product");
+
+  const int info = LAPACK::SolveInPlaceUsingCholesky(num_cols, lhs.data(), x);
+  event_logger.AddEvent("Solve");
+
+  LinearSolver::Summary summary;
+  summary.num_iterations = 1;
+  summary.termination_type = info == 0 ? TOLERANCE : FAILURE;
+
+  event_logger.AddEvent("TearDown");
+  return summary;
+}
 }   // namespace internal
 }   // namespace ceres
diff --git a/internal/ceres/dense_normal_cholesky_solver.h b/internal/ceres/dense_normal_cholesky_solver.h
index de47740..e35053f 100644
--- a/internal/ceres/dense_normal_cholesky_solver.h
+++ b/internal/ceres/dense_normal_cholesky_solver.h
@@ -85,6 +85,18 @@ class DenseNormalCholeskySolver: public DenseSparseMatrixSolver {
       const LinearSolver::PerSolveOptions& per_solve_options,
       double* x);
 
+  LinearSolver::Summary SolveUsingLAPACK(
+      DenseSparseMatrix* A,
+      const double* b,
+      const LinearSolver::PerSolveOptions& per_solve_options,
+      double* x);
+
+  LinearSolver::Summary SolveUsingEigen(
+      DenseSparseMatrix* A,
+      const double* b,
+      const LinearSolver::PerSolveOptions& per_solve_options,
+      double* x);
+
   const LinearSolver::Options options_;
   CERES_DISALLOW_COPY_AND_ASSIGN(DenseNormalCholeskySolver);
 };
diff --git a/internal/ceres/dense_qr_solver.cc b/internal/ceres/dense_qr_solver.cc
index 1fb9709..d76d58b 100644
--- a/internal/ceres/dense_qr_solver.cc
+++ b/internal/ceres/dense_qr_solver.cc
@@ -30,12 +30,13 @@
 
 #include "ceres/dense_qr_solver.h"
 
-#include <cstddef>
 
+#include <cstddef>
 #include "Eigen/Dense"
 #include "ceres/dense_sparse_matrix.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
+#include "ceres/lapack.h"
 #include "ceres/linear_solver.h"
 #include "ceres/types.h"
 #include "ceres/wall_time.h"
@@ -44,13 +45,87 @@ namespace ceres {
 namespace internal {
 
 DenseQRSolver::DenseQRSolver(const LinearSolver::Options& options)
-    : options_(options) {}
+    : options_(options) {
+  work_.resize(1);
+}
 
 LinearSolver::Summary DenseQRSolver::SolveImpl(
     DenseSparseMatrix* A,
     const double* b,
     const LinearSolver::PerSolveOptions& per_solve_options,
     double* x) {
+  if (options_.dense_linear_algebra_library_type == EIGEN) {
+    return SolveUsingEigen(A, b, per_solve_options, x);
+  } else {
+    return SolveUsingLAPACK(A, b, per_solve_options, x);
+  }
+}
+LinearSolver::Summary DenseQRSolver::SolveUsingLAPACK(
+    DenseSparseMatrix* A,
+    const double* b,
+    const LinearSolver::PerSolveOptions& per_solve_options,
+    double* x) {
+  EventLogger event_logger("DenseQRSolver::Solve");
+
+  const int num_rows = A->num_rows();
+  const int num_cols = A->num_cols();
+
+  if (per_solve_options.D != NULL) {
+    // Temporarily append a diagonal block to the A matrix, but undo
+    // it before returning the matrix to the user.
+    A->AppendDiagonal(per_solve_options.D);
+  }
+
+  // TODO(sameeragarwal): Since we are copying anyways, the diagonal
+  // can be appended to the matrix instead of doing it on A.
+  lhs_ =  A->matrix();
+
+  if (per_solve_options.D != NULL) {
+    // Undo the modifications to the matrix A.
+    A->RemoveDiagonal();
+  }
+
+  // rhs = [b;0] to account for the additional rows in the lhs.
+  if (rhs_.rows() != lhs_.rows()) {
+    rhs_.resize(lhs_.rows());
+  }
+  rhs_.setZero();
+  rhs_.head(num_rows) = ConstVectorRef(b, num_rows);
+
+  if (work_.rows() == 1) {
+    const int work_size =
+        LAPACK::EstimateWorkSizeForQR(lhs_.rows(), lhs_.cols());
+    VLOG(3) << "Working memory for Dense QR factorization: "
+            << work_size * sizeof(double);
+    work_.resize(work_size);
+  }
+
+  const int info = LAPACK::SolveUsingQR(lhs_.rows(),
+                                        lhs_.cols(),
+                                        lhs_.data(),
+                                        work_.rows(),
+                                        work_.data(),
+                                        rhs_.data());
+  event_logger.AddEvent("Solve");
+
+  LinearSolver::Summary summary;
+  summary.num_iterations = 1;
+  if (info == 0) {
+    VectorRef(x, num_cols) = rhs_.head(num_cols);
+    summary.termination_type = TOLERANCE;
+  } else {
+    summary.termination_type = FAILURE;
+  }
+
+  event_logger.AddEvent("TearDown");
+  return summary;
+}
+
+LinearSolver::Summary DenseQRSolver::SolveUsingEigen(
+    DenseSparseMatrix* A,
+    const double* b,
+    const LinearSolver::PerSolveOptions& per_solve_options,
+    double* x) {
   EventLogger event_logger("DenseQRSolver::Solve");
 
   const int num_rows = A->num_rows();
@@ -73,7 +148,7 @@ LinearSolver::Summary DenseQRSolver::SolveImpl(
   event_logger.AddEvent("Setup");
 
   // Solve the system.
-  VectorRef(x, num_cols) = A->matrix().colPivHouseholderQr().solve(rhs_);
+  VectorRef(x, num_cols) = A->matrix().householderQr().solve(rhs_);
   event_logger.AddEvent("Solve");
 
   if (per_solve_options.D != NULL) {
diff --git a/internal/ceres/dense_qr_solver.h b/internal/ceres/dense_qr_solver.h
index f78fa72..e745c63 100644
--- a/internal/ceres/dense_qr_solver.h
+++ b/internal/ceres/dense_qr_solver.h
@@ -90,8 +90,22 @@ class DenseQRSolver: public DenseSparseMatrixSolver {
       const LinearSolver::PerSolveOptions& per_solve_options,
       double* x);
 
+  LinearSolver::Summary SolveUsingEigen(
+      DenseSparseMatrix* A,
+      const double* b,
+      const LinearSolver::PerSolveOptions& per_solve_options,
+      double* x);
+
+  LinearSolver::Summary SolveUsingLAPACK(
+      DenseSparseMatrix* A,
+      const double* b,
+      const LinearSolver::PerSolveOptions& per_solve_options,
+      double* x);
+
   const LinearSolver::Options options_;
+  ColMajorMatrix lhs_;
   Vector rhs_;
+  Vector work_;
   CERES_DISALLOW_COPY_AND_ASSIGN(DenseQRSolver);
 };
 
diff --git a/internal/ceres/implicit_schur_complement.cc b/internal/ceres/implicit_schur_complement.cc
index 7c934fb..32722bb 100644
--- a/internal/ceres/implicit_schur_complement.cc
+++ b/internal/ceres/implicit_schur_complement.cc
@@ -161,7 +161,7 @@ void ImplicitSchurComplement::AddDiagonalAndInvert(
 
     m = m
         .selfadjointView<Eigen::Upper>()
-        .ldlt()
+        .llt()
         .solve(Matrix::Identity(row_block_size, row_block_size));
   }
 }
diff --git a/internal/ceres/implicit_schur_complement_test.cc b/internal/ceres/implicit_schur_complement_test.cc
index bd36672..1694273 100644
--- a/internal/ceres/implicit_schur_complement_test.cc
+++ b/internal/ceres/implicit_schur_complement_test.cc
@@ -109,7 +109,7 @@ class ImplicitSchurComplementTest : public ::testing::Test {
     solution->setZero();
     VectorRef schur_solution(solution->data() + num_cols_ - num_schur_rows,
                              num_schur_rows);
-    schur_solution = lhs->selfadjointView<Eigen::Upper>().ldlt().solve(*rhs);
+    schur_solution = lhs->selfadjointView<Eigen::Upper>().llt().solve(*rhs);
     eliminator->BackSubstitute(A_.get(), b_.get(), D,
                                schur_solution.data(), solution->data());
   }
@@ -156,7 +156,7 @@ class ImplicitSchurComplementTest : public ::testing::Test {
 
     // Reference solution to the f_block.
     const Vector reference_f_sol =
-        lhs.selfadjointView<Eigen::Upper>().ldlt().solve(rhs);
+        lhs.selfadjointView<Eigen::Upper>().llt().solve(rhs);
 
     // Backsubstituted solution from the implicit schur solver using the
     // reference solution to the f_block.
diff --git a/internal/ceres/iterative_schur_complement_solver.cc b/internal/ceres/iterative_schur_complement_solver.cc
index d39d7db..1aac565 100644
--- a/internal/ceres/iterative_schur_complement_solver.cc
+++ b/internal/ceres/iterative_schur_complement_solver.cc
@@ -78,6 +78,17 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl(
   }
   schur_complement_->Init(*A, per_solve_options.D, b);
 
+  const int num_schur_complement_blocks =
+      A->block_structure()->cols.size() - options_.elimination_groups[0];
+  if (num_schur_complement_blocks == 0) {
+    VLOG(2) << "No parameter blocks left in the schur complement.";
+    LinearSolver::Summary cg_summary;
+    cg_summary.num_iterations = 0;
+    cg_summary.termination_type = TOLERANCE;
+    schur_complement_->BackSubstitute(NULL, x);
+    return cg_summary;
+  }
+
   // Initialize the solution to the Schur complement system to zero.
   //
   // TODO(sameeragarwal): There maybe a better initialization than an
@@ -97,8 +108,8 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl(
 
   Preconditioner::Options preconditioner_options;
   preconditioner_options.type = options_.preconditioner_type;
-  preconditioner_options.sparse_linear_algebra_library =
-      options_.sparse_linear_algebra_library;
+  preconditioner_options.sparse_linear_algebra_library_type =
+      options_.sparse_linear_algebra_library_type;
   preconditioner_options.num_threads = options_.num_threads;
   preconditioner_options.row_block_size = options_.row_block_size;
   preconditioner_options.e_block_size = options_.e_block_size;
@@ -116,16 +127,16 @@ LinearSolver::Summary IterativeSchurComplementSolver::SolveImpl(
     case SCHUR_JACOBI:
       if (preconditioner_.get() == NULL) {
         preconditioner_.reset(
-            new SchurJacobiPreconditioner(
-                *A->block_structure(), preconditioner_options));
+            new SchurJacobiPreconditioner(*A->block_structure(),
+                                          preconditioner_options));
       }
       break;
     case CLUSTER_JACOBI:
     case CLUSTER_TRIDIAGONAL:
       if (preconditioner_.get() == NULL) {
         preconditioner_.reset(
-            new VisibilityBasedPreconditioner(
-                *A->block_structure(), preconditioner_options));
+            new VisibilityBasedPreconditioner(*A->block_structure(),
+                                              preconditioner_options));
       }
       break;
     default:
diff --git a/internal/ceres/iterative_schur_complement_solver_test.cc b/internal/ceres/iterative_schur_complement_solver_test.cc
index 86e7825..db45741 100644
--- a/internal/ceres/iterative_schur_complement_solver_test.cc
+++ b/internal/ceres/iterative_schur_complement_solver_test.cc
@@ -58,9 +58,9 @@ const double kEpsilon = 1e-14;
 
 class IterativeSchurComplementSolverTest : public ::testing::Test {
  protected :
-  virtual void SetUp() {
+  void SetUpProblem(int problem_id) {
     scoped_ptr<LinearLeastSquaresProblem> problem(
-        CreateLinearLeastSquaresProblemFromId(2));
+        CreateLinearLeastSquaresProblemFromId(problem_id));
 
     CHECK_NOTNULL(problem.get());
     A_.reset(down_cast<BlockSparseMatrix*>(problem->A.release()));
@@ -90,7 +90,9 @@ class IterativeSchurComplementSolverTest : public ::testing::Test {
     qr->Solve(&dense_A, b_.get(), per_solve_options, reference_solution.data());
 
     options.elimination_groups.push_back(num_eliminate_blocks_);
+    options.elimination_groups.push_back(0);
     options.max_num_iterations = num_cols_;
+    options.preconditioner_type = SCHUR_JACOBI;
     IterativeSchurComplementSolver isc(options);
 
     Vector isc_sol(num_cols_);
@@ -114,7 +116,14 @@ class IterativeSchurComplementSolverTest : public ::testing::Test {
   scoped_array<double> D_;
 };
 
-TEST_F(IterativeSchurComplementSolverTest, SolverTest) {
+TEST_F(IterativeSchurComplementSolverTest, NormalProblem) {
+  SetUpProblem(2);
+  EXPECT_TRUE(TestSolver(NULL));
+  EXPECT_TRUE(TestSolver(D_.get()));
+}
+
+TEST_F(IterativeSchurComplementSolverTest, ProblemWithNoFBlocks) {
+  SetUpProblem(3);
   EXPECT_TRUE(TestSolver(NULL));
   EXPECT_TRUE(TestSolver(D_.get()));
 }
diff --git a/internal/ceres/lapack.cc b/internal/ceres/lapack.cc
new file mode 100644
index 0000000..73bfa69
--- /dev/null
+++ b/internal/ceres/lapack.cc
@@ -0,0 +1,157 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+
+#include "ceres/lapack.h"
+#include "glog/logging.h"
+
+// C interface to the LAPACK Cholesky factorization and triangular solve.
+extern "C" void dpotrf_(char* uplo,
+                       int* n,
+                       double* a,
+                       int* lda,
+                       int* info);
+
+extern "C" void dpotrs_(char* uplo,
+                        int* n,
+                        int* nrhs,
+                        double* a,
+                        int* lda,
+                        double* b,
+                        int* ldb,
+                        int* info);
+
+extern "C" void dgels_(char* uplo,
+                       int* m,
+                       int* n,
+                       int* nrhs,
+                       double* a,
+                       int* lda,
+                       double* b,
+                       int* ldb,
+                       double* work,
+                       int* lwork,
+                       int* info);
+
+
+namespace ceres {
+namespace internal {
+
+int LAPACK::SolveInPlaceUsingCholesky(int num_rows,
+                                      const double* in_lhs,
+                                      double* rhs_and_solution) {
+#ifdef CERES_NO_LAPACK
+  LOG(FATAL) << "Ceres was built without a BLAS library.";
+  return -1;
+#else
+  char uplo = 'L';
+  int n = num_rows;
+  int info = 0;
+  int nrhs = 1;
+  double* lhs = const_cast<double*>(in_lhs);
+
+  dpotrf_(&uplo, &n, lhs, &n, &info);
+  if (info != 0) {
+    LOG(INFO) << "Cholesky factorization (dpotrf) failed: " << info;
+    return info;
+  }
+
+  dpotrs_(&uplo, &n, &nrhs, lhs, &n, rhs_and_solution, &n, &info);
+  if (info != 0) {
+    LOG(INFO) << "Triangular solve (dpotrs) failed: " << info;
+  }
+
+  return info;
+#endif
+};
+
+int LAPACK::EstimateWorkSizeForQR(int num_rows, int num_cols) {
+#ifdef CERES_NO_LAPACK
+  LOG(FATAL) << "Ceres was built without a LAPACK library.";
+  return -1;
+#else
+  char trans = 'N';
+  int nrhs = 1;
+  int lwork = -1;
+  double work;
+  int info = 0;
+  dgels_(&trans,
+         &num_rows,
+         &num_cols,
+         &nrhs,
+         NULL,
+         &num_rows,
+         NULL,
+         &num_rows,
+         &work,
+         &lwork,
+         &info);
+
+  CHECK_EQ(info, 0);
+  return work;
+#endif
+}
+
+int LAPACK::SolveUsingQR(int num_rows,
+                         int num_cols,
+                         const double* in_lhs,
+                         int work_size,
+                         double* work,
+                         double* rhs_and_solution) {
+#ifdef CERES_NO_LAPACK
+  LOG(FATAL) << "Ceres was built without a LAPACK library.";
+  return -1;
+#else
+  char trans = 'N';
+  int m = num_rows;
+  int n = num_cols;
+  int nrhs = 1;
+  int lda = num_rows;
+  int ldb = num_rows;
+  int info = 0;
+  double* lhs = const_cast<double*>(in_lhs);
+
+  dgels_(&trans,
+         &m,
+         &n,
+         &nrhs,
+         lhs,
+         &lda,
+         rhs_and_solution,
+         &ldb,
+         work,
+         &work_size,
+         &info);
+
+  return info;
+#endif
+}
+
+}  // namespace internal
+}  // namespace ceres
diff --git a/internal/ceres/lapack.h b/internal/ceres/lapack.h
new file mode 100644
index 0000000..4f3a88c
--- /dev/null
+++ b/internal/ceres/lapack.h
@@ -0,0 +1,88 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+
+#ifndef CERES_INTERNAL_LAPACK_H_
+#define CERES_INTERNAL_LAPACK_H_
+
+namespace ceres {
+namespace internal {
+
+class LAPACK {
+ public:
+  // Solve
+  //
+  //  lhs * solution = rhs
+  //
+  // using a Cholesky factorization. Here
+  // lhs is a symmetric positive definite matrix. It is assumed to be
+  // column major and only the lower triangular part of the matrix is
+  // referenced.
+  //
+  // This function uses the LAPACK dpotrf and dpotrs routines.
+  //
+  // The return value is zero if the solve is successful.
+  static int SolveInPlaceUsingCholesky(int num_rows,
+                                       const double* lhs,
+                                       double* rhs_and_solution);
+
+  // The SolveUsingQR function requires a buffer for its temporary
+  // computation. This function given the size of the lhs matrix will
+  // return the size of the buffer needed.
+  static int EstimateWorkSizeForQR(int num_rows, int num_cols);
+
+  // Solve
+  //
+  //  lhs * solution = rhs
+  //
+  // using a dense QR factorization. lhs is an arbitrary (possibly
+  // rectangular) matrix with full column rank.
+  //
+  // work is an array of size work_size that this routine uses for its
+  // temporary storage. The optimal size of this array can be obtained
+  // by calling EstimateWorkSizeForQR.
+  //
+  // When calling, rhs_and_solution contains the rhs, and upon return
+  // the first num_col entries are the solution.
+  //
+  // This function uses the LAPACK dgels routine.
+  //
+  // The return value is zero if the solve is successful.
+  static int SolveUsingQR(int num_rows,
+                          int num_cols,
+                          const double* lhs,
+                          int work_size,
+                          double* work,
+                          double* rhs_and_solution);
+};
+
+}  // namespace internal
+}  // namespace ceres
+
+#endif  // CERES_INTERNAL_LAPACK_H_
diff --git a/internal/ceres/line_search.cc b/internal/ceres/line_search.cc
index 39618b5..8323896 100644
--- a/internal/ceres/line_search.cc
+++ b/internal/ceres/line_search.cc
@@ -112,7 +112,7 @@ void LineSearchFunction::Init(const Vector& position,
   direction_ = direction;
 }
 
-bool LineSearchFunction::Evaluate(const double x, double* f, double* g) {
+bool LineSearchFunction::Evaluate(double x, double* f, double* g) {
   scaled_direction_ = x * direction_;
   if (!evaluator_->Plus(position_.data(),
                         scaled_direction_.data(),
diff --git a/internal/ceres/line_search.h b/internal/ceres/line_search.h
index e4836b2..5f24e9f 100644
--- a/internal/ceres/line_search.h
+++ b/internal/ceres/line_search.h
@@ -231,7 +231,7 @@ class LineSearchFunction : public LineSearch::Function {
   explicit LineSearchFunction(Evaluator* evaluator);
   virtual ~LineSearchFunction() {}
   void Init(const Vector& position, const Vector& direction);
-  virtual bool Evaluate(const double x, double* f, double* g);
+  virtual bool Evaluate(double x, double* f, double* g);
   double DirectionInfinityNorm() const;
 
  private:
diff --git a/internal/ceres/linear_solver.h b/internal/ceres/linear_solver.h
index 67bebe0..22691b3 100644
--- a/internal/ceres/linear_solver.h
+++ b/internal/ceres/linear_solver.h
@@ -74,7 +74,8 @@ class LinearSolver {
     Options()
         : type(SPARSE_NORMAL_CHOLESKY),
           preconditioner_type(JACOBI),
-          sparse_linear_algebra_library(SUITE_SPARSE),
+          dense_linear_algebra_library_type(EIGEN),
+          sparse_linear_algebra_library_type(SUITE_SPARSE),
           use_postordering(false),
           min_num_iterations(1),
           max_num_iterations(1),
@@ -89,7 +90,8 @@ class LinearSolver {
 
     PreconditionerType preconditioner_type;
 
-    SparseLinearAlgebraLibraryType sparse_linear_algebra_library;
+    DenseLinearAlgebraLibraryType dense_linear_algebra_library_type;
+    SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
 
     // See solver.h for information about this flag.
     bool use_postordering;
diff --git a/internal/ceres/miniglog/glog/logging.cc b/internal/ceres/miniglog/glog/logging.cc
deleted file mode 100644
index 32a78ce..0000000
--- a/internal/ceres/miniglog/glog/logging.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-// Ceres Solver - A fast non-linear least squares minimizer
-// Copyright 2012 Google Inc. All rights reserved.
-// http://code.google.com/p/ceres-solver/
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-// * Neither the name of Google Inc. nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: keir@google.com (Keir Mierle)
-
-#include "glog/logging.h"
-
-namespace google {
-
-// This is the set of log sinks. This must be in a separate library to ensure
-// that there is only one instance of this across the entire program.
-std::set<google::LogSink *> log_sinks_global;
-
-}  // namespace ceres
diff --git a/internal/ceres/miniglog/glog/logging.h b/internal/ceres/miniglog/glog/logging.h
index 1fc137b..bab3191 100644
--- a/internal/ceres/miniglog/glog/logging.h
+++ b/internal/ceres/miniglog/glog/logging.h
@@ -1,100 +1,74 @@
-// Ceres Solver - A fast non-linear least squares minimizer
-// Copyright 2011, 2012 Google Inc. All rights reserved.
-// http://code.google.com/p/ceres-solver/
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// * Redistributions of source code must retain the above copyright notice,
-//   this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above copyright notice,
-//   this list of conditions and the following disclaimer in the documentation
-//   and/or other materials provided with the distribution.
-// * Neither the name of Google Inc. nor the names of its contributors may be
-//   used to endorse or promote products derived from this software without
-//   specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
+// Copyright 2011 Google Inc. All Rights Reserved.
 // Author: settinger@google.com (Scott Ettinger)
-//         keir@google.com (Keir Mierle)
-//
-// Simplified Glog style logging with Android support. Supported macros in
-// decreasing severity level per line:
+
+// Simplified Google3 style logging with Android support.
+// Supported macros are : LOG(INFO), LOG(WARNING), LOG(ERROR), LOG(FATAL),
+//                        and VLOG(n).
 //
-//   VLOG(2), VLOG(N)
-//   VLOG(1),
-//   LOG(INFO), VLOG(0), LG
-//   LOG(WARNING),
-//   LOG(ERROR),
-//   LOG(FATAL),
+// Portions of this code are taken from the GLOG package.  This code
+// is only a small subset of the GLOG functionality. And like GLOG,
+// higher levels are more verbose.
 //
-// With VLOG(n), the output is directed to one of the 5 Android log levels:
+// Notable differences from GLOG :
 //
-//   2 - Verbose
-//   1 - Debug
-//   0 - Info
-//  -1 - Warning
-//  -2 - Error
-//  -3 - Fatal
+// 1. lack of support for displaying unprintable characters and lack
+// of stack trace information upon failure of the CHECK macros.
+// 2. All output is tagged with the string "native".
+// 3. While there is no runtime flag filtering logs (-v, -vmodule), the
+//    compile time define MAX_LOG_LEVEL can be used to silence any
+//    logging above the given level.
 //
-// Any logging of level 2 and above is directed to the Verbose level. All
-// Android log output is tagged with the string "native".
+// -------------------------------- Usage ------------------------------------
+// Basic usage :
+// LOG(<severity level>) acts as a c++ stream to the Android logcat output.
+// e.g. LOG(INFO) << "Value of counter = " << counter;
 //
-// If the symbol ANDROID is not defined, all output goes to std::cerr.
-// This allows code to be built on a different system for debug.
+// Valid severity levels include INFO, WARNING, ERROR, FATAL.
+// The various severity levels are routed to the corresponding Android logcat
+// output.
+// LOG(FATAL) outputs to the log and then terminates.
 //
-// Portions of this code are taken from the GLOG package.  This code is only a
-// small subset of the GLOG functionality. Notable differences from GLOG
-// behavior include lack of support for displaying unprintable characters and
-// lack of stack trace information upon failure of the CHECK macros.  On
-// non-Android systems, log output goes to std::cerr and is not written to a
-// file.
+// VLOG(<severity level>) can also be used.
+// VLOG(n) output is directed to the Android logcat levels as follows :
+//  >=2 - Verbose
+//    1 - Debug
+//    0 - Info
+//   -1 - Warning
+//   -2 - Error
+// <=-3 - Fatal
+// Note that VLOG(FATAL) will terminate the program.
 //
-// CHECK macros are defined to test for conditions within code.  Any CHECK that
-// fails will log the failure and terminate the application.
+// CHECK macros are defined to test for conditions within code.  Any CHECK
+// that fails will log the failure and terminate the application.
 // e.g. CHECK_GE(3, 2) will pass while CHECK_GE(3, 4) will fail after logging
 //      "Check failed 3 >= 4".
+// The following CHECK macros are defined :
 //
-// The following CHECK macros are defined:
-//
-//   CHECK(condition)        - fails if condition is false and logs condition.
-//   CHECK_NOTNULL(variable) - fails if the variable is NULL.
+// CHECK(condition) - fails if condition is false and logs condition.
+// CHECK_NOTNULL(variable) - fails if the variable is NULL.
 //
 // The following binary check macros are also defined :
-//
-//   Macro                     Operator equivalent
-//   --------------------      -------------------
-//   CHECK_EQ(val1, val2)      val1 == val2
-//   CHECK_NE(val1, val2)      val1 != val2
-//   CHECK_GT(val1, val2)      val1 > val2
-//   CHECK_GE(val1, val2)      val1 >= val2
-//   CHECK_LT(val1, val2)      val1 < val2
-//   CHECK_LE(val1, val2)      val1 <= val2
+//    Macro                 operator applied
+// ------------------------------------------
+// CHECK_EQ(val1, val2)      val1 == val2
+// CHECK_NE(val1, val2)      val1 != val2
+// CHECK_GT(val1, val2)      val1 > val2
+// CHECK_GE(val1, val2)      val1 >= val2
+// CHECK_LT(val1, val2)      val1 < val2
+// CHECK_LE(val1, val2)      val1 <= val2
 //
 // Debug only versions of all of the check macros are also defined.  These
 // macros generate no code in a release build, but avoid unused variable
 // warnings / errors.
-//
-// To use the debug only versions, prepend a D to the normal check macros, e.g.
-// DCHECK_EQ(a, b).
+// To use the debug only versions, Prepend a D to the normal check macros.
+// e.g. DCHECK_EQ(a, b);
 
-#ifndef CERCES_INTERNAL_MINIGLOG_GLOG_LOGGING_H_
-#define CERCES_INTERNAL_MINIGLOG_GLOG_LOGGING_H_
+#ifndef MOBILE_BASE_LOGGING_H_
+#define MOBILE_BASE_LOGGING_H_
 
-#ifdef ANDROID
+// Definitions for building on an Android system.
 #include <android/log.h>
-#endif  // ANDROID
+#include <time.h>
 
 #include <algorithm>
 #include <iostream>
@@ -120,29 +94,26 @@ const int WARNING = ::WARNING;
 const int ERROR   = ::ERROR;
 const int FATAL   = ::FATAL;
 
-// Sink class used for integration with mock and test functions. If sinks are
-// added, all log output is also sent to each sink through the send function.
-// In this implementation, WaitTillSent() is called immediately after the send.
+#ifdef ENABLE_LOG_SINKS
+
+// Sink class used for integration with mock and test functions.
+// If sinks are added, all log output is also sent to each sink through
+// the send function.  In this implementation, WaitTillSent() is called
+// immediately after the send.
 // This implementation is not thread safe.
 class LogSink {
  public:
   virtual ~LogSink() {}
-  virtual void send(LogSeverity severity,
-                    const char* full_filename,
-                    const char* base_filename,
-                    int line,
+  virtual void send(LogSeverity severity, const char* full_filename,
+                    const char* base_filename, int line,
                     const struct tm* tm_time,
-                    const char* message,
-                    size_t message_len) = 0;
+                    const char* message, size_t message_len) = 0;
   virtual void WaitTillSent() = 0;
 };
 
-// Global set of log sinks. The actual object is defined in logging.cc.
-extern std::set<LogSink *> log_sinks_global;
-
-inline void InitGoogleLogging(char *argv) {
-  // Do nothing; this is ignored.
-}
+// Global set of log sinks.
+// TODO(settinger): Move this into a .cc file.
+static std::set<LogSink *> log_sinks_global;
 
 // Note: the Log sink functions are not thread safe.
 inline void AddLogSink(LogSink *sink) {
@@ -153,16 +124,19 @@ inline void RemoveLogSink(LogSink *sink) {
   log_sinks_global.erase(sink);
 }
 
+#endif  // #ifdef ENABLE_LOG_SINKS
+
+inline void InitGoogleLogging(char *argv) {}
+
 }  // namespace google
 
 // ---------------------------- Logger Class --------------------------------
 
 // Class created for each use of the logging macros.
 // The logger acts as a stream and routes the final stream contents to the
-// Android logcat output at the proper filter level.  If ANDROID is not
-// defined, output is directed to std::cerr.  This class should not
+// Android logcat output at the proper filter level.  This class should not
 // be directly instantiated in code, rather it should be invoked through the
-// use of the log macros LG, LOG, or VLOG.
+// use of the log macros LOG, or VLOG.
 class MessageLogger {
  public:
   MessageLogger(const char *file, int line, const char *tag, int severity)
@@ -174,14 +148,17 @@ class MessageLogger {
 
   // Output the contents of the stream to the proper channel on destruction.
   ~MessageLogger() {
+#ifdef MAX_LOG_LEVEL
+    if (severity_ > MAX_LOG_LEVEL && severity_ > FATAL) {
+      return;
+    }
+#endif
     stream_ << "\n";
-
-#ifdef ANDROID
     static const int android_log_levels[] = {
         ANDROID_LOG_FATAL,    // LOG(FATAL)
         ANDROID_LOG_ERROR,    // LOG(ERROR)
         ANDROID_LOG_WARN,     // LOG(WARNING)
-        ANDROID_LOG_INFO,     // LOG(INFO), LG, VLOG(0)
+        ANDROID_LOG_INFO,     // LOG(INFO), VLOG(0)
         ANDROID_LOG_DEBUG,    // VLOG(1)
         ANDROID_LOG_VERBOSE,  // VLOG(2) .. VLOG(N)
     };
@@ -193,22 +170,22 @@ class MessageLogger {
     int android_log_level = android_log_levels[android_level_index];
 
     // Output the log string the Android log at the appropriate level.
-    __android_log_print(android_log_level, tag_.c_str(), stream_.str().c_str());
+    __android_log_write(android_log_level, tag_.c_str(), stream_.str().c_str());
 
     // Indicate termination if needed.
     if (severity_ == FATAL) {
-      __android_log_print(ANDROID_LOG_FATAL,
+      __android_log_write(ANDROID_LOG_FATAL,
                           tag_.c_str(),
                           "terminating.\n");
     }
-#else
-    // If not building on Android, log all output to std::cerr.
-    std::cerr << stream_.str();
-#endif  // ANDROID
+
+#ifdef ENABLE_LOG_SINKS
 
     LogToSinks(severity_);
     WaitForSinks();
 
+#endif  // #ifdef ENABLE_LOG_SINKS
+
     // Android logging at level FATAL does not terminate execution, so abort()
     // is still required to stop the program.
     if (severity_ == FATAL) {
@@ -220,41 +197,41 @@ class MessageLogger {
   std::stringstream &stream() { return stream_; }
 
  private:
+#ifdef ENABLE_LOG_SINKS
+
   void LogToSinks(int severity) {
     time_t rawtime;
-    struct tm* timeinfo;
+    struct tm * timeinfo;
 
-    time (&rawtime);
-    timeinfo = localtime(&rawtime);
-    std::set<google::LogSink*>::iterator iter;
+    time ( &rawtime );
+    timeinfo = localtime ( &rawtime );
+    std::set<google::LogSink *>::iterator iter;
     // Send the log message to all sinks.
     for (iter = google::log_sinks_global.begin();
-         iter != google::log_sinks_global.end(); ++iter) {
+         iter != google::log_sinks_global.end(); ++iter)
       (*iter)->send(severity, file_.c_str(), filename_only_.c_str(), line_,
                     timeinfo, stream_.str().c_str(), stream_.str().size());
-    }
   }
 
   void WaitForSinks() {
-    // TODO(settinger): Add locks for thread safety.
+    // TODO(settinger): add locks for thread safety.
     std::set<google::LogSink *>::iterator iter;
-
     // Call WaitTillSent() for all sinks.
     for (iter = google::log_sinks_global.begin();
-         iter != google::log_sinks_global.end(); ++iter) {
+         iter != google::log_sinks_global.end(); ++iter)
       (*iter)->WaitTillSent();
-    }
   }
 
+#endif // #ifdef ENABLE_LOG_SINKS
+
   void StripBasename(const std::string &full_path, std::string *filename) {
     // TODO(settinger): add support for OS with different path separators.
     const char kSeparator = '/';
     size_t pos = full_path.rfind(kSeparator);
-    if (pos != std::string::npos) {
+    if (pos != std::string::npos)
       *filename = full_path.substr(pos + 1, std::string::npos);
-    } else {
+    else
       *filename = full_path;
-    }
   }
 
   std::string file_;
@@ -267,20 +244,6 @@ class MessageLogger {
 
 // ---------------------- Logging Macro definitions --------------------------
 
-#define LG MessageLogger((char *)__FILE__, __LINE__, "native", \
-                         INFO).stream()
-
-#define LOG(n) MessageLogger((char *)__FILE__, __LINE__, "native", \
-                             n).stream()
-
-#define VLOG(n) MessageLogger((char *)__FILE__, __LINE__, "native", \
-                              n).stream()
-
-// Currently, VLOG is always on.
-#define VLOG_IS_ON(x) true
-
-// ---------------------------- CHECK helpers --------------------------------
-
 // This class is used to explicitly ignore values in the conditional
 // logging macros.  This avoids compiler warnings like "value computed
 // is not used" and "statement has no effect".
@@ -294,7 +257,40 @@ class LoggerVoidify {
 
 // Log only if condition is met.  Otherwise evaluates to void.
 #define LOG_IF(severity, condition) \
-  condition ? (void) 0 : LoggerVoidify() & LOG(severity)
+  !(condition) ? (void) 0 : LoggerVoidify() & \
+    MessageLogger((char *)__FILE__, __LINE__, "native", severity).stream()
+
+// Log only if condition is NOT met.  Otherwise evaluates to void.
+#define LOG_IF_FALSE(severity, condition) LOG_IF(severity, !(condition))
+
+// LG is a convenient shortcut for LOG(INFO). Its use is in new
+// google3 code is discouraged and the following shortcut exists for
+// backward compatibility with existing code.
+#ifdef MAX_LOG_LEVEL
+#define LOG(n) LOG_IF(n, n <= MAX_LOG_LEVEL)
+#define VLOG(n) LOG_IF(n, n <= MAX_LOG_LEVEL)
+#define LG LOG_IF(INFO, INFO <= MAX_LOG_LEVEL)
+#else
+#define LOG(n) MessageLogger((char *)__FILE__, __LINE__, "native", n).stream()
+#define VLOG(n) MessageLogger((char *)__FILE__, __LINE__, "native", n).stream()
+#define LG MessageLogger((char *)__FILE__, __LINE__, "native", INFO).stream()
+#endif
+
+// Currently, VLOG is always on for levels below MAX_LOG_LEVEL.
+#ifndef MAX_LOG_LEVEL
+#define VLOG_IS_ON(x) (1)
+#else
+#define VLOG_IS_ON(x) (x <= MAX_LOG_LEVEL)
+#endif
+
+#ifndef NDEBUG
+#define DLOG LOG
+#else
+#define DLOG(severity) true ? (void) 0 : LoggerVoidify() & \
+    MessageLogger((char *)__FILE__, __LINE__, "native", severity).stream()
+#endif
+
+// ---------------------------- CHECK helpers --------------------------------
 
 // Log a message and terminate.
 template<class T>
@@ -306,16 +302,16 @@ void LogMessageFatal(const char *file, int line, const T &message) {
 // ---------------------------- CHECK macros ---------------------------------
 
 // Check for a given boolean condition.
-#define CHECK(condition) LOG_IF(FATAL, condition) \
+#define CHECK(condition) LOG_IF_FALSE(FATAL, condition) \
         << "Check failed: " #condition " "
 
 #ifndef NDEBUG
 // Debug only version of CHECK
-#define DCHECK(condition) LOG_IF(FATAL, condition) \
+#define DCHECK(condition) LOG_IF_FALSE(FATAL, condition) \
         << "Check failed: " #condition " "
 #else
 // Optimized version - generates no code.
-#define DCHECK(condition) if (false) LOG_IF(FATAL, condition) \
+#define DCHECK(condition) if (false) LOG_IF_FALSE(FATAL, condition) \
         << "Check failed: " #condition " "
 #endif  // NDEBUG
 
@@ -323,7 +319,7 @@ void LogMessageFatal(const char *file, int line, const T &message) {
 
 // Generic binary operator check macro. This should not be directly invoked,
 // instead use the binary comparison macros defined below.
-#define CHECK_OP(val1, val2, op) LOG_IF(FATAL, (val1 op val2)) \
+#define CHECK_OP(val1, val2, op) LOG_IF_FALSE(FATAL, (val1 op val2)) \
   << "Check failed: " #val1 " " #op " " #val2 " "
 
 // Check_op macro definitions
@@ -388,4 +384,8 @@ T& CheckNotNull(const char *file, int line, const char *names, T& t) {
   CheckNotNull(__FILE__, __LINE__, "'" #val "' Must be non NULL", (val))
 #endif  // NDEBUG
 
-#endif  // CERCES_INTERNAL_MINIGLOG_GLOG_LOGGING_H_
+inline void PrintAndroid(const char *msg) {
+  __android_log_write(ANDROID_LOG_VERBOSE, "native", msg);
+}
+
+#endif  // MOBILE_BASE_LOGGING_H_
diff --git a/internal/ceres/partitioned_matrix_view.cc b/internal/ceres/partitioned_matrix_view.cc
index 5dad438..59eaff8 100644
--- a/internal/ceres/partitioned_matrix_view.cc
+++ b/internal/ceres/partitioned_matrix_view.cc
@@ -35,10 +35,10 @@
 #include <algorithm>
 #include <cstring>
 #include <vector>
-#include "ceres/blas.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
 #include "ceres/internal/eigen.h"
+#include "ceres/small_blas.h"
 #include "glog/logging.h"
 
 namespace ceres {
diff --git a/internal/ceres/preconditioner.h b/internal/ceres/preconditioner.h
index cb0a381..af64e3c 100644
--- a/internal/ceres/preconditioner.h
+++ b/internal/ceres/preconditioner.h
@@ -48,7 +48,7 @@ class Preconditioner : public LinearOperator {
   struct Options {
     Options()
         : type(JACOBI),
-          sparse_linear_algebra_library(SUITE_SPARSE),
+          sparse_linear_algebra_library_type(SUITE_SPARSE),
           num_threads(1),
           row_block_size(Eigen::Dynamic),
           e_block_size(Eigen::Dynamic),
@@ -57,7 +57,7 @@ class Preconditioner : public LinearOperator {
 
     PreconditionerType type;
 
-    SparseLinearAlgebraLibraryType sparse_linear_algebra_library;
+    SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
 
     // If possible, how many threads the preconditioner can use.
     int num_threads;
diff --git a/internal/ceres/program_evaluator.h b/internal/ceres/program_evaluator.h
index 19c7541..8aa2a39 100644
--- a/internal/ceres/program_evaluator.h
+++ b/internal/ceres/program_evaluator.h
@@ -86,13 +86,13 @@
 #include <map>
 #include <string>
 #include <vector>
-#include "ceres/blas.h"
 #include "ceres/execution_summary.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/parameter_block.h"
 #include "ceres/program.h"
 #include "ceres/residual_block.h"
+#include "ceres/small_blas.h"
 
 namespace ceres {
 namespace internal {
diff --git a/internal/ceres/residual_block.cc b/internal/ceres/residual_block.cc
index 649f3f7..621082a 100644
--- a/internal/ceres/residual_block.cc
+++ b/internal/ceres/residual_block.cc
@@ -34,8 +34,6 @@
 #include <algorithm>
 #include <cstddef>
 #include <vector>
-
-#include "ceres/blas.h"
 #include "ceres/corrector.h"
 #include "ceres/parameter_block.h"
 #include "ceres/residual_block_utils.h"
@@ -44,6 +42,7 @@
 #include "ceres/internal/fixed_array.h"
 #include "ceres/local_parameterization.h"
 #include "ceres/loss_function.h"
+#include "ceres/small_blas.h"
 
 using Eigen::Dynamic;
 
diff --git a/internal/ceres/schur_complement_solver.cc b/internal/ceres/schur_complement_solver.cc
index 09f61d7..b192aa1 100644
--- a/internal/ceres/schur_complement_solver.cc
+++ b/internal/ceres/schur_complement_solver.cc
@@ -33,20 +33,18 @@
 #include <set>
 #include <vector>
 
-#ifndef CERES_NO_CXSPARSE
-#include "cs.h"
-#endif  // CERES_NO_CXSPARSE
-
 #include "Eigen/Dense"
 #include "ceres/block_random_access_dense_matrix.h"
 #include "ceres/block_random_access_matrix.h"
 #include "ceres/block_random_access_sparse_matrix.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
+#include "ceres/cxsparse.h"
 #include "ceres/detect_structure.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/port.h"
 #include "ceres/internal/scoped_ptr.h"
+#include "ceres/lapack.h"
 #include "ceres/linear_solver.h"
 #include "ceres/schur_complement_solver.h"
 #include "ceres/suitesparse.h"
@@ -130,29 +128,31 @@ bool DenseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
     return true;
   }
 
-  // TODO(sameeragarwal): Add proper error handling; this completely ignores
-  // the quality of the solution to the solve.
-  VectorRef(solution, num_rows) =
-      ConstMatrixRef(m->values(), num_rows, num_rows)
-      .selfadjointView<Eigen::Upper>()
-      .ldlt()
-      .solve(ConstVectorRef(rhs(), num_rows));
+  if (options().dense_linear_algebra_library_type == EIGEN) {
+    // TODO(sameeragarwal): Add proper error handling; this completely ignores
+    // the quality of the solution to the solve.
+    VectorRef(solution, num_rows) =
+        ConstMatrixRef(m->values(), num_rows, num_rows)
+        .selfadjointView<Eigen::Upper>()
+        .llt()
+        .solve(ConstVectorRef(rhs(), num_rows));
+    return true;
+  }
 
-  return true;
+  VectorRef(solution, num_rows) = ConstVectorRef(rhs(), num_rows);
+  const int info = LAPACK::SolveInPlaceUsingCholesky(num_rows,
+                                                     m->values(),
+                                                     solution);
+  return (info == 0);
 }
 
 #if !defined(CERES_NO_SUITESPARSE) || !defined(CERES_NO_CXSPARE)
 
 SparseSchurComplementSolver::SparseSchurComplementSolver(
     const LinearSolver::Options& options)
-    : SchurComplementSolver(options) {
-#ifndef CERES_NO_SUITESPARSE
-  factor_ = NULL;
-#endif  // CERES_NO_SUITESPARSE
-
-#ifndef CERES_NO_CXSPARSE
-  cxsparse_factor_ = NULL;
-#endif  // CERES_NO_CXSPARSE
+    : SchurComplementSolver(options),
+      factor_(NULL),
+      cxsparse_factor_(NULL) {
 }
 
 SparseSchurComplementSolver::~SparseSchurComplementSolver() {
@@ -243,18 +243,18 @@ void SparseSchurComplementSolver::InitStorage(
 }
 
 bool SparseSchurComplementSolver::SolveReducedLinearSystem(double* solution) {
-  switch (options().sparse_linear_algebra_library) {
+  switch (options().sparse_linear_algebra_library_type) {
     case SUITE_SPARSE:
       return SolveReducedLinearSystemUsingSuiteSparse(solution);
     case CX_SPARSE:
       return SolveReducedLinearSystemUsingCXSparse(solution);
     default:
       LOG(FATAL) << "Unknown sparse linear algebra library : "
-                 << options().sparse_linear_algebra_library;
+                 << options().sparse_linear_algebra_library_type;
   }
 
   LOG(FATAL) << "Unknown sparse linear algebra library : "
-             << options().sparse_linear_algebra_library;
+             << options().sparse_linear_algebra_library_type;
   return false;
 }
 
diff --git a/internal/ceres/schur_complement_solver.h b/internal/ceres/schur_complement_solver.h
index 9525e37..b5a1c74 100644
--- a/internal/ceres/schur_complement_solver.h
+++ b/internal/ceres/schur_complement_solver.h
@@ -167,18 +167,14 @@ class SparseSchurComplementSolver : public SchurComplementSolver {
   // Size of the blocks in the Schur complement.
   vector<int> blocks_;
 
-#ifndef CERES_NO_SUITESPARSE
   SuiteSparse ss_;
   // Symbolic factorization of the reduced linear system. Precomputed
   // once and reused in subsequent calls.
   cholmod_factor* factor_;
-#endif  // CERES_NO_SUITESPARSE
 
-#ifndef CERES_NO_CXSPARSE
   CXSparse cxsparse_;
   // Cached factorization
   cs_dis* cxsparse_factor_;
-#endif  // CERES_NO_CXSPARSE
   CERES_DISALLOW_COPY_AND_ASSIGN(SparseSchurComplementSolver);
 };
 
diff --git a/internal/ceres/schur_complement_solver_test.cc b/internal/ceres/schur_complement_solver_test.cc
index 206d4b5..d91c162 100644
--- a/internal/ceres/schur_complement_solver_test.cc
+++ b/internal/ceres/schur_complement_solver_test.cc
@@ -87,7 +87,8 @@ class SchurComplementSolverTest : public ::testing::Test {
       int problem_id,
       bool regularization,
       ceres::LinearSolverType linear_solver_type,
-      ceres::SparseLinearAlgebraLibraryType sparse_linear_algebra_library,
+      ceres::DenseLinearAlgebraLibraryType dense_linear_algebra_library_type,
+      ceres::SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
       bool use_postordering) {
     SetUpFromProblemId(problem_id);
     LinearSolver::Options options;
@@ -95,7 +96,10 @@ class SchurComplementSolverTest : public ::testing::Test {
     options.elimination_groups.push_back(
         A->block_structure()->cols.size() - num_eliminate_blocks);
     options.type = linear_solver_type;
-    options.sparse_linear_algebra_library = sparse_linear_algebra_library;
+    options.dense_linear_algebra_library_type =
+        dense_linear_algebra_library_type;
+    options.sparse_linear_algebra_library_type =
+        sparse_linear_algebra_library_type;
     options.use_postordering = use_postordering;
 
     scoped_ptr<LinearSolver> solver(LinearSolver::Create(options));
@@ -131,53 +135,67 @@ class SchurComplementSolverTest : public ::testing::Test {
   scoped_array<double> sol_d;
 };
 
-TEST_F(SchurComplementSolverTest, DenseSchurWithSmallProblem) {
-  ComputeAndCompareSolutions(2, false, DENSE_SCHUR, SUITE_SPARSE, true);
-  ComputeAndCompareSolutions(2, true, DENSE_SCHUR, SUITE_SPARSE, true);
+TEST_F(SchurComplementSolverTest, EigenBasedDenseSchurWithSmallProblem) {
+  ComputeAndCompareSolutions(2, false, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true);
+  ComputeAndCompareSolutions(2, true, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true);
 }
 
-TEST_F(SchurComplementSolverTest, DenseSchurWithLargeProblem) {
-  ComputeAndCompareSolutions(3, false, DENSE_SCHUR, SUITE_SPARSE, true);
-  ComputeAndCompareSolutions(3, true, DENSE_SCHUR, SUITE_SPARSE, true);
+TEST_F(SchurComplementSolverTest, EigenBasedDenseSchurWithLargeProblem) {
+  ComputeAndCompareSolutions(3, false, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true);
+  ComputeAndCompareSolutions(3, true, DENSE_SCHUR, EIGEN, SUITE_SPARSE, true);
 }
 
+#ifndef CERES_NO_LAPACK
+TEST_F(SchurComplementSolverTest, LAPACKBasedDenseSchurWithSmallProblem) {
+  ComputeAndCompareSolutions(2, false, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true);
+  ComputeAndCompareSolutions(2, true, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true);
+}
+
+TEST_F(SchurComplementSolverTest, LAPACKBasedDenseSchurWithLargeProblem) {
+  ComputeAndCompareSolutions(3, false, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true);
+  ComputeAndCompareSolutions(3, true, DENSE_SCHUR, LAPACK, SUITE_SPARSE, true);
+}
+#endif
+
 #ifndef CERES_NO_SUITESPARSE
 TEST_F(SchurComplementSolverTest,
        SparseSchurWithSuiteSparseSmallProblemNoPostOrdering) {
-  ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, SUITE_SPARSE, false);
-  ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, SUITE_SPARSE, false);
+  ComputeAndCompareSolutions(
+      2, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false);
+  ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false);
 }
 
 TEST_F(SchurComplementSolverTest,
        SparseSchurWithSuiteSparseSmallProblemPostOrdering) {
-  ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, SUITE_SPARSE, true);
-  ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, SUITE_SPARSE, true);
+  ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true);
+  ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true);
 }
 
 TEST_F(SchurComplementSolverTest,
        SparseSchurWithSuiteSparseLargeProblemNoPostOrdering) {
-  ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, SUITE_SPARSE, false);
-  ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, SUITE_SPARSE, false);
+  ComputeAndCompareSolutions(
+      3, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false);
+  ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, false);
 }
 
 TEST_F(SchurComplementSolverTest,
        SparseSchurWithSuiteSparseLargeProblemPostOrdering) {
-  ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, SUITE_SPARSE, true);
-  ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, SUITE_SPARSE, true);
+  ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true);
+  ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, EIGEN, SUITE_SPARSE, true);
 }
 #endif  // CERES_NO_SUITESPARSE
 
 #ifndef CERES_NO_CXSPARSE
 TEST_F(SchurComplementSolverTest,
        SparseSchurWithSuiteSparseSmallProblem) {
-  ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, SUITE_SPARSE, true);
-  ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, SUITE_SPARSE, true);
+  ComputeAndCompareSolutions(2, false, SPARSE_SCHUR, EIGEN, CX_SPARSE, true);
+  ComputeAndCompareSolutions(2, true, SPARSE_SCHUR, EIGEN, CX_SPARSE, true);
 }
 
 TEST_F(SchurComplementSolverTest,
        SparseSchurWithSuiteSparseLargeProblem) {
-  ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, SUITE_SPARSE, true);
-  ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, SUITE_SPARSE, true);
+  ComputeAndCompareSolutions(3, false, SPARSE_SCHUR, EIGEN, CX_SPARSE, true);
+  ComputeAndCompareSolutions(3, true, SPARSE_SCHUR, EIGEN, CX_SPARSE, true);
 }
 #endif  // CERES_NO_CXSPARSE
 
diff --git a/internal/ceres/schur_eliminator_impl.h b/internal/ceres/schur_eliminator_impl.h
index f072c88..c09b7fb 100644
--- a/internal/ceres/schur_eliminator_impl.h
+++ b/internal/ceres/schur_eliminator_impl.h
@@ -51,8 +51,6 @@
 
 #include <algorithm>
 #include <map>
-
-#include "ceres/blas.h"
 #include "ceres/block_random_access_matrix.h"
 #include "ceres/block_sparse_matrix.h"
 #include "ceres/block_structure.h"
@@ -61,6 +59,7 @@
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/map_util.h"
 #include "ceres/schur_eliminator.h"
+#include "ceres/small_blas.h"
 #include "ceres/stl_util.h"
 #include "Eigen/Dense"
 #include "glog/logging.h"
diff --git a/internal/ceres/schur_eliminator_test.cc b/internal/ceres/schur_eliminator_test.cc
index a7e96ae..bed8f3a 100644
--- a/internal/ceres/schur_eliminator_test.cc
+++ b/internal/ceres/schur_eliminator_test.cc
@@ -112,7 +112,7 @@ class SchurEliminatorTest : public ::testing::Test {
       P.block(row, row,  block_size, block_size) =
           P
           .block(row, row,  block_size, block_size)
-          .ldlt()
+          .llt()
           .solve(Matrix::Identity(block_size, block_size));
       row += block_size;
     }
@@ -121,7 +121,7 @@ class SchurEliminatorTest : public ::testing::Test {
         .triangularView<Eigen::Upper>() = R - Q.transpose() * P * Q;
     rhs_expected =
         g.tail(schur_size) - Q.transpose() * P * g.head(num_eliminate_cols);
-    sol_expected = H.ldlt().solve(g);
+    sol_expected = H.llt().solve(g);
   }
 
   void EliminateSolveAndCompare(const VectorRef& diagonal,
@@ -160,7 +160,7 @@ class SchurEliminatorTest : public ::testing::Test {
     Vector reduced_sol  =
         lhs_ref
         .selfadjointView<Eigen::Upper>()
-        .ldlt()
+        .llt()
         .solve(rhs);
 
     // Solution to the linear least squares problem.
diff --git a/internal/ceres/schur_jacobi_preconditioner.cc b/internal/ceres/schur_jacobi_preconditioner.cc
index aa840c5..338df71 100644
--- a/internal/ceres/schur_jacobi_preconditioner.cc
+++ b/internal/ceres/schur_jacobi_preconditioner.cc
@@ -128,7 +128,7 @@ void SchurJacobiPreconditioner::RightMultiply(const double* x,
     VectorRef(y, block_size) =
         block
         .selfadjointView<Eigen::Upper>()
-        .ldlt()
+        .llt()
         .solve(ConstVectorRef(x, block_size));
 
     x += block_size;
diff --git a/internal/ceres/small_blas.h b/internal/ceres/small_blas.h
new file mode 100644
index 0000000..e14e664
--- /dev/null
+++ b/internal/ceres/small_blas.h
@@ -0,0 +1,406 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+//
+// Simple blas functions for use in the Schur Eliminator. These are
+// fairly basic implementations which already yield a significant
+// speedup in the eliminator performance.
+
+#ifndef CERES_INTERNAL_SMALL_BLAS_H_
+#define CERES_INTERNAL_SMALL_BLAS_H_
+
+#include "ceres/internal/eigen.h"
+#include "glog/logging.h"
+
+namespace ceres {
+namespace internal {
+
+// Remove the ".noalias()" annotation from the matrix matrix
+// mutliplies to produce a correct build with the Android NDK,
+// including versions 6, 7, 8, and 8b, when built with STLPort and the
+// non-standalone toolchain (i.e. ndk-build). This appears to be a
+// compiler bug; if the workaround is not in place, the line
+//
+//   block.noalias() -= A * B;
+//
+// gets compiled to
+//
+//   block.noalias() += A * B;
+//
+// which breaks schur elimination. Introducing a temporary by removing the
+// .noalias() annotation causes the issue to disappear. Tracking this
+// issue down was tricky, since the test suite doesn't run when built with
+// the non-standalone toolchain.
+//
+// TODO(keir): Make a reproduction case for this and send it upstream.
+#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
+#define CERES_MAYBE_NOALIAS
+#else
+#define CERES_MAYBE_NOALIAS .noalias()
+#endif
+
+// The following three macros are used to share code and reduce
+// template junk across the various GEMM variants.
+#define CERES_GEMM_BEGIN(name)                                          \
+  template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>  \
+  inline void name(const double* A,                                     \
+                   const int num_row_a,                                 \
+                   const int num_col_a,                                 \
+                   const double* B,                                     \
+                   const int num_row_b,                                 \
+                   const int num_col_b,                                 \
+                   double* C,                                           \
+                   const int start_row_c,                               \
+                   const int start_col_c,                               \
+                   const int row_stride_c,                              \
+                   const int col_stride_c)
+
+#define CERES_GEMM_NAIVE_HEADER                                         \
+  DCHECK_GT(num_row_a, 0);                                              \
+  DCHECK_GT(num_col_a, 0);                                              \
+  DCHECK_GT(num_row_b, 0);                                              \
+  DCHECK_GT(num_col_b, 0);                                              \
+  DCHECK_GE(start_row_c, 0);                                            \
+  DCHECK_GE(start_col_c, 0);                                            \
+  DCHECK_GT(row_stride_c, 0);                                           \
+  DCHECK_GT(col_stride_c, 0);                                           \
+  DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));            \
+  DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));            \
+  DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b));            \
+  DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b));            \
+  const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);  \
+  const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);  \
+  const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b);  \
+  const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b);
+
+#define CERES_GEMM_EIGEN_HEADER                                         \
+  const typename EigenTypes<kRowA, kColA>::ConstMatrixRef               \
+  Aref(A, num_row_a, num_col_a);                                        \
+  const typename EigenTypes<kRowB, kColB>::ConstMatrixRef               \
+  Bref(B, num_row_b, num_col_b);                                        \
+  MatrixRef Cref(C, row_stride_c, col_stride_c);                        \
+
+#define CERES_CALL_GEMM(name)                                           \
+  name<kRowA, kColA, kRowB, kColB, kOperation>(                         \
+      A, num_row_a, num_col_a,                                          \
+      B, num_row_b, num_col_b,                                          \
+      C, start_row_c, start_col_c, row_stride_c, col_stride_c);
+
+
+// For the matrix-matrix functions below, there are three variants for
+// each functionality. Foo, FooNaive and FooEigen. Foo is the one to
+// be called by the user. FooNaive is a basic loop based
+// implementation and FooEigen uses Eigen's implementation. Foo
+// chooses between FooNaive and FooEigen depending on how many of the
+// template arguments are fixed at compile time. Currently, FooEigen
+// is called if all matrix dimensions are compile time
+// constants. FooNaive is called otherwise. This leads to the best
+// performance currently.
+//
+// The MatrixMatrixMultiply variants compute:
+//
+//   C op A * B;
+//
+// The MatrixTransposeMatrixMultiply variants compute:
+//
+//   C op A' * B
+//
+// where op can be +=, -=, or =.
+//
+// The template parameters (kRowA, kColA, kRowB, kColB) allow
+// specialization of the loop at compile time. If this information is
+// not available, then Eigen::Dynamic should be used as the template
+// argument.
+//
+//   kOperation =  1  -> C += A * B
+//   kOperation = -1  -> C -= A * B
+//   kOperation =  0  -> C  = A * B
+//
+// The functions can write into matrices C which are larger than the
+// matrix A * B. This is done by specifying the true size of C via
+// row_stride_c and col_stride_c, and then indicating where A * B
+// should be written into by start_row_c and start_col_c.
+//
+// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c =
+// 4 and start_col_c = 5, then if A = 3x2 and B = 2x4, we get
+//
+//   ------------
+//   ------------
+//   ------------
+//   ------------
+//   -----xxxx---
+//   -----xxxx---
+//   -----xxxx---
+//   ------------
+//   ------------
+//   ------------
+//
+CERES_GEMM_BEGIN(MatrixMatrixMultiplyEigen) {
+  CERES_GEMM_EIGEN_HEADER
+  Eigen::Block<MatrixRef, kRowA, kColB>
+    block(Cref, start_row_c, start_col_c, num_row_a, num_col_b);
+
+  if (kOperation > 0) {
+    block CERES_MAYBE_NOALIAS += Aref * Bref;
+  } else if (kOperation < 0) {
+    block CERES_MAYBE_NOALIAS -= Aref * Bref;
+  } else {
+    block CERES_MAYBE_NOALIAS = Aref * Bref;
+  }
+}
+
+CERES_GEMM_BEGIN(MatrixMatrixMultiplyNaive) {
+  CERES_GEMM_NAIVE_HEADER
+  DCHECK_EQ(NUM_COL_A, NUM_ROW_B);
+
+  const int NUM_ROW_C = NUM_ROW_A;
+  const int NUM_COL_C = NUM_COL_B;
+  DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c);
+  DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c);
+
+  for (int row = 0; row < NUM_ROW_C; ++row) {
+    for (int col = 0; col < NUM_COL_C; ++col) {
+      double tmp = 0.0;
+      for (int k = 0; k < NUM_COL_A; ++k) {
+        tmp += A[row * NUM_COL_A + k] * B[k * NUM_COL_B + col];
+      }
+
+      const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
+      if (kOperation > 0) {
+        C[index] += tmp;
+      } else if (kOperation < 0) {
+        C[index] -= tmp;
+      } else {
+        C[index] = tmp;
+      }
+    }
+  }
+}
+
+CERES_GEMM_BEGIN(MatrixMatrixMultiply) {
+#ifdef CERES_NO_CUSTOM_BLAS
+
+  CERES_CALL_GEMM(MatrixMatrixMultiplyEigen)
+  return;
+
+#else
+
+  if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
+      kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
+    CERES_CALL_GEMM(MatrixMatrixMultiplyEigen)
+  } else {
+    CERES_CALL_GEMM(MatrixMatrixMultiplyNaive)
+  }
+
+#endif
+}
+
+CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyEigen) {
+  CERES_GEMM_EIGEN_HEADER
+  Eigen::Block<MatrixRef, kColA, kColB> block(Cref,
+                                              start_row_c, start_col_c,
+                                              num_col_a, num_col_b);
+  if (kOperation > 0) {
+    block CERES_MAYBE_NOALIAS += Aref.transpose() * Bref;
+  } else if (kOperation < 0) {
+    block CERES_MAYBE_NOALIAS -= Aref.transpose() * Bref;
+  } else {
+    block CERES_MAYBE_NOALIAS = Aref.transpose() * Bref;
+  }
+}
+
+CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyNaive) {
+  CERES_GEMM_NAIVE_HEADER
+  DCHECK_EQ(NUM_ROW_A, NUM_ROW_B);
+
+  const int NUM_ROW_C = NUM_COL_A;
+  const int NUM_COL_C = NUM_COL_B;
+  DCHECK_LE(start_row_c + NUM_ROW_C, row_stride_c);
+  DCHECK_LE(start_col_c + NUM_COL_C, col_stride_c);
+
+  for (int row = 0; row < NUM_ROW_C; ++row) {
+    for (int col = 0; col < NUM_COL_C; ++col) {
+      double tmp = 0.0;
+      for (int k = 0; k < NUM_ROW_A; ++k) {
+        tmp += A[k * NUM_COL_A + row] * B[k * NUM_COL_B + col];
+      }
+
+      const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
+      if (kOperation > 0) {
+        C[index]+= tmp;
+      } else if (kOperation < 0) {
+        C[index]-= tmp;
+      } else {
+        C[index]= tmp;
+      }
+    }
+  }
+}
+
+CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiply) {
+#ifdef CERES_NO_CUSTOM_BLAS
+
+  CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen)
+  return;
+
+#else
+
+  if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
+      kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
+    CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen)
+  } else {
+    CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyNaive)
+  }
+
+#endif
+}
+
+// Matrix-Vector multiplication
+//
+// c op A * b;
+//
+// where op can be +=, -=, or =.
+//
+// The template parameters (kRowA, kColA) allow specialization of the
+// loop at compile time. If this information is not available, then
+// Eigen::Dynamic should be used as the template argument.
+//
+// kOperation =  1  -> c += A' * b
+// kOperation = -1  -> c -= A' * b
+// kOperation =  0  -> c  = A' * b
+template<int kRowA, int kColA, int kOperation>
+inline void MatrixVectorMultiply(const double* A,
+                                 const int num_row_a,
+                                 const int num_col_a,
+                                 const double* b,
+                                 double* c) {
+#ifdef CERES_NO_CUSTOM_BLAS
+  const typename EigenTypes<kRowA, kColA>::ConstMatrixRef
+      Aref(A, num_row_a, num_col_a);
+  const typename EigenTypes<kColA>::ConstVectorRef bref(b, num_col_a);
+  typename EigenTypes<kRowA>::VectorRef cref(c, num_row_a);
+
+  // lazyProduct works better than .noalias() for matrix-vector
+  // products.
+  if (kOperation > 0) {
+    cref += Aref.lazyProduct(bref);
+  } else if (kOperation < 0) {
+    cref -= Aref.lazyProduct(bref);
+  } else {
+    cref = Aref.lazyProduct(bref);
+  }
+#else
+
+  DCHECK_GT(num_row_a, 0);
+  DCHECK_GT(num_col_a, 0);
+  DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
+  DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
+
+  const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
+  const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
+
+  for (int row = 0; row < NUM_ROW_A; ++row) {
+    double tmp = 0.0;
+    for (int col = 0; col < NUM_COL_A; ++col) {
+      tmp += A[row * NUM_COL_A + col] * b[col];
+    }
+
+    if (kOperation > 0) {
+      c[row] += tmp;
+    } else if (kOperation < 0) {
+      c[row] -= tmp;
+    } else {
+      c[row] = tmp;
+    }
+  }
+#endif  // CERES_NO_CUSTOM_BLAS
+}
+
+// Similar to MatrixVectorMultiply, except that A is transposed, i.e.,
+//
+// c op A' * b;
+template<int kRowA, int kColA, int kOperation>
+inline void MatrixTransposeVectorMultiply(const double* A,
+                                          const int num_row_a,
+                                          const int num_col_a,
+                                          const double* b,
+                                          double* c) {
+#ifdef CERES_NO_CUSTOM_BLAS
+  const typename EigenTypes<kRowA, kColA>::ConstMatrixRef
+      Aref(A, num_row_a, num_col_a);
+  const typename EigenTypes<kRowA>::ConstVectorRef bref(b, num_row_a);
+  typename EigenTypes<kColA>::VectorRef cref(c, num_col_a);
+
+  // lazyProduct works better than .noalias() for matrix-vector
+  // products.
+  if (kOperation > 0) {
+    cref += Aref.transpose().lazyProduct(bref);
+  } else if (kOperation < 0) {
+    cref -= Aref.transpose().lazyProduct(bref);
+  } else {
+    cref = Aref.transpose().lazyProduct(bref);
+  }
+#else
+
+  DCHECK_GT(num_row_a, 0);
+  DCHECK_GT(num_col_a, 0);
+  DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
+  DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
+
+  const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
+  const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
+
+  for (int row = 0; row < NUM_COL_A; ++row) {
+    double tmp = 0.0;
+    for (int col = 0; col < NUM_ROW_A; ++col) {
+      tmp += A[col * NUM_COL_A + row] * b[col];
+    }
+
+    if (kOperation > 0) {
+      c[row] += tmp;
+    } else if (kOperation < 0) {
+      c[row] -= tmp;
+    } else {
+      c[row] = tmp;
+    }
+  }
+#endif  // CERES_NO_CUSTOM_BLAS
+}
+
+
+#undef CERES_MAYBE_NOALIAS
+#undef CERES_GEMM_BEGIN
+#undef CERES_GEMM_EIGEN_HEADER
+#undef CERES_GEMM_NAIVE_HEADER
+#undef CERES_CALL_GEMM
+
+}  // namespace internal
+}  // namespace ceres
+
+#endif  // CERES_INTERNAL_SMALL_BLAS_H_
diff --git a/internal/ceres/small_blas_test.cc b/internal/ceres/small_blas_test.cc
new file mode 100644
index 0000000..b8b5bc5
--- /dev/null
+++ b/internal/ceres/small_blas_test.cc
@@ -0,0 +1,303 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: keir@google.com (Keir Mierle)
+
+#include "ceres/small_blas.h"
+
+#include "gtest/gtest.h"
+#include "ceres/internal/eigen.h"
+
+namespace ceres {
+namespace internal {
+
+TEST(BLAS, MatrixMatrixMultiply) {
+  const double kTolerance = 1e-16;
+  const int kRowA = 3;
+  const int kColA = 5;
+  Matrix A(kRowA, kColA);
+  A.setOnes();
+
+  const int kRowB = 5;
+  const int kColB = 7;
+  Matrix B(kRowB, kColB);
+  B.setOnes();
+
+  for (int row_stride_c = kRowA; row_stride_c < 3 * kRowA; ++row_stride_c) {
+    for (int col_stride_c = kColB; col_stride_c < 3 * kColB; ++col_stride_c) {
+      Matrix C(row_stride_c, col_stride_c);
+      C.setOnes();
+
+      Matrix C_plus = C;
+      Matrix C_minus = C;
+      Matrix C_assign = C;
+
+      Matrix C_plus_ref = C;
+      Matrix C_minus_ref = C;
+      Matrix C_assign_ref = C;
+      for (int start_row_c = 0; start_row_c + kRowA < row_stride_c; ++start_row_c) {
+        for (int start_col_c = 0; start_col_c + kColB < col_stride_c; ++start_col_c) {
+          C_plus_ref.block(start_row_c, start_col_c, kRowA, kColB) +=
+              A * B;
+
+          MatrixMatrixMultiply<kRowA, kColA, kRowB, kColB, 1>(
+              A.data(), kRowA, kColA,
+              B.data(), kRowB, kColB,
+              C_plus.data(), start_row_c, start_col_c, row_stride_c, col_stride_c);
+
+          EXPECT_NEAR((C_plus_ref - C_plus).norm(), 0.0, kTolerance)
+              << "C += A * B \n"
+              << "row_stride_c : " << row_stride_c << "\n"
+              << "col_stride_c : " << col_stride_c << "\n"
+              << "start_row_c  : " << start_row_c << "\n"
+              << "start_col_c  : " << start_col_c << "\n"
+              << "Cref : \n" << C_plus_ref << "\n"
+              << "C: \n" << C_plus;
+
+
+          C_minus_ref.block(start_row_c, start_col_c, kRowA, kColB) -=
+              A * B;
+
+          MatrixMatrixMultiply<kRowA, kColA, kRowB, kColB, -1>(
+              A.data(), kRowA, kColA,
+              B.data(), kRowB, kColB,
+              C_minus.data(), start_row_c, start_col_c, row_stride_c, col_stride_c);
+
+           EXPECT_NEAR((C_minus_ref - C_minus).norm(), 0.0, kTolerance)
+              << "C -= A * B \n"
+              << "row_stride_c : " << row_stride_c << "\n"
+              << "col_stride_c : " << col_stride_c << "\n"
+              << "start_row_c  : " << start_row_c << "\n"
+              << "start_col_c  : " << start_col_c << "\n"
+              << "Cref : \n" << C_minus_ref << "\n"
+              << "C: \n" << C_minus;
+
+          C_assign_ref.block(start_row_c, start_col_c, kRowA, kColB) =
+              A * B;
+
+          MatrixMatrixMultiply<kRowA, kColA, kRowB, kColB, 0>(
+              A.data(), kRowA, kColA,
+              B.data(), kRowB, kColB,
+              C_assign.data(), start_row_c, start_col_c, row_stride_c, col_stride_c);
+
+          EXPECT_NEAR((C_assign_ref - C_assign).norm(), 0.0, kTolerance)
+              << "C = A * B \n"
+              << "row_stride_c : " << row_stride_c << "\n"
+              << "col_stride_c : " << col_stride_c << "\n"
+              << "start_row_c  : " << start_row_c << "\n"
+              << "start_col_c  : " << start_col_c << "\n"
+              << "Cref : \n" << C_assign_ref << "\n"
+              << "C: \n" << C_assign;
+        }
+      }
+    }
+  }
+}
+
+TEST(BLAS, MatrixTransposeMatrixMultiply) {
+  const double kTolerance = 1e-16;
+  const int kRowA = 5;
+  const int kColA = 3;
+  Matrix A(kRowA, kColA);
+  A.setOnes();
+
+  const int kRowB = 5;
+  const int kColB = 7;
+  Matrix B(kRowB, kColB);
+  B.setOnes();
+
+  for (int row_stride_c = kColA; row_stride_c < 3 * kColA; ++row_stride_c) {
+    for (int col_stride_c = kColB; col_stride_c <  3 * kColB; ++col_stride_c) {
+      Matrix C(row_stride_c, col_stride_c);
+      C.setOnes();
+
+      Matrix C_plus = C;
+      Matrix C_minus = C;
+      Matrix C_assign = C;
+
+      Matrix C_plus_ref = C;
+      Matrix C_minus_ref = C;
+      Matrix C_assign_ref = C;
+      for (int start_row_c = 0; start_row_c + kColA < row_stride_c; ++start_row_c) {
+        for (int start_col_c = 0; start_col_c + kColB < col_stride_c; ++start_col_c) {
+          C_plus_ref.block(start_row_c, start_col_c, kColA, kColB) +=
+              A.transpose() * B;
+
+          MatrixTransposeMatrixMultiply<kRowA, kColA, kRowB, kColB, 1>(
+              A.data(), kRowA, kColA,
+              B.data(), kRowB, kColB,
+              C_plus.data(), start_row_c, start_col_c, row_stride_c, col_stride_c);
+
+          EXPECT_NEAR((C_plus_ref - C_plus).norm(), 0.0, kTolerance)
+              << "C += A' * B \n"
+              << "row_stride_c : " << row_stride_c << "\n"
+              << "col_stride_c : " << col_stride_c << "\n"
+              << "start_row_c  : " << start_row_c << "\n"
+              << "start_col_c  : " << start_col_c << "\n"
+              << "Cref : \n" << C_plus_ref << "\n"
+              << "C: \n" << C_plus;
+
+          C_minus_ref.block(start_row_c, start_col_c, kColA, kColB) -=
+              A.transpose() * B;
+
+          MatrixTransposeMatrixMultiply<kRowA, kColA, kRowB, kColB, -1>(
+              A.data(), kRowA, kColA,
+              B.data(), kRowB, kColB,
+              C_minus.data(), start_row_c, start_col_c, row_stride_c, col_stride_c);
+
+          EXPECT_NEAR((C_minus_ref - C_minus).norm(), 0.0, kTolerance)
+              << "C -= A' * B \n"
+              << "row_stride_c : " << row_stride_c << "\n"
+              << "col_stride_c : " << col_stride_c << "\n"
+              << "start_row_c  : " << start_row_c << "\n"
+              << "start_col_c  : " << start_col_c << "\n"
+              << "Cref : \n" << C_minus_ref << "\n"
+              << "C: \n" << C_minus;
+
+          C_assign_ref.block(start_row_c, start_col_c, kColA, kColB) =
+              A.transpose() * B;
+
+          MatrixTransposeMatrixMultiply<kRowA, kColA, kRowB, kColB, 0>(
+              A.data(), kRowA, kColA,
+              B.data(), kRowB, kColB,
+              C_assign.data(), start_row_c, start_col_c, row_stride_c, col_stride_c);
+
+          EXPECT_NEAR((C_assign_ref - C_assign).norm(), 0.0, kTolerance)
+              << "C = A' * B \n"
+              << "row_stride_c : " << row_stride_c << "\n"
+              << "col_stride_c : " << col_stride_c << "\n"
+              << "start_row_c  : " << start_row_c << "\n"
+              << "start_col_c  : " << start_col_c << "\n"
+              << "Cref : \n" << C_assign_ref << "\n"
+              << "C: \n" << C_assign;
+        }
+      }
+    }
+  }
+}
+
+TEST(BLAS, MatrixVectorMultiply) {
+  const double kTolerance = 1e-16;
+  const int kRowA = 5;
+  const int kColA = 3;
+  Matrix A(kRowA, kColA);
+  A.setOnes();
+
+  Vector b(kColA);
+  b.setOnes();
+
+  Vector c(kRowA);
+  c.setOnes();
+
+  Vector c_plus = c;
+  Vector c_minus = c;
+  Vector c_assign = c;
+
+  Vector c_plus_ref = c;
+  Vector c_minus_ref = c;
+  Vector c_assign_ref = c;
+
+  c_plus_ref += A * b;
+  MatrixVectorMultiply<kRowA, kColA, 1>(A.data(), kRowA, kColA,
+                                        b.data(),
+                                        c_plus.data());
+  EXPECT_NEAR((c_plus_ref - c_plus).norm(), 0.0, kTolerance)
+      << "c += A * b \n"
+      << "c_ref : \n" << c_plus_ref << "\n"
+      << "c: \n" << c_plus;
+
+  c_minus_ref -= A * b;
+  MatrixVectorMultiply<kRowA, kColA, -1>(A.data(), kRowA, kColA,
+                                                 b.data(),
+                                                 c_minus.data());
+  EXPECT_NEAR((c_minus_ref - c_minus).norm(), 0.0, kTolerance)
+      << "c += A * b \n"
+      << "c_ref : \n" << c_minus_ref << "\n"
+      << "c: \n" << c_minus;
+
+  c_assign_ref = A * b;
+  MatrixVectorMultiply<kRowA, kColA, 0>(A.data(), kRowA, kColA,
+                                                  b.data(),
+                                                  c_assign.data());
+  EXPECT_NEAR((c_assign_ref - c_assign).norm(), 0.0, kTolerance)
+      << "c += A * b \n"
+      << "c_ref : \n" << c_assign_ref << "\n"
+      << "c: \n" << c_assign;
+}
+
+TEST(BLAS, MatrixTransposeVectorMultiply) {
+  const double kTolerance = 1e-16;
+  const int kRowA = 5;
+  const int kColA = 3;
+  Matrix A(kRowA, kColA);
+  A.setRandom();
+
+  Vector b(kRowA);
+  b.setRandom();
+
+  Vector c(kColA);
+  c.setOnes();
+
+  Vector c_plus = c;
+  Vector c_minus = c;
+  Vector c_assign = c;
+
+  Vector c_plus_ref = c;
+  Vector c_minus_ref = c;
+  Vector c_assign_ref = c;
+
+  c_plus_ref += A.transpose() * b;
+  MatrixTransposeVectorMultiply<kRowA, kColA, 1>(A.data(), kRowA, kColA,
+                                                 b.data(),
+                                                 c_plus.data());
+  EXPECT_NEAR((c_plus_ref - c_plus).norm(), 0.0, kTolerance)
+      << "c += A' * b \n"
+      << "c_ref : \n" << c_plus_ref << "\n"
+      << "c: \n" << c_plus;
+
+  c_minus_ref -= A.transpose() * b;
+  MatrixTransposeVectorMultiply<kRowA, kColA, -1>(A.data(), kRowA, kColA,
+                                                 b.data(),
+                                                 c_minus.data());
+  EXPECT_NEAR((c_minus_ref - c_minus).norm(), 0.0, kTolerance)
+      << "c += A' * b \n"
+      << "c_ref : \n" << c_minus_ref << "\n"
+      << "c: \n" << c_minus;
+
+  c_assign_ref = A.transpose() * b;
+  MatrixTransposeVectorMultiply<kRowA, kColA, 0>(A.data(), kRowA, kColA,
+                                                  b.data(),
+                                                  c_assign.data());
+  EXPECT_NEAR((c_assign_ref - c_assign).norm(), 0.0, kTolerance)
+      << "c += A' * b \n"
+      << "c_ref : \n" << c_assign_ref << "\n"
+      << "c: \n" << c_assign;
+}
+
+}  // namespace internal
+}  // namespace ceres
diff --git a/internal/ceres/solver.cc b/internal/ceres/solver.cc
index 5d8447d..3b67746 100644
--- a/internal/ceres/solver.cc
+++ b/internal/ceres/solver.cc
@@ -120,7 +120,8 @@ Solver::Summary::Summary()
       inner_iterations_used(false),
       preconditioner_type(IDENTITY),
       trust_region_strategy_type(LEVENBERG_MARQUARDT),
-      sparse_linear_algebra_library(SUITE_SPARSE),
+      dense_linear_algebra_library_type(EIGEN),
+      sparse_linear_algebra_library_type(SUITE_SPARSE),
       line_search_direction_type(LBFGS),
       line_search_type(ARMIJO) {
 }
@@ -192,6 +193,15 @@ string Solver::Summary::FullReport() const {
     // TRUST_SEARCH HEADER
     StringAppendF(&report, "\nMinimizer                 %19s\n",
                   "TRUST_REGION");
+
+    if (linear_solver_type_used == DENSE_NORMAL_CHOLESKY ||
+        linear_solver_type_used == DENSE_SCHUR ||
+        linear_solver_type_used == DENSE_QR) {
+      StringAppendF(&report, "\nDense linear algebra library  %15s\n",
+                    DenseLinearAlgebraLibraryTypeToString(
+                        dense_linear_algebra_library_type));
+    }
+
     if (linear_solver_type_used == SPARSE_NORMAL_CHOLESKY ||
         linear_solver_type_used == SPARSE_SCHUR ||
         (linear_solver_type_used == ITERATIVE_SCHUR &&
@@ -199,7 +209,7 @@ string Solver::Summary::FullReport() const {
           preconditioner_type == CLUSTER_TRIDIAGONAL))) {
       StringAppendF(&report, "\nSparse linear algebra library %15s\n",
                     SparseLinearAlgebraLibraryTypeToString(
-                                sparse_linear_algebra_library));
+                        sparse_linear_algebra_library_type));
     }
 
     StringAppendF(&report, "Trust region strategy     %19s",
diff --git a/internal/ceres/solver_impl.cc b/internal/ceres/solver_impl.cc
index d6ef731..83faa05 100644
--- a/internal/ceres/solver_impl.cc
+++ b/internal/ceres/solver_impl.cc
@@ -317,6 +317,16 @@ void SolverImpl::LineSearchMinimize(
 void SolverImpl::Solve(const Solver::Options& options,
                        ProblemImpl* problem_impl,
                        Solver::Summary* summary) {
+  VLOG(2) << "Initial problem: "
+          << problem_impl->NumParameterBlocks()
+          << " parameter blocks, "
+          << problem_impl->NumParameters()
+          << " parameters,  "
+          << problem_impl->NumResidualBlocks()
+          << " residual blocks, "
+          << problem_impl->NumResiduals()
+          << " residuals.";
+
   if (options.minimizer_type == TRUST_REGION) {
     TrustRegionSolve(options, problem_impl, summary);
   } else {
@@ -506,8 +516,10 @@ void SolverImpl::TrustRegionSolve(const Solver::Options& original_options,
       original_options.num_linear_solver_threads;
   summary->num_linear_solver_threads_used = options.num_linear_solver_threads;
 
-  summary->sparse_linear_algebra_library =
-      options.sparse_linear_algebra_library;
+  summary->dense_linear_algebra_library_type =
+      options.dense_linear_algebra_library_type;
+  summary->sparse_linear_algebra_library_type =
+      options.sparse_linear_algebra_library_type;
 
   summary->trust_region_strategy_type = options.trust_region_strategy_type;
   summary->dogleg_type = options.dogleg_type;
@@ -1067,6 +1079,16 @@ Program* SolverImpl::CreateReducedProgram(Solver::Options* options,
     return NULL;
   }
 
+  VLOG(2) << "Reduced problem: "
+          << transformed_program->NumParameterBlocks()
+          << " parameter blocks, "
+          << transformed_program->NumParameters()
+          << " parameters,  "
+          << transformed_program->NumResidualBlocks()
+          << " residual blocks, "
+          << transformed_program->NumResiduals()
+          << " residuals.";
+
   if (transformed_program->NumParameterBlocks() == 0) {
     LOG(WARNING) << "No varying parameter blocks to optimize; "
                  << "bailing early.";
@@ -1092,7 +1114,7 @@ Program* SolverImpl::CreateReducedProgram(Solver::Options* options,
   if (IsSchurType(options->linear_solver_type)) {
     if (!ReorderProgramForSchurTypeLinearSolver(
             options->linear_solver_type,
-            options->sparse_linear_algebra_library,
+            options->sparse_linear_algebra_library_type,
             problem_impl->parameter_map(),
             linear_solver_ordering,
             transformed_program.get(),
@@ -1104,7 +1126,7 @@ Program* SolverImpl::CreateReducedProgram(Solver::Options* options,
 
   if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY) {
     if (!ReorderProgramForSparseNormalCholesky(
-            options->sparse_linear_algebra_library,
+            options->sparse_linear_algebra_library_type,
             linear_solver_ordering,
             transformed_program.get(),
             error)) {
@@ -1134,9 +1156,32 @@ LinearSolver* SolverImpl::CreateLinearSolver(Solver::Options* options,
     }
   }
 
+#ifdef CERES_NO_LAPACK
+  if (options->linear_solver_type == DENSE_NORMAL_CHOLESKY &&
+      options->dense_linear_algebra_library_type == LAPACK) {
+    *error = "Can't use DENSE_NORMAL_CHOLESKY with LAPACK because "
+        "LAPACK was not enabled when Ceres was built.";
+    return NULL;
+  }
+
+  if (options->linear_solver_type == DENSE_QR &&
+      options->dense_linear_algebra_library_type == LAPACK) {
+    *error = "Can't use DENSE_QR with LAPACK because "
+        "LAPACK was not enabled when Ceres was built.";
+    return NULL;
+  }
+
+  if (options->linear_solver_type == DENSE_SCHUR &&
+      options->dense_linear_algebra_library_type == LAPACK) {
+    *error = "Can't use DENSE_SCHUR with LAPACK because "
+        "LAPACK was not enabled when Ceres was built.";
+    return NULL;
+  }
+#endif
+
 #ifdef CERES_NO_SUITESPARSE
   if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY &&
-      options->sparse_linear_algebra_library == SUITE_SPARSE) {
+      options->sparse_linear_algebra_library_type == SUITE_SPARSE) {
     *error = "Can't use SPARSE_NORMAL_CHOLESKY with SUITESPARSE because "
              "SuiteSparse was not enabled when Ceres was built.";
     return NULL;
@@ -1157,7 +1202,7 @@ LinearSolver* SolverImpl::CreateLinearSolver(Solver::Options* options,
 
 #ifdef CERES_NO_CXSPARSE
   if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY &&
-      options->sparse_linear_algebra_library == CX_SPARSE) {
+      options->sparse_linear_algebra_library_type == CX_SPARSE) {
     *error = "Can't use SPARSE_NORMAL_CHOLESKY with CXSPARSE because "
              "CXSparse was not enabled when Ceres was built.";
     return NULL;
@@ -1194,8 +1239,10 @@ LinearSolver* SolverImpl::CreateLinearSolver(Solver::Options* options,
       options->max_linear_solver_iterations;
   linear_solver_options.type = options->linear_solver_type;
   linear_solver_options.preconditioner_type = options->preconditioner_type;
-  linear_solver_options.sparse_linear_algebra_library =
-      options->sparse_linear_algebra_library;
+  linear_solver_options.sparse_linear_algebra_library_type =
+      options->sparse_linear_algebra_library_type;
+  linear_solver_options.dense_linear_algebra_library_type =
+      options->dense_linear_algebra_library_type;
   linear_solver_options.use_postordering = options->use_postordering;
 
   // Ignore user's postordering preferences and force it to be true if
@@ -1204,7 +1251,7 @@ LinearSolver* SolverImpl::CreateLinearSolver(Solver::Options* options,
   // done.
 #if !defined(CERES_NO_SUITESPARSE) && defined(CERES_NO_CAMD)
   if (IsSchurType(linear_solver_options.type) &&
-      linear_solver_options.sparse_linear_algebra_library == SUITE_SPARSE) {
+      options->sparse_linear_algebra_library_type == SUITE_SPARSE) {
     linear_solver_options.use_postordering = true;
   }
 #endif
@@ -1590,6 +1637,11 @@ bool SolverImpl::ReorderProgramForSchurTypeLinearSolver(
               parameter_blocks[i]->mutable_user_state()));
     }
 
+    // Renumber the entries of constraints to be contiguous integers
+    // as camd requires that the group ids be in the range [0,
+    // parameter_blocks.size() - 1].
+    SolverImpl::CompactifyArray(&constraints);
+
     // Set the offsets and index for CreateJacobianSparsityTranspose.
     program->SetParameterOffsetsAndIndex();
     // Compute a block sparse presentation of J'.
@@ -1713,5 +1765,20 @@ bool SolverImpl::ReorderProgramForSparseNormalCholesky(
   return true;
 }
 
+void SolverImpl::CompactifyArray(vector<int>* array_ptr) {
+  vector<int>& array = *array_ptr;
+  const set<int> unique_group_ids(array.begin(), array.end());
+  map<int, int> group_id_map;
+  for (set<int>::const_iterator it = unique_group_ids.begin();
+       it != unique_group_ids.end();
+       ++it) {
+    InsertOrDie(&group_id_map, *it, group_id_map.size());
+  }
+
+  for (int i = 0; i < array.size(); ++i) {
+    array[i] = group_id_map[array[i]];
+  }
+}
+
 }  // namespace internal
 }  // namespace ceres
diff --git a/internal/ceres/solver_impl.h b/internal/ceres/solver_impl.h
index ebfb813..2b7ca3e 100644
--- a/internal/ceres/solver_impl.h
+++ b/internal/ceres/solver_impl.h
@@ -208,6 +208,13 @@ class SolverImpl {
       ParameterBlockOrdering* parameter_block_ordering,
       Program* program,
       string* error);
+
+  // array contains a list of (possibly repeating) non-negative
+  // integers. Let us assume that we have constructed another array
+  // `p` by sorting and uniqueing the entries of array.
+  // CompactifyArray replaces each entry in "array" with its position
+  // in `p`.
+  static void CompactifyArray(vector<int>* array);
 };
 
 }  // namespace internal
diff --git a/internal/ceres/solver_impl_test.cc b/internal/ceres/solver_impl_test.cc
index d81858c..583ef4e 100644
--- a/internal/ceres/solver_impl_test.cc
+++ b/internal/ceres/solver_impl_test.cc
@@ -592,7 +592,7 @@ TEST(SolverImpl, CreateLinearSolverNormalOperation) {
 
 #ifndef CERES_NO_SUITESPARSE
   options.linear_solver_type = SPARSE_NORMAL_CHOLESKY;
-  options.sparse_linear_algebra_library = SUITE_SPARSE;
+  options.sparse_linear_algebra_library_type = SUITE_SPARSE;
   solver.reset(SolverImpl::CreateLinearSolver(&options, &error));
   EXPECT_EQ(options.linear_solver_type, SPARSE_NORMAL_CHOLESKY);
   EXPECT_TRUE(solver.get() != NULL);
@@ -600,7 +600,7 @@ TEST(SolverImpl, CreateLinearSolverNormalOperation) {
 
 #ifndef CERES_NO_CXSPARSE
   options.linear_solver_type = SPARSE_NORMAL_CHOLESKY;
-  options.sparse_linear_algebra_library = CX_SPARSE;
+  options.sparse_linear_algebra_library_type = CX_SPARSE;
   solver.reset(SolverImpl::CreateLinearSolver(&options, &error));
   EXPECT_EQ(options.linear_solver_type, SPARSE_NORMAL_CHOLESKY);
   EXPECT_TRUE(solver.get() != NULL);
@@ -991,5 +991,52 @@ TEST(SolverImpl, ReallocationInCreateJacobianBlockSparsityTranspose) {
   EXPECT_EQ((expected_dense_jacobian - actual_dense_jacobian).norm(), 0.0);
 }
 
+TEST(CompactifyArray, ContiguousEntries) {
+  vector<int> array;
+  array.push_back(0);
+  array.push_back(1);
+  vector<int> expected = array;
+  SolverImpl::CompactifyArray(&array);
+  EXPECT_EQ(array, expected);
+  array.clear();
+
+  array.push_back(1);
+  array.push_back(0);
+  expected = array;
+  SolverImpl::CompactifyArray(&array);
+  EXPECT_EQ(array, expected);
+}
+
+TEST(CompactifyArray, NonContiguousEntries) {
+  vector<int> array;
+  array.push_back(0);
+  array.push_back(2);
+  vector<int> expected;
+  expected.push_back(0);
+  expected.push_back(1);
+  SolverImpl::CompactifyArray(&array);
+  EXPECT_EQ(array, expected);
+}
+
+TEST(CompactifyArray, NonContiguousRepeatingEntries) {
+  vector<int> array;
+  array.push_back(3);
+  array.push_back(1);
+  array.push_back(0);
+  array.push_back(0);
+  array.push_back(0);
+  array.push_back(5);
+  vector<int> expected;
+  expected.push_back(2);
+  expected.push_back(1);
+  expected.push_back(0);
+  expected.push_back(0);
+  expected.push_back(0);
+  expected.push_back(3);
+
+  SolverImpl::CompactifyArray(&array);
+  EXPECT_EQ(array, expected);
+}
+
 }  // namespace internal
 }  // namespace ceres
diff --git a/internal/ceres/sparse_normal_cholesky_solver.cc b/internal/ceres/sparse_normal_cholesky_solver.cc
index 9601142..f1a5237 100644
--- a/internal/ceres/sparse_normal_cholesky_solver.cc
+++ b/internal/ceres/sparse_normal_cholesky_solver.cc
@@ -36,11 +36,8 @@
 #include <cstring>
 #include <ctime>
 
-#ifndef CERES_NO_CXSPARSE
-#include "cs.h"
-#endif
-
 #include "ceres/compressed_row_sparse_matrix.h"
+#include "ceres/cxsparse.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/linear_solver.h"
@@ -54,14 +51,9 @@ namespace internal {
 
 SparseNormalCholeskySolver::SparseNormalCholeskySolver(
     const LinearSolver::Options& options)
-    : options_(options) {
-#ifndef CERES_NO_SUITESPARSE
-  factor_ = NULL;
-#endif
-
-#ifndef CERES_NO_CXSPARSE
-  cxsparse_factor_ = NULL;
-#endif  // CERES_NO_CXSPARSE
+    : factor_(NULL),
+      cxsparse_factor_(NULL),
+      options_(options) {
 }
 
 SparseNormalCholeskySolver::~SparseNormalCholeskySolver() {
@@ -85,18 +77,18 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImpl(
     const double* b,
     const LinearSolver::PerSolveOptions& per_solve_options,
     double * x) {
-  switch (options_.sparse_linear_algebra_library) {
+  switch (options_.sparse_linear_algebra_library_type) {
     case SUITE_SPARSE:
       return SolveImplUsingSuiteSparse(A, b, per_solve_options, x);
     case CX_SPARSE:
       return SolveImplUsingCXSparse(A, b, per_solve_options, x);
     default:
       LOG(FATAL) << "Unknown sparse linear algebra library : "
-                 << options_.sparse_linear_algebra_library;
+                 << options_.sparse_linear_algebra_library_type;
   }
 
   LOG(FATAL) << "Unknown sparse linear algebra library : "
-             << options_.sparse_linear_algebra_library;
+             << options_.sparse_linear_algebra_library_type;
   return LinearSolver::Summary();
 }
 
diff --git a/internal/ceres/sparse_normal_cholesky_solver.h b/internal/ceres/sparse_normal_cholesky_solver.h
index ebb32e6..61111b4 100644
--- a/internal/ceres/sparse_normal_cholesky_solver.h
+++ b/internal/ceres/sparse_normal_cholesky_solver.h
@@ -73,17 +73,13 @@ class SparseNormalCholeskySolver : public CompressedRowSparseMatrixSolver {
       const LinearSolver::PerSolveOptions& options,
       double* x);
 
-#ifndef CERES_NO_SUITESPARSE
   SuiteSparse ss_;
   // Cached factorization
   cholmod_factor* factor_;
-#endif  // CERES_NO_SUITESPARSE
 
-#ifndef CERES_NO_CXSPARSE
   CXSparse cxsparse_;
   // Cached factorization
   cs_dis* cxsparse_factor_;
-#endif  // CERES_NO_CXSPARSE
 
   const LinearSolver::Options options_;
   CERES_DISALLOW_COPY_AND_ASSIGN(SparseNormalCholeskySolver);
diff --git a/internal/ceres/suitesparse.h b/internal/ceres/suitesparse.h
index 8a5b0a8..16f298e 100644
--- a/internal/ceres/suitesparse.h
+++ b/internal/ceres/suitesparse.h
@@ -43,6 +43,7 @@
 #include "ceres/internal/port.h"
 #include "cholmod.h"
 #include "glog/logging.h"
+#include "SuiteSparseQR.hpp"
 
 // Before SuiteSparse version 4.2.0, cholmod_camd was only enabled
 // if SuiteSparse was compiled with Metis support. This makes
@@ -58,6 +59,14 @@
 #define CERES_NO_CAMD
 #endif
 
+// UF_long is deprecated but SuiteSparse_long is only available in
+// newer versions of SuiteSparse. So for older versions of
+// SuiteSparse, we define SuiteSparse_long to be the same as UF_long,
+// which is what recent versions of SuiteSparse do anyways.
+#ifndef SuiteSparse_long
+#define SuiteSparse_long UF_long
+#endif
+
 namespace ceres {
 namespace internal {
 
@@ -261,6 +270,11 @@ class SuiteSparse {
 }  // namespace internal
 }  // namespace ceres
 
+#else  // CERES_NO_SUITESPARSE
+
+class SuiteSparse {};
+typedef void cholmod_factor;
+
 #endif  // CERES_NO_SUITESPARSE
 
 #endif  // CERES_INTERNAL_SUITESPARSE_H_
diff --git a/internal/ceres/system_test.cc b/internal/ceres/system_test.cc
index 095b51e..7b0e02d 100644
--- a/internal/ceres/system_test.cc
+++ b/internal/ceres/system_test.cc
@@ -64,21 +64,21 @@ const bool kUserOrdering = false;
 // Struct used for configuring the solver.
 struct SolverConfig {
   SolverConfig(LinearSolverType linear_solver_type,
-               SparseLinearAlgebraLibraryType sparse_linear_algebra_library,
+               SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
                bool use_automatic_ordering)
       : linear_solver_type(linear_solver_type),
-        sparse_linear_algebra_library(sparse_linear_algebra_library),
+        sparse_linear_algebra_library_type(sparse_linear_algebra_library_type),
         use_automatic_ordering(use_automatic_ordering),
         preconditioner_type(IDENTITY),
         num_threads(1) {
   }
 
   SolverConfig(LinearSolverType linear_solver_type,
-               SparseLinearAlgebraLibraryType sparse_linear_algebra_library,
+               SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type,
                bool use_automatic_ordering,
                PreconditionerType preconditioner_type)
       : linear_solver_type(linear_solver_type),
-        sparse_linear_algebra_library(sparse_linear_algebra_library),
+        sparse_linear_algebra_library_type(sparse_linear_algebra_library_type),
         use_automatic_ordering(use_automatic_ordering),
         preconditioner_type(preconditioner_type),
         num_threads(1) {
@@ -88,14 +88,14 @@ struct SolverConfig {
     return StringPrintf(
         "(%s, %s, %s, %s, %d)",
         LinearSolverTypeToString(linear_solver_type),
-        SparseLinearAlgebraLibraryTypeToString(sparse_linear_algebra_library),
+        SparseLinearAlgebraLibraryTypeToString(sparse_linear_algebra_library_type),
         use_automatic_ordering ? "AUTOMATIC" : "USER",
         PreconditionerTypeToString(preconditioner_type),
         num_threads);
   }
 
   LinearSolverType linear_solver_type;
-  SparseLinearAlgebraLibraryType sparse_linear_algebra_library;
+  SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type;
   bool use_automatic_ordering;
   PreconditionerType preconditioner_type;
   int num_threads;
@@ -130,8 +130,8 @@ void RunSolversAndCheckTheyMatch(const vector<SolverConfig>& configurations,
 
     Solver::Options& options = *(system_test_problem->mutable_solver_options());
     options.linear_solver_type = config.linear_solver_type;
-    options.sparse_linear_algebra_library =
-        config.sparse_linear_algebra_library;
+    options.sparse_linear_algebra_library_type =
+        config.sparse_linear_algebra_library_type;
     options.preconditioner_type = config.preconditioner_type;
     options.num_threads = config.num_threads;
     options.num_linear_solver_threads = config.num_threads;
@@ -281,9 +281,9 @@ class PowellsFunction {
 
 TEST(SystemTest, PowellsFunction) {
   vector<SolverConfig> configs;
-#define CONFIGURE(linear_solver, sparse_linear_algebra_library, ordering) \
-  configs.push_back(SolverConfig(linear_solver,                           \
-                                 sparse_linear_algebra_library,           \
+#define CONFIGURE(linear_solver, sparse_linear_algebra_library_type, ordering) \
+  configs.push_back(SolverConfig(linear_solver,                         \
+                                 sparse_linear_algebra_library_type,    \
                                  ordering))
 
   CONFIGURE(DENSE_QR,               SUITE_SPARSE, kAutomaticOrdering);
@@ -485,9 +485,9 @@ class BundleAdjustmentProblem {
 TEST(SystemTest, BundleAdjustmentProblem) {
   vector<SolverConfig> configs;
 
-#define CONFIGURE(linear_solver, sparse_linear_algebra_library, ordering, preconditioner) \
+#define CONFIGURE(linear_solver, sparse_linear_algebra_library_type, ordering, preconditioner) \
   configs.push_back(SolverConfig(linear_solver,                         \
-                                 sparse_linear_algebra_library,         \
+                                 sparse_linear_algebra_library_type,    \
                                  ordering,                              \
                                  preconditioner))
 
diff --git a/internal/ceres/trust_region_minimizer.cc b/internal/ceres/trust_region_minimizer.cc
index d6ae0ab..03d6c8e 100644
--- a/internal/ceres/trust_region_minimizer.cc
+++ b/internal/ceres/trust_region_minimizer.cc
@@ -177,6 +177,7 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options,
   int num_consecutive_invalid_steps = 0;
   bool inner_iterations_are_enabled = options.inner_iteration_minimizer != NULL;
   while (true) {
+    bool inner_iterations_were_useful = false;
     if (!RunCallbacks(options.callbacks, iteration_summary, summary)) {
       return;
     }
@@ -240,8 +241,8 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options,
       //  = -f'J * step - step' * J' * J * step / 2
       model_residuals.setZero();
       jacobian->RightMultiply(trust_region_step.data(), model_residuals.data());
-      model_cost_change = -(residuals.dot(model_residuals) +
-                            model_residuals.squaredNorm() / 2.0);
+      model_cost_change =
+          - model_residuals.dot(residuals + model_residuals / 2.0);
 
       if (model_cost_change < 0.0) {
         VLOG(1) << "Invalid step: current_cost: " << cost
@@ -330,11 +331,13 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options,
                     << " x_plus_delta_cost: " << x_plus_delta_cost
                     << " new_cost: " << new_cost;
             const double inner_iteration_relative_progress =
-                (x_plus_delta_cost - new_cost) / x_plus_delta_cost;
+                1.0 - new_cost / x_plus_delta_cost;
             inner_iterations_are_enabled =
                 (inner_iteration_relative_progress >
                  options.inner_iteration_tolerance);
 
+            inner_iterations_were_useful = new_cost < cost;
+
             // Disable inner iterations once the relative improvement
             // drops below tolerance.
             if (!inner_iterations_are_enabled) {
@@ -398,13 +401,51 @@ void TrustRegionMinimizer::Minimize(const Minimizer::Options& options,
           ? max(relative_decrease, historical_relative_decrease)
           : relative_decrease;
 
+      // Normally, the quality of a trust region step is measured by
+      // the ratio
+      //
+      //              cost_change
+      //    r =    -----------------
+      //           model_cost_change
+      //
+      // All the change in the nonlinear objective is due to the trust
+      // region step so this ratio is a good measure of the quality of
+      // the trust region radius. However, when inner iterations are
+      // being used, cost_change includes the contribution of the
+      // inner iterations and its not fair to credit it all to the
+      // trust region algorithm. So we change the ratio to be
+      //
+      //                              cost_change
+      //    r =    ------------------------------------------------
+      //           (model_cost_change + inner_iteration_cost_change)
+      //
+      // In most cases this is fine, but it can be the case that the
+      // change in solution quality due to inner iterations is so large
+      // and the trust region step is so bad, that this ratio can become
+      // quite small.
+      //
+      // This can cause the trust region loop to reject this step. To
+      // get around this, we expicitly check if the inner iterations
+      // led to a net decrease in the objective function value. If
+      // they did, we accept the step even if the trust region ratio
+      // is small.
+      //
+      // Notice that we do not just check that cost_change is positive
+      // which is a weaker condition and would render the
+      // min_relative_decrease threshold useless. Instead, we keep
+      // track of inner_iterations_were_useful, which is true only
+      // when inner iterations lead to a net decrease in the cost.
       iteration_summary.step_is_successful =
-          iteration_summary.relative_decrease > options_.min_relative_decrease;
+          (inner_iterations_were_useful ||
+           iteration_summary.relative_decrease >
+           options_.min_relative_decrease);
 
       if (iteration_summary.step_is_successful) {
         accumulated_candidate_model_cost_change += model_cost_change;
         accumulated_reference_model_cost_change += model_cost_change;
-        if (relative_decrease <= options_.min_relative_decrease) {
+
+        if (!inner_iterations_were_useful &&
+            relative_decrease <= options_.min_relative_decrease) {
           iteration_summary.step_is_nonmonotonic = true;
           VLOG(2) << "Non-monotonic step! "
                   << " relative_decrease: " << relative_decrease
diff --git a/internal/ceres/types.cc b/internal/ceres/types.cc
index 164185e..a97f1a5 100644
--- a/internal/ceres/types.cc
+++ b/internal/ceres/types.cc
@@ -101,7 +101,6 @@ const char* SparseLinearAlgebraLibraryTypeToString(
   }
 }
 
-
 bool StringToSparseLinearAlgebraLibraryType(
     string value,
     SparseLinearAlgebraLibraryType* type) {
@@ -111,6 +110,25 @@ bool StringToSparseLinearAlgebraLibraryType(
   return false;
 }
 
+const char* DenseLinearAlgebraLibraryTypeToString(
+    DenseLinearAlgebraLibraryType type) {
+  switch (type) {
+    CASESTR(EIGEN);
+    CASESTR(LAPACK);
+    default:
+      return "UNKNOWN";
+  }
+}
+
+bool StringToDenseLinearAlgebraLibraryType(
+    string value,
+    DenseLinearAlgebraLibraryType* type) {
+  UpperCase(&value);
+  STRENUM(EIGEN);
+  STRENUM(LAPACK);
+  return false;
+}
+
 const char* TrustRegionStrategyTypeToString(TrustRegionStrategyType type) {
   switch (type) {
     CASESTR(LEVENBERG_MARQUARDT);
@@ -318,4 +336,21 @@ bool IsSparseLinearAlgebraLibraryTypeAvailable(
   return false;
 }
 
+bool IsDenseLinearAlgebraLibraryTypeAvailable(
+    DenseLinearAlgebraLibraryType type) {
+  if (type == EIGEN) {
+    return true;
+  }
+  if (type == LAPACK) {
+#ifdef CERES_NO_LAPACK
+    return false;
+#else
+    return true;
+#endif
+  }
+
+  LOG(WARNING) << "Unknown dense linear algebra library " << type;
+  return false;
+}
+
 }  // namespace ceres
diff --git a/internal/ceres/unsymmetric_linear_solver_test.cc b/internal/ceres/unsymmetric_linear_solver_test.cc
index 232f34c..af9dffe 100644
--- a/internal/ceres/unsymmetric_linear_solver_test.cc
+++ b/internal/ceres/unsymmetric_linear_solver_test.cc
@@ -118,23 +118,41 @@ class UnsymmetricLinearSolverTest : public ::testing::Test {
   scoped_array<double> sol_regularized_;
 };
 
-TEST_F(UnsymmetricLinearSolverTest, DenseQR) {
+TEST_F(UnsymmetricLinearSolverTest, EigenDenseQR) {
   LinearSolver::Options options;
   options.type = DENSE_QR;
+  options.dense_linear_algebra_library_type = EIGEN;
   TestSolver(options);
 }
 
-TEST_F(UnsymmetricLinearSolverTest, DenseNormalCholesky) {
+TEST_F(UnsymmetricLinearSolverTest, EigenDenseNormalCholesky) {
   LinearSolver::Options options;
+  options.dense_linear_algebra_library_type = EIGEN;
   options.type = DENSE_NORMAL_CHOLESKY;
   TestSolver(options);
 }
 
+#ifndef CERES_NO_LAPACK
+TEST_F(UnsymmetricLinearSolverTest, LAPACKDenseQR) {
+  LinearSolver::Options options;
+  options.type = DENSE_QR;
+  options.dense_linear_algebra_library_type = LAPACK;
+  TestSolver(options);
+}
+
+TEST_F(UnsymmetricLinearSolverTest, LAPACKDenseNormalCholesky) {
+  LinearSolver::Options options;
+  options.dense_linear_algebra_library_type = LAPACK;
+  options.type = DENSE_NORMAL_CHOLESKY;
+  TestSolver(options);
+}
+#endif
+
 #ifndef CERES_NO_SUITESPARSE
 TEST_F(UnsymmetricLinearSolverTest,
        SparseNormalCholeskyUsingSuiteSparsePreOrdering) {
   LinearSolver::Options options;
-  options.sparse_linear_algebra_library = SUITE_SPARSE;
+  options.sparse_linear_algebra_library_type = SUITE_SPARSE;
   options.type = SPARSE_NORMAL_CHOLESKY;
   options.use_postordering = false;
   TestSolver(options);
@@ -143,7 +161,7 @@ TEST_F(UnsymmetricLinearSolverTest,
 TEST_F(UnsymmetricLinearSolverTest,
        SparseNormalCholeskyUsingSuiteSparsePostOrdering) {
   LinearSolver::Options options;
-  options.sparse_linear_algebra_library = SUITE_SPARSE;
+  options.sparse_linear_algebra_library_type = SUITE_SPARSE;
   options.type = SPARSE_NORMAL_CHOLESKY;
   options.use_postordering = true;
   TestSolver(options);
@@ -154,7 +172,7 @@ TEST_F(UnsymmetricLinearSolverTest,
 TEST_F(UnsymmetricLinearSolverTest,
        SparseNormalCholeskyUsingCXSparsePreOrdering) {
   LinearSolver::Options options;
-  options.sparse_linear_algebra_library = CX_SPARSE;
+  options.sparse_linear_algebra_library_type = CX_SPARSE;
   options.type = SPARSE_NORMAL_CHOLESKY;
   options.use_postordering = false;
   TestSolver(options);
@@ -163,7 +181,7 @@ TEST_F(UnsymmetricLinearSolverTest,
 TEST_F(UnsymmetricLinearSolverTest,
        SparseNormalCholeskyUsingCXSparsePostOrdering) {
   LinearSolver::Options options;
-  options.sparse_linear_algebra_library = CX_SPARSE;
+  options.sparse_linear_algebra_library_type = CX_SPARSE;
   options.type = SPARSE_NORMAL_CHOLESKY;
   options.use_postordering = true;
   TestSolver(options);
diff --git a/internal/ceres/visibility.cc b/internal/ceres/visibility.cc
index fcd793c..acfa45b 100644
--- a/internal/ceres/visibility.cc
+++ b/internal/ceres/visibility.cc
@@ -153,4 +153,4 @@ Graph<int>* CreateSchurComplementGraph(const vector<set<int> >& visibility) {
 }  // namespace internal
 }  // namespace ceres
 
-#endif
+#endif  // CERES_NO_SUITESPARSE
diff --git a/internal/ceres/visibility_based_preconditioner_test.cc b/internal/ceres/visibility_based_preconditioner_test.cc
index 53d10e1..2edbb18 100644
--- a/internal/ceres/visibility_based_preconditioner_test.cc
+++ b/internal/ceres/visibility_based_preconditioner_test.cc
@@ -279,7 +279,7 @@ namespace internal {
 //     preconditioner_->RightMultiply(x.data(), y.data());
 //     z = full_schur_complement
 //         .selfadjointView<Eigen::Upper>()
-//         .ldlt().solve(x);
+//         .llt().solve(x);
 //     double max_relative_difference =
 //         ((y - z).array() / z.array()).matrix().lpNorm<Eigen::Infinity>();
 //     EXPECT_NEAR(max_relative_difference, 0.0, kTolerance);
-- 
cgit v1.2.3