Update ceres to the latest version in google3.

Change-Id: I0165fffa55f60714f23e0096eac89fa68df75a05
author: Sascha Haeberling <haeberling@google.com> 2013-07-23 19:00:21 -0700
committer: Sascha Haeberling <haeberling@google.com> 2013-07-24 12:00:09 -0700
commit: 1d2624a10e2c559f8ba9ef89eaa30832c0a83a96 (patch)
tree: f43667ef858dd0f377b15a58a9d5c9a126762c55 /include
parent: 0ae28bd5885b5daa526898fcf7c323dc2c3e1963 (diff)
download: ceres-solver-1d2624a10e2c559f8ba9ef89eaa30832c0a83a96.tar.gz
28 files changed, 3608 insertions, 592 deletions
diff --git a/include/ceres/autodiff_cost_function.h b/include/ceres/autodiff_cost_function.h
index bb08d64..371a11f 100644
--- a/include/ceres/autodiff_cost_function.h
+++ b/include/ceres/autodiff_cost_function.h
@@ -28,10 +28,10 @@
 //
 // Author: sameeragarwal@google.com (Sameer Agarwal)
 //
-// Helpers for making CostFunctions as needed by the least squares framework,
-// with Jacobians computed via automatic differentiation. For more information
-// on automatic differentation, see the wikipedia article at
-// http://en.wikipedia.org/wiki/Automatic_differentiation
+// Create CostFunctions as needed by the least squares framework, with
+// Jacobians computed via automatic differentiation. For more
+// information on automatic differentation, see the wikipedia article
+// at http://en.wikipedia.org/wiki/Automatic_differentiation
 //
 // To get an auto differentiated cost function, you must define a class with a
 // templated operator() (a functor) that computes the cost function in terms of
@@ -40,8 +40,11 @@
 // this is hidden, and you should write the function as if T were a scalar type
 // (e.g. a double-precision floating point number).
 //
-// The function must write the computed value in the last argument (the only
-// non-const one) and return true to indicate success.
+// The function must write the computed value in the last argument
+// (the only non-const one) and return true to indicate
+// success. Please see cost_function.h for details on how the return
+// value maybe used to impose simple constraints on the parameter
+// block.
 //
 // For example, consider a scalar error e = k - x'y, where both x and y are
 // two-dimensional column vector parameters, the prime sign indicates
@@ -57,8 +60,8 @@
 // To write an auto-differentiable cost function for the above model, first
 // define the object
 //
-//   class MyScalarCostFunction {
-//     MyScalarCostFunction(double k): k_(k) {}
+//   class MyScalarCostFunctor {
+//     MyScalarCostFunctor(double k): k_(k) {}
 //
 //     template <typename T>
 //     bool operator()(const T* const x , const T* const y, T* e) const {
@@ -80,32 +83,32 @@
 // it can be constructed as follows.
 //
 //   CostFunction* cost_function
-//       = new AutoDiffCostFunction<MyScalarCostFunction, 1, 2, 2>(
-//           new MyScalarCostFunction(1.0));              ^  ^  ^
-//                                                        |  |  |
-//                            Dimension of residual ------+  |  |
-//                            Dimension of x ----------------+  |
-//                            Dimension of y -------------------+
+//       = new AutoDiffCostFunction<MyScalarCostFunctor, 1, 2, 2>(
+//            new MyScalarCostFunctor(1.0));             ^  ^  ^
+//                                                       |  |  |
+//                            Dimension of residual -----+  |  |
+//                            Dimension of x ---------------+  |
+//                            Dimension of y ------------------+
 //
 // In this example, there is usually an instance for each measumerent of k.
 //
 // In the instantiation above, the template parameters following
-// "MyScalarCostFunction", "1, 2, 2", describe the functor as computing a
+// "MyScalarCostFunctor", "1, 2, 2", describe the functor as computing a
 // 1-dimensional output from two arguments, both 2-dimensional.
 //
 // The autodiff cost function also supports cost functions with a
 // runtime-determined number of residuals. For example:
 //
 //   CostFunction* cost_function
-//       = new AutoDiffCostFunction<MyScalarCostFunction, DYNAMIC, 2, 2>(
-//           new CostFunctionWithDynamicNumResiduals(1.0),   ^     ^  ^
-//           runtime_number_of_residuals); <----+            |     |  |
-//                                              |            |     |  |
-//                                              |            |     |  |
-//             Actual number of residuals ------+            |     |  |
-//             Indicate dynamic number of residuals ---------+     |  |
-//             Dimension of x -------------------------------------+  |
-//             Dimension of y ----------------------------------------+
+//       = new AutoDiffCostFunction<MyScalarCostFunctor, DYNAMIC, 2, 2>(
+//           new CostFunctorWithDynamicNumResiduals(1.0),   ^     ^  ^
+//           runtime_number_of_residuals); <----+           |     |  |
+//                                              |           |     |  |
+//                                              |           |     |  |
+//             Actual number of residuals ------+           |     |  |
+//             Indicate dynamic number of residuals --------+     |  |
+//             Dimension of x ------------------------------------+  |
+//             Dimension of y ---------------------------------------+
 //
 // The framework can currently accommodate cost functions of up to 6 independent
 // variables, and there is no limit on the dimensionality of each of them.
@@ -119,17 +122,17 @@
 // functions is to get the sizing wrong. In particular, there is a tendency to
 // set the template parameters to (dimension of residual, number of parameters)
 // instead of passing a dimension parameter for *every parameter*. In the
-// example above, that would be <MyScalarCostFunction, 1, 2>, which is missing
+// example above, that would be <MyScalarCostFunctor, 1, 2>, which is missing
 // the last '2' argument. Please be careful when setting the size parameters.
 
 #ifndef CERES_PUBLIC_AUTODIFF_COST_FUNCTION_H_
 #define CERES_PUBLIC_AUTODIFF_COST_FUNCTION_H_
 
-#include <glog/logging.h>
 #include "ceres/internal/autodiff.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/sized_cost_function.h"
 #include "ceres/types.h"
+#include "glog/logging.h"
 
 namespace ceres {
 
@@ -159,8 +162,9 @@ template <typename CostFunctor,
           int N7 = 0,   // Number of parameters in block 7.
           int N8 = 0,   // Number of parameters in block 8.
           int N9 = 0>   // Number of parameters in block 9.
-class AutoDiffCostFunction :
-  public SizedCostFunction<M, N0, N1, N2, N3, N4, N5, N6, N7, N8, N9> {
+class AutoDiffCostFunction : public SizedCostFunction<M,
+                                                      N0, N1, N2, N3, N4,
+                                                      N5, N6, N7, N8, N9> {
  public:
   // Takes ownership of functor. Uses the template-provided value for the
   // number of residuals ("M").
diff --git a/include/ceres/autodiff_local_parameterization.h b/include/ceres/autodiff_local_parameterization.h
new file mode 100644
index 0000000..0aae6c7
--- /dev/null
+++ b/include/ceres/autodiff_local_parameterization.h
@@ -0,0 +1,144 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sergey.vfx@gmail.com (Sergey Sharybin)
+//         mierle@gmail.com (Keir Mierle)
+//         sameeragarwal@google.com (Sameer Agarwal)
+
+#ifndef CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_
+#define CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_
+
+#include "ceres/internal/autodiff.h"
+#include "ceres/internal/scoped_ptr.h"
+#include "ceres/local_parameterization.h"
+
+namespace ceres {
+
+// Create local parameterization with Jacobians computed via automatic
+// differentiation. For more information on local parameterizations,
+// see include/ceres/local_parameterization.h
+//
+// To get an auto differentiated local parameterization, you must define
+// a class with a templated operator() (a functor) that computes
+//
+//   x_plus_delta = Plus(x, delta);
+//
+// the template parameter T. The autodiff framework substitutes appropriate
+// "Jet" objects for T in order to compute the derivative when necessary, but
+// this is hidden, and you should write the function as if T were a scalar type
+// (e.g. a double-precision floating point number).
+//
+// The function must write the computed value in the last argument (the only
+// non-const one) and return true to indicate success.
+//
+// For example, Quaternions have a three dimensional local
+// parameterization. It's plus operation can be implemented as (taken
+// from internal/ceres/auto_diff_local_parameterization_test.cc)
+//
+//   struct QuaternionPlus {
+//     template<typename T>
+//     bool operator()(const T* x, const T* delta, T* x_plus_delta) const {
+//       const T squared_norm_delta =
+//           delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2];
+//
+//       T q_delta[4];
+//       if (squared_norm_delta > T(0.0)) {
+//         T norm_delta = sqrt(squared_norm_delta);
+//         const T sin_delta_by_delta = sin(norm_delta) / norm_delta;
+//         q_delta[0] = cos(norm_delta);
+//         q_delta[1] = sin_delta_by_delta * delta[0];
+//         q_delta[2] = sin_delta_by_delta * delta[1];
+//         q_delta[3] = sin_delta_by_delta * delta[2];
+//       } else {
+//         // We do not just use q_delta = [1,0,0,0] here because that is a
+//         // constant and when used for automatic differentiation will
+//         // lead to a zero derivative. Instead we take a first order
+//         // approximation and evaluate it at zero.
+//         q_delta[0] = T(1.0);
+//         q_delta[1] = delta[0];
+//         q_delta[2] = delta[1];
+//         q_delta[3] = delta[2];
+//       }
+//
+//       QuaternionProduct(q_delta, x, x_plus_delta);
+//       return true;
+//     }
+//   };
+//
+// Then given this struct, the auto differentiated local
+// parameterization can now be constructed as
+//
+//   LocalParameterization* local_parameterization =
+//     new AutoDiffLocalParameterization<QuaternionPlus, 4, 3>;
+//                                                       |  |
+//                            Global Size ---------------+  |
+//                            Local Size -------------------+
+//
+// WARNING: Since the functor will get instantiated with different types for
+// T, you must to convert from other numeric types to T before mixing
+// computations with other variables of type T. In the example above, this is
+// seen where instead of using k_ directly, k_ is wrapped with T(k_).
+
+template <typename Functor, int kGlobalSize, int kLocalSize>
+class AutoDiffLocalParameterization : public LocalParameterization {
+ public:
+  virtual ~AutoDiffLocalParameterization() {}
+  virtual bool Plus(const double* x,
+                    const double* delta,
+                    double* x_plus_delta) const {
+    return Functor()(x, delta, x_plus_delta);
+  }
+
+  virtual bool ComputeJacobian(const double* x, double* jacobian) const {
+    double zero_delta[kLocalSize];
+    for (int i = 0; i < kLocalSize; ++i) {
+      zero_delta[i] = 0.0;
+    }
+
+    double x_plus_delta[kGlobalSize];
+    for (int i = 0; i < kGlobalSize; ++i) {
+      x_plus_delta[i] = 0.0;
+    }
+
+    const double* parameter_ptrs[2] = {x, zero_delta};
+    double* jacobian_ptrs[2] = { NULL, jacobian };
+    return internal::AutoDiff<Functor, double, kGlobalSize, kLocalSize>
+        ::Differentiate(Functor(),
+                        parameter_ptrs,
+                        kGlobalSize,
+                        x_plus_delta,
+                        jacobian_ptrs);
+  }
+
+  virtual int GlobalSize() const { return kGlobalSize; }
+  virtual int LocalSize() const { return kLocalSize; }
+};
+
+}  // namespace ceres
+
+#endif  // CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_
diff --git a/include/ceres/c_api.h b/include/ceres/c_api.h
new file mode 100644
index 0000000..add68de
--- /dev/null
+++ b/include/ceres/c_api.h
@@ -0,0 +1,141 @@
+/* Ceres Solver - A fast non-linear least squares minimizer
+ * Copyright 2013 Google Inc. All rights reserved.
+ * http://code.google.com/p/ceres-solver/
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of Google Inc. nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without
+ *   specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: mierle@gmail.com (Keir Mierle)
+ *
+ * A minimal C API for Ceres. Not all functionality is included. This API is
+ * not intended for clients of Ceres, but is instead intended for easing the
+ * process of binding Ceres to other languages.
+ *
+ * Currently this is a work in progress.
+ */
+
+#ifndef CERES_PUBLIC_C_API_H_
+#define CERES_PUBLIC_C_API_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Init the Ceres private data. Must be called before anything else. */
+void ceres_init();
+
+/* Equivalent to CostFunction::Evaluate() in the C++ API.
+ *
+ * The user may keep private information inside the opaque user_data object.
+ * The pointer here is the same one passed in the ceres_add_residual_block().
+ */
+typedef int (*ceres_cost_function_t)(void* user_data,
+                                     double** parameters,
+                                     double* residuals,
+                                     double** jacobians);
+
+/* Equivalent to LossFunction::Evaluate() from the C++ API. */
+typedef void (*ceres_loss_function_t)(void* user_data,
+                                      double squared_norm,
+                                      double out[3]);
+
+/* Create callback data for Ceres' stock loss functions.
+ *
+ * Ceres has several loss functions available by default, and these functions
+ * expose those to the C API. To use the stock loss functions, call
+ * ceres_create_*_loss_data(), which internally creates an instance of one of
+ * the stock loss functions (for example ceres::CauchyLoss), and pass the
+ * returned "loss_function_data" along with the ceres_stock_loss_function to
+ * ceres_add_residual_block().
+ *
+ * For example:
+ *
+ *   void* cauchy_loss_function_data =
+ *       ceres_create_cauchy_loss_function_data(1.2, 0.0);
+ *   ceres_problem_add_residual_block(
+ *       problem,
+ *       my_cost_function,
+ *       my_cost_function_data,
+ *       ceres_stock_loss_function,
+ *       cauchy_loss_function_data,
+ *       1,
+ *       2,
+ *       parameter_sizes,
+ *       parameter_pointers);
+ *    ...
+ *    ceres_free_stock_loss_function_data(cauchy_loss_function_data);
+ *
+ * See loss_function.h for the details of each loss function.
+ */
+void* ceres_create_huber_loss_function_data(double a);
+void* ceres_create_softl1_loss_function_data(double a);
+void* ceres_create_cauchy_loss_function_data(double a);
+void* ceres_create_arctan_loss_function_data(double a);
+void* ceres_create_tolerant_loss_function_data(double a, double b);
+
+/* Free the given stock loss function data. */
+void ceres_free_stock_loss_function_data(void* loss_function_data);
+
+/* This is an implementation of ceres_loss_function_t contained within Ceres
+ * itself, intended as a way to access the various stock Ceres loss functions
+ * from the C API. This should be passed to ceres_add_residual() below, in
+ * combination with a user_data pointer generated by
+ * ceres_create_stock_loss_function() above. */
+void ceres_stock_loss_function(void* user_data,
+                               double squared_norm,
+                               double out[3]);
+
+/* Equivalent to Problem from the C++ API. */
+struct ceres_problem_s;
+typedef struct ceres_problem_s ceres_problem_t;
+
+struct ceres_residual_block_id_s;
+typedef struct ceres_residual_block_id_s ceres_residual_block_id_t;
+
+/* Create and destroy a problem */
+/* TODO(keir): Add options for the problem. */
+ceres_problem_t* ceres_create_problem();
+void ceres_free_problem(ceres_problem_t* problem);
+
+/* Add a residual block. */
+ceres_residual_block_id_t* ceres_problem_add_residual_block(
+    ceres_problem_t* problem,
+    ceres_cost_function_t cost_function,
+    void* cost_function_data,
+    ceres_loss_function_t loss_function,
+    void* loss_function_data,
+    int num_residuals,
+    int num_parameter_blocks,
+    int* parameter_block_sizes,
+    double** parameters);
+
+void ceres_solve(ceres_problem_t* problem);
+
+/* TODO(keir): Figure out a way to pass a config in. */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* CERES_PUBLIC_C_API_H_ */
diff --git a/include/ceres/ceres.h b/include/ceres/ceres.h
index 9b8f212..61b8b94 100644
--- a/include/ceres/ceres.h
+++ b/include/ceres/ceres.h
@@ -34,15 +34,21 @@
 #ifndef CERES_PUBLIC_CERES_H_
 #define CERES_PUBLIC_CERES_H_
 
-#define CERES_VERSION 1.4.0
-#define CERES_ABI_VERSION 1.4.0
+#define CERES_VERSION 1.7.0
+#define CERES_ABI_VERSION 1.7.0
 
 #include "ceres/autodiff_cost_function.h"
+#include "ceres/autodiff_local_parameterization.h"
 #include "ceres/cost_function.h"
+#include "ceres/cost_function_to_functor.h"
+#include "ceres/covariance.h"
+#include "ceres/crs_matrix.h"
 #include "ceres/iteration_callback.h"
+#include "ceres/jet.h"
 #include "ceres/local_parameterization.h"
 #include "ceres/loss_function.h"
 #include "ceres/numeric_diff_cost_function.h"
+#include "ceres/numeric_diff_functor.h"
 #include "ceres/ordered_groups.h"
 #include "ceres/problem.h"
 #include "ceres/sized_cost_function.h"
diff --git a/include/ceres/cost_function.h b/include/ceres/cost_function.h
index 9b010f7..8013e96 100644
--- a/include/ceres/cost_function.h
+++ b/include/ceres/cost_function.h
@@ -93,6 +93,24 @@ class CostFunction {
   // the case when computing cost only. If jacobians[i] is NULL, then
   // the jacobian block corresponding to the i'th parameter block must
   // not to be returned.
+  //
+  // The return value indicates whether the computation of the
+  // residuals and/or jacobians was successful or not.
+  //
+  // This can be used to communicate numerical failures in jacobian
+  // computations for instance.
+  //
+  // A more interesting and common use is to impose constraints on the
+  // parameters. If the initial values of the parameter blocks satisfy
+  // the constraints, then returning false whenever the constraints
+  // are not satisfied will prevent the solver from moving into the
+  // infeasible region. This is not a very sophisticated mechanism for
+  // enforcing constraints, but is often good enough for things like
+  // non-negativity constraints.
+  //
+  // Note that it is important that the initial values of the
+  // parameter block must be feasible, otherwise the solver will
+  // declare a numerical problem at iteration 0.
   virtual bool Evaluate(double const* const* parameters,
                         double* residuals,
                         double** jacobians) const = 0;
diff --git a/include/ceres/cost_function_to_functor.h b/include/ceres/cost_function_to_functor.h
new file mode 100644
index 0000000..fa1012d
--- /dev/null
+++ b/include/ceres/cost_function_to_functor.h
@@ -0,0 +1,752 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+//
+// CostFunctionToFunctor is an adapter class that allows users to use
+// CostFunction objects in templated functors which are to be used for
+// automatic differentiation.  This allows the user to seamlessly mix
+// analytic, numeric and automatic differentiation.
+//
+// For example, let us assume that
+//
+//  class IntrinsicProjection : public SizedCostFunction<2, 5, 3> {
+//    public:
+//      IntrinsicProjection(const double* observations);
+//      virtual bool Evaluate(double const* const* parameters,
+//                            double* residuals,
+//                            double** jacobians) const;
+//  };
+//
+// is a cost function that implements the projection of a point in its
+// local coordinate system onto its image plane and subtracts it from
+// the observed point projection. It can compute its residual and
+// either via analytic or numerical differentiation can compute its
+// jacobians.
+//
+// Now we would like to compose the action of this CostFunction with
+// the action of camera extrinsics, i.e., rotation and
+// translation. Say we have a templated function
+//
+//   template<typename T>
+//   void RotateAndTranslatePoint(const T* rotation,
+//                                const T* translation,
+//                                const T* point,
+//                                T* result);
+//
+// Then we can now do the following,
+//
+// struct CameraProjection {
+//   CameraProjection(double* observation) {
+//     intrinsic_projection_.reset(
+//         new CostFunctionToFunctor<2, 5, 3>(
+//             new IntrinsicProjection(observation_)));
+//   }
+//   template <typename T>
+//   bool operator()(const T* rotation,
+//                   const T* translation,
+//                   const T* intrinsics,
+//                   const T* point,
+//                   T* residual) const {
+//     T transformed_point[3];
+//     RotateAndTranslatePoint(rotation, translation, point, transformed_point);
+//
+//     // Note that we call intrinsic_projection_, just like it was
+//     // any other templated functor.
+//
+//     return (*intrinsic_projection_)(intrinsics, transformed_point, residual);
+//   }
+//
+//  private:
+//   scoped_ptr<CostFunctionToFunctor<2,5,3> > intrinsic_projection_;
+// };
+
+#ifndef CERES_PUBLIC_COST_FUNCTION_TO_FUNCTOR_H_
+#define CERES_PUBLIC_COST_FUNCTION_TO_FUNCTOR_H_
+
+#include <numeric>
+#include <vector>
+
+#include "ceres/cost_function.h"
+#include "ceres/internal/fixed_array.h"
+#include "ceres/internal/port.h"
+#include "ceres/internal/scoped_ptr.h"
+
+namespace ceres {
+
+template <int kNumResiduals,
+          int N0, int N1 = 0, int N2 = 0, int N3 = 0, int N4 = 0,
+          int N5 = 0, int N6 = 0, int N7 = 0, int N8 = 0, int N9 = 0>
+class CostFunctionToFunctor {
+ public:
+  explicit CostFunctionToFunctor(CostFunction* cost_function)
+  : cost_function_(cost_function) {
+    CHECK_NOTNULL(cost_function);
+
+    CHECK_GE(kNumResiduals, 0);
+    CHECK_EQ(cost_function->num_residuals(), kNumResiduals);
+
+    // This block breaks the 80 column rule to keep it somewhat readable.
+    CHECK((!N1 && !N2 && !N3 && !N4 && !N5 && !N6 && !N7 && !N8 && !N9) ||
+          ((N1 > 0) && !N2 && !N3 && !N4 && !N5 && !N6 && !N7 && !N8 && !N9) ||
+          ((N1 > 0) && (N2 > 0) && !N3 && !N4 && !N5 && !N6 && !N7 && !N8 && !N9) ||
+          ((N1 > 0) && (N2 > 0) && (N3 > 0) && !N4 && !N5 && !N6 && !N7 && !N8 && !N9) ||
+          ((N1 > 0) && (N2 > 0) && (N3 > 0) && (N4 > 0) && !N5 && !N6 && !N7 && !N8 && !N9) ||
+          ((N1 > 0) && (N2 > 0) && (N3 > 0) && (N4 > 0) && (N5 > 0) && !N6 && !N7 && !N8 && !N9) ||
+          ((N1 > 0) && (N2 > 0) && (N3 > 0) && (N4 > 0) && (N5 > 0) && (N6 > 0) && !N7 && !N8 && !N9) ||
+          ((N1 > 0) && (N2 > 0) && (N3 > 0) && (N4 > 0) && (N5 > 0) && (N6 > 0) && (N7 > 0) && !N8 && !N9) ||
+          ((N1 > 0) && (N2 > 0) && (N3 > 0) && (N4 > 0) && (N5 > 0) && (N6 > 0) && (N7 > 0) && (N8 > 0) && !N9) ||
+          ((N1 > 0) && (N2 > 0) && (N3 > 0) && (N4 > 0) && (N5 > 0) && (N6 > 0) && (N7 > 0) && (N8 > 0) && (N9 > 0)))
+        << "Zero block cannot precede a non-zero block. Block sizes are "
+        << "(ignore trailing 0s): " << N0 << ", " << N1 << ", " << N2 << ", "
+        << N3 << ", " << N4 << ", " << N5 << ", " << N6 << ", " << N7 << ", "
+        << N8 << ", " << N9;
+
+    const vector<int16>& parameter_block_sizes =
+        cost_function->parameter_block_sizes();
+    const int num_parameter_blocks =
+        (N0 > 0) + (N1 > 0) + (N2 > 0) + (N3 > 0) + (N4 > 0) +
+        (N5 > 0) + (N6 > 0) + (N7 > 0) + (N8 > 0) + (N9 > 0);
+    CHECK_EQ(parameter_block_sizes.size(), num_parameter_blocks);
+
+    CHECK_EQ(N0, parameter_block_sizes[0]);
+    if (parameter_block_sizes.size() > 1) CHECK_EQ(N1, parameter_block_sizes[1]);  // NOLINT
+    if (parameter_block_sizes.size() > 2) CHECK_EQ(N2, parameter_block_sizes[2]);  // NOLINT
+    if (parameter_block_sizes.size() > 3) CHECK_EQ(N3, parameter_block_sizes[3]);  // NOLINT
+    if (parameter_block_sizes.size() > 4) CHECK_EQ(N4, parameter_block_sizes[4]);  // NOLINT
+    if (parameter_block_sizes.size() > 5) CHECK_EQ(N5, parameter_block_sizes[5]);  // NOLINT
+    if (parameter_block_sizes.size() > 6) CHECK_EQ(N6, parameter_block_sizes[6]);  // NOLINT
+    if (parameter_block_sizes.size() > 7) CHECK_EQ(N7, parameter_block_sizes[7]);  // NOLINT
+    if (parameter_block_sizes.size() > 8) CHECK_EQ(N8, parameter_block_sizes[8]);  // NOLINT
+    if (parameter_block_sizes.size() > 9) CHECK_EQ(N9, parameter_block_sizes[9]);  // NOLINT
+
+    CHECK_EQ(accumulate(parameter_block_sizes.begin(),
+                        parameter_block_sizes.end(), 0),
+             N0 + N1 + N2 + N3 + N4 + N5 + N6 + N7 + N8 + N9);
+  }
+
+  bool operator()(const double* x0, double* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_EQ(N1, 0);
+    CHECK_EQ(N2, 0);
+    CHECK_EQ(N3, 0);
+    CHECK_EQ(N4, 0);
+    CHECK_EQ(N5, 0);
+    CHECK_EQ(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+
+    return cost_function_->Evaluate(&x0, residuals, NULL);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  double* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_EQ(N2, 0);
+    CHECK_EQ(N3, 0);
+    CHECK_EQ(N4, 0);
+    CHECK_EQ(N5, 0);
+    CHECK_EQ(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const double*> parameter_blocks(2);
+    parameter_blocks[0] = x0;
+    parameter_blocks[1] = x1;
+    return cost_function_->Evaluate(parameter_blocks.get(), residuals, NULL);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  double* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_EQ(N3, 0);
+    CHECK_EQ(N4, 0);
+    CHECK_EQ(N5, 0);
+    CHECK_EQ(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const double*> parameter_blocks(3);
+    parameter_blocks[0] = x0;
+    parameter_blocks[1] = x1;
+    parameter_blocks[2] = x2;
+    return cost_function_->Evaluate(parameter_blocks.get(), residuals, NULL);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  double* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_EQ(N4, 0);
+    CHECK_EQ(N5, 0);
+    CHECK_EQ(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const double*> parameter_blocks(4);
+    parameter_blocks[0] = x0;
+    parameter_blocks[1] = x1;
+    parameter_blocks[2] = x2;
+    parameter_blocks[3] = x3;
+    return cost_function_->Evaluate(parameter_blocks.get(), residuals, NULL);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  const double* x4,
+                  double* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_NE(N4, 0);
+    CHECK_EQ(N5, 0);
+    CHECK_EQ(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const double*> parameter_blocks(5);
+    parameter_blocks[0] = x0;
+    parameter_blocks[1] = x1;
+    parameter_blocks[2] = x2;
+    parameter_blocks[3] = x3;
+    parameter_blocks[4] = x4;
+    return cost_function_->Evaluate(parameter_blocks.get(), residuals, NULL);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  const double* x4,
+                  const double* x5,
+                  double* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_NE(N4, 0);
+    CHECK_NE(N5, 0);
+    CHECK_EQ(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const double*> parameter_blocks(6);
+    parameter_blocks[0] = x0;
+    parameter_blocks[1] = x1;
+    parameter_blocks[2] = x2;
+    parameter_blocks[3] = x3;
+    parameter_blocks[4] = x4;
+    parameter_blocks[5] = x5;
+    return cost_function_->Evaluate(parameter_blocks.get(), residuals, NULL);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  const double* x4,
+                  const double* x5,
+                  const double* x6,
+                  double* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_NE(N4, 0);
+    CHECK_NE(N5, 0);
+    CHECK_NE(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const double*> parameter_blocks(7);
+    parameter_blocks[0] = x0;
+    parameter_blocks[1] = x1;
+    parameter_blocks[2] = x2;
+    parameter_blocks[3] = x3;
+    parameter_blocks[4] = x4;
+    parameter_blocks[5] = x5;
+    parameter_blocks[6] = x6;
+    return cost_function_->Evaluate(parameter_blocks.get(), residuals, NULL);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  const double* x4,
+                  const double* x5,
+                  const double* x6,
+                  const double* x7,
+                  double* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_NE(N4, 0);
+    CHECK_NE(N5, 0);
+    CHECK_NE(N6, 0);
+    CHECK_NE(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const double*> parameter_blocks(8);
+    parameter_blocks[0] = x0;
+    parameter_blocks[1] = x1;
+    parameter_blocks[2] = x2;
+    parameter_blocks[3] = x3;
+    parameter_blocks[4] = x4;
+    parameter_blocks[5] = x5;
+    parameter_blocks[6] = x6;
+    parameter_blocks[7] = x7;
+    return cost_function_->Evaluate(parameter_blocks.get(), residuals, NULL);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  const double* x4,
+                  const double* x5,
+                  const double* x6,
+                  const double* x7,
+                  const double* x8,
+                  double* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_NE(N4, 0);
+    CHECK_NE(N5, 0);
+    CHECK_NE(N6, 0);
+    CHECK_NE(N7, 0);
+    CHECK_NE(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const double*> parameter_blocks(9);
+    parameter_blocks[0] = x0;
+    parameter_blocks[1] = x1;
+    parameter_blocks[2] = x2;
+    parameter_blocks[3] = x3;
+    parameter_blocks[4] = x4;
+    parameter_blocks[5] = x5;
+    parameter_blocks[6] = x6;
+    parameter_blocks[7] = x7;
+    parameter_blocks[8] = x8;
+    return cost_function_->Evaluate(parameter_blocks.get(), residuals, NULL);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  const double* x4,
+                  const double* x5,
+                  const double* x6,
+                  const double* x7,
+                  const double* x8,
+                  const double* x9,
+                  double* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_NE(N4, 0);
+    CHECK_NE(N5, 0);
+    CHECK_NE(N6, 0);
+    CHECK_NE(N7, 0);
+    CHECK_NE(N8, 0);
+    CHECK_NE(N9, 0);
+    internal::FixedArray<const double*> parameter_blocks(10);
+    parameter_blocks[0] = x0;
+    parameter_blocks[1] = x1;
+    parameter_blocks[2] = x2;
+    parameter_blocks[3] = x3;
+    parameter_blocks[4] = x4;
+    parameter_blocks[5] = x5;
+    parameter_blocks[6] = x6;
+    parameter_blocks[7] = x7;
+    parameter_blocks[8] = x8;
+    parameter_blocks[9] = x9;
+    return cost_function_->Evaluate(parameter_blocks.get(), residuals, NULL);
+  }
+
+  template <typename JetT>
+  bool operator()(const JetT* x0, JetT* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_EQ(N1, 0);
+    CHECK_EQ(N2, 0);
+    CHECK_EQ(N3, 0);
+    CHECK_EQ(N4, 0);
+    CHECK_EQ(N5, 0);
+    CHECK_EQ(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    return EvaluateWithJets(&x0, residuals);
+  }
+
+  template <typename JetT>
+  bool operator()(const JetT* x0,
+                  const JetT* x1,
+                  JetT* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_EQ(N2, 0);
+    CHECK_EQ(N3, 0);
+    CHECK_EQ(N4, 0);
+    CHECK_EQ(N5, 0);
+    CHECK_EQ(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const JetT*> jets(2);
+    jets[0] = x0;
+    jets[1] = x1;
+    return EvaluateWithJets(jets.get(), residuals);
+  }
+
+  template <typename JetT>
+  bool operator()(const JetT* x0,
+                  const JetT* x1,
+                  const JetT* x2,
+                  JetT* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_EQ(N3, 0);
+    CHECK_EQ(N4, 0);
+    CHECK_EQ(N5, 0);
+    CHECK_EQ(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const JetT*> jets(3);
+    jets[0] = x0;
+    jets[1] = x1;
+    jets[2] = x2;
+    return EvaluateWithJets(jets.get(), residuals);
+  }
+
+  template <typename JetT>
+  bool operator()(const JetT* x0,
+                  const JetT* x1,
+                  const JetT* x2,
+                  const JetT* x3,
+                  JetT* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_EQ(N4, 0);
+    CHECK_EQ(N5, 0);
+    CHECK_EQ(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const JetT*> jets(4);
+    jets[0] = x0;
+    jets[1] = x1;
+    jets[2] = x2;
+    jets[3] = x3;
+    return EvaluateWithJets(jets.get(), residuals);
+  }
+
+  template <typename JetT>
+  bool operator()(const JetT* x0,
+                  const JetT* x1,
+                  const JetT* x2,
+                  const JetT* x3,
+                  const JetT* x4,
+                  JetT* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_NE(N4, 0);
+    CHECK_EQ(N5, 0);
+    CHECK_EQ(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const JetT*> jets(5);
+    jets[0] = x0;
+    jets[1] = x1;
+    jets[2] = x2;
+    jets[3] = x3;
+    jets[4] = x4;
+    return EvaluateWithJets(jets.get(), residuals);
+  }
+
+  template <typename JetT>
+  bool operator()(const JetT* x0,
+                  const JetT* x1,
+                  const JetT* x2,
+                  const JetT* x3,
+                  const JetT* x4,
+                  const JetT* x5,
+                  JetT* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_NE(N4, 0);
+    CHECK_NE(N5, 0);
+    CHECK_EQ(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const JetT*> jets(6);
+    jets[0] = x0;
+    jets[1] = x1;
+    jets[2] = x2;
+    jets[3] = x3;
+    jets[4] = x4;
+    jets[5] = x5;
+    return EvaluateWithJets(jets.get(), residuals);
+  }
+
+  template <typename JetT>
+  bool operator()(const JetT* x0,
+                  const JetT* x1,
+                  const JetT* x2,
+                  const JetT* x3,
+                  const JetT* x4,
+                  const JetT* x5,
+                  const JetT* x6,
+                  JetT* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_NE(N4, 0);
+    CHECK_NE(N5, 0);
+    CHECK_NE(N6, 0);
+    CHECK_EQ(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const JetT*> jets(7);
+    jets[0] = x0;
+    jets[1] = x1;
+    jets[2] = x2;
+    jets[3] = x3;
+    jets[4] = x4;
+    jets[5] = x5;
+    jets[6] = x6;
+    return EvaluateWithJets(jets.get(), residuals);
+  }
+
+  template <typename JetT>
+  bool operator()(const JetT* x0,
+                  const JetT* x1,
+                  const JetT* x2,
+                  const JetT* x3,
+                  const JetT* x4,
+                  const JetT* x5,
+                  const JetT* x6,
+                  const JetT* x7,
+                  JetT* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_NE(N4, 0);
+    CHECK_NE(N5, 0);
+    CHECK_NE(N6, 0);
+    CHECK_NE(N7, 0);
+    CHECK_EQ(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const JetT*> jets(8);
+    jets[0] = x0;
+    jets[1] = x1;
+    jets[2] = x2;
+    jets[3] = x3;
+    jets[4] = x4;
+    jets[5] = x5;
+    jets[6] = x6;
+    jets[7] = x7;
+    return EvaluateWithJets(jets.get(), residuals);
+  }
+
+  template <typename JetT>
+  bool operator()(const JetT* x0,
+                  const JetT* x1,
+                  const JetT* x2,
+                  const JetT* x3,
+                  const JetT* x4,
+                  const JetT* x5,
+                  const JetT* x6,
+                  const JetT* x7,
+                  const JetT* x8,
+                  JetT* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_NE(N4, 0);
+    CHECK_NE(N5, 0);
+    CHECK_NE(N6, 0);
+    CHECK_NE(N7, 0);
+    CHECK_NE(N8, 0);
+    CHECK_EQ(N9, 0);
+    internal::FixedArray<const JetT*> jets(9);
+    jets[0] = x0;
+    jets[1] = x1;
+    jets[2] = x2;
+    jets[3] = x3;
+    jets[4] = x4;
+    jets[5] = x5;
+    jets[6] = x6;
+    jets[7] = x7;
+    jets[8] = x8;
+    return EvaluateWithJets(jets.get(), residuals);
+  }
+
+  template <typename JetT>
+  bool operator()(const JetT* x0,
+                  const JetT* x1,
+                  const JetT* x2,
+                  const JetT* x3,
+                  const JetT* x4,
+                  const JetT* x5,
+                  const JetT* x6,
+                  const JetT* x7,
+                  const JetT* x8,
+                  const JetT* x9,
+                  JetT* residuals) const {
+    CHECK_NE(N0, 0);
+    CHECK_NE(N1, 0);
+    CHECK_NE(N2, 0);
+    CHECK_NE(N3, 0);
+    CHECK_NE(N4, 0);
+    CHECK_NE(N5, 0);
+    CHECK_NE(N6, 0);
+    CHECK_NE(N7, 0);
+    CHECK_NE(N8, 0);
+    CHECK_NE(N9, 0);
+    internal::FixedArray<const JetT*> jets(10);
+    jets[0] = x0;
+    jets[1] = x1;
+    jets[2] = x2;
+    jets[3] = x3;
+    jets[4] = x4;
+    jets[5] = x5;
+    jets[6] = x6;
+    jets[7] = x7;
+    jets[8] = x8;
+    jets[9] = x9;
+    return EvaluateWithJets(jets.get(), residuals);
+  }
+
+ private:
+  template <typename JetT>
+  bool EvaluateWithJets(const JetT** inputs, JetT* output) const {
+    const int kNumParameters =  N0 + N1 + N2 + N3 + N4 + N5 + N6 + N7 + N8 + N9;
+    const vector<int16>& parameter_block_sizes =
+        cost_function_->parameter_block_sizes();
+    const int num_parameter_blocks = parameter_block_sizes.size();
+    const int num_residuals = cost_function_->num_residuals();
+
+    internal::FixedArray<double> parameters(kNumParameters);
+    internal::FixedArray<double*> parameter_blocks(num_parameter_blocks);
+    internal::FixedArray<double> jacobians(num_residuals * kNumParameters);
+    internal::FixedArray<double*> jacobian_blocks(num_parameter_blocks);
+    internal::FixedArray<double> residuals(num_residuals);
+
+    // Build a set of arrays to get the residuals and jacobians from
+    // the CostFunction wrapped by this functor.
+    double* parameter_ptr = parameters.get();
+    double* jacobian_ptr = jacobians.get();
+    for (int i = 0; i < num_parameter_blocks; ++i) {
+      parameter_blocks[i] = parameter_ptr;
+      jacobian_blocks[i] = jacobian_ptr;
+      for (int j = 0; j < parameter_block_sizes[i]; ++j) {
+        *parameter_ptr++ = inputs[i][j].a;
+      }
+      jacobian_ptr += num_residuals * parameter_block_sizes[i];
+    }
+
+    if (!cost_function_->Evaluate(parameter_blocks.get(),
+                                  residuals.get(),
+                                  jacobian_blocks.get())) {
+      return false;
+    }
+
+    // Now that we have the incoming Jets, which are carrying the
+    // partial derivatives of each of the inputs w.r.t to some other
+    // underlying parameters. The derivative of the outputs of the
+    // cost function w.r.t to the same underlying parameters can now
+    // be computed by applying the chain rule.
+    //
+    //  d output[i]               d output[i]   d input[j]
+    //  --------------  = sum_j   ----------- * ------------
+    //  d parameter[k]            d input[j]    d parameter[k]
+    //
+    // d input[j]
+    // --------------  = inputs[j], so
+    // d parameter[k]
+    //
+    //  outputJet[i]  = sum_k jacobian[i][k] * inputJet[k]
+    //
+    // The following loop, iterates over the residuals, computing one
+    // output jet at a time.
+    for (int i = 0; i < num_residuals; ++i) {
+      output[i].a = residuals[i];
+      output[i].v.setZero();
+
+      for (int j = 0; j < num_parameter_blocks; ++j) {
+        const int16 block_size = parameter_block_sizes[j];
+        for (int k = 0; k < parameter_block_sizes[j]; ++k) {
+          output[i].v +=
+              jacobian_blocks[j][i * block_size + k] * inputs[j][k].v;
+        }
+      }
+    }
+
+    return true;
+  }
+
+ private:
+  internal::scoped_ptr<CostFunction> cost_function_;
+};
+
+}  // namespace ceres
+
+#endif  // CERES_PUBLIC_COST_FUNCTION_TO_FUNCTOR_H_
diff --git a/include/ceres/covariance.h b/include/ceres/covariance.h
new file mode 100644
index 0000000..83126b5
--- /dev/null
+++ b/include/ceres/covariance.h
@@ -0,0 +1,422 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+
+#ifndef CERES_PUBLIC_COVARIANCE_H_
+#define CERES_PUBLIC_COVARIANCE_H_
+
+#include <utility>
+#include <vector>
+#include "ceres/internal/port.h"
+#include "ceres/internal/scoped_ptr.h"
+#include "ceres/types.h"
+
+namespace ceres {
+
+class Problem;
+
+namespace internal {
+class CovarianceImpl;
+}  // namespace internal
+
+// WARNING
+// =======
+// It is very easy to use this class incorrectly without understanding
+// the underlying mathematics. Please read and understand the
+// documentation completely before attempting to use this class.
+//
+//
+// This class allows the user to evaluate the covariance for a
+// non-linear least squares problem and provides random access to its
+// blocks
+//
+// Background
+// ==========
+// One way to assess the quality of the solution returned by a
+// non-linear least squares solve is to analyze the covariance of the
+// solution.
+//
+// Let us consider the non-linear regression problem
+//
+//   y = f(x) + N(0, I)
+//
+// i.e., the observation y is a random non-linear function of the
+// independent variable x with mean f(x) and identity covariance. Then
+// the maximum likelihood estimate of x given observations y is the
+// solution to the non-linear least squares problem:
+//
+//  x* = arg min_x |f(x)|^2
+//
+// And the covariance of x* is given by
+//
+//  C(x*) = inverse[J'(x*)J(x*)]
+//
+// Here J(x*) is the Jacobian of f at x*. The above formula assumes
+// that J(x*) has full column rank.
+//
+// If J(x*) is rank deficient, then the covariance matrix C(x*) is
+// also rank deficient and is given by
+//
+//  C(x*) =  pseudoinverse[J'(x*)J(x*)]
+//
+// Note that in the above, we assumed that the covariance
+// matrix for y was identity. This is an important assumption. If this
+// is not the case and we have
+//
+//  y = f(x) + N(0, S)
+//
+// Where S is a positive semi-definite matrix denoting the covariance
+// of y, then the maximum likelihood problem to be solved is
+//
+//  x* = arg min_x f'(x) inverse[S] f(x)
+//
+// and the corresponding covariance estimate of x* is given by
+//
+//  C(x*) = inverse[J'(x*) inverse[S] J(x*)]
+//
+// So, if it is the case that the observations being fitted to have a
+// covariance matrix not equal to identity, then it is the user's
+// responsibility that the corresponding cost functions are correctly
+// scaled, e.g. in the above case the cost function for this problem
+// should evaluate S^{-1/2} f(x) instead of just f(x), where S^{-1/2}
+// is the inverse square root of the covariance matrix S.
+//
+// This class allows the user to evaluate the covariance for a
+// non-linear least squares problem and provides random access to its
+// blocks. The computation assumes that the CostFunctions compute
+// residuals such that their covariance is identity.
+//
+// Since the computation of the covariance matrix requires computing
+// the inverse of a potentially large matrix, this can involve a
+// rather large amount of time and memory. However, it is usually the
+// case that the user is only interested in a small part of the
+// covariance matrix. Quite often just the block diagonal. This class
+// allows the user to specify the parts of the covariance matrix that
+// she is interested in and then uses this information to only compute
+// and store those parts of the covariance matrix.
+//
+// Rank of the Jacobian
+// --------------------
+// As we noted above, if the jacobian is rank deficient, then the
+// inverse of J'J is not defined and instead a pseudo inverse needs to
+// be computed.
+//
+// The rank deficiency in J can be structural -- columns which are
+// always known to be zero or numerical -- depending on the exact
+// values in the Jacobian.
+//
+// Structural rank deficiency occurs when the problem contains
+// parameter blocks that are constant. This class correctly handles
+// structural rank deficiency like that.
+//
+// Numerical rank deficiency, where the rank of the matrix cannot be
+// predicted by its sparsity structure and requires looking at its
+// numerical values is more complicated. Here again there are two
+// cases.
+//
+//   a. The rank deficiency arises from overparameterization. e.g., a
+//   four dimensional quaternion used to parameterize SO(3), which is
+//   a three dimensional manifold. In cases like this, the user should
+//   use an appropriate LocalParameterization. Not only will this lead
+//   to better numerical behaviour of the Solver, it will also expose
+//   the rank deficiency to the Covariance object so that it can
+//   handle it correctly.
+//
+//   b. More general numerical rank deficiency in the Jacobian
+//   requires the computation of the so called Singular Value
+//   Decomposition (SVD) of J'J. We do not know how to do this for
+//   large sparse matrices efficiently. For small and moderate sized
+//   problems this is done using dense linear algebra.
+//
+// Gauge Invariance
+// ----------------
+// In structure from motion (3D reconstruction) problems, the
+// reconstruction is ambiguous upto a similarity transform. This is
+// known as a Gauge Ambiguity. Handling Gauges correctly requires the
+// use of SVD or custom inversion algorithms. For small problems the
+// user can use the dense algorithm. For more details see
+//
+// Ken-ichi Kanatani, Daniel D. Morris: Gauges and gauge
+// transformations for uncertainty description of geometric structure
+// with indeterminacy. IEEE Transactions on Information Theory 47(5):
+// 2017-2028 (2001)
+//
+// Example Usage
+// =============
+//
+//  double x[3];
+//  double y[2];
+//
+//  Problem problem;
+//  problem.AddParameterBlock(x, 3);
+//  problem.AddParameterBlock(y, 2);
+//  <Build Problem>
+//  <Solve Problem>
+//
+//  Covariance::Options options;
+//  Covariance covariance(options);
+//
+//  vector<pair<const double*, const double*> > covariance_blocks;
+//  covariance_blocks.push_back(make_pair(x, x));
+//  covariance_blocks.push_back(make_pair(y, y));
+//  covariance_blocks.push_back(make_pair(x, y));
+//
+//  CHECK(covariance.Compute(covariance_blocks, &problem));
+//
+//  double covariance_xx[3 * 3];
+//  double covariance_yy[2 * 2];
+//  double covariance_xy[3 * 2];
+//  covariance.GetCovarianceBlock(x, x, covariance_xx)
+//  covariance.GetCovarianceBlock(y, y, covariance_yy)
+//  covariance.GetCovarianceBlock(x, y, covariance_xy)
+//
+class Covariance {
+ public:
+  struct Options {
+    Options()
+#ifndef CERES_NO_SUITESPARSE
+        : algorithm_type(SPARSE_QR),
+#else
+        : algorithm_type(DENSE_SVD),
+#endif
+          min_reciprocal_condition_number(1e-14),
+          null_space_rank(0),
+          num_threads(1),
+          apply_loss_function(true) {
+    }
+
+    // Ceres supports three different algorithms for covariance
+    // estimation, which represent different tradeoffs in speed,
+    // accuracy and reliability.
+    //
+    // 1. DENSE_SVD uses Eigen's JacobiSVD to perform the
+    //    computations. It computes the singular value decomposition
+    //
+    //      U * S * V' = J
+    //
+    //    and then uses it to compute the pseudo inverse of J'J as
+    //
+    //      pseudoinverse[J'J]^ = V * pseudoinverse[S] * V'
+    //
+    //    It is an accurate but slow method and should only be used
+    //    for small to moderate sized problems. It can handle
+    //    full-rank as well as rank deficient Jacobians.
+    //
+    // 2. SPARSE_CHOLESKY uses the CHOLMOD sparse Cholesky
+    //    factorization library to compute the decomposition :
+    //
+    //      R'R = J'J
+    //
+    //    and then
+    //
+    //      [J'J]^-1  = [R'R]^-1
+    //
+    //    It a fast algorithm for sparse matrices that should be used
+    //    when the Jacobian matrix J is well conditioned. For
+    //    ill-conditioned matrices, this algorithm can fail
+    //    unpredictabily. This is because Cholesky factorization is
+    //    not a rank-revealing factorization, i.e., it cannot reliably
+    //    detect when the matrix being factorized is not of full
+    //    rank. SuiteSparse/CHOLMOD supplies a heuristic for checking
+    //    if the matrix is rank deficient (cholmod_rcond), but it is
+    //    only a heuristic and can have both false positive and false
+    //    negatives.
+    //
+    //    Recent versions of SuiteSparse (>= 4.2.0) provide a much
+    //    more efficient method for solving for rows of the covariance
+    //    matrix. Therefore, if you are doing SPARSE_CHOLESKY, we
+    //    strongly recommend using a recent version of SuiteSparse.
+    //
+    // 3. SPARSE_QR uses the SuiteSparseQR sparse QR factorization
+    //    library to compute the decomposition
+    //
+    //      Q * R = J
+    //
+    //    [J'J]^-1 = [R*R']^-1
+    //
+    //    It is a moderately fast algorithm for sparse matrices, which
+    //    at the price of more time and memory than the
+    //    SPARSE_CHOLESKY algorithm is numerically better behaved and
+    //    is rank revealing, i.e., it can reliably detect when the
+    //    Jacobian matrix is rank deficient.
+    //
+    // Neither SPARSE_CHOLESKY or SPARSE_QR are capable of computing
+    // the covariance if the Jacobian is rank deficient.
+
+    CovarianceAlgorithmType algorithm_type;
+
+    // If the Jacobian matrix is near singular, then inverting J'J
+    // will result in unreliable results, e.g, if
+    //
+    //   J = [1.0 1.0         ]
+    //       [1.0 1.0000001   ]
+    //
+    // which is essentially a rank deficient matrix, we have
+    //
+    //   inv(J'J) = [ 2.0471e+14  -2.0471e+14]
+    //              [-2.0471e+14   2.0471e+14]
+    //
+    // This is not a useful result. Therefore, by default
+    // Covariance::Compute will return false if a rank deficient
+    // Jacobian is encountered. How rank deficiency is detected
+    // depends on the algorithm being used.
+    //
+    // 1. DENSE_SVD
+    //
+    //      min_sigma / max_sigma < sqrt(min_reciprocal_condition_number)
+    //
+    //    where min_sigma and max_sigma are the minimum and maxiumum
+    //    singular values of J respectively.
+    //
+    // 2. SPARSE_CHOLESKY
+    //
+    //      cholmod_rcond < min_reciprocal_conditioner_number
+    //
+    //    Here cholmod_rcond is a crude estimate of the reciprocal
+    //    condition number of J'J by using the maximum and minimum
+    //    diagonal entries of the Cholesky factor R. There are no
+    //    theoretical guarantees associated with this test. It can
+    //    give false positives and negatives. Use at your own
+    //    risk. The default value of min_reciprocal_condition_number
+    //    has been set to a conservative value, and sometimes the
+    //    Covariance::Compute may return false even if it is possible
+    //    to estimate the covariance reliably. In such cases, the user
+    //    should exercise their judgement before lowering the value of
+    //    min_reciprocal_condition_number.
+    //
+    // 3. SPARSE_QR
+    //
+    //      rank(J) < num_col(J)
+    //
+    //   Here rank(J) is the estimate of the rank of J returned by the
+    //   SuiteSparseQR algorithm. It is a fairly reliable indication
+    //   of rank deficiency.
+    //
+    double min_reciprocal_condition_number;
+
+    // When using DENSE_SVD, the user has more control in dealing with
+    // singular and near singular covariance matrices.
+    //
+    // As mentioned above, when the covariance matrix is near
+    // singular, instead of computing the inverse of J'J, the
+    // Moore-Penrose pseudoinverse of J'J should be computed.
+    //
+    // If J'J has the eigen decomposition (lambda_i, e_i), where
+    // lambda_i is the i^th eigenvalue and e_i is the corresponding
+    // eigenvector, then the inverse of J'J is
+    //
+    //   inverse[J'J] = sum_i e_i e_i' / lambda_i
+    //
+    // and computing the pseudo inverse involves dropping terms from
+    // this sum that correspond to small eigenvalues.
+    //
+    // How terms are dropped is controlled by
+    // min_reciprocal_condition_number and null_space_rank.
+    //
+    // If null_space_rank is non-negative, then the smallest
+    // null_space_rank eigenvalue/eigenvectors are dropped
+    // irrespective of the magnitude of lambda_i. If the ratio of the
+    // smallest non-zero eigenvalue to the largest eigenvalue in the
+    // truncated matrix is still below
+    // min_reciprocal_condition_number, then the Covariance::Compute()
+    // will fail and return false.
+    //
+    // Setting null_space_rank = -1 drops all terms for which
+    //
+    //   lambda_i / lambda_max < min_reciprocal_condition_number.
+    //
+    // This option has no effect on the SPARSE_CHOLESKY or SPARSE_QR
+    // algorithms.
+    int null_space_rank;
+
+    int num_threads;
+
+    // Even though the residual blocks in the problem may contain loss
+    // functions, setting apply_loss_function to false will turn off
+    // the application of the loss function to the output of the cost
+    // function and in turn its effect on the covariance.
+    //
+    // TODO(sameergaarwal): Expand this based on Jim's experiments.
+    bool apply_loss_function;
+  };
+
+  explicit Covariance(const Options& options);
+  ~Covariance();
+
+  // Compute a part of the covariance matrix.
+  //
+  // The vector covariance_blocks, indexes into the covariance matrix
+  // block-wise using pairs of parameter blocks. This allows the
+  // covariance estimation algorithm to only compute and store these
+  // blocks.
+  //
+  // Since the covariance matrix is symmetric, if the user passes
+  // (block1, block2), then GetCovarianceBlock can be called with
+  // block1, block2 as well as block2, block1.
+  //
+  // covariance_blocks cannot contain duplicates. Bad things will
+  // happen if they do.
+  //
+  // Note that the list of covariance_blocks is only used to determine
+  // what parts of the covariance matrix are computed. The full
+  // Jacobian is used to do the computation, i.e. they do not have an
+  // impact on what part of the Jacobian is used for computation.
+  //
+  // The return value indicates the success or failure of the
+  // covariance computation. Please see the documentation for
+  // Covariance::Options for more on the conditions under which this
+  // function returns false.
+  bool Compute(
+      const vector<pair<const double*, const double*> >& covariance_blocks,
+      Problem* problem);
+
+  // Return the block of the covariance matrix corresponding to
+  // parameter_block1 and parameter_block2.
+  //
+  // Compute must be called before the first call to
+  // GetCovarianceBlock and the pair <parameter_block1,
+  // parameter_block2> OR the pair <parameter_block2,
+  // parameter_block1> must have been present in the vector
+  // covariance_blocks when Compute was called. Otherwise
+  // GetCovarianceBlock will return false.
+  //
+  // covariance_block must point to a memory location that can store a
+  // parameter_block1_size x parameter_block2_size matrix. The
+  // returned covariance will be a row-major matrix.
+  bool GetCovarianceBlock(const double* parameter_block1,
+                          const double* parameter_block2,
+                          double* covariance_block) const;
+
+ private:
+  internal::scoped_ptr<internal::CovarianceImpl> impl_;
+};
+
+}  // namespace ceres
+
+#endif  // CERES_PUBLIC_COVARIANCE_H_
diff --git a/include/ceres/crs_matrix.h b/include/ceres/crs_matrix.h
index c9fe8f7..8c470cd 100644
--- a/include/ceres/crs_matrix.h
+++ b/include/ceres/crs_matrix.h
@@ -44,17 +44,35 @@ struct CRSMatrix {
   int num_rows;
   int num_cols;
 
-  // A compressed row matrix stores its contents in three arrays.
-  // The non-zero pattern of the i^th row is given by
+  // A compressed row matrix stores its contents in three arrays,
+  // rows, cols and values.
   //
-  //   rows[cols[i] ... cols[i + 1]]
+  // rows is a num_rows + 1 sized array that points into the cols and
+  // values array. For each row i:
   //
-  // and the corresponding values by
+  // cols[rows[i]] ... cols[rows[i + 1] - 1] are the indices of the
+  // non-zero columns of row i.
   //
-  //   values[cols[i] ... cols[i + 1]]
+  // values[rows[i]] .. values[rows[i + 1] - 1] are the values of the
+  // corresponding entries.
   //
-  // Thus, cols is a vector of size num_cols + 1, and rows and values
-  // have as many entries as number of non-zeros in the matrix.
+  // cols and values contain as many entries as there are non-zeros in
+  // the matrix.
+  //
+  // e.g, consider the 3x4 sparse matrix
+  //
+  //  [ 0 10  0  4 ]
+  //  [ 0  2 -3  2 ]
+  //  [ 1  2  0  0 ]
+  //
+  // The three arrays will be:
+  //
+  //
+  //            -row0-  ---row1---  -row2-
+  //  rows   = [ 0,      2,          5,     7]
+  //  cols   = [ 1,  3,  1,  2,  3,  0,  1]
+  //  values = [10,  4,  2, -3,  2,  1,  2]
+
   vector<int> cols;
   vector<int> rows;
   vector<double> values;
diff --git a/include/ceres/dynamic_autodiff_cost_function.h b/include/ceres/dynamic_autodiff_cost_function.h
new file mode 100644
index 0000000..5d8f188
--- /dev/null
+++ b/include/ceres/dynamic_autodiff_cost_function.h
@@ -0,0 +1,259 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2012 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: mierle@gmail.com (Keir Mierle)
+//         sameeragarwal@google.com (Sameer Agarwal)
+//         thadh@gmail.com (Thad Hughes)
+//
+// This autodiff implementation differs from the one found in
+// autodiff_cost_function.h by supporting autodiff on cost functions with
+// variable numbers of parameters with variable sizes. With the other
+// implementation, all the sizes (both the number of parameter blocks and the
+// size of each block) must be fixed at compile time.
+//
+// The functor API differs slightly from the API for fixed size autodiff; the
+// expected interface for the cost functors is:
+//
+//   struct MyCostFunctor {
+//     template<typename T>
+//     bool operator()(T const* const* parameters, T* residuals) const {
+//       // Use parameters[i] to access the i'th parameter block.
+//     }
+//   }
+//
+// Since the sizing of the parameters is done at runtime, you must also specify
+// the sizes after creating the dynamic autodiff cost function. For example:
+//
+//   DynamicAutoDiffCostFunction<MyCostFunctor, 3> cost_function(
+//       new MyCostFunctor());
+//   cost_function.AddParameterBlock(5);
+//   cost_function.AddParameterBlock(10);
+//   cost_function.SetNumResiduals(21);
+//
+// Under the hood, the implementation evaluates the cost function multiple
+// times, computing a small set of the derivatives (four by default, controlled
+// by the Stride template parameter) with each pass. There is a tradeoff with
+// the size of the passes; you may want to experiment with the stride.
+
+#ifndef CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_
+#define CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_
+
+#include <cmath>
+#include <numeric>
+#include <vector>
+
+#include "ceres/cost_function.h"
+#include "ceres/internal/scoped_ptr.h"
+#include "ceres/jet.h"
+#include "glog/logging.h"
+
+namespace ceres {
+
+template <typename CostFunctor, int Stride = 4>
+class DynamicAutoDiffCostFunction : public CostFunction {
+ public:
+  explicit DynamicAutoDiffCostFunction(CostFunctor* functor)
+    : functor_(functor) {}
+
+  virtual ~DynamicAutoDiffCostFunction() {}
+
+  void AddParameterBlock(int size) {
+    mutable_parameter_block_sizes()->push_back(size);
+  }
+
+  void SetNumResiduals(int num_residuals) {
+    set_num_residuals(num_residuals);
+  }
+
+  virtual bool Evaluate(double const* const* parameters,
+                        double* residuals,
+                        double** jacobians) const {
+    CHECK_GT(num_residuals(), 0)
+        << "You must call DynamicAutoDiffCostFunction::SetNumResiduals() "
+        << "before DynamicAutoDiffCostFunction::Evaluate().";
+
+    if (jacobians == NULL) {
+      return (*functor_)(parameters, residuals);
+    }
+
+    // The difficulty with Jets, as implemented in Ceres, is that they were
+    // originally designed for strictly compile-sized use. At this point, there
+    // is a large body of code that assumes inside a cost functor it is
+    // acceptable to do e.g. T(1.5) and get an appropriately sized jet back.
+    //
+    // Unfortunately, it is impossible to communicate the expected size of a
+    // dynamically sized jet to the static instantiations that existing code
+    // depends on.
+    //
+    // To work around this issue, the solution here is to evaluate the
+    // jacobians in a series of passes, each one computing Stripe *
+    // num_residuals() derivatives. This is done with small, fixed-size jets.
+    const int num_parameter_blocks = parameter_block_sizes().size();
+    const int num_parameters = std::accumulate(parameter_block_sizes().begin(),
+                                               parameter_block_sizes().end(),
+                                               0);
+
+    // Allocate scratch space for the strided evaluation.
+    vector<Jet<double, Stride> > input_jets(num_parameters);
+    vector<Jet<double, Stride> > output_jets(num_residuals());
+
+    // Make the parameter pack that is sent to the functor (reused).
+    vector<Jet<double, Stride>* > jet_parameters(num_parameter_blocks,
+        static_cast<Jet<double, Stride>* >(NULL));
+    int num_active_parameters = 0;
+
+    // To handle constant parameters between non-constant parameter blocks, the
+    // start position --- a raw parameter index --- of each contiguous block of
+    // non-constant parameters is recorded in start_derivative_section.
+    vector<int> start_derivative_section;
+    bool in_derivative_section = false;
+    int parameter_cursor = 0;
+
+    // Discover the derivative sections and set the parameter values.
+    for (int i = 0; i < num_parameter_blocks; ++i) {
+      jet_parameters[i] = &input_jets[parameter_cursor];
+
+      const int parameter_block_size = parameter_block_sizes()[i];
+      if (jacobians[i] != NULL) {
+        if (!in_derivative_section) {
+          start_derivative_section.push_back(parameter_cursor);
+          in_derivative_section = true;
+        }
+
+        num_active_parameters += parameter_block_size;
+      } else {
+        in_derivative_section = false;
+      }
+
+      for (int j = 0; j < parameter_block_size; ++j, parameter_cursor++) {
+        input_jets[parameter_cursor].a = parameters[i][j];
+      }
+    }
+
+    // When `num_active_parameters % Stride != 0` then it can be the case
+    // that `active_parameter_count < Stride` while parameter_cursor is less
+    // than the total number of parameters and with no remaining non-constant
+    // parameter blocks. Pushing parameter_cursor (the total number of
+    // parameters) as a final entry to start_derivative_section is required
+    // because if a constant parameter block is encountered after the
+    // last non-constant block then current_derivative_section is incremented
+    // and would otherwise index an invalid position in
+    // start_derivative_section. Setting the final element to the total number
+    // of parameters means that this can only happen at most once in the loop
+    // below.
+    start_derivative_section.push_back(parameter_cursor);
+
+    // Evaluate all of the strides. Each stride is a chunk of the derivative to
+    // evaluate, typically some size proportional to the size of the SIMD
+    // registers of the CPU.
+    int num_strides = static_cast<int>(ceil(num_active_parameters /
+                                            static_cast<float>(Stride)));
+
+    int current_derivative_section = 0;
+    int current_derivative_section_cursor = 0;
+
+    for (int pass = 0; pass < num_strides; ++pass) {
+      // Set most of the jet components to zero, except for
+      // non-constant #Stride parameters.
+      const int initial_derivative_section = current_derivative_section;
+      const int initial_derivative_section_cursor =
+        current_derivative_section_cursor;
+
+      int active_parameter_count = 0;
+      parameter_cursor = 0;
+
+      for (int i = 0; i < num_parameter_blocks; ++i) {
+        for (int j = 0; j < parameter_block_sizes()[i];
+             ++j, parameter_cursor++) {
+          input_jets[parameter_cursor].v.setZero();
+          if (active_parameter_count < Stride &&
+              parameter_cursor >= (
+                start_derivative_section[current_derivative_section] +
+                current_derivative_section_cursor)) {
+            if (jacobians[i] != NULL) {
+              input_jets[parameter_cursor].v[active_parameter_count] = 1.0;
+              ++active_parameter_count;
+              ++current_derivative_section_cursor;
+            } else {
+              ++current_derivative_section;
+              current_derivative_section_cursor = 0;
+            }
+          }
+        }
+      }
+
+      if (!(*functor_)(&jet_parameters[0], &output_jets[0])) {
+        return false;
+      }
+
+      // Copy the pieces of the jacobians into their final place.
+      active_parameter_count = 0;
+
+      current_derivative_section = initial_derivative_section;
+      current_derivative_section_cursor = initial_derivative_section_cursor;
+
+      for (int i = 0, parameter_cursor = 0; i < num_parameter_blocks; ++i) {
+        for (int j = 0; j < parameter_block_sizes()[i];
+             ++j, parameter_cursor++) {
+          if (active_parameter_count < Stride &&
+              parameter_cursor >= (
+                start_derivative_section[current_derivative_section] +
+                current_derivative_section_cursor)) {
+            if (jacobians[i] != NULL) {
+              for (int k = 0; k < num_residuals(); ++k) {
+                jacobians[i][k * parameter_block_sizes()[i] + j] =
+                    output_jets[k].v[active_parameter_count];
+              }
+              ++active_parameter_count;
+              ++current_derivative_section_cursor;
+            } else {
+              ++current_derivative_section;
+              current_derivative_section_cursor = 0;
+            }
+          }
+        }
+      }
+
+      // Only copy the residuals over once (even though we compute them on
+      // every loop).
+      if (pass == num_strides - 1) {
+        for (int k = 0; k < num_residuals(); ++k) {
+          residuals[k] = output_jets[k].a;
+        }
+      }
+    }
+    return true;
+  }
+
+ private:
+  internal::scoped_ptr<CostFunctor> functor_;
+};
+
+}  // namespace ceres
+
+#endif  // CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_
diff --git a/include/ceres/fpclassify.h b/include/ceres/fpclassify.h
index 5a9ea15..b730832 100644
--- a/include/ceres/fpclassify.h
+++ b/include/ceres/fpclassify.h
@@ -41,6 +41,8 @@
 #include <float.h>
 #endif
 
+#include <limits>
+
 namespace ceres {
 
 #if defined(_MSC_VER)
diff --git a/include/ceres/gradient_checker.h b/include/ceres/gradient_checker.h
index 2ce605d..3ec8056 100644
--- a/include/ceres/gradient_checker.h
+++ b/include/ceres/gradient_checker.h
@@ -37,16 +37,16 @@
 #ifndef CERES_PUBLIC_GRADIENT_CHECKER_H_
 #define CERES_PUBLIC_GRADIENT_CHECKER_H_
 
-#include <algorithm>
 #include <cstddef>
+#include <algorithm>
 #include <vector>
 
-#include <glog/logging.h>
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/fixed_array.h"
 #include "ceres/internal/macros.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/numeric_diff_cost_function.h"
+#include "glog/logging.h"
 
 namespace ceres {
 
@@ -161,7 +161,8 @@ class GradientChecker {
     results->finite_difference_jacobians.resize(num_blocks);
 
     internal::FixedArray<double*> term_jacobian_pointers(num_blocks);
-    internal::FixedArray<double*> finite_difference_jacobian_pointers(num_blocks);
+    internal::FixedArray<double*>
+        finite_difference_jacobian_pointers(num_blocks);
     for (int i = 0; i < num_blocks; i++) {
       results->term_jacobians[i].resize(num_residuals, block_sizes[i]);
       term_jacobian_pointers[i] = results->term_jacobians[i].data();
diff --git a/include/ceres/internal/autodiff.h b/include/ceres/internal/autodiff.h
index 581e881..cf21d7a 100644
--- a/include/ceres/internal/autodiff.h
+++ b/include/ceres/internal/autodiff.h
@@ -38,7 +38,7 @@
 //
 //   struct F {
 //     template<typename T>
-//     bool operator(const T *x, const T *y, ..., T *z) {
+//     bool operator()(const T *x, const T *y, ..., T *z) {
 //       // Compute z[] based on x[], y[], ...
 //       // return true if computation succeeded, false otherwise.
 //     }
@@ -102,7 +102,7 @@
 //
 //   struct F {
 //     template<typename T>
-//     bool operator(const T *p, const T *q, T *z) {
+//     bool operator()(const T *p, const T *q, T *z) {
 //       // ...
 //     }
 //   };
@@ -142,10 +142,11 @@
 
 #include <stddef.h>
 
-#include <glog/logging.h>
 #include "ceres/jet.h"
 #include "ceres/internal/eigen.h"
 #include "ceres/internal/fixed_array.h"
+#include "ceres/internal/variadic_evaluate.h"
+#include "glog/logging.h"
 
 namespace ceres {
 namespace internal {
@@ -164,13 +165,14 @@ namespace internal {
 //
 // is what would get put in dst if N was 3, offset was 3, and the jet type JetT
 // was 8-dimensional.
-template <typename JetT, typename T>
-inline void Make1stOrderPerturbation(int offset, int N, const T *src,
-                                     JetT *dst) {
+template <typename JetT, typename T, int N>
+inline void Make1stOrderPerturbation(int offset, const T* src, JetT* dst) {
   DCHECK(src);
   DCHECK(dst);
   for (int j = 0; j < N; ++j) {
-    dst[j] = JetT(src[j], offset + j);
+    dst[j].a = src[j];
+    dst[j].v.setZero();
+    dst[j].v[offset + j] = 1.0;
   }
 }
 
@@ -191,151 +193,15 @@ inline void Take1stOrderPart(const int M, const JetT *src, T *dst) {
   DCHECK(src);
   DCHECK(dst);
   for (int i = 0; i < M; ++i) {
-    Eigen::Map<Eigen::Matrix<T, N, 1> >(dst + N * i, N) = src[i].v.template segment<N>(N0);
+    Eigen::Map<Eigen::Matrix<T, N, 1> >(dst + N * i, N) =
+        src[i].v.template segment<N>(N0);
   }
 }
 
-// This block of quasi-repeated code calls the user-supplied functor, which may
-// take a variable number of arguments. This is accomplished by specializing the
-// struct based on the size of the trailing parameters; parameters with 0 size
-// are assumed missing.
-//
-// Supporting variadic functions is the primary source of complexity in the
-// autodiff implementation.
-
-template<typename Functor, typename T, int N0, int N1, int N2, int N3, int N4,
-         int N5, int N6, int N7, int N8, int N9>
-struct VariadicEvaluate {
-  static bool Call(const Functor& functor, T const *const *input, T* output) {
-    return functor(input[0],
-                   input[1],
-                   input[2],
-                   input[3],
-                   input[4],
-                   input[5],
-                   input[6],
-                   input[7],
-                   input[8],
-                   input[9],
-                   output);
-  }
-};
-
-template<typename Functor, typename T, int N0, int N1, int N2, int N3, int N4,
-         int N5, int N6, int N7, int N8>
-struct VariadicEvaluate<Functor, T, N0, N1, N2, N3, N4, N5, N6, N7, N8, 0> {
-  static bool Call(const Functor& functor, T const *const *input, T* output) {
-    return functor(input[0],
-                   input[1],
-                   input[2],
-                   input[3],
-                   input[4],
-                   input[5],
-                   input[6],
-                   input[7],
-                   input[8],
-                   output);
-  }
-};
-
-template<typename Functor, typename T, int N0, int N1, int N2, int N3, int N4,
-         int N5, int N6, int N7>
-struct VariadicEvaluate<Functor, T, N0, N1, N2, N3, N4, N5, N6, N7, 0, 0> {
-  static bool Call(const Functor& functor, T const *const *input, T* output) {
-    return functor(input[0],
-                   input[1],
-                   input[2],
-                   input[3],
-                   input[4],
-                   input[5],
-                   input[6],
-                   input[7],
-                   output);
-  }
-};
-
-template<typename Functor, typename T, int N0, int N1, int N2, int N3, int N4,
-         int N5, int N6>
-struct VariadicEvaluate<Functor, T, N0, N1, N2, N3, N4, N5, N6, 0, 0, 0> {
-  static bool Call(const Functor& functor, T const *const *input, T* output) {
-    return functor(input[0],
-                   input[1],
-                   input[2],
-                   input[3],
-                   input[4],
-                   input[5],
-                   input[6],
-                   output);
-  }
-};
-
-template<typename Functor, typename T, int N0, int N1, int N2, int N3, int N4,
-         int N5>
-struct VariadicEvaluate<Functor, T, N0, N1, N2, N3, N4, N5, 0, 0, 0, 0> {
-  static bool Call(const Functor& functor, T const *const *input, T* output) {
-    return functor(input[0],
-                   input[1],
-                   input[2],
-                   input[3],
-                   input[4],
-                   input[5],
-                   output);
-  }
-};
-
-template<typename Functor, typename T, int N0, int N1, int N2, int N3, int N4>
-struct VariadicEvaluate<Functor, T, N0, N1, N2, N3, N4, 0, 0, 0, 0, 0> {
-  static bool Call(const Functor& functor, T const *const *input, T* output) {
-    return functor(input[0],
-                   input[1],
-                   input[2],
-                   input[3],
-                   input[4],
-                   output);
-  }
-};
-
-template<typename Functor, typename T, int N0, int N1, int N2, int N3>
-struct VariadicEvaluate<Functor, T, N0, N1, N2, N3, 0, 0, 0, 0, 0, 0> {
-  static bool Call(const Functor& functor, T const *const *input, T* output) {
-    return functor(input[0],
-                   input[1],
-                   input[2],
-                   input[3],
-                   output);
-  }
-};
-
-template<typename Functor, typename T, int N0, int N1, int N2>
-struct VariadicEvaluate<Functor, T, N0, N1, N2, 0, 0, 0, 0, 0, 0, 0> {
-  static bool Call(const Functor& functor, T const *const *input, T* output) {
-    return functor(input[0],
-                   input[1],
-                   input[2],
-                   output);
-  }
-};
-
-template<typename Functor, typename T, int N0, int N1>
-struct VariadicEvaluate<Functor, T, N0, N1, 0, 0, 0, 0, 0, 0, 0, 0> {
-  static bool Call(const Functor& functor, T const *const *input, T* output) {
-    return functor(input[0],
-                   input[1],
-                   output);
-  }
-};
-
-template<typename Functor, typename T, int N0>
-struct VariadicEvaluate<Functor, T, N0, 0, 0, 0, 0, 0, 0, 0, 0, 0> {
-  static bool Call(const Functor& functor, T const *const *input, T* output) {
-    return functor(input[0],
-                   output);
-  }
-};
-
-// This is in a struct because default template parameters on a function are not
-// supported in C++03 (though it is available in C++0x). N0 through N5 are the
-// dimension of the input arguments to the user supplied functor.
+// This is in a struct because default template parameters on a
+// function are not supported in C++03 (though it is available in
+// C++0x). N0 through N5 are the dimension of the input arguments to
+// the user supplied functor.
 template <typename Functor, typename T,
           int N0 = 0, int N1 = 0, int N2 = 0, int N3 = 0, int N4 = 0,
           int N5 = 0, int N6 = 0, int N7 = 0, int N8 = 0, int N9 = 0>
@@ -347,7 +213,7 @@ struct AutoDiff {
                             T **jacobians) {
     // This block breaks the 80 column rule to keep it somewhat readable.
     DCHECK_GT(num_outputs, 0);
-    CHECK((!N1 && !N2 && !N3 && !N4 && !N5 && !N6 && !N7 && !N8 && !N9) ||
+    DCHECK((!N1 && !N2 && !N3 && !N4 && !N5 && !N6 && !N7 && !N8 && !N9) ||
           ((N1 > 0) && !N2 && !N3 && !N4 && !N5 && !N6 && !N7 && !N8 && !N9) ||
           ((N1 > 0) && (N2 > 0) && !N3 && !N4 && !N5 && !N6 && !N7 && !N8 && !N9) ||
           ((N1 > 0) && (N2 > 0) && (N3 > 0) && !N4 && !N5 && !N6 && !N7 && !N8 && !N9) ||
@@ -390,14 +256,15 @@ struct AutoDiff {
         x.get() + jet8,
         x.get() + jet9,
     };
-    JetT *output = x.get() + jet6;
 
-#define CERES_MAKE_1ST_ORDER_PERTURBATION(i) \
-    if (N ## i) { \
-      internal::Make1stOrderPerturbation(jet ## i, \
-                                         N ## i, \
-                                         parameters[i], \
-                                         x.get() + jet ## i); \
+    JetT* output = x.get() + N0 + N1 + N2 + N3 + N4 + N5 + N6 + N7 + N8 + N9;
+
+#define CERES_MAKE_1ST_ORDER_PERTURBATION(i)                            \
+    if (N ## i) {                                                       \
+      internal::Make1stOrderPerturbation<JetT, T, N ## i>(              \
+          jet ## i,                                                     \
+          parameters[i],                                                \
+          x.get() + jet ## i);                                          \
     }
     CERES_MAKE_1ST_ORDER_PERTURBATION(0);
     CERES_MAKE_1ST_ORDER_PERTURBATION(1);
diff --git a/include/ceres/internal/eigen.h b/include/ceres/internal/eigen.h
index be76f9e..85df54b 100644
--- a/include/ceres/internal/eigen.h
+++ b/include/ceres/internal/eigen.h
@@ -35,27 +35,40 @@
 
 namespace ceres {
 
-using Eigen::Dynamic;
-using Eigen::RowMajor;
-
-typedef Eigen::Matrix<double, Dynamic, 1> Vector;
-typedef Eigen::Matrix<double, Dynamic, Dynamic, RowMajor> Matrix;
+typedef Eigen::Matrix<double, Eigen::Dynamic, 1> Vector;
+typedef Eigen::Matrix<double,
+                      Eigen::Dynamic,
+                      Eigen::Dynamic,
+                      Eigen::RowMajor> Matrix;
 typedef Eigen::Map<Vector> VectorRef;
 typedef Eigen::Map<Matrix> MatrixRef;
-typedef Eigen::Map<Matrix, Eigen::Aligned> AlignedMatrixRef;
 typedef Eigen::Map<const Vector> ConstVectorRef;
-typedef Eigen::Map<const Matrix, Eigen::Aligned> ConstAlignedMatrixRef;
 typedef Eigen::Map<const Matrix> ConstMatrixRef;
 
+// Column major matrices for DenseSparseMatrix/DenseQRSolver
+typedef Eigen::Matrix<double,
+                      Eigen::Dynamic,
+                      Eigen::Dynamic,
+                      Eigen::ColMajor> ColMajorMatrix;
+
+typedef Eigen::Map<ColMajorMatrix, 0,
+                   Eigen::Stride<Eigen::Dynamic, 1> > ColMajorMatrixRef;
+
+typedef Eigen::Map<const ColMajorMatrix,
+                   0,
+                   Eigen::Stride<Eigen::Dynamic, 1> > ConstColMajorMatrixRef;
+
+
+
 // C++ does not support templated typdefs, thus the need for this
 // struct so that we can support statically sized Matrix and Maps.
 template <int num_rows = Eigen::Dynamic, int num_cols = Eigen::Dynamic>
 struct EigenTypes {
-  typedef Eigen::Matrix <double, num_rows, num_cols, RowMajor>
+  typedef Eigen::Matrix <double, num_rows, num_cols, Eigen::RowMajor>
   Matrix;
 
   typedef Eigen::Map<
-    Eigen::Matrix<double, num_rows, num_cols, RowMajor> >
+    Eigen::Matrix<double, num_rows, num_cols, Eigen::RowMajor> >
   MatrixRef;
 
   typedef Eigen::Matrix <double, num_rows, 1>
@@ -67,7 +80,7 @@ struct EigenTypes {
 
 
   typedef Eigen::Map<
-    const Eigen::Matrix<double, num_rows, num_cols, RowMajor> >
+    const Eigen::Matrix<double, num_rows, num_cols, Eigen::RowMajor> >
   ConstMatrixRef;
 
   typedef Eigen::Map <
diff --git a/include/ceres/internal/fixed_array.h b/include/ceres/internal/fixed_array.h
index fa4a339..ee264d1 100644
--- a/include/ceres/internal/fixed_array.h
+++ b/include/ceres/internal/fixed_array.h
@@ -33,10 +33,10 @@
 #define CERES_PUBLIC_INTERNAL_FIXED_ARRAY_H_
 
 #include <cstddef>
-#include <glog/logging.h>
 #include "Eigen/Core"
 #include "ceres/internal/macros.h"
 #include "ceres/internal/manual_constructor.h"
+#include "glog/logging.h"
 
 namespace ceres {
 namespace internal {
diff --git a/include/ceres/internal/macros.h b/include/ceres/internal/macros.h
index 83ec311..388cf30 100644
--- a/include/ceres/internal/macros.h
+++ b/include/ceres/internal/macros.h
@@ -132,16 +132,16 @@ char (&ArraySizeHelper(const T (&array)[N]))[N];
 // - wan 2005-11-16
 //
 // Starting with Visual C++ 2005, WinNT.h includes ARRAYSIZE. However,
-// the definition comes from the over-broad windows.h header that 
+// the definition comes from the over-broad windows.h header that
 // introduces a macro, ERROR, that conflicts with the logging framework
 // that Ceres uses. Instead, rename ARRAYSIZE to CERES_ARRAYSIZE.
-#define CERES_ARRAYSIZE(a) \
-  ((sizeof(a) / sizeof(*(a))) / \
+#define CERES_ARRAYSIZE(a)                              \
+  ((sizeof(a) / sizeof(*(a))) /                         \
    static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
 
-// Tell the compiler to warn about unused return values for functions declared
-// with this macro.  The macro should be used on function declarations
-// following the argument list:
+// Tell the compiler to warn about unused return values for functions
+// declared with this macro.  The macro should be used on function
+// declarations following the argument list:
 //
 //   Sprocket* AllocateSprocket() MUST_USE_RESULT;
 //
diff --git a/include/ceres/internal/numeric_diff.h b/include/ceres/internal/numeric_diff.h
new file mode 100644
index 0000000..4058366
--- /dev/null
+++ b/include/ceres/internal/numeric_diff.h
@@ -0,0 +1,199 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+//         mierle@gmail.com (Keir Mierle)
+//
+// Finite differencing routine used by NumericDiffCostFunction.
+
+#ifndef CERES_PUBLIC_INTERNAL_NUMERIC_DIFF_H_
+#define CERES_PUBLIC_INTERNAL_NUMERIC_DIFF_H_
+
+#include <cstring>
+
+#include "Eigen/Dense"
+#include "ceres/cost_function.h"
+#include "ceres/internal/scoped_ptr.h"
+#include "ceres/internal/variadic_evaluate.h"
+#include "ceres/types.h"
+#include "glog/logging.h"
+
+
+namespace ceres {
+namespace internal {
+
+// Helper templates that allow evaluation of a variadic functor or a
+// CostFunction object.
+template <typename CostFunctor,
+          int N0, int N1, int N2, int N3, int N4,
+          int N5, int N6, int N7, int N8, int N9 >
+bool EvaluateImpl(const CostFunctor* functor,
+                  double const* const* parameters,
+                  double* residuals,
+                  const void* /* NOT USED */) {
+  return VariadicEvaluate<CostFunctor,
+                          double,
+                          N0, N1, N2, N3, N4, N5, N6, N7, N8, N9>::Call(
+                              *functor,
+                              parameters,
+                              residuals);
+}
+
+template <typename CostFunctor,
+          int N0, int N1, int N2, int N3, int N4,
+          int N5, int N6, int N7, int N8, int N9 >
+bool EvaluateImpl(const CostFunctor* functor,
+                  double const* const* parameters,
+                  double* residuals,
+                  const CostFunction* /* NOT USED */) {
+  return functor->Evaluate(parameters, residuals, NULL);
+}
+
+// This is split from the main class because C++ doesn't allow partial template
+// specializations for member functions. The alternative is to repeat the main
+// class for differing numbers of parameters, which is also unfortunate.
+template <typename CostFunctor,
+          NumericDiffMethod kMethod,
+          int kNumResiduals,
+          int N0, int N1, int N2, int N3, int N4,
+          int N5, int N6, int N7, int N8, int N9,
+          int kParameterBlock,
+          int kParameterBlockSize>
+struct NumericDiff {
+  // Mutates parameters but must restore them before return.
+  static bool EvaluateJacobianForParameterBlock(
+      const CostFunctor* functor,
+      double const* residuals_at_eval_point,
+      const double relative_step_size,
+      double **parameters,
+      double *jacobian) {
+    using Eigen::Map;
+    using Eigen::Matrix;
+    using Eigen::RowMajor;
+    using Eigen::ColMajor;
+
+    typedef Matrix<double, kNumResiduals, 1> ResidualVector;
+    typedef Matrix<double, kParameterBlockSize, 1> ParameterVector;
+    typedef Matrix<double, kNumResiduals, kParameterBlockSize,
+                   (kParameterBlockSize == 1 &&
+                    kNumResiduals > 1) ? ColMajor : RowMajor> JacobianMatrix;
+
+
+    Map<JacobianMatrix> parameter_jacobian(jacobian,
+                                           kNumResiduals,
+                                           kParameterBlockSize);
+
+    // Mutate 1 element at a time and then restore.
+    Map<ParameterVector> x_plus_delta(parameters[kParameterBlock],
+                                      kParameterBlockSize);
+    ParameterVector x(x_plus_delta);
+    ParameterVector step_size = x.array().abs() * relative_step_size;
+
+    // To handle cases where a parameter is exactly zero, instead use
+    // the mean step_size for the other dimensions. If all the
+    // parameters are zero, there's no good answer. Take
+    // relative_step_size as a guess and hope for the best.
+    const double fallback_step_size =
+        (step_size.sum() == 0)
+        ? relative_step_size
+        : step_size.sum() / step_size.rows();
+
+    // For each parameter in the parameter block, use finite differences to
+    // compute the derivative for that parameter.
+    for (int j = 0; j < kParameterBlockSize; ++j) {
+      const double delta =
+          (step_size(j) == 0.0) ? fallback_step_size : step_size(j);
+
+      x_plus_delta(j) = x(j) + delta;
+
+      double residuals[kNumResiduals];  // NOLINT
+
+      if (!EvaluateImpl<CostFunctor, N0, N1, N2, N3, N4, N5, N6, N7, N8, N9>(
+              functor, parameters, residuals, functor)) {
+        return false;
+      }
+
+      // Compute this column of the jacobian in 3 steps:
+      // 1. Store residuals for the forward part.
+      // 2. Subtract residuals for the backward (or 0) part.
+      // 3. Divide out the run.
+      parameter_jacobian.col(j) =
+          Map<const ResidualVector>(residuals, kNumResiduals);
+
+      double one_over_delta = 1.0 / delta;
+      if (kMethod == CENTRAL) {
+        // Compute the function on the other side of x(j).
+        x_plus_delta(j) = x(j) - delta;
+
+        if (!EvaluateImpl<CostFunctor, N0, N1, N2, N3, N4, N5, N6, N7, N8, N9>(
+                functor, parameters, residuals, functor)) {
+          return false;
+        }
+
+        parameter_jacobian.col(j) -=
+            Map<ResidualVector>(residuals, kNumResiduals, 1);
+        one_over_delta /= 2;
+      } else {
+        // Forward difference only; reuse existing residuals evaluation.
+        parameter_jacobian.col(j) -=
+            Map<const ResidualVector>(residuals_at_eval_point, kNumResiduals);
+      }
+      x_plus_delta(j) = x(j);  // Restore x_plus_delta.
+
+      // Divide out the run to get slope.
+      parameter_jacobian.col(j) *= one_over_delta;
+    }
+    return true;
+  }
+};
+
+template <typename CostFunctor,
+          NumericDiffMethod kMethod,
+          int kNumResiduals,
+          int N0, int N1, int N2, int N3, int N4,
+          int N5, int N6, int N7, int N8, int N9,
+          int kParameterBlock>
+struct NumericDiff<CostFunctor, kMethod, kNumResiduals,
+                   N0, N1, N2, N3, N4, N5, N6, N7, N8, N9,
+                   kParameterBlock, 0> {
+  // Mutates parameters but must restore them before return.
+  static bool EvaluateJacobianForParameterBlock(
+      const CostFunctor* functor,
+      double const* residuals_at_eval_point,
+      const double relative_step_size,
+      double **parameters,
+      double *jacobian) {
+    LOG(FATAL) << "Control should never reach here.";
+    return true;
+  }
+};
+
+}  // namespace internal
+}  // namespace ceres
+
+#endif  // CERES_PUBLIC_INTERNAL_NUMERIC_DIFF_H_
diff --git a/include/ceres/internal/scoped_ptr.h b/include/ceres/internal/scoped_ptr.h
index 44f198b..5dfb551 100644
--- a/include/ceres/internal/scoped_ptr.h
+++ b/include/ceres/internal/scoped_ptr.h
@@ -38,6 +38,7 @@
 #include <assert.h>
 #include <stdlib.h>
 #include <cstddef>
+#include <algorithm>
 
 namespace ceres {
 namespace internal {
@@ -49,18 +50,17 @@ template <class C> class scoped_array;
 template <class C>
 scoped_ptr<C> make_scoped_ptr(C *);
 
-// A scoped_ptr<T> is like a T*, except that the destructor of scoped_ptr<T>
-// automatically deletes the pointer it holds (if any). That is, scoped_ptr<T>
-// owns the T object that it points to. Like a T*, a scoped_ptr<T> may hold
-// either NULL or a pointer to a T object. Also like T*, scoped_ptr<T> is
-// thread-compatible, and once you dereference it, you get the threadsafety
-// guarantees of T.
+// A scoped_ptr<T> is like a T*, except that the destructor of
+// scoped_ptr<T> automatically deletes the pointer it holds (if
+// any). That is, scoped_ptr<T> owns the T object that it points
+// to. Like a T*, a scoped_ptr<T> may hold either NULL or a pointer to
+// a T object. Also like T*, scoped_ptr<T> is thread-compatible, and
+// once you dereference it, you get the threadsafety guarantees of T.
 //
 // The size of a scoped_ptr is small: sizeof(scoped_ptr<C>) == sizeof(C*)
 template <class C>
 class scoped_ptr {
  public:
-
   // The element type
   typedef C element_type;
 
@@ -193,7 +193,6 @@ scoped_ptr<C> make_scoped_ptr(C *p) {
 template <class C>
 class scoped_array {
  public:
-
   // The element type
   typedef C element_type;
 
diff --git a/include/ceres/internal/variadic_evaluate.h b/include/ceres/internal/variadic_evaluate.h
new file mode 100644
index 0000000..9a473d5
--- /dev/null
+++ b/include/ceres/internal/variadic_evaluate.h
@@ -0,0 +1,182 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+//         mierle@gmail.com (Keir Mierle)
+
+#ifndef CERES_PUBLIC_INTERNAL_VARIADIC_EVALUATE_H_
+#define CERES_PUBLIC_INTERNAL_VARIADIC_EVALUATE_H_
+
+#include <stddef.h>
+
+#include "ceres/jet.h"
+#include "ceres/internal/eigen.h"
+#include "ceres/internal/fixed_array.h"
+#include "glog/logging.h"
+
+namespace ceres {
+namespace internal {
+
+// This block of quasi-repeated code calls the user-supplied functor, which may
+// take a variable number of arguments. This is accomplished by specializing the
+// struct based on the size of the trailing parameters; parameters with 0 size
+// are assumed missing.
+template<typename Functor, typename T, int N0, int N1, int N2, int N3, int N4,
+         int N5, int N6, int N7, int N8, int N9>
+struct VariadicEvaluate {
+  static bool Call(const Functor& functor, T const *const *input, T* output) {
+    return functor(input[0],
+                   input[1],
+                   input[2],
+                   input[3],
+                   input[4],
+                   input[5],
+                   input[6],
+                   input[7],
+                   input[8],
+                   input[9],
+                   output);
+  }
+};
+
+template<typename Functor, typename T, int N0, int N1, int N2, int N3, int N4,
+         int N5, int N6, int N7, int N8>
+struct VariadicEvaluate<Functor, T, N0, N1, N2, N3, N4, N5, N6, N7, N8, 0> {
+  static bool Call(const Functor& functor, T const *const *input, T* output) {
+    return functor(input[0],
+                   input[1],
+                   input[2],
+                   input[3],
+                   input[4],
+                   input[5],
+                   input[6],
+                   input[7],
+                   input[8],
+                   output);
+  }
+};
+
+template<typename Functor, typename T, int N0, int N1, int N2, int N3, int N4,
+         int N5, int N6, int N7>
+struct VariadicEvaluate<Functor, T, N0, N1, N2, N3, N4, N5, N6, N7, 0, 0> {
+  static bool Call(const Functor& functor, T const *const *input, T* output) {
+    return functor(input[0],
+                   input[1],
+                   input[2],
+                   input[3],
+                   input[4],
+                   input[5],
+                   input[6],
+                   input[7],
+                   output);
+  }
+};
+
+template<typename Functor, typename T, int N0, int N1, int N2, int N3, int N4,
+         int N5, int N6>
+struct VariadicEvaluate<Functor, T, N0, N1, N2, N3, N4, N5, N6, 0, 0, 0> {
+  static bool Call(const Functor& functor, T const *const *input, T* output) {
+    return functor(input[0],
+                   input[1],
+                   input[2],
+                   input[3],
+                   input[4],
+                   input[5],
+                   input[6],
+                   output);
+  }
+};
+
+template<typename Functor, typename T, int N0, int N1, int N2, int N3, int N4,
+         int N5>
+struct VariadicEvaluate<Functor, T, N0, N1, N2, N3, N4, N5, 0, 0, 0, 0> {
+  static bool Call(const Functor& functor, T const *const *input, T* output) {
+    return functor(input[0],
+                   input[1],
+                   input[2],
+                   input[3],
+                   input[4],
+                   input[5],
+                   output);
+  }
+};
+
+template<typename Functor, typename T, int N0, int N1, int N2, int N3, int N4>
+struct VariadicEvaluate<Functor, T, N0, N1, N2, N3, N4, 0, 0, 0, 0, 0> {
+  static bool Call(const Functor& functor, T const *const *input, T* output) {
+    return functor(input[0],
+                   input[1],
+                   input[2],
+                   input[3],
+                   input[4],
+                   output);
+  }
+};
+
+template<typename Functor, typename T, int N0, int N1, int N2, int N3>
+struct VariadicEvaluate<Functor, T, N0, N1, N2, N3, 0, 0, 0, 0, 0, 0> {
+  static bool Call(const Functor& functor, T const *const *input, T* output) {
+    return functor(input[0],
+                   input[1],
+                   input[2],
+                   input[3],
+                   output);
+  }
+};
+
+template<typename Functor, typename T, int N0, int N1, int N2>
+struct VariadicEvaluate<Functor, T, N0, N1, N2, 0, 0, 0, 0, 0, 0, 0> {
+  static bool Call(const Functor& functor, T const *const *input, T* output) {
+    return functor(input[0],
+                   input[1],
+                   input[2],
+                   output);
+  }
+};
+
+template<typename Functor, typename T, int N0, int N1>
+struct VariadicEvaluate<Functor, T, N0, N1, 0, 0, 0, 0, 0, 0, 0, 0> {
+  static bool Call(const Functor& functor, T const *const *input, T* output) {
+    return functor(input[0],
+                   input[1],
+                   output);
+  }
+};
+
+template<typename Functor, typename T, int N0>
+struct VariadicEvaluate<Functor, T, N0, 0, 0, 0, 0, 0, 0, 0, 0, 0> {
+  static bool Call(const Functor& functor, T const *const *input, T* output) {
+    return functor(input[0],
+                   output);
+  }
+};
+
+}  // namespace internal
+}  // namespace ceres
+
+#endif  // CERES_PUBLIC_INTERNAL_VARIADIC_EVALUATE_H_
diff --git a/include/ceres/iteration_callback.h b/include/ceres/iteration_callback.h
index 57cf0a6..987c2d9 100644
--- a/include/ceres/iteration_callback.h
+++ b/include/ceres/iteration_callback.h
@@ -52,6 +52,10 @@ struct IterationSummary {
         gradient_max_norm(0.0),
         step_norm(0.0),
         eta(0.0),
+        step_size(0.0),
+        line_search_function_evaluations(0),
+        line_search_gradient_evaluations(0),
+        line_search_iterations(0),
         linear_solver_iterations(0),
         iteration_time_in_seconds(0.0),
         step_solver_time_in_seconds(0.0),
@@ -116,10 +120,24 @@ struct IterationSummary {
   // ignore it.
   double eta;
 
+  // Step sized computed by the line search algorithm.
+  double step_size;
+
+  // Number of function value evaluations used by the line search algorithm.
+  int line_search_function_evaluations;
+
+  // Number of function gradient evaluations used by the line search algorithm.
+  int line_search_gradient_evaluations;
+
+  // Number of iterations taken by the line search algorithm.
+  int line_search_iterations;
+
   // Number of iterations taken by the linear solver to solve for the
   // Newton step.
   int linear_solver_iterations;
 
+  // All times reported below are wall times.
+
   // Time (in seconds) spent inside the minimizer loop in the current
   // iteration.
   double iteration_time_in_seconds;
diff --git a/include/ceres/jet.h b/include/ceres/jet.h
index 96e2256..4d2a857 100644
--- a/include/ceres/jet.h
+++ b/include/ceres/jet.h
@@ -348,8 +348,12 @@ Jet<T, N> operator/(const Jet<T, N>& f,
   //   b + v   (b + v)(b - v)        b^2
   //
   // which holds because v*v = 0.
-  h.a = f.a / g.a;
-  h.v = (f.v - f.a / g.a * g.v) / g.a;
+  const T g_a_inverse = T(1.0) / g.a;
+  h.a = f.a * g_a_inverse;
+  const T f_a_by_g_a = f.a * g_a_inverse;
+  for (int i = 0; i < N; ++i) {
+    h.v[i] = (f.v[i] - f_a_by_g_a * g.v[i]) * g_a_inverse;
+  }
   return h;
 }
 
@@ -358,7 +362,8 @@ template<typename T, int N> inline
 Jet<T, N> operator/(T s, const Jet<T, N>& g) {
   Jet<T, N> h;
   h.a = s / g.a;
-  h.v = - s * g.v / (g.a * g.a);
+  const T minus_s_g_a_inverse2 = -s / (g.a * g.a);
+  h.v = g.v * minus_s_g_a_inverse2;
   return h;
 }
 
@@ -366,8 +371,9 @@ Jet<T, N> operator/(T s, const Jet<T, N>& g) {
 template<typename T, int N> inline
 Jet<T, N> operator/(const Jet<T, N>& f, T s) {
   Jet<T, N> h;
-  h.a = f.a / s;
-  h.v = f.v / s;
+  const T s_inverse = 1.0 / s;
+  h.a = f.a * s_inverse;
+  h.v = f.v * s_inverse;
   return h;
 }
 
@@ -399,7 +405,6 @@ CERES_DEFINE_JET_COMPARISON_OPERATOR( != )  // NOLINT
 // double-valued and Jet-valued functions, but we are not allowed to put
 // Jet-valued functions inside namespace std.
 //
-// Missing: cosh, sinh, tanh, tan
 // TODO(keir): Switch to "using".
 inline double abs     (double x) { return std::abs(x);      }
 inline double log     (double x) { return std::log(x);      }
@@ -409,6 +414,11 @@ inline double cos     (double x) { return std::cos(x);      }
 inline double acos    (double x) { return std::acos(x);     }
 inline double sin     (double x) { return std::sin(x);      }
 inline double asin    (double x) { return std::asin(x);     }
+inline double tan     (double x) { return std::tan(x);      }
+inline double atan    (double x) { return std::atan(x);     }
+inline double sinh    (double x) { return std::sinh(x);     }
+inline double cosh    (double x) { return std::cosh(x);     }
+inline double tanh    (double x) { return std::tanh(x);     }
 inline double pow  (double x, double y) { return std::pow(x, y);   }
 inline double atan2(double y, double x) { return std::atan2(y, x); }
 
@@ -425,7 +435,8 @@ template <typename T, int N> inline
 Jet<T, N> log(const Jet<T, N>& f) {
   Jet<T, N> g;
   g.a = log(f.a);
-  g.v = f.v / f.a;
+  const T a_inverse = T(1.0) / f.a;
+  g.v = f.v * a_inverse;
   return g;
 }
 
@@ -443,7 +454,8 @@ template <typename T, int N> inline
 Jet<T, N> sqrt(const Jet<T, N>& f) {
   Jet<T, N> g;
   g.a = sqrt(f.a);
-  g.v = f.v / (T(2.0) * g.a);
+  const T two_a_inverse = T(1.0) / (T(2.0) * g.a);
+  g.v = f.v * two_a_inverse;
   return g;
 }
 
@@ -452,7 +464,7 @@ template <typename T, int N> inline
 Jet<T, N> cos(const Jet<T, N>& f) {
   Jet<T, N> g;
   g.a = cos(f.a);
-  T sin_a = sin(f.a);
+  const T sin_a = sin(f.a);
   g.v = - sin_a * f.v;
   return g;
 }
@@ -462,7 +474,8 @@ template <typename T, int N> inline
 Jet<T, N> acos(const Jet<T, N>& f) {
   Jet<T, N> g;
   g.a = acos(f.a);
-  g.v = - T(1.0) / sqrt(T(1.0) - f.a * f.a) * f.v;
+  const T tmp = - T(1.0) / sqrt(T(1.0) - f.a * f.a);
+  g.v = tmp * f.v;
   return g;
 }
 
@@ -471,7 +484,7 @@ template <typename T, int N> inline
 Jet<T, N> sin(const Jet<T, N>& f) {
   Jet<T, N> g;
   g.a = sin(f.a);
-  T cos_a = cos(f.a);
+  const T cos_a = cos(f.a);
   g.v = cos_a * f.v;
   return g;
 }
@@ -481,7 +494,60 @@ template <typename T, int N> inline
 Jet<T, N> asin(const Jet<T, N>& f) {
   Jet<T, N> g;
   g.a = asin(f.a);
-  g.v = T(1.0) / sqrt(T(1.0) - f.a * f.a) * f.v;
+  const T tmp = T(1.0) / sqrt(T(1.0) - f.a * f.a);
+  g.v = tmp * f.v;
+  return g;
+}
+
+// tan(a + h) ~= tan(a) + (1 + tan(a)^2) h
+template <typename T, int N> inline
+Jet<T, N> tan(const Jet<T, N>& f) {
+  Jet<T, N> g;
+  g.a = tan(f.a);
+  double tan_a = tan(f.a);
+  const T tmp = T(1.0) + tan_a * tan_a;
+  g.v = tmp * f.v;
+  return g;
+}
+
+// atan(a + h) ~= atan(a) + 1 / (1 + a^2) h
+template <typename T, int N> inline
+Jet<T, N> atan(const Jet<T, N>& f) {
+  Jet<T, N> g;
+  g.a = atan(f.a);
+  const T tmp = T(1.0) / (T(1.0) + f.a * f.a);
+  g.v = tmp * f.v;
+  return g;
+}
+
+// sinh(a + h) ~= sinh(a) + cosh(a) h
+template <typename T, int N> inline
+Jet<T, N> sinh(const Jet<T, N>& f) {
+  Jet<T, N> g;
+  g.a = sinh(f.a);
+  const T cosh_a = cosh(f.a);
+  g.v = cosh_a * f.v;
+  return g;
+}
+
+// cosh(a + h) ~= cosh(a) + sinh(a) h
+template <typename T, int N> inline
+Jet<T, N> cosh(const Jet<T, N>& f) {
+  Jet<T, N> g;
+  g.a = cosh(f.a);
+  const T sinh_a = sinh(f.a);
+  g.v = sinh_a * f.v;
+  return g;
+}
+
+// tanh(a + h) ~= tanh(a) + (1 - tanh(a)^2) h
+template <typename T, int N> inline
+Jet<T, N> tanh(const Jet<T, N>& f) {
+  Jet<T, N> g;
+  g.a = tanh(f.a);
+  double tanh_a = tanh(f.a);
+  const T tmp = T(1.0) - tanh_a * tanh_a;
+  g.v = tmp * f.v;
   return g;
 }
 
@@ -635,6 +701,11 @@ template<typename T, int N> inline       Jet<T, N>  ei_exp (const Jet<T, N>& x)
 template<typename T, int N> inline       Jet<T, N>  ei_log (const Jet<T, N>& x) { return log(x);         }  // NOLINT
 template<typename T, int N> inline       Jet<T, N>  ei_sin (const Jet<T, N>& x) { return sin(x);         }  // NOLINT
 template<typename T, int N> inline       Jet<T, N>  ei_cos (const Jet<T, N>& x) { return cos(x);         }  // NOLINT
+template<typename T, int N> inline       Jet<T, N>  ei_tan (const Jet<T, N>& x) { return tan(x);         }  // NOLINT
+template<typename T, int N> inline       Jet<T, N>  ei_atan(const Jet<T, N>& x) { return atan(x);        }  // NOLINT
+template<typename T, int N> inline       Jet<T, N>  ei_sinh(const Jet<T, N>& x) { return sinh(x);        }  // NOLINT
+template<typename T, int N> inline       Jet<T, N>  ei_cosh(const Jet<T, N>& x) { return cosh(x);        }  // NOLINT
+template<typename T, int N> inline       Jet<T, N>  ei_tanh(const Jet<T, N>& x) { return tanh(x);        }  // NOLINT
 template<typename T, int N> inline       Jet<T, N>  ei_pow (const Jet<T, N>& x, Jet<T, N> y) { return pow(x, y); }  // NOLINT
 
 // Note: This has to be in the ceres namespace for argument dependent lookup to
diff --git a/include/ceres/loss_function.h b/include/ceres/loss_function.h
index 0c0ceaa..b99c184 100644
--- a/include/ceres/loss_function.h
+++ b/include/ceres/loss_function.h
@@ -75,10 +75,10 @@
 #ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
 #define CERES_PUBLIC_LOSS_FUNCTION_H_
 
-#include <glog/logging.h>
 #include "ceres/internal/macros.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/types.h"
+#include "glog/logging.h"
 
 namespace ceres {
 
@@ -347,19 +347,20 @@ class ScaledLoss : public LossFunction {
 //
 //  CostFunction* cost_function =
 //    new AutoDiffCostFunction < UW_Camera_Mapper, 2, 9, 3>(
-//      new UW_Camera_Mapper(data->observations[2*i + 0],
-//                           data->observations[2*i + 1]));
+//      new UW_Camera_Mapper(feature_x, feature_y));
 //
 //  LossFunctionWrapper* loss_function(new HuberLoss(1.0), TAKE_OWNERSHIP);
 //
 //  problem.AddResidualBlock(cost_function, loss_function, parameters);
 //
 //  Solver::Options options;
-//  scoped_ptr<Solver::Summary> summary1(Solve(problem, options));
+//  Solger::Summary summary;
+//
+//  Solve(options, &problem, &summary)
 //
 //  loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
 //
-//  scoped_ptr<Solver::Summary> summary2(Solve(problem, options));
+//  Solve(options, &problem, &summary)
 //
 class LossFunctionWrapper : public LossFunction {
  public:
diff --git a/include/ceres/numeric_diff_cost_function.h b/include/ceres/numeric_diff_cost_function.h
index 8544e44..a47a66d 100644
--- a/include/ceres/numeric_diff_cost_function.h
+++ b/include/ceres/numeric_diff_cost_function.h
@@ -27,19 +27,111 @@
 // POSSIBILITY OF SUCH DAMAGE.
 //
 // Author: keir@google.com (Keir Mierle)
+//         sameeragarwal@google.com (Sameer Agarwal)
 //
 // Create CostFunctions as needed by the least squares framework with jacobians
 // computed via numeric (a.k.a. finite) differentiation. For more details see
 // http://en.wikipedia.org/wiki/Numerical_differentiation.
 //
-// To get a numerically differentiated cost function, define a subclass of
-// CostFunction such that the Evaluate() function ignores the jacobian
-// parameter. The numeric differentiation wrapper will fill in the jacobian
-// parameter if nececssary by repeatedly calling the Evaluate() function with
-// small changes to the appropriate parameters, and computing the slope. For
-// performance, the numeric differentiation wrapper class is templated on the
-// concrete cost function, even though it could be implemented only in terms of
-// the virtual CostFunction interface.
+// To get an numerically differentiated cost function, you must define
+// a class with a operator() (a functor) that computes the residuals.
+//
+// The function must write the computed value in the last argument
+// (the only non-const one) and return true to indicate success.
+// Please see cost_function.h for details on how the return value
+// maybe used to impose simple constraints on the parameter block.
+//
+// For example, consider a scalar error e = k - x'y, where both x and y are
+// two-dimensional column vector parameters, the prime sign indicates
+// transposition, and k is a constant. The form of this error, which is the
+// difference between a constant and an expression, is a common pattern in least
+// squares problems. For example, the value x'y might be the model expectation
+// for a series of measurements, where there is an instance of the cost function
+// for each measurement k.
+//
+// The actual cost added to the total problem is e^2, or (k - x'k)^2; however,
+// the squaring is implicitly done by the optimization framework.
+//
+// To write an numerically-differentiable cost function for the above model, first
+// define the object
+//
+//   class MyScalarCostFunctor {
+//     MyScalarCostFunctor(double k): k_(k) {}
+//
+//     bool operator()(const double* const x,
+//                     const double* const y,
+//                     double* residuals) const {
+//       residuals[0] = k_ - x[0] * y[0] + x[1] * y[1];
+//       return true;
+//     }
+//
+//    private:
+//     double k_;
+//   };
+//
+// Note that in the declaration of operator() the input parameters x
+// and y come first, and are passed as const pointers to arrays of
+// doubles. If there were three input parameters, then the third input
+// parameter would come after y. The output is always the last
+// parameter, and is also a pointer to an array. In the example above,
+// the residual is a scalar, so only residuals[0] is set.
+//
+// Then given this class definition, the numerically differentiated
+// cost function with central differences used for computing the
+// derivative can be constructed as follows.
+//
+//   CostFunction* cost_function
+//       = new NumericDiffCostFunction<MyScalarCostFunctor, CENTRAL, 1, 2, 2>(
+//           new MyScalarCostFunctor(1.0));                    ^     ^  ^  ^
+//                                                             |     |  |  |
+//                                 Finite Differencing Scheme -+     |  |  |
+//                                 Dimension of residual ------------+  |  |
+//                                 Dimension of x ----------------------+  |
+//                                 Dimension of y -------------------------+
+//
+// In this example, there is usually an instance for each measurement of k.
+//
+// In the instantiation above, the template parameters following
+// "MyScalarCostFunctor", "1, 2, 2", describe the functor as computing
+// a 1-dimensional output from two arguments, both 2-dimensional.
+//
+// The framework can currently accommodate cost functions of up to 10
+// independent variables, and there is no limit on the dimensionality
+// of each of them.
+//
+// The central difference method is considerably more accurate at the cost of
+// twice as many function evaluations than forward difference. Consider using
+// central differences begin with, and only after that works, trying forward
+// difference to improve performance.
+//
+// TODO(sameeragarwal): Add support for dynamic number of residuals.
+//
+// WARNING #1: A common beginner's error when first using
+// NumericDiffCostFunction is to get the sizing wrong. In particular,
+// there is a tendency to set the template parameters to (dimension of
+// residual, number of parameters) instead of passing a dimension
+// parameter for *every parameter*. In the example above, that would
+// be <MyScalarCostFunctor, 1, 2>, which is missing the last '2'
+// argument. Please be careful when setting the size parameters.
+//
+////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////
+//
+// ALTERNATE INTERFACE
+//
+// For a variety of reason, including compatibility with legacy code,
+// NumericDiffCostFunction can also take CostFunction objects as
+// input. The following describes how.
+//
+// To get a numerically differentiated cost function, define a
+// subclass of CostFunction such that the Evaluate() function ignores
+// the jacobian parameter. The numeric differentiation wrapper will
+// fill in the jacobian parameter if necessary by repeatedly calling
+// the Evaluate() function with small changes to the appropriate
+// parameters, and computing the slope. For performance, the numeric
+// differentiation wrapper class is templated on the concrete cost
+// function, even though it could be implemented only in terms of the
+// virtual CostFunction interface.
 //
 // The numerically differentiated version of a cost function for a cost function
 // can be constructed as follows:
@@ -51,233 +143,153 @@
 // where MyCostFunction has 1 residual and 2 parameter blocks with sizes 4 and 8
 // respectively. Look at the tests for a more detailed example.
 //
-// The central difference method is considerably more accurate at the cost of
-// twice as many function evaluations than forward difference. Consider using
-// central differences begin with, and only after that works, trying forward
-// difference to improve performance.
-//
 // TODO(keir): Characterize accuracy; mention pitfalls; provide alternatives.
 
 #ifndef CERES_PUBLIC_NUMERIC_DIFF_COST_FUNCTION_H_
 #define CERES_PUBLIC_NUMERIC_DIFF_COST_FUNCTION_H_
 
-#include <cstring>
-#include <glog/logging.h>
 #include "Eigen/Dense"
+#include "ceres/cost_function.h"
+#include "ceres/internal/numeric_diff.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/sized_cost_function.h"
 #include "ceres/types.h"
+#include "glog/logging.h"
 
 namespace ceres {
 
-enum NumericDiffMethod {
-  CENTRAL,
-  FORWARD
-};
-
-// This is split from the main class because C++ doesn't allow partial template
-// specializations for member functions. The alternative is to repeat the main
-// class for differing numbers of parameters, which is also unfortunate.
-template <typename CostFunctionNoJacobian,
-          int num_residuals,
-          int parameter_block_size,
-          int parameter_block,
-          NumericDiffMethod method>
-struct Differencer {
-  // Mutates parameters but must restore them before return.
-  static bool EvaluateJacobianForParameterBlock(
-      const CostFunctionNoJacobian *function,
-      double const* residuals_at_eval_point,
-      double **parameters,
-      double **jacobians) {
-    using Eigen::Map;
-    using Eigen::Matrix;
-    using Eigen::RowMajor;
-    using Eigen::ColMajor;
-
-    typedef Matrix<double, num_residuals, 1> ResidualVector;
-    typedef Matrix<double, parameter_block_size, 1> ParameterVector;
-    typedef Matrix<double, num_residuals, parameter_block_size,
-                   (parameter_block_size == 1 &&
-                    num_residuals > 1) ? ColMajor : RowMajor> JacobianMatrix;
-
-    Map<JacobianMatrix> parameter_jacobian(jacobians[parameter_block],
-                                           num_residuals,
-                                           parameter_block_size);
-
-    // Mutate 1 element at a time and then restore.
-    Map<ParameterVector> x_plus_delta(parameters[parameter_block],
-                                      parameter_block_size);
-    ParameterVector x(x_plus_delta);
-
-    // TODO(keir): Pick a smarter number! In theory a good choice is sqrt(eps) *
-    // x, which for doubles means about 1e-8 * x. However, I have found this
-    // number too optimistic. This number should be exposed for users to change.
-    const double kRelativeStepSize = 1e-6;
-
-    ParameterVector step_size = x.array().abs() * kRelativeStepSize;
-
-    // To handle cases where a parameter is exactly zero, instead use the mean
-    // step_size for the other dimensions.
-    double fallback_step_size = step_size.sum() / step_size.rows();
-    if (fallback_step_size == 0.0) {
-      // If all the parameters are zero, there's no good answer. Take
-      // kRelativeStepSize as a guess and hope for the best.
-      fallback_step_size = kRelativeStepSize;
-    }
-
-    // For each parameter in the parameter block, use finite differences to
-    // compute the derivative for that parameter.
-    for (int j = 0; j < parameter_block_size; ++j) {
-      if (step_size(j) == 0.0) {
-        // The parameter is exactly zero, so compromise and use the mean
-        // step_size from the other parameters. This can break in many cases,
-        // but it's hard to pick a good number without problem specific
-        // knowledge.
-        step_size(j) = fallback_step_size;
-      }
-      x_plus_delta(j) = x(j) + step_size(j);
-
-      double residuals[num_residuals];  // NOLINT
-      if (!function->Evaluate(parameters, residuals, NULL)) {
-        // Something went wrong; bail.
-        return false;
-      }
-
-      // Compute this column of the jacobian in 3 steps:
-      // 1. Store residuals for the forward part.
-      // 2. Subtract residuals for the backward (or 0) part.
-      // 3. Divide out the run.
-      parameter_jacobian.col(j) =
-          Map<const ResidualVector>(residuals, num_residuals);
-
-      double one_over_h = 1 / step_size(j);
-      if (method == CENTRAL) {
-        // Compute the function on the other side of x(j).
-        x_plus_delta(j) = x(j) - step_size(j);
-
-        if (!function->Evaluate(parameters, residuals, NULL)) {
-          // Something went wrong; bail.
-          return false;
-        }
-        parameter_jacobian.col(j) -=
-            Map<ResidualVector>(residuals, num_residuals, 1);
-        one_over_h /= 2;
-      } else {
-        // Forward difference only; reuse existing residuals evaluation.
-        parameter_jacobian.col(j) -=
-            Map<const ResidualVector>(residuals_at_eval_point, num_residuals);
-      }
-      x_plus_delta(j) = x(j);  // Restore x_plus_delta.
-
-      // Divide out the run to get slope.
-      parameter_jacobian.col(j) *= one_over_h;
-    }
-    return true;
-  }
-};
-
-// Prevent invalid instantiations.
-template <typename CostFunctionNoJacobian,
-          int num_residuals,
-          int parameter_block,
-          NumericDiffMethod method>
-struct Differencer<CostFunctionNoJacobian,
-                  num_residuals,
-                  0 /* parameter_block_size */,
-                  parameter_block,
-                  method> {
-  static bool EvaluateJacobianForParameterBlock(
-      const CostFunctionNoJacobian *function,
-      double const* residuals_at_eval_point,
-      double **parameters,
-      double **jacobians) {
-    LOG(FATAL) << "Shouldn't get here.";
-    return true;
-  }
-};
-
-template <typename CostFunctionNoJacobian,
-         NumericDiffMethod method = CENTRAL, int M = 0,
-         int N0 = 0, int N1 = 0, int N2 = 0, int N3 = 0, int N4 = 0, int N5 = 0>
+template <typename CostFunctor,
+          NumericDiffMethod method = CENTRAL,
+          int kNumResiduals = 0,  // Number of residuals, or ceres::DYNAMIC
+          int N0 = 0,   // Number of parameters in block 0.
+          int N1 = 0,   // Number of parameters in block 1.
+          int N2 = 0,   // Number of parameters in block 2.
+          int N3 = 0,   // Number of parameters in block 3.
+          int N4 = 0,   // Number of parameters in block 4.
+          int N5 = 0,   // Number of parameters in block 5.
+          int N6 = 0,   // Number of parameters in block 6.
+          int N7 = 0,   // Number of parameters in block 7.
+          int N8 = 0,   // Number of parameters in block 8.
+          int N9 = 0>   // Number of parameters in block 9.
 class NumericDiffCostFunction
-    : public SizedCostFunction<M, N0, N1, N2, N3, N4, N5> {
+    : public SizedCostFunction<kNumResiduals,
+                               N0, N1, N2, N3, N4,
+                               N5, N6, N7, N8, N9> {
  public:
-  NumericDiffCostFunction(CostFunctionNoJacobian* function,
-                          Ownership ownership)
-      : function_(function), ownership_(ownership) {}
+  NumericDiffCostFunction(CostFunctor* functor,
+                          const double relative_step_size = 1e-6)
+      :functor_(functor),
+       ownership_(TAKE_OWNERSHIP),
+       relative_step_size_(relative_step_size) {}
 
-  virtual ~NumericDiffCostFunction() {
+  NumericDiffCostFunction(CostFunctor* functor,
+                          Ownership ownership,
+                          const double relative_step_size = 1e-6)
+      : functor_(functor),
+        ownership_(ownership),
+        relative_step_size_(relative_step_size) {}
+
+  ~NumericDiffCostFunction() {
     if (ownership_ != TAKE_OWNERSHIP) {
-      function_.release();
+      functor_.release();
     }
   }
 
   virtual bool Evaluate(double const* const* parameters,
                         double* residuals,
                         double** jacobians) const {
+    using internal::FixedArray;
+    using internal::NumericDiff;
+
+    const int kNumParameters = N0 + N1 + N2 + N3 + N4 + N5 + N6 + N7 + N8 + N9;
+    const int kNumParameterBlocks =
+        (N0 > 0) + (N1 > 0) + (N2 > 0) + (N3 > 0) + (N4 > 0) +
+        (N5 > 0) + (N6 > 0) + (N7 > 0) + (N8 > 0) + (N9 > 0);
+
     // Get the function value (residuals) at the the point to evaluate.
-    bool success = function_->Evaluate(parameters, residuals, NULL);
-    if (!success) {
-      // Something went wrong; ignore the jacobian.
+    if (!internal::EvaluateImpl<CostFunctor,
+                                N0, N1, N2, N3, N4, N5, N6, N7, N8, N9>(
+                                    functor_.get(),
+                                    parameters,
+                                    residuals,
+                                    functor_.get())) {
       return false;
     }
+
     if (!jacobians) {
-      // Nothing to do; just forward.
       return true;
     }
 
     // Create a copy of the parameters which will get mutated.
-    const int kParametersSize = N0 + N1 + N2 + N3 + N4 + N5;
-    double parameters_copy[kParametersSize];
-    double *parameters_references_copy[6];
-    parameters_references_copy[0] = &parameters_copy[0];
-    parameters_references_copy[1] = &parameters_copy[0] + N0;
-    parameters_references_copy[2] = &parameters_copy[0] + N0 + N1;
-    parameters_references_copy[3] = &parameters_copy[0] + N0 + N1 + N2;
-    parameters_references_copy[4] = &parameters_copy[0] + N0 + N1 + N2 + N3;
-    parameters_references_copy[5] =
-        &parameters_copy[0] + N0 + N1 + N2 + N3 + N4;
+    FixedArray<double> parameters_copy(kNumParameters);
+    FixedArray<double*> parameters_reference_copy(kNumParameterBlocks);
+
+    parameters_reference_copy[0] = parameters_copy.get();
+    if (N1) parameters_reference_copy[1] = parameters_reference_copy[0] + N0;
+    if (N2) parameters_reference_copy[2] = parameters_reference_copy[1] + N1;
+    if (N3) parameters_reference_copy[3] = parameters_reference_copy[2] + N2;
+    if (N4) parameters_reference_copy[4] = parameters_reference_copy[3] + N3;
+    if (N5) parameters_reference_copy[5] = parameters_reference_copy[4] + N4;
+    if (N6) parameters_reference_copy[6] = parameters_reference_copy[5] + N5;
+    if (N7) parameters_reference_copy[7] = parameters_reference_copy[6] + N6;
+    if (N8) parameters_reference_copy[8] = parameters_reference_copy[7] + N7;
+    if (N9) parameters_reference_copy[9] = parameters_reference_copy[8] + N8;
+
+#define COPY_PARAMETER_BLOCK(block)                                     \
+  if (N ## block) memcpy(parameters_reference_copy[block],              \
+                         parameters[block],                             \
+                         sizeof(double) * N ## block);  // NOLINT
 
-#define COPY_PARAMETER_BLOCK(block) \
-    if (N ## block) memcpy(parameters_references_copy[block], \
-                           parameters[block], \
-                           sizeof(double) * N ## block);  // NOLINT
     COPY_PARAMETER_BLOCK(0);
     COPY_PARAMETER_BLOCK(1);
     COPY_PARAMETER_BLOCK(2);
     COPY_PARAMETER_BLOCK(3);
     COPY_PARAMETER_BLOCK(4);
     COPY_PARAMETER_BLOCK(5);
+    COPY_PARAMETER_BLOCK(6);
+    COPY_PARAMETER_BLOCK(7);
+    COPY_PARAMETER_BLOCK(8);
+    COPY_PARAMETER_BLOCK(9);
+
 #undef COPY_PARAMETER_BLOCK
 
-#define EVALUATE_JACOBIAN_FOR_BLOCK(block) \
-    if (N ## block && jacobians[block]) { \
-      if (!Differencer<CostFunctionNoJacobian, /* NOLINT */ \
-                       M, \
-                       N ## block, \
-                       block, \
-                       method>::EvaluateJacobianForParameterBlock( \
-          function_.get(), \
-          residuals, \
-          parameters_references_copy, \
-          jacobians)) { \
-        return false; \
-      } \
+#define EVALUATE_JACOBIAN_FOR_BLOCK(block)                              \
+    if (N ## block && jacobians[block] != NULL) {                       \
+      if (!NumericDiff<CostFunctor,                                     \
+                       method,                                          \
+                       kNumResiduals,                                   \
+                       N0, N1, N2, N3, N4, N5, N6, N7, N8, N9,          \
+                       block,                                           \
+                       N ## block >::EvaluateJacobianForParameterBlock( \
+                           functor_.get(),                              \
+                           residuals,                                   \
+                           relative_step_size_,                         \
+                           parameters_reference_copy.get(),             \
+                           jacobians[block])) {                         \
+        return false;                                                   \
+      }                                                                 \
     }
+
     EVALUATE_JACOBIAN_FOR_BLOCK(0);
     EVALUATE_JACOBIAN_FOR_BLOCK(1);
     EVALUATE_JACOBIAN_FOR_BLOCK(2);
     EVALUATE_JACOBIAN_FOR_BLOCK(3);
     EVALUATE_JACOBIAN_FOR_BLOCK(4);
     EVALUATE_JACOBIAN_FOR_BLOCK(5);
+    EVALUATE_JACOBIAN_FOR_BLOCK(6);
+    EVALUATE_JACOBIAN_FOR_BLOCK(7);
+    EVALUATE_JACOBIAN_FOR_BLOCK(8);
+    EVALUATE_JACOBIAN_FOR_BLOCK(9);
+
 #undef EVALUATE_JACOBIAN_FOR_BLOCK
+
     return true;
   }
 
  private:
-  internal::scoped_ptr<CostFunctionNoJacobian> function_;
+  internal::scoped_ptr<CostFunctor> functor_;
   Ownership ownership_;
+  const double relative_step_size_;
 };
 
 }  // namespace ceres
diff --git a/include/ceres/numeric_diff_functor.h b/include/ceres/numeric_diff_functor.h
new file mode 100644
index 0000000..039e1a1
--- /dev/null
+++ b/include/ceres/numeric_diff_functor.h
@@ -0,0 +1,346 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+//   used to endorse or promote products derived from this software without
+//   specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+//
+// A wrapper class that takes a variadic functor evaluating a
+// function, numerically differentiates it and makes it available as a
+// templated functor so that it can be easily used as part of Ceres'
+// automatic differentiation framework.
+//
+// For example:
+//
+// For example, let us assume that
+//
+//  struct IntrinsicProjection
+//    IntrinsicProjection(const double* observations);
+//    bool operator()(const double* calibration,
+//                    const double* point,
+//                    double* residuals);
+//  };
+//
+// is a functor that implements the projection of a point in its local
+// coordinate system onto its image plane and subtracts it from the
+// observed point projection.
+//
+// Now we would like to compose the action of this functor with the
+// action of camera extrinsics, i.e., rotation and translation, which
+// is given by the following templated function
+//
+//   template<typename T>
+//   void RotateAndTranslatePoint(const T* rotation,
+//                                const T* translation,
+//                                const T* point,
+//                                T* result);
+//
+// To compose the extrinsics and intrinsics, we can construct a
+// CameraProjection functor as follows.
+//
+// struct CameraProjection {
+//    typedef NumericDiffFunctor<IntrinsicProjection, CENTRAL, 2, 5, 3>
+//       IntrinsicProjectionFunctor;
+//
+//   CameraProjection(double* observation) {
+//     intrinsic_projection_.reset(
+//         new IntrinsicProjectionFunctor(observation)) {
+//   }
+//
+//   template <typename T>
+//   bool operator()(const T* rotation,
+//                   const T* translation,
+//                   const T* intrinsics,
+//                   const T* point,
+//                   T* residuals) const {
+//     T transformed_point[3];
+//     RotateAndTranslatePoint(rotation, translation, point, transformed_point);
+//     return (*intrinsic_projection_)(intrinsics, transformed_point, residual);
+//   }
+//
+//  private:
+//   scoped_ptr<IntrinsicProjectionFunctor> intrinsic_projection_;
+// };
+//
+// Here, we made the choice of using CENTRAL differences to compute
+// the jacobian of IntrinsicProjection.
+//
+// Now, we are ready to construct an automatically differentiated cost
+// function as
+//
+// CostFunction* cost_function =
+//    new AutoDiffCostFunction<CameraProjection, 2, 3, 3, 5>(
+//        new CameraProjection(observations));
+//
+// cost_function now seamlessly integrates automatic differentiation
+// of RotateAndTranslatePoint with a numerically differentiated
+// version of IntrinsicProjection.
+
+#ifndef CERES_PUBLIC_NUMERIC_DIFF_FUNCTOR_H_
+#define CERES_PUBLIC_NUMERIC_DIFF_FUNCTOR_H_
+
+#include "ceres/numeric_diff_cost_function.h"
+#include "ceres/types.h"
+#include "ceres/cost_function_to_functor.h"
+
+namespace ceres {
+
+template<typename Functor,
+         NumericDiffMethod kMethod = CENTRAL,
+         int kNumResiduals = 0,
+         int N0 = 0, int N1 = 0 , int N2 = 0, int N3 = 0, int N4 = 0,
+         int N5 = 0, int N6 = 0 , int N7 = 0, int N8 = 0, int N9 = 0>
+class NumericDiffFunctor {
+ public:
+  // relative_step_size controls the step size used by the numeric
+  // differentiation process.
+  explicit NumericDiffFunctor(double relative_step_size = 1e-6)
+      : functor_(
+          new NumericDiffCostFunction<Functor,
+                                      kMethod,
+                                      kNumResiduals,
+                                      N0, N1, N2, N3, N4,
+                                      N5, N6, N7, N8, N9>(new Functor,
+                                                          relative_step_size)) {
+  }
+
+  NumericDiffFunctor(Functor* functor, double relative_step_size = 1e-6)
+      : functor_(new NumericDiffCostFunction<Functor,
+                                             kMethod,
+                                             kNumResiduals,
+                                             N0, N1, N2, N3, N4,
+                                             N5, N6, N7, N8, N9>(
+                                                 functor, relative_step_size)) {
+  }
+
+  bool operator()(const double* x0, double* residuals) const {
+    return functor_(x0, residuals);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  double* residuals) const {
+    return functor_(x0, x1, residuals);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  double* residuals) const {
+    return functor_(x0, x1, x2, residuals);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  double* residuals) const {
+    return functor_(x0, x1, x2, x3, residuals);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  const double* x4,
+                  double* residuals) const {
+    return functor_(x0, x1, x2, x3, x4, residuals);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  const double* x4,
+                  const double* x5,
+                  double* residuals) const {
+    return functor_(x0, x1, x2, x3, x4, x5, residuals);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  const double* x4,
+                  const double* x5,
+                  const double* x6,
+                  double* residuals) const {
+    return functor_(x0, x1, x2, x3, x4, x5, x6, residuals);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  const double* x4,
+                  const double* x5,
+                  const double* x6,
+                  const double* x7,
+                  double* residuals) const {
+    return functor_(x0, x1, x2, x3, x4, x5, x6, x7, residuals);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  const double* x4,
+                  const double* x5,
+                  const double* x6,
+                  const double* x7,
+                  const double* x8,
+                  double* residuals) const {
+    return functor_(x0, x1, x2, x3, x4, x5, x6, x7, x8, residuals);
+  }
+
+  bool operator()(const double* x0,
+                  const double* x1,
+                  const double* x2,
+                  const double* x3,
+                  const double* x4,
+                  const double* x5,
+                  const double* x6,
+                  const double* x7,
+                  const double* x8,
+                  const double* x9,
+                  double* residuals) const {
+    return functor_(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, residuals);
+  }
+
+  template <typename T>
+  bool operator()(const T* x0, T* residuals) const {
+    return functor_(x0, residuals);
+  }
+
+  template <typename T>
+  bool operator()(const T* x0,
+                  const T* x1,
+                  T* residuals) const {
+    return functor_(x0, x1, residuals);
+  }
+
+  template <typename T>
+  bool operator()(const T* x0,
+                  const T* x1,
+                  const T* x2,
+                  T* residuals) const {
+    return functor_(x0, x1, x2, residuals);
+  }
+
+  template <typename T>
+  bool operator()(const T* x0,
+                  const T* x1,
+                  const T* x2,
+                  const T* x3,
+                  T* residuals) const {
+    return functor_(x0, x1, x2, x3, residuals);
+  }
+
+  template <typename T>
+  bool operator()(const T* x0,
+                  const T* x1,
+                  const T* x2,
+                  const T* x3,
+                  const T* x4,
+                  T* residuals) const {
+    return functor_(x0, x1, x2, x3, x4, residuals);
+  }
+
+  template <typename T>
+  bool operator()(const T* x0,
+                  const T* x1,
+                  const T* x2,
+                  const T* x3,
+                  const T* x4,
+                  const T* x5,
+                  T* residuals) const {
+    return functor_(x0, x1, x2, x3, x4, x5, residuals);
+  }
+
+  template <typename T>
+  bool operator()(const T* x0,
+                  const T* x1,
+                  const T* x2,
+                  const T* x3,
+                  const T* x4,
+                  const T* x5,
+                  const T* x6,
+                  T* residuals) const {
+    return functor_(x0, x1, x2, x3, x4, x5, x6, residuals);
+  }
+
+  template <typename T>
+  bool operator()(const T* x0,
+                  const T* x1,
+                  const T* x2,
+                  const T* x3,
+                  const T* x4,
+                  const T* x5,
+                  const T* x6,
+                  const T* x7,
+                  T* residuals) const {
+    return functor_(x0, x1, x2, x3, x4, x5, x6, x7, residuals);
+  }
+
+  template <typename T>
+  bool operator()(const T* x0,
+                  const T* x1,
+                  const T* x2,
+                  const T* x3,
+                  const T* x4,
+                  const T* x5,
+                  const T* x6,
+                  const T* x7,
+                  const T* x8,
+                  T* residuals) const {
+    return functor_(x0, x1, x2, x3, x4, x5, x6, x7, x8, residuals);
+  }
+
+  template <typename T>
+  bool operator()(const T* x0,
+                  const T* x1,
+                  const T* x2,
+                  const T* x3,
+                  const T* x4,
+                  const T* x5,
+                  const T* x6,
+                  const T* x7,
+                  const T* x8,
+                  const T* x9,
+                  T* residuals) const {
+    return functor_(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, residuals);
+  }
+
+
+ private:
+  CostFunctionToFunctor<kNumResiduals,
+                        N0, N1, N2, N3, N4,
+                        N5, N6, N7, N8, N9> functor_;
+};
+
+}  // namespace ceres
+
+#endif  // CERES_PUBLIC_NUMERIC_DIFF_FUNCTOR_H_
diff --git a/include/ceres/problem.h b/include/ceres/problem.h
index 201cc7f..663616d 100644
--- a/include/ceres/problem.h
+++ b/include/ceres/problem.h
@@ -39,11 +39,12 @@
 #include <set>
 #include <vector>
 
-#include <glog/logging.h>
 #include "ceres/internal/macros.h"
 #include "ceres/internal/port.h"
 #include "ceres/internal/scoped_ptr.h"
 #include "ceres/types.h"
+#include "glog/logging.h"
+
 
 namespace ceres {
 
@@ -51,6 +52,7 @@ class CostFunction;
 class LossFunction;
 class LocalParameterization;
 class Solver;
+struct CRSMatrix;
 
 namespace internal {
 class Preprocessor;
@@ -59,10 +61,9 @@ class ParameterBlock;
 class ResidualBlock;
 }  // namespace internal
 
-// A ResidualBlockId is a handle clients can use to delete residual
-// blocks after creating them. They are opaque for any purposes other
-// than that.
-typedef const internal::ResidualBlock* ResidualBlockId;
+// A ResidualBlockId is an opaque handle clients can use to remove residual
+// blocks from a Problem after adding them.
+typedef internal::ResidualBlock* ResidualBlockId;
 
 // A class to represent non-linear least squares problems. Such
 // problems have a cost function that is a sum of error terms (known
@@ -122,7 +123,9 @@ class Problem {
     Options()
         : cost_function_ownership(TAKE_OWNERSHIP),
           loss_function_ownership(TAKE_OWNERSHIP),
-          local_parameterization_ownership(TAKE_OWNERSHIP) {}
+          local_parameterization_ownership(TAKE_OWNERSHIP),
+          enable_fast_parameter_block_removal(false),
+          disable_all_safety_checks(false) {}
 
     // These flags control whether the Problem object owns the cost
     // functions, loss functions, and parameterizations passed into
@@ -134,6 +137,29 @@ class Problem {
     Ownership cost_function_ownership;
     Ownership loss_function_ownership;
     Ownership local_parameterization_ownership;
+
+    // If true, trades memory for a faster RemoveParameterBlock() operation.
+    //
+    // RemoveParameterBlock() takes time proportional to the size of the entire
+    // Problem. If you only remove parameter blocks from the Problem
+    // occassionaly, this may be acceptable. However, if you are modifying the
+    // Problem frequently, and have memory to spare, then flip this switch to
+    // make RemoveParameterBlock() take time proportional to the number of
+    // residual blocks that depend on it.  The increase in memory usage is an
+    // additonal hash set per parameter block containing all the residuals that
+    // depend on the parameter block.
+    bool enable_fast_parameter_block_removal;
+
+    // By default, Ceres performs a variety of safety checks when constructing
+    // the problem. There is a small but measurable performance penalty to
+    // these checks, typically around 5% of construction time. If you are sure
+    // your problem construction is correct, and 5% of the problem construction
+    // time is truly an overhead you want to avoid, then you can set
+    // disable_all_safety_checks to true.
+    //
+    // WARNING: Do not set this to true, unless you are absolutely sure of what
+    // you are doing.
+    bool disable_all_safety_checks;
   };
 
   // The default constructor is equivalent to the
@@ -244,6 +270,33 @@ class Problem {
                          int size,
                          LocalParameterization* local_parameterization);
 
+  // Remove a parameter block from the problem. The parameterization of the
+  // parameter block, if it exists, will persist until the deletion of the
+  // problem (similar to cost/loss functions in residual block removal). Any
+  // residual blocks that depend on the parameter are also removed, as
+  // described above in RemoveResidualBlock().
+  //
+  // If Problem::Options::enable_fast_parameter_block_removal is true, then the
+  // removal is fast (almost constant time). Otherwise, removing a parameter
+  // block will incur a scan of the entire Problem object.
+  //
+  // WARNING: Removing a residual or parameter block will destroy the implicit
+  // ordering, rendering the jacobian or residuals returned from the solver
+  // uninterpretable. If you depend on the evaluated jacobian, do not use
+  // remove! This may change in a future release.
+  void RemoveParameterBlock(double* values);
+
+  // Remove a residual block from the problem. Any parameters that the residual
+  // block depends on are not removed. The cost and loss functions for the
+  // residual block will not get deleted immediately; won't happen until the
+  // problem itself is deleted.
+  //
+  // WARNING: Removing a residual or parameter block will destroy the implicit
+  // ordering, rendering the jacobian or residuals returned from the solver
+  // uninterpretable. If you depend on the evaluated jacobian, do not use
+  // remove! This may change in a future release.
+  void RemoveResidualBlock(ResidualBlockId residual_block);
+
   // Hold the indicated parameter block constant during optimization.
   void SetParameterBlockConstant(double* values);
 
@@ -275,8 +328,102 @@ class Problem {
   // sizes of all of the residual blocks.
   int NumResiduals() const;
 
+  // The size of the parameter block.
+  int ParameterBlockSize(const double* values) const;
+
+  // The size of local parameterization for the parameter block. If
+  // there is no local parameterization associated with this parameter
+  // block, then ParameterBlockLocalSize = ParameterBlockSize.
+  int ParameterBlockLocalSize(const double* values) const;
+
+  // Fills the passed parameter_blocks vector with pointers to the
+  // parameter blocks currently in the problem. After this call,
+  // parameter_block.size() == NumParameterBlocks.
+  void GetParameterBlocks(vector<double*>* parameter_blocks) const;
+
+  // Options struct to control Problem::Evaluate.
+  struct EvaluateOptions {
+    EvaluateOptions()
+        : apply_loss_function(true),
+          num_threads(1) {
+    }
+
+    // The set of parameter blocks for which evaluation should be
+    // performed. This vector determines the order that parameter
+    // blocks occur in the gradient vector and in the columns of the
+    // jacobian matrix. If parameter_blocks is empty, then it is
+    // assumed to be equal to vector containing ALL the parameter
+    // blocks.  Generally speaking the parameter blocks will occur in
+    // the order in which they were added to the problem. But, this
+    // may change if the user removes any parameter blocks from the
+    // problem.
+    //
+    // NOTE: This vector should contain the same pointers as the ones
+    // used to add parameter blocks to the Problem. These parameter
+    // block should NOT point to new memory locations. Bad things will
+    // happen otherwise.
+    vector<double*> parameter_blocks;
+
+    // The set of residual blocks to evaluate. This vector determines
+    // the order in which the residuals occur, and how the rows of the
+    // jacobian are ordered. If residual_blocks is empty, then it is
+    // assumed to be equal to the vector containing all the residual
+    // blocks. If this vector is empty, then it is assumed to be equal
+    // to a vector containing ALL the residual blocks. Generally
+    // speaking the residual blocks will occur in the order in which
+    // they were added to the problem. But, this may change if the
+    // user removes any residual blocks from the problem.
+    vector<ResidualBlockId> residual_blocks;
+
+    // Even though the residual blocks in the problem may contain loss
+    // functions, setting apply_loss_function to false will turn off
+    // the application of the loss function to the output of the cost
+    // function. This is of use for example if the user wishes to
+    // analyse the solution quality by studying the distribution of
+    // residuals before and after the solve.
+    bool apply_loss_function;
+
+    int num_threads;
+  };
+
+  // Evaluate Problem. Any of the output pointers can be NULL. Which
+  // residual blocks and parameter blocks are used is controlled by
+  // the EvaluateOptions struct above.
+  //
+  // Note 1: The evaluation will use the values stored in the memory
+  // locations pointed to by the parameter block pointers used at the
+  // time of the construction of the problem. i.e.,
+  //
+  //   Problem problem;
+  //   double x = 1;
+  //   problem.AddResidualBlock(new MyCostFunction, NULL, &x);
+  //
+  //   double cost = 0.0;
+  //   problem.Evaluate(Problem::EvaluateOptions(), &cost, NULL, NULL, NULL);
+  //
+  // The cost is evaluated at x = 1. If you wish to evaluate the
+  // problem at x = 2, then
+  //
+  //    x = 2;
+  //    problem.Evaluate(Problem::EvaluateOptions(), &cost, NULL, NULL, NULL);
+  //
+  // is the way to do so.
+  //
+  // Note 2: If no local parameterizations are used, then the size of
+  // the gradient vector (and the number of columns in the jacobian)
+  // is the sum of the sizes of all the parameter blocks. If a
+  // parameter block has a local parameterization, then it contributes
+  // "LocalSize" entries to the gradient vector (and the number of
+  // columns in the jacobian).
+  bool Evaluate(const EvaluateOptions& options,
+                double* cost,
+                vector<double>* residuals,
+                vector<double>* gradient,
+                CRSMatrix* jacobian);
+
  private:
   friend class Solver;
+  friend class Covariance;
   internal::scoped_ptr<internal::ProblemImpl> problem_impl_;
   CERES_DISALLOW_COPY_AND_ASSIGN(Problem);
 };
diff --git a/include/ceres/rotation.h b/include/ceres/rotation.h
index 0d8a390..ea0b769 100644
--- a/include/ceres/rotation.h
+++ b/include/ceres/rotation.h
@@ -51,6 +51,31 @@
 
 namespace ceres {
 
+// Trivial wrapper to index linear arrays as matrices, given a fixed
+// column and row stride. When an array "T* array" is wrapped by a
+//
+//   (const) MatrixAdapter<T, row_stride, col_stride> M"
+//
+// the expression  M(i, j) is equivalent to
+//
+//   arrary[i * row_stride + j * col_stride]
+//
+// Conversion functions to and from rotation matrices accept
+// MatrixAdapters to permit using row-major and column-major layouts,
+// and rotation matrices embedded in larger matrices (such as a 3x4
+// projection matrix).
+template <typename T, int row_stride, int col_stride>
+struct MatrixAdapter;
+
+// Convenience functions to create a MatrixAdapter that treats the
+// array pointed to by "pointer" as a 3x3 (contiguous) column-major or
+// row-major matrix.
+template <typename T>
+MatrixAdapter<T, 1, 3> ColumnMajorAdapter3x3(T* pointer);
+
+template <typename T>
+MatrixAdapter<T, 3, 1> RowMajorAdapter3x3(T* pointer);
+
 // Convert a value in combined axis-angle representation to a quaternion.
 // The value angle_axis is a triple whose norm is an angle in radians,
 // and whose direction is aligned with the axis of rotation,
@@ -58,7 +83,7 @@ namespace ceres {
 // The implementation may be used with auto-differentiation up to the first
 // derivative, higher derivatives may have unexpected results near the origin.
 template<typename T>
-void AngleAxisToQuaternion(T const* angle_axis, T* quaternion);
+void AngleAxisToQuaternion(const T* angle_axis, T* quaternion);
 
 // Convert a quaternion to the equivalent combined axis-angle representation.
 // The value quaternion must be a unit quaternion - it is not normalized first,
@@ -67,15 +92,26 @@ void AngleAxisToQuaternion(T const* angle_axis, T* quaternion);
 // The implemention may be used with auto-differentiation up to the first
 // derivative, higher derivatives may have unexpected results near the origin.
 template<typename T>
-void QuaternionToAngleAxis(T const* quaternion, T* angle_axis);
+void QuaternionToAngleAxis(const T* quaternion, T* angle_axis);
 
 // Conversions between 3x3 rotation matrix (in column major order) and
 // axis-angle rotation representations.  Templated for use with
 // autodifferentiation.
 template <typename T>
-void RotationMatrixToAngleAxis(T const * R, T * angle_axis);
+void RotationMatrixToAngleAxis(const T* R, T* angle_axis);
+
+template <typename T, int row_stride, int col_stride>
+void RotationMatrixToAngleAxis(
+    const MatrixAdapter<const T, row_stride, col_stride>& R,
+    T* angle_axis);
+
 template <typename T>
-void AngleAxisToRotationMatrix(T const * angle_axis, T * R);
+void AngleAxisToRotationMatrix(const T* angle_axis, T* R);
+
+template <typename T, int row_stride, int col_stride>
+void AngleAxisToRotationMatrix(
+    const T* angle_axis,
+    const MatrixAdapter<T, row_stride, col_stride>& R);
 
 // Conversions between 3x3 rotation matrix (in row major order) and
 // Euler angle (in degrees) rotation representations.
@@ -86,6 +122,11 @@ void AngleAxisToRotationMatrix(T const * angle_axis, T * R);
 template <typename T>
 void EulerAnglesToRotationMatrix(const T* euler, int row_stride, T* R);
 
+template <typename T, int row_stride, int col_stride>
+void EulerAnglesToRotationMatrix(
+    const T* euler,
+    const MatrixAdapter<T, row_stride, col_stride>& R);
+
 // Convert a 4-vector to a 3x3 scaled rotation matrix.
 //
 // The choice of rotation is such that the quaternion [1 0 0 0] goes to an
@@ -108,11 +149,21 @@ void EulerAnglesToRotationMatrix(const T* euler, int row_stride, T* R);
 template <typename T> inline
 void QuaternionToScaledRotation(const T q[4], T R[3 * 3]);
 
+template <typename T, int row_stride, int col_stride> inline
+void QuaternionToScaledRotation(
+    const T q[4],
+    const MatrixAdapter<T, row_stride, col_stride>& R);
+
 // Same as above except that the rotation matrix is normalized by the
 // Frobenius norm, so that R * R' = I (and det(R) = 1).
 template <typename T> inline
 void QuaternionToRotation(const T q[4], T R[3 * 3]);
 
+template <typename T, int row_stride, int col_stride> inline
+void QuaternionToRotation(
+    const T q[4],
+    const MatrixAdapter<T, row_stride, col_stride>& R);
+
 // Rotates a point pt by a quaternion q:
 //
 //   result = R(q) * pt
@@ -146,6 +197,28 @@ void AngleAxisRotatePoint(const T angle_axis[3], const T pt[3], T result[3]);
 
 // --- IMPLEMENTATION
 
+template<typename T, int row_stride, int col_stride>
+struct MatrixAdapter {
+  T* pointer_;
+  explicit MatrixAdapter(T* pointer)
+    : pointer_(pointer)
+  {}
+
+  T& operator()(int r, int c) const {
+    return pointer_[r * row_stride + c * col_stride];
+  }
+};
+
+template <typename T>
+MatrixAdapter<T, 1, 3> ColumnMajorAdapter3x3(T* pointer) {
+  return MatrixAdapter<T, 1, 3>(pointer);
+}
+
+template <typename T>
+MatrixAdapter<T, 3, 1> RowMajorAdapter3x3(T* pointer) {
+  return MatrixAdapter<T, 3, 1>(pointer);
+}
+
 template<typename T>
 inline void AngleAxisToQuaternion(const T* angle_axis, T* quaternion) {
   const T& a0 = angle_axis[0];
@@ -227,18 +300,25 @@ inline void QuaternionToAngleAxis(const T* quaternion, T* angle_axis) {
 // occurs and deals with them by taking code paths that are guaranteed
 // to not perform division by a small number.
 template <typename T>
-inline void RotationMatrixToAngleAxis(const T * R, T * angle_axis) {
+inline void RotationMatrixToAngleAxis(const T* R, T* angle_axis) {
+  RotationMatrixToAngleAxis(ColumnMajorAdapter3x3(R), angle_axis);
+}
+
+template <typename T, int row_stride, int col_stride>
+void RotationMatrixToAngleAxis(
+    const MatrixAdapter<const T, row_stride, col_stride>& R,
+    T* angle_axis) {
   // x = k * 2 * sin(theta), where k is the axis of rotation.
-  angle_axis[0] = R[5] - R[7];
-  angle_axis[1] = R[6] - R[2];
-  angle_axis[2] = R[1] - R[3];
+  angle_axis[0] = R(2, 1) - R(1, 2);
+  angle_axis[1] = R(0, 2) - R(2, 0);
+  angle_axis[2] = R(1, 0) - R(0, 1);
 
   static const T kOne = T(1.0);
   static const T kTwo = T(2.0);
 
   // Since the right hand side may give numbers just above 1.0 or
   // below -1.0 leading to atan misbehaving, we threshold.
-  T costheta = std::min(std::max((R[0] + R[4] + R[8] - kOne) / kTwo,
+  T costheta = std::min(std::max((R(0, 0) + R(1, 1) + R(2, 2) - kOne) / kTwo,
                                  T(-1.0)),
                         kOne);
 
@@ -296,7 +376,7 @@ inline void RotationMatrixToAngleAxis(const T * R, T * angle_axis) {
   // with the sign of sin(theta). If they are the same, then
   // angle_axis[i] should be positive, otherwise negative.
   for (int i = 0; i < 3; ++i) {
-    angle_axis[i] = theta * sqrt((R[i*4] - costheta) * inv_one_minus_costheta);
+    angle_axis[i] = theta * sqrt((R(i, i) - costheta) * inv_one_minus_costheta);
     if (((sintheta < 0.0) && (angle_axis[i] > 0.0)) ||
         ((sintheta > 0.0) && (angle_axis[i] < 0.0))) {
       angle_axis[i] = -angle_axis[i];
@@ -305,7 +385,14 @@ inline void RotationMatrixToAngleAxis(const T * R, T * angle_axis) {
 }
 
 template <typename T>
-inline void AngleAxisToRotationMatrix(const T * angle_axis, T * R) {
+inline void AngleAxisToRotationMatrix(const T* angle_axis, T* R) {
+  AngleAxisToRotationMatrix(angle_axis, ColumnMajorAdapter3x3(R));
+}
+
+template <typename T, int row_stride, int col_stride>
+void AngleAxisToRotationMatrix(
+    const T* angle_axis,
+    const MatrixAdapter<T, row_stride, col_stride>& R) {
   static const T kOne = T(1.0);
   const T theta2 = DotProduct(angle_axis, angle_axis);
   if (theta2 > 0.0) {
@@ -320,33 +407,41 @@ inline void AngleAxisToRotationMatrix(const T * angle_axis, T * R) {
     const T costheta = cos(theta);
     const T sintheta = sin(theta);
 
-    R[0] =     costheta   + wx*wx*(kOne -    costheta);
-    R[1] =  wz*sintheta   + wx*wy*(kOne -    costheta);
-    R[2] = -wy*sintheta   + wx*wz*(kOne -    costheta);
-    R[3] =  wx*wy*(kOne - costheta)     - wz*sintheta;
-    R[4] =     costheta   + wy*wy*(kOne -    costheta);
-    R[5] =  wx*sintheta   + wy*wz*(kOne -    costheta);
-    R[6] =  wy*sintheta   + wx*wz*(kOne -    costheta);
-    R[7] = -wx*sintheta   + wy*wz*(kOne -    costheta);
-    R[8] =     costheta   + wz*wz*(kOne -    costheta);
+    R(0, 0) =     costheta   + wx*wx*(kOne -    costheta);
+    R(1, 0) =  wz*sintheta   + wx*wy*(kOne -    costheta);
+    R(2, 0) = -wy*sintheta   + wx*wz*(kOne -    costheta);
+    R(0, 1) =  wx*wy*(kOne - costheta)     - wz*sintheta;
+    R(1, 1) =     costheta   + wy*wy*(kOne -    costheta);
+    R(2, 1) =  wx*sintheta   + wy*wz*(kOne -    costheta);
+    R(0, 2) =  wy*sintheta   + wx*wz*(kOne -    costheta);
+    R(1, 2) = -wx*sintheta   + wy*wz*(kOne -    costheta);
+    R(2, 2) =     costheta   + wz*wz*(kOne -    costheta);
   } else {
     // At zero, we switch to using the first order Taylor expansion.
-    R[0] =  kOne;
-    R[1] = -angle_axis[2];
-    R[2] =  angle_axis[1];
-    R[3] =  angle_axis[2];
-    R[4] =  kOne;
-    R[5] = -angle_axis[0];
-    R[6] = -angle_axis[1];
-    R[7] =  angle_axis[0];
-    R[8] = kOne;
+    R(0, 0) =  kOne;
+    R(1, 0) = -angle_axis[2];
+    R(2, 0) =  angle_axis[1];
+    R(0, 1) =  angle_axis[2];
+    R(1, 1) =  kOne;
+    R(2, 1) = -angle_axis[0];
+    R(0, 2) = -angle_axis[1];
+    R(1, 2) =  angle_axis[0];
+    R(2, 2) = kOne;
   }
 }
 
 template <typename T>
 inline void EulerAnglesToRotationMatrix(const T* euler,
-                                        const int row_stride,
+                                        const int row_stride_parameter,
                                         T* R) {
+  CHECK_EQ(row_stride_parameter, 3);
+  EulerAnglesToRotationMatrix(euler, RowMajorAdapter3x3(R));
+}
+
+template <typename T, int row_stride, int col_stride>
+void EulerAnglesToRotationMatrix(
+    const T* euler,
+    const MatrixAdapter<T, row_stride, col_stride>& R) {
   const double kPi = 3.14159265358979323846;
   const T degrees_to_radians(kPi / 180.0);
 
@@ -361,26 +456,28 @@ inline void EulerAnglesToRotationMatrix(const T* euler,
   const T c3 = cos(pitch);
   const T s3 = sin(pitch);
 
-  // Rows of the rotation matrix.
-  T* R1 = R;
-  T* R2 = R1 + row_stride;
-  T* R3 = R2 + row_stride;
-
-  R1[0] = c1*c2;
-  R1[1] = -s1*c3 + c1*s2*s3;
-  R1[2] = s1*s3 + c1*s2*c3;
+  R(0, 0) = c1*c2;
+  R(0, 1) = -s1*c3 + c1*s2*s3;
+  R(0, 2) = s1*s3 + c1*s2*c3;
 
-  R2[0] = s1*c2;
-  R2[1] = c1*c3 + s1*s2*s3;
-  R2[2] = -c1*s3 + s1*s2*c3;
+  R(1, 0) = s1*c2;
+  R(1, 1) = c1*c3 + s1*s2*s3;
+  R(1, 2) = -c1*s3 + s1*s2*c3;
 
-  R3[0] = -s2;
-  R3[1] = c2*s3;
-  R3[2] = c2*c3;
+  R(2, 0) = -s2;
+  R(2, 1) = c2*s3;
+  R(2, 2) = c2*c3;
 }
 
 template <typename T> inline
 void QuaternionToScaledRotation(const T q[4], T R[3 * 3]) {
+  QuaternionToScaledRotation(q, RowMajorAdapter3x3(R));
+}
+
+template <typename T, int row_stride, int col_stride> inline
+void QuaternionToScaledRotation(
+    const T q[4],
+    const MatrixAdapter<T, row_stride, col_stride>& R) {
   // Make convenient names for elements of q.
   T a = q[0];
   T b = q[1];
@@ -399,21 +496,29 @@ void QuaternionToScaledRotation(const T q[4], T R[3 * 3]) {
   T cd = c * d;
   T dd = d * d;
 
-  R[0] =  aa + bb - cc - dd; R[1] = T(2) * (bc - ad); R[2] = T(2) * (ac + bd);  // NOLINT
-  R[3] = T(2) * (ad + bc); R[4] =  aa - bb + cc - dd; R[5] = T(2) * (cd - ab);  // NOLINT
-  R[6] = T(2) * (bd - ac); R[7] = T(2) * (ab + cd); R[8] =  aa - bb - cc + dd;  // NOLINT
+  R(0, 0) = aa + bb - cc - dd; R(0, 1) = T(2) * (bc - ad);  R(0, 2) = T(2) * (ac + bd);  // NOLINT
+  R(1, 0) = T(2) * (ad + bc);  R(1, 1) = aa - bb + cc - dd; R(1, 2) = T(2) * (cd - ab);  // NOLINT
+  R(2, 0) = T(2) * (bd - ac);  R(2, 1) = T(2) * (ab + cd);  R(2, 2) = aa - bb - cc + dd; // NOLINT
 }
 
 template <typename T> inline
 void QuaternionToRotation(const T q[4], T R[3 * 3]) {
+  QuaternionToRotation(q, RowMajorAdapter3x3(R));
+}
+
+template <typename T, int row_stride, int col_stride> inline
+void QuaternionToRotation(const T q[4],
+                          const MatrixAdapter<T, row_stride, col_stride>& R) {
   QuaternionToScaledRotation(q, R);
 
   T normalizer = q[0]*q[0] + q[1]*q[1] + q[2]*q[2] + q[3]*q[3];
   CHECK_NE(normalizer, T(0));
   normalizer = T(1) / normalizer;
 
-  for (int i = 0; i < 9; ++i) {
-    R[i] *= normalizer;
+  for (int i = 0; i < 3; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      R(i, j) *= normalizer;
+    }
   }
 }
 
@@ -433,7 +538,6 @@ void UnitQuaternionRotatePoint(const T q[4], const T pt[3], T result[3]) {
   result[2] = T(2) * ((t7 - t3) * pt[0] + (t2 + t9) * pt[1] + (t5 + t8) * pt[2]) + pt[2];  // NOLINT
 }
 
-
 template <typename T> inline
 void QuaternionRotatePoint(const T q[4], const T pt[3], T result[3]) {
   // 'scale' is 1 / norm(q).
diff --git a/include/ceres/sized_cost_function.h b/include/ceres/sized_cost_function.h
index 6bfc1af..4f98d4e 100644
--- a/include/ceres/sized_cost_function.h
+++ b/include/ceres/sized_cost_function.h
@@ -38,9 +38,9 @@
 #ifndef CERES_PUBLIC_SIZED_COST_FUNCTION_H_
 #define CERES_PUBLIC_SIZED_COST_FUNCTION_H_
 
-#include <glog/logging.h>
 #include "ceres/types.h"
 #include "ceres/cost_function.h"
+#include "glog/logging.h"
 
 namespace ceres {
 
diff --git a/include/ceres/solver.h b/include/ceres/solver.h
index 62fb087..e7b8d09 100644
--- a/include/ceres/solver.h
+++ b/include/ceres/solver.h
@@ -58,6 +58,22 @@ class Solver {
   struct Options {
     // Default constructor that sets up a generic sparse problem.
     Options() {
+      minimizer_type = TRUST_REGION;
+      line_search_direction_type = LBFGS;
+      line_search_type = WOLFE;
+      nonlinear_conjugate_gradient_type = FLETCHER_REEVES;
+      max_lbfgs_rank = 20;
+      use_approximate_eigenvalue_bfgs_scaling = false;
+      line_search_interpolation_type = CUBIC;
+      min_line_search_step_size = 1e-9;
+      line_search_sufficient_function_decrease = 1e-4;
+      max_line_search_step_contraction = 1e-3;
+      min_line_search_step_contraction = 0.6;
+      max_num_line_search_step_size_iterations = 20;
+      max_num_line_search_direction_restarts = 5;
+      line_search_sufficient_curvature_decrease = 0.9;
+      max_line_search_step_expansion = 10.0;
+
       trust_region_strategy_type = LEVENBERG_MARQUARDT;
       dogleg_type = TRADITIONAL_DOGLEG;
       use_nonmonotonic_steps = false;
@@ -69,8 +85,8 @@ class Solver {
       max_trust_region_radius = 1e16;
       min_trust_region_radius = 1e-32;
       min_relative_decrease = 1e-3;
-      lm_min_diagonal = 1e-6;
-      lm_max_diagonal = 1e32;
+      min_lm_diagonal = 1e-6;
+      max_lm_diagonal = 1e32;
       max_num_consecutive_invalid_steps = 5;
       function_tolerance = 1e-6;
       gradient_tolerance = 1e-10;
@@ -90,29 +106,19 @@ class Solver {
 #endif
 
       num_linear_solver_threads = 1;
-
-#if defined(CERES_NO_SUITESPARSE)
-      use_block_amd = false;
-#else
-      use_block_amd = true;
-#endif
       linear_solver_ordering = NULL;
-      use_inner_iterations = false;
-      inner_iteration_ordering = NULL;
-      linear_solver_min_num_iterations = 1;
-      linear_solver_max_num_iterations = 500;
+      use_postordering = false;
+      min_linear_solver_iterations = 1;
+      max_linear_solver_iterations = 500;
       eta = 1e-1;
       jacobi_scaling = true;
+      use_inner_iterations = false;
+      inner_iteration_tolerance = 1e-3;
+      inner_iteration_ordering = NULL;
       logging_type = PER_MINIMIZER_ITERATION;
       minimizer_progress_to_stdout = false;
-      return_initial_residuals = false;
-      return_initial_gradient = false;
-      return_initial_jacobian = false;
-      return_final_residuals = false;
-      return_final_gradient = false;
-      return_final_jacobian = false;
-      lsqp_dump_directory = "/tmp";
-      lsqp_dump_format_type = TEXTFILE;
+      trust_region_problem_dump_directory = "/tmp";
+      trust_region_problem_dump_format_type = TEXTFILE;
       check_gradients = false;
       gradient_check_relative_precision = 1e-8;
       numeric_derivative_relative_step_size = 1e-6;
@@ -122,6 +128,164 @@ class Solver {
     ~Options();
     // Minimizer options ----------------------------------------
 
+    // Ceres supports the two major families of optimization strategies -
+    // Trust Region and Line Search.
+    //
+    // 1. The line search approach first finds a descent direction
+    // along which the objective function will be reduced and then
+    // computes a step size that decides how far should move along
+    // that direction. The descent direction can be computed by
+    // various methods, such as gradient descent, Newton's method and
+    // Quasi-Newton method. The step size can be determined either
+    // exactly or inexactly.
+    //
+    // 2. The trust region approach approximates the objective
+    // function using using a model function (often a quadratic) over
+    // a subset of the search space known as the trust region. If the
+    // model function succeeds in minimizing the true objective
+    // function the trust region is expanded; conversely, otherwise it
+    // is contracted and the model optimization problem is solved
+    // again.
+    //
+    // Trust region methods are in some sense dual to line search methods:
+    // trust region methods first choose a step size (the size of the
+    // trust region) and then a step direction while line search methods
+    // first choose a step direction and then a step size.
+    MinimizerType minimizer_type;
+
+    LineSearchDirectionType line_search_direction_type;
+    LineSearchType line_search_type;
+    NonlinearConjugateGradientType nonlinear_conjugate_gradient_type;
+
+    // The LBFGS hessian approximation is a low rank approximation to
+    // the inverse of the Hessian matrix. The rank of the
+    // approximation determines (linearly) the space and time
+    // complexity of using the approximation. Higher the rank, the
+    // better is the quality of the approximation. The increase in
+    // quality is however is bounded for a number of reasons.
+    //
+    // 1. The method only uses secant information and not actual
+    // derivatives.
+    //
+    // 2. The Hessian approximation is constrained to be positive
+    // definite.
+    //
+    // So increasing this rank to a large number will cost time and
+    // space complexity without the corresponding increase in solution
+    // quality. There are no hard and fast rules for choosing the
+    // maximum rank. The best choice usually requires some problem
+    // specific experimentation.
+    //
+    // For more theoretical and implementation details of the LBFGS
+    // method, please see:
+    //
+    // Nocedal, J. (1980). "Updating Quasi-Newton Matrices with
+    // Limited Storage". Mathematics of Computation 35 (151): 773–782.
+    int max_lbfgs_rank;
+
+    // As part of the (L)BFGS update step (BFGS) / right-multiply step (L-BFGS),
+    // the initial inverse Hessian approximation is taken to be the Identity.
+    // However, Oren showed that using instead I * \gamma, where \gamma is
+    // chosen to approximate an eigenvalue of the true inverse Hessian can
+    // result in improved convergence in a wide variety of cases. Setting
+    // use_approximate_eigenvalue_bfgs_scaling to true enables this scaling.
+    //
+    // It is important to note that approximate eigenvalue scaling does not
+    // always improve convergence, and that it can in fact significantly degrade
+    // performance for certain classes of problem, which is why it is disabled
+    // by default.  In particular it can degrade performance when the
+    // sensitivity of the problem to different parameters varies significantly,
+    // as in this case a single scalar factor fails to capture this variation
+    // and detrimentally downscales parts of the jacobian approximation which
+    // correspond to low-sensitivity parameters. It can also reduce the
+    // robustness of the solution to errors in the jacobians.
+    //
+    // Oren S.S., Self-scaling variable metric (SSVM) algorithms
+    // Part II: Implementation and experiments, Management Science,
+    // 20(5), 863-874, 1974.
+    bool use_approximate_eigenvalue_bfgs_scaling;
+
+    // Degree of the polynomial used to approximate the objective
+    // function. Valid values are BISECTION, QUADRATIC and CUBIC.
+    //
+    // BISECTION corresponds to pure backtracking search with no
+    // interpolation.
+    LineSearchInterpolationType line_search_interpolation_type;
+
+    // If during the line search, the step_size falls below this
+    // value, it is truncated to zero.
+    double min_line_search_step_size;
+
+    // Line search parameters.
+
+    // Solving the line search problem exactly is computationally
+    // prohibitive. Fortunately, line search based optimization
+    // algorithms can still guarantee convergence if instead of an
+    // exact solution, the line search algorithm returns a solution
+    // which decreases the value of the objective function
+    // sufficiently. More precisely, we are looking for a step_size
+    // s.t.
+    //
+    //   f(step_size) <= f(0) + sufficient_decrease * f'(0) * step_size
+    //
+    double line_search_sufficient_function_decrease;
+
+    // In each iteration of the line search,
+    //
+    //  new_step_size >= max_line_search_step_contraction * step_size
+    //
+    // Note that by definition, for contraction:
+    //
+    //  0 < max_step_contraction < min_step_contraction < 1
+    //
+    double max_line_search_step_contraction;
+
+    // In each iteration of the line search,
+    //
+    //  new_step_size <= min_line_search_step_contraction * step_size
+    //
+    // Note that by definition, for contraction:
+    //
+    //  0 < max_step_contraction < min_step_contraction < 1
+    //
+    double min_line_search_step_contraction;
+
+    // Maximum number of trial step size iterations during each line search,
+    // if a step size satisfying the search conditions cannot be found within
+    // this number of trials, the line search will terminate.
+    int max_num_line_search_step_size_iterations;
+
+    // Maximum number of restarts of the line search direction algorithm before
+    // terminating the optimization. Restarts of the line search direction
+    // algorithm occur when the current algorithm fails to produce a new descent
+    // direction. This typically indicates a numerical failure, or a breakdown
+    // in the validity of the approximations used.
+    int max_num_line_search_direction_restarts;
+
+    // The strong Wolfe conditions consist of the Armijo sufficient
+    // decrease condition, and an additional requirement that the
+    // step-size be chosen s.t. the _magnitude_ ('strong' Wolfe
+    // conditions) of the gradient along the search direction
+    // decreases sufficiently. Precisely, this second condition
+    // is that we seek a step_size s.t.
+    //
+    //   |f'(step_size)| <= sufficient_curvature_decrease * |f'(0)|
+    //
+    // Where f() is the line search objective and f'() is the derivative
+    // of f w.r.t step_size (d f / d step_size).
+    double line_search_sufficient_curvature_decrease;
+
+    // During the bracketing phase of the Wolfe search, the step size is
+    // increased until either a point satisfying the Wolfe conditions is
+    // found, or an upper bound for a bracket containing a point satisfying
+    // the conditions is found.  Precisely, at each iteration of the
+    // expansion:
+    //
+    //   new_step_size <= max_step_expansion * step_size.
+    //
+    // By definition for expansion, max_step_expansion > 1.0.
+    double max_line_search_step_expansion;
+
     TrustRegionStrategyType trust_region_strategy_type;
 
     // Type of dogleg strategy to use.
@@ -181,11 +345,11 @@ class Solver {
     // the normal equations J'J is used to control the size of the
     // trust region. Extremely small and large values along the
     // diagonal can make this regularization scheme
-    // fail. lm_max_diagonal and lm_min_diagonal, clamp the values of
+    // fail. max_lm_diagonal and min_lm_diagonal, clamp the values of
     // diag(J'J) from above and below. In the normal course of
     // operation, the user should not have to modify these parameters.
-    double lm_min_diagonal;
-    double lm_max_diagonal;
+    double min_lm_diagonal;
+    double max_lm_diagonal;
 
     // Sometimes due to numerical conditioning problems or linear
     // solver flakiness, the trust region strategy may return a
@@ -302,19 +466,26 @@ class Solver {
     // deallocate the memory when destroyed.
     ParameterBlockOrdering* linear_solver_ordering;
 
-    // By virtue of the modeling layer in Ceres being block oriented,
-    // all the matrices used by Ceres are also block oriented. When
-    // doing sparse direct factorization of these matrices (for
-    // SPARSE_NORMAL_CHOLESKY, SPARSE_SCHUR and ITERATIVE in
-    // conjunction with CLUSTER_TRIDIAGONAL AND CLUSTER_JACOBI
-    // preconditioners), the fill-reducing ordering algorithms can
-    // either be run on the block or the scalar form of these matrices.
-    // Running it on the block form exposes more of the super-nodal
-    // structure of the matrix to the factorization routines. Setting
-    // this parameter to true runs the ordering algorithms in block
-    // form. Currently this option only makes sense with
-    // sparse_linear_algebra_library = SUITE_SPARSE.
-    bool use_block_amd;
+    // Sparse Cholesky factorization algorithms use a fill-reducing
+    // ordering to permute the columns of the Jacobian matrix. There
+    // are two ways of doing this.
+
+    // 1. Compute the Jacobian matrix in some order and then have the
+    //    factorization algorithm permute the columns of the Jacobian.
+
+    // 2. Compute the Jacobian with its columns already permuted.
+
+    // The first option incurs a significant memory penalty. The
+    // factorization algorithm has to make a copy of the permuted
+    // Jacobian matrix, thus Ceres pre-permutes the columns of the
+    // Jacobian matrix and generally speaking, there is no performance
+    // penalty for doing so.
+
+    // In some rare cases, it is worth using a more complicated
+    // reordering algorithm which has slightly better runtime
+    // performance at the expense of an extra copy of the Jacobian
+    // matrix. Setting use_postordering to true enables this tradeoff.
+    bool use_postordering;
 
     // Some non-linear least squares problems have additional
     // structure in the way the parameter blocks interact that it is
@@ -384,18 +555,30 @@ class Solver {
     //
     // 2. Specify a collection of of ordered independent sets. Where
     //    the lower numbered groups are optimized before the higher
-    //    number groups. Each group must be an independent set.
+    //    number groups. Each group must be an independent set. Not
+    //    all parameter blocks need to be present in the ordering.
     ParameterBlockOrdering* inner_iteration_ordering;
 
+    // Generally speaking, inner iterations make significant progress
+    // in the early stages of the solve and then their contribution
+    // drops down sharply, at which point the time spent doing inner
+    // iterations is not worth it.
+    //
+    // Once the relative decrease in the objective function due to
+    // inner iterations drops below inner_iteration_tolerance, the use
+    // of inner iterations in subsequent trust region minimizer
+    // iterations is disabled.
+    double inner_iteration_tolerance;
+
     // Minimum number of iterations for which the linear solver should
     // run, even if the convergence criterion is satisfied.
-    int linear_solver_min_num_iterations;
+    int min_linear_solver_iterations;
 
     // Maximum number of iterations for which the linear solver should
     // run. If the solver does not converge in less than
-    // linear_solver_max_num_iterations, then it returns
-    // MAX_ITERATIONS, as its termination type.
-    int linear_solver_max_num_iterations;
+    // max_linear_solver_iterations, then it returns MAX_ITERATIONS,
+    // as its termination type.
+    int max_linear_solver_iterations;
 
     // Forcing sequence parameter. The truncated Newton solver uses
     // this number to control the relative accuracy with which the
@@ -421,22 +604,17 @@ class Solver {
     // is sent to STDOUT.
     bool minimizer_progress_to_stdout;
 
-    bool return_initial_residuals;
-    bool return_initial_gradient;
-    bool return_initial_jacobian;
-
-    bool return_final_residuals;
-    bool return_final_gradient;
-    bool return_final_jacobian;
+    // List of iterations at which the minimizer should dump the trust
+    // region problem. Useful for testing and benchmarking. If empty
+    // (default), no problems are dumped.
+    vector<int> trust_region_minimizer_iterations_to_dump;
 
-    // List of iterations at which the optimizer should dump the
-    // linear least squares problem to disk. Useful for testing and
-    // benchmarking. If empty (default), no problems are dumped.
-    //
-    // This is ignored if protocol buffers are disabled.
-    vector<int> lsqp_iterations_to_dump;
-    string lsqp_dump_directory;
-    DumpFormatType lsqp_dump_format_type;
+    // Directory to which the problems should be written to. Should be
+    // non-empty if trust_region_minimizer_iterations_to_dump is
+    // non-empty and trust_region_problem_dump_format_type is not
+    // CONSOLE.
+    string trust_region_problem_dump_directory;
+    DumpFormatType trust_region_problem_dump_format_type;
 
     // Finite differences options ----------------------------------------------
 
@@ -518,6 +696,8 @@ class Solver {
     string FullReport() const;
 
     // Minimizer summary -------------------------------------------------
+    MinimizerType minimizer_type;
+
     SolverTerminationType termination_type;
 
     // If the solver did not run, or there was a failure, a
@@ -534,58 +714,13 @@ class Solver {
     // blocks that they depend on were fixed.
     double fixed_cost;
 
-    // Vectors of residuals before and after the optimization. The
-    // entries of these vectors are in the order in which
-    // ResidualBlocks were added to the Problem object.
-    //
-    // Whether the residual vectors are populated with values is
-    // controlled by Solver::Options::return_initial_residuals and
-    // Solver::Options::return_final_residuals respectively.
-    vector<double> initial_residuals;
-    vector<double> final_residuals;
-
-    // Gradient vectors, before and after the optimization.  The rows
-    // are in the same order in which the ParameterBlocks were added
-    // to the Problem object.
-    //
-    // NOTE: Since AddResidualBlock adds ParameterBlocks to the
-    // Problem automatically if they do not already exist, if you wish
-    // to have explicit control over the ordering of the vectors, then
-    // use Problem::AddParameterBlock to explicitly add the
-    // ParameterBlocks in the order desired.
-    //
-    // Whether the vectors are populated with values is controlled by
-    // Solver::Options::return_initial_gradient and
-    // Solver::Options::return_final_gradient respectively.
-    vector<double> initial_gradient;
-    vector<double> final_gradient;
-
-    // Jacobian matrices before and after the optimization. The rows
-    // of these matrices are in the same order in which the
-    // ResidualBlocks were added to the Problem object. The columns
-    // are in the same order in which the ParameterBlocks were added
-    // to the Problem object.
-    //
-    // NOTE: Since AddResidualBlock adds ParameterBlocks to the
-    // Problem automatically if they do not already exist, if you wish
-    // to have explicit control over the column ordering of the
-    // matrix, then use Problem::AddParameterBlock to explicitly add
-    // the ParameterBlocks in the order desired.
-    //
-    // The Jacobian matrices are stored as compressed row sparse
-    // matrices. Please see ceres/crs_matrix.h for more details of the
-    // format.
-    //
-    // Whether the Jacboan matrices are populated with values is
-    // controlled by Solver::Options::return_initial_jacobian and
-    // Solver::Options::return_final_jacobian respectively.
-    CRSMatrix initial_jacobian;
-    CRSMatrix final_jacobian;
-
     vector<IterationSummary> iterations;
 
     int num_successful_steps;
     int num_unsuccessful_steps;
+    int num_inner_iteration_steps;
+
+    // All times reported below are wall times.
 
     // When the user calls Solve, before the actual optimization
     // occurs, Ceres performs a number of preprocessing steps. These
@@ -604,14 +739,21 @@ class Solver {
     // Some total of all time spent inside Ceres when Solve is called.
     double total_time_in_seconds;
 
+    double linear_solver_time_in_seconds;
+    double residual_evaluation_time_in_seconds;
+    double jacobian_evaluation_time_in_seconds;
+    double inner_iteration_time_in_seconds;
+
     // Preprocessor summary.
     int num_parameter_blocks;
     int num_parameters;
+    int num_effective_parameters;
     int num_residual_blocks;
     int num_residuals;
 
     int num_parameter_blocks_reduced;
     int num_parameters_reduced;
+    int num_effective_parameters_reduced;
     int num_residual_blocks_reduced;
     int num_residuals_reduced;
 
@@ -627,11 +769,28 @@ class Solver {
     LinearSolverType linear_solver_type_given;
     LinearSolverType linear_solver_type_used;
 
+    vector<int> linear_solver_ordering_given;
+    vector<int> linear_solver_ordering_used;
+
+    bool inner_iterations_given;
+    bool inner_iterations_used;
+
+    vector<int> inner_iteration_ordering_given;
+    vector<int> inner_iteration_ordering_used;
+
     PreconditionerType preconditioner_type;
 
     TrustRegionStrategyType trust_region_strategy_type;
     DoglegType dogleg_type;
+
     SparseLinearAlgebraLibraryType sparse_linear_algebra_library;
+
+    LineSearchDirectionType line_search_direction_type;
+    LineSearchType line_search_type;
+    LineSearchInterpolationType line_search_interpolation_type;
+    NonlinearConjugateGradientType nonlinear_conjugate_gradient_type;
+
+    int max_lbfgs_rank;
   };
 
   // Once a least squares problem has been built, this function takes
diff --git a/include/ceres/types.h b/include/ceres/types.h
index 888e9b0..a967541 100644
--- a/include/ceres/types.h
+++ b/include/ceres/types.h
@@ -37,6 +37,8 @@
 #ifndef CERES_PUBLIC_TYPES_H_
 #define CERES_PUBLIC_TYPES_H_
 
+#include <string>
+
 #include "ceres/internal/port.h"
 
 namespace ceres {
@@ -101,8 +103,7 @@ enum PreconditionerType {
   JACOBI,
 
   // Block diagonal of the Schur complement. This preconditioner may
-  // only be used with the ITERATIVE_SCHUR solver. Requires
-  // SuiteSparse/CHOLMOD.
+  // only be used with the ITERATIVE_SCHUR solver.
   SCHUR_JACOBI,
 
   // Visibility clustering based preconditioners.
@@ -152,6 +153,98 @@ enum LoggingType {
   PER_MINIMIZER_ITERATION
 };
 
+enum MinimizerType {
+  LINE_SEARCH,
+  TRUST_REGION
+};
+
+enum LineSearchDirectionType {
+  // Negative of the gradient.
+  STEEPEST_DESCENT,
+
+  // A generalization of the Conjugate Gradient method to non-linear
+  // functions. The generalization can be performed in a number of
+  // different ways, resulting in a variety of search directions. The
+  // precise choice of the non-linear conjugate gradient algorithm
+  // used is determined by NonlinerConjuateGradientType.
+  NONLINEAR_CONJUGATE_GRADIENT,
+
+  // BFGS, and it's limited memory approximation L-BFGS, are quasi-Newton
+  // algorithms that approximate the Hessian matrix by iteratively refining
+  // an initial estimate with rank-one updates using the gradient at each
+  // iteration. They are a generalisation of the Secant method and satisfy
+  // the Secant equation.  The Secant equation has an infinium of solutions
+  // in multiple dimensions, as there are N*(N+1)/2 degrees of freedom in a
+  // symmetric matrix but only N conditions are specified by the Secant
+  // equation. The requirement that the Hessian approximation be positive
+  // definite imposes another N additional constraints, but that still leaves
+  // remaining degrees-of-freedom.  (L)BFGS methods uniquely deteremine the
+  // approximate Hessian by imposing the additional constraints that the
+  // approximation at the next iteration must be the 'closest' to the current
+  // approximation (the nature of how this proximity is measured is actually
+  // the defining difference between a family of quasi-Newton methods including
+  // (L)BFGS & DFP). (L)BFGS is currently regarded as being the best known
+  // general quasi-Newton method.
+  //
+  // The principal difference between BFGS and L-BFGS is that whilst BFGS
+  // maintains a full, dense approximation to the (inverse) Hessian, L-BFGS
+  // maintains only a window of the last M observations of the parameters and
+  // gradients. Using this observation history, the calculation of the next
+  // search direction can be computed without requiring the construction of the
+  // full dense inverse Hessian approximation. This is particularly important
+  // for problems with a large number of parameters, where storage of an N-by-N
+  // matrix in memory would be prohibitive.
+  //
+  // For more details on BFGS see:
+  //
+  // Broyden, C.G., "The Convergence of a Class of Double-rank Minimization
+  // Algorithms,"; J. Inst. Maths. Applics., Vol. 6, pp 76–90, 1970.
+  //
+  // Fletcher, R., "A New Approach to Variable Metric Algorithms,"
+  // Computer Journal, Vol. 13, pp 317–322, 1970.
+  //
+  // Goldfarb, D., "A Family of Variable Metric Updates Derived by Variational
+  // Means," Mathematics of Computing, Vol. 24, pp 23–26, 1970.
+  //
+  // Shanno, D.F., "Conditioning of Quasi-Newton Methods for Function
+  // Minimization," Mathematics of Computing, Vol. 24, pp 647–656, 1970.
+  //
+  // For more details on L-BFGS see:
+  //
+  // Nocedal, J. (1980). "Updating Quasi-Newton Matrices with Limited
+  // Storage". Mathematics of Computation 35 (151): 773–782.
+  //
+  // Byrd, R. H.; Nocedal, J.; Schnabel, R. B. (1994).
+  // "Representations of Quasi-Newton Matrices and their use in
+  // Limited Memory Methods". Mathematical Programming 63 (4):
+  // 129–156.
+  //
+  // A general reference for both methods:
+  //
+  // Nocedal J., Wright S., Numerical Optimization, 2nd Ed. Springer, 1999.
+  LBFGS,
+  BFGS,
+};
+
+// Nonliner conjugate gradient methods are a generalization of the
+// method of Conjugate Gradients for linear systems. The
+// generalization can be carried out in a number of different ways
+// leading to number of different rules for computing the search
+// direction. Ceres provides a number of different variants. For more
+// details see Numerical Optimization by Nocedal & Wright.
+enum NonlinearConjugateGradientType {
+  FLETCHER_REEVES,
+  POLAK_RIBIRERE,
+  HESTENES_STIEFEL,
+};
+
+enum LineSearchType {
+  // Backtracking line search with polynomial interpolation or
+  // bisection.
+  ARMIJO,
+  WOLFE,
+};
+
 // Ceres supports different strategies for computing the trust region
 // step.
 enum TrustRegionStrategyType {
@@ -262,13 +355,6 @@ enum DumpFormatType {
   CONSOLE,
 
   // Write out the linear least squares problem to the directory
-  // pointed to by Solver::Options::lsqp_dump_directory as a protocol
-  // buffer. linear_least_squares_problems.h/cc contains routines for
-  // loading these problems. For details on the on disk format used,
-  // see matrix.proto. The files are named lm_iteration_???.lsqp.
-  PROTOBUF,
-
-  // Write out the linear least squares problem to the directory
   // pointed to by Solver::Options::lsqp_dump_directory as text files
   // which can be read into MATLAB/Octave. The Jacobian is dumped as a
   // text file containing (i,j,s) triplets, the vectors D, x and f are
@@ -286,6 +372,23 @@ enum DimensionType {
   DYNAMIC = -1
 };
 
+enum NumericDiffMethod {
+  CENTRAL,
+  FORWARD
+};
+
+enum LineSearchInterpolationType {
+  BISECTION,
+  QUADRATIC,
+  CUBIC
+};
+
+enum CovarianceAlgorithmType {
+  DENSE_SVD,
+  SPARSE_CHOLESKY,
+  SPARSE_QR
+};
+
 const char* LinearSolverTypeToString(LinearSolverType type);
 bool StringToLinearSolverType(string value, LinearSolverType* type);
 
@@ -305,6 +408,34 @@ bool StringToTrustRegionStrategyType(string value,
 const char* DoglegTypeToString(DoglegType type);
 bool StringToDoglegType(string value, DoglegType* type);
 
+const char* MinimizerTypeToString(MinimizerType type);
+bool StringToMinimizerType(string value, MinimizerType* type);
+
+const char* LineSearchDirectionTypeToString(LineSearchDirectionType type);
+bool StringToLineSearchDirectionType(string value,
+                                     LineSearchDirectionType* type);
+
+const char* LineSearchTypeToString(LineSearchType type);
+bool StringToLineSearchType(string value, LineSearchType* type);
+
+const char* NonlinearConjugateGradientTypeToString(
+    NonlinearConjugateGradientType type);
+bool StringToNonlinearConjugateGradientType(
+    string value,
+    NonlinearConjugateGradientType* type);
+
+const char* LineSearchInterpolationTypeToString(
+    LineSearchInterpolationType type);
+bool StringToLineSearchInterpolationType(
+    string value,
+    LineSearchInterpolationType* type);
+
+const char* CovarianceAlgorithmTypeToString(
+    CovarianceAlgorithmType type);
+bool StringToCovarianceAlgorithmType(
+    string value,
+    CovarianceAlgorithmType* type);
+
 const char* LinearSolverTerminationTypeToString(
     LinearSolverTerminationType type);
author	Sascha Haeberling <haeberling@google.com>	2013-07-23 19:00:21 -0700
committer	Sascha Haeberling <haeberling@google.com>	2013-07-24 12:00:09 -0700
commit	1d2624a10e2c559f8ba9ef89eaa30832c0a83a96 (patch)
tree	f43667ef858dd0f377b15a58a9d5c9a126762c55 /include
parent	0ae28bd5885b5daa526898fcf7c323dc2c3e1963 (diff)
download	ceres-solver-1d2624a10e2c559f8ba9ef89eaa30832c0a83a96.tar.gz