Diffstat (limited to 'include/ceres/loss_function.h')
-rw-r--r--  include/ceres/loss_function.h  397
 1 file changed, 397 insertions(+), 0 deletions(-)
diff --git a/include/ceres/loss_function.h b/include/ceres/loss_function.h
new file mode 100644
index 0000000..0c0ceaa
--- /dev/null
+++ b/include/ceres/loss_function.h
@@ -0,0 +1,397 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+//
+// The LossFunction interface is the way users describe how residuals
+// are converted to cost terms for the overall problem cost function.
+// For the exact manner in which loss functions are converted to the
+// overall cost for a problem, see problem.h.
+//
+// For least squares problems where there are no outliers and standard
+// squared loss is expected, it is not necessary to create a loss
+// function; instead, passing NULL to the problem when adding residuals
+// implies a standard squared loss.
+//
+// For least squares problems where the minimization may encounter
+// input terms that contain outliers, that is, completely bogus
+// measurements, it is important to use a loss function that reduces
+// their associated penalty.
+//
+// Consider a structure from motion problem. The unknowns are 3D
+// points and camera parameters, and the measurements are image
+// coordinates describing the expected reprojected position for a
+// point in a camera. For example, we want to model the geometry of a
+// street scene with fire hydrants and cars, observed by a moving
+// camera with unknown parameters, and the only 3D points we care
+// about are the pointy tippy-tops of the fire hydrants. Our magic
+// image processing algorithm, which is responsible for producing the
+// measurements that are input to Ceres, has found and matched all
+// such tippy-tops in all image frames, except that in one of the
+// frames it mistook a car's headlight for a hydrant. If we didn't do
+// anything special (i.e. if we used a basic quadratic loss), the
+// residual for the erroneous measurement would result in an extreme
+// error due to the quadratic nature of squared loss, and the entire
+// solution would get pulled away from the optimum to reduce the large
+// error that would otherwise be attributed to the wrong measurement.
+//
+// Using a robust loss function, the cost for large residuals is
+// reduced. In the example above, this leads to outlier terms getting
+// downweighted so they do not overly influence the final solution.
+//
+// What loss function is best?
+//
+// In general, there isn't a principled way to select a robust loss
+// function. The authors suggest starting with a non-robust cost, then
+// only experimenting with robust loss functions if standard squared
+// loss doesn't work.
+
+#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
+#define CERES_PUBLIC_LOSS_FUNCTION_H_
+
+#include <glog/logging.h>
+#include "ceres/internal/macros.h"
+#include "ceres/internal/scoped_ptr.h"
+#include "ceres/types.h"
+
+namespace ceres {
+
+class LossFunction {
+ public:
+ virtual ~LossFunction() {}
+
+ // For a residual vector with squared 2-norm 'sq_norm', this method
+ // is required to fill in the value and derivatives of the loss
+ // function (rho in this example):
+ //
+ // out[0] = rho(sq_norm),
+ // out[1] = rho'(sq_norm),
+ // out[2] = rho''(sq_norm),
+ //
+ // Here the convention is that the contribution of a term to the
+ // cost function is given by 1/2 rho(s), where
+ //
+ // s = ||residuals||^2.
+ //
+// Calling the method with a negative value of 's' is an error;
+// implementations are not required to handle that case.
+ //
+ // Most sane choices of rho() satisfy:
+ //
+ // rho(0) = 0,
+ // rho'(0) = 1,
+ // rho'(s) < 1 in outlier region,
+ // rho''(s) < 0 in outlier region,
+ //
+ // so that they mimic the least squares cost for small residuals.
+ virtual void Evaluate(double sq_norm, double out[3]) const = 0;
+};
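+
+// As an illustration of this contract, a user-defined loss implementing
+// the exponential robustifier
+//
+//   rho(s) = 1 - exp(-s)
+//
+// (sometimes called the Welsch loss) could be written as follows. This
+// is a sketch only; the class is hypothetical and not part of the
+// library:
+//
+//   class ExponentialLoss : public LossFunction {
+//    public:
+//     virtual void Evaluate(double s, double out[3]) const {
+//       const double e = exp(-s);
+//       out[0] = 1.0 - e;  // rho(s), with rho(0) = 0
+//       out[1] = e;        // rho'(s), equal to 1 at s = 0
+//       out[2] = -e;       // rho''(s), negative in the outlier region
+//     }
+//   };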
+
+// Some common implementations follow below.
+//
+// Note: in the region of interest (i.e. s < 3) we have:
+// TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss
+
+
+// This corresponds to no robustification.
+//
+// rho(s) = s
+//
+// At s = 0: rho = [0, 1, 0].
+//
+// It is not normally necessary to use this, as passing NULL for the
+// loss function when building the problem accomplishes the same
+// thing.
+class TrivialLoss : public LossFunction {
+ public:
+ virtual void Evaluate(double, double*) const;
+};
+
+// Scaling
+// -------
+// Given one robustifier
+// s -> rho(s)
+// one can change the length scale at which robustification takes
+// place by adding a scale factor 'a' as follows:
+//
+// s -> a^2 rho(s / a^2).
+//
+// The first and second derivatives are:
+//
+// s -> rho'(s / a^2),
+// s -> (1 / a^2) rho''(s / a^2),
+//
+// but the behaviour near s = 0 is the same as the original function,
+// i.e.
+//
+// rho(s) = s + higher order terms,
+// a^2 rho(s / a^2) = s + higher order terms.
+//
+// The scalar 'a' should be positive.
+//
+// The reason for the appearance of squaring is that 'a' is in the
+// units of the residual vector norm whereas 's' is a squared
+// norm. For applications it is more convenient to specify 'a' than
+// its square. The commonly used robustifiers below are described in
+// un-scaled format (a = 1) but their implementations work for any
+// non-zero value of 'a'.
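+//
+// Concretely, the scaled version of any unscaled robustifier could be
+// evaluated with a helper like the following. This is a hypothetical
+// sketch to illustrate the convention; no such helper exists in the
+// library:
+//
+//   void EvaluateScaled(const LossFunction& unscaled, double a,
+//                       double s, double out[3]) {
+//     const double b = a * a;
+//     double u[3];
+//     unscaled.Evaluate(s / b, u);
+//     out[0] = b * u[0];  // a^2 rho(s / a^2)
+//     out[1] = u[1];      // rho'(s / a^2)
+//     out[2] = u[2] / b;  // (1 / a^2) rho''(s / a^2)
+//   }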
+
+// Huber.
+//
+// rho(s) = s for s <= 1,
+// rho(s) = 2 sqrt(s) - 1 for s >= 1.
+//
+// At s = 0: rho = [0, 1, 0].
+//
+// The scaling parameter 'a' corresponds to 'delta' on this page:
+// http://en.wikipedia.org/wiki/Huber_Loss_Function
+class HuberLoss : public LossFunction {
+ public:
+ explicit HuberLoss(double a) : a_(a), b_(a * a) { }
+ virtual void Evaluate(double, double*) const;
+
+ private:
+ const double a_;
+ // b = a^2.
+ const double b_;
+};
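+
+// A sketch of how Evaluate might compute the scaled Huber function in
+// terms of the members above (the library's actual out-of-line
+// definition may differ):
+//
+//   void HuberLoss::Evaluate(double s, double rho[3]) const {
+//     if (s > b_) {
+//       // Outlier region: a^2 rho(s / a^2) = 2 a sqrt(s) - a^2.
+//       const double r = sqrt(s);
+//       rho[0] = 2.0 * a_ * r - b_;
+//       rho[1] = a_ / r;
+//       rho[2] = -rho[1] / (2.0 * s);
+//     } else {
+//       // Inlier region: identical to squared loss.
+//       rho[0] = s;
+//       rho[1] = 1.0;
+//       rho[2] = 0.0;
+//     }
+//   }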
+
+// Soft L1, similar to Huber but smooth.
+//
+// rho(s) = 2 (sqrt(1 + s) - 1).
+//
+// At s = 0: rho = [0, 1, -1/2].
+class SoftLOneLoss : public LossFunction {
+ public:
+ explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { }
+ virtual void Evaluate(double, double*) const;
+
+ private:
+ // b = a^2.
+ const double b_;
+ // c = 1 / a^2.
+ const double c_;
+};
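+
+// In scaled form, rho(s) = 2 b (sqrt(1 + s / b) - 1) with b = a^2. A
+// sketch of Evaluate in terms of the members above (the actual
+// out-of-line definition may differ):
+//
+//   void SoftLOneLoss::Evaluate(double s, double rho[3]) const {
+//     const double sum = 1.0 + s * c_;
+//     const double tmp = sqrt(sum);
+//     rho[0] = 2.0 * b_ * (tmp - 1.0);        // a^2 rho(s / a^2)
+//     rho[1] = 1.0 / tmp;                     // rho'(s / a^2)
+//     rho[2] = -(c_ * rho[1]) / (2.0 * sum);  // (1 / a^2) rho''(s / a^2)
+//   }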
+
+// Inspired by the Cauchy distribution
+//
+// rho(s) = log(1 + s).
+//
+// At s = 0: rho = [0, 1, -1].
+class CauchyLoss : public LossFunction {
+ public:
+ explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { }
+ virtual void Evaluate(double, double*) const;
+
+ private:
+ // b = a^2.
+ const double b_;
+ // c = 1 / a^2.
+ const double c_;
+};
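+
+// In scaled form, rho(s) = b log(1 + s / b) with b = a^2. A sketch of
+// Evaluate in terms of the members above (the actual out-of-line
+// definition may differ):
+//
+//   void CauchyLoss::Evaluate(double s, double rho[3]) const {
+//     const double sum = 1.0 + s * c_;
+//     const double inv = 1.0 / sum;
+//     rho[0] = b_ * log(sum);
+//     rho[1] = inv;
+//     rho[2] = -c_ * (inv * inv);
+//   }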
+
+// Loss that is capped beyond a certain level using the arc-tangent function.
+// The scaling parameter 'a' determines the level where falloff occurs.
+// For costs much smaller than 'a', the loss function is linear and behaves like
+// TrivialLoss, and for values much larger than 'a' the value asymptotically
+// approaches the constant value of a * PI / 2.
+//
+// rho(s) = a atan(s / a).
+//
+// At s = 0: rho = [0, 1, 0].
+class ArctanLoss : public LossFunction {
+ public:
+ explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) { }
+ virtual void Evaluate(double, double*) const;
+
+ private:
+ const double a_;
+ // b = 1 / a^2.
+ const double b_;
+};
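+
+// A sketch of Evaluate in terms of the members above; note that unlike
+// the losses above, 'a' here scales s directly (rho(s) = a atan(s / a))
+// rather than via the a^2 convention. The actual out-of-line definition
+// may differ:
+//
+//   void ArctanLoss::Evaluate(double s, double rho[3]) const {
+//     const double sum = 1.0 + s * s * b_;
+//     const double inv = 1.0 / sum;
+//     rho[0] = a_ * atan2(s, a_);  // a atan(s / a), since a > 0
+//     rho[1] = inv;                // 1 / (1 + s^2 / a^2)
+//     rho[2] = -2.0 * s * b_ * (inv * inv);
+//   }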
+
+// Loss function that maps to approximately zero cost in a range around the
+// origin, and reverts to linear in error (quadratic in cost) beyond this range.
+// The tolerance parameter 'a' sets the nominal point at which the
+// transition occurs, and the transition size parameter 'b' sets the nominal
+// distance over which most of the transition occurs. Both a and b must be
+// greater than zero, and typically b will be set to a fraction of a.
+// The slope rho'(s) varies smoothly from about 0 at s <= a - b to
+// about 1 at s >= a + b.
+//
+// The term is computed as:
+//
+// rho(s) = b log(1 + exp((s - a) / b)) - c0.
+//
+// where c0 is chosen so that rho(0) == 0:
+//
+// c0 = b log(1 + exp(-a / b)).
+//
+// This has the following useful properties:
+//
+// rho(s) == 0 for s = 0
+// rho'(s) ~= 0 for s << a - b
+// rho'(s) ~= 1 for s >> a + b
+// rho''(s) > 0 for all s
+//
+// In addition, all derivatives are continuous, and the curvature is
+// concentrated in the range a - b to a + b.
+//
+// At s = 0: rho = [0, ~0, ~0].
+class TolerantLoss : public LossFunction {
+ public:
+ explicit TolerantLoss(double a, double b);
+ virtual void Evaluate(double, double*) const;
+
+ private:
+ const double a_, b_, c_;
+};
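+
+// Assuming c_ stores the constant c0 above, a sketch of Evaluate that
+// ignores the overflow protection a careful implementation needs when
+// (s - a) / b is large:
+//
+//   void TolerantLoss::Evaluate(double s, double rho[3]) const {
+//     const double e_x = exp((s - a_) / b_);
+//     rho[0] = b_ * log(1.0 + e_x) - c_;
+//     rho[1] = e_x / (1.0 + e_x);  // in (0, 1): ~0 below a, ~1 above
+//     rho[2] = e_x / (b_ * (1.0 + e_x) * (1.0 + e_x));  // always > 0
+//   }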
+
+// Composition of two loss functions. The error is the result of first
+// evaluating g followed by f to yield the composition f(g(s)).
+// The loss functions must not be NULL.
+class ComposedLoss : public LossFunction {
+ public:
+ explicit ComposedLoss(const LossFunction* f, Ownership ownership_f,
+ const LossFunction* g, Ownership ownership_g);
+ virtual ~ComposedLoss();
+ virtual void Evaluate(double, double*) const;
+
+ private:
+ internal::scoped_ptr<const LossFunction> f_, g_;
+ const Ownership ownership_f_, ownership_g_;
+};
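+
+// By the chain rule, if h(s) = f(g(s)) then
+//
+//   h'(s)  = f'(g(s)) g'(s),
+//   h''(s) = f''(g(s)) g'(s)^2 + f'(g(s)) g''(s),
+//
+// which suggests the following sketch of Evaluate (the actual
+// out-of-line definition may differ):
+//
+//   void ComposedLoss::Evaluate(double s, double rho[3]) const {
+//     double rho_f[3], rho_g[3];
+//     g_->Evaluate(s, rho_g);
+//     f_->Evaluate(rho_g[0], rho_f);
+//     rho[0] = rho_f[0];
+//     rho[1] = rho_f[1] * rho_g[1];
+//     rho[2] = rho_f[2] * rho_g[1] * rho_g[1] + rho_f[1] * rho_g[2];
+//   }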
+
+// The discussion above has to do with length scaling: it affects the space
+// in which s is measured. Sometimes you want to simply scale the output
+// value of the robustifier. For example, you might want to weight
+// different error terms differently (e.g., weight pixel reprojection
+// errors differently from terrain errors).
+//
+// If rho is the wrapped robustifier, then this simply outputs
+// s -> a * rho(s)
+//
+// The first and second derivatives are, not surprisingly,
+// s -> a * rho'(s)
+// s -> a * rho''(s)
+//
+// Since we treat a NULL loss function as the identity loss function,
+// rho = NULL is a valid input and will result in the input being
+// scaled by a. This provides a simple way of implementing a scaled
+// ResidualBlock.
+class ScaledLoss : public LossFunction {
+ public:
+ // Constructs a ScaledLoss wrapping another loss function. Takes
+ // ownership of the wrapped loss function or not depending on the
+ // ownership parameter.
+ ScaledLoss(const LossFunction* rho, double a, Ownership ownership) :
+ rho_(rho), a_(a), ownership_(ownership) { }
+
+ virtual ~ScaledLoss() {
+ if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
+ rho_.release();
+ }
+ }
+ virtual void Evaluate(double, double*) const;
+
+ private:
+ internal::scoped_ptr<const LossFunction> rho_;
+ const double a_;
+ const Ownership ownership_;
+ CERES_DISALLOW_COPY_AND_ASSIGN(ScaledLoss);
+};
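+
+// A sketch of Evaluate, including the NULL case described above (the
+// actual out-of-line definition may differ):
+//
+//   void ScaledLoss::Evaluate(double s, double rho[3]) const {
+//     if (rho_.get() == NULL) {
+//       rho[0] = a_ * s;  // identity loss, scaled by a
+//       rho[1] = a_;
+//       rho[2] = 0.0;
+//     } else {
+//       rho_->Evaluate(s, rho);
+//       rho[0] *= a_;
+//       rho[1] *= a_;
+//       rho[2] *= a_;
+//     }
+//   }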
+
+// Sometimes after the optimization problem has been constructed, we
+// wish to mutate the scale of the loss function. For example, when
+// performing estimation from data which has substantial outliers,
+// convergence can be improved by starting out with a large scale,
+// optimizing the problem and then reducing the scale. This can have
+// better convergence behaviour than just using a loss function with a
+// small scale.
+//
+// This templated class allows the user to implement a loss function
+// whose scale can be mutated after an optimization problem has been
+// constructed.
+//
+// Example usage:
+//
+// Problem problem;
+//
+// // Add parameter blocks
+//
+//   CostFunction* cost_function =
+//       new AutoDiffCostFunction<UW_Camera_Mapper, 2, 9, 3>(
+//           new UW_Camera_Mapper(data->observations[2*i + 0],
+//                                data->observations[2*i + 1]));
+//
+//   LossFunctionWrapper* loss_function =
+//       new LossFunctionWrapper(new HuberLoss(1.0), TAKE_OWNERSHIP);
+//
+//   problem.AddResidualBlock(cost_function, loss_function, parameters);
+//
+//   Solver::Options options;
+//   Solver::Summary summary;
+//   Solve(options, &problem, &summary);
+//
+//   loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
+//
+//   Solve(options, &problem, &summary);
+//
+class LossFunctionWrapper : public LossFunction {
+ public:
+ LossFunctionWrapper(LossFunction* rho, Ownership ownership)
+ : rho_(rho), ownership_(ownership) {
+ }
+
+ virtual ~LossFunctionWrapper() {
+ if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
+ rho_.release();
+ }
+ }
+
+ virtual void Evaluate(double sq_norm, double out[3]) const {
+ CHECK_NOTNULL(rho_.get());
+ rho_->Evaluate(sq_norm, out);
+ }
+
+ void Reset(LossFunction* rho, Ownership ownership) {
+ if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
+ rho_.release();
+ }
+ rho_.reset(rho);
+ ownership_ = ownership;
+ }
+
+ private:
+ internal::scoped_ptr<const LossFunction> rho_;
+ Ownership ownership_;
+ CERES_DISALLOW_COPY_AND_ASSIGN(LossFunctionWrapper);
+};
+
+} // namespace ceres
+
+#endif // CERES_PUBLIC_LOSS_FUNCTION_H_