diff options
Diffstat (limited to 'include/ceres/loss_function.h')
-rw-r--r-- | include/ceres/loss_function.h | 397 |
1 files changed, 397 insertions, 0 deletions
diff --git a/include/ceres/loss_function.h b/include/ceres/loss_function.h new file mode 100644 index 0000000..0c0ceaa --- /dev/null +++ b/include/ceres/loss_function.h @@ -0,0 +1,397 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2010, 2011, 2012 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) +// +// The LossFunction interface is the way users describe how residuals +// are converted to cost terms for the overall problem cost function. +// For the exact manner in which loss functions are converted to the +// overall cost for a problem, see problem.h. +// +// For least squares problem where there are no outliers and standard +// squared loss is expected, it is not necessary to create a loss +// function; instead passing a NULL to the problem when adding +// residuals implies a standard squared loss. +// +// For least squares problems where the minimization may encounter +// input terms that contain outliers, that is, completely bogus +// measurements, it is important to use a loss function that reduces +// their associated penalty. +// +// Consider a structure from motion problem. The unknowns are 3D +// points and camera parameters, and the measurements are image +// coordinates describing the expected reprojected position for a +// point in a camera. For example, we want to model the geometry of a +// street scene with fire hydrants and cars, observed by a moving +// camera with unknown parameters, and the only 3D points we care +// about are the pointy tippy-tops of the fire hydrants. Our magic +// image processing algorithm, which is responsible for producing the +// measurements that are input to Ceres, has found and matched all +// such tippy-tops in all image frames, except that in one of the +// frame it mistook a car's headlight for a hydrant. If we didn't do +// anything special (i.e. if we used a basic quadratic loss), the +// residual for the erroneous measurement will result in extreme error +// due to the quadratic nature of squared loss. This results in the +// entire solution getting pulled away from the optimimum to reduce +// the large error that would otherwise be attributed to the wrong +// measurement. +// +// Using a robust loss function, the cost for large residuals is +// reduced. In the example above, this leads to outlier terms getting +// downweighted so they do not overly influence the final solution. +// +// What cost function is best? +// +// In general, there isn't a principled way to select a robust loss +// function. The authors suggest starting with a non-robust cost, then +// only experimenting with robust loss functions if standard squared +// loss doesn't work. + +#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_ +#define CERES_PUBLIC_LOSS_FUNCTION_H_ + +#include <glog/logging.h> +#include "ceres/internal/macros.h" +#include "ceres/internal/scoped_ptr.h" +#include "ceres/types.h" + +namespace ceres { + +class LossFunction { + public: + virtual ~LossFunction() {} + + // For a residual vector with squared 2-norm 'sq_norm', this method + // is required to fill in the value and derivatives of the loss + // function (rho in this example): + // + // out[0] = rho(sq_norm), + // out[1] = rho'(sq_norm), + // out[2] = rho''(sq_norm), + // + // Here the convention is that the contribution of a term to the + // cost function is given by 1/2 rho(s), where + // + // s = ||residuals||^2. + // + // Calling the method with a negative value of 's' is an error and + // the implementations are not required to handle that case. + // + // Most sane choices of rho() satisfy: + // + // rho(0) = 0, + // rho'(0) = 1, + // rho'(s) < 1 in outlier region, + // rho''(s) < 0 in outlier region, + // + // so that they mimic the least squares cost for small residuals. + virtual void Evaluate(double sq_norm, double out[3]) const = 0; +}; + +// Some common implementations follow below. +// +// Note: in the region of interest (i.e. s < 3) we have: +// TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss + + +// This corresponds to no robustification. +// +// rho(s) = s +// +// At s = 0: rho = [0, 1, 0]. +// +// It is not normally necessary to use this, as passing NULL for the +// loss function when building the problem accomplishes the same +// thing. +class TrivialLoss : public LossFunction { + public: + virtual void Evaluate(double, double*) const; +}; + +// Scaling +// ------- +// Given one robustifier +// s -> rho(s) +// one can change the length scale at which robustification takes +// place, by adding a scale factor 'a' as follows: +// +// s -> a^2 rho(s / a^2). +// +// The first and second derivatives are: +// +// s -> rho'(s / a^2), +// s -> (1 / a^2) rho''(s / a^2), +// +// but the behaviour near s = 0 is the same as the original function, +// i.e. +// +// rho(s) = s + higher order terms, +// a^2 rho(s / a^2) = s + higher order terms. +// +// The scalar 'a' should be positive. +// +// The reason for the appearance of squaring is that 'a' is in the +// units of the residual vector norm whereas 's' is a squared +// norm. For applications it is more convenient to specify 'a' than +// its square. The commonly used robustifiers below are described in +// un-scaled format (a = 1) but their implementations work for any +// non-zero value of 'a'. + +// Huber. +// +// rho(s) = s for s <= 1, +// rho(s) = 2 sqrt(s) - 1 for s >= 1. +// +// At s = 0: rho = [0, 1, 0]. +// +// The scaling parameter 'a' corresponds to 'delta' on this page: +// http://en.wikipedia.org/wiki/Huber_Loss_Function +class HuberLoss : public LossFunction { + public: + explicit HuberLoss(double a) : a_(a), b_(a * a) { } + virtual void Evaluate(double, double*) const; + + private: + const double a_; + // b = a^2. + const double b_; +}; + +// Soft L1, similar to Huber but smooth. +// +// rho(s) = 2 (sqrt(1 + s) - 1). +// +// At s = 0: rho = [0, 1, -1/2]. +class SoftLOneLoss : public LossFunction { + public: + explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { } + virtual void Evaluate(double, double*) const; + + private: + // b = a^2. + const double b_; + // c = 1 / a^2. + const double c_; +}; + +// Inspired by the Cauchy distribution +// +// rho(s) = log(1 + s). +// +// At s = 0: rho = [0, 1, -1]. +class CauchyLoss : public LossFunction { + public: + explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { } + virtual void Evaluate(double, double*) const; + + private: + // b = a^2. + const double b_; + // c = 1 / a^2. + const double c_; +}; + +// Loss that is capped beyond a certain level using the arc-tangent function. +// The scaling parameter 'a' determines the level where falloff occurs. +// For costs much smaller than 'a', the loss function is linear and behaves like +// TrivialLoss, and for values much larger than 'a' the value asymptotically +// approaches the constant value of a * PI / 2. +// +// rho(s) = a atan(s / a). +// +// At s = 0: rho = [0, 1, 0]. +class ArctanLoss : public LossFunction { + public: + explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) { } + virtual void Evaluate(double, double*) const; + + private: + const double a_; + // b = 1 / a^2. + const double b_; +}; + +// Loss function that maps to approximately zero cost in a range around the +// origin, and reverts to linear in error (quadratic in cost) beyond this range. +// The tolerance parameter 'a' sets the nominal point at which the +// transition occurs, and the transition size parameter 'b' sets the nominal +// distance over which most of the transition occurs. Both a and b must be +// greater than zero, and typically b will be set to a fraction of a. +// The slope rho'[s] varies smoothly from about 0 at s <= a - b to +// about 1 at s >= a + b. +// +// The term is computed as: +// +// rho(s) = b log(1 + exp((s - a) / b)) - c0. +// +// where c0 is chosen so that rho(0) == 0 +// +// c0 = b log(1 + exp(-a / b) +// +// This has the following useful properties: +// +// rho(s) == 0 for s = 0 +// rho'(s) ~= 0 for s << a - b +// rho'(s) ~= 1 for s >> a + b +// rho''(s) > 0 for all s +// +// In addition, all derivatives are continuous, and the curvature is +// concentrated in the range a - b to a + b. +// +// At s = 0: rho = [0, ~0, ~0]. +class TolerantLoss : public LossFunction { + public: + explicit TolerantLoss(double a, double b); + virtual void Evaluate(double, double*) const; + + private: + const double a_, b_, c_; +}; + +// Composition of two loss functions. The error is the result of first +// evaluating g followed by f to yield the composition f(g(s)). +// The loss functions must not be NULL. +class ComposedLoss : public LossFunction { + public: + explicit ComposedLoss(const LossFunction* f, Ownership ownership_f, + const LossFunction* g, Ownership ownership_g); + virtual ~ComposedLoss(); + virtual void Evaluate(double, double*) const; + + private: + internal::scoped_ptr<const LossFunction> f_, g_; + const Ownership ownership_f_, ownership_g_; +}; + +// The discussion above has to do with length scaling: it affects the space +// in which s is measured. Sometimes you want to simply scale the output +// value of the robustifier. For example, you might want to weight +// different error terms differently (e.g., weight pixel reprojection +// errors differently from terrain errors). +// +// If rho is the wrapped robustifier, then this simply outputs +// s -> a * rho(s) +// +// The first and second derivatives are, not surprisingly +// s -> a * rho'(s) +// s -> a * rho''(s) +// +// Since we treat the a NULL Loss function as the Identity loss +// function, rho = NULL is a valid input and will result in the input +// being scaled by a. This provides a simple way of implementing a +// scaled ResidualBlock. +class ScaledLoss : public LossFunction { + public: + // Constructs a ScaledLoss wrapping another loss function. Takes + // ownership of the wrapped loss function or not depending on the + // ownership parameter. + ScaledLoss(const LossFunction* rho, double a, Ownership ownership) : + rho_(rho), a_(a), ownership_(ownership) { } + + virtual ~ScaledLoss() { + if (ownership_ == DO_NOT_TAKE_OWNERSHIP) { + rho_.release(); + } + } + virtual void Evaluate(double, double*) const; + + private: + internal::scoped_ptr<const LossFunction> rho_; + const double a_; + const Ownership ownership_; + CERES_DISALLOW_COPY_AND_ASSIGN(ScaledLoss); +}; + +// Sometimes after the optimization problem has been constructed, we +// wish to mutate the scale of the loss function. For example, when +// performing estimation from data which has substantial outliers, +// convergence can be improved by starting out with a large scale, +// optimizing the problem and then reducing the scale. This can have +// better convergence behaviour than just using a loss function with a +// small scale. +// +// This templated class allows the user to implement a loss function +// whose scale can be mutated after an optimization problem has been +// constructed. +// +// Example usage +// +// Problem problem; +// +// // Add parameter blocks +// +// CostFunction* cost_function = +// new AutoDiffCostFunction < UW_Camera_Mapper, 2, 9, 3>( +// new UW_Camera_Mapper(data->observations[2*i + 0], +// data->observations[2*i + 1])); +// +// LossFunctionWrapper* loss_function(new HuberLoss(1.0), TAKE_OWNERSHIP); +// +// problem.AddResidualBlock(cost_function, loss_function, parameters); +// +// Solver::Options options; +// scoped_ptr<Solver::Summary> summary1(Solve(problem, options)); +// +// loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP); +// +// scoped_ptr<Solver::Summary> summary2(Solve(problem, options)); +// +class LossFunctionWrapper : public LossFunction { + public: + LossFunctionWrapper(LossFunction* rho, Ownership ownership) + : rho_(rho), ownership_(ownership) { + } + + virtual ~LossFunctionWrapper() { + if (ownership_ == DO_NOT_TAKE_OWNERSHIP) { + rho_.release(); + } + } + + virtual void Evaluate(double sq_norm, double out[3]) const { + CHECK_NOTNULL(rho_.get()); + rho_->Evaluate(sq_norm, out); + } + + void Reset(LossFunction* rho, Ownership ownership) { + if (ownership_ == DO_NOT_TAKE_OWNERSHIP) { + rho_.release(); + } + rho_.reset(rho); + ownership_ = ownership; + } + + private: + internal::scoped_ptr<const LossFunction> rho_; + Ownership ownership_; + CERES_DISALLOW_COPY_AND_ASSIGN(LossFunctionWrapper); +}; + +} // namespace ceres + +#endif // CERES_PUBLIC_LOSS_FUNCTION_H_ |