path: root/internal/ceres/schur_eliminator.h
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2010, 2011, 2012 Google Inc. All rights reserved.
// http://code.google.com/p/ceres-solver/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_H_
#define CERES_INTERNAL_SCHUR_ELIMINATOR_H_

#include <map>
#include <vector>
#include "ceres/mutex.h"
#include "ceres/block_random_access_matrix.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/linear_solver.h"
#include "ceres/internal/eigen.h"
#include "ceres/internal/scoped_ptr.h"

namespace ceres {
namespace internal {

// Classes implementing the SchurEliminatorBase interface implement
// variable elimination for linear least squares problems. Assuming
// that the input linear system Ax = b can be partitioned into
//
//  E y + F z = b
//
// where x = [y;z] is a partition of the variables. The partitioning
// of the variables is such that E'E is a block diagonal matrix, or,
// in other words, the parameter blocks in E form an independent set
// in the graph implied by the block matrix A'A. Then, this
// class provides the functionality to compute the Schur complement
// system
//
//   S z = r
//
// where
//
//   S = F'F - F'E (E'E)^(-1) E'F  and  r = F'b - F'E (E'E)^(-1) E'b
//
// This is the Eliminate operation, i.e., construct the linear system
// obtained by eliminating the variables in E.
//
// The eliminator also provides the reverse functionality, i.e., given
// values for z it can back substitute for the values of y by solving
// the linear system
//
//  Ey = b - F z
//
// which is done by observing that
//
//  y = (E'E)^(-1) [E'b - E'F z]
//
// The eliminator has a number of requirements.
//
// The rows of A are ordered so that for every variable block in y,
// all the rows containing that variable block occur as a vertically
// contiguous block, i.e., the matrix A looks like
//
//              E                 F                   chunk
//  A = [ y1   0   0   0 |  z1    0    0   0    z5]     1
//      [ y1   0   0   0 |  z1   z2    0   0     0]     1
//      [  0  y2   0   0 |   0    0   z3   0     0]     2
//      [  0   0  y3   0 |  z1   z2   z3  z4    z5]     3
//      [  0   0  y3   0 |  z1    0    0   0    z5]     3
//      [  0   0   0  y4 |   0    0    0   0    z5]     4
//      [  0   0   0  y4 |   0   z2    0   0     0]     4
//      [  0   0   0  y4 |   0    0    0   0     0]     4
//      [  0   0   0   0 |  z1    0    0   0     0] non chunk blocks
//      [  0   0   0   0 |   0    0   z3  z4    z5] non chunk blocks
//
// This structure should be reflected in the corresponding
// CompressedRowBlockStructure object associated with A. The linear
// system Ax = b should either be well posed or the array D below
// should be non-null and the diagonal matrix corresponding to it
// should be non-singular. For simplicity of exposition only the case
// with a null D is described.
//
// The usual way to do the elimination is as follows. Starting with
//
//  E y + F z = b
//
// we can form the normal equations,
//
//  E'E y + E'F z = E'b
//  F'E y + F'F z = F'b
//
// multiplying both sides of the first equation by (E'E)^(-1) and then
// by F'E we get
//
//  F'E y + F'E (E'E)^(-1) E'F z =  F'E (E'E)^(-1) E'b
//  F'E y +                F'F z =  F'b
//
// now subtracting the two equations we get
//
// [F'F - F'E (E'E)^(-1) E'F] z = F'b - F'E (E'E)^(-1) E'b
//
// Instead of forming the normal equations and operating on them as
// general sparse matrices, the algorithm here deals with one
// parameter block in y at a time. The rows corresponding to a single
// parameter block yi are known as a chunk, and the algorithm operates
// on one chunk at a time. The mathematics remains the same since the
// reduced linear system can be shown to be the sum of the reduced
// linear systems for each chunk. This can be seen by observing two
// things.
//
//  1. E'E is a block diagonal matrix.
//
//  2. When E'F is computed, only the terms within a single chunk
//  interact, i.e., when the y1 column blocks are transposed and
//  multiplied with F, the only non-zero contributions come from the
//  rows in chunk 1.
//
// Thus, the reduced linear system
//
//  F'F - F'E (E'E)^(-1) E'F
//
// can be re-written as
//
//  sum_k F_k'F_k - F_k'E_k (E_k'E_k)^(-1) E_k'F_k
//
// where the sum is over chunks and E_k'E_k is a dense matrix whose
// size equals that of the y block for chunk k (y1 x y1 for the first
// chunk).
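//
// Concretely, if E_k, F_k and b_k denote the rows of E, F and b
// belonging to chunk k (with rows containing no e-block treated as
// chunks whose E_k part is empty, so that only their F_k'F_k and
// F_k'b_k terms survive), this amounts to
//
//  S = sum_k [ F_k'F_k - F_k'E_k (E_k'E_k)^(-1) E_k'F_k ]
//  r = sum_k [ F_k'b_k - F_k'E_k (E_k'E_k)^(-1) E_k'b_k ]
//
// which is the decomposition that the chunk-by-chunk elimination
// below exploits.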
//
// Advanced usage. Up to now it has been assumed that the user is
// interested in all of the Schur complement S. However, it is also
// possible to use this eliminator to obtain an arbitrary submatrix of
// the full Schur complement. When the eliminator is generating the
// blocks of S, it asks the BlockRandomAccessMatrix instance passed to
// it if it has storage for that block. If it does, the eliminator
// computes/updates it; if not, the block is skipped. This is useful
// when one is interested in constructing a preconditioner based on
// the Schur complement, e.g., computing the block diagonal of S so
// that it can be used as a preconditioner for an Iterative
// Substructuring based solver [See Agarwal et al, Bundle Adjustment
// in the Large, ECCV 2010 for an example of such use].
//
// Example usage: Please see schur_complement_solver.cc
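//
// As a rough, hypothetical sketch (variable names here are purely
// illustrative; the lhs object can be any BlockRandomAccessMatrix
// implementation with storage for the desired blocks of S, and
// block_structure() stands for however the caller obtains the
// CompressedRowBlockStructure of A):
//
//   scoped_ptr<SchurEliminatorBase> eliminator(
//       SchurEliminatorBase::Create(options));
//   eliminator->Init(num_eliminate_blocks, A->block_structure());
//
//   // Form the reduced system S z = r in lhs and rhs.
//   eliminator->Eliminate(A, b, D, lhs, rhs);
//
//   // ... solve lhs z = rhs for z with a solver of choice ...
//
//   // Back substitute to recover the eliminated variables y.
//   eliminator->BackSubstitute(A, b, D, z, y);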
class SchurEliminatorBase {
 public:
  virtual ~SchurEliminatorBase() {}

  // Initialize the eliminator. It is the user's responsibility to call
  // this function before calling Eliminate or BackSubstitute. It is
  // also the caller's responsibility to ensure that the
  // CompressedRowBlockStructure object passed to this method is the
  // same as (or equivalent to) the one associated with the
  // BlockSparseMatrixBase objects below.
  virtual void Init(int num_eliminate_blocks,
                    const CompressedRowBlockStructure* bs) = 0;

  // Compute the Schur complement system from the augmented linear
  // least squares problem [A;D] x = [b;0]. The left hand side and the
  // right hand side of the reduced linear system are returned in lhs
  // and rhs respectively.
  //
  // It is the caller's responsibility to construct and initialize
  // lhs. Depending upon the structure of the lhs object passed here,
  // the full or a submatrix of the Schur complement will be computed.
  //
  // Since the Schur complement is a symmetric matrix, only the upper
  // triangular part of the Schur complement is computed.
  virtual void Eliminate(const BlockSparseMatrixBase* A,
                         const double* b,
                         const double* D,
                         BlockRandomAccessMatrix* lhs,
                         double* rhs) = 0;

  // Given values for the variables z in the F block of A, solve for
  // the optimal values of the variables y corresponding to the E
  // block in A.
  virtual void BackSubstitute(const BlockSparseMatrixBase* A,
                              const double* b,
                              const double* D,
                              const double* z,
                              double* y) = 0;
  // Factory
  static SchurEliminatorBase* Create(const LinearSolver::Options& options);
};

// Templated implementation of the SchurEliminatorBase interface. The
// templating is on the sizes of the row, e and f blocks in the input
// matrix. In many problems the sizes of one or more of these blocks
// are constant; in that case, it is worth passing them as template
// arguments so that they are visible to the compiler and can be used
// for compile-time optimization of the low level linear algebra
// routines.
//
// This implementation is multithreaded using OpenMP. The level of
// parallelism is controlled by LinearSolver::Options::num_threads.
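//
// For example, in a bundle adjustment problem with 2-dimensional
// residuals (reprojection errors), 3-dimensional point blocks and
// 9-parameter camera blocks (sizes chosen purely for illustration),
// one could instantiate
//
//   SchurEliminator<2, 3, 9> eliminator(options);
//
// while SchurEliminator<> (all sizes Dynamic) handles arbitrary block
// sizes at some runtime cost.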
template <int kRowBlockSize = Dynamic,
          int kEBlockSize = Dynamic,
          int kFBlockSize = Dynamic >
class SchurEliminator : public SchurEliminatorBase {
 public:
  explicit SchurEliminator(const LinearSolver::Options& options)
      : num_threads_(options.num_threads) {
  }

  // SchurEliminatorBase Interface
  virtual ~SchurEliminator();
  virtual void Init(int num_eliminate_blocks,
                    const CompressedRowBlockStructure* bs);
  virtual void Eliminate(const BlockSparseMatrixBase* A,
                         const double* b,
                         const double* D,
                         BlockRandomAccessMatrix* lhs,
                         double* rhs);
  virtual void BackSubstitute(const BlockSparseMatrixBase* A,
                              const double* b,
                              const double* D,
                              const double* z,
                              double* y);

 private:
  // Chunk objects store combinatorial information needed to
  // efficiently eliminate a whole chunk out of the least squares
  // problem. Consider the first chunk in the example matrix above.
  //
  //      [ y1   0   0   0 |  z1    0    0   0    z5]
  //      [ y1   0   0   0 |  z1   z2    0   0     0]
  //
  // One of the intermediate quantities that needs to be calculated is,
  // for each row, the product of the transposed y block with each
  // non-zero z block, along with the sum of these products across the
  // rows of the chunk. A temporary array "buffer_" is used for
  // computing and storing them, and buffer_layout maps the index of
  // each z block to its position in the buffer_ array. The size of the
  // chunk is the number of row blocks/residual blocks for the
  // particular y block being considered.
  //
  // For the example chunk shown above,
  //
  // size = 2
  //
  // and the entries of buffer_layout will be filled in the following
  // order, where y1 * z1 etc. denote products of the corresponding
  // block sizes:
  //
  // buffer_layout[z1] = 0
  // buffer_layout[z5] = y1 * z1
  // buffer_layout[z2] = y1 * z1 + y1 * z5
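  //
  // For instance, if the block sizes happened to be |y1| = 3, |z1| = 2,
  // |z5| = 3 and |z2| = 4 (numbers chosen purely for illustration), the
  // offsets would be
  //
  // buffer_layout[z1] = 0
  // buffer_layout[z5] = 3 * 2 = 6
  // buffer_layout[z2] = 3 * 2 + 3 * 3 = 15
  //
  // i.e., each entry is the scalar offset in buffer_ at which the
  // y1' * z product for that z block is stored.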
  typedef map<int, int> BufferLayoutType;
  struct Chunk {
    Chunk() : size(0) {}
    int size;
    int start;
    BufferLayoutType buffer_layout;
  };

  void ChunkDiagonalBlockAndGradient(
      const Chunk& chunk,
      const BlockSparseMatrixBase* A,
      const double* b,
      int row_block_counter,
      typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix* eet,
      typename EigenTypes<kEBlockSize>::Vector* g,
      double* buffer,
      BlockRandomAccessMatrix* lhs);

  void UpdateRhs(const Chunk& chunk,
                 const BlockSparseMatrixBase* A,
                 const double* b,
                 int row_block_counter,
                 const Vector& inverse_ete_g,
                 double* rhs);

  void ChunkOuterProduct(const CompressedRowBlockStructure* bs,
                         const Matrix& inverse_eet,
                         const double* buffer,
                         const BufferLayoutType& buffer_layout,
                         BlockRandomAccessMatrix* lhs);
  void EBlockRowOuterProduct(const BlockSparseMatrixBase* A,
                             int row_block_index,
                             BlockRandomAccessMatrix* lhs);


  void NoEBlockRowsUpdate(const BlockSparseMatrixBase* A,
                          const double* b,
                          int row_block_counter,
                          BlockRandomAccessMatrix* lhs,
                          double* rhs);

  void NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A,
                               int row_block_index,
                               BlockRandomAccessMatrix* lhs);

  int num_eliminate_blocks_;

  // Block layout of the columns of the reduced linear system. Since
  // the f blocks can be of varying size, this vector stores the
  // position of each f block in the row/col of the reduced linear
  // system. Thus lhs_row_layout_[i] is the row/col position of the
  // i^th f block.
  vector<int> lhs_row_layout_;

  // Combinatorial structure of the chunks in A. For more information
  // see the documentation of the Chunk object above.
  vector<Chunk> chunks_;

  // Buffer to store the products of the y and z blocks generated
  // during the elimination phase.
  scoped_array<double> buffer_;
  int buffer_size_;
  int num_threads_;
  int uneliminated_row_begins_;

  // Locks for the blocks in the right hand side of the reduced linear
  // system.
  vector<Mutex*> rhs_locks_;
};

}  // namespace internal
}  // namespace ceres

#endif  // CERES_INTERNAL_SCHUR_ELIMINATOR_H_