/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Stochastic Linear Ranking algorithms.
// This class implements a set of incremental algorithms for ranking tasks.
// Both L1 and L2 regularization are supported.


#ifndef LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_
#define LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_

#include <cmath>
#include <hash_map>
#include <string>

#include <sys/types.h>
#include "cutils/log.h"
#include "common_defs.h"
#include "learning_rate_controller-inl.h"
#include "sparse_weight_vector.h"

namespace learning_stochastic_linear {

// NOTE: This Stochastic Linear Ranker supports only the following update types:
// SL: Stochastic Linear
// CS: Constraint Satisfaction
template<class Key = std::string, class Hash = std::hash_map<std::string, double> >
class StochasticLinearRanker {
 public:
  // Initializes lambda_ and the norm constraint to meaningful defaults that
  // give equal weight to the error term and the regularizer.
  StochasticLinearRanker() {
    iteration_num_ = 0;
    lambda_ = 1.0;
    learning_rate_controller_.SetLambda(lambda_);
    mini_batch_size_ = 1;
    learning_rate_controller_.SetMiniBatchSize(mini_batch_size_);
    learning_rate_controller_.SetAdaptationMode(INV_LINEAR);
    update_type_ = SL;
    kernel_type_ = LINEAR;
    kernel_param_ = 1.0;
    kernel_gain_ = 1.0;
    kernel_bias_ = 0.0;
    rank_loss_type_ = PAIRWISE;
    acceptence_probability_ = 0.1;
    mini_batch_counter_ = 0;
    gradient_l0_norm_ = -1;
    norm_constraint_ = 1.0;
  }

  ~StochasticLinearRanker() {}
  // Getters and setters
  uint64 GetIterationNumber() const {
    return iteration_num_;
  }
  double GetNormContraint() const {
    return norm_constraint_;
  }
  RegularizationType GetRegularizationType() const {
    return regularization_type_;
  }
  double GetLambda() const {
    return lambda_;
  }
  uint64 GetMiniBatchSize() const {
    return mini_batch_size_;
  }
  int32 GetGradientL0Norm() const {
    return gradient_l0_norm_;
  }
  UpdateType GetUpdateType() const {
    return update_type_;
  }
  AdaptationMode GetAdaptationMode() const {
    return adaptation_mode_;
  }
  // This function additionally returns the basic kernel parameter. For the
  // polynomial kernel it is the degree of the polynomial; for the RBF kernel
  // it is the width (sigma) parameter; for the linear kernel it is unused.
  // It also returns the kernel gain and bias.
  KernelType GetKernelType(double *param, double *gain, double *bias) const {
    *param = kernel_param_;
    *gain = kernel_gain_;
    *bias = kernel_bias_;
    return kernel_type_;
  }
  RankLossType GetRankLossType() const {
    return rank_loss_type_;
  }
  double GetAcceptanceProbability() const {
    return acceptence_probability_;
  }
  void SetIterationNumber(uint64 num) {
    iteration_num_ = num;
  }
  void SetNormConstraint(const double norm) {
    norm_constraint_ = norm;
  }
  void SetRegularizationType(const RegularizationType r) {
    regularization_type_ = r;
  }
  void SetLambda(double l) {
    lambda_ = l;
    learning_rate_controller_.SetLambda(l);
  }
  void SetMiniBatchSize(const uint64 msize) {
    mini_batch_size_ = msize;
    learning_rate_controller_.SetMiniBatchSize(msize);
  }
  void SetAdaptationMode(AdaptationMode m) {
    adaptation_mode_ = m;
    learning_rate_controller_.SetAdaptationMode(m);
  }
  // This function additionally sets the basic kernel parameter. For the
  // polynomial kernel it is the degree of the polynomial; for the RBF kernel
  // it is the width (sigma) parameter; for the linear kernel it is unused.
  // It also sets the kernel gain and bias. NOTE: in most use cases, gain
  // should be set to 1.0 and bias to 0.0.
  void SetKernelType(KernelType k, double param, double gain, double bias) {
    kernel_type_ = k;
    kernel_param_ = param;
    kernel_gain_ = gain;
    kernel_bias_ = bias;
  }
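  // For example, an RBF kernel of width 0.5 (the value is for illustration
  // only) could be configured as:
  //   ranker.SetKernelType(RBF, 0.5, 1.0, 0.0);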
  void SetUpdateType(UpdateType u) {
    update_type_ = u;
  }
  void SetRankLossType(RankLossType r) {
    rank_loss_type_ = r;
  }
  void SetAcceptanceProbability(double p) {
    acceptence_probability_ = p;
  }
  void SetGradientL0Norm(const int32 gradient_l0_norm) {
    gradient_l0_norm_ = gradient_l0_norm;
  }
  // Load an existing model
  void LoadWeights(const SparseWeightVector<Key, Hash> &model) {
    weight_.LoadWeightVector(model);
  }
  // Save current model
  void SaveWeights(SparseWeightVector<Key, Hash> *model) {
    model->LoadWeightVector(weight_);
  }
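  // For example, a snapshot/restore round trip (sketch; "ranker" and
  // "snapshot" are illustrative names):
  //   SparseWeightVector<std::string, std::hash_map<std::string, double> > snapshot;
  //   ranker.SaveWeights(&snapshot);  // copy the current weights out
  //   ranker.LoadWeights(snapshot);   // restore them later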
  // Scoring
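  // Scores a sample x against the current weight vector w. With dot = <w, x>,
  // the returned value is:
  //   LINEAR: dot
  //   POLY:   pow(gain * dot + bias, param)
  //   RBF:    exp(-param * (||w||^2 + ||x||^2 - 2 * dot)), i.e.
  //           exp(-param * ||w - x||^2) assuming L2Norm() returns the squared
  //           norm, as the w_square/s_square variable names below suggest.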
  double ScoreSample(const SparseWeightVector<Key, Hash> &sample) {
    const double dot = weight_.DotProduct(sample);
    double w_square;
    double s_square;
    switch (kernel_type_) {
      case LINEAR:
        return dot;
      case POLY:
        return pow(kernel_gain_ * dot + kernel_bias_, kernel_param_);
      case RBF:
        w_square = weight_.L2Norm();
        s_square = sample.L2Norm();
        return exp(-kernel_param_ * (w_square + s_square - 2 * dot));
      default:
        ALOGE("unsupported kernel: %d", kernel_type_);
    }
    return -1;
  }
  // Learning functions.
  // Return values:
  //  1: full update went through.
  //  2: partial update went through (for SL only).
  //  0: no update necessary.
  // -1: error.
  int UpdateClassifier(const SparseWeightVector<Key, Hash> &positive,
                       const SparseWeightVector<Key, Hash> &negative);
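  // A typical training loop over preference pairs might look like (sketch;
  // GetNextPair() is a hypothetical data source):
  //   SparseWeightVector<std::string, std::hash_map<std::string, double> > pos, neg;
  //   while (GetNextPair(&pos, &neg)) {
  //     if (ranker.UpdateClassifier(pos, neg) < 0)
  //       break;  // -1 signals an error
  //   }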

 private:
  SparseWeightVector<Key, Hash> weight_;
  double norm_constraint_;
  double lambda_;
  RegularizationType regularization_type_;
  AdaptationMode adaptation_mode_;
  UpdateType update_type_;
  RankLossType rank_loss_type_;
  KernelType kernel_type_;
  // Kernel gain and bias are multiplicative and additive factors applied to
  // the dot product when evaluating the kernel function. The kernel parameter
  // is kernel-specific: for the polynomial kernel it is the degree of the
  // polynomial; for the RBF kernel it is the width (sigma) parameter.
  double kernel_param_;
  double kernel_gain_;
  double kernel_bias_;
  double acceptence_probability_;
  SparseWeightVector<Key, Hash> current_negative_;
  LearningRateController learning_rate_controller_;
  uint64 iteration_num_;
  // We average gradient updates over mini_batch_size_ samples before
  // performing one iteration of the algorithm.
  uint64 mini_batch_counter_;
  uint64 mini_batch_size_;
  // Specifies the number of non-zero entries allowed in a gradient.
  // The default is -1, which means the gradient is taken as given by the data
  // without any additional constraint. A positive number is treated as an L0
  // constraint, i.e., an upper bound on the number of non-zero gradient
  // entries.
  int32 gradient_l0_norm_;
  // Sub-gradient updates.
  // Pure sub-gradient update without any reprojection.
  // Note that a form of L2 regularization is built into this update.
  void UpdateSubGradient(const SparseWeightVector<Key, Hash> &positive,
                         const SparseWeightVector<Key, Hash> &negative,
                         const double learning_rate,
                         const double positive_score,
                         const double negative_score,
                         const int32 gradient_l0_norm);
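  // Conceptually, for a pairwise hinge-style loss this performs a step of the
  // form below (a sketch of the general technique, not necessarily the exact
  // update in the implementation):
  //   if (positive_score - negative_score < margin)
  //     w <- (1 - learning_rate * lambda_) * w
  //          + learning_rate * (positive - negative)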

};
}  // namespace learning_stochastic_linear
#endif  // LEARNING_STOCHASTIC_LINEAR_STOCHASTIC_LINEAR_RANKER_H_