Apache Singa
A General Distributed Deep Learning Library
optimizer.h
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements.
#ifndef SINGA_MODEL_OPTIMIZER_H_
#define SINGA_MODEL_OPTIMIZER_H_

#include <functional>  // std::function, used by the rate/momentum generators
#include <memory>      // std::shared_ptr, used by the Create* factories
#include <string>
#include <unordered_map>
#include <vector>

#include "singa/core/tensor.h"
#include "singa/proto/model.pb.h"

using std::function;
using std::string;
using std::unordered_map;
using std::vector;

namespace singa {
class Constraint;
class Regularizer;
/// The base class for gradient descent algorithms used to update the model
/// parameters in order to optimize the objective (loss) function.
class Optimizer {
 public:
  Optimizer() = default;
  virtual ~Optimizer();

  /// Setup the optimizer using configurations from a serialized string
  /// (for binding languages).
  void Setup(const string& str) {
    OptimizerConf conf;
    conf.ParseFromString(str);
    this->Setup(conf);
  }

  virtual void Setup(const OptimizerConf& conf);

  /// Register the parameter, e.g., create Constraint and Regularizers.
  virtual void Register(const string& name, const ParamSpec& specs);

  /// Apply the registered regularizer and constraint to the gradient.
  virtual void ApplyRegularizerConstraint(int epoch, const string& name,
                                          const Tensor& value, Tensor& grad,
                                          int step = -1);

  /// Apply the updating algorithm if the gradient is not empty.
  virtual void Apply(int epoch, float lr, const string& name, Tensor& grad,
                     Tensor& value, int step = -1) = 0;

  /// Apply the updating algorithm, taking the learning rate for `step` from
  /// the registered generator.
  void Apply(int epoch, const string& name, Tensor& grad, Tensor& value,
             int step = -1);

  /// The argument is a function that returns the learning rate given the
  /// current step (i.e., current running iteration).
  void SetLearningRateGenerator(function<float(int)> func) {
    learning_rate_generator_ = func;
  }
  float GetLearningRate(int step) {
    if (learning_rate_generator_)
      return learning_rate_generator_(step);
    else
      return 0;
  }

 protected:
  function<float(int)> learning_rate_generator_;
  std::unordered_map<std::string, float> learning_rate_multiplier_;
  std::unordered_map<std::string, Constraint*> constraints_;
  std::unordered_map<std::string, Regularizer*> regularizers_;
  Constraint* constraint_ = nullptr;
  Regularizer* regularizer_ = nullptr;

  OptimizerConf conf_;
};
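// Usage sketch (illustrative, not part of the original header; the parameter
// name "w", the shapes, and the fixed learning rate are placeholders):
//
//   SGD sgd;                                  // concrete subclass, below
//   OptimizerConf conf;
//   sgd.Setup(conf);
//   sgd.SetLearningRateGenerator([](int step) { return 0.01f; });
//   ParamSpec spec;
//   sgd.Register("w", spec);
//   Tensor value(Shape{10}), grad(Shape{10});
//   for (int step = 0; step < 100; step++) {
//     // ... forward/backward pass fills `grad` for `value` ...
//     sgd.Apply(0 /*epoch*/, "w", grad, value, step);
//   }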

/// Apply constraints for parameters (gradient).
class Constraint {
 public:
  Constraint() = default;
  explicit Constraint(const ConstraintConf& conf) { Setup(conf); }
  Constraint(const string& type, float threshold)
      : type_(type), threshold_(threshold) {}
  void Setup(const ConstraintConf& conf);
  void Setup(const string& conf_str) {
    ConstraintConf conf;
    conf.ParseFromString(conf_str);
    Setup(conf);
  }
  /// Apply the constraint to a single parameter (gradient).
  void Apply(int epoch, const Tensor& value, Tensor& grad, int step = -1);
  /// Apply the constraint to a set of parameters (gradients) together.
  void Apply(int epoch, const vector<Tensor>& values,
             const vector<Tensor>& grads, int step = -1);

 private:
  string type_ = "Unknown";
  float threshold_;
};

/// Note: this factory currently ignores `type` and returns a base Constraint.
inline std::shared_ptr<Constraint> CreateConstraint(std::string type) {
  return std::make_shared<Constraint>();
}
/// Apply regularization for parameters (gradient), e.g., L1 norm and L2 norm.
class Regularizer {
 public:
  Regularizer() = default;
  explicit Regularizer(const RegularizerConf& conf) { Setup(conf); }
  Regularizer(const string& type, float coefficient)
      : type_(type), coefficient_(coefficient) {}
  void Setup(const RegularizerConf& conf);
  void Setup(const string& conf_str) {
    RegularizerConf conf;
    conf.ParseFromString(conf_str);
    Setup(conf);
  }

  /// Apply the regularizer to a single parameter (gradient).
  void Apply(int epoch, const Tensor& value, Tensor& grad, int step = -1);
  /// Apply the regularizer to a set of parameters (gradients) together.
  void Apply(int epoch, const vector<Tensor>& values,
             const vector<Tensor>& grads, int step = -1);

 private:
  string type_ = "NotSet";
  float coefficient_;
};

/// Note: this factory currently ignores `type` and returns a base Regularizer.
inline std::shared_ptr<Regularizer> CreateRegularizer(std::string type) {
  return std::make_shared<Regularizer>();
}
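// For reference (a conventional formulation, not necessarily this
// implementation's exact arithmetic): an "L2" regularizer with coefficient c
// typically folds the penalty gradient into grad before the optimizer step,
//
//   grad += c * value;   // d/dw of (c/2) * ||w||^2
//
// while an "L1" regularizer adds c * sign(value).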


// ============= Vanilla SGD with Momentum ====================================
class SGD : public Optimizer {
 public:
  void Setup(const OptimizerConf& conf) override;
  void Apply(int epoch, float lr, const string& name, Tensor& grad,
             Tensor& value, int step = -1) override;

  /// The argument function returns the momentum value given the current
  /// running step (i.e., iterations/mini-batches).
  void SetMomentumGenerator(std::function<float(int)> func) {
    momentum_generator_ = func;
  }

 private:
  std::unordered_map<string, Tensor> history_gradient_;
  std::function<float(int)> momentum_generator_;
};
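// Update rule sketch (the textbook formulation; the actual Apply body lives
// in the implementation file): with momentum m(step) and learning rate lr,
//
//   history = m * history + lr * grad;
//   value  -= history;
//
// history_gradient_ keeps one such history Tensor per parameter name.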

// ============= Nesterov =====================================================
class Nesterov : public Optimizer {
 public:
  void Setup(const OptimizerConf& conf) override;
  void Apply(int epoch, float lr, const string& name, Tensor& grad,
             Tensor& value, int step = -1) override;

  /// The argument function returns the momentum value given the current
  /// running step (i.e., iterations/mini-batches).
  void SetMomentumGenerator(std::function<float(int)> func) {
    momentum_generator_ = func;
  }

 private:
  std::unordered_map<string, Tensor> history_gradient_;
  std::function<float(int)> momentum_generator_;
};
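// Update rule sketch (a common restatement of Nesterov's accelerated
// gradient, not necessarily this implementation's exact arithmetic): with
// momentum m and learning rate lr,
//
//   tmp     = history;
//   history = m * history + lr * grad;
//   value  -= (1 + m) * history - m * tmp;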

// ============= AdaGrad ======================================================
class AdaGrad : public Optimizer {
 public:
  void Setup(const OptimizerConf& conf) override;
  void Apply(int epoch, float lr, const string& name, Tensor& grad,
             Tensor& value, int step = -1) override;

 private:
  std::unordered_map<string, Tensor> history_gradient_;
  float delta_;  // small constant for numerical stability
};
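// Update rule sketch (standard AdaGrad; the exact placement of delta may
// differ in the implementation):
//
//   history += grad * grad;                         // element-wise
//   value   -= lr * grad / sqrt(history + delta);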
// ============= RMSProp ======================================================
class RMSProp : public Optimizer {
 public:
  void Setup(const OptimizerConf& conf) override;
  void Apply(int epoch, float lr, const string& name, Tensor& grad,
             Tensor& value, int step = -1) override;
  virtual ~RMSProp() = default;

 private:
  std::unordered_map<string, Tensor> history_gradient_;
  float delta_, rho_;  // delta_: numerical-stability term; rho_: decay rate
};
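// Update rule sketch (standard RMSProp; the exact placement of delta may
// differ in the implementation):
//
//   history = rho * history + (1 - rho) * grad * grad;  // element-wise
//   value  -= lr * grad / (sqrt(history) + delta);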

inline std::shared_ptr<Optimizer> CreateOptimizer(const string& type) {
  std::shared_ptr<Optimizer> opt;
  if (type == "SGD")
    opt = std::shared_ptr<Optimizer>(new SGD());
  else if (type == "RMSProp")
    opt = std::shared_ptr<Optimizer>(new RMSProp());
  else if (type == "AdaGrad")
    opt = std::shared_ptr<Optimizer>(new AdaGrad());
  else if (type == "Nesterov")
    opt = std::shared_ptr<Optimizer>(new Nesterov());
  else
    LOG(FATAL) << "Unknown optimizer type : " << type;
  return opt;
}
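// Usage sketch (illustrative; the decaying schedule and the momentum constant
// are example choices, not defaults from this header):
//
//   auto sgd = CreateOptimizer("SGD");
//   sgd->SetLearningRateGenerator(
//       [](int step) { return 0.1f / (1.0f + 0.01f * step); });
//   // SetMomentumGenerator lives on the concrete class, so a downcast is
//   // needed when starting from the factory's shared_ptr<Optimizer>:
//   std::static_pointer_cast<SGD>(sgd)->SetMomentumGenerator(
//       [](int step) { return 0.9f; });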
// ============ LocalAllReduce for single node multiple workers ==============
/*
class LocalAllReduce : public Optimizer {
 public:
  LocalAllReduce(Optimizer* opt);
  void Setup(const string& str) {
    AllReduce conf;
    conf.ParseFromString(str);
    this->Setup(conf);
  }
  void Setup(const AllReduce& conf) {}

  // Register the specs.
  void Register(const vector<string>& names,
                const vector<Tensor>& values,
                const vector<ParamSpecs>& specs) override;

  void Update(int step, float lr, const string& name, const Tensor& grad,
              Tensor* param) override;

  bool UpdateNow();

 private:
  int nb_workers_;
  vector<Tensor> aggregated_gradients_;
};
*/
}  // namespace singa
#endif  // SINGA_MODEL_OPTIMIZER_H_