Apache Singa
A General Distributed Deep Learning Library
optimizer.h
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements.
#ifndef SINGA_MODEL_OPTIMIZER_H_
#define SINGA_MODEL_OPTIMIZER_H_

#include <functional>  // std::function, used by the rate/momentum generators
#include <memory>      // std::shared_ptr, used by the Create* factories
#include <string>
#include <unordered_map>
#include <vector>

#include "singa/core/tensor.h"
#include "singa/proto/model.pb.h"

using std::function;
using std::string;
using std::unordered_map;
using std::vector;

namespace singa {
class Constraint;
class Regularizer;
/// The base class for gradient descent algorithms used to update the model
/// parameters in order to optimize the objective (loss) function.
class Optimizer {
 public:
  Optimizer() = default;
  virtual ~Optimizer();

  /// Setup the optimizer using configurations from a serialized string
  /// (for binding languages).
  void Setup(const string& str) {
    OptimizerConf conf;
    conf.ParseFromString(str);
    this->Setup(conf);
  }

  virtual void Setup(const OptimizerConf& conf);

  /// Register the parameter, e.g., create Constraint and Regularizers.
  virtual void Register(const string& name, const ParamSpec& specs);

  /// Apply the registered regularizer and constraint to the gradient.
  virtual void ApplyRegularizerConstraint(int epoch, const string& name,
                                          const Tensor& value, Tensor& grad,
                                          int step = -1);

  /// Apply the updating algorithm if the gradient is not empty.
  virtual void Apply(int epoch, float lr, const string& name, Tensor& grad,
                     Tensor& value, int step = -1) = 0;

  /// Apply the updating algorithm, taking the learning rate for `step` from
  /// the registered generator.
  void Apply(int epoch, const string& name, Tensor& grad, Tensor& value,
             int step = -1);

  /// The argument is a function that returns the learning rate given the
  /// current step (i.e., current running iteration).
  void SetLearningRateGenerator(function<float(int)> func) {
    learning_rate_generator_ = func;
  }
  float GetLearningRate(int step) {
    if (learning_rate_generator_)
      return learning_rate_generator_(step);
    else
      return 0;
  }

 protected:
  function<float(int)> learning_rate_generator_;
  std::unordered_map<std::string, float> learning_rate_multiplier_;
  std::unordered_map<std::string, Constraint*> constraints_;
  std::unordered_map<std::string, Regularizer*> regularizers_;
  Constraint* constraint_ = nullptr;
  Regularizer* regularizer_ = nullptr;

  OptimizerConf conf_;
};
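// Usage sketch (illustrative, not part of the original header; the parameter
// name "w", the shapes, and the fixed learning rate are placeholders):
//
//   SGD sgd;                                  // concrete subclass, below
//   OptimizerConf conf;
//   sgd.Setup(conf);
//   sgd.SetLearningRateGenerator([](int step) { return 0.01f; });
//   ParamSpec spec;
//   sgd.Register("w", spec);
//   Tensor value(Shape{10}), grad(Shape{10});
//   for (int step = 0; step < 100; step++) {
//     // ... forward/backward pass fills `grad` for `value` ...
//     sgd.Apply(0 /*epoch*/, "w", grad, value, step);
//   }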

/// Apply constraints for parameters (gradient).
class Constraint {
 public:
  Constraint() = default;
  explicit Constraint(const ConstraintConf& conf) { Setup(conf); }
  Constraint(const string& type, float threshold)
      : type_(type), threshold_(threshold) {}
  void Setup(const ConstraintConf& conf);
  void Setup(const string& conf_str) {
    ConstraintConf conf;
    conf.ParseFromString(conf_str);
    Setup(conf);
  }
  /// Apply the constraint to a single parameter (gradient).
  void Apply(int epoch, const Tensor& value, Tensor& grad, int step = -1);
  /// Apply the constraint to a set of parameters (gradients) together.
  void Apply(int epoch, const vector<Tensor>& values,
             const vector<Tensor>& grads, int step = -1);

 private:
  string type_ = "Unknown";
  float threshold_;
};

/// Note: this factory currently ignores `type` and returns a base Constraint.
inline std::shared_ptr<Constraint> CreateConstraint(std::string type) {
  return std::make_shared<Constraint>();
}
/// Apply regularization for parameters (gradient), e.g., L1 norm and L2 norm.
class Regularizer {
 public:
  Regularizer() = default;
  explicit Regularizer(const RegularizerConf& conf) { Setup(conf); }
  Regularizer(const string& type, float coefficient)
      : type_(type), coefficient_(coefficient) {}
  void Setup(const RegularizerConf& conf);
  void Setup(const string& conf_str) {
    RegularizerConf conf;
    conf.ParseFromString(conf_str);
    Setup(conf);
  }

  /// Apply the regularizer to a single parameter (gradient).
  void Apply(int epoch, const Tensor& value, Tensor& grad, int step = -1);
  /// Apply the regularizer to a set of parameters (gradients) together.
  void Apply(int epoch, const vector<Tensor>& values,
             const vector<Tensor>& grads, int step = -1);

 private:
  string type_ = "NotSet";
  float coefficient_;
};

/// Note: this factory currently ignores `type` and returns a base Regularizer.
inline std::shared_ptr<Regularizer> CreateRegularizer(std::string type) {
  return std::make_shared<Regularizer>();
}
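// For reference (a conventional formulation, not necessarily this
// implementation's exact arithmetic): an "L2" regularizer with coefficient c
// typically folds the penalty gradient into grad before the optimizer step,
//
//   grad += c * value;   // d/dw of (c/2) * ||w||^2
//
// while an "L1" regularizer adds c * sign(value).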


// ============= Vanilla SGD with Momentum ====================================
class SGD : public Optimizer {
 public:
  void Setup(const OptimizerConf& conf) override;
  void Apply(int epoch, float lr, const string& name, Tensor& grad,
             Tensor& value, int step = -1) override;

  /// The argument function returns the momentum value given the current
  /// running step (i.e., iterations/mini-batches).
  void SetMomentumGenerator(std::function<float(int)> func) {
    momentum_generator_ = func;
  }

 private:
  std::unordered_map<string, Tensor> history_gradient_;
  std::function<float(int)> momentum_generator_;
};
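// Update rule sketch (the textbook formulation; the actual Apply body lives
// in the implementation file): with momentum m(step) and learning rate lr,
//
//   history = m * history + lr * grad;
//   value  -= history;
//
// history_gradient_ keeps one such history Tensor per parameter name.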

// ============= Nesterov =====================================================
class Nesterov : public Optimizer {
 public:
  void Setup(const OptimizerConf& conf) override;
  void Apply(int epoch, float lr, const string& name, Tensor& grad,
             Tensor& value, int step = -1) override;

  /// The argument function returns the momentum value given the current
  /// running step (i.e., iterations/mini-batches).
  void SetMomentumGenerator(std::function<float(int)> func) {
    momentum_generator_ = func;
  }

 private:
  std::unordered_map<string, Tensor> history_gradient_;
  std::function<float(int)> momentum_generator_;
};
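// Update rule sketch (a common restatement of Nesterov's accelerated
// gradient, not necessarily this implementation's exact arithmetic): with
// momentum m and learning rate lr,
//
//   tmp     = history;
//   history = m * history + lr * grad;
//   value  -= (1 + m) * history - m * tmp;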

// ============= AdaGrad ======================================================
class AdaGrad : public Optimizer {
 public:
  void Setup(const OptimizerConf& conf) override;
  void Apply(int epoch, float lr, const string& name, Tensor& grad,
             Tensor& value, int step = -1) override;

 private:
  std::unordered_map<string, Tensor> history_gradient_;
  float delta_;  // small constant for numerical stability
};
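// Update rule sketch (standard AdaGrad; the exact placement of delta may
// differ in the implementation):
//
//   history += grad * grad;                         // element-wise
//   value   -= lr * grad / sqrt(history + delta);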
// ============= RMSProp ======================================================
class RMSProp : public Optimizer {
 public:
  void Setup(const OptimizerConf& conf) override;
  void Apply(int epoch, float lr, const string& name, Tensor& grad,
             Tensor& value, int step = -1) override;
  virtual ~RMSProp() = default;

 private:
  std::unordered_map<string, Tensor> history_gradient_;
  float delta_, rho_;  // delta_: numerical-stability term; rho_: decay rate
};
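// Update rule sketch (standard RMSProp; the exact placement of delta may
// differ in the implementation):
//
//   history = rho * history + (1 - rho) * grad * grad;  // element-wise
//   value  -= lr * grad / (sqrt(history) + delta);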

inline std::shared_ptr<Optimizer> CreateOptimizer(const string& type) {
  std::shared_ptr<Optimizer> opt;
  if (type == "SGD")
    opt = std::shared_ptr<Optimizer>(new SGD());
  else if (type == "RMSProp")
    opt = std::shared_ptr<Optimizer>(new RMSProp());
  else if (type == "AdaGrad")
    opt = std::shared_ptr<Optimizer>(new AdaGrad());
  else if (type == "Nesterov")
    opt = std::shared_ptr<Optimizer>(new Nesterov());
  else
    LOG(FATAL) << "Unknown optimizer type : " << type;
  return opt;
}
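// Usage sketch (illustrative; the decaying schedule and the momentum constant
// are example choices, not defaults from this header):
//
//   auto sgd = CreateOptimizer("SGD");
//   sgd->SetLearningRateGenerator(
//       [](int step) { return 0.1f / (1.0f + 0.01f * step); });
//   // SetMomentumGenerator lives on the concrete class, so a downcast is
//   // needed when starting from the factory's shared_ptr<Optimizer>:
//   std::static_pointer_cast<SGD>(sgd)->SetMomentumGenerator(
//       [](int step) { return 0.9f; });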
// ============ LocalAllReduce for single node multiple workers ==============
/*
class LocalAllReduce : public Optimizer {
 public:
  LocalAllReduce(Optimizer* opt);
  void Setup(const string& str) {
    AllReduce conf;
    conf.ParseFromString(str);
    this->Setup(conf);
  }
  void Setup(const AllReduce& conf) {}

  // Register the specs.
  void Register(const vector<string>& names,
                const vector<Tensor>& values,
                const vector<ParamSpecs>& specs) override;

  void Update(int step, float lr, const string& name, const Tensor& grad,
              Tensor* param) override;

  bool UpdateNow();

 private:
  int nb_workers_;
  vector<Tensor> aggregated_gradients_;
};
*/
}  // namespace singa
#endif  // SINGA_MODEL_OPTIMIZER_H_