content/api-v0.1.0/worker_8h_source.html

 /************************************************************

 *

 * Licensed to the Apache Software Foundation (ASF) under one

 * or more contributor license agreements.  See the NOTICE file

 * distributed with this work for additional information

 * regarding copyright ownership.  The ASF licenses this file

 * to you under the Apache License, Version 2.0 (the

 * "License"); you may not use this file except in compliance

 * with the License.  You may obtain a copy of the License at

 *

 *   http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing,

 * software distributed under the License is distributed on an

 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

 * KIND, either express or implied.  See the License for the

 * specific language governing permissions and limitations

 * under the License.

 *

 *************************************************************/


 #ifndef SINGA_TRAINER_WORKER_H_

 #define SINGA_TRAINER_WORKER_H_

 #include "neuralnet/neuralnet.h"

 #include "proto/job.pb.h"

 #include "communication/socket.h"


 namespace singa {

 // Sleep time, in milliseconds, used when a Param has not yet been updated to
 // the expected version.
 constexpr int kCollectSleepTime = 5;

 /**
  * The Worker class which runs the training algorithm.
  *
  * Abstract base class: concrete workers implement TrainOneBatch() and
  * TestOneBatch().
  */
 class Worker {
  public:
   /**
    * Returns a Worker for the given job configuration.
    * NOTE(review): presumably selects the concrete subclass from `proto`;
    * the definition is not in this header - confirm there.
    */
   static Worker* Create(const JobProto& proto);

   /**
    * Initializes the worker's identifiers.
    * @param thread_id NOTE(review): presumably the id of the hosting thread -
    *        confirm against the definition.
    * @param grp_id id of the worker group (see grp_id()).
    * @param id worker ID within the worker group (see id()).
    */
   virtual void Init(int thread_id, int grp_id, int id);

   virtual ~Worker();

   /** Setup members. */
   void Setup(const JobProto& job, shared_ptr<NeuralNet> train_net,
       shared_ptr<NeuralNet> valid_net, shared_ptr<NeuralNet> test_net);

   /** Main function of Worker. */
   void Run();

   /**
    * Init all local params (i.e., params from layers resident in this worker).
    */
   void InitLocalParams();

   /**
    * Checkpoint all params owned by the worker from the first group onto disk.
    */
   void Checkpoint(int step, shared_ptr<NeuralNet> net);

   /**
    * Test the performance of the learned model on the validation or test
    * dataset.
    */
   void Test(int nsteps, Phase phase, shared_ptr<NeuralNet> net);

   /** Train one mini-batch. */
   virtual void TrainOneBatch(int step, Metric* perf)=0;

   /** Test/validate one mini-batch. */
   virtual void TestOneBatch(int step, Phase phase, shared_ptr<NeuralNet> net,
       Metric* perf)=0;

   /** Report performance to the stub. */
   void Report(const string& prefix, const Metric & perf);

   /** Put Param to server. */
   int Put(Param* param, int step);

   /** Get Param with a specific version from server. */
   int Get(Param* param, int step);

   /** Update Param. */
   int Update(Param* param, int step);

   /** Block until the param is updated since sending the update request. */
   int Collect(Param* param, int step);

   /** Call Collect for every param of net. */
   int CollectAll(shared_ptr<NeuralNet> net, int step);

   /** Receive blobs from other workers due to model partitions. */
   void ReceiveBlobs(
     bool data, bool grad, BridgeLayer* layer, shared_ptr<NeuralNet> net);

   /** Send blobs to other workers due to model partitions. */
   void SendBlobs(
     bool data, bool grad, BridgeLayer* layer, shared_ptr<NeuralNet> net);

   // NOTE(review): the six predicates below are declared `inline`, but no
   // definition is visible in this header; confirm a definition is reachable
   // from every translation unit that calls them.

   /** Check whether it is time to display training info, e.g., loss. */
   inline bool DisplayNow(int step) const;

   /**
    * Check whether it is time to display debug info.
    * NOTE(review): the generated docs reuse DisplayNow's description here;
    * confirm the intended wording.
    */
   inline bool DisplayDebugInfo(int step) const;

   /** Check whether it is time to stop. */
   inline bool StopNow(int step) const;

   /** Check whether it is time to do a checkpoint. */
   inline bool CheckpointNow(int step) const;

   /** Check whether it is time to do a test. */
   inline bool TestNow(int step) const;

   /** Check whether it is time to do validation. */
   inline bool ValidateNow(int step) const;

   /** @return the stored group id (grp_id_). */
   int grp_id() const { return grp_id_;}

   /** @return worker ID within the worker group. */
   int id() const { return id_;}

  protected:
   // All members carry default initializers so that a worker which has been
   // constructed but not yet passed through Init() never exposes
   // indeterminate ints or dangling pointers.
   int thread_id_ = 0;
   int grp_id_ = 0;
   int id_ = 0;
   int step_ = 0;
   JobProto job_conf_;
   shared_ptr<NeuralNet> train_net_, test_net_, validation_net_;
   Dealer* layer_dealer_ = nullptr;
   Dealer* dealer_ = nullptr;
 };


 // Worker subclass that adds Forward() and Backward() passes over a net.
 // NOTE(review): "BP" presumably stands for back-propagation - confirm.
 class BPWorker: public Worker{

  public:

   ~BPWorker(){}

   // Overrides Worker::Init with the same (thread_id, grp_id, id) parameters.
   void Init(int thread_id, int grp_id, int id) override;

   // Train one mini-batch.
   void TrainOneBatch(int step, Metric* perf) override;

   // Test/validate one mini-batch.
   void TestOneBatch(int step, Phase phase, shared_ptr<NeuralNet> net,

       Metric* perf) override;


   // NOTE(review): undocumented in this listing; presumably the forward pass
   // over `net` for the given phase, accumulating into `perf` - confirm
   // against the definition.
   void Forward(int step, Phase phase, shared_ptr<NeuralNet> net, Metric* perf);

   // NOTE(review): undocumented in this listing; presumably the backward
   // (gradient) pass over `net` - confirm against the definition.
   void Backward(int step, shared_ptr<NeuralNet> net);

 };


 // Worker subclass providing its own per-batch train/test implementations.
 // NOTE(review): "CD" presumably stands for contrastive divergence - confirm.
 class CDWorker: public Worker{

  public:

   // Train one mini-batch.
   void TrainOneBatch(int step, Metric* perf) override;

   // Test/validate one mini-batch.
   void TestOneBatch(int step, Phase phase, shared_ptr<NeuralNet> net,

       Metric* perf) override;

 };


 // Packs a group id and a layer id into a single int: `grp` is shifted into
 // the bits above the low 16, and `layer` is OR-ed into the result.
 // BlobGrp() and BlobLayer() split the packed value apart again.
 inline int BlobTrgt(int grp, int layer) {
   const int grp_bits = grp << 16;
   return grp_bits | layer;
 }


 // Extracts the group part of a packed blob target by discarding the low
 // 16 bits (the layer part).
 inline int BlobGrp(int blob_trgt) {
   constexpr int kLayerBits = 16;
   const int grp = blob_trgt >> kLayerBits;
   return grp;
 }


 // Extracts the layer part of a packed blob target, i.e. its low 16 bits.
 inline int BlobLayer(int blob_trgt) {
   // Compile-time constant rather than a mutable function-local static: the
   // mask never changes, so it needs no storage or runtime initialization.
   constexpr int kLayerMask = (1 << 16) - 1;
   return blob_trgt & kLayerMask;
 }

 }  // namespace singa


 #endif  // SINGA_TRAINER_WORKER_H_

singa::BPWorker::TrainOneBatch
void TrainOneBatch(int step, Metric *perf) override
Train one mini-batch.

singa::Worker::TestNow
bool TestNow(int step) const
Check whether it is time to do a test.

singa::Worker::Collect
int Collect(Param *param, int step)
Block until the param is updated since sending the update request.

singa::Worker
The Worker class which runs the training algorithm.
Definition: worker.h:42

singa::Worker::InitLocalParams
void InitLocalParams()
Init all local params (i.e., params from layers resident in this worker).

singa::Param
Base parameter class.
Definition: param.h:93

singa::Worker::grp_id
int grp_id() const
Definition: worker.h:187

singa::Worker::CollectAll
int CollectAll(shared_ptr< NeuralNet > net, int step)
Call Collect for every param of net.

singa::Worker::DisplayDebugInfo
bool DisplayDebugInfo(int step) const
Check whether it is time to display training info, e.g., loss and precision.

singa::BPWorker
Definition: worker.h:202

singa::Worker::ReceiveBlobs
void ReceiveBlobs(bool data, bool grad, BridgeLayer *layer, shared_ptr< NeuralNet > net)
Receive blobs from other workers due to model partitions.

singa::CDWorker::TestOneBatch
void TestOneBatch(int step, Phase phase, shared_ptr< NeuralNet > net, Metric *perf) override
Test/validate one mini-batch.

singa::Worker::Init
virtual void Init(int thread_id, int grp_id, int id)

singa::Dealer
Definition: socket.h:91

singa::Worker::DisplayNow
bool DisplayNow(int step) const
Check whether it is time to display training info, e.g., loss and precision.

singa::Worker::Test
void Test(int nsteps, Phase phase, shared_ptr< NeuralNet > net)
Test the performance of the learned model on the validation or test dataset.

singa::Worker::id
int id() const
worker ID within the worker group.
Definition: worker.h:192

singa::BPWorker::Init
void Init(int thread_id, int grp_id, int id) override

singa::Worker::Update
int Update(Param *param, int step)
Update Param.

singa::Worker::Put
int Put(Param *param, int step)
Put Param to server.

singa::Worker::Setup
void Setup(const JobProto &job, shared_ptr< NeuralNet > train_net, shared_ptr< NeuralNet > valid_net, shared_ptr< NeuralNet > test_net)
Setup members.

singa::CDWorker::TrainOneBatch
void TrainOneBatch(int step, Metric *perf) override
Train one mini-batch.

singa::BridgeLayer
Definition: connection_layer.h:33

singa::Worker::TestOneBatch
virtual void TestOneBatch(int step, Phase phase, shared_ptr< NeuralNet > net, Metric *perf)=0
Test/validate one mini-batch.

singa::Worker::Run
void Run()
Main function of Worker.

singa::Worker::Checkpoint
void Checkpoint(int step, shared_ptr< NeuralNet > net)
Checkpoint all params owned by the worker from the first group onto disk.

singa::Worker::Get
int Get(Param *param, int step)
Get Param with a specific version from the server. If the current version >= the requested version...

singa::Worker::StopNow
bool StopNow(int step) const
Check whether it is time to stop.

singa::Worker::Report
void Report(const string &prefix, const Metric &perf)
Report performance to the stub.

singa::Metric
Performance metrics.
Definition: common.h:85

singa::Worker::SendBlobs
void SendBlobs(bool data, bool grad, BridgeLayer *layer, shared_ptr< NeuralNet > net)
Send blobs to other workers due to model partitions.

singa::Worker::TrainOneBatch
virtual void TrainOneBatch(int step, Metric *perf)=0
Train one mini-batch.

singa::kCollectSleepTime
const int kCollectSleepTime
Sleep 5 milliseconds if the Param is not updated to the expected version.
Definition: worker.h:30

singa::Worker::CheckpointNow
bool CheckpointNow(int step) const
Check whether it is time to do a checkpoint.

singa::CDWorker
Definition: worker.h:214

singa::BPWorker::TestOneBatch
void TestOneBatch(int step, Phase phase, shared_ptr< NeuralNet > net, Metric *perf) override
Test/validate one mini-batch.

singa::Worker::ValidateNow
bool ValidateNow(int step) const
Check whether it is time to do validation.