content/api-v0.1.0/cluster_8h_source.html

 /************************************************************

 *

 * Licensed to the Apache Software Foundation (ASF) under one

 * or more contributor license agreements.  See the NOTICE file

 * distributed with this work for additional information

 * regarding copyright ownership.  The ASF licenses this file

 * to you under the Apache License, Version 2.0 (the

 * "License"); you may not use this file except in compliance

 * with the License.  You may obtain a copy of the License at

 *

 *   http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing,

 * software distributed under the License is distributed on an

 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

 * KIND, either express or implied.  See the License for the

 * specific language governing permissions and limitations

 * under the License.

 *

 *************************************************************/


 #ifndef SINGA_UTILS_CLUSTER_H_

 #define SINGA_UTILS_CLUSTER_H_


 #include <glog/logging.h>

 #include <string>

 #include <unordered_map>

 #include <memory>

 #include <vector>

 #include "proto/job.pb.h"

 #include "proto/singa.pb.h"

 #include "utils/cluster_rt.h"

 #include "utils/common.h"

 #include "utils/singleton.h"


 namespace singa {


 /**
  * Process-wide view of the cluster configuration.
  *
  * Most accessors simply forward to the stored ClusterProto; the rest derive
  * process counts and roles from those configured values and from the id of
  * the current process.
  */
 class Cluster {
  public:
   // A single Cluster instance is shared by the whole process: Setup() takes
   // the job id and both configuration protos, Get() hands the instance back.
   static Cluster* Setup(int job_id, const SingaProto& singaConf,
                         const ClusterProto& clusterConf);
   static Cluster* Get();

   // ---- values read straight from the cluster configuration proto ----
   inline int nserver_groups() const {
     return cluster_.nserver_groups();
   }
   inline int nworker_groups() const {
     return cluster_.nworker_groups();
   }
   inline int nworkers_per_group() const {
     return cluster_.nworkers_per_group();
   }
   inline int nservers_per_group() const {
     return cluster_.nservers_per_group();
   }
   inline int nworkers_per_procs() const {
     return cluster_.nworkers_per_procs();
   }
   inline int nservers_per_procs() const {
     return cluster_.nservers_per_procs();
   }

   /**
    * Worker groups per server group (integer division).
    * Yields 1 when the configuration has no server groups or no servers per
    * group, which also keeps the division below away from a zero divisor.
    */
   inline int nworker_groups_per_server_group() const {
     const bool no_servers =
         nserver_groups() == 0 || nservers_per_group() == 0;
     return no_servers
                ? 1
                : cluster_.nworker_groups() / cluster_.nserver_groups();
   }

   /**
    * Whether the current process (procs_id_) is a server process.
    * With servers and workers in separate processes, server processes are the
    * ids from nworker_procs() upwards (the id is CHECKed to be below nprocs_).
    * Otherwise they are the ids below nserver_procs().
    */
   inline bool has_server() const {
     if (!server_worker_separate())
       return procs_id_ < nserver_procs();
     CHECK_LT(procs_id_, nprocs_);
     return procs_id_ >= nworker_procs();
   }

   /// Whether the current process is a worker process: ids below
   /// nworker_procs().
   inline bool has_worker() const {
     const int worker_procs = nworker_procs();
     return procs_id_ < worker_procs;
   }

   /// Id of the current process (-1 until set).
   inline int procs_id() const {
     return procs_id_;
   }
   inline void set_procs_id(int procs_id) {
     procs_id_ = procs_id;
   }

   /// Configuration flag: servers and workers live in separate processes.
   inline bool server_worker_separate() const {
     return cluster_.server_worker_separate();
   }

   /// Total workers (groups * workers per group) divided by workers per
   /// process, using integer arithmetic.
   inline int nworker_procs() const {
     const int total_workers = nworker_groups() * nworkers_per_group();
     return total_workers / nworkers_per_procs();
   }

   /// Total servers (groups * servers per group) divided by servers per
   /// process, using integer arithmetic.
   inline int nserver_procs() const {
     const int total_servers = nserver_groups() * nservers_per_group();
     return total_servers / nservers_per_procs();
   }

   inline int nprocs() const {
     return nprocs_;
   }

   /**
    * Host address of process `procs_id`, looked up through the cluster
    * runtime. The id is CHECKed to lie in [0, nprocs()) first.
    */
   inline std::string endpoint(int procs_id) const {
     CHECK_LT(procs_id, nprocs());
     CHECK_GE(procs_id, 0);
     return cluster_rt_->GetProcHost(procs_id);
   }

   // ---- workspace and the folders derived from it ----
   inline std::string workspace() const {
     return cluster_.workspace();
   }
   inline std::string vis_folder() const {
     std::string folder = cluster_.workspace();
     folder += "/visualization";
     return folder;
   }
   inline std::string checkpoint_folder() const {
     std::string folder = cluster_.workspace();
     folder += "/checkpoint";
     return folder;
   }

   /* Timeout accessors, currently disabled:
   const int stub_timeout() const { return cluster_.stub_timeout(); }
   const int worker_timeout() const { return cluster_.worker_timeout(); }
   const int server_timeout() const { return cluster_.server_timeout(); }
   */

   inline bool share_memory() const {
     return cluster_.share_memory();
   }
   inline int sync_freq() const {
     return cluster_.sync_freq();
   }
   inline int poll_time() const {
     return cluster_.poll_time();
   }

   /// Raw pointer to the cluster runtime held by this object (nullptr by
   /// default).
   ClusterRuntime* runtime() const { return cluster_rt_; }

   /**
    * Process id recorded for the (group_id, id, flag) triple.
    * The triple is reduced to a key with Hash(); std::unordered_map::at
    * throws std::out_of_range when no entry exists for that key.
    */
   inline int ProcsIDOf(int group_id, int id, int flag) {
     const int key = Hash(group_id, id, flag);
     return procs_ids_.at(key);
   }

   inline std::string hostip() const {
     return hostip_;
   }

   // Defined out of line; semantics are not visible from this header.
   const std::vector<int> ExecutorRng(int pid, int group_size, int procs_size);
   // Register this process (pid) together with its endpoint.
   void Register(int pid, const std::string& endpoint);

  private:
   void Init(int job, const SingaProto& singaConf,
             const ClusterProto& clusterConf);
   void SetupFolders(const ClusterProto &cluster);
   // Maps a (gid, id, flag) triple to the integer key used by procs_ids_.
   int Hash(int gid, int id, int flag);

   // Id of this process; stays -1 until assigned.
   int procs_id_ = -1;
   // Value returned by nprocs().
   int nprocs_ = 0;
   std::string hostip_ = "";
   // Cluster configuration proto backing most of the accessors above.
   ClusterProto cluster_;
   SingaProto singa_;
   // Runtime used to translate process ids to host addresses.
   ClusterRuntime* cluster_rt_ = nullptr;
   // Hash(group, id, flag) -> process id.
   std::unordered_map<int, int> procs_ids_;
 };


 }  // namespace singa


 #endif  // SINGA_UTILS_CLUSTER_H_

singa::Cluster::has_worker
bool has_worker() const
Definition: cluster.h:76

singa::Cluster::endpoint
std::string endpoint(int procs_id) const
Definition: cluster.h:97

singa::Cluster::Register
void Register(int pid, const std::string &endpoint)
Register this process.

singa::ClusterRuntime::GetProcHost
std::string GetProcHost(int proc_id)
translate the process id to host address

singa::Cluster::procs_id
int procs_id() const
Definition: cluster.h:82

singa::Cluster::ProcsIDOf
int ProcsIDOf(int group_id, int id, int flag)
Definition: cluster.h:122

singa::Cluster
Cluster is a singleton object, which provides cluster configurations, e.g., the topology of the cluster...
Definition: cluster.h:43

singa::Cluster::ExecutorRng
const std::vector< int > ExecutorRng(int pid, int group_size, int procs_size)

singa::Cluster::has_server
bool has_server() const
Definition: cluster.h:65