22 #ifndef SINGA_UTILS_CLUSTER_H_
23 #define SINGA_UTILS_CLUSTER_H_
25 #include <glog/logging.h>
27 #include <unordered_map>
30 #include "proto/job.pb.h"
31 #include "proto/singa.pb.h"
32 #include "utils/cluster_rt.h"
33 #include "utils/common.h"
34 #include "utils/singleton.h"
46 static Cluster* Setup(
int job_id,
const SingaProto& singaConf,
47 const ClusterProto& clusterConf);
50 inline int nserver_groups()
const {
return cluster_.nserver_groups(); }
51 inline int nworker_groups()
const {
return cluster_.nworker_groups(); }
52 inline int nworkers_per_group()
const {
return cluster_.nworkers_per_group();}
53 inline int nservers_per_group()
const {
return cluster_.nservers_per_group();}
54 inline int nworkers_per_procs()
const {
return cluster_.nworkers_per_procs();}
55 inline int nservers_per_procs()
const {
return cluster_.nservers_per_procs();}
56 inline int nworker_groups_per_server_group()
const {
57 if (nserver_groups() == 0 || nservers_per_group() == 0)
60 return cluster_.nworker_groups() / cluster_.nserver_groups();
66 if (server_worker_separate()) {
67 CHECK_LT(procs_id_, nprocs_);
68 return procs_id_ >= nworker_procs();
70 return procs_id_ < nserver_procs();
77 return procs_id_ < nworker_procs();
82 inline int procs_id()
const {
return procs_id_; }
/**
 * Forwards the server_worker_separate flag of the cluster configuration
 * (cluster_).
 */
84 inline bool server_worker_separate()
const {
85 return cluster_.server_worker_separate();
/**
 * Number of worker processes: the total worker count
 * (nworker_groups() * nworkers_per_group()) divided by nworkers_per_procs().
 * The division is integer division, so any remainder is discarded.
 * NOTE(review): assumes nworkers_per_procs() is non-zero -- confirm against
 * the configuration defaults.
 */
87 inline int nworker_procs()
const {
88 return nworker_groups() * nworkers_per_group() / nworkers_per_procs();
/**
 * Number of server processes: the total server count
 * (nserver_groups() * nservers_per_group()) divided by nservers_per_procs().
 * The division is integer division, so any remainder is discarded.
 * NOTE(review): assumes nservers_per_procs() is non-zero -- confirm against
 * the configuration defaults.
 */
90 inline int nserver_procs()
const {
91 return nserver_groups() * nservers_per_group() / nservers_per_procs();
93 inline int nprocs()
const {
return nprocs_; }
98 CHECK_LT(procs_id, nprocs());
99 CHECK_GE(procs_id, 0);
102 inline std::string workspace()
const {
return cluster_.workspace(); }
/**
 * Path of the visualization folder: the configured workspace path with
 * "/visualization" appended.
 */
103 inline std::string vis_folder()
const {
104 return cluster_.workspace() +
"/visualization";
/**
 * Path of the checkpoint folder: the configured workspace path with
 * "/checkpoint" appended.
 */
106 inline std::string checkpoint_folder()
const {
107 return cluster_.workspace() +
"/checkpoint";
114 inline bool share_memory()
const {
return cluster_.share_memory(); }
115 inline int sync_freq()
const {
return cluster_.sync_freq(); }
116 inline int poll_time()
const {
return cluster_.poll_time(); }
117 ClusterRuntime* runtime()
const {
return cluster_rt_; }
123 return procs_ids_.at(Hash(group_id,
id, flag));
125 inline std::string hostip()
const {
return hostip_; }
/**
 * Returns a vector of ints computed from a process id, a group size and a
 * per-process size.
 *
 * NOTE(review): presumably the range of executors hosted by process `pid`;
 * the definition is not in this header, so confirm the exact contents there.
 *
 * @param pid        process id
 * @param group_size group size
 * @param procs_size per-process size
 */
const std::vector<int> ExecutorRng(int pid, int group_size, int procs_size);
146 void Init(
int job,
const SingaProto& singaConf,
147 const ClusterProto& clusterConf);
148 void SetupFolders(
const ClusterProto &cluster);
/**
 * Maps a (gid, id, flag) triple to an int. The result is used as the lookup
 * key into procs_ids_.
 *
 * @param gid  group id
 * @param id   id within the group
 * @param flag flag value folded into the key
 * @return the integer key
 */
int Hash(int gid, int id, int flag);
153 std::string hostip_ =
"";
155 ClusterProto cluster_;
157 ClusterRuntime* cluster_rt_ =
nullptr;
158 std::unordered_map<int, int> procs_ids_;
163 #endif // SINGA_UTILS_CLUSTER_H_
bool has_worker() const
Definition: cluster.h:76
std::string endpoint(int procs_id) const
Definition: cluster.h:97
void Register(int pid, const std::string &endpoint)
Register this process.
std::string GetProcHost(int proc_id)
Translate the process id to a host address.
int procs_id() const
Definition: cluster.h:82
int ProcsIDOf(int group_id, int id, int flag)
Definition: cluster.h:122
Cluster is a singleton object, which provides cluster configurations, e.g., the topology of the cluster...
Definition: cluster.h:43
const std::vector< int > ExecutorRng(int pid, int group_size, int procs_size)
bool has_server() const
Definition: cluster.h:65