Apache Singa
A General Distributed Deep Learning Library
device.h
1 
19 #ifndef SINGA_CORE_DEVICE_H_
20 #define SINGA_CORE_DEVICE_H_
21 
22 #include <type_traits>
23 #include <vector>
24 #include <string>
25 #include <functional>
26 #include <memory>
27 
28 #include "singa/singa_config.h"
29 #include "singa/core/common.h"
30 #include "singa/core/memory.h"
31 #include "singa/core/scheduler.h"
32 #include "singa/proto/core.pb.h"
33 
34 #ifdef USE_CUDA
35 #include "singa/utils/cuda_utils.h"
36 #endif // USE_CUDA
37 
38 #ifdef USE_OPENCL
39 #include "singa/utils/opencl_utils.h"
40 #endif // USE_OPENCL
41 
42 #ifdef USE_MKLDNN
43 #include "singa/utils/mkldnn_utils.h"
44 #endif // USE_MKLDNN
45 
46 using std::vector;
47 using std::string;
48 using std::function;
49 using std::shared_ptr;
50 
51 namespace singa {
52 
// Device: abstract base class that allocates memory and executes Tensor
// operations. Concrete back-ends implement the pure-virtual hooks
// (SetRandSeed, DoExec, CopyToFrom, Malloc, Free) declared below.
56 class Device {
57  public:
58  // Device() = default;
    // Virtual destructor so that subclasses can be destroyed through a
    // Device pointer.
59  virtual ~Device() {}
    // Construct a device from an id and an executor count.
    // NOTE(review): presumably these initialise id_ and num_executors_; the
    // definition is not visible in this header.
62  Device(int id, int num_executors);
63 
    // Set the random seed; every concrete device must implement this.
64  virtual void SetRandSeed(unsigned seed) = 0;
65 
    // Called by Tensor. Returns a Block, i.e. a chunk of memory (on device or
    // host). NOTE(review): `size` is presumably a byte count - TODO confirm.
67  Block* NewBlock(int size);
68 
    // Called by Tensor. Counterpart of NewBlock.
70  void FreeBlock(Block* block);
71 
    // Return the size (bytes) of memory in use. The base implementation
    // always reports 0.
    // TODO(wangwei) override this function for all devices.
74  virtual size_t GetAllocatedMem() {
75  return 0u;
76  }
77 
    // Copy data within or across devices.
    // NOTE(review): the offsets are presumably byte offsets into dst/src -
    // TODO confirm against the implementation.
79  virtual void CopyDataToFrom(Block* dst, Block* src, size_t nBytes,
80  CopyDirection direction, int dst_offset, int src_offset);
81 
    // Copy nBytes from the raw pointer `src` into `dst`; dst_offset defaults
    // to 0. NOTE(review): `src` is presumably host memory, per the name.
82  void CopyDataFromHostPtr(Block* dst, const void* src, size_t nBytes,
83  size_t dst_offset = 0);
    // Submit the operation `fn` to the device, which may execute it right now
    // or delay it. read_blocks / write_blocks list the blocks handed in with
    // the operation (both vectors are taken by value).
86  void Exec(function<void(Context*)>&& fn, const vector<Block*> read_blocks,
87  const vector<Block*> write_blocks,
88  bool use_rand_generator = false);
89 
90  // Wait for one event.
91  // void WaitFor();
92 
    // Wait for all operations submitted to this device.
94  void Sync();
95 
    // Return the programming language for this device.
97  LangType lang() const {
98  return lang_;
99  }
100 
    // Return the host device (host_); subclasses may override.
101  virtual std::shared_ptr<Device> host() const { return host_;}
102 
    // Return the device context. `k` is currently ignored: there is a single
    // ctx_ and it is returned for every k (see the TODO on ctx_ below).
103  Context* context(int k) {
104  return &ctx_;
105  }
106 
    // Return this device's id (id_).
107  int id() const { return id_; }
108 
109  private:
    // The default constructor is private, so it is not accessible to
    // subclasses or outside code.
110  Device() {};
111 
112  protected:
    // Execute one operation on one executor.
114  virtual void DoExec(function<void(Context*)>&& fn, int executor) = 0;
115 
    // Back-end specific raw copy of nBytes from src to dst.
    // NOTE(review): presumably the primitive behind CopyDataToFrom - confirm.
116  virtual void CopyToFrom(void* dst, const void* src, size_t nBytes,
117  CopyDirection direction, Context* ctx) = 0;
118 
    // Allocate device memory.
120  virtual void* Malloc(int size) = 0;
121 
    // Free device memory.
123  virtual void Free(void* ptr) = 0;
124 
125  protected:
126  int id_ = 0;
127  int num_executors_ = 0;
128  unsigned seed_ = 0;
129  // Scheduler* scheduler_ = nullptr;
130  // VirtualMemory* vm_ = nullptr;
    // Programming language type, could be kCpp, kCuda, kOpencl.
    // Unlike the members above it has no in-class initializer.
132  LangType lang_;
133  // SafeQueue<Operation> op_queue_;
134  // SafeQueue<Operation> op_log_;
    // The host device.
136  std::shared_ptr<Device> host_;
137  // TODO(wangwei) define multiple contexts, one per executor
138  Context ctx_;
139 };
140 
// A singleton device that acts as the host for all devices. Declared here,
// defined in another translation unit.
142 extern std::shared_ptr<Device> defaultDevice;
143 
// CppCPU: represents a CPU device which may have multiple threads/executors.
146 class CppCPU : public Device {
147  public:
148  ~CppCPU();
149  CppCPU();
150 
    // The host of a CPU device is the global defaultDevice rather than the
    // inherited host_ member.
151  std::shared_ptr<Device> host() const override { return defaultDevice;}
    // Set the random seed for this device.
152  void SetRandSeed(unsigned seed) override;
153 
154  protected:
    // Execute one operation on one executor.
155  void DoExec(function<void(Context*)>&& fn, int executor) override;
156 
    // Raw copy of nBytes from src to dst (CPU implementation).
157  void CopyToFrom(void* dst, const void* src, size_t nBytes,
158  CopyDirection direction, Context* ctx) override;
159 
    // Allocate device memory.
161  void* Malloc(int size) override;
162 
    // Free device memory.
164  void Free(void* ptr) override;
165 };
166 
167 
168 // Implement Device using OpenCL libs.
169 // class OpenclDevice : public Device { };
170 
171 #ifdef USE_CUDA
172 // Represent a Nvidia GPU which runs cuda code.
173 class CudaGPU : public Device {
174  public:
175  ~CudaGPU();
    // Construct a GPU device; id defaults to 0.
    // NOTE(review): not marked explicit, so an int converts implicitly.
177  CudaGPU(int id = 0);
    // Construct a GPU device that uses the supplied memory pool.
    // NOTE(review): presumably stored in pool_ - definition not visible here.
179  CudaGPU(int id, std::shared_ptr<DeviceMemPool> pool);
180 
    // Set the random seed for this device.
181  void SetRandSeed(unsigned seed) override;
    // Return the size (bytes) of memory in use; replaces the base-class
    // version, which always reports 0.
182  size_t GetAllocatedMem() override;
183 
184  protected:
    // Execute one operation on one executor.
185  void DoExec(function<void(Context*)>&& fn, int executor) override;
186 
    // Raw copy of nBytes from src to dst (CUDA implementation).
187  void CopyToFrom(void* dst, const void* src, size_t nBytes,
188  CopyDirection direction, Context* ctx) override;
189 
    // Allocate device memory.
191  void* Malloc(int size) override;
192 
    // Free device memory.
194  void Free(void* ptr) override;
195 
196  private:
    // Internal initialisation helper.
    // NOTE(review): presumably shared by both constructors - confirm.
197  void Setup();
198 
199  private:
    // Memory pool for this device (shared ownership).
200  shared_ptr<DeviceMemPool> pool_;
201 };
202 
204 
205 #endif // USE_CUDA
206 
207 #ifdef USE_OPENCL
208 
209 // Implement Device using OpenCL libs.
210 class OpenclDevice : public singa::Device {
211 public:
212 
213  // TODO: Constructor arguments to consider:
214  // Path to kernel sources?
215  // Select only certain device types?
    // Construct an OpenCL device; defaults are id 0 and one executor.
216  OpenclDevice(int id = 0, int num_executors = 1);
217  ~OpenclDevice();
218 
219 // Overridden, inherited methods
    // Set the random seed for this device.
220  void SetRandSeed(unsigned seed) override;
221 
    // Copy data within or across devices.
    // NOTE(review): this override adds default offsets (0) that the base
    // declaration does not have; default arguments are chosen by the static
    // type, so they only apply to calls made through an OpenclDevice.
222  virtual void CopyDataToFrom(Block* dst, Block* src, size_t nBytes,
223  CopyDirection direction, int dst_offset = 0,
224  int src_offset = 0) override;
225 
226 protected:
    // ViennaCL handle for the OpenCL device backing this object.
230  viennacl::ocl::device this_device;
231 
    // ViennaCL OpenCL context used by this object.
234  viennacl::ocl::context vcl_ctx;
235 
    // NOTE(review): presumably compiles the OpenCL kernel sources found under
    // cl_src_path - confirm against the implementation.
238  void BuildPrograms();
239 
240 // Overridden, inherited methods.
241 
    // Execute one operation on one executor.
242  void DoExec(function<void(Context*)>&& fn, int executor) override;
243 
    // Raw copy of nBytes from src to dst (OpenCL implementation). The ctx
    // default of nullptr exists only on this override, not on the base.
244  void CopyToFrom(void* dst, const void* src, size_t nBytes,
245  CopyDirection direction, Context* ctx = nullptr) override;
246 
    // Allocate device memory.
250  void* Malloc(int size) override;
251 
    // Free device memory.
254  void Free(void* ptr) override;
255 
256 private:
257 
    // NOTE(review): presumably the path to the OpenCL kernel sources; defined
    // in another translation unit.
258  static const std::string cl_src_path;
259 };
260 #endif // USE_OPENCL
261 
// Platform: queries the calculating devices available on a given machine.
// Most members are static helpers; the CUDA and OpenCL sections are only
// compiled when USE_CUDA / USE_OPENCL are defined.
267 class Platform {
268 public:
269 
    // Return the default host device.
271  static std::shared_ptr<Device> GetDefaultDevice() {
272  return defaultDevice;
273  }
274 
275 #ifdef USE_CUDA
    // NOTE(review): presumably the number of GPUs visible on this machine.
276  static int GetNumGPUs();
278 
    // NOTE(review): presumably the ids of the available GPUs - confirm.
281  static const std::vector<int> GetGPUIDs();
282 
    // Memory sizes for one GPU, as a pair.
    // NOTE(review): the meaning/order of the pair (e.g. free vs. total) is
    // not visible in this header - TODO confirm.
283  static const std::pair<size_t, size_t> GetGPUMemSize(const int device);
284 
    // Same query as above, one pair per GPU.
286  static const std::vector<std::pair<size_t, size_t>> GetGPUMemSize();
287 
    // Return a description string for GPU `id`; verbose defaults to false.
289  static const std::string DeviceQuery(int id, bool verbose = false);
290 
    // Create `num_devices` GPU devices.
    // NOTE(review): init_size is presumably the initial memory-pool size.
292  static const std::vector<std::shared_ptr<Device>>
293  CreateCudaGPUs(const size_t num_devices, size_t init_size = 0);
294 
    // Create GPU devices for the explicitly listed device ids.
296  static const std::vector<std::shared_ptr<Device>>
297  CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
298 
    // NOTE(review): presumably reports whether device_id is usable; the
    // exact check is not visible in this header.
313  static bool CheckDevice(const int device_id);
314 #endif // USE_CUDA
315 
316 #ifdef USE_OPENCL
317 
    // NOTE(review): unlike the other members, the next two functions are not
    // static, so they need a Platform instance to be called.
318  const int GetNumOpenclPlatforms();
319 
320  const int GetNumOpenclDevices();
321 
    // NOTE(review): presumably returns a default OpenCL device - confirm.
322  static const std::shared_ptr<Device> GetDefaultOpenclDevice();
323 
328 // static const std::vector<std::shared_ptr<Device>>
329 // CreateOpenclDevices(const size_t num_devices);
330 
336 // const std::vector<std::shared_ptr<Device>>
337 // CreateOpenclDevices(const vector<int> &id);
338 #endif // USE_OPENCL
339 
340 };
341 
342 
343 } // namespace singa
344 
345 #endif // SINGA_CORE_DEVICE_H_
void FreeBlock(Block *block)
Called by Tensor.
virtual size_t GetAllocatedMem()
Return the size (bytes) of memory in use. TODO(wangwei): override this function for all devices...
Definition: device.h:74
virtual void * Malloc(int size)=0
Allocate device memory.
void Sync()
Wait for all operations submitted to this device.
This class queries all available calculating devices on a given machine grouped according to manufact...
Definition: device.h:267
Represent a CPU device which may have multiple threads/executors.
Definition: device.h:146
LangType lang_
Programming language type, could be kCpp, kCuda, kOpencl.
Definition: device.h:132
virtual void DoExec(function< void(Context *)> &&fn, int executor)=0
Execute one operation on one executor.
virtual void CopyDataToFrom(Block *dst, Block *src, size_t nBytes, CopyDirection direction, int dst_offset, int src_offset)
Copy data within or across devices.
Definition: common.h:102
void Exec(function< void(Context *)> &&fn, const vector< Block *> read_blocks, const vector< Block *> write_blocks, bool use_rand_generator=false)
Submit the operation to the device, which may execute it right now or delay it depending on the sched...
Allocate memory and execute Tensor operations.
Definition: device.h:56
std::shared_ptr< Device > defaultDevice
A singleton CppDevice that serves as the host for all devices.
Block * NewBlock(int size)
Called by Tensor.
static std::shared_ptr< Device > GetDefaultDevice()
Return the default host device.
Definition: device.h:271
std::shared_ptr< Device > host_
The host device.
Definition: device.h:136
Block represents a chunk of memory (on device or host).
Definition: common.h:60
LangType lang() const
Return the programming language for this device.
Definition: device.h:97
Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements...
Definition: common.h:48
virtual void Free(void *ptr)=0
Free device memory.