19 #ifndef SINGA_CORE_DEVICE_H_ 20 #define SINGA_CORE_DEVICE_H_ 22 #include <type_traits> 28 #include "singa/singa_config.h" 29 #include "singa/core/common.h" 30 #include "singa/core/memory.h" 31 #include "singa/core/scheduler.h" 32 #include "singa/proto/core.pb.h" 35 #include "singa/utils/cuda_utils.h" 39 #include "singa/utils/opencl_utils.h" 43 #include "singa/utils/mkldnn_utils.h" 49 using std::shared_ptr;
/// Constructor: takes the device identifier and the number of executors.
/// NOTE(review): how the arguments are stored is not visible in this header
/// excerpt; presumably they initialise id_ and num_executors_ — confirm in
/// the implementation file.
62 Device(
int id,
int num_executors);
/// Pure virtual: every concrete device must provide its own implementation.
/// NOTE(review): presumably seeds the device's random number generator with
/// `seed` — confirm against the implementations.
64 virtual void SetRandSeed(
unsigned seed) = 0;
80 CopyDirection direction,
int dst_offset,
int src_offset);
/// NOTE(review): judging by the name and parameters, copies `nBytes` bytes
/// from the host pointer `src` into the Block `dst` — confirm in the
/// implementation file.
/// `dst_offset` defaults to 0; whether it is measured in bytes or elements is
/// not visible here — TODO confirm.
82 void CopyDataFromHostPtr(
Block* dst,
const void* src,
size_t nBytes,
83 size_t dst_offset = 0);
/// Submit the operation `fn` to the device, which may execute it right away
/// or delay it.
/// `fn` is taken by rvalue reference; both Block lists are taken by value
/// (each call copies the vectors). `use_rand_generator` defaults to false.
/// NOTE(review): read_blocks / write_blocks are presumably the Blocks that
/// `fn` reads from and writes to — confirm against the implementation.
86 void Exec(
function<
void(
Context*)>&& fn,
const vector<Block*> read_blocks,
87 const vector<Block*> write_blocks,
88 bool use_rand_generator =
false);
/// Returns the host device stored in `host_`.
/// Declared virtual, so a derived device may report a different host.
101 virtual std::shared_ptr<Device> host()
const {
return host_;}
/// Returns this device's identifier (`id_`).
107 int id()
const {
return id_; }
/// Execute one operation on one executor.
/// Pure virtual: implemented by each concrete device.
/// NOTE(review): `executor` is presumably an index below num_executors_ —
/// confirm against the implementations.
114 virtual void DoExec(
function<
void(
Context*)>&& fn,
int executor) = 0;
/// Pure virtual raw-pointer copy primitive implemented by each concrete
/// device.
/// NOTE(review): presumably copies `nBytes` bytes from `src` to `dst` in the
/// given `direction`, using `ctx` — confirm against the implementations.
116 virtual void CopyToFrom(
void* dst,
const void* src,
size_t nBytes,
117 CopyDirection direction,
Context* ctx) = 0;
/// Allocate device memory. Pure virtual: implemented by each concrete device.
/// `size` is a signed int, so a single request cannot exceed INT_MAX.
/// NOTE(review): the unit is presumably bytes — confirm.
120 virtual void*
Malloc(
int size) = 0;
/// Free device memory. Pure virtual: implemented by each concrete device.
/// NOTE(review): `ptr` is presumably a pointer previously returned by
/// Malloc() — confirm.
123 virtual void Free(
void* ptr) = 0;
/// Number of executors; the in-class initializer makes it 0 by default.
/// NOTE(review): presumably overwritten by the constructor's `num_executors`
/// argument — confirm.
127 int num_executors_ = 0;
/// Override of host(): returns the global `defaultDevice` (the singleton
/// host device for all devices) instead of a per-object host pointer.
151 std::shared_ptr<Device> host()
const override {
return defaultDevice;}
/// Override of the pure virtual SetRandSeed().
152 void SetRandSeed(
unsigned seed)
override;
/// Override of DoExec(): execute one operation on one executor.
155 void DoExec(
function<
void(
Context*)>&& fn,
int executor)
override;
/// Override of the raw-pointer copy primitive CopyToFrom().
157 void CopyToFrom(
void* dst,
const void* src,
size_t nBytes,
158 CopyDirection direction,
Context* ctx)
override;
/// Override of Malloc(): allocate memory for this device.
161 void*
Malloc(
int size)
override;
/// Override of Free(): release memory for this device.
164 void Free(
void* ptr)
override;
/// A Device subclass (public inheritance).
/// NOTE(review): the name suggests a GPU driven through CUDA — confirm.
173 class CudaGPU :
public Device {
/// Constructor taking the device id and a shared DeviceMemPool.
/// NOTE(review): `pool` is presumably kept in the pool_ member — confirm.
179 CudaGPU(
int id, std::shared_ptr<DeviceMemPool> pool);
/// Override of the pure virtual SetRandSeed().
181 void SetRandSeed(
unsigned seed)
override;
/// Override of DoExec(): execute one operation on one executor.
185 void DoExec(
function<
void(
Context*)>&& fn,
int executor)
override;
/// Override of the raw-pointer copy primitive CopyToFrom().
187 void CopyToFrom(
void* dst,
const void* src,
size_t nBytes,
188 CopyDirection direction,
Context* ctx)
override;
/// Override of Malloc(): allocate device memory.
/// NOTE(review): presumably served from pool_ — confirm.
191 void*
Malloc(
int size)
override;
/// Override of Free(): free device memory.
/// NOTE(review): presumably returns the memory to pool_ — confirm.
194 void Free(
void* ptr)
override;
/// Shared handle to a DeviceMemPool.
/// NOTE(review): presumably the pool passed to the constructor and used by
/// Malloc()/Free() — confirm.
200 shared_ptr<DeviceMemPool> pool_;
/// Constructor; `id` defaults to 0 and `num_executors` defaults to 1, so it
/// can also be called with no arguments.
216 OpenclDevice(
int id = 0,
int num_executors = 1);
/// Override of the pure virtual SetRandSeed().
220 void SetRandSeed(
unsigned seed)
override;
223 CopyDirection direction,
int dst_offset = 0,
224 int src_offset = 0)
override;
/// ViennaCL OpenCL device handle.
/// NOTE(review): presumably the physical device this object wraps — confirm.
230 viennacl::ocl::device this_device;
/// ViennaCL OpenCL context handle.
/// NOTE(review): presumably the context in which this device's kernels and
/// buffers live — confirm.
234 viennacl::ocl::context vcl_ctx;
/// NOTE(review): presumably compiles the OpenCL kernel sources located via
/// cl_src_path — confirm in the implementation file.
238 void BuildPrograms();
/// Override of DoExec(): execute one operation on one executor.
242 void DoExec(
function<
void(
Context*)>&& fn,
int executor)
override;
/// Override of the raw-pointer copy primitive CopyToFrom().
/// The `ctx = nullptr` default is chosen from the static type of the call
/// expression, so it only applies to calls made through this class, not
/// through a pointer or reference to the base class.
244 void CopyToFrom(
void* dst,
const void* src,
size_t nBytes,
245 CopyDirection direction,
Context* ctx =
nullptr)
override;
/// Override of Malloc(): allocate device memory.
250 void*
Malloc(
int size)
override;
/// Override of Free(): free device memory.
254 void Free(
void* ptr)
override;
/// Class-wide constant string; its value is defined outside this excerpt.
/// NOTE(review): presumably the directory holding the OpenCL kernel sources
/// — confirm.
258 static const std::string cl_src_path;
/// NOTE(review): presumably returns the number of GPUs visible to the
/// process — confirm.
276 static int GetNumGPUs();
/// NOTE(review): presumably returns the ids of the available GPUs — confirm.
281 static const std::vector<int> GetGPUIDs();
/// Memory query for the single GPU `device`; returns a pair of sizes.
/// NOTE(review): the meaning of the two values (e.g. free/total) is not
/// visible here — TODO confirm.
283 static const std::pair<size_t, size_t> GetGPUMemSize(
const int device);
/// Overload without arguments returning one size pair per entry.
/// NOTE(review): presumably one entry per available GPU — confirm.
286 static const std::vector<std::pair<size_t, size_t>> GetGPUMemSize();
/// Returns a textual description for device `id`; `verbose` defaults to
/// false.
/// NOTE(review): the content of the string is not visible here — confirm.
289 static const std::string DeviceQuery(
int id,
bool verbose =
false);
/// Factory returning a vector of shared Device handles.
/// NOTE(review): presumably creates `num_devices` CudaGPU objects; the
/// meaning of `init_size` (default 0) is not visible here — TODO confirm.
292 static const std::vector<std::shared_ptr<Device>>
293 CreateCudaGPUs(
const size_t num_devices,
size_t init_size = 0);
/// Factory variant that takes an explicit list of device ids.
/// NOTE(review): presumably creates one CudaGPU per id in `devices`; the
/// meaning of `init_size` (default 0) is not visible here — TODO confirm.
296 static const std::vector<std::shared_ptr<Device>>
297 CreateCudaGPUsOn(
const std::vector<int> &devices,
size_t init_size = 0);
/// Boolean check on `device_id`.
/// NOTE(review): presumably reports whether the device is usable — confirm.
313 static bool CheckDevice(
const int device_id);
/// Unlike the neighbouring declarations, the next two are not declared
/// static. The top-level const on their by-value int return type has no
/// effect for callers.
/// NOTE(review): presumably returns the number of OpenCL platforms — confirm.
318 const int GetNumOpenclPlatforms();
/// NOTE(review): presumably returns the number of OpenCL devices — confirm.
320 const int GetNumOpenclDevices();
/// NOTE(review): presumably returns a shared handle to a default OpenCL
/// device — confirm.
322 static const std::shared_ptr<Device> GetDefaultOpenclDevice();
345 #endif // SINGA_CORE_DEVICE_H_ void FreeBlock(Block *block)
Called by Tensor.
virtual size_t GetAllocatedMem()
Return the size (bytes) of memory in use. TODO(wangwei): override this function for all devices...
Definition: device.h:74
virtual void * Malloc(int size)=0
Allocate device memory.
void Sync()
Wait for all operations submitted to this device to complete.
Represents a CPU device, which may have multiple threads/executors.
Definition: device.h:146
LangType lang_
Programming language type, could be kCpp, kCuda, kOpencl.
Definition: device.h:132
virtual void DoExec(function< void(Context *)> &&fn, int executor)=0
Execute one operation on one executor.
virtual void CopyDataToFrom(Block *dst, Block *src, size_t nBytes, CopyDirection direction, int dst_offset, int src_offset)
Copy data within or across devices.
void Exec(function< void(Context *)> &&fn, const vector< Block *> read_blocks, const vector< Block *> write_blocks, bool use_rand_generator=false)
Submit the operation to the device, which may execute it right now or delay it depending on the sched...
Allocate memory and execute Tensor operations.
Definition: device.h:56
std::shared_ptr< Device > defaultDevice
A singleton CppDevice that serves as the host for all devices.
Block * NewBlock(int size)
Called by Tensor.
std::shared_ptr< Device > host_
The host device.
Definition: device.h:136
Block represents a chunk of memory (on device or host).
Definition: common.h:60
LangType lang() const
Return the programming language for this device.
Definition: device.h:97
Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements...
Definition: common.h:48
virtual void Free(void *ptr)=0
Free device memory.