1 #ifndef MSHADOW_TENSOR_GPU_INL_HPP
2 #define MSHADOW_TENSOR_GPU_INL_HPP
10 #if !(MSHADOW_USE_CUDA)
20 #if (MSHADOW_USE_NVML)
21 inline int AutoSelectDevice(
int device_count) {
30 cudaGetDeviceCount(&device_count);
31 utils::Assert(device_count > 0,
"Cannot find CUDA device. Please check CUDA-Configuration");
33 #if (MSHADOW_USE_NVML)
34 device_id = AutoSelectDevice(device_count);
39 utils::Assert( device_id < device_count,
"Incorrect Device ID" );
40 utils::Assert( cudaSetDevice(device_id) == cudaSuccess,
"cannot set device" );
41 cudaGetDeviceProperties(&prop, device_id);
42 printf(
"Use CUDA Device %d: %s\n", device_id, prop.name);
54 cudaError_t err = cudaMallocPitch( (
void**)&obj.
dptr, &pitch, \
60 cudaError_t err = cudaMallocPitch( (
void**)&obj.
dptr, &pitch, \
68 cudaFree( obj.
dptr ); obj.
dptr = NULL;
71 template<
typename A,
typename B,
int dim>
72 inline void Copy(Tensor<A,dim> _dst, Tensor<B,dim> _src, cudaMemcpyKind kind){
73 utils::Assert( _dst.shape == _src.shape,
"Copy:shape mismatch" );
74 Tensor<A,2> dst = _dst.FlatTo2D();
75 Tensor<B,2> src = _src.FlatTo2D();
76 cudaError_t err = cudaMemcpy2D( dst.dptr, dst.shape.stride_ *
sizeof(
real_t),
77 src.dptr, src.shape.stride_ *
sizeof(
real_t),
78 dst.shape[0] *
sizeof(
real_t),
84 Copy( dst, src, cudaMemcpyDeviceToHost );
88 Copy( dst, src, cudaMemcpyDeviceToDevice );
92 Copy( dst, src, cudaMemcpyHostToDevice );
98 #include "cuda/tensor_gpu-inl.cuh"
101 template<
typename Saver,
typename E,
int dim>
102 inline void MapPlan(Tensor<gpu,dim> _dst,
const expr::Plan<E> &plan){
103 cuda::MapPlan<Saver>( _dst.FlatTo2D(), plan );
106 template<
typename Saver,
int dim,
typename E,
int etype>
107 inline void MapExp(Tensor<gpu,dim> dst,
const expr::Exp<E,etype> &exp ){
108 using namespace expr;
109 TypeCheckPass< TypeCheck<gpu,dim,E>::kMapPass >::Error_All_Tensor_in_Exp_Must_Have_Same_Type();
110 Shape<dim> eshape = ShapeCheck<dim,E>::Check( exp.self() );
111 utils::Assert( eshape[0] == 0 || eshape == dst.shape,
"Assignment: Shape of Tensors in expression is not consistent with target" );
112 MapPlan<Saver>( dst, MakePlan( exp.self() ) );
115 template<
typename Saver,
typename Reducer,
typename E,
int etype>
117 using namespace expr;
118 TypeCheckPass< TypeCheck<gpu,1,E>::kRedPass >::Error_TypeCheck_Not_Pass_For_Reduce_Exp();
119 Shape<2> eshape = ShapeCheck< ExpInfo<E>::kDim, E >::Check( exp.self() ).FlatTo2D();
121 utils::Assert( eshape[0] == dst.shape[0],
"reduction dimension do not match" );
122 utils::Assert( eshape[1] != 0,
"can not reduce over empty tensor" );
123 cuda::MapReduceKeepLowest<Saver,Reducer>( dst, MakePlan( exp.self() ), scale, eshape );
126 template<
typename Saver,
typename Reducer,
int dimkeep,
typename E,
int etype>
128 using namespace expr;
129 TypeCheckPass< TypeCheck<gpu,dimkeep,E>::kRedPass >::Error_TypeCheck_Not_Pass_For_Reduce_Exp();
130 typedef Shape< ExpInfo<E>::kDim > EShape;
131 EShape eshape = ShapeCheck< ExpInfo<E>::kDim, E >::Check( exp.self() );
132 utils::Assert( eshape[dimkeep] == dst.shape[0],
"reduction dimension do not match" );
134 Shape<4> pshape =
Shape4( eshape.ProdShape(dimkeep+1,EShape::kMaxShape), eshape[dimkeep],
135 eshape.ProdShape(1,dimkeep), eshape[0] );
137 cuda::MapReduceKeepDim2<Saver,Reducer>( dst, MakePlan( exp.self() ), scale, pshape );
140 inline void Softmax( Tensor<gpu,2> dst,
const Tensor<gpu,2>& src ){
147 #endif // MSHADOW_USE_CUDA
148 #endif // TENSOR_GPU_INL_HPP
void MapExp(Tensor< cpu, dim > dst, const expr::Exp< E, etype > &exp)
CPU/GPU: map an expression to a tensor; this function calls MapPlan.
Definition: tensor_cpu-inl.hpp:87
unsigned index_t
type that will be used for index
Definition: tensor_base.h:123
void MapReduceKeepLowest(Tensor< cpu, 1 > dst, const expr::Exp< E, etype > &exp, real_t scale=1.0f)
CPU/GPU: map an expression, then reduce it to a 1D Tensor along the lowest dimension (dimension 0) ...
Definition: tensor_cpu-inl.hpp:100
#define MSHADOW_MIN_PAD_RATIO
x dimension of data must be bigger than pad_size * ratio to be allocated padded memory, otherwise use tight a...
Definition: tensor_base.h:32
MSHADOW_XINLINE Shape< 4 > Shape4(index_t s3, index_t s2, index_t s1, index_t s0)
construct a four-dimensional shape; the stride will equal s0
Definition: tensor.h:176
void FreeSpace(Tensor< cpu, dim > &obj)
CPU/GPU: free the space of tensor, will set obj.dptr to NULL.
Definition: tensor_cpu-inl.hpp:36
void Assert(bool exp)
assert that an expression is true
Definition: tensor_base.h:285
void InitTensorEngine(int device_id=0)
initialize the tensor engine; used to call initialization functions of dependent libs. This function should...
Definition: tensor_gpu-inl.hpp:26
void MapReduceKeepHighDim(Tensor< cpu, 1 > dst, const expr::Exp< E, etype > &exp, real_t scale=1.0f)
CPU/GPU: map an expression, then reduce it to a 1D Tensor along the third dimension (dimension 2) ...
Definition: tensor_cpu-inl.hpp:119
float real_t
type that will be used for content
Definition: tensor_base.h:118
void Softmax(Tensor< cpu, 2 > dst, const Tensor< cpu, 2 > &energy)
CPU/GPU: normalize softmax: dst[i][j] = exp( energy[i][j] ) /( sum_j exp( energy[i][j] ) ) ...
Definition: tensor_cpu-inl.hpp:160
header file of the tensor data structure and functions. Convention: this lib requires explicit memory alloc...
real_t * dptr
pointer to the data
Definition: tensor.h:215
MSHADOW_XINLINE Tensor< Device, 2 > FlatTo2D(void) const
flatten the tensor to 2 dimensions, collapsing the higher dimensions together
Definition: tensor.h:229
void ShutdownTensorEngine(void)
Shut down the tensor engine. This function should be called after all GPU tensor operations; when using tensors on the CPU, this call is actually not needed.
Definition: tensor_gpu-inl.hpp:45
void Copy(Tensor< cpu, dim > dst, const Tensor< cpu, dim > &src)
copy data from one tensor to another with the same shape
Definition: tensor_cpu-inl.hpp:42
Shape< dimension > shape
shape of the tensor
Definition: tensor.h:217
void AllocSpace(Tensor< cpu, dim > &obj, bool pad=MSHADOW_ALLOC_PAD)
CPU/GPU: allocate space for CTensor, according to the shape in the obj; this function is responsible t...
Definition: tensor_cpu-inl.hpp:14
general tensor
Definition: tensor.h:206
PaddingExp< SrcExp, ExpInfo< SrcExp >::kDim > pad(const Exp< SrcExp, etype > &src, index_t pad)
padding expression: pad an image with zeros on the boundaries; padding affects shape[0] and shape[1]
Definition: tensor_expr_ext.h:496