Caffe2 - C++ API
A deep learning, cross platform ML framework
common_gpu.cc
1 #include "caffe2/core/common_gpu.h"
2 #include "caffe2/core/asan.h"
3 
4 #include <atomic>
5 #include <cstdlib>
6 #include <sstream>
7 
8 #include "caffe2/core/init.h"
9 #include "caffe2/core/logging.h"
10 
11 namespace caffe2 {
12 
14  if (getenv("CAFFE2_DEBUG_CUDA_INIT_ORDER")) {
15  static bool first = true;
16  if (first) {
17  first = false;
18  std::cerr << "DEBUG: caffe2::NumCudaDevices() invoked for the first time"
19  << std::endl;
20  }
21  }
22  static int count = -1;
23  if (count < 0) {
24  auto err = cudaGetDeviceCount(&count);
25  switch (err) {
26  case cudaSuccess:
27  // Everything is good.
28  break;
29  case cudaErrorNoDevice:
30  count = 0;
31  break;
32  case cudaErrorInsufficientDriver:
33  LOG(WARNING) << "Insufficient cuda driver. Cannot use cuda.";
34  count = 0;
35  break;
36  case cudaErrorInitializationError:
37  LOG(WARNING) << "Cuda driver initialization failed, you might not "
38  "have a cuda gpu.";
39  count = 0;
40  break;
41  case cudaErrorUnknown:
42  LOG(ERROR) << "Found an unknown error - this may be due to an "
43  "incorrectly set up environment, e.g. changing env "
44  "variable CUDA_VISIBLE_DEVICES after program start. "
45  "I will set the available devices to be zero.";
46  count = 0;
47  break;
48  case cudaErrorMemoryAllocation:
49 #if CAFFE2_ASAN_ENABLED
50  // In ASAN mode, we know that a cudaErrorMemoryAllocation error will
51  // pop up.
52  LOG(ERROR) << "It is known that CUDA does not work well with ASAN. As "
53  "a result we will simply shut down CUDA support. If you "
54  "would like to use GPUs, turn off ASAN.";
55  count = 0;
56  break;
57 #else // CAFFE2_ASAN_ENABLED
58  // If we are not in ASAN mode and we get cudaErrorMemoryAllocation,
59  // this means that something is wrong before NumCudaDevices() call.
60  LOG(FATAL) << "Unexpected error from cudaGetDeviceCount(). Did you run "
61  "some cuda functions before calling NumCudaDevices() "
62  "that might have already set an error? Error: "
63  << err;
64  break;
65 #endif // CAFFE2_ASAN_ENABLED
66  default:
67  LOG(FATAL) << "Unexpected error from cudaGetDeviceCount(). Did you run "
68  "some cuda functions before calling NumCudaDevices() "
69  "that might have already set an error? Error: "
70  << err;
71  }
72  }
73  return count;
74 }
75 
namespace {
// File-local default GPU id. Starts at 0; written by SetDefaultGPUID() and
// read by GetDefaultGPUID().
int gDefaultGPUID{0};
} // namespace
79 
80 void SetDefaultGPUID(const int deviceid) {
81  CAFFE_ENFORCE_LT(
82  deviceid,
84  "The default gpu id should be smaller than the number of gpus "
85  "on this machine: ",
86  deviceid,
87  " vs ",
88  NumCudaDevices());
89  gDefaultGPUID = deviceid;
90 }
91 int GetDefaultGPUID() { return gDefaultGPUID; }
92 
94  int gpu_id = 0;
95  CUDA_ENFORCE(cudaGetDevice(&gpu_id));
96  return gpu_id;
97 }
98 
99 int GetGPUIDForPointer(const void* ptr) {
100  cudaPointerAttributes attr;
101  CUDA_ENFORCE(cudaPointerGetAttributes(&attr, ptr));
102  return attr.device;
103 }
104 
105 const cudaDeviceProp& GetDeviceProperty(const int deviceid) {
106  static vector<cudaDeviceProp> props;
107  CAFFE_ENFORCE_LT(
108  deviceid,
109  NumCudaDevices(),
110  "The gpu id should be smaller than the number of gpus ",
111  "on this machine: ",
112  deviceid,
113  " vs ",
114  NumCudaDevices());
115  if (props.size() == 0) {
116  props.resize(NumCudaDevices());
117  for (int i = 0; i < NumCudaDevices(); ++i) {
118  CUDA_ENFORCE(cudaGetDeviceProperties(&props[i], i));
119  }
120  }
121  return props[deviceid];
122 }
123 
124 void DeviceQuery(const int device) {
125  const cudaDeviceProp& prop = GetDeviceProperty(device);
126  std::stringstream ss;
127  ss << std::endl;
128  ss << "Device id: " << device << std::endl;
129  ss << "Major revision number: " << prop.major << std::endl;
130  ss << "Minor revision number: " << prop.minor << std::endl;
131  ss << "Name: " << prop.name << std::endl;
132  ss << "Total global memory: " << prop.totalGlobalMem << std::endl;
133  ss << "Total shared memory per block: " << prop.sharedMemPerBlock
134  << std::endl;
135  ss << "Total registers per block: " << prop.regsPerBlock << std::endl;
136  ss << "Warp size: " << prop.warpSize << std::endl;
137  ss << "Maximum memory pitch: " << prop.memPitch << std::endl;
138  ss << "Maximum threads per block: " << prop.maxThreadsPerBlock
139  << std::endl;
140  ss << "Maximum dimension of block: "
141  << prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", "
142  << prop.maxThreadsDim[2] << std::endl;
143  ss << "Maximum dimension of grid: "
144  << prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", "
145  << prop.maxGridSize[2] << std::endl;
146  ss << "Clock rate: " << prop.clockRate << std::endl;
147  ss << "Total constant memory: " << prop.totalConstMem << std::endl;
148  ss << "Texture alignment: " << prop.textureAlignment << std::endl;
149  ss << "Concurrent copy and execution: "
150  << (prop.deviceOverlap ? "Yes" : "No") << std::endl;
151  ss << "Number of multiprocessors: " << prop.multiProcessorCount
152  << std::endl;
153  ss << "Kernel execution timeout: "
154  << (prop.kernelExecTimeoutEnabled ? "Yes" : "No") << std::endl;
155  LOG(INFO) << ss.str();
156  return;
157 }
158 
159 bool GetCudaPeerAccessPattern(vector<vector<bool> >* pattern) {
160  int gpu_count;
161  if (cudaGetDeviceCount(&gpu_count) != cudaSuccess) return false;
162  pattern->clear();
163  pattern->resize(gpu_count, vector<bool>(gpu_count, false));
164  for (int i = 0; i < gpu_count; ++i) {
165  for (int j = 0; j < gpu_count; ++j) {
166  int can_access = true;
167  if (i != j) {
168  if (cudaDeviceCanAccessPeer(&can_access, i, j)
169  != cudaSuccess) {
170  return false;
171  }
172  }
173  (*pattern)[i][j] = static_cast<bool>(can_access);
174  }
175  }
176  return true;
177 }
178 
179 const char* cublasGetErrorString(cublasStatus_t error) {
180  switch (error) {
181  case CUBLAS_STATUS_SUCCESS:
182  return "CUBLAS_STATUS_SUCCESS";
183  case CUBLAS_STATUS_NOT_INITIALIZED:
184  return "CUBLAS_STATUS_NOT_INITIALIZED";
185  case CUBLAS_STATUS_ALLOC_FAILED:
186  return "CUBLAS_STATUS_ALLOC_FAILED";
187  case CUBLAS_STATUS_INVALID_VALUE:
188  return "CUBLAS_STATUS_INVALID_VALUE";
189  case CUBLAS_STATUS_ARCH_MISMATCH:
190  return "CUBLAS_STATUS_ARCH_MISMATCH";
191  case CUBLAS_STATUS_MAPPING_ERROR:
192  return "CUBLAS_STATUS_MAPPING_ERROR";
193  case CUBLAS_STATUS_EXECUTION_FAILED:
194  return "CUBLAS_STATUS_EXECUTION_FAILED";
195  case CUBLAS_STATUS_INTERNAL_ERROR:
196  return "CUBLAS_STATUS_INTERNAL_ERROR";
197 #if CUDA_VERSION >= 6000
198  case CUBLAS_STATUS_NOT_SUPPORTED:
199  return "CUBLAS_STATUS_NOT_SUPPORTED";
200 #if CUDA_VERSION >= 6050
201  case CUBLAS_STATUS_LICENSE_ERROR:
202  return "CUBLAS_STATUS_LICENSE_ERROR";
203 #endif // CUDA_VERSION >= 6050
204 #endif // CUDA_VERSION >= 6000
205  }
206  // To suppress compiler warning.
207  return "Unrecognized cublas error string";
208 }
209 
210 const char* curandGetErrorString(curandStatus_t error) {
211  switch (error) {
212  case CURAND_STATUS_SUCCESS:
213  return "CURAND_STATUS_SUCCESS";
214  case CURAND_STATUS_VERSION_MISMATCH:
215  return "CURAND_STATUS_VERSION_MISMATCH";
216  case CURAND_STATUS_NOT_INITIALIZED:
217  return "CURAND_STATUS_NOT_INITIALIZED";
218  case CURAND_STATUS_ALLOCATION_FAILED:
219  return "CURAND_STATUS_ALLOCATION_FAILED";
220  case CURAND_STATUS_TYPE_ERROR:
221  return "CURAND_STATUS_TYPE_ERROR";
222  case CURAND_STATUS_OUT_OF_RANGE:
223  return "CURAND_STATUS_OUT_OF_RANGE";
224  case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
225  return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
226  case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
227  return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
228  case CURAND_STATUS_LAUNCH_FAILURE:
229  return "CURAND_STATUS_LAUNCH_FAILURE";
230  case CURAND_STATUS_PREEXISTING_FAILURE:
231  return "CURAND_STATUS_PREEXISTING_FAILURE";
232  case CURAND_STATUS_INITIALIZATION_FAILED:
233  return "CURAND_STATUS_INITIALIZATION_FAILED";
234  case CURAND_STATUS_ARCH_MISMATCH:
235  return "CURAND_STATUS_ARCH_MISMATCH";
236  case CURAND_STATUS_INTERNAL_ERROR:
237  return "CURAND_STATUS_INTERNAL_ERROR";
238  }
239  // To suppress compiler warning.
240  return "Unrecognized curand error string";
241 }
242 } // namespace caffe2
void DeviceQuery(const int device)
Runs a device query function and prints out the results to LOG(INFO).
Definition: common_gpu.cc:124
void SetDefaultGPUID(const int deviceid)
Sets the default GPU id for Caffe2.
Definition: common_gpu.cc:80
int GetCurrentGPUID()
Gets the current GPU id.
Definition: common_gpu.cc:93
bool GetCudaPeerAccessPattern(vector< vector< bool > > *pattern)
Return a peer access pattern by returning a matrix (in the format of a nested vector) of boolean values specifying whether peer access is possible.
Definition: common_gpu.cc:159
int NumCudaDevices()
Returns the number of devices.
Definition: common_gpu.cc:13
Simple registry implementation in Caffe2 that uses static variables to register object creators during program initialization time.
int GetGPUIDForPointer(const void *ptr)
Gets the GPU id that the current pointer is located at.
Definition: common_gpu.cc:99
int GetDefaultGPUID()
Gets the default GPU id for Caffe2.
Definition: common_gpu.cc:91
const cudaDeviceProp & GetDeviceProperty(const int deviceid)
Gets the device property for the given device.
Definition: common_gpu.cc:105
const char * curandGetErrorString(curandStatus_t error)
Return a human readable curand error string.
Definition: common_gpu.cc:210
const char * cublasGetErrorString(cublasStatus_t error)
Return a human readable cublas error string.
Definition: common_gpu.cc:179