1 #include "caffe2/core/common_gpu.h" 2 #include "caffe2/core/asan.h" 8 #include "caffe2/core/init.h" 9 #include "caffe2/core/logging.h" 14 if (getenv(
"CAFFE2_DEBUG_CUDA_INIT_ORDER")) {
15 static bool first =
true;
18 std::cerr <<
"DEBUG: caffe2::NumCudaDevices() invoked for the first time" 22 static int count = -1;
24 auto err = cudaGetDeviceCount(&count);
29 case cudaErrorNoDevice:
32 case cudaErrorInsufficientDriver:
33 LOG(WARNING) <<
"Insufficient cuda driver. Cannot use cuda.";
36 case cudaErrorInitializationError:
37 LOG(WARNING) <<
"Cuda driver initialization failed, you might not " 41 case cudaErrorUnknown:
42 LOG(ERROR) <<
"Found an unknown error - this may be due to an " 43 "incorrectly set up environment, e.g. changing env " 44 "variable CUDA_VISIBLE_DEVICES after program start. " 45 "I will set the available devices to be zero.";
48 case cudaErrorMemoryAllocation:
49 #if CAFFE2_ASAN_ENABLED 52 LOG(ERROR) <<
"It is known that CUDA does not work well with ASAN. As " 53 "a result we will simply shut down CUDA support. If you " 54 "would like to use GPUs, turn off ASAN.";
57 #else // CAFFE2_ASAN_ENABLED 60 LOG(FATAL) <<
"Unexpected error from cudaGetDeviceCount(). Did you run " 61 "some cuda functions before calling NumCudaDevices() " 62 "that might have already set an error? Error: " 65 #endif // CAFFE2_ASAN_ENABLED 67 LOG(FATAL) <<
"Unexpected error from cudaGetDeviceCount(). Did you run " 68 "some cuda functions before calling NumCudaDevices() " 69 "that might have already set an error? Error: " 77 int gDefaultGPUID = 0;
84 "The default gpu id should be smaller than the number of gpus " 89 gDefaultGPUID = deviceid;
95 CUDA_ENFORCE(cudaGetDevice(&gpu_id));
100 cudaPointerAttributes attr;
101 CUDA_ENFORCE(cudaPointerGetAttributes(&attr, ptr));
106 static vector<cudaDeviceProp> props;
110 "The gpu id should be smaller than the number of gpus ",
115 if (props.size() == 0) {
118 CUDA_ENFORCE(cudaGetDeviceProperties(&props[i], i));
121 return props[deviceid];
126 std::stringstream ss;
128 ss <<
"Device id: " << device << std::endl;
129 ss <<
"Major revision number: " << prop.major << std::endl;
130 ss <<
"Minor revision number: " << prop.minor << std::endl;
131 ss <<
"Name: " << prop.name << std::endl;
132 ss <<
"Total global memory: " << prop.totalGlobalMem << std::endl;
133 ss <<
"Total shared memory per block: " << prop.sharedMemPerBlock
135 ss <<
"Total registers per block: " << prop.regsPerBlock << std::endl;
136 ss <<
"Warp size: " << prop.warpSize << std::endl;
137 ss <<
"Maximum memory pitch: " << prop.memPitch << std::endl;
138 ss <<
"Maximum threads per block: " << prop.maxThreadsPerBlock
140 ss <<
"Maximum dimension of block: " 141 << prop.maxThreadsDim[0] <<
", " << prop.maxThreadsDim[1] <<
", " 142 << prop.maxThreadsDim[2] << std::endl;
143 ss <<
"Maximum dimension of grid: " 144 << prop.maxGridSize[0] <<
", " << prop.maxGridSize[1] <<
", " 145 << prop.maxGridSize[2] << std::endl;
146 ss <<
"Clock rate: " << prop.clockRate << std::endl;
147 ss <<
"Total constant memory: " << prop.totalConstMem << std::endl;
148 ss <<
"Texture alignment: " << prop.textureAlignment << std::endl;
149 ss <<
"Concurrent copy and execution: " 150 << (prop.deviceOverlap ?
"Yes" :
"No") << std::endl;
151 ss <<
"Number of multiprocessors: " << prop.multiProcessorCount
153 ss <<
"Kernel execution timeout: " 154 << (prop.kernelExecTimeoutEnabled ?
"Yes" :
"No") << std::endl;
155 LOG(INFO) << ss.str();
161 if (cudaGetDeviceCount(&gpu_count) != cudaSuccess)
return false;
163 pattern->resize(gpu_count, vector<bool>(gpu_count,
false));
164 for (
int i = 0; i < gpu_count; ++i) {
165 for (
int j = 0; j < gpu_count; ++j) {
166 int can_access =
true;
168 if (cudaDeviceCanAccessPeer(&can_access, i, j)
173 (*pattern)[i][j] =
static_cast<bool>(can_access);
181 case CUBLAS_STATUS_SUCCESS:
182 return "CUBLAS_STATUS_SUCCESS";
183 case CUBLAS_STATUS_NOT_INITIALIZED:
184 return "CUBLAS_STATUS_NOT_INITIALIZED";
185 case CUBLAS_STATUS_ALLOC_FAILED:
186 return "CUBLAS_STATUS_ALLOC_FAILED";
187 case CUBLAS_STATUS_INVALID_VALUE:
188 return "CUBLAS_STATUS_INVALID_VALUE";
189 case CUBLAS_STATUS_ARCH_MISMATCH:
190 return "CUBLAS_STATUS_ARCH_MISMATCH";
191 case CUBLAS_STATUS_MAPPING_ERROR:
192 return "CUBLAS_STATUS_MAPPING_ERROR";
193 case CUBLAS_STATUS_EXECUTION_FAILED:
194 return "CUBLAS_STATUS_EXECUTION_FAILED";
195 case CUBLAS_STATUS_INTERNAL_ERROR:
196 return "CUBLAS_STATUS_INTERNAL_ERROR";
197 #if CUDA_VERSION >= 6000 198 case CUBLAS_STATUS_NOT_SUPPORTED:
199 return "CUBLAS_STATUS_NOT_SUPPORTED";
200 #if CUDA_VERSION >= 6050 201 case CUBLAS_STATUS_LICENSE_ERROR:
202 return "CUBLAS_STATUS_LICENSE_ERROR";
203 #endif // CUDA_VERSION >= 6050 204 #endif // CUDA_VERSION >= 6000 207 return "Unrecognized cublas error string";
212 case CURAND_STATUS_SUCCESS:
213 return "CURAND_STATUS_SUCCESS";
214 case CURAND_STATUS_VERSION_MISMATCH:
215 return "CURAND_STATUS_VERSION_MISMATCH";
216 case CURAND_STATUS_NOT_INITIALIZED:
217 return "CURAND_STATUS_NOT_INITIALIZED";
218 case CURAND_STATUS_ALLOCATION_FAILED:
219 return "CURAND_STATUS_ALLOCATION_FAILED";
220 case CURAND_STATUS_TYPE_ERROR:
221 return "CURAND_STATUS_TYPE_ERROR";
222 case CURAND_STATUS_OUT_OF_RANGE:
223 return "CURAND_STATUS_OUT_OF_RANGE";
224 case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
225 return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
226 case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
227 return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
228 case CURAND_STATUS_LAUNCH_FAILURE:
229 return "CURAND_STATUS_LAUNCH_FAILURE";
230 case CURAND_STATUS_PREEXISTING_FAILURE:
231 return "CURAND_STATUS_PREEXISTING_FAILURE";
232 case CURAND_STATUS_INITIALIZATION_FAILED:
233 return "CURAND_STATUS_INITIALIZATION_FAILED";
234 case CURAND_STATUS_ARCH_MISMATCH:
235 return "CURAND_STATUS_ARCH_MISMATCH";
236 case CURAND_STATUS_INTERNAL_ERROR:
237 return "CURAND_STATUS_INTERNAL_ERROR";
240 return "Unrecognized curand error string";
void DeviceQuery(const int device)
Runs a device query function and prints out the results to LOG(INFO).
void SetDefaultGPUID(const int deviceid)
Sets the default GPU id for Caffe2.
int GetCurrentGPUID()
Gets the current GPU id.
bool GetCudaPeerAccessPattern(vector< vector< bool > > *pattern)
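Return a peer access pattern by returning a matrix (in the format of a nested vector) of boolean values, where entry [i][j] records whether GPU i can access GPU j as a peer. Returns false if the GPU count cannot be queried.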
int NumCudaDevices()
Returns the number of CUDA devices.
Simple registry implementation in Caffe2 that uses static variables to register object creators during program initialization.
int GetGPUIDForPointer(const void *ptr)
Gets the id of the GPU on which the given pointer is located.
int GetDefaultGPUID()
Gets the default GPU id for Caffe2.
const cudaDeviceProp & GetDeviceProperty(const int deviceid)
Gets the device property for the given device.
const char * curandGetErrorString(curandStatus_t error)
Return a human readable curand error string.
const char * cublasGetErrorString(cublasStatus_t error)
Return a human readable cublas error string.