1 #ifndef CAFFE2_CORE_COMMON_GPU_H_ 2 #define CAFFE2_CORE_COMMON_GPU_H_ 7 #include <cuda_runtime.h> 9 #include <driver_types.h> 11 #include "caffe2/core/logging.h" 12 #include "caffe2/core/common.h" 20 #ifndef CAFFE_HAS_CUDA_FP16 21 #if CUDA_VERSION >= 7050 22 #define CAFFE_HAS_CUDA_FP16 23 #endif // CUDA_VERSION >= 7050 24 #endif // CAFFE_HAS_CUDA_FP16 26 #ifdef CAFFE_HAS_CUDA_FP16 27 #include <cuda_fp16.h> 33 #define CAFFE2_COMPILE_TIME_MAX_GPUS 16 41 #define CAFFE2_CUDA_MAX_PEER_SIZE 8 120 #define CUDA_ENFORCE(condition) \ 122 cudaError_t error = condition; \ 131 cudaGetErrorString(error)); \ 133 #define CUDA_CHECK(condition) \ 135 cudaError_t error = condition; \ 136 CHECK(error == cudaSuccess) << cudaGetErrorString(error); \ 139 #define CUDA_DRIVERAPI_ENFORCE(condition) \ 141 CUresult result = condition; \ 142 if (result != CUDA_SUCCESS) { \ 144 cuGetErrorName(result, &msg); \ 145 CAFFE_THROW("Error at: ", __FILE__, ":", __LINE__, ": ", msg); \ 148 #define CUDA_DRIVERAPI_CHECK(condition) \ 150 CUresult result = condition; \ 151 if (result != CUDA_SUCCESS) { \ 153 cuGetErrorName(result, &msg); \ 154 LOG(FATAL) << "Error at: " << __FILE__ << ":" << __LINE__ << ": " \ 159 #define CUBLAS_ENFORCE(condition) \ 161 cublasStatus_t status = condition; \ 164 CUBLAS_STATUS_SUCCESS, \ 170 ::caffe2::cublasGetErrorString(status)); \ 172 #define CUBLAS_CHECK(condition) \ 174 cublasStatus_t status = condition; \ 175 CHECK(status == CUBLAS_STATUS_SUCCESS) \ 176 << ::caffe2::cublasGetErrorString(status); \ 179 #define CURAND_ENFORCE(condition) \ 181 curandStatus_t status = condition; \ 184 CURAND_STATUS_SUCCESS, \ 190 ::caffe2::curandGetErrorString(status)); \ 192 #define CURAND_CHECK(condition) \ 194 curandStatus_t status = condition; \ 195 CHECK(status == CURAND_STATUS_SUCCESS) \ 196 << ::caffe2::curandGetErrorString(status); \ 199 #define CUDA_1D_KERNEL_LOOP(i, n) \ 200 for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ 202 i += blockDim.x * gridDim.x) 208 #define 
CUDA_KERNEL_ASSERT(...) 210 #define CUDA_KERNEL_ASSERT(...) assert(__VA_ARGS__) 228 constexpr
int CAFFE_CUDA_NUM_THREADS = 512;
234 constexpr
int CAFFE_MAXIMUM_NUM_BLOCKS = 4096;
240 return std::min((N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS,
241 CAFFE_MAXIMUM_NUM_BLOCKS);
248 if (previous_ != newDevice) {
249 CUDA_ENFORCE(cudaSetDevice(newDevice));
254 CUDA_CHECK(cudaSetDevice(previous_));
262 #endif // CAFFE2_CORE_COMMON_GPU_H_ void DeviceQuery(const int device)
Runs a device query function and prints out the results to LOG(INFO).
void SetDefaultGPUID(const int deviceid)
Sets the default GPU id for Caffe2.
int GetCurrentGPUID()
Gets the current GPU id.
int CudaVersion()
A runtime function to report the CUDA version that Caffe2 is built with.
bool HasCudaGPU()
Checks whether the current running session has a CUDA GPU present.
bool GetCudaPeerAccessPattern(vector< vector< bool > > *pattern)
Returns a peer access pattern as a matrix (in the format of a nested vector) of boolean values specifying whether peer access is possible.
int NumCudaDevices()
Returns the number of devices.
Simple registry implementation in Caffe2 that uses static variables to register object creators during program initialization time.
int GetGPUIDForPointer(const void *ptr)
Gets the id of the GPU on which the given pointer is located.
int GetDefaultGPUID()
Gets the default GPU id for Caffe2.
const cudaDeviceProp & GetDeviceProperty(const int deviceid)
Gets the device property for the given device.
const char * curandGetErrorString(curandStatus_t error)
Returns a human-readable cuRAND error string.
int CAFFE_GET_BLOCKS(const int N)
Compute the number of blocks needed to run N threads.
const char * cublasGetErrorString(cublasStatus_t error)
Returns a human-readable cuBLAS error string.