Caffe2 - C++ API
A deep learning, cross-platform ML framework
net_gpu.h
1 #ifndef CAFFE2_CORE_NET_GPU_H_
2 #define CAFFE2_CORE_NET_GPU_H_
3 
4 #include "caffe2/core/context_gpu.h"
5 #include "caffe2/core/net.h"
6 
7 namespace caffe2 {
8 
9 namespace internal {
10 
11 struct Stream;
12 
13 struct Event {
14  public:
15  explicit Event(const DeviceOption& device_option);
16  ~Event() {
17  if (event_) {
18  CUDA_ENFORCE(cudaEventDestroy(event_));
19  }
20  }
21 
22  void record(const Stream& stream);
23 
24  int gpu_id_{-1};
25  cudaEvent_t event_{nullptr};
26  bool outstanding_{false};
27  bool neverRecorded_{true};
28  DISABLE_COPY_AND_ASSIGN(Event);
29 };
30 
31 } // namespace internal
32 
33 // Run an event-driven graph - before each operator chain, wait on
34 // each parent operator for the chain source (Stream::wait), then
35 // execute each operator (implicitly on the same stream).
36 class AsyncDAGNet : public DAGNetBase {
37  public:
38  AsyncDAGNet(const NetDef& net_def, Workspace* ws);
39  bool RunAt(const std::vector<int>& chain) override;
40  bool Run() override;
41 
42  protected:
43  // Tracks whether a given op has had an event recorded in each
44  // RunAt() iteration.
45  std::vector<int32_t> eventRecorded_;
46  std::vector<std::unique_ptr<internal::Event>> events_;
47  DISABLE_COPY_AND_ASSIGN(AsyncDAGNet);
48 };
49 
50 namespace gpu_single_thread {
51 
52 struct Task {
53  std::vector<std::unique_ptr<OperatorBase>>* ops_;
54  std::condition_variable* cv_;
55  std::mutex* mtx_;
56  int stream_id_;
57  bool done_ = false;
58 };
59 
60 class GPUExecutor {
61  public:
62  explicit GPUExecutor(int gpu_id) : gpu_id_(gpu_id) {}
63 
64  ~GPUExecutor() {
65  queue_.NoMoreJobs();
66  thread_.join();
67  }
68 
69  void RunJob(Task* task) {
70  queue_.Push(task);
71  }
72 
73  void start() {
74  thread_ = std::thread(&GPUExecutor::WorkerFunction, this);
75  }
76 
77  static std::shared_ptr<GPUExecutor> Get(int gpu);
78  static void Release(int gpu);
79 
80  private:
81  void set_affinity();
82  void WorkerFunction();
83 
84  std::thread thread_;
85  int gpu_id_;
86  SimpleQueue<Task*> queue_;
87  static std::shared_ptr<GPUExecutor> executors_[CAFFE2_COMPILE_TIME_MAX_GPUS];
88  static std::mutex gpu_mtx_[CAFFE2_COMPILE_TIME_MAX_GPUS];
89 };
90 }
91 
92 } // namespace caffe2
93 
94 #endif // CAFFE2_CORE_NET_GPU_H_
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:53
Simple registry implementation in Caffe2 that uses static variables to register object creators durin...