Caffe2 - C++ API
A deep learning, cross-platform ML framework
mkl_memory.h
1 #ifndef CAFFE2_UTILS_MKL_MKL_MEMORY_H_
2 #define CAFFE2_UTILS_MKL_MKL_MEMORY_H_
3 
4 #include <string>
5 #include <vector>
6 #include <mutex>
7 
8 #include "caffe2/core/tensor.h" // for TIndex
9 #include "caffe2/core/flags.h" // for CAFFE2_DECLARE_bool
10 #include "caffe2/utils/mkl/mkl_dnn_cppwrapper.h"
11 
12 // A global boolean variable that controls the behavior when we call View() on
13 // an MKLMemory: if it is set to true, the View() function will actually
14 // change the underlying storage. If it is set to false, an implicit copy is
15 // triggered but the original storage is not affected.
16 CAFFE2_DECLARE_bool(caffe2_mkl_implicit_layout_change);
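//
// As a rough illustration (assuming the usual Caffe2 flag handling via
// caffe2::GlobalInit and gflags-style parsing), the flag can be flipped on
// the command line of a Caffe2 binary:
//
//   ./some_caffe2_binary --caffe2_mkl_implicit_layout_change=true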
17 
18 namespace caffe2 {
19 namespace mkl {
20 
21 template <typename T>
22 class PrimitiveWrapper {
23  public:
24  PrimitiveWrapper() {}
25  // Creates a primitive wrapper from an existing primitive. The wrapper
26  // takes over ownership.
27  explicit PrimitiveWrapper(dnnPrimitive_t primitive) : primitive_(primitive) {}
28 
29  template <typename Creator, typename FirstArg, typename... Args>
30  PrimitiveWrapper(Creator creator, FirstArg&& arg, Args&&... args) {
31  creator(&primitive_, arg, args...);
32  }
33 
34  ~PrimitiveWrapper() {
35  if (primitive_) {
36  MKLDNN_CHECK(dnnDelete<T>(primitive_));
37  }
38  }
39 
40  template <typename Creator, typename... Args>
41  void Reset(Creator creator, Args&&... args) {
42  if (primitive_) {
43  MKLDNN_SAFE_CALL(dnnDelete<T>(primitive_));
44  }
45  creator(&primitive_, args...);
46  }
47 
48  operator dnnPrimitive_t() const {
49  return primitive_;
50  }
51 
52  private:
53  dnnPrimitive_t primitive_ = 0;
54  DISABLE_COPY_AND_ASSIGN(PrimitiveWrapper);
55 };
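// Usage sketch (illustration only): wrapping a layout conversion primitive so
// that it is released automatically. "from", "to", "src" and "dst" are assumed
// to be valid layouts and buffers owned by the caller.
//
//   void RunConversion(dnnLayout_t from, dnnLayout_t to, void* src, void* dst) {
//     // The creator-style constructor forwards to dnnConversionCreate<float>,
//     // and the destructor calls dnnDelete<float> on the owned primitive.
//     PrimitiveWrapper<float> convert(dnnConversionCreate<float>, from, to);
//     MKLDNN_SAFE_CALL(dnnConversionExecute<float>(convert, src, dst));
//   }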
56 
57 template <typename T>
58 class LayoutWrapper {
59  public:
60  LayoutWrapper() {}
61  // Create a user layout from a TensorCPU with the given shapes.
62  explicit LayoutWrapper(const TensorCPU& tensor) {
63  Reset(tensor);
64  }
65 
66  // Create an internal layout from the primitive and type.
67  LayoutWrapper(const dnnPrimitive_t primitive, const dnnResourceType_t type) {
68  Reset(primitive, type);
69  }
70 
71  // Create a user layout from the given dimension, size and strides.
72  LayoutWrapper(
73  const size_t dimension,
74  const size_t size[],
75  const size_t strides[]) {
76  Reset(dimension, size, strides);
77  }
78 
79  // Destructs the layout wrapper.
80  ~LayoutWrapper() {
81  if (layout_)
82  MKLDNN_CHECK(dnnLayoutDelete<T>(layout_));
83  }
84 
85  // Create a user layout from a TensorCPU with the given shapes.
86  void Reset(const TensorCPU& tensor) {
87  if (layout_)
88  MKLDNN_CHECK(dnnLayoutDelete<T>(layout_));
89  CAFFE_ENFORCE(tensor.size(), "Cannot reset with an empty tensor.");
90  size_t dimension = tensor.ndim();
91  size_t size[dimension];
92  size_t strides[dimension];
93  for (int i = 0; i < dimension; ++i) {
94  size[i] = tensor.dim(dimension - i - 1);
95  strides[i] = (i == 0) ? 1 : strides[i - 1] * size[i - 1];
96  }
97  MKLDNN_SAFE_CALL(dnnLayoutCreate<T>(&layout_, dimension, size, strides));
98  }
99 
100  // Create an internal layout from the primitive and type.
101  void Reset(const dnnPrimitive_t primitive, const dnnResourceType_t type) {
102  CAFFE_ENFORCE(primitive, "Cannot reset with an unknown primitive.");
103  CAFFE_ENFORCE(
104  type != dnnResourceNumber,
105  "Cannot reset with an unknown resource number.");
106  if (layout_) {
107  MKLDNN_CHECK(dnnLayoutDelete<T>(layout_));
108  }
109  MKLDNN_SAFE_CALL(
110  dnnLayoutCreateFromPrimitive<T>(&layout_, primitive, type));
111  }
112 
113  // Create a user layout from the given dimension, size and strides.
114  void
115  Reset(const size_t dimension, const size_t size[], const size_t strides[]) {
116  if (layout_)
117  MKLDNN_CHECK(dnnLayoutDelete<T>(layout_));
118  MKLDNN_SAFE_CALL(dnnLayoutCreate<T>(&layout_, dimension, size, strides));
119  }
120 
121  operator dnnLayout_t() const {
122  return layout_;
123  }
124 
125  private:
126  dnnLayout_t layout_ = 0;
127  DISABLE_COPY_AND_ASSIGN(LayoutWrapper);
128 };
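// Usage sketch (illustration only): two equivalent ways to describe a
// 2 x 3 x 4 C-contiguous float blob as a user layout. The raw form passes
// sizes and strides innermost-first, which is what Reset(const TensorCPU&)
// computes internally. The TensorCPU dims-constructor is assumed here.
//
//   TensorCPU tensor(std::vector<TIndex>{2, 3, 4});
//   LayoutWrapper<float> from_tensor(tensor);
//
//   const size_t size[3] = {4, 3, 2};      // dims reversed: innermost first
//   const size_t strides[3] = {1, 4, 12};  // C-contiguous strides
//   LayoutWrapper<float> from_raw(3, size, strides);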
129 
130 /**
131  * @brief A wrapper around an opaque MKL internal resource that has certain
132  * layouts and conversion primitives set up.
133  *
134  * Most of the MKLMemory functions are not thread safe.
135  */
136 template <typename T>
137 class MKLMemory {
138  public:
139  // Initializes an empty MKLMemory.
140  MKLMemory() {}
141  // Initialize an MKLMemory with the given size, strides, dnn
142  // primitive and type.
143  MKLMemory(
144  const size_t dimension,
145  const size_t size[],
146  const size_t strides[],
147  const dnnPrimitive_t primitive = nullptr,
148  const dnnResourceType_t type = dnnResourceNumber,
149  bool share_mem_if_possible = false) {
150  Reset(dimension, size, strides, primitive, type, share_mem_if_possible);
151  }
152 
153  // Initialize an MKLMemory with the given dimensions, assuming C-contiguous
154  // storage.
155  template <typename IndexType>
156  explicit MKLMemory(
157  const vector<IndexType>& dims,
158  const dnnPrimitive_t primitive = nullptr,
159  const dnnResourceType_t type = dnnResourceNumber,
160  bool share_mem_if_possible = false) {
161  Reset(dims, primitive, type, share_mem_if_possible);
162  }
163 
164  // Initialize an MKLMemory with the given size, strides, dnn
165  // primitive and type.
166  void Reset(
167  const size_t dimension,
168  const size_t size[],
169  const size_t strides[],
170  const dnnPrimitive_t primitive = nullptr,
171  const dnnResourceType_t type = dnnResourceNumber,
172  bool share_mem_if_possible = false) {
173  buffer_.reset();
174  dims_.resize(dimension);
175  for (int i = 0; i < dimension; ++i) {
176  dims_[i] = size[dimension - 1 - i];
177  }
178  user_layout_.Reset(dimension, size, strides);
179  if (primitive) {
180  layout_.Reset(primitive, type);
181  } else {
182  layout_.Reset(dimension, size, strides);
183  }
184  convert_in_.Reset(dnnConversionCreate<T>, user_layout_, layout_);
185  convert_out_.Reset(dnnConversionCreate<T>, layout_, user_layout_);
186  share_mem_if_possible_ = share_mem_if_possible;
187  layout_is_user_layout_ = dnnLayoutCompare<T>(layout_, user_layout_);
188  VLOG(2) << "layout is user layout? " << layout_is_user_layout_;
189  if (!share_mem_if_possible_) {
190  // If we are not going to share memory, we will simply allocate
191  // memory upfront.
192  buffer();
193  }
194  }
195 
196  // Initialize an MKLMemory with the given dimensions, assuming C-contiguous
197  // storage.
198  template <typename IndexType>
199  void Reset(
200  const vector<IndexType>& dims,
201  const dnnPrimitive_t primitive = nullptr,
202  const dnnResourceType_t type = dnnResourceNumber,
203  bool share_mem_if_possible = false) {
204  buffer_.reset();
205  dims_.resize(dims.size());
206  for (int i = 0; i < dims.size(); ++i) {
207  dims_[i] = dims[i];
208  }
209  size_t dimension = dims.size();
210  vector<size_t> size(dimension);
211  vector<size_t> strides(dimension);
212  for (int i = 0; i < dimension; ++i) {
213  size[i] = dims[dimension - i - 1];
214  strides[i] = (i == 0) ? 1 : strides[i - 1] * size[i - 1];
215  }
216  user_layout_.Reset(dims.size(), size.data(), strides.data());
217  if (primitive) {
218  layout_.Reset(primitive, type);
219  } else {
220  layout_.Reset(dimension, size.data(), strides.data());
221  }
222  convert_in_.Reset(dnnConversionCreate<T>, user_layout_, layout_);
223  convert_out_.Reset(dnnConversionCreate<T>, layout_, user_layout_);
224  share_mem_if_possible_ = share_mem_if_possible;
225  layout_is_user_layout_ = dnnLayoutCompare<T>(layout_, user_layout_);
226  VLOG(2) << "layout is user layout? " << layout_is_user_layout_;
227  if (!share_mem_if_possible_) {
228  // If we are not going to share memory, we will simply allocate
229  // memory upfront.
230  buffer();
231  }
232  }
233 
234  // Destructs the MKLMemory.
235  ~MKLMemory() {}
236 
237  void CopyFrom(const void* ptr) {
238  if (share_mem_if_possible_ && layout_is_user_layout_) {
239  VLOG(2) << "Sharing underlying memory and skip copy.";
240  buffer_.reset(const_cast<void*>(ptr), [](void*) -> void {});
241  } else {
242  VLOG(2) << "Copying external content.";
243  MKLDNN_SAFE_CALL(dnnConversionExecute<T>(
244  convert_in_, const_cast<void*>(ptr), buffer()));
245  }
246  }
247 
248  void CopyFrom(const TensorCPU& tensor) {
249  CAFFE_ENFORCE_EQ(
250  tensor.dims(),
251  dims_,
252  "Dims does not match the expected dims of the resource.");
253  CopyFrom(tensor.template data<T>());
254  }
255 
256  void CopyFrom(const MKLMemory<T>& other) {
257  if (share_mem_if_possible_ && dnnLayoutCompare<T>(other.layout_, layout_)) {
258  buffer_ = other.buffer_;
259  } else {
260  PrimitiveWrapper<T> convert(
261  dnnConversionCreate<T>, other.layout_, layout_);
262  MKLDNN_SAFE_CALL(
263  dnnConversionExecute<T>(convert, other.buffer_, buffer()));
264  }
265  }
266 
267  bool ShareFromRaw(const void* ptr) {
268  if (share_mem_if_possible_ && layout_is_user_layout_) {
269  buffer_.reset(const_cast<void*>(ptr), [](void*) -> void {});
270  return true;
271  } else {
272  return false;
273  }
274  }
275 
276  bool ShareFromTensor(const TensorCPU& tensor) {
277  CAFFE_ENFORCE_EQ(
278  tensor.dims(),
279  dims_,
280  "Dims does not match the expected dims of the resource.");
281  return ShareFromRaw(tensor.template data<T>());
282  }
283 
284  bool ShareFrom(const MKLMemory<T>& other) {
285  if (share_mem_if_possible_ && dnnLayoutCompare<T>(other.layout_, layout_)) {
286  VLOG(2) << "Sharing underlying memory.";
287  buffer_ = other.buffer_;
288  if (!buffer_.get()) {
289  VLOG(2) << "Warning: the source MKLMemory has no content yet, so the "
290  "sharing actually has no effect.";
291  }
292  return true;
293  } else {
294  VLOG(2) << "Not sharing underlying memory.";
295  return false;
296  }
297  }
298 
299  void CopyTo(void* ptr) const {
300  if (buffer_.get() == ptr) {
301  // This is already mapping to the same memory region. Skip copy.
302  VLOG(2) << "CopyTo does not need actual copying, as we are sharing "
303  "memory with the output.";
304  return;
305  }
306  CAFFE_ENFORCE(
307  buffer_.get(), "Canot copy out from an uninitialized MKLMemory.");
308  VLOG(2) << "Copy to external memory.";
309  MKLDNN_SAFE_CALL(dnnConversionExecute<T>(convert_out_, buffer_.get(), ptr));
310  }
311 
312  void CopyTo(TensorCPU* tensor) const {
313  if (tensor->size() > 0 && buffer_.get() == tensor->mutable_data<T>()) {
314  // This is already mapping to the same memory region. Skip copy.
315  VLOG(2) << "CopyTo does not need actual copying, as we are sharing "
316  "memory with the output.";
317  return;
318  }
319  tensor->Resize(dims_);
320  CopyTo(tensor->mutable_data<T>());
321  }
322 
323  // Copies to another MKL memory.
324  //
325  // This function resets the target with the given primitive and type, and retries the copy, if a direct layout conversion fails.
326  void CopyTo(
327  MKLMemory<T>* other,
328  const dnnPrimitive_t primitive = nullptr,
329  const dnnResourceType_t type = dnnResourceNumber) {
330  if (buffer_.get() == other->buffer_.get()) {
331  VLOG(2) << "CopyTo does not need actual copying, as we are sharing "
332  "memory with the output.";
333  // This is already mapping to the same memory region. Skip copy.
334  return;
335  }
336  CAFFE_ENFORCE(
337  buffer_.get(), "Canot copy out from an uninitialized MKLMemory.");
338  // TODO(jiayq): if primitive creation is a big overhead and we will be
339  // consistently copying stuff with fixed src and dst layouts, consider
340  // making a cache for the primitive below.
341  VLOG(2) << "CopyTo requires copying. Performing direct copy.";
342  PrimitiveWrapper<T> convert(
343  dnnConversionCreate<T>, layout_, other->layout_);
344  if (dnnPrimitive_t(convert) == nullptr ||
345  dnnConversionExecute<T>(convert, buffer_.get(), other->buffer()) !=
346  E_SUCCESS) {
347  VLOG(2) << "Direct copy failed, will need to allocate output.";
348  // If CopyTo directly did not succeed, it could be because the target
349  // MKLMemory is not having the right layout. In this case we will reset
350  // the target and then do another copy.
351  other->Reset(dims_, primitive, type);
352  PrimitiveWrapper<T> convert2(
353  dnnConversionCreate<T>, layout_, other->layout_);
354  MKLDNN_SAFE_CALL(
355  dnnConversionExecute<T>(convert2, buffer_.get(), other->buffer()));
356  }
357  }
358 
359  inline void* buffer() {
360  if (buffer_ == nullptr) {
361  CAFFE_ENFORCE(
362  layout_ != nullptr, "Trying to allocate buffer but layout is empty.");
363  void* allocated = nullptr;
364  MKLDNN_SAFE_CALL(dnnAllocateBuffer<T>(&allocated, layout_));
365  buffer_.reset(allocated, [](void* ptr) -> void {
366  MKLDNN_CHECK(dnnReleaseBuffer<T>(ptr));
367  });
368  }
369  return buffer_.get();
370  }
371 
372  // MKLDNN does not use const void* even for the inputs, so we will
373  // have to use void* and rely on the underlying implementation to make
374  // sure that the buffer is actually not changed.
375  inline void* buffer() const {
376  CAFFE_ENFORCE(
377  buffer_ != nullptr, "Trying to refer to an unallocated buffer.");
378  return buffer_.get();
379  }
380 
381  inline const vector<TIndex>& dims() const {
382  return dims_;
383  }
384 
385  inline const int ndim() const { return dims_.size(); }
386 
387  inline int dim32(const int i) const {
388  CAFFE_ENFORCE_LT(dims_.at(i), std::numeric_limits<int>::max());
389  return static_cast<int>(dims_[i]);
390  }
391 
392  /**
393  * Returns the i-th dimension of the tensor.
394  *
395  * The index is bounds-checked; an out-of-range index throws an exception.
396  */
397  inline TIndex dim(const int i) const {
398  return dims_.at(i);
399  }
400 
401  inline const LayoutWrapper<T>& layout() const {
402  return layout_;
403  }
404 
405  // Returns a view of the content. We mark this function const, but note
406  // that the returned std::shared_ptr is not const-protected; user discretion
407  // is recommended for correctness.
408  std::shared_ptr<void> View(
409  dnnLayout_t layout_wanted,
410  dnnPrimitive_t primitive,
411  dnnResourceType_t type) const {
412  std::lock_guard<std::mutex> lock(buffer_lock_);
413  if (dnnLayoutCompare<T>(layout_wanted, layout_)) {
414  // If they are the same, return the original content.
415  VLOG(2) << "Creating a view without the need of copying.";
416  return std::shared_ptr<void>(buffer_);
417  } else {
418  void* temp_buffer;
419  VLOG(2) << "Creating a view with copying.";
420  MKLDNN_SAFE_CALL(dnnAllocateBuffer<T>(&temp_buffer, layout_wanted));
421  PrimitiveWrapper<T> convert(
422  dnnConversionCreate<T>, layout_, layout_wanted);
423  MKLDNN_SAFE_CALL(dnnConversionExecute<T>(
424  convert, buffer_.get(), temp_buffer));
425  if (FLAGS_caffe2_mkl_implicit_layout_change) {
426  VLOG(2) << "Implicit layout change set. "
427  "Changing the underlying storage.";
428  // We will need to call Reset to set up all the member variables.
429  // This is not thread safe, so we might want to double check if this
430  // makes sense in actual use cases.
431  const_cast<MKLMemory<T>*>(this)->Reset(
432  dims_, primitive, type, share_mem_if_possible_);
433  CAFFE_ENFORCE(dnnLayoutCompare<T>(layout_wanted, layout_),
434  "You passed in a target layout that is not "
435  "generated by the given primitive and type.");
436  buffer_.reset(temp_buffer, [](void* ptr) -> void {
437  MKLDNN_CHECK(dnnReleaseBuffer<T>(ptr));
438  });
439  return std::shared_ptr<void>(buffer_);
440  } else {
441  return std::shared_ptr<void>(temp_buffer, [](void* ptr) -> void {
442  MKLDNN_CHECK(dnnReleaseBuffer<T>(ptr));
443  });
444  }
445  }
446  }
447 
448  private:
449  bool share_mem_if_possible_;
450  bool layout_is_user_layout_;
451  // The internal buffer in the specific dnn layout.
452  // It is marked mutable but any modification in a const function should
453  // be accompanied by the buffer lock, see the View() function.
454  mutable std::shared_ptr<void> buffer_;
455  // A mutex to control the access of buffer in the View() function.
456  mutable std::mutex buffer_lock_;
457  // The dimensions in the same order as Caffe2 does. This is used to
458  // interface with C2.
459  vector<TIndex> dims_;
460  // The user dnn layout.
461  LayoutWrapper<T> user_layout_;
462  // The internal dnn layout.
463  LayoutWrapper<T> layout_;
464  // The primitive to use to convert from user layout to internal layout
465  PrimitiveWrapper<T> convert_in_;
466  // The primitive to use to convert from internal layout to user layout
467  PrimitiveWrapper<T> convert_out_;
468 
469  DISABLE_COPY_AND_ASSIGN(MKLMemory);
470 };
471 
472 } // namespace mkl
473 } // namespace caffe2
474 
475 #endif // CAFFE2_UTILS_MKL_MKL_MEMORY_H_
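
As a rough usage sketch (assuming an MKL-enabled Caffe2 build; the helper name
RoundTripThroughMKL is hypothetical and not part of the API above), a typical
MKLMemory round trip copies a TensorCPU into the internal layout, lets MKL-DNN
primitives operate on the internal buffer, and converts the result back out:

  #include "caffe2/core/tensor.h"
  #include "caffe2/utils/mkl/mkl_memory.h"

  namespace caffe2 {

  void RoundTripThroughMKL(const TensorCPU& input, TensorCPU* output) {
    // No primitive is given, so the internal layout equals the user layout.
    mkl::MKLMemory<float> buffer(input.dims());
    // Converts (or shares, when sharing was requested and the layouts match)
    // the tensor's content into the MKL buffer.
    buffer.CopyFrom(input);
    // ... run MKL-DNN primitives against buffer.buffer() here ...
    // Resizes the output tensor and converts back to the user layout.
    buffer.CopyTo(output);
  }

  } // namespace caffe2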