Caffe2 - C++ API
A deep learning, cross-platform ML framework
operator_gradient.h
#ifndef CAFFE2_CORE_OPERATOR_GRADIENT_H_
#define CAFFE2_CORE_OPERATOR_GRADIENT_H_

#include "caffe2/core/registry.h"
#include "caffe2/proto/caffe2.pb.h"
#include "caffe2/utils/proto_utils.h"

namespace caffe2 {

/**
 * @brief A struct that abstracts on top of dense and sparse blobs.
 *
 * For a dense blob, its gradient name should be written into dense_, and for
 * a sparse blob, its gradient name should be written into indices_ for the
 * sparse indices and values_ for the values.
 */
struct GradientWrapper {
  string dense_;
  string indices_;
  string values_;

  inline bool IsDense() const {
    return dense_.size();
  }
  inline bool IsSparse() const {
    return (indices_.size() || values_.size());
  }
  inline bool IsEmpty() const {
    return (!IsDense() && !IsSparse());
  }
};
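
// Illustrative sketch (not part of the original header): a dense gradient
// fills only dense_, while a sparse gradient fills indices_ and values_;
// the two forms are mutually exclusive. Blob names are hypothetical.
//
//   GradientWrapper dense;
//   dense.dense_ = "W_grad";             // IsDense() == true
//
//   GradientWrapper sparse;
//   sparse.indices_ = "W_grad_indices";  // IsSparse() == true
//   sparse.values_ = "W_grad_values";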

/**
 * @brief A struct that holds the gradient operators and related gradient
 * maps.
 */
struct GradientOpsMeta {
  vector<OperatorDef> ops_;
  vector<GradientWrapper> g_input_;

  GradientOpsMeta() {}
  GradientOpsMeta(
      const vector<OperatorDef>& ops,
      const vector<GradientWrapper>& v)
      : ops_(ops), g_input_(v) {}
};

class GradientMakerBase {
 public:
  GradientMakerBase(
      const OperatorDef& def,
      const vector<GradientWrapper>& g_output)
      : def_(def), g_output_(g_output), g_input_(def.input_size()) {}
  virtual ~GradientMakerBase() {}
  virtual bool CopyDeviceOption() const {
    return true;
  }
  virtual bool CopyEngine() const {
    return true;
  }
  virtual bool CopyArguments() const {
    return true;
  }

  /**
   * @brief Returns the gradient ops meta.
   *
   * If your gradient op generator only uses standard input and output
   * manipulations, you can simply implement GetGradientDefs(), which returns
   * a vector<OperatorDef>; the GI, GI_V and GI_I helpers below will then
   * register the gradient names for you. If you need to do custom gradient
   * name registration, override this function directly.
   */
  virtual GradientOpsMeta Get() {
    vector<OperatorDef> new_defs = GetGradientDefs();
    for (auto& opdef : new_defs) {
      opdef.set_is_gradient_op(true);
    }
    return GradientOpsMeta(new_defs, g_input_);
  }

  const OperatorDef& Def() const {
    return def_;
  }

 protected:
  virtual vector<OperatorDef> GetGradientDefs() {
    CAFFE_NOT_IMPLEMENTED;
  }

  // Helper functions to return names for the gradient computation.
  // I(idx), O(idx): return the input and output names.
  // GO(idx): return the name of the gradient for output idx.
  // GI(idx), GI_I(idx), GI_V(idx): return the name of the gradient for
  //   input idx, and also register that name in the gradient registry to be
  //   returned. (See the illustrative sketch next to SingleGradientDef below
  //   for typical usage.)
  string I(const int i) {
    CAFFE_ENFORCE((i >= 0) && (i < def_.input().size()));
    return def_.input(i);
  }
  string O(const int i) {
    CAFFE_ENFORCE((i >= 0) && (i < def_.output().size()));
    return def_.output(i);
  }
  string GI(const int i) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsSparse(),
        "Input ",
        def_.input(i),
        " already set to sparse.");
    g_input_.at(i).dense_ = GradientName(def_.input(i));
    return GradientName(def_.input(i));
  }
  string GI_I(const int i) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsDense(),
        "Input ",
        def_.input(i),
        " already set to dense.");
    g_input_.at(i).indices_ = GradientSliceIndices(def_.input(i));
    return GradientSliceIndices(def_.input(i));
  }
  string GI_V(const int i) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsDense(),
        "Input ",
        def_.input(i),
        " already set to dense.");
    g_input_.at(i).values_ = GradientSliceValues(def_.input(i));
    return GradientSliceValues(def_.input(i));
  }
  string GO(const int i) {
    CAFFE_ENFORCE(
        g_output_.at(i).IsDense(),
        "Gradient of output ",
        def_.output(i),
        " is either sparse or not provided.");
    return g_output_.at(i).dense_;
  }
  string GO_I(const int i) {
    CAFFE_ENFORCE(
        g_output_.at(i).IsSparse(),
        "Gradient of output ",
        def_.output(i),
        " is either dense or not provided.");
    return g_output_.at(i).indices_;
  }
  string GO_V(const int i) {
    CAFFE_ENFORCE(
        g_output_.at(i).IsSparse(),
        "Gradient of output ",
        def_.output(i),
        " is either dense or not provided.");
    return g_output_.at(i).values_;
  }
  const GradientWrapper& GradOut(int i) {
    return g_output_.at(i);
  }

  // Functions to add a gradient pair to the gradient map.
  void SetDense(const int i, const string& name) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsSparse(),
        "Input ",
        def_.input(i),
        " already set to sparse.");
    g_input_.at(i).dense_ = name;
  }
  void SetSparse(const int i, const string& indices, const string& values) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsDense(),
        "Input ",
        def_.input(i),
        " already set to dense.");
    g_input_.at(i).indices_ = indices;
    g_input_.at(i).values_ = values;
  }
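
  // Illustrative sketch (not part of the original header): a maker for an op
  // with a sparse input gradient (e.g. a gather-style lookup) can emit no
  // gradient operator at all and instead mark input 0's gradient as sparse,
  // reusing the forward op's index input and the output gradient as values.
  // The class name is hypothetical.
  //
  //   class GetLookupGradient : public GradientMakerBase {
  //     using GradientMakerBase::GradientMakerBase;
  //     vector<OperatorDef> GetGradientDefs() override {
  //       SetSparse(0, I(1), GO(0));  // indices blob, values blob
  //       return vector<OperatorDef>();
  //     }
  //   };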

  /**
   * @brief A helper function to allow one to create one single operator def,
   * which is usually the case for many simple operators.
   */
  template <class... Args>
  inline static vector<OperatorDef> SingleGradientDef(const Args&... args) {
    return vector<OperatorDef>{CreateOperatorDef(args...)};
  }
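
  // Illustrative sketch (not part of the original header): a typical dense
  // maker overrides GetGradientDefs() and emits a single gradient op via
  // SingleGradientDef, wiring blob names with the I/GO/GI helpers above.
  // "FooGradient" is an assumed op type.
  //
  //   vector<OperatorDef> GetGradientDefs() override {
  //     return SingleGradientDef(
  //         "FooGradient", "",
  //         vector<string>{I(0), GO(0)},  // forward input, dL/d(output 0)
  //         vector<string>{GI(0)});       // dL/d(input 0), auto-registered
  //   }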

 public:
  /**
   * @brief Returns a map from the gradient-blob names in op's outputs to the
   * parameters they are gradients for.
   */
  static CaffeMap<string, string> MatchGradsToParams(const OperatorDef& op) {
    // NOTE: how to go beyond string-matching?
    CaffeMap<string, string> m;
    for (auto& out : op.output()) {
      if (IsGradientBlob(out)) {
        m[out] = out.substr(0, out.length() - 5);
      }
    }
    return m;
  }
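
  // Worked example (illustrative): for an op whose outputs include "W_grad"
  // and "b_grad" (hypothetical names), MatchGradsToParams returns
  // {"W_grad" -> "W", "b_grad" -> "b"}, obtained purely by stripping the
  // "_grad" suffix.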

 private:
  // Utility functions for gradient name computation. We don't expose them
  // in order to discourage the use of such names explicitly.
  static string GradientName(const string& name) {
    return name + "_grad";
  }

  static bool IsGradientBlob(const string& name) {
    return name.length() > 5 && name.find("_grad") == name.length() - 5;
  }

  static string GradientNameToParam(const string& name) {
    CHECK(IsGradientBlob(name));
    return name.substr(0, name.length() - 5);
  }

  static string GradientSliceIndices(const string& name) {
    return name + "_grad_indices";
  }

  static string GradientSliceValues(const string& name) {
    return name + "_grad_values";
  }

 protected:
  // We make the member variables protected in case someone wants to write
  // a fully custom Get() function.
  const OperatorDef& def_;
  const vector<GradientWrapper>& g_output_;
  vector<GradientWrapper> g_input_;
};
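
// Illustrative summary (not part of the original header): the naming scheme
// produced by the private helpers above, for a hypothetical blob "W":
//
//   GradientName("W")             -> "W_grad"
//   GradientSliceIndices("W")     -> "W_grad_indices"
//   GradientSliceValues("W")      -> "W_grad_values"
//   GradientNameToParam("W_grad") -> "W"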

/**
 * @brief A helper class to indicate that the operator does not need gradient
 * computation.
 */
class NoGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return vector<OperatorDef>();
  }
};

/**
 * @brief A helper class to indicate that the operator should have no
 * gradient.
 *
 * Attempting to get the gradient for an operator registered with this maker
 * causes a fatal error.
 */
class ThrowInTheTowelIfGradientIsCalled : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  GradientOpsMeta Get() override {
    CAFFE_ENFORCE(
        false, "One should not call gradient for operator ", def_.type(), ".");
  }
};

/**
 * @brief A helper class to indicate that the gradient mechanism is not ready
 * yet: the operator should have a gradient, but it has not been implemented.
 */
class GradientNotImplementedYet : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  GradientOpsMeta Get() override {
    CAFFE_ENFORCE(
        false,
        "Operator ",
        def_.type(),
        " should have a gradient but is not implemented yet.");
  }
};

CAFFE_DECLARE_REGISTRY(
    GradientRegistry,
    GradientMakerBase,
    const OperatorDef&,
    const vector<GradientWrapper>&);

#define REGISTER_GRADIENT(name, ...) \
  CAFFE_REGISTER_CLASS(GradientRegistry, name, __VA_ARGS__)
#define REGISTER_GRADIENT_STR(str_name, ...) \
  CAFFE_REGISTER_TYPED_CLASS(GradientRegistry, str_name, __VA_ARGS__)

// NO_GRADIENT means that the operator does not need any gradient computation.
#define NO_GRADIENT(name) REGISTER_GRADIENT(name, NoGradient)

// SHOULD_NOT_DO_GRADIENT means that the operator is not designed to have
// gradient operators. If you attempt to get its gradient, a fatal error will
// occur.
#define SHOULD_NOT_DO_GRADIENT(name) \
  REGISTER_GRADIENT(name, ThrowInTheTowelIfGradientIsCalled)

// GRADIENT_NOT_IMPLEMENTED_YET means that the operator should have a
// gradient, but it is not implemented yet.
#define GRADIENT_NOT_IMPLEMENTED_YET(name) \
  REGISTER_GRADIENT(name, GradientNotImplementedYet)
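
// Illustrative sketch (not part of the original header): registering gradient
// makers. "Abs" and "AbsGradient" are assumed op types, modeled on how simple
// Caffe2 operators are typically wired up in their .cc files.
//
//   class GetAbsGradient : public GradientMakerBase {
//     using GradientMakerBase::GradientMakerBase;
//     vector<OperatorDef> GetGradientDefs() override {
//       return SingleGradientDef(
//           "AbsGradient", "",
//           vector<string>{I(0), GO(0)},
//           vector<string>{GI(0)});
//     }
//   };
//   REGISTER_GRADIENT(Abs, GetAbsGradient);
//
//   NO_GRADIENT(SomeDataOp);            // hypothetical: no gradient needed
//   SHOULD_NOT_DO_GRADIENT(SomeMetricOp);  // hypothetical: gradient is an error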

/**
 * @brief Gets the GradientOpsMeta for the given operator def.
 */
GradientOpsMeta GetGradientForOp(
    const OperatorDef& def,
    const vector<GradientWrapper>& g_output);

} // namespace caffe2

#endif // CAFFE2_CORE_OPERATOR_GRADIENT_H_