Caffe2 - C++ API
A deep learning, cross-platform ML framework
operator_gradient.h
#ifndef CAFFE2_CORE_OPERATOR_GRADIENT_H_
#define CAFFE2_CORE_OPERATOR_GRADIENT_H_

#include "caffe2/core/registry.h"
#include "caffe2/proto/caffe2.pb.h"
#include "caffe2/utils/proto_utils.h"

namespace caffe2 {

/**
 * @brief A struct that abstracts on top of dense and sparse blobs.
 *
 * For a dense blob, its gradient name should be written into dense_, and for
 * a sparse blob, its gradient name should be written into indices_ for the
 * sparse indices and values_ for the values.
 */
struct GradientWrapper {
  string dense_;
  string indices_;
  string values_;

  inline bool IsDense() const {
    return dense_.size();
  }
  inline bool IsSparse() const {
    return (indices_.size() || values_.size());
  }
  inline bool IsEmpty() const {
    return (!IsDense() && !IsSparse());
  }
};
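
// Illustrative sketch (not part of the original header): a dense gradient
// fills only dense_, while a sparse gradient fills indices_ and values_;
// the two forms are mutually exclusive. Blob names are hypothetical.
//
//   GradientWrapper dense;
//   dense.dense_ = "W_grad";             // IsDense() == true
//
//   GradientWrapper sparse;
//   sparse.indices_ = "W_grad_indices";  // IsSparse() == true
//   sparse.values_ = "W_grad_values";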

/**
 * @brief A struct that holds the gradient operators and related gradient
 * maps.
 */
struct GradientOpsMeta {
  vector<OperatorDef> ops_;
  vector<GradientWrapper> g_input_;

  GradientOpsMeta() {}
  GradientOpsMeta(
      const vector<OperatorDef>& ops,
      const vector<GradientWrapper>& v)
      : ops_(ops), g_input_(v) {}
};

class GradientMakerBase {
 public:
  GradientMakerBase(
      const OperatorDef& def,
      const vector<GradientWrapper>& g_output)
      : def_(def), g_output_(g_output), g_input_(def.input_size()) {}
  virtual ~GradientMakerBase() {}
  virtual bool CopyDeviceOption() const {
    return true;
  }
  virtual bool CopyEngine() const {
    return true;
  }
  virtual bool CopyArguments() const {
    return true;
  }

  /**
   * @brief Returns the gradient ops meta.
   *
   * If your gradient op generator only uses standard input and output
   * manipulations, you can simply implement GetGradientDefs(), which returns
   * a vector<OperatorDef>; the GI, GI_V and GI_I helpers below will then
   * register the gradient names for you. If you need to do custom gradient
   * name registration, override this function directly.
   */
  virtual GradientOpsMeta Get() {
    vector<OperatorDef> new_defs = GetGradientDefs();
    for (auto& opdef : new_defs) {
      opdef.set_is_gradient_op(true);
    }
    return GradientOpsMeta(new_defs, g_input_);
  }

  const OperatorDef& Def() const {
    return def_;
  }

 protected:
  virtual vector<OperatorDef> GetGradientDefs() {
    CAFFE_NOT_IMPLEMENTED;
  }

  // Helper functions to return names for the gradient computation.
  // I(idx), O(idx): return the input and output names.
  // GO(idx): return the name of the gradient for output idx.
  // GI(idx), GI_I(idx), GI_V(idx): return the name of the gradient for
  //   input idx, and also register that name in the gradient registry to be
  //   returned. (See the illustrative sketch next to SingleGradientDef below
  //   for typical usage.)
  string I(const int i) {
    CAFFE_ENFORCE((i >= 0) && (i < def_.input().size()));
    return def_.input(i);
  }
  string O(const int i) {
    CAFFE_ENFORCE((i >= 0) && (i < def_.output().size()));
    return def_.output(i);
  }
  string GI(const int i) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsSparse(),
        "Input ",
        def_.input(i),
        " already set to sparse.");
    g_input_.at(i).dense_ = GradientName(def_.input(i));
    return GradientName(def_.input(i));
  }
  string GI_I(const int i) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsDense(),
        "Input ",
        def_.input(i),
        " already set to dense.");
    g_input_.at(i).indices_ = GradientSliceIndices(def_.input(i));
    return GradientSliceIndices(def_.input(i));
  }
  string GI_V(const int i) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsDense(),
        "Input ",
        def_.input(i),
        " already set to dense.");
    g_input_.at(i).values_ = GradientSliceValues(def_.input(i));
    return GradientSliceValues(def_.input(i));
  }
  string GO(const int i) {
    CAFFE_ENFORCE(
        g_output_.at(i).IsDense(),
        "Gradient of output ",
        def_.output(i),
        " is either sparse or not provided.");
    return g_output_.at(i).dense_;
  }
  string GO_I(const int i) {
    CAFFE_ENFORCE(
        g_output_.at(i).IsSparse(),
        "Gradient of output ",
        def_.output(i),
        " is either dense or not provided.");
    return g_output_.at(i).indices_;
  }
  string GO_V(const int i) {
    CAFFE_ENFORCE(
        g_output_.at(i).IsSparse(),
        "Gradient of output ",
        def_.output(i),
        " is either dense or not provided.");
    return g_output_.at(i).values_;
  }
  const GradientWrapper& GradOut(int i) {
    return g_output_.at(i);
  }

  // Functions to add a gradient pair to the gradient map.
  void SetDense(const int i, const string& name) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsSparse(),
        "Input ",
        def_.input(i),
        " already set to sparse.");
    g_input_.at(i).dense_ = name;
  }
  void SetSparse(const int i, const string& indices, const string& values) {
    CAFFE_ENFORCE(
        !g_input_.at(i).IsDense(),
        "Input ",
        def_.input(i),
        " already set to dense.");
    g_input_.at(i).indices_ = indices;
    g_input_.at(i).values_ = values;
  }
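
  // Illustrative sketch (not part of the original header): a maker for an op
  // with a sparse input gradient (e.g. a gather-style lookup) can emit no
  // gradient operator at all and instead mark input 0's gradient as sparse,
  // reusing the forward op's index input and the output gradient as values.
  // The class name is hypothetical.
  //
  //   class GetLookupGradient : public GradientMakerBase {
  //     using GradientMakerBase::GradientMakerBase;
  //     vector<OperatorDef> GetGradientDefs() override {
  //       SetSparse(0, I(1), GO(0));  // indices blob, values blob
  //       return vector<OperatorDef>();
  //     }
  //   };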

  /**
   * @brief A helper function to allow one to create one single operator def,
   * which is usually the case for many simple operators.
   */
  template <class... Args>
  inline static vector<OperatorDef> SingleGradientDef(const Args&... args) {
    return vector<OperatorDef>{CreateOperatorDef(args...)};
  }
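
  // Illustrative sketch (not part of the original header): a typical dense
  // maker overrides GetGradientDefs() and emits a single gradient op via
  // SingleGradientDef, wiring blob names with the I/GO/GI helpers above.
  // "FooGradient" is an assumed op type.
  //
  //   vector<OperatorDef> GetGradientDefs() override {
  //     return SingleGradientDef(
  //         "FooGradient", "",
  //         vector<string>{I(0), GO(0)},  // forward input, dL/d(output 0)
  //         vector<string>{GI(0)});       // dL/d(input 0), auto-registered
  //   }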

 public:
  /**
   * @brief Returns a map from the gradient-blob names in op's outputs to the
   * parameters they are gradients for.
   */
  static CaffeMap<string, string> MatchGradsToParams(const OperatorDef& op) {
    // NOTE: how to go beyond string-matching?
    CaffeMap<string, string> m;
    for (auto& out : op.output()) {
      if (IsGradientBlob(out)) {
        m[out] = out.substr(0, out.length() - 5);
      }
    }
    return m;
  }
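
  // Worked example (illustrative): for an op whose outputs include "W_grad"
  // and "b_grad" (hypothetical names), MatchGradsToParams returns
  // {"W_grad" -> "W", "b_grad" -> "b"}, obtained purely by stripping the
  // "_grad" suffix.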

 private:
  // Utility functions for gradient name computation. We don't expose them
  // in order to discourage the use of such names explicitly.
  static string GradientName(const string& name) {
    return name + "_grad";
  }

  static bool IsGradientBlob(const string& name) {
    return name.length() > 5 && name.find("_grad") == name.length() - 5;
  }

  static string GradientNameToParam(const string& name) {
    CHECK(IsGradientBlob(name));
    return name.substr(0, name.length() - 5);
  }

  static string GradientSliceIndices(const string& name) {
    return name + "_grad_indices";
  }

  static string GradientSliceValues(const string& name) {
    return name + "_grad_values";
  }

 protected:
  // We make the member variables protected in case someone wants to write
  // a fully custom Get() function.
  const OperatorDef& def_;
  const vector<GradientWrapper>& g_output_;
  vector<GradientWrapper> g_input_;
};
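
// Illustrative summary (not part of the original header): the naming scheme
// produced by the private helpers above, for a hypothetical blob "W":
//
//   GradientName("W")             -> "W_grad"
//   GradientSliceIndices("W")     -> "W_grad_indices"
//   GradientSliceValues("W")      -> "W_grad_values"
//   GradientNameToParam("W_grad") -> "W"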

/**
 * @brief A helper class to indicate that the operator does not need gradient
 * computation.
 */
class NoGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return vector<OperatorDef>();
  }
};

/**
 * @brief A helper class to indicate that the operator should have no
 * gradient.
 *
 * Attempting to get the gradient for an operator registered with this maker
 * causes a fatal error.
 */
class ThrowInTheTowelIfGradientIsCalled : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  GradientOpsMeta Get() override {
    CAFFE_ENFORCE(
        false, "One should not call gradient for operator ", def_.type(), ".");
  }
};

/**
 * @brief A helper class to indicate that the gradient mechanism is not ready
 * yet: the operator should have a gradient, but it has not been implemented.
 */
class GradientNotImplementedYet : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  GradientOpsMeta Get() override {
    CAFFE_ENFORCE(
        false,
        "Operator ",
        def_.type(),
        " should have a gradient but is not implemented yet.");
  }
};

CAFFE_DECLARE_REGISTRY(
    GradientRegistry,
    GradientMakerBase,
    const OperatorDef&,
    const vector<GradientWrapper>&);

#define REGISTER_GRADIENT(name, ...) \
  CAFFE_REGISTER_CLASS(GradientRegistry, name, __VA_ARGS__)
#define REGISTER_GRADIENT_STR(str_name, ...) \
  CAFFE_REGISTER_TYPED_CLASS(GradientRegistry, str_name, __VA_ARGS__)

// NO_GRADIENT means that the operator does not need any gradient computation.
#define NO_GRADIENT(name) REGISTER_GRADIENT(name, NoGradient)

// SHOULD_NOT_DO_GRADIENT means that the operator is not designed to have
// gradient operators. If you attempt to get its gradient, a fatal error will
// occur.
#define SHOULD_NOT_DO_GRADIENT(name) \
  REGISTER_GRADIENT(name, ThrowInTheTowelIfGradientIsCalled)

// GRADIENT_NOT_IMPLEMENTED_YET means that the operator should have a
// gradient, but it is not implemented yet.
#define GRADIENT_NOT_IMPLEMENTED_YET(name) \
  REGISTER_GRADIENT(name, GradientNotImplementedYet)
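
// Illustrative sketch (not part of the original header): registering gradient
// makers. "Abs" and "AbsGradient" are assumed op types, modeled on how simple
// Caffe2 operators are typically wired up in their .cc files.
//
//   class GetAbsGradient : public GradientMakerBase {
//     using GradientMakerBase::GradientMakerBase;
//     vector<OperatorDef> GetGradientDefs() override {
//       return SingleGradientDef(
//           "AbsGradient", "",
//           vector<string>{I(0), GO(0)},
//           vector<string>{GI(0)});
//     }
//   };
//   REGISTER_GRADIENT(Abs, GetAbsGradient);
//
//   NO_GRADIENT(SomeDataOp);            // hypothetical: no gradient needed
//   SHOULD_NOT_DO_GRADIENT(SomeMetricOp);  // hypothetical: gradient is an error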

/**
 * @brief Gets the GradientOpsMeta for the given operator def.
 */
GradientOpsMeta GetGradientForOp(
    const OperatorDef& def,
    const vector<GradientWrapper>& g_output);

} // namespace caffe2

#endif // CAFFE2_CORE_OPERATOR_GRADIENT_H_