Caffe2 - Python API
A deep learning, cross platform ML framework
gradient_checker.py
import numpy as np
from caffe2.python import core, workspace
from caffe2.proto import caffe2_pb2


class NetGradientChecker(object):
    @staticmethod
    def Check(net, outputs_with_grad, input_values,
              input_to_check, step_size=0.0001, threshold=0.05):
        assert input_to_check in input_values.keys()

        net_copy = net.Clone(net.Name() + "_copy")

        grad_map = net_copy.AddGradientOperators(outputs_with_grad)
        for name, value in input_values.items():
            workspace.blobs[name] = value

        def GetLoss(new_value):
            workspace.blobs[input_to_check] = new_value
            workspace.RunNetOnce(net_copy)
            return sum([
                workspace.blobs[output]
                for output in outputs_with_grad
            ])

        def GetValue(dim, delta):
            input_value = input_values[input_to_check].copy()
            input_value.flat[dim] += delta
            return input_value

        workspace.RunNetOnce(net_copy)
        analytic_grad = workspace.blobs[grad_map[input_to_check]]

        grad_estimate = np.zeros_like(input_values[input_to_check])
        for dim in range(input_values[input_to_check].size):
            # Central-difference estimate of d(loss)/d(input[dim]).
            pos_loss = GetLoss(GetValue(dim, step_size))
            neg_loss = GetLoss(GetValue(dim, -step_size))
            grad_estimate.flat[dim] = (pos_loss - neg_loss) / step_size / 2

        err_msg = "Error in gradient check for net_copy {}: {}".format(
            net.Name(), net.Proto())

        np.testing.assert_allclose(
            analytic_grad, grad_estimate,
            atol=threshold, rtol=threshold,
            err_msg=err_msg,
        )
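

# A minimal usage sketch (not part of the original file): NetGradientChecker.Check
# compares the analytic gradient produced by AddGradientOperators against a
# central-difference estimate over every element of one input blob. The operators
# ("Tanh", "SumElements") and blob names ("x", "y", "loss") are illustrative
# assumptions, not fixed by the API; any net with a scalar output would do.
def _example_net_gradient_check():
    net = core.Net("tanh_example")
    net.Tanh(["x"], ["y"])
    net.SumElements(["y"], ["loss"])
    NetGradientChecker.Check(
        net,
        outputs_with_grad=["loss"],
        input_values={"x": np.random.randn(2, 3).astype(np.float32)},
        input_to_check="x",
        step_size=1e-3,
        threshold=0.05,
    )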

class GradientChecker(object):
    """A gradient checker in Python.

    This is not the most efficient way to check gradients, as the Python
    interface will involve a lot of copy back and forth operations. Use at your
    own risk.
    """

    def __init__(
        self,
        stepsize,
        threshold,
        device_option=caffe2_pb2.DeviceOption(),
        workspace_name="gradient_check"
    ):
        self._stepsize = stepsize
        self._threshold = threshold
        self._device_option = device_option
        self._workspace_name = workspace_name

    def GetLossAndGrad(
        self, op, grad_ops, x, input_name, grad_name, outputs_with_grads
    ):
        # First, feed in the current input. Note that we are not changing
        # anything else, so we don't need to feed in others.
        workspace.FeedBlob(input_name, x, self._device_option)
        # Run the forward operator.
        workspace.RunOperatorOnce(op)
        loss = 0.
        # Get the loss and feed in the gradients. With loss = sum(out ** 2) / 2,
        # the gradient of the loss w.r.t. each output is the output itself, so
        # we feed each output back in as its own gradient.
        for idx in outputs_with_grads:
            name = op.output[idx]
            arr = workspace.FetchBlob(name)
            loss += (arr**2).sum()
            workspace.FeedBlob(name + '_grad', arr, self._device_option)
        loss /= 2.
        # Run gradient ops.
        workspace.RunOperatorsOnce(grad_ops)
        # Get gradients.
        if isinstance(grad_name, core.GradientSlice):
            # Sparse gradient: densify it by scattering the (indices, values)
            # pairs into a zero tensor shaped like the input.
            workspace.FeedBlob('zeros', np.zeros_like(x, dtype=np.float32))
            workspace.FeedBlob('one', np.ones(1, dtype=np.float32))
            sparse_to_dense_op = core.CreateOperator(
                'ScatterWeightedSum',
                ['zeros', 'one', grad_name.indices, grad_name.values, 'one'],
                'zeros')
            workspace.RunOperatorOnce(sparse_to_dense_op)
            grad = workspace.FetchBlob('zeros')
        else:
            grad = workspace.FetchBlob(grad_name)
        return loss, grad

    def CheckSimple(
        self,
        op,
        inputs,
        input_to_check,
        outputs_with_grads,
        grad_ops=None,
        input_device_options=None
    ):
        """Checks the operator in a very simple fashion by stacking a sum of
        squares on the top.

        Inputs:
          op: the operator to be checked.
          inputs: the input data in numpy arrays.
          input_to_check: an index specifying which input blob we should
              check.
          outputs_with_grads: indices specifying which output blobs we will
              need to check gradients with. For these outputs, we will collect
              a squared sum and also feed in their gradients.
          grad_ops: the gradient operators. If not given, we will get the
              gradient operators from the gradient registry.
          input_device_options: an optional mapping from input names to
              DeviceOptions (to override the default DeviceOption).
        Outputs:
          boolean: True if it passes, False if it does not pass (returned
              together with the analytic and estimated gradients).
        """
        if input_device_options is None:
            input_device_options = {}
        # Entering the checker workspace.
        old_ws_name = workspace.CurrentWorkspace()
        if self._workspace_name != old_ws_name:
            workspace.SwitchWorkspace(self._workspace_name, True)

        op.device_option.CopyFrom(self._device_option)
        if grad_ops is None:
            # TODO(jiayq): use the gradient registration instead of the old
            # hack.
            grad_ops, g_input = core.GradientRegistry.GetGradientForOp(
                op, [s + '_grad' for s in op.output])

        dims_to_check = inputs[input_to_check].size
        # First, feed in the input.
        for i, arr in enumerate(inputs):
            workspace.FeedBlob(
                op.input[i], arr,
                input_device_options.get(
                    op.input[i], self._device_option))

        # Get the loss and gradient for the original.
        input_name = op.input[input_to_check]
        grad_name = g_input[input_to_check]
        loss, grad = self.GetLossAndGrad(
            op, grad_ops, inputs[input_to_check], input_name, grad_name,
            outputs_with_grads
        )
        grad_estimate = np.zeros_like(inputs[input_to_check])
        if grad_estimate.shape != grad.shape:
            raise Exception(
                "Mismatched gradient shapes: estimated ({}), grad ({})".format(
                    grad_estimate.shape, grad.shape))

        for current_dim in range(dims_to_check):
            # Positive perturbation.
            inputs[input_to_check].flat[current_dim] += self._stepsize
            pos_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs[input_to_check], input_name,
                grad_name, outputs_with_grads
            )
            # Negative perturbation.
            inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
            neg_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs[input_to_check], input_name,
                grad_name, outputs_with_grads
            )
            # Recover the original value.
            inputs[input_to_check].flat[current_dim] += self._stepsize
            # Central-difference estimate of the gradient.
            grad_estimate.flat[current_dim] = (
                pos_loss - neg_loss) / self._stepsize / 2
        # Now, check correctness.
        fail_mat = ~np.isclose(
            grad, grad_estimate, atol=self._threshold, rtol=self._threshold)
        if np.any(fail_mat):
            idx = np.flatnonzero(fail_mat)
            print('Failed. [idx, grad, grad_estimate] are:')
            print(np.vstack([idx, grad.flat[idx], grad_estimate.flat[idx]]).T)
            ret = False
        else:
            ret = True
        # After finishing, clean things up.
        if self._workspace_name != old_ws_name:
            # We reset the workspace to make sure everything intermediate is
            # cleaned up. Note that there is no need to delete a workspace -
            # when empty it takes a very limited amount of memory.
            workspace.ResetWorkspace()
            workspace.SwitchWorkspace(old_ws_name)
        return ret, grad, grad_estimate
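

# A minimal usage sketch (not part of the original file): CheckSimple perturbs each
# element of one input of a single operator and compares the central-difference
# estimate against the gradient produced by the registered gradient operators. The
# operator ("Tanh"), blob names, and shapes below are illustrative assumptions.
def _example_check_simple():
    checker = GradientChecker(stepsize=0.005, threshold=0.05)
    op = core.CreateOperator("Tanh", ["x"], ["y"])
    x = np.random.randn(2, 3).astype(np.float32)
    passed, grad, grad_estimate = checker.CheckSimple(
        op,
        inputs=[x],
        input_to_check=0,        # index into `inputs`
        outputs_with_grads=[0],  # check gradients flowing from output 0
    )
    assert passed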