import numpy as np

from caffe2.python import core, workspace
from caffe2.proto import caffe2_pb2
def Check(net, outputs_with_grad, input_values,
          input_to_check, step_size=0.0001, threshold=0.05):
    assert input_to_check in input_values.keys()

    net_copy = net.Clone(net.Name() + "_copy")
    grad_map = net_copy.AddGradientOperators(outputs_with_grad)
    for name, value in input_values.items():
        workspace.blobs[name] = value

    def GetLoss(new_value):
        # Feed the perturbed input, rerun the net, and sum up the outputs
        # whose gradients are being checked.
        workspace.blobs[input_to_check] = new_value
        workspace.RunNetOnce(net_copy)
        return sum(
            workspace.blobs[output]
            for output in outputs_with_grad
        ).sum()

    def GetValue(dim, delta):
        # Copy of the input with a single element perturbed by delta.
        input_value = input_values[input_to_check].copy()
        input_value.flat[dim] += delta
        return input_value

    # One forward/backward pass populates the analytic gradient blob.
    workspace.RunNetOnce(net_copy)
    analytic_grad = workspace.blobs[grad_map[input_to_check]]

    # Central-difference estimate of the gradient, one dimension at a time.
    grad_estimate = np.zeros_like(input_values[input_to_check])
    for dim in range(input_values[input_to_check].size):
        pos_loss = GetLoss(GetValue(dim, step_size))
        neg_loss = GetLoss(GetValue(dim, -step_size))
        grad_estimate.flat[dim] = (pos_loss - neg_loss) / step_size / 2

    err_msg = "Error in gradient check for net_copy {}: {}".format(
        net.Name(), net.Proto())
    np.testing.assert_allclose(
        analytic_grad, grad_estimate,
        atol=threshold, rtol=threshold,
        err_msg=err_msg,
    )
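
# Illustrative usage sketch for Check (not part of the original file): the
# net name, blob names, and the Scale operator below are assumptions chosen
# for the example.
def _example_net_gradient_check():
    net = core.Net("example_net")
    net.Scale("x", "y", scale=3.0)
    Check(
        net,
        outputs_with_grad=["y"],
        input_values={"x": np.random.rand(2, 3).astype(np.float32)},
        input_to_check="x",
    )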
53 """A gradient checker in Python. 55 This is not the most efficient way to check gradients, as the Python 56 interface will involve a lot of copy back and forth operations. Use at your 64 device_option=caffe2_pb2.DeviceOption(),
65 workspace_name=
"gradient_check" 73 self, op, grad_ops, x, input_name, grad_name, outputs_with_grads
        # Feed the input under test and run the forward operator once.
        workspace.FeedBlob(input_name, x, self._device_option)
        workspace.RunOperatorOnce(op)
        # Build the synthetic loss 0.5 * sum(output**2) over the selected
        # outputs, and seed each output's gradient blob with the output
        # itself, since d(loss)/d(output) = output.
        loss = 0.
        for idx in outputs_with_grads:
            name = op.output[idx]
            arr = workspace.FetchBlob(name)
            loss += (arr**2).sum()
            workspace.FeedBlob(name + '_grad', arr, self._device_option)
        loss /= 2.
        # Run the gradient operators to get the analytic gradient.
        workspace.RunOperatorsOnce(grad_ops)
        if isinstance(grad_name, core.GradientSlice):
            # Sparse gradients arrive as a GradientSlice; scatter them into
            # a dense zero-filled blob before comparing.
            workspace.FeedBlob('zeros', np.zeros_like(x, dtype=np.float32))
            workspace.FeedBlob('one', np.ones(1, dtype=np.float32))
            sparse_to_dense_op = core.CreateOperator(
                'ScatterWeightedSum',
                ['zeros', 'one', grad_name.indices, grad_name.values,
                 'one'],
                'zeros',
            )
            workspace.RunOperatorOnce(sparse_to_dense_op)
            grad = workspace.FetchBlob('zeros')
        else:
            grad = workspace.FetchBlob(grad_name)
        return loss, grad
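
    # Worked example of the gradient seeding above (illustrative, not in the
    # original file): with y = np.array([1., -2., 3.]) the synthetic loss is
    # 0.5 * (1 + 4 + 9) = 7.0, and d(loss)/d(y) = y, so feeding y back as
    # y_grad seeds the backward pass with exactly this loss's gradient.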

    def CheckSimple(
        self,
        op,
        inputs,
        input_to_check,
        outputs_with_grads,
        grad_ops=None,
        input_device_options=None,
    ):
        """Checks the operator in a very simple fashion by stacking a sum of
        squares on top.

        Inputs:
          op: the operator to be checked.
          inputs: the input data in numpy arrays.
          input_to_check: an index specifying which input blob we should
              check.
          outputs_with_grads: indices specifying which output blobs we will
              need to check gradients with. For these outputs, we will
              collect a squared sum and also feed in their gradients.
          grad_ops: the gradient operators. If not given, we will get them
              from the gradient registry.
          input_device_options: an optional mapping from input names to
              DeviceOptions (to override the default DeviceOption).
        Outputs:
          boolean: True if it passes, False if it does not pass.
        """
        if input_device_options is None:
            input_device_options = {}
        if grad_ops is None:
            grad_ops, g_input = core.GradientRegistry.GetGradientForOp(
                op, [s + '_grad' for s in op.output])

        dims_to_check = inputs[input_to_check].size
        # Feed every input, honoring any per-input device overrides.
        for i, arr in enumerate(inputs):
            workspace.FeedBlob(
                op.input[i], arr,
                input_device_options.get(op.input[i], self._device_option))
        # Loss and analytic gradient at the unperturbed input.
        input_name = op.input[input_to_check]
        grad_name = g_input[input_to_check]
        loss, grad = self.GetLossAndGrad(
            op, grad_ops, inputs[input_to_check], input_name, grad_name,
            outputs_with_grads)
        grad_estimate = np.zeros_like(inputs[input_to_check])
        if grad_estimate.shape != grad.shape:
            raise Exception(
                "Mismatched gradient shapes: estimated ({}), grad ({})".format(
                    grad_estimate.shape, grad.shape))
        for current_dim in range(dims_to_check):
            # Positive perturbation.
            inputs[input_to_check].flat[current_dim] += self._stepsize
            pos_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs[input_to_check], input_name,
                grad_name, outputs_with_grads)
            # Negative perturbation.
            inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
            neg_loss, _ = self.GetLossAndGrad(
                op, grad_ops, inputs[input_to_check], input_name,
                grad_name, outputs_with_grads)
            # Restore the original value.
            inputs[input_to_check].flat[current_dim] += self._stepsize
            grad_estimate.flat[current_dim] = (
                pos_loss - neg_loss) / self._stepsize / 2
        # Compare analytic and estimated gradients elementwise.
        fail_mat = ~np.isclose(
            grad, grad_estimate, atol=self._threshold, rtol=self._threshold)
        if np.any(fail_mat):
            idx = np.flatnonzero(fail_mat)
            print('Failed. [idx, grad, grad_estimate] are:')
            print(np.vstack([idx, grad.flat[idx],
                             grad_estimate.flat[idx]]).T)
            ret = False
        else:
            ret = True
        return ret, grad, grad_estimate
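
# Illustrative usage sketch for CheckSimple (not part of the original file):
# gradient-checks a Relu operator on random input. The blob names and the
# nudge away from Relu's kink at zero are assumptions for the example.
def _example_check_simple():
    checker = GradientChecker(stepsize=0.005, threshold=0.05)
    op = core.CreateOperator("Relu", ["X"], ["Y"])
    X = np.random.randn(3, 4).astype(np.float32)
    X[np.abs(X) < 0.01] += 0.1  # avoid non-differentiable points near 0
    passed, grad, grad_estimate = checker.CheckSimple(
        op, [X], input_to_check=0, outputs_with_grads=[0])
    assert passed, "analytic and numeric gradients disagree"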

# Caffe2 API signatures referenced above:
#   workspace.ResetWorkspace(root_folder=None)
#   workspace.FeedBlob(name, arr, device_option=None)
#   workspace.RunOperatorOnce(operator)
#   workspace.RunOperatorsOnce(operators)
#   core.CreateOperator(operator_type, inputs, outputs, name='',
#                       control_input=None, device_option=None, arg=None,
#                       engine=None, **kwargs)
#   core.GradientRegistry.GetGradientForOp(cls, op, g_output)