4 The Hypothesis library uses *property-based testing* to check 5 invariants about the code under test under a variety of random inputs. 7 The key idea here is to express properties of the code under test 8 (e.g. that it passes a gradient check, that it implements a reference 9 function, etc.), and then generate random instances and verify they 10 satisfy these properties. 12 The main functions of interest are exposed on `HypothesisTestCase`. 13 You can usually just add a short test method to a subclass of it to generate an 14 arbitrary number of test cases for your operator. 16 The key functions are: 18 - `assertDeviceChecks(devices, op, inputs, outputs)`. This asserts that the 19 operator computes the same outputs, regardless of which device it is executed 20 on. 21 - `assertGradientChecks(device, op, inputs, output_, 22 outputs_with_grads)`. This implements a standard numerical gradient checker 23 for the operator in question. 24 - `assertReferenceChecks(device, op, inputs, reference)`. This runs the 25 reference function (effectively calling `reference(*inputs)`) and compares 26 the result to the output of the operator. 28 `hypothesis_test_util.py` exposes some useful pre-built samplers. 30 - `hu.gcs` - a gradient checker device (`gc`) and device checker devices (`dc`). 32 - `hu.gcs_cpu_only` - a CPU-only gradient checker device (`gc`) and 33 device checker devices (`dc`). Used when your operator is only 34 implemented on the CPU. 37 from __future__
import absolute_import
38 from __future__
import division
39 from __future__
import print_function
40 from __future__
import unicode_literals
41 from caffe2.proto
import caffe2_pb2
42 from caffe2.python
import (
43 workspace, device_checker, gradient_checker, test_util, core)
47 import hypothesis.extra.numpy
48 import hypothesis.strategies
as st
55 if os.getenv(
'SANDCASTLE') ==
'1':
57 elif os.getenv(
'TW_JOB_USER') ==
'sandcastle':
61 hypothesis.settings.register_profile(
65 suppress_health_check=[hypothesis.HealthCheck.too_slow],
68 verbosity=hypothesis.Verbosity.verbose))
70 hypothesis.settings.register_profile(
73 suppress_health_check=[hypothesis.HealthCheck.too_slow],
76 verbosity=hypothesis.Verbosity.verbose))
77 hypothesis.settings.register_profile(
80 suppress_health_check=[hypothesis.HealthCheck.too_slow],
83 verbosity=hypothesis.Verbosity.verbose))
84 hypothesis.settings.load_profile(
85 'sandcastle' if is_sandcastle()
else os.getenv(
'CAFFE2_HYPOTHESIS_PROFILE',
def dims(min_value=1, max_value=5):
    """Return a strategy for a single tensor dimension size.

    Args:
        min_value: smallest size that may be drawn (inclusive).
        max_value: largest size that may be drawn (inclusive).

    Returns:
        A Hypothesis integer strategy bounded by the two arguments.
    """
    bounds = dict(min_value=min_value, max_value=max_value)
    return st.integers(**bounds)
def elements_of_type(dtype=np.float32, filter_=None):
    """Return a strategy producing scalar elements appropriate for ``dtype``.

    Floating point types draw from [-1.0, 1.0]; ``int32`` and ``int64`` draw
    non-negative values up to the type's maximum; booleans draw True/False.

    Args:
        dtype: a NumPy scalar type, ``np.dtype`` instance, or anything else
            ``np.dtype()`` accepts (e.g. ``"int32"`` or the builtin ``bool``).
        filter_: optional predicate; when given, only elements for which it
            returns a truthy value are generated.

    Returns:
        A Hypothesis strategy for single elements.

    Raises:
        ValueError: if ``dtype`` is None, is not a valid dtype, or is not one
            of float16/float32/float64/int32/int64/bool.
    """
    unsupported = "Unexpected dtype without elements provided"

    # Normalise to an np.dtype so scalar types, dtype instances and dtype
    # names are all treated alike.  ``np.dtype(None)`` silently means
    # float64, so None is rejected explicitly to keep it an error.
    try:
        resolved = None if dtype is None else np.dtype(dtype)
    except TypeError:
        resolved = None
    if resolved is None:
        raise ValueError(unsupported)

    float_dtypes = tuple(
        np.dtype(t) for t in (np.float16, np.float32, np.float64))
    if resolved in float_dtypes:
        elems = st.floats(min_value=-1.0, max_value=1.0)
    elif resolved == np.dtype(np.int32):
        elems = st.integers(min_value=0, max_value=2 ** 31 - 1)
    elif resolved == np.dtype(np.int64):
        elems = st.integers(min_value=0, max_value=2 ** 63 - 1)
    elif resolved == np.dtype(np.bool_):
        # np.bool_ instead of the ``np.bool`` alias, which is deprecated or
        # missing in several NumPy releases; np.dtype(bool) resolves here too.
        elems = st.booleans()
    else:
        raise ValueError(unsupported)
    return elems if filter_ is None else elems.filter(filter_)
def arrays(dims, dtype=np.float32, elements=None):
    """Return a strategy for NumPy arrays of the given shape and dtype.

    Args:
        dims: shape passed through to ``hypothesis.extra.numpy.arrays``.
        dtype: element dtype of the generated arrays.
        elements: optional strategy for individual elements.  When omitted,
            a default is chosen from ``dtype`` via ``elements_of_type``.

    Returns:
        A Hypothesis strategy producing arrays.
    """
    # Only fall back to the per-dtype default when the caller did not supply
    # a strategy; otherwise the caller's ``elements`` would be discarded.
    if elements is None:
        elements = elements_of_type(dtype)
    return hypothesis.extra.numpy.arrays(dtype, dims, elements=elements)
def tensor(min_dim=1, max_dim=4, dtype=np.float32, elements=None, **kwargs):
    """Return a strategy for a single array with a randomly drawn shape.

    The shape is a list of between ``min_dim`` and ``max_dim`` sizes, each
    drawn from ``dims(**kwargs)``; the array itself comes from ``arrays``.

    Args:
        min_dim: minimum number of dimensions.
        max_dim: maximum number of dimensions.
        dtype: element dtype forwarded to ``arrays``.
        elements: optional element strategy forwarded to ``arrays``.
        **kwargs: forwarded to ``dims`` (e.g. ``min_value``/``max_value``).
    """
    def _array_for(shape):
        return arrays(shape, dtype, elements)

    shape_strategy = st.lists(
        dims(**kwargs), min_size=min_dim, max_size=max_dim)
    return shape_strategy.flatmap(_array_for)
def segment_ids(size, is_sorted):
    """Return a strategy for an int32 vector of ``size`` segment ids.

    Args:
        size: number of ids to generate.
        is_sorted: when true, ids start at 0 and are non-decreasing with
            steps of at most 1; otherwise each id is drawn independently
            from [0, 2 * size].

    Returns:
        A Hypothesis strategy producing a 1-D ``np.int32`` array.
    """
    # NOTE(review): guard and branch structure restored from the surviving
    # fragments of this function -- confirm against the upstream file.
    if size == 0:
        return st.just(np.empty(shape=[0], dtype=np.int32))
    if is_sorted:
        # Draw 0/1 increments and accumulate them; subtracting the first
        # increment makes the first id 0.
        return arrays(
            [size],
            dtype=np.int32,
            elements=st.booleans()).map(
                lambda x: np.cumsum(x, dtype=np.int32) - x[0])
    return arrays(
        [size],
        dtype=np.int32,
        elements=st.integers(min_value=0, max_value=2 * size))
def lengths(size, **kwargs):
    """Return a strategy for segment lengths that partition ``size`` items.

    A number of borders in [0, size - 1] is drawn, then that many border
    positions in [0, size].  After appending 0 and ``size``, sorting, and
    taking successive differences, the result is a vector of non-negative
    lengths (zeros are possible) whose sum is ``size``.

    Args:
        size: total number of items to split into segments.
        **kwargs: accepted so this can be used interchangeably with other
            segment generators (which receive ``is_sorted``); not used here.

    Returns:
        A Hypothesis strategy producing a 1-D array of lengths.
    """
    # NOTE(review): the empty-input guard was restored from context --
    # confirm the exact condition against the upstream file.
    if size == 0:
        return st.just(np.empty(shape=[0], dtype=np.int32))
    return st.integers(
        min_value=0, max_value=size - 1
    ).flatmap(
        lambda num_borders:
        hypothesis.extra.numpy.arrays(
            np.int32, num_borders, elements=st.integers(
                min_value=0, max_value=size
            )
        )
    ).map(
        lambda x: np.append(x, np.array([0, size], dtype=np.int32))
    ).map(sorted).map(np.diff)
def segmented_tensor(
    min_dim=1,
    max_dim=4,
    dtype=np.float32,
    is_sorted=True,
    elements=None,
    segment_generator=segment_ids,
    allow_empty=False,
    **kwargs
):
    """Return a strategy for a ``(data, segments)`` pair.

    ``data`` is an array with a randomly drawn shape and ``segments`` is
    produced by ``segment_generator`` for the size of ``data``'s first
    dimension.

    Args:
        min_dim: minimum number of drawn dimensions.
        max_dim: maximum number of drawn dimensions.
        dtype: element dtype of ``data``.
        is_sorted: forwarded to ``segment_generator`` as a keyword.
        elements: optional element strategy for ``data``.
        segment_generator: callable ``(size, is_sorted=...)`` returning a
            strategy for the segment description.
        allow_empty: when true, a leading dimension of size 0 may be
            prepended to the drawn shape.
        **kwargs: forwarded to ``dims``.

    NOTE(review): the parameter list was partly lost in this copy of the
    file and was restored from the parallel ``sparse_segmented_tensor``
    signature -- confirm the order against the upstream file.
    """
    gen_empty = st.booleans() if allow_empty else st.just(False)
    data_dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
    # When the "empty" flag is drawn, a 0 is put in front of the drawn
    # shape, so the first dimension (the one being segmented) is empty.
    data_dims_ = st.tuples(
        gen_empty, data_dims_
    ).map(lambda pair: ([0] if pair[0] else []) + pair[1])
    return data_dims_.flatmap(lambda data_dims: st.tuples(
        arrays(data_dims, dtype, elements),
        segment_generator(data_dims[0], is_sorted=is_sorted),
    ))
def lengths_tensor(*args, **kwargs):
    """Return a ``segmented_tensor`` strategy that uses ``lengths`` segments.

    All positional and keyword arguments are forwarded to
    ``segmented_tensor`` with ``segment_generator`` fixed to ``lengths``.
    """
    strategy = segmented_tensor(*args, segment_generator=lengths, **kwargs)
    return strategy
def sparse_segmented_tensor(min_dim=1, max_dim=4, dtype=np.float32,
                            is_sorted=True, elements=None, allow_empty=False,
                            segment_generator=segment_ids, **kwargs):
    """Return a strategy for a ``(data, indices, segments)`` triple.

    ``data`` is an array with a randomly drawn shape, ``indices`` is an
    int64 vector whose values index ``data``'s first dimension, and
    ``segments`` is produced by ``segment_generator`` for the number of
    indices.

    Args:
        min_dim: minimum number of dimensions of ``data``.
        max_dim: maximum number of dimensions of ``data``.
        dtype: element dtype of ``data``.
        is_sorted: forwarded to ``segment_generator`` as a keyword.
        elements: optional element strategy for ``data``.
        allow_empty: when true, the index vector may be empty.
        segment_generator: callable ``(size, is_sorted=...)`` returning a
            strategy for the segment description.
        **kwargs: forwarded to ``dims``.
    """
    gen_empty = st.booleans() if allow_empty else st.just(False)
    data_dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
    # Pair the data shape with the number of indices: between 1 and the size
    # of the first data dimension, or exactly 0 when "empty" is drawn.
    # NOTE(review): the first tuple element and the ``else`` arm were
    # restored from how the pair is consumed below -- confirm upstream.
    all_dims_ = st.tuples(gen_empty, data_dims_).flatmap(
        lambda pair: st.tuples(
            st.just(pair[1]),
            (st.integers(min_value=1, max_value=pair[1][0]) if not pair[0]
             else st.just(0)),
        ))
    return all_dims_.flatmap(lambda dims: st.tuples(
        arrays(dims[0], dtype, elements),
        arrays(dims[1], dtype=np.int64, elements=st.integers(
            min_value=0, max_value=dims[0][0] - 1)),
        segment_generator(dims[1], is_sorted=is_sorted),
    ))
def sparse_lengths_tensor(**kwargs):
    """Return a ``sparse_segmented_tensor`` strategy using ``lengths``.

    Keyword arguments are forwarded to ``sparse_segmented_tensor`` with
    ``segment_generator`` fixed to ``lengths``.
    """
    strategy = sparse_segmented_tensor(segment_generator=lengths, **kwargs)
    return strategy
def tensors(n, min_dim=1, max_dim=4, dtype=np.float32, elements=None, **kwargs):
    """Return a strategy for a list of ``n`` arrays sharing one shape.

    A single shape (between ``min_dim`` and ``max_dim`` sizes, each from
    ``dims(**kwargs)``) is drawn first; the result is a list of exactly
    ``n`` arrays of that shape.

    Args:
        n: number of arrays in the generated list.
        min_dim: minimum number of dimensions.
        max_dim: maximum number of dimensions.
        dtype: element dtype forwarded to ``arrays``.
        elements: optional element strategy forwarded to ``arrays``.
        **kwargs: forwarded to ``dims``.
    """
    def _same_shape_list(shape):
        single = arrays(shape, dtype, elements)
        return st.lists(single, min_size=n, max_size=n)

    shape_strategy = st.lists(
        dims(**kwargs), min_size=min_dim, max_size=max_dim)
    return shape_strategy.flatmap(_same_shape_list)
209 cpu_do = caffe2_pb2.DeviceOption()
210 gpu_do = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA)
211 device_options = [cpu_do] + ([gpu_do]
if workspace.has_gpu_support
else [])
213 expanded_device_options = [cpu_do] + (
214 [caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA, cuda_gpu_id=i)
216 if workspace.has_gpu_support
else [])
def device_checker_device_options():
    """Return a strategy that always yields the module's ``device_options``.

    The whole list is produced as one value.
    """
    all_devices = device_options
    return st.just(all_devices)
def gradient_checker_device_option():
    """Return a strategy that picks one entry of the module's ``device_options``."""
    candidates = device_options
    return st.sampled_from(candidates)
228 gc=gradient_checker_device_option(),
229 dc=device_checker_device_options()
232 gcs_cpu_only = dict(gc=st.sampled_from([cpu_do]), dc=st.just([cpu_do]))
233 gcs_gpu_only = dict(gc=st.sampled_from([gpu_do]), dc=st.just([gpu_do]))
236 @contextlib.contextmanager
237 def temp_workspace(name=b"temp_ws"):
249 input_device_options=None,
252 if input_device_options
is None:
253 input_device_options = {}
254 op = copy.deepcopy(op)
255 op.device_option.CopyFrom(device_option)
256 net = caffe2_pb2.NetDef()
258 net.name = op.name
if op.name
else "test" 260 with temp_workspace():
261 for (n, b)
in zip(op.input, inputs):
265 device_option=input_device_options.get(n, device_option)
274 A unittest.TestCase subclass with some helper functions for 275 utilizing the `hypothesis` (hypothesis.readthedocs.io) library. 283 input_device_options=None,
287 Asserts that the operator computes the same outputs, regardless of 288 which device it is executed on. 290 Useful for checking the consistency of GPU and CPU 291 implementations of operators. 295 @given(inputs=hu.tensors(n=2), in_place=st.booleans(), **hu.gcs) 296 def test_sum(self, inputs, in_place, gc, dc): 297 op = core.CreateOperator("Sum", ["X1", "X2"], 298 ["Y" if not in_place else "X1"]) 300 self.assertDeviceChecks(dc, op, [X1, X2], [0]) 304 device_options=device_options
307 dc.CheckSimple(op, inputs, outputs_to_check, input_device_options)
320 input_device_options=None,
323 Implements a standard numerical gradient checker for the operator 326 Useful for checking the consistency of the forward and 327 backward implementations of operators. 331 @given(inputs=hu.tensors(n=2), in_place=st.booleans(), **hu.gcs) 332 def test_sum(self, inputs, in_place, gc, dc): 333 op = core.CreateOperator("Sum", ["X1", "X2"], 334 ["Y" if not in_place else "X1"]) 336 self.assertGradientChecks(gc, op, [X1, X2], 0, [0]) 341 device_option=device_option,
342 workspace_name=str(device_option),
344 res, grad, grad_estimated = gc.CheckSimple(
345 op, inputs, outputs_to_check, outputs_with_grads,
347 input_device_options=input_device_options
349 self.assertEqual(grad.shape, grad_estimated.shape)
352 "Gradient check failed for input " + str(op.input[outputs_to_check])
355 def _assertGradReferenceChecks(
364 grad_blob_name = output_to_grad +
'_grad' 366 [op], {output_to_grad: grad_blob_name})
368 grad_ref_outputs = grad_reference(output_grad, ref_outputs, inputs)
372 self.assertEqual(len(grad_ref_outputs), len(inputs))
373 for (n, ref)
in zip(op.input, grad_ref_outputs):
374 grad_names = grad_map.get(n)
377 self.assertIsNone(ref)
383 val_name = grad_names
386 ref_vals, ref_indices = ref
387 val_name = grad_names.values
389 np.testing.assert_allclose(
394 err_msg=
'Gradient {0} is not matching the reference'.format(
398 if ref_indices
is not None:
400 np.testing.assert_allclose(indices, ref_indices,
401 atol=1e-4, rtol=1e-4)
403 def _assertInferTensorChecks(self, name, shapes, types, output):
404 if name
not in shapes:
408 if type(output)
is np.ndarray:
409 if output.dtype == np.dtype(
'float64'):
410 correct_type = caffe2_pb2.TensorProto.DOUBLE
411 elif output.dtype == np.dtype(
'float32'):
412 correct_type = caffe2_pb2.TensorProto.FLOAT
413 elif output.dtype == np.dtype(
'int32'):
414 correct_type = caffe2_pb2.TensorProto.INT32
415 elif output.dtype == np.dtype(
'int64'):
416 correct_type = caffe2_pb2.TensorProto.INT64
418 correct_type =
"unknown {}".format(np.dtype)
420 correct_type = str(type(output))
422 np.testing.assert_array_equal(
423 np.array(shapes[name]).astype(np.int32),
424 np.array(output.shape).astype(np.int32),
425 err_msg=
'Shape {} mismatch: {} vs. {}'.format(
430 if correct_type != caffe2_pb2.TensorProto.INT32:
432 np.testing.assert_equal(
435 err_msg=
'Type {} mismatch: {} vs. {}'.format(
436 name, types[name], correct_type,
439 except AssertionError
as e:
442 logging.warning(str(e))
443 if os.getenv(
'CAFFE2_ASSERT_SHAPEINFERENCE') ==
'1':
452 input_device_options=None,
457 outputs_to_check=None,
460 This runs the reference Python function implementation 461 (effectively calling `reference(*inputs)`, and compares that 462 to the output of output, with an absolute/relative tolerance 463 given by the `threshold` parameter. 465 Useful for checking the implementation matches the Python 466 (typically NumPy) implementation of the same functionality. 470 @given(X=hu.tensor(), inplace=st.booleans(), **hu.gcs) 471 def test_softsign(self, X, inplace, gc, dc): 472 op = core.CreateOperator( 473 "Softsign", ["X"], ["X" if inplace else "Y"]) 476 return (X / (1 + np.abs(X)),) 478 self.assertReferenceChecks(gc, op, [X], softsign) 480 if input_device_options
is None:
481 input_device_options = {}
483 op = copy.deepcopy(op)
484 op.device_option.CopyFrom(device_option)
486 with temp_workspace():
487 for (n, b)
in zip(op.input, inputs):
491 device_option=input_device_options.get(n, device_option)
493 print(
"Input", n, input_device_options.get(n, device_option))
495 net.Proto().op.extend([op])
496 test_shape_inference =
False 499 test_shape_inference =
True 500 except RuntimeError
as e:
503 logging.warning(str(e))
504 if os.getenv(
'CAFFE2_ASSERT_SHAPEINFERENCE') ==
'1':
507 reference_outputs = reference(*inputs)
508 if not (isinstance(reference_outputs, tuple)
or 509 isinstance(reference_outputs, list)):
511 "You are providing a wrong reference implementation. A " 512 "proper one should return a tuple/list of numpy arrays.")
513 if not outputs_to_check:
514 self.assertEqual(len(reference_outputs), len(op.output))
515 outputs_to_check = range(len(op.output))
517 for (output_index, ref)
in zip(outputs_to_check, reference_outputs):
518 output_blob_name = op.output[output_index]
520 if output.dtype.kind
in (
'S',
'O'):
521 np.testing.assert_array_equal(output, ref)
525 np.testing.assert_allclose(
526 output, ref, atol=atol, rtol=threshold,
528 'Output {0} is not matching the reference'.format(
532 if test_shape_inference:
534 output_blob_name, shapes, types, output)
536 if grad_reference
and output_to_grad:
539 op, inputs, reference_outputs,
540 output_to_grad, grad_reference)
543 def assertValidationChecks(
549 input_device_options=None,
552 if input_device_options
is None:
553 input_device_options = {}
555 assert len(set(list(op.input) + list(op.output))) == \
556 len(op.input) + len(op.output), \
557 "in-place ops are not supported in as_kwargs mode" 558 op = copy.deepcopy(op)
559 op.device_option.CopyFrom(device_option)
561 with temp_workspace():
562 for (n, b)
in zip(op.input, inputs):
566 device_option=input_device_options.get(n, device_option)
571 validator(**dict(zip(
572 list(op.input) + list(op.output), inputs + outputs)))
574 validator(inputs=inputs, outputs=outputs)
def GetBackwardPass(cls, operators, ys)
def ResetWorkspace(root_folder=None)
def InferShapesAndTypes(nets, blob_dimensions=None)
def RunOperatorsOnce(operators)
def FeedBlob(name, arr, device_option=None)
def assertGradientChecks(self, device_option, op, inputs, outputs_to_check, outputs_with_grads, grad_ops=None, threshold=0.005, stepsize=0.05, input_device_options=None)
def CreateNet(net, overwrite=False, input_blobs=None)
def RunOperatorOnce(operator)
def _assertInferTensorChecks(self, name, shapes, types, output)
def assertDeviceChecks(self, device_options, op, inputs, outputs_to_check, input_device_options=None, threshold=0.01)
def _assertGradReferenceChecks(self, op, inputs, ref_outputs, output_to_grad, grad_reference, threshold=1e-4)
def assertReferenceChecks(self, device_option, op, inputs, reference, input_device_options=None, threshold=1e-4, output_to_grad=None, grad_reference=None, atol=None, outputs_to_check=None)