from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging

import numpy as np

from caffe2.python import core, scope, workspace

class ParameterType(object):
    DENSE = 'dense'
    SPARSE = 'sparse'


class ParameterInfo(object):

    def __init__(
            self, param_id, param, key=None, shape=None, length=None):
        assert isinstance(param, core.BlobReference)
        self.param_id = param_id
        self.name = str(param)
        self.blob = param
        self.key = key
        self.shape = shape
        self.size = None if shape is None else np.prod(shape)
        self.length = max(1, length if length is not None else 1)
        self.grad = None
        self._cloned_init_net = None

    def grad_type(self):
        # A gradient is sparse when it arrives as a core.GradientSlice
        # (indices + values) rather than as a dense blob.
        return (
            ParameterType.SPARSE if isinstance(self.grad, core.GradientSlice)
            else ParameterType.DENSE)

    def cloned_init_net(self):
        if self._cloned_init_net is None:
            init_net, outputs = self.blob.Net().ClonePartial(
                'param_%d_%s_init' % (self.param_id, self.name),
                inputs=[],
                outputs=[str(self.blob)])
            self._cloned_init_net = (init_net, outputs[0])
        return self._cloned_init_net
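
# Hedged sketch (not part of the original file): how ParameterInfo classifies
# a gradient. The blob names 'w' and 'w_grad' are hypothetical.
#
#   info = ParameterInfo(param_id=0, param=core.BlobReference("w"),
#                        shape=[10, 4])
#   info.grad = core.BlobReference("w_grad")
#   assert info.grad_type() == ParameterType.DENSE
#   assert info.size == 40  # np.prod([10, 4])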
55 """A helper model so we can write models more easily, without having to 56 manually define parameter initializations and operators separately. 57 In order to add support for specific operators, inherit from this class 58 and add corresponding methods. Operator representing methods should 59 take care of adding their parameters to params 62 def __init__(self, name=None, init_params=True, allow_not_known_ops=True,
63 skip_sparse_optim=False, param_model=None):
64 self.
name = name
or "model" 67 if param_model
is not None:
70 self.
params = param_model.params
    def _infer_param_shape(self, param):
        # Look up the init-net op that produces this param and read its
        # "shape" argument, if any.
        for op in self.param_init_net.Proto().op:
            if str(param) in op.output:
                for arg in op.arg:
                    if arg.name == "shape":
                        return list(arg.ints)
        return None

    def _update_param_info(self):
        assert len(self._param_info) <= len(self.params)
        for param in self.params[len(self._param_info):]:
            if not isinstance(param, core.BlobReference):
                raise ValueError("Param %s must be a BlobReference!" %
                                 str(param))
            self._param_info.append(ParameterInfo(
                param_id=len(self._param_info),
                param=param,
                shape=self._infer_param_shape(param)))
        for info in self._param_info:
            info.grad = self.param_to_grad.get(info.name)
    def add_param(self, param, key=None, shape=None, length=None):
        self._update_param_info()
        # If the key refers to a blob of the input record, translate it to
        # the corresponding field name.
        if key is not None and self.net.input_record() is not None:
            idx = self.net.input_record().field_blobs().index(key)
            key = self.net.input_record().field_names()[idx]
        shape = shape if shape is not None else self._infer_param_shape(param)
        if not isinstance(param, core.BlobReference):
            raise ValueError("Param %s must be a BlobReference!" % str(param))
        self._param_info.append(ParameterInfo(
            param_id=len(self._param_info),
            param=param,
            shape=shape,
            key=key,
            length=length,
        ))
        return self._param_info[-1]
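    # Hedged sketch (not in the original source): registering a blob created
    # in the init net as a parameter with an explicit shape.
    #
    #   w = model.param_init_net.XavierFill([], 'w', shape=[10, 4])
    #   info = model.add_param(w, shape=[10, 4])
    #   assert info.size == 40  # np.prod([10, 4])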
    def param_info(self, grad_type=None, id=None):
        self._update_param_info()
        if id is not None:
            assert grad_type is None
            info = self._param_info[id]
            assert info.param_id == id
            return info
        elif grad_type is not None:
            return [info for info in self._param_info
                    if info.grad_type() == grad_type]
        else:
            return self._param_info
    def GetParams(self, namescope=None, top_scope=False):
        '''
        Returns the params in current namescope
        '''
        if namescope is None:
            namescope = scope.CurrentNameScope()
        else:
            if not namescope.endswith(scope._NAMESCOPE_SEPARATOR):
                namescope += scope._NAMESCOPE_SEPARATOR

        if namescope == '':
            return self.params[:]
        else:
            return [p for p in self.params if
                    p.GetNameScope().startswith(namescope)]
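    # Hedged sketch (not in the original source): namescope filtering.
    # Parameters created under scope.NameScope('tower_0') are returned by
    # GetParams('tower_0') but not by GetParams('tower_1').
    #
    #   with scope.NameScope('tower_0'):
    #       w = model.param_init_net.ConstantFill([], 'w', shape=[2], value=0.)
    #       model.params.append(w)
    #   assert model.GetParams('tower_0') == [w]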
    def Proto(self):
        return self.net.Proto()

    def RunAllOnGPU(self, *args, **kwargs):
        self.param_init_net.RunAllOnGPU(*args, **kwargs)
        self.net.RunAllOnGPU(*args, **kwargs)

    def CreateDB(self, blob_out, db, db_type, **kwargs):
        dbreader = self.param_init_net.CreateDB(
            [], blob_out, db=db, db_type=db_type, **kwargs)
        return dbreader
    def AddGradientOperators(self, *args, **kwargs):
        if self.gradient_ops_added:
            raise RuntimeError("You cannot run AddGradientOperators twice.")
        self.gradient_ops_added = True
        self.grad_map = self.net.AddGradientOperators(*args, **kwargs)
        self.param_to_grad = self.get_param_to_grad(self.params)
        return self.grad_map
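    # Hedged sketch (not in the original source): a typical training flow.
    # The blob names ('data', 'fc_w', 'label', ...) are hypothetical.
    #
    #   model = ModelHelperBase(name="example")
    #   fc = model.net.FC(['data', 'fc_w', 'fc_b'], 'fc')
    #   dist = model.net.SquaredL2Distance([fc, 'label'], 'dist')
    #   loss = model.net.AveragedLoss(dist, 'loss')
    #   grad_map = model.AddGradientOperators([loss])
    #   # grad_map maps blob names (e.g. 'fc_w') to their gradient blobs.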
    def get_param_to_grad(self, params):
        '''
        Given a list of parameters returns a dict from a parameter
        to a corresponding gradient
        '''
        param_to_grad = {}
        if not self.gradient_ops_added:
            raise RuntimeError("You need to run AddGradientOperators first.")
        for p in params:
            if str(p) in self.grad_map:
                param_to_grad[p] = self.grad_map[str(p)]
        return param_to_grad
    def GetOptimizationPairs(self, params=None):
        '''
        Returns a map for param => grad.
        If params is not specified, all parameters will be considered.
        '''
        if not self.gradient_ops_added:
            raise RuntimeError("Need to call AddGradientOperators first")

        param_to_grad = self.param_to_grad
        if params:
            param_to_grad = self.get_param_to_grad(params)

        if not self.skip_sparse_optim:
            return param_to_grad
        else:
            return {param: grad for param, grad in param_to_grad.items()
                    if not isinstance(grad, core.GradientSlice)}
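    # Hedged note (not in the original source): with skip_sparse_optim=True,
    # parameters whose gradient is a core.GradientSlice (e.g. an embedding
    # updated through a sparse op) are dropped from the returned dict, so a
    # dense-only optimizer can consume the pairs directly. ONE and LR below
    # are hypothetical scalar blobs:
    #
    #   for param, grad in model.GetOptimizationPairs().items():
    #       model.net.WeightedSum([param, ONE, grad, LR], param)  # SGD step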
    def GetComputedParams(self, namescope=None):
        '''
        Returns the computed params in current namescope. 'Computed params'
        are such parameters that are not optimized via gradient descent but
        are directly computed from data, such as the running mean and variance
        of Spatial Batch Normalization.
        '''
        if namescope is None:
            namescope = scope.CurrentNameScope()
        else:
            if not namescope.endswith(scope._NAMESCOPE_SEPARATOR):
                namescope += scope._NAMESCOPE_SEPARATOR

        if namescope == '':
            return self.computed_params[:]
        else:
            return [p for p in self.computed_params
                    if p.GetNameScope() == namescope]

    def GetAllParams(self, namescope=None):
        return self.GetParams(namescope) + self.GetComputedParams(namescope)
    def TensorProtosDBInput(
        self, unused_blob_in, blob_out, batch_size, db, db_type, **kwargs
    ):
        """TensorProtosDBInput."""
        dbreader_name = "dbreader_" + db
        dbreader = self.param_init_net.CreateDB(
            [], dbreader_name,
            db=db, db_type=db_type)
        return self.net.TensorProtosDBInput(
            dbreader, blob_out, batch_size=batch_size)
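    # Hedged sketch (not in the original source): reading minibatches from a
    # minidb file. The path is hypothetical.
    #
    #   data, label = model.TensorProtosDBInput(
    #       [], ['data', 'label'], batch_size=64,
    #       db='/tmp/train.minidb', db_type='minidb')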
    def AddOperator(self, op_type, inputs, parameters, *args, **kwargs):
        """
        Adds an operator to a model. Use parameters list
        to specify which operator inputs are model parameters to be
        optimized.

        Example of usage:

        model.SparseLengthsSum(
            [embedding, indices, lengths],
            parameters=[embedding],
        )

        Here embedding is a parameter to be optimized while indices and
        lengths are not.
        """
        extra_parameters = [x for x in parameters if x not in inputs]
        if len(extra_parameters) > 0:
            raise Exception("Some parameters are not inputs: {}".format(
                [str(x) for x in extra_parameters]))

        self.params.extend(parameters)
        return self.net.__getattr__(op_type)(inputs, *args, **kwargs)
    def GetDevices(self):
        assert len(self._devices) > 0, \
            "Use data_parallel_model to run model on multiple GPUs."
        return self._devices

    def __getattr__(self, op_type):
        """Catch-all for all other operators, mostly those without params."""
        if op_type.startswith('__'):
            raise AttributeError(op_type)

        if not core.IsOperator(op_type):
            raise RuntimeError(
                'Method ' + op_type + ' is not a registered operator.' +
                ' Did you mean: [' +
                ','.join(workspace.C.nearby_opnames(op_type)) + ']'
            )

        known_working_ops = [
            # (the full list was abridged in the original extract)
            "ScatterWeightedSum",
        ]
        if op_type not in known_working_ops:
            if not self.allow_not_known_ops:
                raise RuntimeError(
                    "Operator {} is not known to be safe".format(op_type))

            logging.warning("You are creating an op that the ModelHelperBase "
                            "does not recognize: {}.".format(op_type))
        return self.net.__getattr__(op_type)

def ExtractPredictorNet(net_proto, input_blobs, output_blobs, device=None,
                        renames=None, disabled_inputs=None):
    '''
    Takes a model net for training and returns a net which can be
    used for prediction. For example, all gradient operators and
    input operators are removed.
    @param net_proto protobuf of the net you want to process (net.Proto())
    @param input_blobs list/set of blob names that are the inputs of predictor
    @param output_blobs list/set of blob names that are outputs of predictor
    @param device optional device option that is assigned
    @param renames dictionary of blob name to a new name (optional)
    @param disabled_inputs optional set of blobs that are 'switched off'. This
           will cause branches with those blobs as inputs to be removed
    '''
    predict_net = core.Net(net_proto.name + "_predict")
    predict_proto = predict_net.Proto()

    orig_external_inputs = set(net_proto.external_input)
    orig_external_outputs = set(net_proto.external_output)
    input_blobs = {str(b) for b in input_blobs}
    known_blobs = set(orig_external_inputs).union(input_blobs)
    output_blobs = {str(b) for b in output_blobs}
    external_inputs = set(input_blobs)
    external_outputs = set(output_blobs)

    if disabled_inputs is not None:
        known_blobs = known_blobs - set(disabled_inputs)

    ops = list(net_proto.op)
    # Find the first and last ops that touch the requested blobs; only ops
    # in that range can belong to the predictor net.
    try:
        first_op_with_input = min(
            [j for j in range(len(ops))
             if input_blobs.intersection(ops[j].input) and ops[j].type !=
             'StopGradient'])
    except ValueError:
        raise Exception("No ops with input={}".format(input_blobs))
    try:
        last_op_with_output = max(
            [j for j in range(len(ops))
             if output_blobs.intersection(ops[j].output)])
    except ValueError:
        raise Exception("No ops with output={}".format(output_blobs))
    def validate_op(op):
        # Check that the op does not have is_test = 0 set. This is a common
        # pitfall with the SpatialBN op, at least.
        for arg in op.arg:
            if arg.name == "is_test" and arg.i == 0:
                raise Exception(
                    "An operator had is_test=0, did you try to extract a " +
                    "predictor from a train model (instead of test model)?" +
                    " Op was: {}".format(str(op)))

    # Iterate through the ops and only include those whose inputs
    # we can satisfy.
    for op in ops[first_op_with_input:(last_op_with_output + 1)]:
        if known_blobs.issuperset(op.input):
            if device is not None:
                op.device_option.device_type = device.device_type
                op.device_option.cuda_gpu_id = device.cuda_gpu_id
            validate_op(op)
            predict_proto.op.extend([op])
            known_blobs.update(op.output)
            external_inputs.update(
                set(op.input).intersection(orig_external_inputs))
            external_outputs.update(
                set(op.output).intersection(orig_external_outputs))
        else:
            logging.debug(
                "Op {} had unknown inputs: {}".format(
                    op.type, set(op.input).difference(known_blobs)))
    def rename_list(proto_list):
        if renames is None:
            return
        # Proto lists don't support item assignment, so we build a new list
        # and swap it in.
        new_list = proto_list[:]
        for j, b in enumerate(new_list):
            if b in renames:
                new_list[j] = renames[b]
        del proto_list[:]
        proto_list.extend(new_list)

    predict_proto.external_input.extend(external_inputs)
    predict_proto.external_output.extend(external_outputs)

    rename_list(predict_proto.external_input)
    rename_list(predict_proto.external_output)

    for op in predict_proto.op:
        rename_list(op.input)
        rename_list(op.output)

    return predict_net
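
# Hedged sketch (not part of the original file): extracting a deploy net from
# a trained model. The blob names are hypothetical.
#
#   predict_net = ExtractPredictorNet(
#       net_proto=train_model.net.Proto(),
#       input_blobs=['data'],
#       output_blobs=['softmax'],
#       renames={'softmax': 'prob'})
#   workspace.CreateNet(predict_net)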