Caffe2 - Python API
A deep learning, cross platform ML framework
cifar10_training.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import os
import sys
from libfb import pyinit

from caffe2.python import core, cnn, workspace
from caffe2.python import SparseTransformer
import caffe2.python.models.resnet as resnet


def AddInput(model, batch_size, db, db_type):
    """Adds the data input part."""
    # Load the data from a DB.
    data_uint8, label_orig = model.TensorProtosDBInput(
        [], ["data_uint8", "label_orig"], batch_size=batch_size,
        db=db, db_type=db_type)
    # Since we are going to do float computations, cast the data to float.
    data = model.Cast(data_uint8, "data_nhwc", to=core.DataType.FLOAT)
    # Convert from NHWC to NCHW and scale the pixel values to [0, 1).
    data = model.NHWC2NCHW(data, "data")
    data = model.Scale(data, data, scale=float(1. / 256))
    # The input data does not need gradients.
    data = model.StopGradient(data, data)

    # Flatten the label to a vector.
    label = model.net.FlattenToVec(label_orig, "label")
    return data, label
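
# A minimal, commented sanity check (not part of the original script) of what
# AddInput produces once the nets have been created and run; the blob names
# and the CIFAR-10 shape of (batch_size, 3, 32, 32) follow from the code above:
#
#   workspace.RunNetOnce(model.param_init_net)
#   workspace.CreateNet(model.net)
#   workspace.RunNet(model.net.Proto().name)
#   print(workspace.FetchBlob("data").shape)   # e.g. (64, 3, 32, 32)
#   print(workspace.FetchBlob("label").shape)  # e.g. (64,)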


def AddAccuracy(model, softmax, label):
    """Adds an accuracy op to the model."""
    accuracy = model.Accuracy([softmax, label], "accuracy")
    return accuracy


def AddTrainingOperators(model, softmax, label, nn_model):
    """Adds training operators to the model."""
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    loss = model.AveragedLoss(xent, "loss")
    # For bookkeeping purposes, we will also compute the accuracy of the model.
    AddAccuracy(model, softmax, label)
    # Now, this is the key part of the training model: we add all the gradient
    # operators to the model. The gradient is computed with respect to the loss
    # that we computed above.
    model.AddGradientOperators([loss])
    # Now we do a very simple stochastic gradient descent.
    ITER = model.Iter("iter")
    # We use a "step" learning rate schedule: the rate is multiplied by gamma
    # every `stepsize` iterations. Note that we are doing minimization, so
    # base_lr is negative and we move in the DOWNHILL direction.
    LR = model.LearningRate(
        ITER, "LR", base_lr=-0.01, policy="step", stepsize=15000, gamma=0.5)
    # ONE is a constant value that is used in the gradient update. We only need
    # to create it once, so it is explicitly placed in param_init_net.
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    # Now, for each parameter, we do the gradient updates.
    for param in model.params:
        # Note how we get the gradient of each parameter - CNNModelHelper keeps
        # track of that.
        param_grad = model.param_to_grad[param]
        # The update is a simple weighted sum: param = param + param_grad * LR
        model.WeightedSum([param, ONE, param_grad, LR], param)
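
# A quick worked example of the schedule above (illustrative only): with
# base_lr=-0.01, gamma=0.5, and stepsize=15000, the "step" policy gives
#   lr(t) = base_lr * gamma ** (t // stepsize)
# i.e. -0.01 for iterations 0-14999, -0.005 for 15000-29999, -0.0025 for the
# next 15000 iterations, and so on.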


def AddBookkeepingOperators(model):
    """This adds a few bookkeeping operators that we can inspect later.

    These operators do not affect the training procedure: they only collect
    statistics and print them to file or to logs.
    """
    # Print basically prints out the content of the blob. to_file=1 routes the
    # printed output to a file. The file is going to be stored under
    # root_folder/[blob name].
    model.Print('accuracy', [], to_file=1)
    model.Print('loss', [], to_file=1)
    # Summarize the parameters. Unlike Print, Summarize gives some statistics
    # of the parameter, such as mean, std, min and max.
    for param in model.params:
        model.Summarize(param, [], to_file=1)
        model.Summarize(model.param_to_grad[param], [], to_file=1)
    # Now, if we really want to be very verbose, we can summarize EVERY blob
    # that the model produces; it is probably not a good idea, because
    # summarization does not come for free. For this demo, we only summarize
    # the parameters and their gradients.


def AlexNet(model, data, args):
    conv1 = model.Conv(
        data,
        "conv1",
        3,
        64,
        5,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=2
    )
    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=3, stride=2)
    conv2 = model.Conv(
        pool1,
        "conv2",
        64,
        192,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=3, stride=2)
    conv3 = model.Conv(
        pool2,
        "conv3",
        192,
        384,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = model.Relu(conv3, "conv3")
    conv4 = model.Conv(
        relu3,
        "conv4",
        384,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = model.Relu(conv4, "conv4")
    conv5 = model.Conv(
        relu4,
        "conv5",
        256,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = model.Relu(conv5, "conv5")
    pool5 = model.MaxPool(relu5, "pool5", kernel=3, stride=2)
    fc6 = model.FC(
        pool5, "fc6", 256 * 3 * 3, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu6 = model.Relu(fc6, "fc6")
    fc7 = model.FC(
        relu6, "fc7", 4096, 4096, ('XavierFill', {}), ('ConstantFill', {})
    )
    relu7 = model.Relu(fc7, "fc7")
    fc8 = model.FC(
        relu7, "fc8", 4096, 10, ('XavierFill', {}), ('ConstantFill', {})
    )
    softmax = model.Softmax(fc8, "pred")
    return softmax
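
# Why fc6's input dimension is 256 * 3 * 3 (a worked check, assuming 32x32
# CIFAR-10 inputs and the default floor convention for pooling): conv1 with
# pad=2 keeps 32x32; pool1 (kernel=3, stride=2) -> 15x15; conv2 keeps 15x15;
# pool2 -> 7x7; conv3-conv5 keep 7x7; pool5 -> 3x3, with 256 output channels.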


def AlexNet_Prune(model, data, args):
    conv1 = model.Conv(
        data,
        "conv1",
        3,
        64,
        5,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=2
    )
    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=3, stride=2)
    conv2 = model.Conv(
        pool1,
        "conv2",
        64,
        192,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=3, stride=2)
    conv3 = model.Conv(
        pool2,
        "conv3",
        192,
        384,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = model.Relu(conv3, "conv3")
    conv4 = model.Conv(
        relu3,
        "conv4",
        384,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = model.Relu(conv4, "conv4")
    conv5 = model.Conv(
        relu4,
        "conv5",
        256,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = model.Relu(conv5, "conv5")
    pool5 = model.MaxPool(relu5, "pool5", kernel=3, stride=2)
    # With need_compress_rate=True, FC_Prune returns both the FC output and a
    # compression-rate blob, hence the indexing below.
    fc6 = model.FC_Prune(
        pool5, "fc6", 256 * 3 * 3, 4096, ('XavierFill', {}),
        ('ConstantFill', {}),
        mask_init=None,
        threshold=args.prune_thres * 2,
        need_compress_rate=True,
        comp_lb=args.comp_lb
    )
    compress_fc6 = fc6[1]
    model.Print(compress_fc6, [], to_file=0)
    fc6 = fc6[0]
    relu6 = model.Relu(fc6, "fc6")
    fc7 = model.FC_Prune(
        relu6, "fc7", 4096, 4096, ('XavierFill', {}), ('ConstantFill', {}),
        mask_init=None,
        threshold=args.prune_thres,
        need_compress_rate=True,
        comp_lb=args.comp_lb
    )
    compress_fc7 = fc7[1]
    model.Print(compress_fc7, [], to_file=0)
    fc7 = fc7[0]
    relu7 = model.Relu(fc7, "fc7")
    fc8 = model.FC(
        relu7, "fc8", 4096, 10, ('XavierFill', {}), ('ConstantFill', {})
    )
    softmax = model.Softmax(fc8, "pred")
    return softmax
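
# FC_Prune is assumed here to be a pruning-aware FC layer: connections whose
# weight magnitude falls below `threshold` are masked out, subject to the
# compression lower bound `comp_lb`; note that fc6 uses twice the base
# threshold passed via --prune_thres.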


def ConvBNReLUDrop(model, currentblob, outputblob,
                   input_dim, output_dim, drop_ratio=None):
    currentblob = model.Conv(
        currentblob,
        outputblob,
        input_dim,
        output_dim,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=1,
        pad=1
    )
    currentblob = model.SpatialBN(currentblob,
                                  str(currentblob) + '_bn',
                                  output_dim, epsilon=1e-3)
    currentblob = model.Relu(currentblob, currentblob)
    if drop_ratio:
        currentblob = model.Dropout(currentblob,
                                    str(currentblob) + '_dropout',
                                    ratio=drop_ratio)
    return currentblob


def VGG(model, data, args):
    """Adds the VGG-like Kaggle-winner model for CIFAR-10.

    The original blog post about the model can be found at:
    http://torch.ch/blog/2015/07/30/cifar.html
    """
    conv1 = ConvBNReLUDrop(model, data, 'conv1', 3, 64, drop_ratio=0.3)
    conv2 = ConvBNReLUDrop(model, conv1, 'conv2', 64, 64)
    pool2 = model.MaxPool(conv2, 'pool2', kernel=2, stride=1)
    conv3 = ConvBNReLUDrop(model, pool2, 'conv3', 64, 128, drop_ratio=0.4)
    conv4 = ConvBNReLUDrop(model, conv3, 'conv4', 128, 128)
    pool4 = model.MaxPool(conv4, 'pool4', kernel=2, stride=2)

    conv5 = ConvBNReLUDrop(model, pool4, 'conv5', 128, 256, drop_ratio=0.4)
    conv6 = ConvBNReLUDrop(model, conv5, 'conv6', 256, 256, drop_ratio=0.4)
    conv7 = ConvBNReLUDrop(model, conv6, 'conv7', 256, 256)
    pool7 = model.MaxPool(conv7, 'pool7', kernel=2, stride=2)

    conv8 = ConvBNReLUDrop(model, pool7, 'conv8', 256, 512, drop_ratio=0.4)
    conv9 = ConvBNReLUDrop(model, conv8, 'conv9', 512, 512, drop_ratio=0.4)
    conv10 = ConvBNReLUDrop(model, conv9, 'conv10', 512, 512)
    pool10 = model.MaxPool(conv10, 'pool10', kernel=2, stride=2)

    conv11 = ConvBNReLUDrop(model, pool10, 'conv11',
                            512, 512, drop_ratio=0.4)
    conv12 = ConvBNReLUDrop(model, conv11, 'conv12',
                            512, 512, drop_ratio=0.4)
    conv13 = ConvBNReLUDrop(model, conv12, 'conv13', 512, 512)
    pool13 = model.MaxPool(conv13, 'pool13', kernel=2, stride=2)

    fc14 = model.FC(
        pool13, "fc14", 512, 512, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu14 = model.Relu(fc14, "fc14")
    pred = model.FC(
        relu14, "pred", 512, 10, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    softmax = model.Softmax(pred, 'softmax')
    return softmax
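
# Why fc14's input dimension is 512 (a worked check, assuming 32x32 inputs
# and the default floor convention for pooling): pool2 (kernel=2, stride=1)
# gives 31x31, then pool4 -> 15x15, pool7 -> 7x7, pool10 -> 3x3, and
# pool13 -> 1x1, so the flattened conv13 output is 512 * 1 * 1 = 512.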


def ResNet110(model, data, args):
    """
    Residual net as described in section 4.2 of He et al. (2015).
    """
    return resnet.create_resnet_32x32(
        model,
        data,
        num_input_channels=3,
        num_groups=18,
        num_labels=10,
    )


def ResNet20(model, data, args):
    """
    Residual net as described in section 4.2 of He et al. (2015).
    """
    return resnet.create_resnet_32x32(
        model,
        data,
        num_input_channels=3,
        num_groups=3,
        num_labels=10,
    )
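
# The CIFAR-style residual nets of He et al. (2015) have depth 6n + 2, where
# n is the number of residual blocks per feature-map size; num_groups plays
# the role of n here, so num_groups=3 gives ResNet-20 and num_groups=18
# gives ResNet-110.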


def sparse_transform(model):
    print("====================================================")
    print("                 Sparse Transformer                 ")
    print("====================================================")
    net_root, net_name2id, net_id2node = SparseTransformer.netbuilder(model)
    SparseTransformer.Prune2Sparse(
        net_root,
        net_id2node,
        net_name2id,
        model.net.Proto().op,
        model)
    op_list = SparseTransformer.net2list(net_root)
    # Replace the net's operators with their sparse counterparts.
    del model.net.Proto().op[:]
    model.net.Proto().op.extend(op_list)


def test_sparse(test_model):
    # Sparse implementation.
    sparse_transform(test_model)
    sparse_test_accuracy = np.zeros(100)
    for i in range(100):
        workspace.RunNet(test_model.net.Proto().name)
        sparse_test_accuracy[i] = workspace.FetchBlob('accuracy')
    # After the execution is done, print the values.
    print('Sparse Test Accuracy:')
    print(sparse_test_accuracy)
    print('sparse_test_accuracy: %f' % sparse_test_accuracy.mean())


def trainNtest(model_gen, args):
    print("Running on GPU: %s" % args.gpu)
    train_model = cnn.CNNModelHelper(
        "NCHW",
        name="Cifar_%s" % (args.model),
        use_cudnn=True,
        cudnn_exhaustive_search=True)
    data, label = AddInput(
        train_model, batch_size=64,
        db=args.train_input_path,
        db_type=args.db_type)
    softmax = model_gen(train_model, data, args)
    AddTrainingOperators(train_model, softmax, label, args.model)
    AddBookkeepingOperators(train_model)

    if args.gpu:
        train_model.param_init_net.RunAllOnGPU()
        train_model.net.RunAllOnGPU()

    # The parameter initialization network only needs to be run once.
    workspace.RunNetOnce(train_model.param_init_net)

    # Now, since we are going to run the main network multiple times,
    # we first create the network - which puts the actual network generated
    # from the protobuf into the workspace - and then call RunNet by
    # its name.
    workspace.CreateNet(train_model.net)

    # On the Python side, we create two numpy arrays to record the accuracy
    # and loss every `record` iterations.
    epoch_num = 200
    epoch_iters = 1000
    record = 1000

    accuracy = np.zeros(int(epoch_num * epoch_iters / record))
    loss = np.zeros(int(epoch_num * epoch_iters / record))
    # Now, we manually run the network for 200 epochs of 1000 iterations each.
    for e in range(epoch_num):
        for i in range(epoch_iters):
            workspace.RunNet(train_model.net.Proto().name)
            if i % record == 0:
                # Index by the global iteration count so that every recording
                # slot is used, not just slot 0.
                count = int((e * epoch_iters + i) / record)
                accuracy[count] = workspace.FetchBlob('accuracy')
                loss[count] = workspace.FetchBlob('loss')
                print('Train Loss: {}'.format(loss[count]))
                print('Train Accuracy: {}'.format(accuracy[count]))

    # Testing model. We set the batch size to 100, so that the testing
    # pass is 100 iterations (10,000 images in total).
    # For the testing model, we need the data input part, the main model
    # part, and an accuracy part. Note that init_params is set to False because
    # we will be using the parameters obtained from the train model.
    test_model = cnn.CNNModelHelper(
        order="NCHW", name="cifar10_test", init_params=False)
    data, label = AddInput(
        test_model, batch_size=100,
        db=args.test_input_path,
        db_type=args.db_type)
    softmax = model_gen(test_model, data, args)
    AddAccuracy(test_model, softmax, label)

    if args.gpu:
        test_model.param_init_net.RunAllOnGPU()
        test_model.net.RunAllOnGPU()
    # Now, remember that we created the test net? We run the test
    # pass and report the test accuracy here.
    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)
    # On the Python side, we create a numpy array to record the accuracy
    # for each test iteration.
    test_accuracy = np.zeros(100)
    for i in range(100):
        workspace.RunNet(test_model.net.Proto().name)
        test_accuracy[i] = workspace.FetchBlob('accuracy')

    print('Train Loss:')
    print(loss)
    print('Train Accuracy:')
    print(accuracy)
    print('Test Accuracy:')
    print(test_accuracy)
    print('test_accuracy: %f' % test_accuracy.mean())

    if args.model == 'AlexNet_Prune':
        test_sparse(test_model)


MODEL_TYPE_FUNCTIONS = {
    'AlexNet': AlexNet,
    'AlexNet_Prune': AlexNet_Prune,
    'VGG': VGG,
    'ResNet-110': ResNet110,
    'ResNet-20': ResNet20
}

if __name__ == '__main__':
    # It's hard to init flags correctly... so here it is.
    sys.argv.append('--caffe2_keep_on_shrink')

    # FbcodeArgumentParser calls initFacebook, which is necessary for NNLoader
    # initialization.
    parser = pyinit.FbcodeArgumentParser(description='cifar-10 Tutorial')

    # Arguments starting with a single '-' are compatible with Lua.
    parser.add_argument("--model", type=str, default='AlexNet',
                        choices=MODEL_TYPE_FUNCTIONS.keys(),
                        help="The model architecture to train.")
    parser.add_argument("--prune_thres", type=float, default=0.0001,
                        help="Pruning threshold for FC layers.")
    parser.add_argument("--comp_lb", type=float, default=0.02,
                        help="Compression lower bound for FC layers.")
    # Note: type=bool treats any non-empty string as True.
    parser.add_argument("--gpu", default=False,
                        help="Whether to run on GPU", type=bool)
    parser.add_argument("--train_input_path", type=str,
                        default=None,
                        required=True,
                        help="Path to the database for training data")
    parser.add_argument("--test_input_path", type=str,
                        default=None,
                        required=True,
                        help="Path to the database for test data")
    parser.add_argument("--db_type", type=str,
                        default="lmdb", help="Database type")
    args = parser.parse_args()

    # If you would like to see some really detailed initialization logs,
    # you can change --caffe2_log_level=0 to --caffe2_log_level=-1.
    core.GlobalInit(['caffe2', '--caffe2_log_level=0'])

    trainNtest(MODEL_TYPE_FUNCTIONS[args.model], args)
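
# An illustrative invocation (the database paths are hypothetical and must
# point to CIFAR-10 databases created beforehand):
#
#   python cifar10_training.py --model VGG \
#       --train_input_path /path/to/cifar10_train_lmdb \
#       --test_input_path /path/to/cifar10_test_lmdb \
#       --db_type lmdb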