Caffe2 - Python API
A deep learning, cross platform ML framework
convnet_benchmarks.py
1 
3 """
4 Benchmark for common convnets.
5 
6 Speed on Titan X, with 10 warmup steps and 10 main steps and with different
7 versions of cudnn, are as follows (time reported below is per-batch time,
8 forward / forward+backward):
9 
10  CuDNN V3 CuDNN v4
11 AlexNet 32.5 / 108.0 27.4 / 90.1
12 OverFeat 113.0 / 342.3 91.7 / 276.5
13 Inception 134.5 / 485.8 125.7 / 450.6
14 VGG (batch 64) 200.8 / 650.0 164.1 / 551.7
15 
16 Speed on Inception with varied batch sizes and CuDNN v4 is as follows:
17 
18 Batch Size Speed per batch Speed per image
19  16 22.8 / 72.7 1.43 / 4.54
20  32 38.0 / 127.5 1.19 / 3.98
21  64 67.2 / 233.6 1.05 / 3.65
22 128 125.7 / 450.6 0.98 / 3.52
23 
24 Speed on Tesla M40, with 10 warmup steps and 10 main steps and with cudnn
25 v4, is as follows:
26 
27 AlexNet 68.4 / 218.1
28 OverFeat 210.5 / 630.3
29 Inception 300.2 / 1122.2
30 VGG (batch 64) 405.8 / 1327.7
31 
32 (Note that these numbers involve a "full" backprop, i.e. the gradient
33 with respect to the input image is also computed.)
34 
35 To get the numbers, simply run:
36 
37 for MODEL in AlexNet OverFeat Inception; do
38  PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
39  --batch_size 128 --model $MODEL --forward_only
40 done
41 for MODEL in AlexNet OverFeat Inception; do
42  PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
43  --batch_size 128 --model $MODEL
44 done
45 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
46  --batch_size 64 --model VGGA --forward_only
47 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
48  --batch_size 64 --model VGGA
49 
50 for BS in 16 32 64 128; do
51  PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
52  --batch_size $BS --model Inception --forward_only
53  PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
54  --batch_size $BS --model Inception
55 done
56 
57 Note that VGG needs to be run at batch 64 due to memory limit on the backward
58 pass.
59 """
60 
61 import argparse
62 
63 from caffe2.python import cnn, workspace
64 
65 
def MLP(order, cudnn_ws):
    """Build a multi-column MLP benchmark model.

    The net has ``width`` (3) parallel columns of ``depth`` (20) fully
    connected layers, each 256 -> 256, all reading from the "data" blob.
    The column outputs are summed into "sum", projected to 1000 outputs
    ("last"), and followed by label cross-entropy and an averaged loss.

    Args:
        order: Unused; accepted so the signature matches the other model
            generators.
        cudnn_ws: Unused; accepted for the same reason.

    Returns:
        A ``(model, 256)`` tuple. The second element is the feature width,
        which Benchmark uses as the size of the input.
    """
    model = cnn.CNNModelHelper()
    hidden_dim = 256
    num_layers = 20
    num_columns = 3

    def blob_name(layer, column):
        # Output blob of FC layer number `layer` (1-based) in `column`.
        return "fc_{}_{}".format(layer, column)

    for layer in range(num_layers):
        for column in range(num_columns):
            # The first layer of every column reads the shared input blob.
            if layer > 0:
                source = blob_name(layer, column)
            else:
                source = "data"
            model.FC(
                source,
                blob_name(layer + 1, column),
                dim_in=hidden_dim,
                dim_out=hidden_dim,
                weight_init=model.XavierInit,
                bias_init=model.XavierInit)

    column_outputs = [
        blob_name(num_layers, column) for column in range(num_columns)
    ]
    model.Sum(column_outputs, ["sum"])
    model.FC(
        "sum",
        "last",
        dim_in=hidden_dim,
        dim_out=1000,
        weight_init=model.XavierInit,
        bias_init=model.XavierInit)
    # NOTE(review): unlike the convnets in this file, no Softmax is applied
    # before LabelCrossEntropy here.
    xent = model.LabelCrossEntropy(["last", "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, hidden_dim
88 
89 
def AlexNet(order, cudnn_ws):
    """Build the AlexNet benchmark model.

    Five conv + ReLU stages (max-pooling after stages 1, 2 and 5) feed three
    fully connected layers, followed by softmax, label cross-entropy and an
    averaged loss named "loss".

    Args:
        order: Storage order forwarded to ``cnn.CNNModelHelper``.
        cudnn_ws: Forwarded to ``cnn.CNNModelHelper`` as ``ws_nbytes_limit``.

    Returns:
        A ``(model, 224)`` tuple; 224 is the square input side length that
        Benchmark uses to shape the "data" blob.
    """
    model = cnn.CNNModelHelper(
        order,
        name="alexnet",
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        ws_nbytes_limit=cudnn_ws)

    # (blob name, dim_in, dim_out, kernel, extra Conv kwargs, pool name/None)
    conv_stages = [
        ("conv1", 3, 64, 11, {"stride": 4, "pad": 2}, "pool1"),
        ("conv2", 64, 192, 5, {"pad": 2}, "pool2"),
        ("conv3", 192, 384, 3, {"pad": 1}, None),
        ("conv4", 384, 256, 3, {"pad": 1}, None),
        ("conv5", 256, 256, 3, {"pad": 1}, "pool5"),
    ]
    blob = "data"
    for name, dim_in, dim_out, kernel, conv_kwargs, pool_name in conv_stages:
        blob = model.Conv(
            blob, name, dim_in, dim_out, kernel,
            ('XavierFill', {}), ('ConstantFill', {}),
            **conv_kwargs)
        # The ReLU output reuses the conv blob name.
        blob = model.Relu(blob, name)
        if pool_name is not None:
            blob = model.MaxPool(blob, pool_name, kernel=3, stride=2)

    # Hidden fully connected layers, each followed by a ReLU.
    for name, dim_in, dim_out in [
        ("fc6", 256 * 6 * 6, 4096),
        ("fc7", 4096, 4096),
    ]:
        blob = model.FC(
            blob, name, dim_in, dim_out,
            ('XavierFill', {}), ('ConstantFill', {}))
        blob = model.Relu(blob, name)

    logits = model.FC(
        blob, "fc8", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {}))
    pred = model.Softmax(logits, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 224
170 
171 
def OverFeat(order, cudnn_ws):
    """Build the OverFeat benchmark model.

    Five conv + ReLU stages (2x2 max-pooling after stages 1, 2 and 5) feed
    three fully connected layers, followed by softmax, label cross-entropy
    and an averaged loss named "loss".

    Args:
        order: Storage order forwarded to ``cnn.CNNModelHelper``.
        cudnn_ws: Forwarded to ``cnn.CNNModelHelper`` as ``ws_nbytes_limit``.

    Returns:
        A ``(model, 231)`` tuple; 231 is the square input side length that
        Benchmark uses to shape the "data" blob.
    """
    model = cnn.CNNModelHelper(
        order,
        name="overfeat",
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        ws_nbytes_limit=cudnn_ws)

    # (blob name, dim_in, dim_out, kernel, extra Conv kwargs, pool name/None)
    conv_stages = [
        ("conv1", 3, 96, 11, {"stride": 4}, "pool1"),
        ("conv2", 96, 256, 5, {}, "pool2"),
        ("conv3", 256, 512, 3, {"pad": 1}, None),
        ("conv4", 512, 1024, 3, {"pad": 1}, None),
        ("conv5", 1024, 1024, 3, {"pad": 1}, "pool5"),
    ]
    blob = "data"
    for name, dim_in, dim_out, kernel, conv_kwargs, pool_name in conv_stages:
        blob = model.Conv(
            blob, name, dim_in, dim_out, kernel,
            ('XavierFill', {}), ('ConstantFill', {}),
            **conv_kwargs)
        # The ReLU output reuses the conv blob name.
        blob = model.Relu(blob, name)
        if pool_name is not None:
            blob = model.MaxPool(blob, pool_name, kernel=2, stride=2)

    # Hidden fully connected layers, each followed by a ReLU.
    for name, dim_in, dim_out in [
        ("fc6", 1024 * 6 * 6, 3072),
        ("fc7", 3072, 4096),
    ]:
        blob = model.FC(
            blob, name, dim_in, dim_out,
            ('XavierFill', {}), ('ConstantFill', {}))
        blob = model.Relu(blob, name)

    logits = model.FC(
        blob, "fc8", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {}))
    pred = model.Softmax(logits, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 231
244 
245 
def VGGA(order, cudnn_ws):
    """Build the VGG-A benchmark model.

    Eight 3x3 (pad 1) conv + ReLU stages, with 2x2 max-pooling after stages
    1, 2, 4, 6 and 8, feed three fully connected layers, followed by
    softmax, label cross-entropy and an averaged loss named "loss".

    Args:
        order: Storage order forwarded to ``cnn.CNNModelHelper``.
        cudnn_ws: Forwarded to ``cnn.CNNModelHelper`` as ``ws_nbytes_limit``.

    Returns:
        A ``(model, 231)`` tuple; 231 is the square input side length that
        Benchmark uses to shape the "data" blob.
    """
    model = cnn.CNNModelHelper(
        order,
        name='vgg-a',
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        ws_nbytes_limit=cudnn_ws)

    # (blob name, dim_in, dim_out, pool name or None)
    conv_stages = [
        ("conv1", 3, 64, "pool1"),
        ("conv2", 64, 128, "pool2"),
        ("conv3", 128, 256, None),
        ("conv4", 256, 256, "pool4"),
        ("conv5", 256, 512, None),
        ("conv6", 512, 512, "pool6"),
        ("conv7", 512, 512, None),
        ("conv8", 512, 512, "pool8"),
    ]
    blob = "data"
    for name, dim_in, dim_out, pool_name in conv_stages:
        blob = model.Conv(
            blob, name, dim_in, dim_out, 3,
            ('XavierFill', {}), ('ConstantFill', {}),
            pad=1)
        # The ReLU output reuses the conv blob name.
        blob = model.Relu(blob, name)
        if pool_name is not None:
            blob = model.MaxPool(blob, pool_name, kernel=2, stride=2)

    # Hidden fully connected layers, each followed by a ReLU.
    for name, dim_in, dim_out in [
        ("fcix", 512 * 7 * 7, 4096),
        ("fcx", 4096, 4096),
    ]:
        blob = model.FC(
            blob, name, dim_in, dim_out,
            ('XavierFill', {}), ('ConstantFill', {}))
        blob = model.Relu(blob, name)

    logits = model.FC(
        blob, "fcxi", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {}))
    pred = model.Softmax(logits, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 231
361 
362 
def _InceptionModule(
    model, input_blob, input_depth, output_name, conv1_depth, conv3_depths,
    conv5_depths, pool_depth
):
    """Add one four-branch inception module to ``model``.

    Branches, in the order they are added and concatenated:
      1. 1x1 conv
      2. 1x1 conv (reduce) followed by a 3x3 conv with pad 1
      3. 1x1 conv (reduce) followed by a 5x5 conv with pad 2
      4. 3x3 max-pool (stride 1, pad 1) followed by a 1x1 conv

    Every conv is followed by a ReLU whose output is the conv's own blob.

    Args:
        model: Model helper the operators are added to.
        input_blob: Blob feeding all four branches.
        input_depth: Number of channels of ``input_blob``.
        output_name: Name of the concatenated output; also the prefix of
            every intermediate blob (``output_name + ":conv1"`` etc.).
        conv1_depth: Output channels of branch 1.
        conv3_depths: ``[reduce_channels, output_channels]`` for branch 2.
        conv5_depths: ``[reduce_channels, output_channels]`` for branch 3.
        pool_depth: Output channels of the branch-4 projection.

    Returns:
        Whatever ``model.Concat`` returns for the four branch outputs.
    """

    def conv_relu(source, suffix, dim_in, dim_out, kernel, **conv_kwargs):
        # Conv named "<output_name><suffix>" followed by an in-place ReLU.
        blob = model.Conv(
            source, output_name + suffix, dim_in, dim_out, kernel,
            ('XavierFill', {}), ('ConstantFill', {}),
            **conv_kwargs)
        return model.Relu(blob, blob)

    # Branch 1: 1x1 conv.
    branch1 = conv_relu(input_blob, ":conv1", input_depth, conv1_depth, 1)

    # Branch 2: 1x1 reduction, then 3x3 conv.
    reduce3 = conv_relu(
        input_blob, ":conv3_reduce", input_depth, conv3_depths[0], 1)
    branch3 = conv_relu(
        reduce3, ":conv3", conv3_depths[0], conv3_depths[1], 3, pad=1)

    # Branch 3: 1x1 reduction, then 5x5 conv.
    reduce5 = conv_relu(
        input_blob, ":conv5_reduce", input_depth, conv5_depths[0], 1)
    branch5 = conv_relu(
        reduce5, ":conv5", conv5_depths[0], conv5_depths[1], 5, pad=2)

    # Branch 4: max-pool, then 1x1 projection.
    pooled = model.MaxPool(
        input_blob, output_name + ":pool", kernel=3, stride=1, pad=1)
    branch_pool = conv_relu(pooled, ":pool_proj", input_depth, pool_depth, 1)

    return model.Concat(
        [branch1, branch3, branch5, branch_pool], output_name)
422 
423 
def Inception(order, cudnn_ws):
    """Build the Inception benchmark model.

    A conv stem (7x7 stride-2 conv, pool, 1x1 conv, 3x3 conv, pool) is
    followed by nine inception modules ("inc3" .. "inc11") with max-pooling
    after "inc4" and "inc9", a 7x7 average pool, a 1024 -> 1000 FC layer,
    softmax, label cross-entropy and an averaged loss named "loss".

    Args:
        order: Storage order forwarded to ``cnn.CNNModelHelper``.
        cudnn_ws: Forwarded to ``cnn.CNNModelHelper`` as ``ws_nbytes_limit``.

    Returns:
        A ``(model, 224)`` tuple; 224 is the square input side length that
        Benchmark uses to shape the "data" blob.
    """
    model = cnn.CNNModelHelper(
        order,
        name="inception",
        use_cudnn=True,
        cudnn_exhaustive_search=True,
        ws_nbytes_limit=cudnn_ws)

    # Stem.
    blob = model.Conv(
        "data", "conv1", 3, 64, 7,
        ('XavierFill', {}), ('ConstantFill', {}),
        stride=2, pad=3)
    blob = model.Relu(blob, "conv1")
    blob = model.MaxPool(blob, "pool1", kernel=3, stride=2, pad=1)
    blob = model.Conv(
        blob, "conv2a", 64, 64, 1, ('XavierFill', {}), ('ConstantFill', {}))
    blob = model.Relu(blob, blob)
    blob = model.Conv(
        blob, "conv2", 64, 192, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1)
    blob = model.Relu(blob, "conv2")
    blob = model.MaxPool(blob, "pool2", kernel=3, stride=2, pad=1)

    # Inception modules, grouped into stages. Each module entry is
    # (name, input_depth, conv1_depth, conv3_depths, conv5_depths,
    #  pool_depth); each stage ends with the named max-pool, or with no
    # pool when the name is None.
    stages = [
        (
            [
                ("inc3", 192, 64, [96, 128], [16, 32], 32),
                ("inc4", 256, 128, [128, 192], [32, 96], 64),
            ],
            "pool5",
        ),
        (
            [
                ("inc5", 480, 192, [96, 208], [16, 48], 64),
                ("inc6", 512, 160, [112, 224], [24, 64], 64),
                ("inc7", 512, 128, [128, 256], [24, 64], 64),
                ("inc8", 512, 112, [144, 288], [32, 64], 64),
                ("inc9", 528, 256, [160, 320], [32, 128], 128),
            ],
            "pool9",
        ),
        (
            [
                ("inc10", 832, 256, [160, 320], [32, 128], 128),
                ("inc11", 832, 384, [192, 384], [48, 128], 128),
            ],
            None,
        ),
    ]
    for modules, pool_name in stages:
        for name, depth_in, c1_depth, c3_depths, c5_depths, proj in modules:
            blob = _InceptionModule(
                model, blob, depth_in, name,
                c1_depth, c3_depths, c5_depths, proj)
        if pool_name is not None:
            blob = model.MaxPool(
                blob, pool_name, kernel=3, stride=2, pad=1)

    blob = model.AveragePool(blob, "pool11", kernel=7, stride=1)
    logits = model.FC(
        blob, "fc", 1024, 1000, ('XavierFill', {}), ('ConstantFill', {}))
    # It seems that Soumith's benchmark does not have softmax on top
    # for Inception. We add it anyway so there is a proper backward pass.
    pred = model.Softmax(logits, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 224
499 
500 
def AddParameterUpdate(model):
    """Append a plain SGD-style update for every parameter of ``model``.

    The hyper-parameters are placeholders for benchmarking; they are not
    tuned to actually train the models.

    Args:
        model: Model helper exposing ``params`` and ``param_to_grad``; the
            update operators are added to it.
    """
    iteration = model.Iter("iter")
    learning_rate = model.LearningRate(
        iteration,
        "LR",
        base_lr=-1e-8,
        policy="step",
        stepsize=10000,
        gamma=0.999)
    # Constant 1.0 used as the weight of the parameter in the sum below.
    one = model.param_init_net.ConstantFill(
        [], "ONE", shape=[1], value=1.0)
    for param in model.params:
        # Written back in place: the output blob is the parameter itself.
        # Inputs are (blob, weight) pairs: (param, ONE) and (grad, LR).
        model.WeightedSum(
            [param, one, model.param_to_grad[param], learning_rate],
            param)
510 
511 
def Benchmark(model_gen, arg):
    """Build a model with ``model_gen`` and benchmark it in the workspace.

    Steps: create the model, add random "data" and "label" blobs to the
    parameter init net, optionally add gradient and parameter-update
    operators, optionally move everything to GPU and force an engine,
    optionally dump the nets as text protobufs, then initialize, create and
    benchmark the net.

    Args:
        model_gen: Callable ``(order, cudnn_ws) -> (model, input_size)``,
            e.g. one of the model builders in this file.
        arg: Parsed command-line namespace. Reads ``order``, ``cudnn_ws``,
            ``net_type``, ``num_workers``, ``batch_size``, ``model``,
            ``forward_only``, ``cpu``, ``engine``, ``dump_model``,
            ``warmup_iterations``, ``iterations`` and
            ``layer_wise_benchmark``.
    """
    model, input_size = model_gen(arg.order, arg.cudnn_ws)
    model.Proto().type = arg.net_type
    model.Proto().num_workers = arg.num_workers

    # In order to be able to run everything without feeding more stuff, let's
    # add the data and label blobs to the parameter initialization net as well.
    if arg.model == "MLP":
        # The MLP takes a flat feature vector and ignores the storage order,
        # so its input shape must not depend on --order.
        input_shape = [arg.batch_size, input_size]
    elif arg.order == "NCHW":
        input_shape = [arg.batch_size, 3, input_size, input_size]
    else:
        input_shape = [arg.batch_size, input_size, input_size, 3]

    model.param_init_net.GaussianFill(
        [],
        "data",
        shape=input_shape,
        mean=0.0,
        std=1.0
    )
    model.param_init_net.UniformIntFill(
        [],
        "label",
        shape=[arg.batch_size, ],
        min=0,
        max=999
    )

    if arg.forward_only:
        print('{}: running forward only.'.format(arg.model))
    else:
        print('{}: running forward-backward.'.format(arg.model))
        model.AddGradientOperators(["loss"])
        AddParameterUpdate(model)
        if arg.order == 'NHWC':
            print(
                '==WARNING==\n'
                'NHWC order with CuDNN may not be supported yet, so I might\n'
                'exit suddenly.'
            )

    if not arg.cpu:
        model.param_init_net.RunAllOnGPU()
        model.net.RunAllOnGPU()

    if arg.engine:
        # Blindly request the given engine for every operator of the main net.
        for op in model.net.Proto().op:
            op.engine = arg.engine

    if arg.dump_model:
        # Writes out the pbtxt for benchmarks on e.g. Android
        with open(
            "{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size), "w"
        ) as fid:
            fid.write(str(model.param_init_net.Proto()))
        with open("{0}.pbtxt".format(arg.model), "w") as fid:
            fid.write(str(model.net.Proto()))

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    workspace.BenchmarkNet(
        model.net.Proto().name, arg.warmup_iterations, arg.iterations,
        arg.layer_wise_benchmark)
576 
577 
def GetArgumentParser():
    """Create the command-line parser for the benchmark script.

    All boolean options are plain flags (``action='store_true'``): pass the
    flag alone to enable it, with no value after it.

    Returns:
        An ``argparse.ArgumentParser`` whose parsed namespace provides
        ``batch_size``, ``model``, ``order``, ``cudnn_ws``, ``iterations``,
        ``warmup_iterations``, ``forward_only``, ``layer_wise_benchmark``,
        ``cpu``, ``engine``, ``dump_model``, ``net_type``, ``num_workers``,
        ``use_nvtx`` and ``htrace_span_log_path``.
    """
    parser = argparse.ArgumentParser(description="Caffe2 benchmark.")
    parser.add_argument(
        "--batch_size",
        type=int,
        default=128,
        help="The batch size."
    )
    parser.add_argument("--model", type=str, help="The model to benchmark.")
    parser.add_argument(
        "--order",
        type=str,
        default="NCHW",
        # Reject typos here instead of silently treating them as NHWC.
        choices=["NCHW", "NHWC"],
        help="The order to evaluate."
    )
    parser.add_argument(
        "--cudnn_ws",
        type=int,
        help="The cudnn workspace size."
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=10,
        help="Number of iterations to run the network."
    )
    parser.add_argument(
        "--warmup_iterations",
        type=int,
        default=10,
        help="Number of warm-up iterations before benchmarking."
    )
    parser.add_argument(
        "--forward_only",
        action='store_true',
        help="If set, only run the forward pass."
    )
    parser.add_argument(
        "--layer_wise_benchmark",
        action='store_true',
        help="If set, run the layer-wise benchmark as well."
    )
    parser.add_argument(
        "--cpu",
        action='store_true',
        help="If set, run testing on CPU instead of GPU."
    )
    parser.add_argument(
        "--engine",
        type=str,
        default="",
        help="If set, blindly prefer the given engine(s) for every op.")
    parser.add_argument(
        "--dump_model",
        action='store_true',
        help="If set, dump the model prototxts to disk."
    )
    parser.add_argument(
        "--net_type",
        type=str,
        default="dag",
        help="Value assigned to the 'type' field of the net proto."
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=2,
        help="Value assigned to the 'num_workers' field of the net proto."
    )
    parser.add_argument(
        "--use-nvtx",
        default=False,
        action='store_true',
        help="If set, pass --caffe2_use_nvtx to Caffe2 at startup."
    )
    parser.add_argument(
        "--htrace_span_log_path",
        type=str,
        help="If set, passed to Caffe2 as --caffe2_htrace_span_log_path."
    )
    return parser
640 
641 
if __name__ == '__main__':
    # Script entry point: parse flags, initialize Caffe2, run one benchmark.
    parser = GetArgumentParser()
    args = parser.parse_args()

    model_map = {
        'AlexNet': AlexNet,
        'OverFeat': OverFeat,
        'VGGA': VGGA,
        'Inception': Inception,
        'MLP': MLP,
    }

    if (
        not args.batch_size or not args.model or not args.order
    ):
        parser.print_help()
    elif args.model not in model_map:
        # Report a usage error instead of failing later with a bare KeyError.
        parser.error(
            "unknown model '{}'; choose one of: {}".format(
                args.model, ", ".join(sorted(model_map))))
    else:
        # Forward the optional NVTX / htrace flags to Caffe2's global init.
        workspace.GlobalInit(
            ['caffe2', '--caffe2_log_level=0'] +
            (['--caffe2_use_nvtx'] if args.use_nvtx else []) +
            (['--caffe2_htrace_span_log_path=' + args.htrace_span_log_path]
             if args.htrace_span_log_path else []))

        Benchmark(model_map[args.model], args)
def RunNetOnce(net)
Definition: workspace.py:160
def CreateNet(net, overwrite=False, input_blobs=None)
Definition: workspace.py:140