4 Benchmark for common convnets. 6 Speed on Titan X, with 10 warmup steps and 10 main steps and with different 7 versions of cudnn, is as follows (time reported below is per-batch time, 8 forward / forward+backward): 11 AlexNet 32.5 / 108.0 27.4 / 90.1 12 OverFeat 113.0 / 342.3 91.7 / 276.5 13 Inception 134.5 / 485.8 125.7 / 450.6 14 VGG (batch 64) 200.8 / 650.0 164.1 / 551.7 16 Speed on Inception with varied batch sizes and CuDNN v4 is as follows: 18 Batch Size Speed per batch Speed per image 19 16 22.8 / 72.7 1.43 / 4.54 20 32 38.0 / 127.5 1.19 / 3.98 21 64 67.2 / 233.6 1.05 / 3.65 22 128 125.7 / 450.6 0.98 / 3.52 24 Speed on Tesla M40, with 10 warmup steps and 10 main steps and with cudnn 28 OverFeat 210.5 / 630.3 29 Inception 300.2 / 1122.2 30 VGG (batch 64) 405.8 / 1327.7 32 (Note that these numbers involve a "full" backprop, i.e. the gradient 33 with respect to the input image is also computed.) 35 To get the numbers, simply run: 37 for MODEL in AlexNet OverFeat Inception; do 38 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 39 --batch_size 128 --model $MODEL --forward_only True 41 for MODEL in AlexNet OverFeat Inception; do 42 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 43 --batch_size 128 --model $MODEL 45 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 46 --batch_size 64 --model VGGA --forward_only True 47 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 48 --batch_size 64 --model VGGA 50 for BS in 16 32 64 128; do 51 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 52 --batch_size $BS --model Inception --forward_only True 53 PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \ 54 --batch_size $BS --model Inception 57 Note that VGG needs to be run at batch 64 due to memory limit on the backward 63 from caffe2.python
import cnn, workspace
66 def MLP(order, cudnn_ws):
71 for i
in range(depth):
72 for j
in range(width):
73 current =
"fc_{}_{}".format(i, j)
if i > 0
else "data" 74 next_ =
"fc_{}_{}".format(i + 1, j)
78 weight_init=model.XavierInit,
79 bias_init=model.XavierInit)
80 model.Sum([
"fc_{}_{}".format(depth, j)
for j
in range(width)], [
"sum"])
81 model.FC(
"sum",
"last",
82 dim_in=d, dim_out=1000,
83 weight_init=model.XavierInit,
84 bias_init=model.XavierInit)
85 xent = model.LabelCrossEntropy([
"last",
"label"],
"xent")
86 model.AveragedLoss(xent,
"loss")
90 def AlexNet(order, cudnn_ws):
92 order, name=
"alexnet",
93 use_cudnn=
True, cudnn_exhaustive_search=
True,
94 ws_nbytes_limit=cudnn_ws)
102 (
'ConstantFill', {}),
106 relu1 = model.Relu(conv1,
"conv1")
107 pool1 = model.MaxPool(relu1,
"pool1", kernel=3, stride=2)
115 (
'ConstantFill', {}),
118 relu2 = model.Relu(conv2,
"conv2")
119 pool2 = model.MaxPool(relu2,
"pool2", kernel=3, stride=2)
127 (
'ConstantFill', {}),
130 relu3 = model.Relu(conv3,
"conv3")
138 (
'ConstantFill', {}),
141 relu4 = model.Relu(conv4,
"conv4")
149 (
'ConstantFill', {}),
152 relu5 = model.Relu(conv5,
"conv5")
153 pool5 = model.MaxPool(relu5,
"pool5", kernel=3, stride=2)
155 pool5,
"fc6", 256 * 6 * 6, 4096, (
'XavierFill', {}),
158 relu6 = model.Relu(fc6,
"fc6")
160 relu6,
"fc7", 4096, 4096, (
'XavierFill', {}), (
'ConstantFill', {})
162 relu7 = model.Relu(fc7,
"fc7")
164 relu7,
"fc8", 4096, 1000, (
'XavierFill', {}), (
'ConstantFill', {})
166 pred = model.Softmax(fc8,
"pred")
167 xent = model.LabelCrossEntropy([pred,
"label"],
"xent")
168 loss = model.AveragedLoss(xent,
"loss")
172 def OverFeat(order, cudnn_ws):
174 order, name=
"overfeat",
175 use_cudnn=
True, cudnn_exhaustive_search=
True,
176 ws_nbytes_limit=cudnn_ws)
184 (
'ConstantFill', {}),
187 relu1 = model.Relu(conv1,
"conv1")
188 pool1 = model.MaxPool(relu1,
"pool1", kernel=2, stride=2)
190 pool1,
"conv2", 96, 256, 5, (
'XavierFill', {}), (
'ConstantFill', {})
192 relu2 = model.Relu(conv2,
"conv2")
193 pool2 = model.MaxPool(relu2,
"pool2", kernel=2, stride=2)
201 (
'ConstantFill', {}),
204 relu3 = model.Relu(conv3,
"conv3")
212 (
'ConstantFill', {}),
215 relu4 = model.Relu(conv4,
"conv4")
223 (
'ConstantFill', {}),
226 relu5 = model.Relu(conv5,
"conv5")
227 pool5 = model.MaxPool(relu5,
"pool5", kernel=2, stride=2)
229 pool5,
"fc6", 1024 * 6 * 6, 3072, (
'XavierFill', {}),
232 relu6 = model.Relu(fc6,
"fc6")
234 relu6,
"fc7", 3072, 4096, (
'XavierFill', {}), (
'ConstantFill', {})
236 relu7 = model.Relu(fc7,
"fc7")
238 relu7,
"fc8", 4096, 1000, (
'XavierFill', {}), (
'ConstantFill', {})
240 pred = model.Softmax(fc8,
"pred")
241 xent = model.LabelCrossEntropy([pred,
"label"],
"xent")
242 loss = model.AveragedLoss(xent,
"loss")
246 def VGGA(order, cudnn_ws):
249 use_cudnn=
True, cudnn_exhaustive_search=
True,
250 ws_nbytes_limit=cudnn_ws)
258 (
'ConstantFill', {}),
261 relu1 = model.Relu(conv1,
"conv1")
262 pool1 = model.MaxPool(relu1,
"pool1", kernel=2, stride=2)
270 (
'ConstantFill', {}),
273 relu2 = model.Relu(conv2,
"conv2")
274 pool2 = model.MaxPool(relu2,
"pool2", kernel=2, stride=2)
282 (
'ConstantFill', {}),
285 relu3 = model.Relu(conv3,
"conv3")
293 (
'ConstantFill', {}),
296 relu4 = model.Relu(conv4,
"conv4")
297 pool4 = model.MaxPool(relu4,
"pool4", kernel=2, stride=2)
305 (
'ConstantFill', {}),
308 relu5 = model.Relu(conv5,
"conv5")
316 (
'ConstantFill', {}),
319 relu6 = model.Relu(conv6,
"conv6")
320 pool6 = model.MaxPool(relu6,
"pool6", kernel=2, stride=2)
328 (
'ConstantFill', {}),
331 relu7 = model.Relu(conv7,
"conv7")
339 (
'ConstantFill', {}),
342 relu8 = model.Relu(conv8,
"conv8")
343 pool8 = model.MaxPool(relu8,
"pool8", kernel=2, stride=2)
346 pool8,
"fcix", 512 * 7 * 7, 4096, (
'XavierFill', {}),
349 reluix = model.Relu(fcix,
"fcix")
351 reluix,
"fcx", 4096, 4096, (
'XavierFill', {}), (
'ConstantFill', {})
353 relux = model.Relu(fcx,
"fcx")
355 relux,
"fcxi", 4096, 1000, (
'XavierFill', {}), (
'ConstantFill', {})
357 pred = model.Softmax(fcxi,
"pred")
358 xent = model.LabelCrossEntropy([pred,
"label"],
"xent")
359 loss = model.AveragedLoss(xent,
"loss")
363 def _InceptionModule(
364 model, input_blob, input_depth, output_name, conv1_depth, conv3_depths,
365 conv5_depths, pool_depth
369 input_blob, output_name +
":conv1", input_depth, conv1_depth, 1,
370 (
'XavierFill', {}), (
'ConstantFill', {})
372 conv1 = model.Relu(conv1, conv1)
374 conv3_reduce = model.Conv(
375 input_blob, output_name +
":conv3_reduce", input_depth, conv3_depths[0],
376 1, (
'XavierFill', {}), (
'ConstantFill', {})
378 conv3_reduce = model.Relu(conv3_reduce, conv3_reduce)
381 output_name +
":conv3",
386 (
'ConstantFill', {}),
389 conv3 = model.Relu(conv3, conv3)
391 conv5_reduce = model.Conv(
392 input_blob, output_name +
":conv5_reduce", input_depth, conv5_depths[0],
393 1, (
'XavierFill', {}), (
'ConstantFill', {})
395 conv5_reduce = model.Relu(conv5_reduce, conv5_reduce)
398 output_name +
":conv5",
403 (
'ConstantFill', {}),
406 conv5 = model.Relu(conv5, conv5)
408 pool = model.MaxPool(
410 output_name +
":pool",
415 pool_proj = model.Conv(
416 pool, output_name +
":pool_proj", input_depth, pool_depth, 1,
417 (
'XavierFill', {}), (
'ConstantFill', {})
419 pool_proj = model.Relu(pool_proj, pool_proj)
420 output = model.Concat([conv1, conv3, conv5, pool_proj], output_name)
424 def Inception(order, cudnn_ws):
426 order, name=
"inception",
427 use_cudnn=
True, cudnn_exhaustive_search=
True,
428 ws_nbytes_limit=cudnn_ws)
436 (
'ConstantFill', {}),
440 relu1 = model.Relu(conv1,
"conv1")
441 pool1 = model.MaxPool(relu1,
"pool1", kernel=3, stride=2, pad=1)
443 pool1,
"conv2a", 64, 64, 1, (
'XavierFill', {}), (
'ConstantFill', {})
445 conv2a = model.Relu(conv2a, conv2a)
453 (
'ConstantFill', {}),
456 relu2 = model.Relu(conv2,
"conv2")
457 pool2 = model.MaxPool(relu2,
"pool2", kernel=3, stride=2, pad=1)
459 inc3 = _InceptionModule(
460 model, pool2, 192,
"inc3", 64, [96, 128], [16, 32], 32
462 inc4 = _InceptionModule(
463 model, inc3, 256,
"inc4", 128, [128, 192], [32, 96], 64
465 pool5 = model.MaxPool(inc4,
"pool5", kernel=3, stride=2, pad=1)
466 inc5 = _InceptionModule(
467 model, pool5, 480,
"inc5", 192, [96, 208], [16, 48], 64
469 inc6 = _InceptionModule(
470 model, inc5, 512,
"inc6", 160, [112, 224], [24, 64], 64
472 inc7 = _InceptionModule(
473 model, inc6, 512,
"inc7", 128, [128, 256], [24, 64], 64
475 inc8 = _InceptionModule(
476 model, inc7, 512,
"inc8", 112, [144, 288], [32, 64], 64
478 inc9 = _InceptionModule(
479 model, inc8, 528,
"inc9", 256, [160, 320], [32, 128], 128
481 pool9 = model.MaxPool(inc9,
"pool9", kernel=3, stride=2, pad=1)
482 inc10 = _InceptionModule(
483 model, pool9, 832,
"inc10", 256, [160, 320], [32, 128], 128
485 inc11 = _InceptionModule(
486 model, inc10, 832,
"inc11", 384, [192, 384], [48, 128], 128
488 pool11 = model.AveragePool(inc11,
"pool11", kernel=7, stride=1)
490 pool11,
"fc", 1024, 1000, (
'XavierFill', {}), (
'ConstantFill', {})
495 pred = model.Softmax(fc,
"pred")
496 xent = model.LabelCrossEntropy([pred,
"label"],
"xent")
497 loss = model.AveragedLoss(xent,
"loss")
502 """ Simple plain SGD update -- not tuned to actually train the models """ 503 ITER = model.Iter(
"iter")
504 LR = model.LearningRate(
505 ITER,
"LR", base_lr=-1e-8, policy=
"step", stepsize=10000, gamma=0.999)
506 ONE = model.param_init_net.ConstantFill([],
"ONE", shape=[1], value=1.0)
507 for param
in model.params:
508 param_grad = model.param_to_grad[param]
509 model.WeightedSum([param, ONE, param_grad, LR], param)
512 def Benchmark(model_gen, arg):
513 model, input_size = model_gen(arg.order, arg.cudnn_ws)
514 model.Proto().type = arg.net_type
515 model.Proto().num_workers = arg.num_workers
519 if arg.order ==
"NCHW":
520 input_shape = [arg.batch_size, 3, input_size, input_size]
522 input_shape = [arg.batch_size, input_size, input_size, 3]
523 if arg.model ==
"MLP":
524 input_shape = [arg.batch_size, input_size]
526 model.param_init_net.GaussianFill(
533 model.param_init_net.UniformIntFill(
536 shape=[arg.batch_size, ],
542 print(
'{}: running forward only.'.format(arg.model))
544 print(
'{}: running forward-backward.'.format(arg.model))
545 model.AddGradientOperators([
"loss"])
547 if arg.order ==
'NHWC':
550 'NHWC order with CuDNN may not be supported yet, so I might\n' 555 model.param_init_net.RunAllOnGPU()
556 model.net.RunAllOnGPU()
559 for op
in model.net.Proto().op:
560 op.engine = arg.engine
565 "{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size),
"w" 567 fid.write(str(model.param_init_net.Proto()))
568 with open(
"{0}.pbtxt".format(arg.model, arg.batch_size),
"w")
as fid:
569 fid.write(str(model.net.Proto()))
574 model.net.Proto().name, arg.warmup_iterations, arg.iterations,
575 arg.layer_wise_benchmark)
578 def GetArgumentParser():
579 parser = argparse.ArgumentParser(description=
"Caffe2 benchmark.")
584 help=
"The batch size." 586 parser.add_argument(
"--model", type=str, help=
"The model to benchmark.")
591 help=
"The order to evaluate." 596 help=
"The cudnn workspace size." 602 help=
"Number of iterations to run the network." 605 "--warmup_iterations",
608 help=
"Number of warm-up iterations before benchmarking." 613 help=
"If set, only run the forward pass." 616 "--layer_wise_benchmark",
618 help=
"If True, run the layer-wise benchmark as well." 623 help=
"If True, run testing on CPU instead of GPU." 629 help=
"If set, blindly prefer the given engine(s) for every op.")
633 help=
"If True, dump the model prototxts to disk." 635 parser.add_argument(
"--net_type", type=str, default=
"dag")
636 parser.add_argument(
"--num_workers", type=int, default=2)
637 parser.add_argument(
"--use-nvtx", default=
False, action=
'store_true')
638 parser.add_argument(
"--htrace_span_log_path", type=str)
642 if __name__ ==
'__main__':
643 args = GetArgumentParser().parse_args()
645 not args.batch_size
or not args.model
or not args.order
647 GetArgumentParser().print_help()
650 [
'caffe2',
'--caffe2_log_level=0'] +
651 ([
'--caffe2_use_nvtx']
if args.use_nvtx
else []) +
652 ([
'--caffe2_htrace_span_log_path=' + args.htrace_span_log_path]
653 if args.htrace_span_log_path
else []))
657 'OverFeat': OverFeat,
659 'Inception': Inception,
662 Benchmark(model_map[args.model], args)
def CreateNet(net, overwrite=False, input_blobs=None)
def AddParameterUpdate(model)