Caffe2 - Python API
A deep learning, cross platform ML framework
lstm_benchmark.py
1 
3 from __future__ import absolute_import
4 from __future__ import division
5 from __future__ import print_function
6 from __future__ import unicode_literals
7 
8 from caffe2.proto import caffe2_pb2
9 from caffe2.python import cnn, workspace, core, utils, rnn_cell
10 
11 import argparse
12 import numpy as np
13 import time
14 
15 import logging
16 
17 logging.basicConfig()
18 log = logging.getLogger("lstm_bench")
19 log.setLevel(logging.DEBUG)
20 
21 
def generate_data(T, shape, num_labels):
    '''
    Fill a queue with input data
    '''
    log.info("Generating T={} sequence batches".format(T))

    # One net creates the two queues (run once); a second net enqueues
    # whatever is currently in the scratch blobs each time it is run.
    init_net = core.Net('generate_input_init')
    queue = init_net.CreateBlobsQueue(
        [], "inputqueue", num_blobs=1, capacity=T,
    )
    label_queue = init_net.CreateBlobsQueue(
        [], "labelqueue", num_blobs=1, capacity=T,
    )

    workspace.RunNetOnce(init_net)

    enqueue_net = core.Net('generate_input')
    enqueue_net.EnqueueBlobs([queue, "scratch"], ["scratch"])
    enqueue_net.EnqueueBlobs([label_queue, "label_scr"], ["label_scr"])

    np.random.seed(2603)

    for step in range(T):
        if step % 50 == 0:
            print("Generating data {}/{}".format(step, T))
        # Randomize the seqlength
        seq_len = np.random.randint(1, shape[0])
        batch_shape = [seq_len] + shape[1:]
        data = np.random.rand(*batch_shape).astype(np.float32)
        batch_size = batch_shape[1]
        label_range = num_labels * batch_size
        label_batch = (
            np.random.rand(batch_shape[0]) * label_range
        ).astype(np.int32)
        # Feed the scratch blobs, then run the enqueue net to push them.
        workspace.FeedBlob("scratch", data)
        workspace.FeedBlob("label_scr", label_batch)
        workspace.RunNetOnce(enqueue_net.Proto())

    log.info("Finished data generation")

    return queue, label_queue
59 
60 
def create_model(args, queue, label_queue, input_shape):
    """Build the LSTM benchmark model.

    Dequeues input/label batches from the given queues, runs either the
    Python-level LSTM ("own") or the cuDNN LSTM ("cudnn"), attaches a
    SoftmaxWithLoss head plus gradients, and wires the final recurrent
    states back into the init blobs so state carries across iterations.

    Returns (model, output) where output is the LSTM output blob.
    """
    model = cnn.CNNModelHelper(name="LSTM_bench")
    seq_lengths, hidden_init, cell_init, target = \
        model.net.AddExternalInputs(
            'seq_lengths',
            'hidden_init',
            'cell_init',
            'target',
        )
    input_blob = model.DequeueBlobs(queue, "input_data")
    labels = model.DequeueBlobs(label_queue, "label")

    if args.implementation == "own":
        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob=input_blob,
            seq_lengths=seq_lengths,
            initial_states=(hidden_init, cell_init),
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="lstm1",
            memory_optimization=args.memory_optimization,
        )
    elif args.implementation == "cudnn":
        # We need to feed a placeholder input so that RecurrentInitOp
        # can infer the dimensions.
        model.param_init_net.ConstantFill([], input_blob, shape=input_shape)
        output, last_hidden, _ = rnn_cell.cudnn_LSTM(
            model=model,
            input_blob=input_blob,
            initial_states=(hidden_init, cell_init),
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="cudnnlstm",
        )
        # cudnn_LSTM does not expose the final cell state here; fall back
        # to the hidden state, matching the previous behavior of this path.
        last_state = last_hidden
    else:
        assert False, "Unknown implementation"

    weights = model.UniformFill(labels, "weights")
    softmax, loss = model.SoftmaxWithLoss(
        [model.Flatten(output), labels, weights],
        ['softmax', 'loss'],
    )

    model.AddGradientOperators([loss])

    # carry states over
    model.net.Copy(last_hidden, hidden_init)
    # Bug fix: cell_init must be seeded from the final *cell* state, not
    # from the hidden state (the original copied last_hidden here too).
    model.net.Copy(last_state, cell_init)

    # Zero-initialize the recurrent state blobs: shape (1, batch, hidden).
    workspace.FeedBlob(hidden_init, np.zeros(
        [1, args.batch_size, args.hidden_dim], dtype=np.float32
    ))
    workspace.FeedBlob(cell_init, np.zeros(
        [1, args.batch_size, args.hidden_dim], dtype=np.float32
    ))
    return model, output
119 
120 
def Caffe2LSTM(args):
    """Generate data, build the model, and time the LSTM benchmark run.

    Logs entries-per-second every args.iters_to_report iterations and a
    total EPS figure at the end.
    """
    T = args.data_size // args.batch_size

    input_blob_shape = [args.seq_length, args.batch_size, args.input_dim]
    queue, label_queue = generate_data(T // args.seq_length,
                                       input_blob_shape,
                                       args.hidden_dim)

    # Reconstructed call: the `workspace.FeedBlob(` opener was lost during
    # extraction. All sequences are fed at the max length.
    workspace.FeedBlob(
        "seq_lengths",
        np.array([args.seq_length] * args.batch_size, dtype=np.int32)
    )

    model, output = create_model(args, queue, label_queue, input_blob_shape)

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    last_time = time.time()
    start_time = last_time
    num_iters = T // args.seq_length
    entries_per_iter = args.seq_length * args.batch_size

    # Run the Benchmark
    log.info("------ Starting benchmark ------")
    for iteration in range(0, num_iters, args.iters_to_report):
        iters_once = min(args.iters_to_report, num_iters - iteration)
        workspace.RunNet(model.net.Proto().name, iters_once)

        new_time = time.time()
        log.info("Iter: {} / {}. Entries Per Second: {}k.".format(
            iteration,
            num_iters,
            entries_per_iter * iters_once / (new_time - last_time) // 1000,
        ))
        last_time = new_time

    log.info("Done. Total EPS: {}k".format(
        entries_per_iter * num_iters / (time.time() - start_time) // 1000,
    ))
161 
162 
@utils.debug
def Benchmark(args):
    """Run the Caffe2 LSTM benchmark (wrapped with utils.debug)."""
    Caffe2LSTM(args)
166 
167 
def GetArgumentParser():
    """Build the command-line parser for the LSTM benchmark."""
    parser = argparse.ArgumentParser(description="LSTM benchmark.")

    # Integer-valued options: (flag, default, help), in display order.
    int_options = [
        ("--hidden_dim", 40, "Hidden dimension"),
        ("--input_dim", 40, "Input dimension"),
        ("--batch_size", 256, "The batch size."),
        ("--seq_length", 20, "Max sequence length"),
        ("--data_size", 10000000, "Number of data points to generate"),
        ("--iters_to_report", 100, "Number of iteration to report progress"),
    ]
    for flag, default, help_text in int_options:
        parser.add_argument(flag, type=int, default=default, help=help_text)

    parser.add_argument(
        "--gpu",
        action="store_true",
        help="Run all on GPU",
    )
    parser.add_argument(
        "--implementation",
        type=str,
        default="own",
        help="'cudnn' or 'own'",
    )
    parser.add_argument(
        "--memory_optimization",
        action="store_true",
        help="Whether to use memory optimized LSTM or not",
    )

    return parser
225 
226 
if __name__ == '__main__':
    args = GetArgumentParser().parse_args()

    # Reconstructed call: the `workspace.GlobalInit([` opener was lost
    # during extraction. Initializes Caffe2 global state / logging.
    workspace.GlobalInit([
        'caffe2',
        '--caffe2_log_level=0',
        '--caffe2_print_blob_sizes_at_exit=0'])

    device = core.DeviceOption(
        caffe2_pb2.CUDA if args.gpu else caffe2_pb2.CPU, 0)

    with core.DeviceScope(device):
        Benchmark(args)