from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.proto import caffe2_pb2
from caffe2.python import cnn, workspace, core, utils, rnn_cell

import argparse
import logging
import time

import numpy as np

logging.basicConfig()
log = logging.getLogger("lstm_bench")
log.setLevel(logging.DEBUG)
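# Overview (reconstructed from the code below): this script benchmarks two
# Caffe2 LSTM implementations -- the graph-built "own" rnn_cell.LSTM and the
# cuDNN-backed rnn_cell.cudnn_LSTM -- and reports throughput as "entries per
# second", where one entry is a single (timestep, batch element) pair.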
def generate_data(T, shape, num_labels):
    '''
    Fill a queue with input data
    '''
    log.info("Generating T={} sequence batches".format(T))

    generate_input_init_net = core.Net('generate_input_init')
    queue = generate_input_init_net.CreateBlobsQueue(
        [], "inputqueue", num_blobs=1, capacity=T,
    )
    label_queue = generate_input_init_net.CreateBlobsQueue(
        [], "labelqueue", num_blobs=1, capacity=T,
    )
    workspace.RunNetOnce(generate_input_init_net)

    generate_input_net = core.Net('generate_input')
    generate_input_net.EnqueueBlobs([queue, "scratch"], ["scratch"])
    generate_input_net.EnqueueBlobs([label_queue, "label_scr"], ["label_scr"])

    for t in range(T):
        print("Generating data {}/{}".format(t, T))
        # Randomize the sequence length of each batch within [1, shape[0]).
        random_shape = [np.random.randint(1, shape[0])] + shape[1:]
        X = np.random.rand(*random_shape).astype(np.float32)
        batch_size = random_shape[1]
        L = num_labels * batch_size
        labels = (np.random.rand(random_shape[0]) * L).astype(np.int32)
        workspace.FeedBlob("scratch", X)
        workspace.FeedBlob("label_scr", labels)
        workspace.RunNetOnce(generate_input_net)

    log.info("Finished data generation")

    return queue, label_queue
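# Note on the input pipeline: generate_data() creates the blobs queues and
# runs the enqueue net once per batch up front, while the model net built in
# create_model() only dequeues from them. The timed loop in Caffe2LSTM()
# therefore measures dequeue + LSTM compute, not numpy data generation.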
def create_model(args, queue, label_queue, input_shape):
    model = cnn.CNNModelHelper(name="lstm_bench")
    seq_lengths, hidden_init, cell_init, target = \
        model.net.AddExternalInputs(
            'seq_lengths', 'hidden_init', 'cell_init', 'target',
        )

    input_blob = model.DequeueBlobs(queue, "input_data")
    labels = model.DequeueBlobs(label_queue, "label")

    if args.implementation == "own":
        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob=input_blob,
            seq_lengths=seq_lengths,
            initial_states=(hidden_init, cell_init),
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="lstm",
            memory_optimization=args.memory_optimization,
        )
    elif args.implementation == "cudnn":
        # The cuDNN path needs a placeholder input of the full shape so the
        # recurrent op can infer its dimensions at init time.
        model.param_init_net.ConstantFill([], input_blob, shape=input_shape)
        output, last_hidden, _ = rnn_cell.cudnn_LSTM(
            model=model,
            input_blob=input_blob,
            initial_states=(hidden_init, cell_init),
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="cudnn_lstm",
        )
    else:
        assert False, "Unknown implementation"

    weights = model.UniformFill(labels, "weights")
    softmax, loss = model.SoftmaxWithLoss(
        [model.Flatten(output), labels, weights],
        ['softmax', 'loss'],
    )

    model.AddGradientOperators([loss])

    # Carry the final hidden output over into the next run's initial states.
    model.net.Copy(last_hidden, hidden_init)
    model.net.Copy(last_hidden, cell_init)

    workspace.FeedBlob(hidden_init, np.zeros(
        [1, args.batch_size, args.hidden_dim], dtype=np.float32
    ))
    workspace.FeedBlob(cell_init, np.zeros(
        [1, args.batch_size, args.hidden_dim], dtype=np.float32
    ))

    return model, output
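# The two Copy ops in create_model() make the benchmark stateful: the last
# hidden output of each run is fed back as both the hidden and the cell
# initial state of the next run, so the zero-filled init blobs of shape
# [1, batch_size, hidden_dim] are only used on the very first iteration.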
def Caffe2LSTM(args):
    T = args.data_size // args.batch_size

    input_blob_shape = [args.seq_length, args.batch_size, args.input_dim]
    queue, label_queue = generate_data(
        T // args.seq_length, input_blob_shape, args.hidden_dim)

    workspace.FeedBlob(
        "seq_lengths",
        np.array([args.seq_length] * args.batch_size, dtype=np.int32)
    )

    model, output = create_model(args, queue, label_queue, input_blob_shape)

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    last_time = time.time()
    start_time = last_time
    num_iters = T // args.seq_length
    entries_per_iter = args.seq_length * args.batch_size

    log.info("------ Starting benchmark ------")
    for iteration in range(0, num_iters, args.iters_to_report):
        iters_once = min(args.iters_to_report, num_iters - iteration)
        workspace.RunNet(model.net.Proto().name, iters_once)

        new_time = time.time()
        log.info("Iter: {} / {}. Entries Per Second: {}k.".format(
            iteration, num_iters,
            entries_per_iter * iters_once / (new_time - last_time) // 1000,
        ))
        last_time = new_time

    log.info("Done. Total EPS: {}k".format(
        entries_per_iter * num_iters / (time.time() - start_time) // 1000,
    ))
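# Throughput accounting: entries_per_iter = seq_length * batch_size, so the
# reported "Entries Per Second" is timesteps times batch elements processed
# per wall-clock second, floor-divided by 1000 to print in thousands. For
# example (hypothetical values), seq_length=20 and batch_size=128 give
# entries_per_iter = 2560.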
def GetArgumentParser():
    parser = argparse.ArgumentParser(description="LSTM benchmark.")

    parser.add_argument("--hidden_dim", type=int, help="Hidden dimension")
    parser.add_argument("--input_dim", type=int, help="Input dimension")
    parser.add_argument("--batch_size", type=int, help="The batch size.")
    parser.add_argument("--seq_length", type=int, help="Max sequence length")
    parser.add_argument("--data_size", type=int,
                        help="Number of data points to generate")
    parser.add_argument("--iters_to_report", type=int,
                        help="Number of iterations between progress reports")
    parser.add_argument("--gpu", action="store_true", help="Run all on GPU")
    parser.add_argument("--implementation", type=str,
                        help="'cudnn' or 'own'")
    parser.add_argument("--memory_optimization", action="store_true",
                        help="Whether to use the memory-optimized LSTM or not")

    return parser
if __name__ == '__main__':
    args = GetArgumentParser().parse_args()

    workspace.GlobalInit([
        'caffe2',
        '--caffe2_log_level=0',
        '--caffe2_print_blob_sizes_at_exit=0'])

    device = core.DeviceOption(
        caffe2_pb2.CUDA if args.gpu else caffe2_pb2.CPU, 0)

    with core.DeviceScope(device):
        Caffe2LSTM(args)
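# Example invocation (script name and flag values are illustrative, not
# defaults from the source):
#   python lstm_benchmark.py --gpu --implementation cudnn \
#       --hidden_dim 512 --input_dim 64 --batch_size 128 --seq_length 20 \
#       --data_size 1000000 --iters_to_report 20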
# Reference signatures for the library calls used above:
#   LSTM(model, input_blob, seq_lengths, initial_states, dim_in, dim_out,
#        scope, outputs_with_grads=(0,), return_params=False,
#        memory_optimization=False, forget_bias=0.0)
#   cudnn_LSTM(model, input_blob, initial_states, dim_in, dim_out, scope,
#              recurrent_params=None, input_params=None, num_layers=1,
#              return_params=False)
#   RunNet(name, num_iter=1)
#   DeviceOption(device_type, cuda_gpu_id=0, random_seed=None)
#   generate_data(T, shape, num_labels)
#   FeedBlob(name, arr, device_option=None)
#   CreateNet(net, overwrite=False, input_blobs=None)