3 """ A bunch of util functions to build Seq2Seq models with Caffe2.""" 5 from __future__
import absolute_import
6 from __future__
import division
7 from __future__
import print_function
8 from __future__
import unicode_literals
10 from caffe2.python
import rnn_cell
11 from caffe2.python.cnn
import CNNModelHelper
class ModelHelper(CNNModelHelper):

    def __init__(self, init_params=True):
        super(ModelHelper, self).__init__(
            init_params=init_params,
        )

    def AddParam(self, name, init=None, init_value=None, trainable=True):
        """Adds a parameter to the model's net and its initializer if needed.

        Args:
            init: a tuple (<initialization_op_name>, <initialization_op_kwargs>)
            init_value: int, float or str. Can be used instead of `init` as a
                simple constant initializer
            trainable: bool, whether to compute gradient for this param or not
        """
        if init_value is not None:
            assert init is None
            assert type(init_value) in [int, float, str]
            init = ('ConstantFill', dict(
                shape=[1],
                value=init_value,
            ))

        if self.init_params:
            # Run the named initialization op in the param init net.
            param = self.param_init_net.__getattr__(init[0])(
                [],
                name,
                **init[1]
            )
        else:
            param = self.net.AddExternalInput(name)

        if trainable:
            self.params.append(param)

        return param
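# Usage sketch (illustrative, not part of the original module): the two ways
# AddParam can initialize a parameter. The blob names, shapes, and fill-op
# arguments below are assumptions chosen for the example.
def _example_add_param():
    model = ModelHelper(init_params=True)
    # Explicit (op_name, op_kwargs) initializer tuple.
    weight = model.AddParam(
        name='example_weight',
        init=('GaussianFill', dict(shape=[128], mean=0.0, std=0.1)),
    )
    # Scalar shorthand: expands to ('ConstantFill', dict(shape=[1], value=0.0)).
    bias = model.AddParam(name='example_bias', init_value=0.0)
    return weight, bias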
67 """ Unidirectional (forward pass) LSTM encoder.""" 69 outputs, final_hidden_state, _, final_cell_state =
rnn_cell.LSTM(
71 input_blob=embedded_inputs,
72 seq_lengths=input_lengths,
73 initial_states=(initial_hidden_state, initial_cell_state),
74 dim_in=embedding_size,
75 dim_out=encoder_num_units,
77 outputs_with_grads=([0]
if use_attention
else [1, 3]),
79 return outputs, final_hidden_state, final_cell_state
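# Sketch of how the encoder is wired (assumed setup; the blob names and
# dimensions are placeholders, not taken from the original file).
def _example_unidirectional_encoder():
    model = ModelHelper(init_params=True)
    embedded_inputs = model.net.AddExternalInput('embedded_inputs')
    input_lengths = model.net.AddExternalInput('input_lengths')
    initial_hidden_state = model.net.AddExternalInput('initial_hidden_state')
    initial_cell_state = model.net.AddExternalInput('initial_cell_state')
    # With use_attention=False, gradients flow only through the final hidden
    # and cell states (outputs_with_grads=[1, 3]), not the per-step outputs.
    return rnn_unidirectional_encoder(
        model=model,
        embedded_inputs=embedded_inputs,
        input_lengths=input_lengths,
        initial_hidden_state=initial_hidden_state,
        initial_cell_state=initial_cell_state,
        embedding_size=256,
        encoder_num_units=512,
        use_attention=False,
    )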
92 """ Bidirectional (forward pass and backward pass) LSTM encoder.""" 97 final_hidden_state_fw,
102 input_blob=embedded_inputs,
103 seq_lengths=input_lengths,
104 initial_states=(initial_hidden_state, initial_cell_state),
105 dim_in=embedding_size,
106 dim_out=encoder_num_units,
107 scope=
'forward_encoder',
108 outputs_with_grads=([0]
if use_attention
else [1, 3]),
112 reversed_embedded_inputs = model.net.ReversePackedSegs(
113 [embedded_inputs, input_lengths],
114 [
'reversed_embedded_inputs'],
119 final_hidden_state_bw,
124 input_blob=reversed_embedded_inputs,
125 seq_lengths=input_lengths,
126 initial_states=(initial_hidden_state, initial_cell_state),
127 dim_in=embedding_size,
128 dim_out=encoder_num_units,
129 scope=
'backward_encoder',
130 outputs_with_grads=([0]
if use_attention
else [1, 3]),
133 outputs_bw = model.net.ReversePackedSegs(
134 [outputs_bw, input_lengths],
139 outputs, _ = model.net.Concat(
140 [outputs_fw, outputs_bw],
141 [
'outputs',
'outputs_dim'],
145 final_hidden_state, _ = model.net.Concat(
146 [final_hidden_state_fw, final_hidden_state_bw],
147 [
'final_hidden_state',
'final_hidden_state_dim'],
151 final_cell_state, _ = model.net.Concat(
152 [final_cell_state_fw, final_cell_state_bw],
153 [
'final_cell_state',
'final_cell_state_dim'],
156 return outputs, final_hidden_state, final_cell_state
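# Sketch (assumed wiring, not from the original file): using the
# bidirectional encoder with attention. Dimensions are placeholders; the key
# point is that `outputs` carries 2 * encoder_num_units features, because the
# two directions are concatenated along axis 2.
def _example_bidirectional_encoder():
    model = ModelHelper(init_params=True)
    embedded_inputs = model.net.AddExternalInput('embedded_inputs')
    input_lengths = model.net.AddExternalInput('input_lengths')
    initial_hidden_state = model.net.AddExternalInput('initial_hidden_state')
    initial_cell_state = model.net.AddExternalInput('initial_cell_state')
    outputs, final_hidden_state, final_cell_state = rnn_bidirectional_encoder(
        model=model,
        embedded_inputs=embedded_inputs,
        input_lengths=input_lengths,
        initial_hidden_state=initial_hidden_state,
        initial_cell_state=initial_cell_state,
        embedding_size=256,
        encoder_num_units=512,
        use_attention=True,
    )
    # An attention layer consuming `outputs` would be sized for 2 * 512 = 1024.
    return outputs, final_hidden_state, final_cell_state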