## @package sparse_to_dense
# Module caffe2.python.layers.sparse_to_dense
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from caffe2.python import schema
from caffe2.python.layers.layers import (
    ModelLayer,
)


class SparseToDense(ModelLayer):
    """Layer that turns sparse feature records into fixed-width dense blobs.

    For every ``(field, FeatureSpec)`` pair in ``input_specs`` it declares a
    dense output of width ``len(feature_ids)`` and, in :meth:`add_ops`, emits
    the ``SparseToDenseMask`` operators that scatter the sparse inputs into
    those dense blobs, using the model's global ``ZERO`` / ``ZERO_RANGE``
    constants as the default-value inputs for ids absent from a record.
    """

    _known_types = ['FLOAT', 'ID_LIST', 'ID_SCORE_LIST']

    def __init__(self, model, input_record, input_specs,
                 name='sparse_to_dense', **kwargs):
        """
        `input_specs` follows the format of FeatureSpec from schema. To be more
        precise it's a namedtuple that should have:
            'feature_type', 'feature_names', 'feature_ids'
        """
        super(SparseToDense, self).__init__(model, name,
                                            input_record, **kwargs)

        self.input_specs = input_specs

        outputs = []
        for field, feature_specs in self.input_specs:
            # Every feature id must have a matching feature name.
            assert len(feature_specs.feature_names) ==\
                len(feature_specs.feature_ids)
            if feature_specs.feature_type == 'FLOAT':
                # One float per feature id -> dense float vector of that width.
                outputs.append((
                    field,
                    schema.Scalar(
                        (np.float32, (len(feature_specs.feature_ids), )),
                        model.net.NextScopedBlob(name + '_' + field + '_output')
                    )
                ))
            elif feature_specs.feature_type == 'ID_LIST':
                # Each feature id maps to a (start, length) range into the
                # shared values blob of the incoming record.
                outputs.append((
                    field,
                    schema.Struct(
                        ('ranges',
                            schema.Scalar(
                                (
                                    np.int32,
                                    (len(feature_specs.feature_ids), 2)
                                ),
                                model.net.NextScopedBlob(
                                    name + '_' + field + '_ranges')
                            ),
                        ),
                        ('values', input_record[field].values.items),
                    )
                ))
            elif feature_specs.feature_type == 'ID_SCORE_LIST':
                # Like ID_LIST, but the ranges index the parallel ids/scores
                # blobs of the incoming record.
                outputs.append((
                    field,
                    schema.Struct(
                        ('ranges',
                            schema.Scalar(
                                (
                                    np.int32,
                                    (len(feature_specs.feature_ids), 2)
                                ),
                                model.net.NextScopedBlob(
                                    name + '_' + field + '_ranges')
                            ),
                        ),
                        ('ids', input_record[field].values.keys),
                        ('scores', input_record[field].values.values),
                    )
                ))
            else:
                raise TypeError(
                    "Unsupported input type: {0}".
                    format(feature_specs.feature_type))

        # TODO(amalevich): This schema is producing ranges. And thus if there is
        # something using it it should support ranges as well. It might be
        # confusing, if we don't add better support for ranges/have it as a
        # first layer
        self.output_schema = schema.Struct(
            *outputs
        )

        # TODO(amalevich): Consider moving this data to schema, instead.
        # Structs don't support attaching metadata to them and cloning
        # will break things badly, but this is the most elegant way to pass
        # this info around. Should we change it or it'll be too much work and
        # not worth it?
        for field, feature_specs in input_specs:
            schema.attach_metadata_to_scalars(
                self.output_schema[field],
                schema.Metadata(
                    feature_specs=feature_specs)
            )
        self.zero = model.global_constants['ZERO']
        self.zero_range = model.global_constants['ZERO_RANGE']

    # Add operators to all types that need to be densified
    def add_ops(self, net):
        """Emit the densifying operators for every configured field into `net`.

        FLOAT fields go straight through SparseToDenseMask; the two list
        types first convert per-example lengths into (start, length) ranges
        via LengthsToRanges and densify those ranges instead.
        """
        record = self.input_record
        for field, feature_specs in self.input_specs:
            if feature_specs.feature_type == 'FLOAT':
                net.SparseToDenseMask(
                    [
                        record[field].keys(),
                        record[field].values(),
                        self.zero,
                        record[field].lengths(),
                    ],
                    [
                        self.output_schema[field](),
                    ],
                    mask=feature_specs.feature_ids,
                )
            elif feature_specs.feature_type == 'ID_LIST':
                id_list_ranges = net.LengthsToRanges(
                    record[field].values.lengths(),
                    net.NextScopedBlob('id_list_ranges')
                )
                net.SparseToDenseMask(
                    [
                        record[field].keys(), id_list_ranges, self.zero_range,
                        record[field].lengths()
                    ],
                    self.output_schema[field].ranges(),
                    mask=feature_specs.feature_ids,
                )
            elif feature_specs.feature_type == 'ID_SCORE_LIST':
                # TODO: merge this to the case above?
                id_list_ranges = net.LengthsToRanges(
                    record[field].values.lengths(),
                    net.NextScopedBlob('id_score_list_ranges')
                )
                net.SparseToDenseMask(
                    [
                        record[field].keys(), id_list_ranges, self.zero_range,
                        record[field].lengths()
                    ],
                    self.output_schema[field].ranges(),
                    mask=feature_specs.feature_ids,
                )

    def get_metadata(self):
        """Return one (feature-info dict, output blobs, output types) tuple
        per configured field, in `input_specs` order.

        FLOAT fields additionally carry ``'cardinality': 1`` in the info dict.
        """
        metadata = []
        for field, feature_specs in self.input_specs:
            metadata.append(
                (
                    {
                        'type': feature_specs.feature_type,
                        'names': feature_specs.feature_names,
                        'ids': feature_specs.feature_ids,
                    },
                    self.output_schema[field].field_blobs(),
                    self.output_schema[field].field_types()
                )
            )
            if feature_specs.feature_type == 'FLOAT':
                metadata[-1][0]['cardinality'] = 1
        return metadata