"""Benchmark for ads-based models.

To run a benchmark with full forward-backward-update, do e.g.

OMP_NUM_THREADS=8 _build/opt/caffe2/caffe2/fb/ads/train_cpu.lpar \
    --loaderConfig /mnt/vol/gfsdataswarm-global/namespaces/ads/fblearner/users/ \
    dzhulgakov/caffe2/tests/test_direct_loader.config

For more details, run with --help.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from caffe2.proto import caffe2_pb2
from caffe2.python import workspace, cnn, core
from caffe2.python.fb.models.mlp import (
    mlp,
    mlp_decomp,
    mlp_prune,
    sparse_mlp,
    debug_sparse_mlp,
    debug_sparse_mlp_decomposition,
    debug_sparse_mlp_prune,
)
from caffe2.python.fb.models.loss import BatchLRLoss
from caffe2.python.fb.metrics.metrics import LogScoreReweightedMeasurements
from caffe2.python.fb.executor.executor import Trainer
from caffe2.python.sgd import build_sgd
from caffe2.python import net_drawer
from caffe2.python import SparseTransformer

from collections import namedtuple
import json
import logging
import os
import subprocess
import sys

import numpy as np

from libfb import pyinit
import hiveio.par_init
import fblearner.nn.gen_conf as conf_utils

hiveio.par_init.install_class_path()

for h in logging.root.handlers:
    h.setFormatter(logging.Formatter(
        '%(levelname)s %(asctime)s : %(message)s'))
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

InputData = namedtuple(
    'InputData',
    ['data', 'label', 'weight', 'prod_pred', 'sparse_segments'])
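# Each field except sparse_segments holds the *name* of a workspace blob the
# model builders read from (prod_pred may be None); sparse_segments is a list
# of dicts mapping 'eid'/'key'/'val' to blob names plus a 'size' bucket count.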


def FakeData(args, model):
    logger.info('Input dimensions is %d', args.input_dim)
    # NOTE: the exact random distributions are assumptions; only the blob
    # shapes and dtypes are certain.
    workspace.FeedBlob(
        'data',
        np.random.uniform(
            size=(args.batchSize, args.input_dim)).astype(np.float32))
    workspace.FeedBlob(
        'label',
        np.random.randint(2, size=args.batchSize).astype(np.int32))
    # Assumption: unit per-example weights.
    workspace.FeedBlob('weight', np.ones(args.batchSize, dtype=np.float32))

    # Assumption: a single fake sparse segment with ids/keys drawn from
    # sparseBin buckets and all-ones values.
    sparseBin = 1000
    for name in ('eid_0', 'key_0'):
        workspace.FeedBlob(name, np.random.randint(
            0, sparseBin, args.batchSize).astype(np.int64))
    workspace.FeedBlob('val_0', np.ones(args.batchSize, dtype=np.float32))
    sparseSegments = [
        dict(eid='eid_0', key='key_0', val='val_0', size=sparseBin)]
    return InputData(data='data', label='label', weight='weight',
                     prod_pred=None, sparse_segments=sparseSegments)
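

# NNLoaderData builds the real input pipeline: parse the NNLoader config,
# derive the dense input dimension from the meta file, declare one blob per
# field (three per sparse segment), and wire an NNLoaderCreate/NNLoaderRead
# pair into the model's nets.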
def NNLoaderData(args, model):
    cfg = conf_utils.loadNNLoaderConfig(args.loaderConfig)
    loaderConfig = conf_utils.getLoaderConfigFromNNLoaderConfig(cfg)
    preperConfig = loaderConfig.preperConfig
    metaFile = preperConfig.metaFile
    assert metaFile, 'meta data not found'

    if type(loaderConfig).__name__ == 'LocalDirectLoader':
        loaderConfig.batchConfig.batchSize = args.batchSize
        logger.info('Batch size = %d', loaderConfig.batchConfig.batchSize)
    else:
        logger.info('Batch size unknown here, determined by the reader')

    logger.info('Parsing meta data %s', metaFile)
    cmd = 'cat "{}" | {}'.format(metaFile, args.meta2json)
    meta = json.loads(subprocess.check_output(cmd, shell=True))
    args.input_dim = len(meta['denseFeatureNames'])
    logger.info('Input dimensions is %d', args.input_dim)

    fields = ['data', 'label', 'weight', 'prod_pred']
    sparseSegments = []
    if preperConfig.skipSparse or not preperConfig.sparseSegments.segments:
        logger.info('No sparse features found')
    else:
        segments = loaderConfig.preperConfig.sparseSegments.segments
        logger.info('Found %d sparse segments', len(segments))
        sparseFieldNames = ('eid', 'key', 'val', 'size')
        for i, segment in enumerate(segments):
            sparseData = ['{}_{}'.format(fn, i)
                          for fn in sparseFieldNames[:3]]
            fields.extend(sparseData)
            # The segment size is the largest bucket any input can map to.
            size = max(sf.mod + sf.offset for sf in segment.inputs)
            sparseSegments.append(
                dict(zip(sparseFieldNames, sparseData + [size])))
            logger.info('Sparse segment %d: %s', i, sparseSegments[-1])

    loader = model.param_init_net.NNLoaderCreate(
        [], json_config=conf_utils.structToString(cfg))
    model.net.NNLoaderRead([loader], fields, add_sparse_bias=True)

    return InputData(*(fields[:4] + [sparseSegments]))
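

# sparse_transform rewrites the trained net so that pruned dense layers run
# as sparse ops. The netbuilder/net2list helpers below are assumed to come
# from caffe2's SparseTransformer module; only the Prune2Sparse argument list
# is taken verbatim from the original.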
def sparse_transform(model):
    print("====================================================")
    print("                Sparse Transformer                  ")
    print("====================================================")
    net_root, net_name2id, net_id2node = SparseTransformer.netbuilder(model)
    SparseTransformer.Prune2Sparse(
        net_root, net_id2node, net_name2id, model.net.Proto().op, model)
    op_list = SparseTransformer.net2list(net_root)
    del model.net.Proto().op[:]
    model.net.Proto().op.extend(op_list)
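

# train() drives the benchmark end to end: build the model and its input
# pipeline, attach gradient and SGD operators, optionally move everything to
# GPU, run the Trainer, then sparse-transform the resulting net.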
def train(model_gen, data_gen, args):
    # Assumption: a plain CPU model helper; the exact constructor arguments
    # are not certain.
    model = cnn.CNNModelHelper(name='ads_benchmark')
    input_data = data_gen(args, model)
    logger.info(input_data)
    batch_loss = model_gen(args, model, input_data)

    print(model.net.Proto())
    try:
        # Drawing the net needs pydot; failure is non-fatal.
        graph = net_drawer.GetPydotGraph(model.net.Proto().op, name='net')
        netGraphFile = os.path.join(
            os.path.expanduser('~'), 'public_html/net.png')
        logger.info('Drawing network to %s', netGraphFile)
        graph.write(netGraphFile, format='png')
    except Exception as err:
        logger.error('Failed to draw net: %s', err)

    model.AddGradientOperators([batch_loss.loss])
    # The input blob needs no gradient; drop it if it was emitted last.
    if model.net.Proto().op[-1].output[-1] == 'data_grad':
        logger.info('Skipping grad for data')
        del model.net.Proto().op[-1].output[-1]

    build_sgd(
        model,
        base_learning_rate=args.rateOfLearning,
        policy='inv',
        gamma=args.learnRateDecay,
        power=args.learnRatePower)
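    # The flags document this schedule as lr/(1+t*d)^p with lr=rateOfLearning,
    # d=learnRateDecay and p=learnRatePower; policy='inv' above is an
    # assumption derived from that formula.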

    if args.seed is not None:
        logger.info('Setting random seed to %d', args.seed)
        # Assumption: stamp the seed into the init net's device option so
        # parameter initialization is deterministic.
        model.param_init_net._net.device_option.CopyFrom(
            core.DeviceOption(caffe2_pb2.CPU, random_seed=args.seed))

    if args.gpu:
        model.param_init_net.RunAllOnGPU()
        model.net.RunAllOnGPU()

    if args.net_type:
        model.net.Proto().type = args.net_type
        model.net.Proto().num_workers = args.num_workers

    # Assumption: Trainer's leading positional argument; only the keyword
    # arguments are certain.
    trainer = Trainer(
        model,
        epoch_size=args.epochSize // args.batchSize,
        num_threads=args.numThreads,
        num_epochs=args.maxEpoch,
        reporter=LogScoreReweightedMeasurements(
            batch_loss, input_data.weight, args.negDownsampleRate,
            args.batchSize, args.last_n_stats))
    trainer.run(args.maxEpoch)

    print(model.net.Proto())
    sparse_transform(model)
    print(model.net.Proto())
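

# Every *_model builder parses args.hidden (e.g. '512-256') into the hidden
# layer sizes and appends a 2-unit output layer for the binary BatchLRLoss.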
def mlp_model(args, model, input_data):
    hiddens = [int(s) for s in args.hidden.split('-')] + [2]
    sums = mlp(model, input_data.data, args.input_dim, hiddens)
    return BatchLRLoss(model, sums, input_data.label)


def mlp_decomp_model(args, model, input_data):
    hiddens = [int(s) for s in args.hidden.split('-')] + [2]
    sums = mlp_decomp(model, input_data.data, args.input_dim, hiddens)
    return BatchLRLoss(model, sums, input_data.label)


def mlp_prune_model(args, model, input_data):
    hiddens = [int(s) for s in args.hidden.split('-')] + [2]
    sums = mlp_prune(model, input_data.data, args.input_dim,
                     hiddens, prune_thres=args.prune_thres,
                     comp_lb=args.compress_lb)
    return BatchLRLoss(model, sums, input_data.label)


def sparse_mlp_model(args, model, input_data):
    hiddens = [int(s) for s in args.hidden.split('-')] + [2]
    sums = sparse_mlp(model, input_data.data, args.input_dim, hiddens,
                      input_data.sparse_segments)
    return BatchLRLoss(model, sums, input_data.label)


def debug_sparse_mlp_model(args, model, input_data):
    hiddens = [int(s) for s in args.hidden.split('-')] + [2]
    sums = debug_sparse_mlp(model, input_data.data, args.input_dim, hiddens,
                            input_data.sparse_segments)
    return BatchLRLoss(model, sums, input_data.label)


def debug_sparse_mlp_decomposition_model(args, model, input_data):
    hiddens = [int(s) for s in args.hidden.split('-')] + [2]
    sums = debug_sparse_mlp_decomposition(model, input_data.data,
                                          args.input_dim, hiddens,
                                          input_data.sparse_segments)
    return BatchLRLoss(model, sums, input_data.label)


def debug_sparse_mlp_prune_model(args, model, input_data):
    hiddens = [int(s) for s in args.hidden.split('-')] + [2]
    # Assumption: hiddens is the only extra positional argument here; by
    # analogy with mlp_prune_model it may also take prune_thres/comp_lb.
    sums = debug_sparse_mlp_prune(model, input_data.data, args.input_dim,
                                  hiddens,
                                  input_data.sparse_segments)
    return BatchLRLoss(model, sums, input_data.label)


MODEL_TYPE_FUNCTIONS = {
    'mlp': mlp_model,
    'mlp_decomp': mlp_decomp_model,
    'mlp_prune': mlp_prune_model,
    'sparse_mlp': sparse_mlp_model,
    'debug_sparse_mlp': debug_sparse_mlp_model,
    'debug_sparse_mlp_decomposition': debug_sparse_mlp_decomposition_model,
    'debug_sparse_mlp_prune': debug_sparse_mlp_prune_model,
}
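

# The -model_type flag selects a builder from the table above, e.g.
# (hypothetical flag values, mirroring the docstring's invocation):
#   OMP_NUM_THREADS=8 _build/opt/caffe2/caffe2/fb/ads/train_cpu.lpar \
#       -model_type sparse_mlp -hidden 512-256 -batchSize 200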
if __name__ == '__main__':
    # Keep allocations around when blobs shrink between iterations.
    sys.argv.append('--caffe2_keep_on_shrink')

    parser = pyinit.FbcodeArgumentParser(description='Ads NN trainer')
    parser.add_argument("-batchSize", type=int, default=100,
                        help="The batch size of benchmark data.")
    parser.add_argument("-loaderConfig", type=str,
                        help="Json file with NNLoader's config. If empty some "
                             "fake data is generated instead.")
    parser.add_argument("-meta", type=str, help="Meta file (deprecated)")
    parser.add_argument("-hidden", type=str,
                        help="A dash-separated string specifying the "
                             "model dimensions without the output layer.")
    parser.add_argument("-epochSize", type=int, default=1000000,
                        help="Examples to process in one take")
    parser.add_argument("-maxEpoch", type=int,
                        help="Limit number of epochs, if empty reads all data")
    parser.add_argument("-negDownsampleRate", type=float, default=0.1,
                        help="Used to compute the bias term")
    parser.add_argument("-rateOfLearning", type=float, default=0.02,
                        help="Learning rate, `lr/(1+t*d)^p`")
    parser.add_argument("-learnRateDecay", type=float, default=1e-06,
                        help="d in `lr/(1+t*d)^p`")
    parser.add_argument("-learnRatePower", type=float, default=0.5,
                        help="p in `lr/(1+t*d)^p`")
    parser.add_argument("-numThreads", type=int, help="If set runs hogwild")
    parser.add_argument("-model_type", type=str, default='mlp',
                        choices=MODEL_TYPE_FUNCTIONS.keys(),
                        help="The model to benchmark.")
    parser.add_argument("-seed", type=int, help="random seed.")
    parser.add_argument("-output", help="not implemented")
    parser.add_argument("--input_dim", type=int, default=1500,
                        help="The input dimension of benchmark data.")
    parser.add_argument("--gpu", action="store_true",
                        help="If set, run testing on GPU.")
    parser.add_argument("--net_type", type=str,
                        help="Set the type of the network to run with.")
    parser.add_argument("--num_workers", type=int, default=4,
                        help="The number of workers, if the net type has "
                             "support for them.")
    parser.add_argument("--last_n_stats", type=int, default=0,
                        help="LastN reporting, big values can slow things down")
    parser.add_argument("--meta2json",
                        default='_bin/fblearner/nn/ads/meta2json.llar',
                        help='Path to meta2json.lar')
    parser.add_argument("--prune_thres", type=float, default=0.00001,
                        help="The threshold to prune the weights")
    parser.add_argument("--compress_lb", type=float, default=0.05,
                        help="The lower bound of layer compression")
    args = parser.parse_args()

    data_gen = NNLoaderData if args.loaderConfig else FakeData
    train(MODEL_TYPE_FUNCTIONS[args.model_type], data_gen, args)