Linux Kernel 3.7.1
sch_netem.c
1 /*
2  * net/sched/sch_netem.c Network emulator
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version
7  * 2 of the License.
8  *
9  * Many of the algorithms and ideas for this came from
10  * NIST Net which is not copyrighted.
11  *
12  * Authors: Stephen Hemminger <[email protected]>
13  * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
14  */
15 
16 #include <linux/mm.h>
17 #include <linux/module.h>
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/errno.h>
22 #include <linux/skbuff.h>
23 #include <linux/vmalloc.h>
24 #include <linux/rtnetlink.h>
25 #include <linux/reciprocal_div.h>
26 
27 #include <net/netlink.h>
28 #include <net/pkt_sched.h>
29 #include <net/inet_ecn.h>
30 
31 #define VERSION "1.3"
32 
33 /* Network Emulation Queuing algorithm.
34  ====================================
35 
36  Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
37  Network Emulation Tool"
38  [2] Luigi Rizzo, DummyNet for FreeBSD
39 
40  ----------------------------------------------------------------
41 
42  This started out as a simple way to delay outgoing packets to
43  test TCP but has grown to include most of the functionality
44  of a full blown network emulator like NISTnet. It can delay
45  packets and add random jitter (and correlation). The random
46  distribution can be loaded from a table as well to provide
47  normal, Pareto, or experimental curves. Packet loss,
48  duplication, and reordering can also be emulated.
49 
50  This qdisc does not do classification; that can be handled by
51  layering other disciplines. It does not need to do bandwidth
52  control either, since that can be handled by using a token
53  bucket or another rate-control discipline.
54 
55  Correlated Loss Generator models
56 
57  Added generation of correlated loss according to the
58  "Gilbert-Elliot" model, a 4-state Markov model.
59 
60  References:
61  [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
62  [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
63  and intuitive loss model for packet networks and its implementation
64  in the Netem module in the Linux kernel", available in [1]
65 
66  Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
67  Fabio Ludovici <fabio.ludovici at yahoo.it>
68 */
69 
70 struct netem_sched_data {
71  /* internal t(ime)fifo qdisc uses sch->q and sch->limit */
72 
73  /* optional qdisc for classful handling (NULL at netem init) */
74  struct Qdisc *qdisc;
75 
76  struct qdisc_watchdog watchdog;
77 
78  psched_tdiff_t latency;
79  psched_tdiff_t jitter;
80 
81  u32 loss;
82  u32 ecn;
83  u32 limit;
84  u32 counter;
85  u32 gap;
86  u32 duplicate;
87  u32 reorder;
88  u32 corrupt;
89  u32 rate;
90  s32 packet_overhead;
91  u32 cell_size;
92  u32 cell_size_reciprocal;
93  s32 cell_overhead;
94 
95  struct crndstate {
96  u32 last;
97  u32 rho;
98  } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
99 
100  struct disttable {
101  u32 size;
102  s16 table[0];
103  } *delay_dist;
104 
105  enum {
106  CLG_RANDOM,
107  CLG_4_STATES,
108  CLG_GILB_ELL,
109  } loss_model;
110 
111  /* Correlated Loss Generation models */
112  struct clgstate {
113  /* state of the Markov chain */
114  u8 state;
115 
116  /* 4-states and Gilbert-Elliot models */
117  u32 a1; /* p13 for 4-states or p for GE */
118  u32 a2; /* p31 for 4-states or r for GE */
119  u32 a3; /* p32 for 4-states or h for GE */
120  u32 a4; /* p14 for 4-states or 1-k for GE */
121  u32 a5; /* p23 used only in 4-states */
122  } clg;
123 
124 };
125 
126 /* Time stamp put into socket buffer control block
127  * Only valid when skbs are in our internal t(ime)fifo queue.
128  */
129 struct netem_skb_cb {
130  psched_time_t time_to_send;
131 };
132 
133 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
134 {
135  qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
136  return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
137 }
138 
139 /* init_crandom - initialize correlated random number generator
140  * Use entropy source for initial seed.
141  */
142 static void init_crandom(struct crndstate *state, unsigned long rho)
143 {
144  state->rho = rho;
145  state->last = net_random();
146 }
147 
148 /* get_crandom - correlated random number generator
149  * Next number depends on last value.
150  * rho is scaled to avoid floating point.
151  */
152 static u32 get_crandom(struct crndstate *state)
153 {
154  u64 value, rho;
155  unsigned long answer;
156 
157  if (state->rho == 0) /* no correlation */
158  return net_random();
159 
160  value = net_random();
161  rho = (u64)state->rho + 1;
162  answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
163  state->last = answer;
164  return answer;
165 }
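
/*
 * Illustration (editor's sketch, not part of sch_netem.c): the same
 * fixed-point blend done in user space. A correlation value rho in
 * [0, 2^32-1] weights the previous output against a fresh uniform
 * sample, so rho == 0 gives independent values and rho near 2^32-1
 * gives slowly changing ones. mrand48() merely stands in for
 * net_random(); it is an assumption of this demo, not a kernel API.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct crnd_demo {
	uint32_t last;
	uint32_t rho;
};

static uint32_t crnd_demo_next(struct crnd_demo *s, uint32_t uniform)
{
	uint64_t rho = (uint64_t)s->rho + 1;
	uint64_t mix = (uint64_t)uniform * ((1ULL << 32) - rho) +
		       (uint64_t)s->last * rho;

	s->last = (uint32_t)(mix >> 32);	/* weighted average, no floating point */
	return s->last;
}

int main(void)
{
	struct crnd_demo s = { .last = 0, .rho = 0xc0000000u };	/* roughly 75% correlation */
	int i;

	for (i = 0; i < 5; i++)
		printf("%u\n", crnd_demo_next(&s, (uint32_t)mrand48()));
	return 0;
}
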
166 
167 /* loss_4state - 4-state model loss generator
168  * Generates losses according to the 4-state Markov chain adopted in
169  * the GI (General and Intuitive) loss model.
170  */
171 static bool loss_4state(struct netem_sched_data *q)
172 {
173  struct clgstate *clg = &q->clg;
174  u32 rnd = net_random();
175 
176  /*
177  * Makes a comparison between rnd and the transition
178  * probabilities outgoing from the current state, then decides the
179  * next state and if the next packet has to be transmitted or lost.
180  * The four states correspond to:
181  * 1 => successfully transmitted packets within a gap period
182  * 4 => isolated losses within a gap period
183  * 3 => lost packets within a burst period
184  * 2 => successfully transmitted packets within a burst period
185  */
186  switch (clg->state) {
187  case 1:
188  if (rnd < clg->a4) {
189  clg->state = 4;
190  return true;
191  } else if (clg->a4 < rnd && rnd < clg->a1) {
192  clg->state = 3;
193  return true;
194  } else if (clg->a1 < rnd)
195  clg->state = 1;
196 
197  break;
198  case 2:
199  if (rnd < clg->a5) {
200  clg->state = 3;
201  return true;
202  } else
203  clg->state = 2;
204 
205  break;
206  case 3:
207  if (rnd < clg->a3)
208  clg->state = 2;
209  else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
210  clg->state = 1;
211  return true;
212  } else if (clg->a2 + clg->a3 < rnd) {
213  clg->state = 3;
214  return true;
215  }
216  break;
217  case 4:
218  clg->state = 1;
219  break;
220  }
221 
222  return false;
223 }
224 
225 /* loss_gilb_ell - Gilbert-Elliot model loss generator
226  * Generates losses according to the Gilbert-Elliot loss model or
227  * its special cases (Gilbert or Simple Gilbert)
228  *
229  * Makes a comparison between random number and the transition
230  * probabilities outgoing from the current state, then decides the
231  * next state. A second random number is extracted and the comparison
232  * with the loss probability of the current state decides if the next
233  * packet will be transmitted or lost.
234  */
235 static bool loss_gilb_ell(struct netem_sched_data *q)
236 {
237  struct clgstate *clg = &q->clg;
238 
239  switch (clg->state) {
240  case 1:
241  if (net_random() < clg->a1)
242  clg->state = 2;
243  if (net_random() < clg->a4)
244  return true;
245  case 2:
246  if (net_random() < clg->a2)
247  clg->state = 1;
248  if (clg->a3 > net_random())
249  return true;
250  }
251 
252  return false;
253 }
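
/*
 * Illustration (editor's sketch, not part of sch_netem.c): the textbook
 * first-order expectation for a two-state Gilbert-Elliot chain. Per the
 * clgstate field comments above, a1 = p (good -> bad), a2 = r (bad -> good),
 * a4 = loss probability while good, a3 = loss probability while bad. The
 * stationary probability of the bad state is p / (p + r), so the long-run
 * loss fraction is the per-state loss probabilities weighted by state
 * occupancy. The switch above interleaves its random draws slightly
 * differently, so treat this as an approximation, not a restatement of the
 * code.
 */
#include <stdio.h>

static double ge_expected_loss(double p, double r,
			       double loss_good, double loss_bad)
{
	double pi_bad  = p / (p + r);	/* stationary share of packets sent in the bad state */
	double pi_good = 1.0 - pi_bad;

	return pi_good * loss_good + pi_bad * loss_bad;
}

int main(void)
{
	/* e.g. 1% chance of entering a burst, 30% chance of leaving it,
	 * no loss while good, 70% loss while bad */
	printf("expected loss fraction: %.4f\n",
	       ge_expected_loss(0.01, 0.30, 0.0, 0.70));
	return 0;
}
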
254 
255 static bool loss_event(struct netem_sched_data *q)
256 {
257  switch (q->loss_model) {
258  case CLG_RANDOM:
259  /* Random packet drop 0 => none, ~0 => all */
260  return q->loss && q->loss >= get_crandom(&q->loss_cor);
261 
262  case CLG_4_STATES:
263  /* 4state loss model algorithm (used also for GI model)
264  * Extracts a value from the markov 4 state loss generator,
265  * if it is 1 drops a packet and if needed writes the event in
266  * the kernel logs
267  */
268  return loss_4state(q);
269 
270  case CLG_GILB_ELL:
271  /* Gilbert-Elliot loss model algorithm
272  * Extracts a value from the Gilbert-Elliot loss generator,
273  * if it is 1 drops a packet and if needed writes the event in
274  * the kernel logs
275  */
276  return loss_gilb_ell(q);
277  }
278 
279  return false; /* not reached */
280 }
281 
282 
283 /* tabledist - return a pseudo-randomly distributed value with mean mu and
284  * std deviation sigma. Uses table lookup to approximate the desired
285  * distribution, and a uniformly-distributed pseudo-random source.
286  */
287 static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
288  struct crndstate *state,
289  const struct disttable *dist)
290 {
291  psched_tdiff_t x;
292  long t;
293  u32 rnd;
294 
295  if (sigma == 0)
296  return mu;
297 
298  rnd = get_crandom(state);
299 
300  /* default uniform distribution */
301  if (dist == NULL)
302  return (rnd % (2*sigma)) - sigma + mu;
303 
304  t = dist->table[rnd % dist->size];
305  x = (sigma % NETEM_DIST_SCALE) * t;
306  if (x >= 0)
307  x += NETEM_DIST_SCALE/2;
308  else
309  x -= NETEM_DIST_SCALE/2;
310 
311  return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
312 }
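
/*
 * Illustration (editor's sketch, not part of sch_netem.c): what the return
 * expression in tabledist() computes. The goal is
 * mu + sigma * t / NETEM_DIST_SCALE, where t is a signed table entry in
 * units of NETEM_DIST_SCALE (one standard deviation). Multiplying sigma * t
 * directly could overflow, so sigma is split into quotient and remainder and
 * only the remainder product is divided, with +/- SCALE/2 giving
 * round-to-nearest. DEMO_SCALE is a stand-in value assumed by this demo.
 */
#include <stdio.h>

#define DEMO_SCALE 8192

static long scaled_delay(long mu, long sigma, long t)
{
	long x = (sigma % DEMO_SCALE) * t;

	x += (x >= 0) ? DEMO_SCALE / 2 : -(DEMO_SCALE / 2);	/* round to nearest */
	return x / DEMO_SCALE + (sigma / DEMO_SCALE) * t + mu;
}

int main(void)
{
	/* mean 100000 us, sigma 10000 us, table entry of half a sigma */
	long d = scaled_delay(100000, 10000, DEMO_SCALE / 2);

	printf("delay = %ld us\n", d);	/* mu + sigma/2 = 105000 */
	return 0;
}
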
313 
314 static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
315 {
316  u64 ticks;
317 
318  len += q->packet_overhead;
319 
320  if (q->cell_size) {
321  u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
322 
323  if (len > cells * q->cell_size) /* extra cell needed for remainder */
324  cells++;
325  len = cells * (q->cell_size + q->cell_overhead);
326  }
327 
328  ticks = (u64)len * NSEC_PER_SEC;
329 
330  do_div(ticks, q->rate);
331  return PSCHED_NS2TICKS(ticks);
332 }
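
/*
 * Illustration (editor's sketch, not part of sch_netem.c): the conversion
 * above is just transmission time = bytes / (bytes per second), carried out
 * in nanoseconds before PSCHED_NS2TICKS(). Treating q->rate as a byte rate
 * is an assumption this demo makes because a byte length is divided by it.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_NSEC_PER_SEC 1000000000ULL

static uint64_t tx_time_ns(uint64_t len_bytes, uint64_t rate_bytes_per_sec)
{
	return len_bytes * DEMO_NSEC_PER_SEC / rate_bytes_per_sec;
}

int main(void)
{
	/* a 1500 byte packet at 125000 bytes/s (1 Mbit/s) should take 12 ms */
	printf("%llu ns\n", (unsigned long long)tx_time_ns(1500, 125000));
	return 0;
}
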
333 
334 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
335 {
336  struct sk_buff_head *list = &sch->q;
337  psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
338  struct sk_buff *skb = skb_peek_tail(list);
339 
340  /* Optimize for add at tail */
341  if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
342  return __skb_queue_tail(list, nskb);
343 
344  skb_queue_reverse_walk(list, skb) {
345  if (tnext >= netem_skb_cb(skb)->time_to_send)
346  break;
347  }
348 
349  __skb_queue_after(list, skb, nskb);
350 }
351 
352 /*
353  * Insert one skb into qdisc.
354  * Note: parent depends on return value to account for queue length.
355  * NET_XMIT_DROP: queue length didn't change.
356  * NET_XMIT_SUCCESS: one skb was queued.
357  */
358 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
359 {
360  struct netem_sched_data *q = qdisc_priv(sch);
361  /* We don't fill cb now as skb_unshare() may invalidate it */
362  struct netem_skb_cb *cb;
363  struct sk_buff *skb2;
364  int count = 1;
365 
366  /* Random duplication */
367  if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
368  ++count;
369 
370  /* Drop packet? */
371  if (loss_event(q)) {
372  if (q->ecn && INET_ECN_set_ce(skb))
373  sch->qstats.drops++; /* mark packet */
374  else
375  --count;
376  }
377  if (count == 0) {
378  sch->qstats.drops++;
379  kfree_skb(skb);
380  return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
381  }
382 
383  /* If a delay is expected, orphan the skb. (orphaning usually takes
384  * place at TX completion time, so _before_ the link transit delay)
385  * Ideally, this orphaning should be done after the rate limiting
386  * module, because this breaks TCP Small Queue, and other mechanisms
387  * based on socket sk_wmem_alloc.
388  */
389  if (q->latency || q->jitter)
390  skb_orphan(skb);
391 
392  /*
393  * If we need to duplicate packet, then re-insert at top of the
394  * qdisc tree, since parent queuer expects that only one
395  * skb will be queued.
396  */
397  if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
398  struct Qdisc *rootq = qdisc_root(sch);
399  u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
400  q->duplicate = 0;
401 
402  qdisc_enqueue_root(skb2, rootq);
403  q->duplicate = dupsave;
404  }
405 
406  /*
407  * Randomized packet corruption.
408  * Make a copy if needed since we are modifying the packet.
409  * If packet is going to be hardware checksummed, then
410  * do it now in software before we mangle it.
411  */
412  if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
413  if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
414  (skb->ip_summed == CHECKSUM_PARTIAL &&
415  skb_checksum_help(skb)))
416  return qdisc_drop(skb, sch);
417 
418  skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
419  }
420 
421  if (unlikely(skb_queue_len(&sch->q) >= sch->limit))
422  return qdisc_reshape_fail(skb, sch);
423 
424  sch->qstats.backlog += qdisc_pkt_len(skb);
425 
426  cb = netem_skb_cb(skb);
427  if (q->gap == 0 || /* not doing reordering */
428  q->counter < q->gap - 1 || /* inside last reordering gap */
429  q->reorder < get_crandom(&q->reorder_cor)) {
430  psched_time_t now;
431  psched_tdiff_t delay;
432 
433  delay = tabledist(q->latency, q->jitter,
434  &q->delay_cor, q->delay_dist);
435 
436  now = psched_get_time();
437 
438  if (q->rate) {
439  struct sk_buff_head *list = &sch->q;
440 
441  delay += packet_len_2_sched_time(skb->len, q);
442 
443  if (!skb_queue_empty(list)) {
444  /*
445  * Last packet in queue is reference point (now).
446  * First packet in queue is already in flight,
447  * calculate this time bonus and subtract
448  * from delay.
449  */
450  delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
451  now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
452  }
453  }
454 
455  cb->time_to_send = now + delay;
456  ++q->counter;
457  tfifo_enqueue(skb, sch);
458  } else {
459  /*
460  * Do re-ordering by putting one out of N packets at the front
461  * of the queue.
462  */
463  cb->time_to_send = psched_get_time();
464  q->counter = 0;
465 
466  __skb_queue_head(&sch->q, skb);
467  sch->qstats.requeues++;
468  }
469 
470  return NET_XMIT_SUCCESS;
471 }
472 
473 static unsigned int netem_drop(struct Qdisc *sch)
474 {
475  struct netem_sched_data *q = qdisc_priv(sch);
476  unsigned int len;
477 
478  len = qdisc_queue_drop(sch);
479  if (!len && q->qdisc && q->qdisc->ops->drop)
480  len = q->qdisc->ops->drop(q->qdisc);
481  if (len)
482  sch->qstats.drops++;
483 
484  return len;
485 }
486 
487 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
488 {
489  struct netem_sched_data *q = qdisc_priv(sch);
490  struct sk_buff *skb;
491 
492  if (qdisc_is_throttled(sch))
493  return NULL;
494 
495 tfifo_dequeue:
496  skb = qdisc_peek_head(sch);
497  if (skb) {
498  const struct netem_skb_cb *cb = netem_skb_cb(skb);
499 
500  /* has the scheduled send time been reached? */
501  if (cb->time_to_send <= psched_get_time()) {
502  __skb_unlink(skb, &sch->q);
503  sch->qstats.backlog -= qdisc_pkt_len(skb);
504 
505 #ifdef CONFIG_NET_CLS_ACT
506  /*
507  * If it's at ingress let's pretend the delay is
508  * from the network (tstamp will be updated).
509  */
510  if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
511  skb->tstamp.tv64 = 0;
512 #endif
513 
514  if (q->qdisc) {
515  int err = qdisc_enqueue(skb, q->qdisc);
516 
517  if (unlikely(err != NET_XMIT_SUCCESS)) {
518  if (net_xmit_drop_count(err)) {
519  sch->qstats.drops++;
520  qdisc_tree_decrease_qlen(sch, 1);
521  }
522  }
523  goto tfifo_dequeue;
524  }
525 deliver:
526  qdisc_unthrottled(sch);
527  qdisc_bstats_update(sch, skb);
528  return skb;
529  }
530 
531  if (q->qdisc) {
532  skb = q->qdisc->ops->dequeue(q->qdisc);
533  if (skb)
534  goto deliver;
535  }
536  qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
537  }
538 
539  if (q->qdisc) {
540  skb = q->qdisc->ops->dequeue(q->qdisc);
541  if (skb)
542  goto deliver;
543  }
544  return NULL;
545 }
546 
547 static void netem_reset(struct Qdisc *sch)
548 {
549  struct netem_sched_data *q = qdisc_priv(sch);
550 
551  qdisc_reset_queue(sch);
552  if (q->qdisc)
553  qdisc_reset(q->qdisc);
554  qdisc_watchdog_cancel(&q->watchdog);
555 }
556 
557 static void dist_free(struct disttable *d)
558 {
559  if (d) {
560  if (is_vmalloc_addr(d))
561  vfree(d);
562  else
563  kfree(d);
564  }
565 }
566 
567 /*
568  * Distribution data is a variable size payload containing
569  * signed 16 bit values.
570  */
571 static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
572 {
573  struct netem_sched_data *q = qdisc_priv(sch);
574  size_t n = nla_len(attr)/sizeof(__s16);
575  const __s16 *data = nla_data(attr);
576  spinlock_t *root_lock;
577  struct disttable *d;
578  int i;
579  size_t s;
580 
581  if (n > NETEM_DIST_MAX)
582  return -EINVAL;
583 
584  s = sizeof(struct disttable) + n * sizeof(s16);
585  d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
586  if (!d)
587  d = vmalloc(s);
588  if (!d)
589  return -ENOMEM;
590 
591  d->size = n;
592  for (i = 0; i < n; i++)
593  d->table[i] = data[i];
594 
595  root_lock = qdisc_root_sleeping_lock(sch);
596 
597  spin_lock_bh(root_lock);
598  swap(q->delay_dist, d);
599  spin_unlock_bh(root_lock);
600 
601  dist_free(d);
602  return 0;
603 }
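
/*
 * Illustration (editor's sketch, not part of sch_netem.c): building a
 * delay-distribution table in user space. tabledist() picks entries
 * uniformly and treats each s16 value as a deviation in units of
 * NETEM_DIST_SCALE, so a table that sweeps -SCALE..+SCALE evenly
 * approximates uniform +/- sigma jitter. The table length, the scale value
 * and how tc(8) actually packs TCA_NETEM_DELAY_DIST are assumptions of this
 * sketch.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_DIST_SCALE	8192
#define DEMO_TABLE_LEN	512

int main(void)
{
	int16_t table[DEMO_TABLE_LEN];
	int i;

	for (i = 0; i < DEMO_TABLE_LEN; i++)	/* evenly spaced over [-SCALE, +SCALE] */
		table[i] = (int16_t)(-DEMO_DIST_SCALE +
				     (2 * DEMO_DIST_SCALE * i) / (DEMO_TABLE_LEN - 1));

	printf("first %d, last %d\n", table[0], table[DEMO_TABLE_LEN - 1]);
	return 0;
}
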
604 
605 static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
606 {
607  struct netem_sched_data *q = qdisc_priv(sch);
608  const struct tc_netem_corr *c = nla_data(attr);
609 
610  init_crandom(&q->delay_cor, c->delay_corr);
611  init_crandom(&q->loss_cor, c->loss_corr);
612  init_crandom(&q->dup_cor, c->dup_corr);
613 }
614 
615 static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
616 {
617  struct netem_sched_data *q = qdisc_priv(sch);
618  const struct tc_netem_reorder *r = nla_data(attr);
619 
620  q->reorder = r->probability;
621  init_crandom(&q->reorder_cor, r->correlation);
622 }
623 
624 static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
625 {
626  struct netem_sched_data *q = qdisc_priv(sch);
627  const struct tc_netem_corrupt *r = nla_data(attr);
628 
629  q->corrupt = r->probability;
630  init_crandom(&q->corrupt_cor, r->correlation);
631 }
632 
633 static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
634 {
635  struct netem_sched_data *q = qdisc_priv(sch);
636  const struct tc_netem_rate *r = nla_data(attr);
637 
638  q->rate = r->rate;
639  q->packet_overhead = r->packet_overhead;
640  q->cell_size = r->cell_size;
641  if (q->cell_size)
642  q->cell_size_reciprocal = reciprocal_value(q->cell_size);
643  q->cell_overhead = r->cell_overhead;
644 }
645 
646 static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
647 {
648  struct netem_sched_data *q = qdisc_priv(sch);
649  const struct nlattr *la;
650  int rem;
651 
652  nla_for_each_nested(la, attr, rem) {
653  u16 type = nla_type(la);
654 
655  switch(type) {
656  case NETEM_LOSS_GI: {
657  const struct tc_netem_gimodel *gi = nla_data(la);
658 
659  if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
660  pr_info("netem: incorrect gi model size\n");
661  return -EINVAL;
662  }
663 
664  q->loss_model = CLG_4_STATES;
665 
666  q->clg.state = 1;
667  q->clg.a1 = gi->p13;
668  q->clg.a2 = gi->p31;
669  q->clg.a3 = gi->p32;
670  q->clg.a4 = gi->p14;
671  q->clg.a5 = gi->p23;
672  break;
673  }
674 
675  case NETEM_LOSS_GE: {
676  const struct tc_netem_gemodel *ge = nla_data(la);
677 
678  if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
679  pr_info("netem: incorrect ge model size\n");
680  return -EINVAL;
681  }
682 
683  q->loss_model = CLG_GILB_ELL;
684  q->clg.state = 1;
685  q->clg.a1 = ge->p;
686  q->clg.a2 = ge->r;
687  q->clg.a3 = ge->h;
688  q->clg.a4 = ge->k1;
689  break;
690  }
691 
692  default:
693  pr_info("netem: unknown loss type %u\n", type);
694  return -EINVAL;
695  }
696  }
697 
698  return 0;
699 }
700 
701 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
702  [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
703  [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
704  [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
705  [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) },
706  [TCA_NETEM_LOSS] = { .type = NLA_NESTED },
707  [TCA_NETEM_ECN] = { .type = NLA_U32 },
708 };
709 
710 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
711  const struct nla_policy *policy, int len)
712 {
713  int nested_len = nla_len(nla) - NLA_ALIGN(len);
714 
715  if (nested_len < 0) {
716  pr_info("netem: invalid attributes len %d\n", nested_len);
717  return -EINVAL;
718  }
719 
720  if (nested_len >= nla_attr_size(0))
721  return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
722  nested_len, policy);
723 
724  memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
725  return 0;
726 }
727 
728 /* Parse netlink message to set options */
729 static int netem_change(struct Qdisc *sch, struct nlattr *opt)
730 {
731  struct netem_sched_data *q = qdisc_priv(sch);
732  struct nlattr *tb[TCA_NETEM_MAX + 1];
733  struct tc_netem_qopt *qopt;
734  int ret;
735 
736  if (opt == NULL)
737  return -EINVAL;
738 
739  qopt = nla_data(opt);
740  ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
741  if (ret < 0)
742  return ret;
743 
744  sch->limit = qopt->limit;
745 
746  q->latency = qopt->latency;
747  q->jitter = qopt->jitter;
748  q->limit = qopt->limit;
749  q->gap = qopt->gap;
750  q->counter = 0;
751  q->loss = qopt->loss;
752  q->duplicate = qopt->duplicate;
753 
754  /* for compatibility with earlier versions.
755  * if gap is set, need to assume 100% probability
756  */
757  if (q->gap)
758  q->reorder = ~0;
759 
760  if (tb[TCA_NETEM_CORR])
761  get_correlation(sch, tb[TCA_NETEM_CORR]);
762 
763  if (tb[TCA_NETEM_DELAY_DIST]) {
764  ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
765  if (ret)
766  return ret;
767  }
768 
769  if (tb[TCA_NETEM_REORDER])
770  get_reorder(sch, tb[TCA_NETEM_REORDER]);
771 
772  if (tb[TCA_NETEM_CORRUPT])
773  get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
774 
775  if (tb[TCA_NETEM_RATE])
776  get_rate(sch, tb[TCA_NETEM_RATE]);
777 
778  if (tb[TCA_NETEM_ECN])
779  q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
780 
781  q->loss_model = CLG_RANDOM;
782  if (tb[TCA_NETEM_LOSS])
783  ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
784 
785  return ret;
786 }
787 
788 static int netem_init(struct Qdisc *sch, struct nlattr *opt)
789 {
790  struct netem_sched_data *q = qdisc_priv(sch);
791  int ret;
792 
793  if (!opt)
794  return -EINVAL;
795 
796  qdisc_watchdog_init(&q->watchdog, sch);
797 
798  q->loss_model = CLG_RANDOM;
799  ret = netem_change(sch, opt);
800  if (ret)
801  pr_info("netem: change failed\n");
802  return ret;
803 }
804 
805 static void netem_destroy(struct Qdisc *sch)
806 {
807  struct netem_sched_data *q = qdisc_priv(sch);
808 
809  qdisc_watchdog_cancel(&q->watchdog);
810  if (q->qdisc)
811  qdisc_destroy(q->qdisc);
812  dist_free(q->delay_dist);
813 }
814 
815 static int dump_loss_model(const struct netem_sched_data *q,
816  struct sk_buff *skb)
817 {
818  struct nlattr *nest;
819 
820  nest = nla_nest_start(skb, TCA_NETEM_LOSS);
821  if (nest == NULL)
822  goto nla_put_failure;
823 
824  switch (q->loss_model) {
825  case CLG_RANDOM:
826  /* legacy loss model */
827  nla_nest_cancel(skb, nest);
828  return 0; /* no data */
829 
830  case CLG_4_STATES: {
831  struct tc_netem_gimodel gi = {
832  .p13 = q->clg.a1,
833  .p31 = q->clg.a2,
834  .p32 = q->clg.a3,
835  .p14 = q->clg.a4,
836  .p23 = q->clg.a5,
837  };
838 
839  if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
840  goto nla_put_failure;
841  break;
842  }
843  case CLG_GILB_ELL: {
844  struct tc_netem_gemodel ge = {
845  .p = q->clg.a1,
846  .r = q->clg.a2,
847  .h = q->clg.a3,
848  .k1 = q->clg.a4,
849  };
850 
851  if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
852  goto nla_put_failure;
853  break;
854  }
855  }
856 
857  nla_nest_end(skb, nest);
858  return 0;
859 
860 nla_put_failure:
861  nla_nest_cancel(skb, nest);
862  return -1;
863 }
864 
865 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
866 {
867  const struct netem_sched_data *q = qdisc_priv(sch);
868  struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
869  struct tc_netem_qopt qopt;
870  struct tc_netem_corr cor;
871  struct tc_netem_reorder reorder;
872  struct tc_netem_corrupt corrupt;
873  struct tc_netem_rate rate;
874 
875  qopt.latency = q->latency;
876  qopt.jitter = q->jitter;
877  qopt.limit = q->limit;
878  qopt.loss = q->loss;
879  qopt.gap = q->gap;
880  qopt.duplicate = q->duplicate;
881  if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
882  goto nla_put_failure;
883 
884  cor.delay_corr = q->delay_cor.rho;
885  cor.loss_corr = q->loss_cor.rho;
886  cor.dup_corr = q->dup_cor.rho;
887  if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
888  goto nla_put_failure;
889 
890  reorder.probability = q->reorder;
891  reorder.correlation = q->reorder_cor.rho;
892  if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
893  goto nla_put_failure;
894 
895  corrupt.probability = q->corrupt;
896  corrupt.correlation = q->corrupt_cor.rho;
897  if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
898  goto nla_put_failure;
899 
900  rate.rate = q->rate;
901  rate.packet_overhead = q->packet_overhead;
902  rate.cell_size = q->cell_size;
903  rate.cell_overhead = q->cell_overhead;
904  if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
905  goto nla_put_failure;
906 
907  if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
908  goto nla_put_failure;
909 
910  if (dump_loss_model(q, skb) != 0)
911  goto nla_put_failure;
912 
913  return nla_nest_end(skb, nla);
914 
915 nla_put_failure:
916  nlmsg_trim(skb, nla);
917  return -1;
918 }
919 
920 static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
921  struct sk_buff *skb, struct tcmsg *tcm)
922 {
923  struct netem_sched_data *q = qdisc_priv(sch);
924 
925  if (cl != 1 || !q->qdisc) /* only one class */
926  return -ENOENT;
927 
928  tcm->tcm_handle |= TC_H_MIN(1);
929  tcm->tcm_info = q->qdisc->handle;
930 
931  return 0;
932 }
933 
934 static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
935  struct Qdisc **old)
936 {
937  struct netem_sched_data *q = qdisc_priv(sch);
938 
939  sch_tree_lock(sch);
940  *old = q->qdisc;
941  q->qdisc = new;
942  if (*old) {
943  qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
944  qdisc_reset(*old);
945  }
946  sch_tree_unlock(sch);
947 
948  return 0;
949 }
950 
951 static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
952 {
953  struct netem_sched_data *q = qdisc_priv(sch);
954  return q->qdisc;
955 }
956 
957 static unsigned long netem_get(struct Qdisc *sch, u32 classid)
958 {
959  return 1;
960 }
961 
962 static void netem_put(struct Qdisc *sch, unsigned long arg)
963 {
964 }
965 
966 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
967 {
968  if (!walker->stop) {
969  if (walker->count >= walker->skip)
970  if (walker->fn(sch, 1, walker) < 0) {
971  walker->stop = 1;
972  return;
973  }
974  walker->count++;
975  }
976 }
977 
978 static const struct Qdisc_class_ops netem_class_ops = {
979  .graft = netem_graft,
980  .leaf = netem_leaf,
981  .get = netem_get,
982  .put = netem_put,
983  .walk = netem_walk,
984  .dump = netem_dump_class,
985 };
986 
987 static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
988  .id = "netem",
989  .cl_ops = &netem_class_ops,
990  .priv_size = sizeof(struct netem_sched_data),
991  .enqueue = netem_enqueue,
992  .dequeue = netem_dequeue,
993  .peek = qdisc_peek_dequeued,
994  .drop = netem_drop,
995  .init = netem_init,
996  .reset = netem_reset,
997  .destroy = netem_destroy,
998  .change = netem_change,
999  .dump = netem_dump,
1000  .owner = THIS_MODULE,
1001 };
1002 
1003 
1004 static int __init netem_module_init(void)
1005 {
1006  pr_info("netem: version " VERSION "\n");
1007  return register_qdisc(&netem_qdisc_ops);
1008 }
1009 static void __exit netem_module_exit(void)
1010 {
1011  unregister_qdisc(&netem_qdisc_ops);
1012 }
1013 module_init(netem_module_init)
1014 module_exit(netem_module_exit)
1015 MODULE_LICENSE("GPL");