Linux Kernel 3.7.1
sch_choke.c
1 /*
2  * net/sched/sch_choke.c CHOKE scheduler
3  *
4  * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
5  * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * version 2 as published by the Free Software Foundation.
10  *
11  */
12 
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 #include <linux/reciprocal_div.h>
18 #include <linux/vmalloc.h>
19 #include <net/pkt_sched.h>
20 #include <net/inet_ecn.h>
21 #include <net/red.h>
22 #include <net/flow_keys.h>
23 
24 /*
25  CHOKe stateless AQM for fair bandwidth allocation
26  =================================================
27 
28  CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
29  unresponsive flows) is a variant of RED that penalizes misbehaving flows but
30  maintains no flow state. The difference from RED is an additional step
31  during the enqueuing process. If average queue size is over the
32  low threshold (qmin), a packet is chosen at random from the queue.
33  If both the new and chosen packet are from the same flow, both
34  are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
35  needs to access packets in queue randomly. It has a minimal class
36  interface to allow overriding the builtin flow classifier with
37  filters.
38 
39  Source:
40  R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
41  Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
42  IEEE INFOCOM, 2000.
43 
44  A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
45  Characteristics", IEEE/ACM Transactions on Networking, 2004
46 
47  */
48 
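The enqueue-time decision described in the comment above can be illustrated with a small userspace sketch. This is a simplified model, not the qdisc itself: the flow_id field, the rand_idx() helper and the plain array are hypothetical stand-ins for the flow dissector, random_N() and the sk_buff table used later in this file.

/* Minimal userspace sketch of the CHOKe decision step (illustrative only). */
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

struct pkt {
	unsigned int flow_id;		/* hypothetical stand-in for the dissected flow key */
};

static unsigned int rand_idx(unsigned int n)
{
	return (unsigned int)rand() % n;	/* hypothetical helper; slight modulo bias is fine here */
}

/*
 * Returns true if the arriving packet must be dropped because it matched a
 * randomly sampled queued packet; the sampled packet is dropped as well,
 * leaving a hole in the array, much as the real qdisc does.
 */
static bool choke_step(struct pkt *queue[], unsigned int qlen,
		       const struct pkt *arriving,
		       unsigned long qavg, unsigned long qth_min)
{
	unsigned int idx;

	if (qavg <= qth_min || qlen == 0)
		return false;			/* queue small: behave like plain RED */

	idx = rand_idx(qlen);			/* sample one queued packet at random */
	if (queue[idx] && queue[idx]->flow_id == arriving->flow_id) {
		queue[idx] = NULL;		/* drop the sampled packet... */
		return true;			/* ...and the arriving one too */
	}
	return false;				/* no match: fall through to normal RED marking/drop */
}
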
49 /* Upper bound on size of sk_buff table (packets) */
50 #define CHOKE_MAX_QUEUE (128*1024 - 1)
51 
52 struct choke_sched_data {
53 /* Parameters */
54  u32 limit;
55  unsigned char flags;
56 
57  struct red_parms parms;
58 
59 /* Variables */
60  struct red_vars vars;
61  struct tcf_proto *filter_list;
62  struct {
63  u32 prob_drop; /* Early probability drops */
64  u32 prob_mark; /* Early probability marks */
65  u32 forced_drop; /* Forced drops, qavg > max_thresh */
66  u32 forced_mark; /* Forced marks, qavg > max_thresh */
67  u32 pdrop; /* Drops due to queue limits */
68  u32 other; /* Drops due to drop() calls */
69  u32 matched; /* Drops to flow match */
70  } stats;
71 
72  unsigned int head;
73  unsigned int tail;
74 
75  unsigned int tab_mask; /* size - 1 */
76 
77  struct sk_buff **tab;
78 };
79 
80 /* deliver a random number between 0 and N - 1 */
81 static u32 random_N(unsigned int N)
82 {
83  return reciprocal_divide(random32(), N);
84 }
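A note on the arithmetic above, assuming reciprocal_divide() in <linux/reciprocal_div.h> for this kernel generation computes ((u64)a * b) >> 32: multiplying a uniform 32-bit random value by N and keeping the upper 32 bits scales it into the range [0, N) without a modulo. For example, random32() == 0x80000000 (2^31) with N == 10 yields (2^31 * 10) >> 32 = 5.
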
85 
86 /* number of elements in queue including holes */
87 static unsigned int choke_len(const struct choke_sched_data *q)
88 {
89  return (q->tail - q->head) & q->tab_mask;
90 }
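Because tab_mask is a power of two minus one, the unsigned subtraction above wraps correctly even when tail has wrapped around past head. For example, with tab_mask == 7, head == 6 and tail == 2, (2 - 6) & 7 == 4, i.e. four slots (occupied or holes) lie between head and tail.
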
91 
92 /* Is ECN parameter configured */
93 static int use_ecn(const struct choke_sched_data *q)
94 {
95  return q->flags & TC_RED_ECN;
96 }
97 
98 /* Should packets over max just be dropped (versus marked) */
99 static int use_harddrop(const struct choke_sched_data *q)
100 {
101  return q->flags & TC_RED_HARDDROP;
102 }
103 
104 /* Move head pointer forward to skip over holes */
105 static void choke_zap_head_holes(struct choke_sched_data *q)
106 {
107  do {
108  q->head = (q->head + 1) & q->tab_mask;
109  if (q->head == q->tail)
110  break;
111  } while (q->tab[q->head] == NULL);
112 }
113 
114 /* Move tail pointer backwards to reuse holes */
115 static void choke_zap_tail_holes(struct choke_sched_data *q)
116 {
117  do {
118  q->tail = (q->tail - 1) & q->tab_mask;
119  if (q->head == q->tail)
120  break;
121  } while (q->tab[q->tail] == NULL);
122 }
123 
124 /* Drop packet from queue array by creating a "hole" */
125 static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
126 {
127  struct choke_sched_data *q = qdisc_priv(sch);
128  struct sk_buff *skb = q->tab[idx];
129 
130  q->tab[idx] = NULL;
131 
132  if (idx == q->head)
133  choke_zap_head_holes(q);
134  if (idx == q->tail)
135  choke_zap_tail_holes(q);
136 
137  sch->qstats.backlog -= qdisc_pkt_len(skb);
138  qdisc_drop(skb, sch);
139  qdisc_tree_decrease_qlen(sch, 1);
140  --sch->q.qlen;
141 }
142 
143 struct choke_skb_cb {
144  u16 classid;
145  u8 keys_valid;
146  struct flow_keys keys;
147 };
148 
149 static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
150 {
151  qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
152  return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
153 }
154 
155 static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
156 {
157  choke_skb_cb(skb)->classid = classid;
158 }
159 
160 static u16 choke_get_classid(const struct sk_buff *skb)
161 {
162  return choke_skb_cb(skb)->classid;
163 }
164 
165 /*
166  * Compare flow of two packets
167  * Returns true only if source and destination address and port match.
168  * false for special cases
169  */
170 static bool choke_match_flow(struct sk_buff *skb1,
171  struct sk_buff *skb2)
172 {
173  if (skb1->protocol != skb2->protocol)
174  return false;
175 
176  if (!choke_skb_cb(skb1)->keys_valid) {
177  choke_skb_cb(skb1)->keys_valid = 1;
178  skb_flow_dissect(skb1, &choke_skb_cb(skb1)->keys);
179  }
180 
181  if (!choke_skb_cb(skb2)->keys_valid) {
182  choke_skb_cb(skb2)->keys_valid = 1;
183  skb_flow_dissect(skb2, &choke_skb_cb(skb2)->keys);
184  }
185 
186  return !memcmp(&choke_skb_cb(skb1)->keys,
187  &choke_skb_cb(skb2)->keys,
188  sizeof(struct flow_keys));
189 }
190 
191 /*
192  * Classify flow using either:
193  * 1. pre-existing classification result in skb
194  * 2. fast internal classification
195  * 3. use TC filter based classification
196  */
197 static bool choke_classify(struct sk_buff *skb,
198  struct Qdisc *sch, int *qerr)
199 
200 {
201  struct choke_sched_data *q = qdisc_priv(sch);
202  struct tcf_result res;
203  int result;
204 
205  result = tc_classify(skb, q->filter_list, &res);
206  if (result >= 0) {
207 #ifdef CONFIG_NET_CLS_ACT
208  switch (result) {
209  case TC_ACT_STOLEN:
210  case TC_ACT_QUEUED:
211  *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
212  case TC_ACT_SHOT:
213  return false;
214  }
215 #endif
216  choke_set_classid(skb, TC_H_MIN(res.classid));
217  return true;
218  }
219 
220  return false;
221 }
222 
223 /*
224  * Select a packet at random from queue
225  * HACK: since queue can have holes from previous deletion; retry several
226  * times to find a random skb but then just give up and return the head
227  * Will return NULL if queue is empty (q->head == q->tail)
228  */
229 static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
230  unsigned int *pidx)
231 {
232  struct sk_buff *skb;
233  int retrys = 3;
234 
235  do {
236  *pidx = (q->head + random_N(choke_len(q))) & q->tab_mask;
237  skb = q->tab[*pidx];
238  if (skb)
239  return skb;
240  } while (--retrys > 0);
241 
242  return q->tab[*pidx = q->head];
243 }
244 
245 /*
246  * Compare new packet with random packet in queue
247  * returns true if matched and sets *pidx
248  */
249 static bool choke_match_random(const struct choke_sched_data *q,
250  struct sk_buff *nskb,
251  unsigned int *pidx)
252 {
253  struct sk_buff *oskb;
254 
255  if (q->head == q->tail)
256  return false;
257 
258  oskb = choke_peek_random(q, pidx);
259  if (q->filter_list)
260  return choke_get_classid(nskb) == choke_get_classid(oskb);
261 
262  return choke_match_flow(oskb, nskb);
263 }
264 
265 static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
266 {
267  struct choke_sched_data *q = qdisc_priv(sch);
268  const struct red_parms *p = &q->parms;
269  int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
270 
271  if (q->filter_list) {
272  /* If using external classifiers, get result and record it. */
273  if (!choke_classify(skb, sch, &ret))
274  goto other_drop; /* Packet was eaten by filter */
275  }
276 
277  choke_skb_cb(skb)->keys_valid = 0;
278  /* Compute average queue usage (see RED) */
279  q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
280  if (red_is_idling(&q->vars))
281  red_end_of_idle_period(&q->vars);
282 
283  /* Is queue small? */
284  if (q->vars.qavg <= p->qth_min)
285  q->vars.qcount = -1;
286  else {
287  unsigned int idx;
288 
289  /* Draw a packet at random from queue and compare flow */
290  if (choke_match_random(q, skb, &idx)) {
291  q->stats.matched++;
292  choke_drop_by_idx(sch, idx);
293  goto congestion_drop;
294  }
295 
296  /* Queue is large, always mark/drop */
297  if (q->vars.qavg > p->qth_max) {
298  q->vars.qcount = -1;
299 
300  sch->qstats.overlimits++;
301  if (use_harddrop(q) || !use_ecn(q) ||
302  !INET_ECN_set_ce(skb)) {
303  q->stats.forced_drop++;
304  goto congestion_drop;
305  }
306 
307  q->stats.forced_mark++;
308  } else if (++q->vars.qcount) {
309  if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
310  q->vars.qcount = 0;
311  q->vars.qR = red_random(p);
312 
313  sch->qstats.overlimits++;
314  if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
315  q->stats.prob_drop++;
316  goto congestion_drop;
317  }
318 
319  q->stats.prob_mark++;
320  }
321  } else
322  q->vars.qR = red_random(p);
323  }
324 
325  /* Admit new packet */
326  if (sch->q.qlen < q->limit) {
327  q->tab[q->tail] = skb;
328  q->tail = (q->tail + 1) & q->tab_mask;
329  ++sch->q.qlen;
330  sch->qstats.backlog += qdisc_pkt_len(skb);
331  return NET_XMIT_SUCCESS;
332  }
333 
334  q->stats.pdrop++;
335  return qdisc_drop(skb, sch);
336 
337 congestion_drop:
338  qdisc_drop(skb, sch);
339  return NET_XMIT_CN;
340 
341 other_drop:
342  if (ret & __NET_XMIT_BYPASS)
343  sch->qstats.drops++;
344  kfree_skb(skb);
345  return ret;
346 }
347 
348 static struct sk_buff *choke_dequeue(struct Qdisc *sch)
349 {
350  struct choke_sched_data *q = qdisc_priv(sch);
351  struct sk_buff *skb;
352 
353  if (q->head == q->tail) {
354  if (!red_is_idling(&q->vars))
355  red_start_of_idle_period(&q->vars);
356  return NULL;
357  }
358 
359  skb = q->tab[q->head];
360  q->tab[q->head] = NULL;
361  choke_zap_head_holes(q);
362  --sch->q.qlen;
363  sch->qstats.backlog -= qdisc_pkt_len(skb);
364  qdisc_bstats_update(sch, skb);
365 
366  return skb;
367 }
368 
369 static unsigned int choke_drop(struct Qdisc *sch)
370 {
371  struct choke_sched_data *q = qdisc_priv(sch);
372  unsigned int len;
373 
374  len = qdisc_queue_drop(sch);
375  if (len > 0)
376  q->stats.other++;
377  else {
378  if (!red_is_idling(&q->vars))
379  red_start_of_idle_period(&q->vars);
380  }
381 
382  return len;
383 }
384 
385 static void choke_reset(struct Qdisc *sch)
386 {
387  struct choke_sched_data *q = qdisc_priv(sch);
388 
389  red_restart(&q->vars);
390 }
391 
392 static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
393  [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) },
394  [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE },
395  [TCA_CHOKE_MAX_P] = { .type = NLA_U32 },
396 };
397 
398 
399 static void choke_free(void *addr)
400 {
401  if (addr) {
402  if (is_vmalloc_addr(addr))
403  vfree(addr);
404  else
405  kfree(addr);
406  }
407 }
408 
409 static int choke_change(struct Qdisc *sch, struct nlattr *opt)
410 {
411  struct choke_sched_data *q = qdisc_priv(sch);
412  struct nlattr *tb[TCA_CHOKE_MAX + 1];
413  const struct tc_red_qopt *ctl;
414  int err;
415  struct sk_buff **old = NULL;
416  unsigned int mask;
417  u32 max_P;
418 
419  if (opt == NULL)
420  return -EINVAL;
421 
422  err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
423  if (err < 0)
424  return err;
425 
426  if (tb[TCA_CHOKE_PARMS] == NULL ||
427  tb[TCA_CHOKE_STAB] == NULL)
428  return -EINVAL;
429 
430  max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
431 
432  ctl = nla_data(tb[TCA_CHOKE_PARMS]);
433 
434  if (ctl->limit > CHOKE_MAX_QUEUE)
435  return -EINVAL;
436 
437  mask = roundup_pow_of_two(ctl->limit + 1) - 1;
438  if (mask != q->tab_mask) {
439  struct sk_buff **ntab;
440 
441  ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
442  if (!ntab)
443  ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
444  if (!ntab)
445  return -ENOMEM;
446 
447  sch_tree_lock(sch);
448  old = q->tab;
449  if (old) {
450  unsigned int oqlen = sch->q.qlen, tail = 0;
451 
452  while (q->head != q->tail) {
453  struct sk_buff *skb = q->tab[q->head];
454 
455  q->head = (q->head + 1) & q->tab_mask;
456  if (!skb)
457  continue;
458  if (tail < mask) {
459  ntab[tail++] = skb;
460  continue;
461  }
462  sch->qstats.backlog -= qdisc_pkt_len(skb);
463  --sch->q.qlen;
464  qdisc_drop(skb, sch);
465  }
466  qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
467  q->head = 0;
468  q->tail = tail;
469  }
470 
471  q->tab_mask = mask;
472  q->tab = ntab;
473  } else
474  sch_tree_lock(sch);
475 
476  q->flags = ctl->flags;
477  q->limit = ctl->limit;
478 
479  red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
480  ctl->Plog, ctl->Scell_log,
481  nla_data(tb[TCA_CHOKE_STAB]),
482  max_P);
483  red_set_vars(&q->vars);
484 
485  if (q->head == q->tail)
486  red_end_of_idle_period(&q->vars);
487 
488  sch_tree_unlock(sch);
489  choke_free(old);
490  return 0;
491 }
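A worked example for the table sizing in choke_change() above: the mask is roundup_pow_of_two(limit + 1) - 1, so a limit of 1000 packets gives a 1024-slot table with mask 1023, while the CHOKE_MAX_QUEUE cap of 128*1024 - 1 bounds the table at 131072 entries (1 MB of sk_buff pointers on a 64-bit build).
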
492 
493 static int choke_init(struct Qdisc *sch, struct nlattr *opt)
494 {
495  return choke_change(sch, opt);
496 }
497 
498 static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
499 {
500  struct choke_sched_data *q = qdisc_priv(sch);
501  struct nlattr *opts = NULL;
502  struct tc_red_qopt opt = {
503  .limit = q->limit,
504  .flags = q->flags,
505  .qth_min = q->parms.qth_min >> q->parms.Wlog,
506  .qth_max = q->parms.qth_max >> q->parms.Wlog,
507  .Wlog = q->parms.Wlog,
508  .Plog = q->parms.Plog,
509  .Scell_log = q->parms.Scell_log,
510  };
511 
512  opts = nla_nest_start(skb, TCA_OPTIONS);
513  if (opts == NULL)
514  goto nla_put_failure;
515 
516  if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
517  nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
518  goto nla_put_failure;
519  return nla_nest_end(skb, opts);
520 
521 nla_put_failure:
522  nla_nest_cancel(skb, opts);
523  return -EMSGSIZE;
524 }
525 
526 static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
527 {
528  struct choke_sched_data *q = qdisc_priv(sch);
529  struct tc_choke_xstats st = {
530  .early = q->stats.prob_drop + q->stats.forced_drop,
531  .marked = q->stats.prob_mark + q->stats.forced_mark,
532  .pdrop = q->stats.pdrop,
533  .other = q->stats.other,
534  .matched = q->stats.matched,
535  };
536 
537  return gnet_stats_copy_app(d, &st, sizeof(st));
538 }
539 
540 static void choke_destroy(struct Qdisc *sch)
541 {
542  struct choke_sched_data *q = qdisc_priv(sch);
543 
544  tcf_destroy_chain(&q->filter_list);
545  choke_free(q->tab);
546 }
547 
548 static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
549 {
550  return NULL;
551 }
552 
553 static unsigned long choke_get(struct Qdisc *sch, u32 classid)
554 {
555  return 0;
556 }
557 
558 static void choke_put(struct Qdisc *q, unsigned long cl)
559 {
560 }
561 
562 static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
563  u32 classid)
564 {
565  return 0;
566 }
567 
568 static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
569 {
570  struct choke_sched_data *q = qdisc_priv(sch);
571 
572  if (cl)
573  return NULL;
574  return &q->filter_list;
575 }
576 
577 static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
578  struct sk_buff *skb, struct tcmsg *tcm)
579 {
580  tcm->tcm_handle |= TC_H_MIN(cl);
581  return 0;
582 }
583 
584 static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
585 {
586  if (!arg->stop) {
587  if (arg->fn(sch, 1, arg) < 0) {
588  arg->stop = 1;
589  return;
590  }
591  arg->count++;
592  }
593 }
594 
595 static const struct Qdisc_class_ops choke_class_ops = {
596  .leaf = choke_leaf,
597  .get = choke_get,
598  .put = choke_put,
599  .tcf_chain = choke_find_tcf,
600  .bind_tcf = choke_bind,
601  .unbind_tcf = choke_put,
602  .dump = choke_dump_class,
603  .walk = choke_walk,
604 };
605 
606 static struct sk_buff *choke_peek_head(struct Qdisc *sch)
607 {
608  struct choke_sched_data *q = qdisc_priv(sch);
609 
610  return (q->head != q->tail) ? q->tab[q->head] : NULL;
611 }
612 
613 static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
614  .id = "choke",
615  .priv_size = sizeof(struct choke_sched_data),
616 
617  .enqueue = choke_enqueue,
618  .dequeue = choke_dequeue,
619  .peek = choke_peek_head,
620  .drop = choke_drop,
621  .init = choke_init,
622  .destroy = choke_destroy,
623  .reset = choke_reset,
624  .change = choke_change,
625  .dump = choke_dump,
626  .dump_stats = choke_dump_stats,
627  .owner = THIS_MODULE,
628 };
629 
630 static int __init choke_module_init(void)
631 {
632  return register_qdisc(&choke_qdisc_ops);
633 }
634 
635 static void __exit choke_module_exit(void)
636 {
637  unregister_qdisc(&choke_qdisc_ops);
638 }
639 
640 module_init(choke_module_init)
641 module_exit(choke_module_exit)
642 
643 MODULE_LICENSE("GPL");