Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
cls_rsvp.h
Go to the documentation of this file.
1 /*
2  * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version
7  * 2 of the License, or (at your option) any later version.
8  *
9  * Authors: Alexey Kuznetsov, <[email protected]>
10  */
11 
12 /*
13  Compared to the general packet classification problem,
14  RSVP needs only several relatively simple rules:
15 
16  * (dst, protocol) are always specified,
17  so that we are able to hash them.
18  * src may be exact, or may be wildcard, so that
19  we can keep a hash table plus one wildcard entry.
20  * source port (or flow label) is important only if src is given.
21 
22  IMPLEMENTATION.
23 
24  We use a two level hash table: The top level is keyed by
25  destination address and protocol ID, every bucket contains a list
26  of "rsvp sessions", identified by destination address, protocol and
27  DPI(="Destination Port ID"): triple (key, mask, offset).
28 
29  Every bucket has a smaller hash table keyed by source address
30  (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31  Every bucket is again a list of "RSVP flows", selected by
32  source address and SPI(="Source Port ID" here rather than
33  "security parameter index"): triple (key, mask, offset).
34 
35 
36  NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37  and all fragmented packets go to the best-effort traffic class.
38 
39 
40  NOTE 2. Two "port id"s seem to be redundant; RFC 2207 requires
41  only one "Generalized Port Identifier". So that for classic
42  ah, esp (and udp,tcp) both *pi should coincide or one of them
43  should be wildcard.
44 
45  At first sight, this redundancy is just a waste of CPU
46  resources. But DPI and SPI add the possibility to assign different
47  priorities to GPIs. Look also at note 4 about tunnels below.
48 
49 
50  NOTE 3. One complication is the case of tunneled packets.
51  We implement it as follows: if the first lookup
52  matches a special session with "tunnelhdr" value not zero,
53  flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54  In this case, we pull tunnelhdr bytes and restart lookup
55  with tunnel ID added to the list of keys. Simple and stupid 8)8)
56  It's enough for PIMREG and IPIP.
57 
58 
59  NOTE 4. Two GPIs make it possible to parse even GRE packets.
60  F.e. DPI can select ETH_P_IP (and necessary flags to make
61  tunnelhdr correct) in GRE protocol field and SPI matches
62  GRE key. Is it not nice? 8)8)
63 
64 
65  Well, as a result, despite its simplicity, we get a pretty
66  powerful classification engine. */
67 
68 
69 struct rsvp_head {
70  u32 tmap[256/32];
73  struct rsvp_session *ht[256];
74 };
75 
76 struct rsvp_session {
77  struct rsvp_session *next;
79  struct tc_rsvp_gpi dpi;
82  /* 16 (src,sport) hash slots, and one wildcard source slot */
83  struct rsvp_filter *ht[16 + 1];
84 };
85 
86 
87 struct rsvp_filter {
88  struct rsvp_filter *next;
90  struct tc_rsvp_gpi spi;
92 
93  struct tcf_result res;
94  struct tcf_exts exts;
95 
97  struct rsvp_session *sess;
98 };
99 
100 static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
101 {
102  unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
103 
104  h ^= h>>16;
105  h ^= h>>8;
106  return (h ^ protocol ^ tunnelid) & 0xFF;
107 }
108 
109 static inline unsigned int hash_src(__be32 *src)
110 {
111  unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
112 
113  h ^= h>>16;
114  h ^= h>>8;
115  h ^= h>>4;
116  return h & 0xF;
117 }
118 
119 static struct tcf_ext_map rsvp_ext_map = {
120  .police = TCA_RSVP_POLICE,
121  .action = TCA_RSVP_ACT
122 };
123 
/*
 * Run the extensions of the current filter `f` on `skb`.
 *
 * Must be expanded inside a loop over filters, in a function that has
 * `skb`, `f` and `res` in scope:
 *   - negative result: move on to the next filter (`continue`);
 *   - positive result: return it from the enclosing function;
 *   - zero: fall through to the code after the macro.
 *
 * Deliberately a bare block and not do { } while (0): the `continue`
 * has to act on the caller's loop.
 */
#define RSVP_APPLY_RESULT()						\
{									\
	int verdict = tcf_exts_exec(skb, &f->exts, res);		\
									\
	if (verdict < 0)						\
		continue;						\
	if (verdict > 0)						\
		return verdict;						\
}
132 
133 static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
134  struct tcf_result *res)
135 {
136  struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
137  struct rsvp_session *s;
138  struct rsvp_filter *f;
139  unsigned int h1, h2;
140  __be32 *dst, *src;
141  u8 protocol;
142  u8 tunnelid = 0;
143  u8 *xprt;
144 #if RSVP_DST_LEN == 4
145  struct ipv6hdr *nhptr;
146 
147  if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
148  return -1;
149  nhptr = ipv6_hdr(skb);
150 #else
151  struct iphdr *nhptr;
152 
153  if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
154  return -1;
155  nhptr = ip_hdr(skb);
156 #endif
157 
158 restart:
159 
160 #if RSVP_DST_LEN == 4
161  src = &nhptr->saddr.s6_addr32[0];
162  dst = &nhptr->daddr.s6_addr32[0];
163  protocol = nhptr->nexthdr;
164  xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
165 #else
166  src = &nhptr->saddr;
167  dst = &nhptr->daddr;
168  protocol = nhptr->protocol;
169  xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
170  if (ip_is_fragment(nhptr))
171  return -1;
172 #endif
173 
174  h1 = hash_dst(dst, protocol, tunnelid);
175  h2 = hash_src(src);
176 
177  for (s = sht[h1]; s; s = s->next) {
178  if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
179  protocol == s->protocol &&
180  !(s->dpi.mask &
181  (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
182 #if RSVP_DST_LEN == 4
183  dst[0] == s->dst[0] &&
184  dst[1] == s->dst[1] &&
185  dst[2] == s->dst[2] &&
186 #endif
187  tunnelid == s->tunnelid) {
188 
189  for (f = s->ht[h2]; f; f = f->next) {
190  if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
191  !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
192 #if RSVP_DST_LEN == 4
193  &&
194  src[0] == f->src[0] &&
195  src[1] == f->src[1] &&
196  src[2] == f->src[2]
197 #endif
198  ) {
199  *res = f->res;
201 
202 matched:
203  if (f->tunnelhdr == 0)
204  return 0;
205 
206  tunnelid = f->res.classid;
207  nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
208  goto restart;
209  }
210  }
211 
212  /* And wildcard bucket... */
213  for (f = s->ht[16]; f; f = f->next) {
214  *res = f->res;
216  goto matched;
217  }
218  return -1;
219  }
220  }
221  return -1;
222 }
223 
224 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
225 {
226  struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
227  struct rsvp_session *s;
228  struct rsvp_filter *f;
229  unsigned int h1 = handle & 0xFF;
230  unsigned int h2 = (handle >> 8) & 0xFF;
231 
232  if (h2 > 16)
233  return 0;
234 
235  for (s = sht[h1]; s; s = s->next) {
236  for (f = s->ht[h2]; f; f = f->next) {
237  if (f->handle == handle)
238  return (unsigned long)f;
239  }
240  }
241  return 0;
242 }
243 
/* Counterpart of rsvp_get(); intentionally does nothing. */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
	/* nothing to release */
}
247 
248 static int rsvp_init(struct tcf_proto *tp)
249 {
250  struct rsvp_head *data;
251 
252  data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
253  if (data) {
254  tp->root = data;
255  return 0;
256  }
257  return -ENOBUFS;
258 }
259 
260 static void
261 rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
262 {
263  tcf_unbind_filter(tp, &f->res);
264  tcf_exts_destroy(tp, &f->exts);
265  kfree(f);
266 }
267 
268 static void rsvp_destroy(struct tcf_proto *tp)
269 {
270  struct rsvp_head *data = xchg(&tp->root, NULL);
271  struct rsvp_session **sht;
272  int h1, h2;
273 
274  if (data == NULL)
275  return;
276 
277  sht = data->ht;
278 
279  for (h1 = 0; h1 < 256; h1++) {
280  struct rsvp_session *s;
281 
282  while ((s = sht[h1]) != NULL) {
283  sht[h1] = s->next;
284 
285  for (h2 = 0; h2 <= 16; h2++) {
286  struct rsvp_filter *f;
287 
288  while ((f = s->ht[h2]) != NULL) {
289  s->ht[h2] = f->next;
290  rsvp_delete_filter(tp, f);
291  }
292  }
293  kfree(s);
294  }
295  }
296  kfree(data);
297 }
298 
299 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
300 {
301  struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
302  unsigned int h = f->handle;
303  struct rsvp_session **sp;
304  struct rsvp_session *s = f->sess;
305  int i;
306 
307  for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
308  if (*fp == f) {
309  tcf_tree_lock(tp);
310  *fp = f->next;
311  tcf_tree_unlock(tp);
312  rsvp_delete_filter(tp, f);
313 
314  /* Strip tree */
315 
316  for (i = 0; i <= 16; i++)
317  if (s->ht[i])
318  return 0;
319 
320  /* OK, session has no flows */
321  for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
322  *sp; sp = &(*sp)->next) {
323  if (*sp == s) {
324  tcf_tree_lock(tp);
325  *sp = s->next;
326  tcf_tree_unlock(tp);
327 
328  kfree(s);
329  return 0;
330  }
331  }
332 
333  return 0;
334  }
335  }
336  return 0;
337 }
338 
339 static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
340 {
341  struct rsvp_head *data = tp->root;
342  int i = 0xFFFF;
343 
344  while (i-- > 0) {
345  u32 h;
346 
347  if ((data->hgenerator += 0x10000) == 0)
348  data->hgenerator = 0x10000;
349  h = data->hgenerator|salt;
350  if (rsvp_get(tp, h) == 0)
351  return h;
352  }
353  return 0;
354 }
355 
356 static int tunnel_bts(struct rsvp_head *data)
357 {
358  int n = data->tgenerator >> 5;
359  u32 b = 1 << (data->tgenerator & 0x1F);
360 
361  if (data->tmap[n] & b)
362  return 0;
363  data->tmap[n] |= b;
364  return 1;
365 }
366 
367 static void tunnel_recycle(struct rsvp_head *data)
368 {
369  struct rsvp_session **sht = data->ht;
370  u32 tmap[256/32];
371  int h1, h2;
372 
373  memset(tmap, 0, sizeof(tmap));
374 
375  for (h1 = 0; h1 < 256; h1++) {
376  struct rsvp_session *s;
377  for (s = sht[h1]; s; s = s->next) {
378  for (h2 = 0; h2 <= 16; h2++) {
379  struct rsvp_filter *f;
380 
381  for (f = s->ht[h2]; f; f = f->next) {
382  if (f->tunnelhdr == 0)
383  continue;
384  data->tgenerator = f->res.classid;
385  tunnel_bts(data);
386  }
387  }
388  }
389  }
390 
391  memcpy(data->tmap, tmap, sizeof(tmap));
392 }
393 
394 static u32 gen_tunnel(struct rsvp_head *data)
395 {
396  int i, k;
397 
398  for (k = 0; k < 2; k++) {
399  for (i = 255; i > 0; i--) {
400  if (++data->tgenerator == 0)
401  data->tgenerator = 1;
402  if (tunnel_bts(data))
403  return data->tgenerator;
404  }
405  tunnel_recycle(data);
406  }
407  return 0;
408 }
409 
410 static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
411  [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
412  [TCA_RSVP_DST] = { .type = NLA_BINARY,
413  .len = RSVP_DST_LEN * sizeof(u32) },
414  [TCA_RSVP_SRC] = { .type = NLA_BINARY,
415  .len = RSVP_DST_LEN * sizeof(u32) },
416  [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
417 };
418 
419 static int rsvp_change(struct sk_buff *in_skb,
420  struct tcf_proto *tp, unsigned long base,
421  u32 handle,
422  struct nlattr **tca,
423  unsigned long *arg)
424 {
425  struct rsvp_head *data = tp->root;
426  struct rsvp_filter *f, **fp;
427  struct rsvp_session *s, **sp;
428  struct tc_rsvp_pinfo *pinfo = NULL;
429  struct nlattr *opt = tca[TCA_OPTIONS];
430  struct nlattr *tb[TCA_RSVP_MAX + 1];
431  struct tcf_exts e;
432  unsigned int h1, h2;
433  __be32 *dst;
434  int err;
435 
436  if (opt == NULL)
437  return handle ? -EINVAL : 0;
438 
439  err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
440  if (err < 0)
441  return err;
442 
443  err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
444  if (err < 0)
445  return err;
446 
447  f = (struct rsvp_filter *)*arg;
448  if (f) {
449  /* Node exists: adjust only classid */
450 
451  if (f->handle != handle && handle)
452  goto errout2;
453  if (tb[TCA_RSVP_CLASSID]) {
454  f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
455  tcf_bind_filter(tp, &f->res, base);
456  }
457 
458  tcf_exts_change(tp, &f->exts, &e);
459  return 0;
460  }
461 
462  /* Now more serious part... */
463  err = -EINVAL;
464  if (handle)
465  goto errout2;
466  if (tb[TCA_RSVP_DST] == NULL)
467  goto errout2;
468 
469  err = -ENOBUFS;
470  f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
471  if (f == NULL)
472  goto errout2;
473 
474  h2 = 16;
475  if (tb[TCA_RSVP_SRC]) {
476  memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
477  h2 = hash_src(f->src);
478  }
479  if (tb[TCA_RSVP_PINFO]) {
480  pinfo = nla_data(tb[TCA_RSVP_PINFO]);
481  f->spi = pinfo->spi;
482  f->tunnelhdr = pinfo->tunnelhdr;
483  }
484  if (tb[TCA_RSVP_CLASSID])
485  f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
486 
487  dst = nla_data(tb[TCA_RSVP_DST]);
488  h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
489 
490  err = -ENOMEM;
491  if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
492  goto errout;
493 
494  if (f->tunnelhdr) {
495  err = -EINVAL;
496  if (f->res.classid > 255)
497  goto errout;
498 
499  err = -ENOMEM;
500  if (f->res.classid == 0 &&
501  (f->res.classid = gen_tunnel(data)) == 0)
502  goto errout;
503  }
504 
505  for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
506  if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
507  pinfo && pinfo->protocol == s->protocol &&
508  memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
509 #if RSVP_DST_LEN == 4
510  dst[0] == s->dst[0] &&
511  dst[1] == s->dst[1] &&
512  dst[2] == s->dst[2] &&
513 #endif
514  pinfo->tunnelid == s->tunnelid) {
515 
516 insert:
517  /* OK, we found appropriate session */
518 
519  fp = &s->ht[h2];
520 
521  f->sess = s;
522  if (f->tunnelhdr == 0)
523  tcf_bind_filter(tp, &f->res, base);
524 
525  tcf_exts_change(tp, &f->exts, &e);
526 
527  for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
528  if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
529  break;
530  f->next = *fp;
531  wmb();
532  *fp = f;
533 
534  *arg = (unsigned long)f;
535  return 0;
536  }
537  }
538 
539  /* No session found. Create new one. */
540 
541  err = -ENOBUFS;
542  s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
543  if (s == NULL)
544  goto errout;
545  memcpy(s->dst, dst, sizeof(s->dst));
546 
547  if (pinfo) {
548  s->dpi = pinfo->dpi;
549  s->protocol = pinfo->protocol;
550  s->tunnelid = pinfo->tunnelid;
551  }
552  for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
553  if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
554  break;
555  }
556  s->next = *sp;
557  wmb();
558  *sp = s;
559 
560  goto insert;
561 
562 errout:
563  kfree(f);
564 errout2:
565  tcf_exts_destroy(tp, &e);
566  return err;
567 }
568 
569 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
570 {
571  struct rsvp_head *head = tp->root;
572  unsigned int h, h1;
573 
574  if (arg->stop)
575  return;
576 
577  for (h = 0; h < 256; h++) {
578  struct rsvp_session *s;
579 
580  for (s = head->ht[h]; s; s = s->next) {
581  for (h1 = 0; h1 <= 16; h1++) {
582  struct rsvp_filter *f;
583 
584  for (f = s->ht[h1]; f; f = f->next) {
585  if (arg->count < arg->skip) {
586  arg->count++;
587  continue;
588  }
589  if (arg->fn(tp, (unsigned long)f, arg) < 0) {
590  arg->stop = 1;
591  return;
592  }
593  arg->count++;
594  }
595  }
596  }
597  }
598 }
599 
600 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
601  struct sk_buff *skb, struct tcmsg *t)
602 {
603  struct rsvp_filter *f = (struct rsvp_filter *)fh;
604  struct rsvp_session *s;
605  unsigned char *b = skb_tail_pointer(skb);
606  struct nlattr *nest;
607  struct tc_rsvp_pinfo pinfo;
608 
609  if (f == NULL)
610  return skb->len;
611  s = f->sess;
612 
613  t->tcm_handle = f->handle;
614 
615  nest = nla_nest_start(skb, TCA_OPTIONS);
616  if (nest == NULL)
617  goto nla_put_failure;
618 
619  if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
620  goto nla_put_failure;
621  pinfo.dpi = s->dpi;
622  pinfo.spi = f->spi;
623  pinfo.protocol = s->protocol;
624  pinfo.tunnelid = s->tunnelid;
625  pinfo.tunnelhdr = f->tunnelhdr;
626  pinfo.pad = 0;
627  if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
628  goto nla_put_failure;
629  if (f->res.classid &&
630  nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
631  goto nla_put_failure;
632  if (((f->handle >> 8) & 0xFF) != 16 &&
633  nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
634  goto nla_put_failure;
635 
636  if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
637  goto nla_put_failure;
638 
639  nla_nest_end(skb, nest);
640 
641  if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
642  goto nla_put_failure;
643  return skb->len;
644 
645 nla_put_failure:
646  nlmsg_trim(skb, b);
647  return -1;
648 }
649 
650 static struct tcf_proto_ops RSVP_OPS __read_mostly = {
651  .kind = RSVP_ID,
652  .classify = rsvp_classify,
653  .init = rsvp_init,
654  .destroy = rsvp_destroy,
655  .get = rsvp_get,
656  .put = rsvp_put,
657  .change = rsvp_change,
658  .delete = rsvp_delete,
659  .walk = rsvp_walk,
660  .dump = rsvp_dump,
661  .owner = THIS_MODULE,
662 };
663 
664 static int __init init_rsvp(void)
665 {
667 }
668 
669 static void __exit exit_rsvp(void)
670 {
672 }
673 
674 module_init(init_rsvp)
675 module_exit(exit_rsvp)