fib_rules.c (Linux kernel 3.7.1)
/*
 * net/core/fib_rules.c		Generic Routing Rules
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, version 2.
 *
 * Authors:	Thomas Graf <[email protected]>
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/fib_rules.h>

int fib_default_rule_add(struct fib_rules_ops *ops,
			 u32 pref, u32 table, u32 flags)
{
	struct fib_rule *r;

	r = kzalloc(ops->rule_size, GFP_KERNEL);
	if (r == NULL)
		return -ENOMEM;

	atomic_set(&r->refcnt, 1);
	r->action = FR_ACT_TO_TBL;
	r->pref = pref;
	r->table = table;
	r->flags = flags;
	r->fr_net = hold_net(ops->fro_net);

	/* The lock is not required here, the list is unreachable
	 * at the moment this function is called */
	list_add_tail(&r->list, &ops->rules_list);
	return 0;
}
EXPORT_SYMBOL(fib_default_rule_add);

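/*
 * Illustrative sketch (not part of this file): a protocol typically seeds
 * its rule list with a set of default rules right after registering its
 * fib_rules_ops. The sketch below is modeled on what the IPv4 side does
 * for its local, main and default tables; the helper name and the exact
 * preference values are shown purely as an example of using the API above.
 */
#if 0
static int example_default_rules_init(struct fib_rules_ops *ops)
{
	int err;

	err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, 0);
	if (err < 0)
		return err;
	err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0);
	if (err < 0)
		return err;
	err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		return err;
	return 0;
}
#endif
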
/* Return a default preference for a new rule: one below the preference
 * of the rule following the first (pref 0) entry, or 0 if no such rule
 * exists. */
u32 fib_default_rule_pref(struct fib_rules_ops *ops)
{
	struct list_head *pos;
	struct fib_rule *rule;

	if (!list_empty(&ops->rules_list)) {
		pos = ops->rules_list.next;
		if (pos->next != &ops->rules_list) {
			rule = list_entry(pos->next, struct fib_rule, list);
			if (rule->pref)
				return rule->pref - 1;
		}
	}

	return 0;
}
EXPORT_SYMBOL(fib_default_rule_pref);

static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid);

static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
{
	struct fib_rules_ops *ops;

	rcu_read_lock();
	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
		if (ops->family == family) {
			if (!try_module_get(ops->owner))
				ops = NULL;
			rcu_read_unlock();
			return ops;
		}
	}
	rcu_read_unlock();

	return NULL;
}

static void rules_ops_put(struct fib_rules_ops *ops)
{
	if (ops)
		module_put(ops->owner);
}

static void flush_route_cache(struct fib_rules_ops *ops)
{
	if (ops->flush_cache)
		ops->flush_cache(ops);
}

static int __fib_rules_register(struct fib_rules_ops *ops)
{
	int err = -EEXIST;
	struct fib_rules_ops *o;
	struct net *net;

	net = ops->fro_net;

	if (ops->rule_size < sizeof(struct fib_rule))
		return -EINVAL;

	if (ops->match == NULL || ops->configure == NULL ||
	    ops->compare == NULL || ops->fill == NULL ||
	    ops->action == NULL)
		return -EINVAL;

	spin_lock(&net->rules_mod_lock);
	list_for_each_entry(o, &net->rules_ops, list)
		if (ops->family == o->family)
			goto errout;

	hold_net(net);
	list_add_tail_rcu(&ops->list, &net->rules_ops);
	err = 0;
errout:
	spin_unlock(&net->rules_mod_lock);

	return err;
}

struct fib_rules_ops *
fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
{
	struct fib_rules_ops *ops;
	int err;

	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
	if (ops == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ops->rules_list);
	ops->fro_net = net;

	err = __fib_rules_register(ops);
	if (err) {
		kfree(ops);
		ops = ERR_PTR(err);
	}

	return ops;
}
EXPORT_SYMBOL_GPL(fib_rules_register);

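/*
 * Illustrative sketch (not part of this file): a protocol registers a
 * fib_rules_ops template per network namespace, typically from its pernet
 * init hook. The template and callback names below are hypothetical
 * placeholders; only fib_rules_register() itself is real here.
 */
#if 0
static const struct fib_rules_ops myproto_rules_ops_template = {
	.family		= AF_INET,
	.rule_size	= sizeof(struct fib_rule),	/* must be >= sizeof(struct fib_rule) */
	.addr_size	= sizeof(u32),
	.action		= myproto_rule_action,		/* hypothetical callback */
	.match		= myproto_rule_match,		/* hypothetical callback */
	.configure	= myproto_rule_configure,	/* hypothetical callback */
	.compare	= myproto_rule_compare,		/* hypothetical callback */
	.fill		= myproto_rule_fill,		/* hypothetical callback */
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= myproto_rule_policy,		/* hypothetical nla_policy */
	.owner		= THIS_MODULE,
};

static int __net_init myproto_rules_net_init(struct net *net)
{
	struct fib_rules_ops *ops;

	ops = fib_rules_register(&myproto_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);
	/* stash ops in per-net state so it can be unregistered later */
	return 0;
}
#endif
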
static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
{
	struct fib_rule *rule, *tmp;

	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
		list_del_rcu(&rule->list);
		if (ops->delete)
			ops->delete(rule);
		fib_rule_put(rule);
	}
}

static void fib_rules_put_rcu(struct rcu_head *head)
{
	struct fib_rules_ops *ops = container_of(head, struct fib_rules_ops, rcu);
	struct net *net = ops->fro_net;

	release_net(net);
	kfree(ops);
}

void fib_rules_unregister(struct fib_rules_ops *ops)
{
	struct net *net = ops->fro_net;

	spin_lock(&net->rules_mod_lock);
	list_del_rcu(&ops->list);
	fib_rules_cleanup_ops(ops);
	spin_unlock(&net->rules_mod_lock);

	call_rcu(&ops->rcu, fib_rules_put_rcu);
}
EXPORT_SYMBOL_GPL(fib_rules_unregister);

static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
			  struct flowi *fl, int flags)
{
	int ret = 0;

	if (rule->iifindex && (rule->iifindex != fl->flowi_iif))
		goto out;

	if (rule->oifindex && (rule->oifindex != fl->flowi_oif))
		goto out;

	if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
		goto out;

	ret = ops->match(rule, fl, flags);
out:
	return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
}

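/*
 * Worked example of the fwmark test above (illustrative): with
 * rule->mark = 0x1, rule->mark_mask = 0xFFFFFFFF and a packet mark of
 * 0x3, (0x1 ^ 0x3) & 0xFFFFFFFF = 0x2 != 0, so the rule does not match;
 * with a packet mark of 0x1 the XOR is 0 and the rule matches. A mask
 * of 0 makes the test vacuous, i.e. any mark matches.
 */
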
int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
		     int flags, struct fib_lookup_arg *arg)
{
	struct fib_rule *rule;
	int err;

	rcu_read_lock();

	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
jumped:
		if (!fib_rule_match(rule, ops, fl, flags))
			continue;

		if (rule->action == FR_ACT_GOTO) {
			struct fib_rule *target;

			target = rcu_dereference(rule->ctarget);
			if (target == NULL) {
				continue;
			} else {
				rule = target;
				goto jumped;
			}
		} else if (rule->action == FR_ACT_NOP)
			continue;
		else
			err = ops->action(rule, fl, flags, arg);

		if (err != -EAGAIN) {
			if ((arg->flags & FIB_LOOKUP_NOREF) ||
			    likely(atomic_inc_not_zero(&rule->refcnt))) {
				arg->rule = rule;
				goto out;
			}
			break;
		}
	}

	err = -ESRCH;
out:
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL_GPL(fib_rules_lookup);

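/*
 * Illustrative sketch (not part of this file): how a protocol consumes
 * fib_rules_lookup(). Modeled on the IPv4 caller; net->ipv4.rules_ops and
 * flowi4_to_flowi() exist in this kernel when policy routing is
 * configured, but the function below is a trimmed-down example, not the
 * real IPv4 lookup path.
 */
#if 0
int example_fib_lookup(struct net *net, struct flowi4 *flp,
		       struct fib_result *res)
{
	struct fib_lookup_arg arg = {
		.result	= res,
		.flags	= FIB_LOOKUP_NOREF,	/* caller stays inside RCU, skip refcounting */
	};

	return fib_rules_lookup(net->ipv4.rules_ops,
				flowi4_to_flowi(flp), 0, &arg);
}
#endif
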
static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
			    struct fib_rules_ops *ops)
{
	int err = -EINVAL;

	if (frh->src_len)
		if (tb[FRA_SRC] == NULL ||
		    frh->src_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_SRC]) != ops->addr_size)
			goto errout;

	if (frh->dst_len)
		if (tb[FRA_DST] == NULL ||
		    frh->dst_len > (ops->addr_size * 8) ||
		    nla_len(tb[FRA_DST]) != ops->addr_size)
			goto errout;

	err = 0;
errout:
	return err;
}

static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule, *r, *last = NULL;
	struct nlattr *tb[FRA_MAX+1];
	int err = -EINVAL, unresolved = 0;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
		goto errout;

	ops = lookup_rules_ops(net, frh->family);
	if (ops == NULL) {
		err = -EAFNOSUPPORT;
		goto errout;
	}

	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
	if (err < 0)
		goto errout;

	err = validate_rulemsg(frh, tb, ops);
	if (err < 0)
		goto errout;

	rule = kzalloc(ops->rule_size, GFP_KERNEL);
	if (rule == NULL) {
		err = -ENOMEM;
		goto errout;
	}
	rule->fr_net = hold_net(net);

	if (tb[FRA_PRIORITY])
		rule->pref = nla_get_u32(tb[FRA_PRIORITY]);

	if (tb[FRA_IIFNAME]) {
		struct net_device *dev;

		rule->iifindex = -1;
		nla_strlcpy(rule->iifname, tb[FRA_IIFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, rule->iifname);
		if (dev)
			rule->iifindex = dev->ifindex;
	}

	if (tb[FRA_OIFNAME]) {
		struct net_device *dev;

		rule->oifindex = -1;
		nla_strlcpy(rule->oifname, tb[FRA_OIFNAME], IFNAMSIZ);
		dev = __dev_get_by_name(net, rule->oifname);
		if (dev)
			rule->oifindex = dev->ifindex;
	}

	if (tb[FRA_FWMARK]) {
		rule->mark = nla_get_u32(tb[FRA_FWMARK]);
		if (rule->mark)
			/* compatibility: if the mark value is non-zero all bits
			 * are compared unless a mask is explicitly specified.
			 */
			rule->mark_mask = 0xFFFFFFFF;
	}

	if (tb[FRA_FWMASK])
		rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);

	rule->action = frh->action;
	rule->flags = frh->flags;
	rule->table = frh_get_table(frh, tb);

	if (!tb[FRA_PRIORITY] && ops->default_pref)
		rule->pref = ops->default_pref(ops);

	err = -EINVAL;
	if (tb[FRA_GOTO]) {
		if (rule->action != FR_ACT_GOTO)
			goto errout_free;

		rule->target = nla_get_u32(tb[FRA_GOTO]);
		/* Backward jumps are prohibited to avoid endless loops */
		if (rule->target <= rule->pref)
			goto errout_free;

		list_for_each_entry(r, &ops->rules_list, list) {
			if (r->pref == rule->target) {
				RCU_INIT_POINTER(rule->ctarget, r);
				break;
			}
		}

		if (rcu_dereference_protected(rule->ctarget, 1) == NULL)
			unresolved = 1;
	} else if (rule->action == FR_ACT_GOTO)
		goto errout_free;

	err = ops->configure(rule, skb, frh, tb);
	if (err < 0)
		goto errout_free;

	list_for_each_entry(r, &ops->rules_list, list) {
		if (r->pref > rule->pref)
			break;
		last = r;
	}

	fib_rule_get(rule);

	if (last)
		list_add_rcu(&rule->list, &last->list);
	else
		list_add_rcu(&rule->list, &ops->rules_list);

	if (ops->unresolved_rules) {
		/*
		 * There are unresolved goto rules in the list, check if
		 * any of them are pointing to this new rule.
		 */
		list_for_each_entry(r, &ops->rules_list, list) {
			if (r->action == FR_ACT_GOTO &&
			    r->target == rule->pref &&
			    rtnl_dereference(r->ctarget) == NULL) {
				rcu_assign_pointer(r->ctarget, rule);
				if (--ops->unresolved_rules == 0)
					break;
			}
		}
	}

	if (rule->action == FR_ACT_GOTO)
		ops->nr_goto_rules++;

	if (unresolved)
		ops->unresolved_rules++;

	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
	flush_route_cache(ops);
	rules_ops_put(ops);
	return 0;

errout_free:
	release_net(rule->fr_net);
	kfree(rule);
errout:
	rules_ops_put(ops);
	return err;
}

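/*
 * Illustrative userspace sketch (not part of this file): the kind of
 * RTM_NEWRULE request fib_nl_newrule() handles, roughly equivalent to
 * "ip rule add fwmark 1 lookup 100". Error handling is trimmed and the
 * function name is hypothetical; the message layout (struct fib_rule_hdr
 * followed by FRA_* attributes) is what this file parses.
 */
#if 0
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/fib_rules.h>

static int example_add_fwmark_rule(void)
{
	struct {
		struct nlmsghdr nlh;
		struct fib_rule_hdr frh;
		char buf[64];
	} req;
	struct rtattr *rta;
	__u32 mark = 1;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0)
		return -1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.frh));
	req.nlh.nlmsg_type = RTM_NEWRULE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	req.frh.family = AF_INET;
	req.frh.action = FR_ACT_TO_TBL;
	req.frh.table = 100;	/* small table ids fit in the header byte */

	/* FRA_FWMARK attribute: match packets carrying mark 1 */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
	rta->rta_type = FRA_FWMARK;
	rta->rta_len = RTA_LENGTH(sizeof(mark));
	memcpy(RTA_DATA(rta), &mark, sizeof(mark));
	req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + rta->rta_len;

	if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}
#endif
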
static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rule_hdr *frh = nlmsg_data(nlh);
	struct fib_rules_ops *ops = NULL;
	struct fib_rule *rule, *tmp;
	struct nlattr *tb[FRA_MAX+1];
	int err = -EINVAL;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
		goto errout;

	ops = lookup_rules_ops(net, frh->family);
	if (ops == NULL) {
		err = -EAFNOSUPPORT;
		goto errout;
	}

	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy);
	if (err < 0)
		goto errout;

	err = validate_rulemsg(frh, tb, ops);
	if (err < 0)
		goto errout;

	list_for_each_entry(rule, &ops->rules_list, list) {
		if (frh->action && (frh->action != rule->action))
			continue;

		if (frh->table && (frh_get_table(frh, tb) != rule->table))
			continue;

		if (tb[FRA_PRIORITY] &&
		    (rule->pref != nla_get_u32(tb[FRA_PRIORITY])))
			continue;

		if (tb[FRA_IIFNAME] &&
		    nla_strcmp(tb[FRA_IIFNAME], rule->iifname))
			continue;

		if (tb[FRA_OIFNAME] &&
		    nla_strcmp(tb[FRA_OIFNAME], rule->oifname))
			continue;

		if (tb[FRA_FWMARK] &&
		    (rule->mark != nla_get_u32(tb[FRA_FWMARK])))
			continue;

		if (tb[FRA_FWMASK] &&
		    (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
			continue;

		if (!ops->compare(rule, frh, tb))
			continue;

		if (rule->flags & FIB_RULE_PERMANENT) {
			err = -EPERM;
			goto errout;
		}

		list_del_rcu(&rule->list);

		if (rule->action == FR_ACT_GOTO) {
			ops->nr_goto_rules--;
			if (rtnl_dereference(rule->ctarget) == NULL)
				ops->unresolved_rules--;
		}

		/*
		 * Check if this rule is a target to any of them. If so,
		 * disable them. As this operation is eventually very
		 * expensive, it is only performed if goto rules have
		 * actually been added.
		 */
		if (ops->nr_goto_rules > 0) {
			list_for_each_entry(tmp, &ops->rules_list, list) {
				if (rtnl_dereference(tmp->ctarget) == rule) {
					RCU_INIT_POINTER(tmp->ctarget, NULL);
					ops->unresolved_rules++;
				}
			}
		}

		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
				   NETLINK_CB(skb).portid);
		if (ops->delete)
			ops->delete(rule);
		fib_rule_put(rule);
		flush_route_cache(ops);
		rules_ops_put(ops);
		return 0;
	}

	err = -ENOENT;
errout:
	rules_ops_put(ops);
	return err;
}

static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
					 struct fib_rule *rule)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr))
			 + nla_total_size(IFNAMSIZ) /* FRA_IIFNAME */
			 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
			 + nla_total_size(4) /* FRA_PRIORITY */
			 + nla_total_size(4) /* FRA_TABLE */
			 + nla_total_size(4) /* FRA_FWMARK */
			 + nla_total_size(4); /* FRA_FWMASK */

	if (ops->nlmsg_payload)
		payload += ops->nlmsg_payload(rule);

	return payload;
}

static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
			    u32 pid, u32 seq, int type, int flags,
			    struct fib_rules_ops *ops)
{
	struct nlmsghdr *nlh;
	struct fib_rule_hdr *frh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	frh = nlmsg_data(nlh);
	frh->family = ops->family;
	frh->table = rule->table;
	if (nla_put_u32(skb, FRA_TABLE, rule->table))
		goto nla_put_failure;
	frh->res1 = 0;
	frh->res2 = 0;
	frh->action = rule->action;
	frh->flags = rule->flags;

	if (rule->action == FR_ACT_GOTO &&
	    rcu_access_pointer(rule->ctarget) == NULL)
		frh->flags |= FIB_RULE_UNRESOLVED;

	if (rule->iifname[0]) {
		if (nla_put_string(skb, FRA_IIFNAME, rule->iifname))
			goto nla_put_failure;
		if (rule->iifindex == -1)
			frh->flags |= FIB_RULE_IIF_DETACHED;
	}

	if (rule->oifname[0]) {
		if (nla_put_string(skb, FRA_OIFNAME, rule->oifname))
			goto nla_put_failure;
		if (rule->oifindex == -1)
			frh->flags |= FIB_RULE_OIF_DETACHED;
	}

	if ((rule->pref &&
	     nla_put_u32(skb, FRA_PRIORITY, rule->pref)) ||
	    (rule->mark &&
	     nla_put_u32(skb, FRA_FWMARK, rule->mark)) ||
	    ((rule->mark_mask || rule->mark) &&
	     nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) ||
	    (rule->target &&
	     nla_put_u32(skb, FRA_GOTO, rule->target)))
		goto nla_put_failure;
	if (ops->fill(rule, skb, frh) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
		      struct fib_rules_ops *ops)
{
	int idx = 0;
	struct fib_rule *rule;

	rcu_read_lock();
	list_for_each_entry_rcu(rule, &ops->rules_list, list) {
		if (idx < cb->args[1])
			goto skip;

		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
				     cb->nlh->nlmsg_seq, RTM_NEWRULE,
				     NLM_F_MULTI, ops) < 0)
			break;
skip:
		idx++;
	}
	rcu_read_unlock();
	cb->args[1] = idx;
	rules_ops_put(ops);

	return skb->len;
}

static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct fib_rules_ops *ops;
	int idx = 0, family;

	family = rtnl_msg_family(cb->nlh);
	if (family != AF_UNSPEC) {
		/* Protocol specific dump request */
		ops = lookup_rules_ops(net, family);
		if (ops == NULL)
			return -EAFNOSUPPORT;

		return dump_rules(skb, cb, ops);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
		if (idx < cb->args[0] || !try_module_get(ops->owner))
			goto skip;

		if (dump_rules(skb, cb, ops) < 0)
			break;

		cb->args[1] = 0;
skip:
		idx++;
	}
	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}

static void notify_rule_change(int event, struct fib_rule *rule,
			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
			       u32 pid)
{
	struct net *net;
	struct sk_buff *skb;
	int err = -ENOBUFS;

	net = ops->fro_net;
	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in fib_rule_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, ops->nlgroup, err);
}

static void attach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

	list_for_each_entry(rule, rules, list) {
		if (rule->iifindex == -1 &&
		    strcmp(dev->name, rule->iifname) == 0)
			rule->iifindex = dev->ifindex;
		if (rule->oifindex == -1 &&
		    strcmp(dev->name, rule->oifname) == 0)
			rule->oifindex = dev->ifindex;
	}
}

static void detach_rules(struct list_head *rules, struct net_device *dev)
{
	struct fib_rule *rule;

	list_for_each_entry(rule, rules, list) {
		if (rule->iifindex == dev->ifindex)
			rule->iifindex = -1;
		if (rule->oifindex == dev->ifindex)
			rule->oifindex = -1;
	}
}

static int fib_rules_event(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct fib_rules_ops *ops;

	ASSERT_RTNL();

	switch (event) {
	case NETDEV_REGISTER:
		list_for_each_entry(ops, &net->rules_ops, list)
			attach_rules(&ops->rules_list, dev);
		break;

	case NETDEV_UNREGISTER:
		list_for_each_entry(ops, &net->rules_ops, list)
			detach_rules(&ops->rules_list, dev);
		break;
	}

	return NOTIFY_DONE;
}

static struct notifier_block fib_rules_notifier = {
	.notifier_call = fib_rules_event,
};

static int __net_init fib_rules_net_init(struct net *net)
{
	INIT_LIST_HEAD(&net->rules_ops);
	spin_lock_init(&net->rules_mod_lock);
	return 0;
}

static struct pernet_operations fib_rules_net_ops = {
	.init = fib_rules_net_init,
};

static int __init fib_rules_init(void)
{
	int err;
	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);

	err = register_pernet_subsys(&fib_rules_net_ops);
	if (err < 0)
		goto fail;

	err = register_netdevice_notifier(&fib_rules_notifier);
	if (err < 0)
		goto fail_unregister;

	return 0;

fail_unregister:
	unregister_pernet_subsys(&fib_rules_net_ops);
fail:
	rtnl_unregister(PF_UNSPEC, RTM_NEWRULE);
	rtnl_unregister(PF_UNSPEC, RTM_DELRULE);
	rtnl_unregister(PF_UNSPEC, RTM_GETRULE);
	return err;
}

subsys_initcall(fib_rules_init);