Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
route.c
Go to the documentation of this file.
1 /*
2  * Linux INET6 implementation
3  * FIB front-end.
4  *
5  * Authors:
6  * Pedro Roque <roque@di.fc.ul.pt>
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License
10  * as published by the Free Software Foundation; either version
11  * 2 of the License, or (at your option) any later version.
12  */
13 
14 /* Changes:
15  *
16  * YOSHIFUJI Hideaki @USAGI
17  * reworked default router selection.
18  * - respect outgoing interface
19  * - select from (probably) reachable routers (i.e.
20  * routers in REACHABLE, STALE, DELAY or PROBE states).
21  * - always select the same router if it is (probably)
22  * reachable. otherwise, round-robin the list.
23  * Ville Nuorvala
24  * Fixed routing subtrees.
25  */
26 
27 #define pr_fmt(fmt) "IPv6: " fmt
28 
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 
61 #include <asm/uaccess.h>
62 
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66 
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68  const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void ip6_dst_destroy(struct dst_entry *);
74 static void ip6_dst_ifdown(struct dst_entry *,
75  struct net_device *dev, int how);
76 static int ip6_dst_gc(struct dst_ops *ops);
77 
78 static int ip6_pkt_discard(struct sk_buff *skb);
79 static int ip6_pkt_discard_out(struct sk_buff *skb);
80 static void ip6_link_failure(struct sk_buff *skb);
81 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82  struct sk_buff *skb, u32 mtu);
83 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84  struct sk_buff *skb);
85 
86 #ifdef CONFIG_IPV6_ROUTE_INFO
87 static struct rt6_info *rt6_add_route_info(struct net *net,
88  const struct in6_addr *prefix, int prefixlen,
89  const struct in6_addr *gwaddr, int ifindex,
90  unsigned int pref);
91 static struct rt6_info *rt6_get_route_info(struct net *net,
92  const struct in6_addr *prefix, int prefixlen,
93  const struct in6_addr *gwaddr, int ifindex);
94 #endif
95 
96 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97 {
98  struct rt6_info *rt = (struct rt6_info *) dst;
99  struct inet_peer *peer;
100  u32 *p = NULL;
101 
102  if (!(rt->dst.flags & DST_HOST))
103  return NULL;
104 
105  peer = rt6_get_peer_create(rt);
106  if (peer) {
107  u32 *old_p = __DST_METRICS_PTR(old);
108  unsigned long prev, new;
109 
110  p = peer->metrics;
111  if (inet_metrics_new(peer))
112  memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113 
114  new = (unsigned long) p;
115  prev = cmpxchg(&dst->_metrics, old, new);
116 
117  if (prev != old) {
118  p = __DST_METRICS_PTR(prev);
119  if (prev & DST_METRICS_READ_ONLY)
120  p = NULL;
121  }
122  }
123  return p;
124 }
125 
126 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127  struct sk_buff *skb,
128  const void *daddr)
129 {
130  struct in6_addr *p = &rt->rt6i_gateway;
131 
132  if (!ipv6_addr_any(p))
133  return (const void *) p;
134  else if (skb)
135  return &ipv6_hdr(skb)->daddr;
136  return daddr;
137 }
138 
139 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140  struct sk_buff *skb,
141  const void *daddr)
142 {
143  struct rt6_info *rt = (struct rt6_info *) dst;
144  struct neighbour *n;
145 
146  daddr = choose_neigh_daddr(rt, skb, daddr);
147  n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
148  if (n)
149  return n;
150  return neigh_create(&nd_tbl, daddr, dst->dev);
151 }
152 
153 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
154 {
155  struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156  if (!n) {
157  n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158  if (IS_ERR(n))
159  return PTR_ERR(n);
160  }
161  rt->n = n;
162 
163  return 0;
164 }
165 
166 static struct dst_ops ip6_dst_ops_template = {
167  .family = AF_INET6,
168  .protocol = cpu_to_be16(ETH_P_IPV6),
169  .gc = ip6_dst_gc,
170  .gc_thresh = 1024,
171  .check = ip6_dst_check,
172  .default_advmss = ip6_default_advmss,
173  .mtu = ip6_mtu,
174  .cow_metrics = ipv6_cow_metrics,
175  .destroy = ip6_dst_destroy,
176  .ifdown = ip6_dst_ifdown,
177  .negative_advice = ip6_negative_advice,
178  .link_failure = ip6_link_failure,
179  .update_pmtu = ip6_rt_update_pmtu,
180  .redirect = rt6_do_redirect,
181  .local_out = __ip6_local_out,
182  .neigh_lookup = ip6_neigh_lookup,
183 };
184 
185 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
186 {
187  unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188 
189  return mtu ? : dst->dev->mtu;
190 }
191 
192 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193  struct sk_buff *skb, u32 mtu)
194 {
195 }
196 
/* redirect callback for blackhole routes: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
201 
202 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203  unsigned long old)
204 {
205  return NULL;
206 }
207 
208 static struct dst_ops ip6_dst_blackhole_ops = {
209  .family = AF_INET6,
210  .protocol = cpu_to_be16(ETH_P_IPV6),
211  .destroy = ip6_dst_destroy,
212  .check = ip6_dst_check,
213  .mtu = ip6_blackhole_mtu,
214  .default_advmss = ip6_default_advmss,
215  .update_pmtu = ip6_rt_blackhole_update_pmtu,
216  .redirect = ip6_rt_blackhole_redirect,
217  .cow_metrics = ip6_rt_blackhole_cow_metrics,
218  .neigh_lookup = ip6_neigh_lookup,
219 };
220 
221 static const u32 ip6_template_metrics[RTAX_MAX] = {
222  [RTAX_HOPLIMIT - 1] = 0,
223 };
224 
225 static const struct rt6_info ip6_null_entry_template = {
226  .dst = {
227  .__refcnt = ATOMIC_INIT(1),
228  .__use = 1,
229  .obsolete = DST_OBSOLETE_FORCE_CHK,
230  .error = -ENETUNREACH,
231  .input = ip6_pkt_discard,
232  .output = ip6_pkt_discard_out,
233  },
234  .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
236  .rt6i_metric = ~(u32) 0,
237  .rt6i_ref = ATOMIC_INIT(1),
238 };
239 
240 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
241 
242 static int ip6_pkt_prohibit(struct sk_buff *skb);
243 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
244 
245 static const struct rt6_info ip6_prohibit_entry_template = {
246  .dst = {
247  .__refcnt = ATOMIC_INIT(1),
248  .__use = 1,
249  .obsolete = DST_OBSOLETE_FORCE_CHK,
250  .error = -EACCES,
251  .input = ip6_pkt_prohibit,
252  .output = ip6_pkt_prohibit_out,
253  },
254  .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
256  .rt6i_metric = ~(u32) 0,
257  .rt6i_ref = ATOMIC_INIT(1),
258 };
259 
260 static const struct rt6_info ip6_blk_hole_entry_template = {
261  .dst = {
262  .__refcnt = ATOMIC_INIT(1),
263  .__use = 1,
264  .obsolete = DST_OBSOLETE_FORCE_CHK,
265  .error = -EINVAL,
266  .input = dst_discard,
267  .output = dst_discard,
268  },
269  .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
271  .rt6i_metric = ~(u32) 0,
272  .rt6i_ref = ATOMIC_INIT(1),
273 };
274 
275 #endif
276 
277 /* allocate dst with ip6_dst_ops */
278 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
279  struct net_device *dev,
280  int flags,
281  struct fib6_table *table)
282 {
283  struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
284  0, DST_OBSOLETE_FORCE_CHK, flags);
285 
286  if (rt) {
287  struct dst_entry *dst = &rt->dst;
288 
289  memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290  rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291  rt->rt6i_genid = rt_genid(net);
292  }
293  return rt;
294 }
295 
296 static void ip6_dst_destroy(struct dst_entry *dst)
297 {
298  struct rt6_info *rt = (struct rt6_info *)dst;
299  struct inet6_dev *idev = rt->rt6i_idev;
300 
301  if (rt->n)
302  neigh_release(rt->n);
303 
304  if (!(rt->dst.flags & DST_HOST))
305  dst_destroy_metrics_generic(dst);
306 
307  if (idev) {
308  rt->rt6i_idev = NULL;
309  in6_dev_put(idev);
310  }
311 
312  if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
313  dst_release(dst->from);
314 
315  if (rt6_has_peer(rt)) {
316  struct inet_peer *peer = rt6_peer_ptr(rt);
317  inet_putpeer(peer);
318  }
319 }
320 
321 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
322 
323 static u32 rt6_peer_genid(void)
324 {
325  return atomic_read(&__rt6_peer_genid);
326 }
327 
328 void rt6_bind_peer(struct rt6_info *rt, int create)
329 {
330  struct inet_peer_base *base;
331  struct inet_peer *peer;
332 
333  base = inetpeer_base_ptr(rt->_rt6i_peer);
334  if (!base)
335  return;
336 
337  peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
338  if (peer) {
339  if (!rt6_set_peer(rt, peer))
340  inet_putpeer(peer);
341  else
342  rt->rt6i_peer_genid = rt6_peer_genid();
343  }
344 }
345 
346 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
347  int how)
348 {
349  struct rt6_info *rt = (struct rt6_info *)dst;
350  struct inet6_dev *idev = rt->rt6i_idev;
351  struct net_device *loopback_dev =
352  dev_net(dev)->loopback_dev;
353 
354  if (dev != loopback_dev) {
355  if (idev && idev->dev == dev) {
356  struct inet6_dev *loopback_idev =
357  in6_dev_get(loopback_dev);
358  if (loopback_idev) {
359  rt->rt6i_idev = loopback_idev;
360  in6_dev_put(idev);
361  }
362  }
363  if (rt->n && rt->n->dev == dev) {
364  rt->n->dev = loopback_dev;
365  dev_hold(loopback_dev);
366  dev_put(dev);
367  }
368  }
369 }
370 
371 static bool rt6_check_expired(const struct rt6_info *rt)
372 {
373  if (rt->rt6i_flags & RTF_EXPIRES) {
374  if (time_after(jiffies, rt->dst.expires))
375  return true;
376  } else if (rt->dst.from) {
377  return rt6_check_expired((struct rt6_info *) rt->dst.from);
378  }
379  return false;
380 }
381 
382 static bool rt6_need_strict(const struct in6_addr *daddr)
383 {
384  return ipv6_addr_type(daddr) &
386 }
387 
388 /*
389  * Route lookup. Any table->tb6_lock is implied.
390  */
391 
392 static inline struct rt6_info *rt6_device_match(struct net *net,
393  struct rt6_info *rt,
394  const struct in6_addr *saddr,
395  int oif,
396  int flags)
397 {
398  struct rt6_info *local = NULL;
399  struct rt6_info *sprt;
400 
401  if (!oif && ipv6_addr_any(saddr))
402  goto out;
403 
404  for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
405  struct net_device *dev = sprt->dst.dev;
406 
407  if (oif) {
408  if (dev->ifindex == oif)
409  return sprt;
410  if (dev->flags & IFF_LOOPBACK) {
411  if (!sprt->rt6i_idev ||
412  sprt->rt6i_idev->dev->ifindex != oif) {
413  if (flags & RT6_LOOKUP_F_IFACE && oif)
414  continue;
415  if (local && (!oif ||
416  local->rt6i_idev->dev->ifindex == oif))
417  continue;
418  }
419  local = sprt;
420  }
421  } else {
422  if (ipv6_chk_addr(net, saddr, dev,
423  flags & RT6_LOOKUP_F_IFACE))
424  return sprt;
425  }
426  }
427 
428  if (oif) {
429  if (local)
430  return local;
431 
432  if (flags & RT6_LOOKUP_F_IFACE)
433  return net->ipv6.ip6_null_entry;
434  }
435 out:
436  return rt;
437 }
438 
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation to the
 * route's neighbour when its state is not NUD_VALID and the last update
 * is older than the interface's rtr_probe_interval.
 *
 * Probes must be rate-limited, so neigh->updated is refreshed before the
 * solicitation goes out.  That refresh is a write to the neighbour, so
 * the check-and-update runs under the write lock; this also keeps two
 * concurrent callers from both deciding to probe.
 *
 * A NULL @rt or a route without a neighbour is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	bool probe;

	neigh = rt ? rt->n : NULL;
	/* Unlocked fast path; the state is re-checked under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	probe = !(neigh->nud_state & NUD_VALID) &&
		time_after(jiffies,
			   neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe)
		neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	if (!probe)
		return;

	/* Solicit the neighbour via its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without router preference support, probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
475 
476 /*
477  * Default Router Selection (RFC 2461 6.3.6)
478  */
479 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
480 {
481  struct net_device *dev = rt->dst.dev;
482  if (!oif || dev->ifindex == oif)
483  return 2;
484  if ((dev->flags & IFF_LOOPBACK) &&
485  rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
486  return 1;
487  return 0;
488 }
489 
490 static inline int rt6_check_neigh(struct rt6_info *rt)
491 {
492  struct neighbour *neigh;
493  int m;
494 
495  neigh = rt->n;
496  if (rt->rt6i_flags & RTF_NONEXTHOP ||
497  !(rt->rt6i_flags & RTF_GATEWAY))
498  m = 1;
499  else if (neigh) {
500  read_lock_bh(&neigh->lock);
501  if (neigh->nud_state & NUD_VALID)
502  m = 2;
503 #ifdef CONFIG_IPV6_ROUTER_PREF
504  else if (neigh->nud_state & NUD_FAILED)
505  m = 0;
506 #endif
507  else
508  m = 1;
509  read_unlock_bh(&neigh->lock);
510  } else
511  m = 0;
512  return m;
513 }
514 
515 static int rt6_score_route(struct rt6_info *rt, int oif,
516  int strict)
517 {
518  int m, n;
519 
520  m = rt6_check_dev(rt, oif);
521  if (!m && (strict & RT6_LOOKUP_F_IFACE))
522  return -1;
523 #ifdef CONFIG_IPV6_ROUTER_PREF
525 #endif
526  n = rt6_check_neigh(rt);
527  if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
528  return -1;
529  return m;
530 }
531 
532 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
533  int *mpri, struct rt6_info *match)
534 {
535  int m;
536 
537  if (rt6_check_expired(rt))
538  goto out;
539 
540  m = rt6_score_route(rt, oif, strict);
541  if (m < 0)
542  goto out;
543 
544  if (m > *mpri) {
545  if (strict & RT6_LOOKUP_F_REACHABLE)
546  rt6_probe(match);
547  *mpri = m;
548  match = rt;
549  } else if (strict & RT6_LOOKUP_F_REACHABLE) {
550  rt6_probe(rt);
551  }
552 
553 out:
554  return match;
555 }
556 
557 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
558  struct rt6_info *rr_head,
559  u32 metric, int oif, int strict)
560 {
561  struct rt6_info *rt, *match;
562  int mpri = -1;
563 
564  match = NULL;
565  for (rt = rr_head; rt && rt->rt6i_metric == metric;
566  rt = rt->dst.rt6_next)
567  match = find_match(rt, oif, strict, &mpri, match);
568  for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
569  rt = rt->dst.rt6_next)
570  match = find_match(rt, oif, strict, &mpri, match);
571 
572  return match;
573 }
574 
575 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
576 {
577  struct rt6_info *match, *rt0;
578  struct net *net;
579 
580  rt0 = fn->rr_ptr;
581  if (!rt0)
582  fn->rr_ptr = rt0 = fn->leaf;
583 
584  match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
585 
586  if (!match &&
587  (strict & RT6_LOOKUP_F_REACHABLE)) {
588  struct rt6_info *next = rt0->dst.rt6_next;
589 
590  /* no entries matched; do round-robin */
591  if (!next || next->rt6i_metric != rt0->rt6i_metric)
592  next = fn->leaf;
593 
594  if (next != rt0)
595  fn->rr_ptr = next;
596  }
597 
598  net = dev_net(rt0->dst.dev);
599  return match ? match : net->ipv6.ip6_null_entry;
600 }
601 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev from router
 * @gwaddr.
 *
 * @opt/@len describe the raw option.  After validating its length,
 * prefix length and preference, the matching route-info route is looked
 * up and then:
 *   - deleted when the advertised lifetime is 0,
 *   - created when it does not exist and the lifetime is non-zero,
 *   - otherwise updated with the new preference,
 * and its expiry is set from the lifetime (or cleared for an infinite
 * lifetime).
 *
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr masked_prefix;
	struct in6_addr *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* The option carries the full 128-bit prefix. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
674 
/*
 * BACKTRACK(net, saddr) - walk back up the FIB tree after a failed match.
 *
 * Relies on the enclosing function providing the variables "rt" and
 * "fn" and the labels "out" and "restart".  If rt is the namespace's
 * null entry, climb from fn towards the root: at each step move to the
 * parent, or into the parent's source subtree (looked up by @saddr) when
 * it has one that we did not just come from.  Jump to "restart" at the
 * first node flagged RTN_RTINFO, or to "out" once the top-level root is
 * reached.
 *
 * Both arguments are parenthesised so arbitrary expressions may be
 * passed.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, (saddr));	\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
692 
693 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
694  struct fib6_table *table,
695  struct flowi6 *fl6, int flags)
696 {
697  struct fib6_node *fn;
698  struct rt6_info *rt;
699 
700  read_lock_bh(&table->tb6_lock);
701  fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
702 restart:
703  rt = fn->leaf;
704  rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
705  BACKTRACK(net, &fl6->saddr);
706 out:
707  dst_use(&rt->dst, jiffies);
708  read_unlock_bh(&table->tb6_lock);
709  return rt;
710 
711 }
712 
713 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
714  int flags)
715 {
716  return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
717 }
719 
720 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
721  const struct in6_addr *saddr, int oif, int strict)
722 {
723  struct flowi6 fl6 = {
724  .flowi6_oif = oif,
725  .daddr = *daddr,
726  };
727  struct dst_entry *dst;
728  int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
729 
730  if (saddr) {
731  memcpy(&fl6.saddr, saddr, sizeof(*saddr));
732  flags |= RT6_LOOKUP_F_HAS_SADDR;
733  }
734 
735  dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
736  if (dst->error == 0)
737  return (struct rt6_info *) dst;
738 
739  dst_release(dst);
740 
741  return NULL;
742 }
743 
745 
746 /* ip6_ins_rt is called with FREE table->tb6_lock.
747  It takes new route entry, the addition fails by any reason the
748  route is freed. In any case, if caller does not hold it, it may
749  be destroyed.
750  */
751 
752 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
753 {
754  int err;
755  struct fib6_table *table;
756 
757  table = rt->rt6i_table;
758  write_lock_bh(&table->tb6_lock);
759  err = fib6_add(&table->tb6_root, rt, info);
760  write_unlock_bh(&table->tb6_lock);
761 
762  return err;
763 }
764 
765 int ip6_ins_rt(struct rt6_info *rt)
766 {
767  struct nl_info info = {
768  .nl_net = dev_net(rt->dst.dev),
769  };
770  return __ip6_ins_rt(rt, &info);
771 }
772 
773 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
774  const struct in6_addr *daddr,
775  const struct in6_addr *saddr)
776 {
777  struct rt6_info *rt;
778 
779  /*
780  * Clone the route.
781  */
782 
783  rt = ip6_rt_copy(ort, daddr);
784 
785  if (rt) {
786  int attempts = !in_softirq();
787 
788  if (!(rt->rt6i_flags & RTF_GATEWAY)) {
789  if (ort->rt6i_dst.plen != 128 &&
790  ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
791  rt->rt6i_flags |= RTF_ANYCAST;
792  rt->rt6i_gateway = *daddr;
793  }
794 
795  rt->rt6i_flags |= RTF_CACHE;
796 
797 #ifdef CONFIG_IPV6_SUBTREES
798  if (rt->rt6i_src.plen && saddr) {
799  rt->rt6i_src.addr = *saddr;
800  rt->rt6i_src.plen = 128;
801  }
802 #endif
803 
804  retry:
805  if (rt6_bind_neighbour(rt, rt->dst.dev)) {
806  struct net *net = dev_net(rt->dst.dev);
807  int saved_rt_min_interval =
808  net->ipv6.sysctl.ip6_rt_gc_min_interval;
809  int saved_rt_elasticity =
810  net->ipv6.sysctl.ip6_rt_gc_elasticity;
811 
812  if (attempts-- > 0) {
813  net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
814  net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
815 
816  ip6_dst_gc(&net->ipv6.ip6_dst_ops);
817 
818  net->ipv6.sysctl.ip6_rt_gc_elasticity =
819  saved_rt_elasticity;
820  net->ipv6.sysctl.ip6_rt_gc_min_interval =
821  saved_rt_min_interval;
822  goto retry;
823  }
824 
825  net_warn_ratelimited("Neighbour table overflow\n");
826  dst_free(&rt->dst);
827  return NULL;
828  }
829  }
830 
831  return rt;
832 }
833 
834 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
835  const struct in6_addr *daddr)
836 {
837  struct rt6_info *rt = ip6_rt_copy(ort, daddr);
838 
839  if (rt) {
840  rt->rt6i_flags |= RTF_CACHE;
841  rt->n = neigh_clone(ort->n);
842  }
843  return rt;
844 }
845 
846 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
847  struct flowi6 *fl6, int flags)
848 {
849  struct fib6_node *fn;
850  struct rt6_info *rt, *nrt;
851  int strict = 0;
852  int attempts = 3;
853  int err;
854  int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
855 
856  strict |= flags & RT6_LOOKUP_F_IFACE;
857 
858 relookup:
859  read_lock_bh(&table->tb6_lock);
860 
861 restart_2:
862  fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
863 
864 restart:
865  rt = rt6_select(fn, oif, strict | reachable);
866 
867  BACKTRACK(net, &fl6->saddr);
868  if (rt == net->ipv6.ip6_null_entry ||
869  rt->rt6i_flags & RTF_CACHE)
870  goto out;
871 
872  dst_hold(&rt->dst);
873  read_unlock_bh(&table->tb6_lock);
874 
875  if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
876  nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
877  else if (!(rt->dst.flags & DST_HOST))
878  nrt = rt6_alloc_clone(rt, &fl6->daddr);
879  else
880  goto out2;
881 
882  dst_release(&rt->dst);
883  rt = nrt ? : net->ipv6.ip6_null_entry;
884 
885  dst_hold(&rt->dst);
886  if (nrt) {
887  err = ip6_ins_rt(nrt);
888  if (!err)
889  goto out2;
890  }
891 
892  if (--attempts <= 0)
893  goto out2;
894 
895  /*
896  * Race condition! In the gap, when table->tb6_lock was
897  * released someone could insert this route. Relookup.
898  */
899  dst_release(&rt->dst);
900  goto relookup;
901 
902 out:
903  if (reachable) {
904  reachable = 0;
905  goto restart_2;
906  }
907  dst_hold(&rt->dst);
908  read_unlock_bh(&table->tb6_lock);
909 out2:
910  rt->dst.lastuse = jiffies;
911  rt->dst.__use++;
912 
913  return rt;
914 }
915 
916 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
917  struct flowi6 *fl6, int flags)
918 {
919  return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
920 }
921 
922 static struct dst_entry *ip6_route_input_lookup(struct net *net,
923  struct net_device *dev,
924  struct flowi6 *fl6, int flags)
925 {
926  if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
927  flags |= RT6_LOOKUP_F_IFACE;
928 
929  return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
930 }
931 
932 void ip6_route_input(struct sk_buff *skb)
933 {
934  const struct ipv6hdr *iph = ipv6_hdr(skb);
935  struct net *net = dev_net(skb->dev);
936  int flags = RT6_LOOKUP_F_HAS_SADDR;
937  struct flowi6 fl6 = {
938  .flowi6_iif = skb->dev->ifindex,
939  .daddr = iph->daddr,
940  .saddr = iph->saddr,
941  .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
942  .flowi6_mark = skb->mark,
943  .flowi6_proto = iph->nexthdr,
944  };
945 
946  skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
947 }
948 
949 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
950  struct flowi6 *fl6, int flags)
951 {
952  return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
953 }
954 
955 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
956  struct flowi6 *fl6)
957 {
958  int flags = 0;
959 
960  fl6->flowi6_iif = LOOPBACK_IFINDEX;
961 
962  if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
963  flags |= RT6_LOOKUP_F_IFACE;
964 
965  if (!ipv6_addr_any(&fl6->saddr))
966  flags |= RT6_LOOKUP_F_HAS_SADDR;
967  else if (sk)
968  flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
969 
970  return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
971 }
972 
974 
975 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
976 {
977  struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
978  struct dst_entry *new = NULL;
979 
980  rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
981  if (rt) {
982  new = &rt->dst;
983 
984  memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
985  rt6_init_peer(rt, net->ipv6.peers);
986 
987  new->__use = 1;
988  new->input = dst_discard;
989  new->output = dst_discard;
990 
991  if (dst_metrics_read_only(&ort->dst))
992  new->_metrics = ort->dst._metrics;
993  else
994  dst_copy_metrics(new, &ort->dst);
995  rt->rt6i_idev = ort->rt6i_idev;
996  if (rt->rt6i_idev)
997  in6_dev_hold(rt->rt6i_idev);
998 
999  rt->rt6i_gateway = ort->rt6i_gateway;
1000  rt->rt6i_flags = ort->rt6i_flags;
1001  rt6_clean_expires(rt);
1002  rt->rt6i_metric = 0;
1003 
1004  memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1005 #ifdef CONFIG_IPV6_SUBTREES
1006  memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1007 #endif
1008 
1009  dst_free(new);
1010  }
1011 
1012  dst_release(dst_orig);
1013  return new ? new : ERR_PTR(-ENOMEM);
1014 }
1015 
1016 /*
1017  * Destination cache support functions
1018  */
1019 
1020 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1021 {
1022  struct rt6_info *rt;
1023 
1024  rt = (struct rt6_info *) dst;
1025 
1026  /* All IPV6 dsts are created with ->obsolete set to the value
1027  * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1028  * into this function always.
1029  */
1030  if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1031  return NULL;
1032 
1033  if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1034  if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1035  if (!rt6_has_peer(rt))
1036  rt6_bind_peer(rt, 0);
1037  rt->rt6i_peer_genid = rt6_peer_genid();
1038  }
1039  return dst;
1040  }
1041  return NULL;
1042 }
1043 
1044 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1045 {
1046  struct rt6_info *rt = (struct rt6_info *) dst;
1047 
1048  if (rt) {
1049  if (rt->rt6i_flags & RTF_CACHE) {
1050  if (rt6_check_expired(rt)) {
1051  ip6_del_rt(rt);
1052  dst = NULL;
1053  }
1054  } else {
1055  dst_release(dst);
1056  dst = NULL;
1057  }
1058  }
1059  return dst;
1060 }
1061 
1062 static void ip6_link_failure(struct sk_buff *skb)
1063 {
1064  struct rt6_info *rt;
1065 
1067 
1068  rt = (struct rt6_info *) skb_dst(skb);
1069  if (rt) {
1070  if (rt->rt6i_flags & RTF_CACHE)
1071  rt6_update_expires(rt, 0);
1072  else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1073  rt->rt6i_node->fn_sernum = -1;
1074  }
1075 }
1076 
1077 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1078  struct sk_buff *skb, u32 mtu)
1079 {
1080  struct rt6_info *rt6 = (struct rt6_info*)dst;
1081 
1082  dst_confirm(dst);
1083  if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1084  struct net *net = dev_net(dst->dev);
1085 
1086  rt6->rt6i_flags |= RTF_MODIFIED;
1087  if (mtu < IPV6_MIN_MTU) {
1088  u32 features = dst_metric(dst, RTAX_FEATURES);
1089  mtu = IPV6_MIN_MTU;
1090  features |= RTAX_FEATURE_ALLFRAG;
1091  dst_metric_set(dst, RTAX_FEATURES, features);
1092  }
1093  dst_metric_set(dst, RTAX_MTU, mtu);
1094  rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1095  }
1096 }
1097 
1098 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1099  int oif, u32 mark)
1100 {
1101  const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1102  struct dst_entry *dst;
1103  struct flowi6 fl6;
1104 
1105  memset(&fl6, 0, sizeof(fl6));
1106  fl6.flowi6_oif = oif;
1107  fl6.flowi6_mark = mark;
1108  fl6.flowi6_flags = 0;
1109  fl6.daddr = iph->daddr;
1110  fl6.saddr = iph->saddr;
1111  fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1112 
1113  dst = ip6_route_output(net, NULL, &fl6);
1114  if (!dst->error)
1115  ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1116  dst_release(dst);
1117 }
1119 
1120 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1121 {
1122  ip6_update_pmtu(skb, sock_net(sk), mtu,
1123  sk->sk_bound_dev_if, sk->sk_mark);
1124 }
1126 
1127 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1128 {
1129  const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1130  struct dst_entry *dst;
1131  struct flowi6 fl6;
1132 
1133  memset(&fl6, 0, sizeof(fl6));
1134  fl6.flowi6_oif = oif;
1135  fl6.flowi6_mark = mark;
1136  fl6.flowi6_flags = 0;
1137  fl6.daddr = iph->daddr;
1138  fl6.saddr = iph->saddr;
1139  fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1140 
1141  dst = ip6_route_output(net, NULL, &fl6);
1142  if (!dst->error)
1143  rt6_do_redirect(dst, NULL, skb);
1144  dst_release(dst);
1145 }
1147 
1148 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1149 {
1150  ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1151 }
1153 
1154 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1155 {
1156  struct net_device *dev = dst->dev;
1157  unsigned int mtu = dst_mtu(dst);
1158  struct net *net = dev_net(dev);
1159 
1160  mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1161 
1162  if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1163  mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1164 
1165  /*
1166  * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1167  * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1168  * IPV6_MAXPLEN is also valid and means: "any MSS,
1169  * rely only on pmtu discovery"
1170  */
1171  if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1172  mtu = IPV6_MAXPLEN;
1173  return mtu;
1174 }
1175 
1176 static unsigned int ip6_mtu(const struct dst_entry *dst)
1177 {
1178  struct inet6_dev *idev;
1179  unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1180 
1181  if (mtu)
1182  return mtu;
1183 
1184  mtu = IPV6_MIN_MTU;
1185 
1186  rcu_read_lock();
1187  idev = __in6_dev_get(dst->dev);
1188  if (idev)
1189  mtu = idev->cnf.mtu6;
1190  rcu_read_unlock();
1191 
1192  return mtu;
1193 }
1194 
1195 static struct dst_entry *icmp6_dst_gc_list;
1196 static DEFINE_SPINLOCK(icmp6_dst_lock);
1197 
1199  struct neighbour *neigh,
1200  struct flowi6 *fl6)
1201 {
1202  struct dst_entry *dst;
1203  struct rt6_info *rt;
1204  struct inet6_dev *idev = in6_dev_get(dev);
1205  struct net *net = dev_net(dev);
1206 
1207  if (unlikely(!idev))
1208  return ERR_PTR(-ENODEV);
1209 
1210  rt = ip6_dst_alloc(net, dev, 0, NULL);
1211  if (unlikely(!rt)) {
1212  in6_dev_put(idev);
1213  dst = ERR_PTR(-ENOMEM);
1214  goto out;
1215  }
1216 
1217  if (neigh)
1218  neigh_hold(neigh);
1219  else {
1220  neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1221  if (IS_ERR(neigh)) {
1222  in6_dev_put(idev);
1223  dst_free(&rt->dst);
1224  return ERR_CAST(neigh);
1225  }
1226  }
1227 
1228  rt->dst.flags |= DST_HOST;
1229  rt->dst.output = ip6_output;
1230  rt->n = neigh;
1231  atomic_set(&rt->dst.__refcnt, 1);
1232  rt->rt6i_dst.addr = fl6->daddr;
1233  rt->rt6i_dst.plen = 128;
1234  rt->rt6i_idev = idev;
1235  dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1236 
1237  spin_lock_bh(&icmp6_dst_lock);
1238  rt->dst.next = icmp6_dst_gc_list;
1239  icmp6_dst_gc_list = &rt->dst;
1240  spin_unlock_bh(&icmp6_dst_lock);
1241 
1242  fib6_force_start_gc(net);
1243 
1244  dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1245 
1246 out:
1247  return dst;
1248 }
1249 
1250 int icmp6_dst_gc(void)
1251 {
1252  struct dst_entry *dst, **pprev;
1253  int more = 0;
1254 
1255  spin_lock_bh(&icmp6_dst_lock);
1256  pprev = &icmp6_dst_gc_list;
1257 
1258  while ((dst = *pprev) != NULL) {
1259  if (!atomic_read(&dst->__refcnt)) {
1260  *pprev = dst->next;
1261  dst_free(dst);
1262  } else {
1263  pprev = &dst->next;
1264  ++more;
1265  }
1266  }
1267 
1268  spin_unlock_bh(&icmp6_dst_lock);
1269 
1270  return more;
1271 }
1272 
1273 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1274  void *arg)
1275 {
1276  struct dst_entry *dst, **pprev;
1277 
1278  spin_lock_bh(&icmp6_dst_lock);
1279  pprev = &icmp6_dst_gc_list;
1280  while ((dst = *pprev) != NULL) {
1281  struct rt6_info *rt = (struct rt6_info *) dst;
1282  if (func(rt, arg)) {
1283  *pprev = dst->next;
1284  dst_free(dst);
1285  } else {
1286  pprev = &dst->next;
1287  }
1288  }
1289  spin_unlock_bh(&icmp6_dst_lock);
1290 }
1291 
1292 static int ip6_dst_gc(struct dst_ops *ops)
1293 {
1294  unsigned long now = jiffies;
1295  struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1296  int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1297  int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1298  int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1299  int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1300  unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1301  int entries;
1302 
1303  entries = dst_entries_get_fast(ops);
1304  if (time_after(rt_last_gc + rt_min_interval, now) &&
1305  entries <= rt_max_size)
1306  goto out;
1307 
1308  net->ipv6.ip6_rt_gc_expire++;
1309  fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1310  net->ipv6.ip6_rt_last_gc = now;
1311  entries = dst_entries_get_slow(ops);
1312  if (entries < ops->gc_thresh)
1313  net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1314 out:
1315  net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1316  return entries > rt_max_size;
1317 }
1318 
1319 /* Clean host part of a prefix. Not necessary in radix tree,
1320  but results in cleaner routing tables.
1321 
1322  Remove it only when all the things will work!
1323  */
1324 
1325 int ip6_dst_hoplimit(struct dst_entry *dst)
1326 {
1327  int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1328  if (hoplimit == 0) {
1329  struct net_device *dev = dst->dev;
1330  struct inet6_dev *idev;
1331 
1332  rcu_read_lock();
1333  idev = __in6_dev_get(dev);
1334  if (idev)
1335  hoplimit = idev->cnf.hop_limit;
1336  else
1337  hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1338  rcu_read_unlock();
1339  }
1340  return hoplimit;
1341 }
1343 
1344 /*
1345  *
1346  */
1347 
1349 {
1350  int err;
1351  struct net *net = cfg->fc_nlinfo.nl_net;
1352  struct rt6_info *rt = NULL;
1353  struct net_device *dev = NULL;
1354  struct inet6_dev *idev = NULL;
1355  struct fib6_table *table;
1356  int addr_type;
1357 
1358  if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1359  return -EINVAL;
1360 #ifndef CONFIG_IPV6_SUBTREES
1361  if (cfg->fc_src_len)
1362  return -EINVAL;
1363 #endif
1364  if (cfg->fc_ifindex) {
1365  err = -ENODEV;
1366  dev = dev_get_by_index(net, cfg->fc_ifindex);
1367  if (!dev)
1368  goto out;
1369  idev = in6_dev_get(dev);
1370  if (!idev)
1371  goto out;
1372  }
1373 
1374  if (cfg->fc_metric == 0)
1375  cfg->fc_metric = IP6_RT_PRIO_USER;
1376 
1377  err = -ENOBUFS;
1378  if (cfg->fc_nlinfo.nlh &&
1379  !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1380  table = fib6_get_table(net, cfg->fc_table);
1381  if (!table) {
1382  pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1383  table = fib6_new_table(net, cfg->fc_table);
1384  }
1385  } else {
1386  table = fib6_new_table(net, cfg->fc_table);
1387  }
1388 
1389  if (!table)
1390  goto out;
1391 
1392  rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1393 
1394  if (!rt) {
1395  err = -ENOMEM;
1396  goto out;
1397  }
1398 
1399  if (cfg->fc_flags & RTF_EXPIRES)
1400  rt6_set_expires(rt, jiffies +
1402  else
1403  rt6_clean_expires(rt);
1404 
1405  if (cfg->fc_protocol == RTPROT_UNSPEC)
1406  cfg->fc_protocol = RTPROT_BOOT;
1407  rt->rt6i_protocol = cfg->fc_protocol;
1408 
1409  addr_type = ipv6_addr_type(&cfg->fc_dst);
1410 
1411  if (addr_type & IPV6_ADDR_MULTICAST)
1412  rt->dst.input = ip6_mc_input;
1413  else if (cfg->fc_flags & RTF_LOCAL)
1414  rt->dst.input = ip6_input;
1415  else
1416  rt->dst.input = ip6_forward;
1417 
1418  rt->dst.output = ip6_output;
1419 
1420  ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1421  rt->rt6i_dst.plen = cfg->fc_dst_len;
1422  if (rt->rt6i_dst.plen == 128)
1423  rt->dst.flags |= DST_HOST;
1424 
1425  if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1426  u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1427  if (!metrics) {
1428  err = -ENOMEM;
1429  goto out;
1430  }
1431  dst_init_metrics(&rt->dst, metrics, 0);
1432  }
1433 #ifdef CONFIG_IPV6_SUBTREES
1434  ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1435  rt->rt6i_src.plen = cfg->fc_src_len;
1436 #endif
1437 
1438  rt->rt6i_metric = cfg->fc_metric;
1439 
1440  /* We cannot add true routes via loopback here,
1441  they would result in kernel looping; promote them to reject routes
1442  */
1443  if ((cfg->fc_flags & RTF_REJECT) ||
1444  (dev && (dev->flags & IFF_LOOPBACK) &&
1445  !(addr_type & IPV6_ADDR_LOOPBACK) &&
1446  !(cfg->fc_flags & RTF_LOCAL))) {
1447  /* hold loopback dev/idev if we haven't done so. */
1448  if (dev != net->loopback_dev) {
1449  if (dev) {
1450  dev_put(dev);
1451  in6_dev_put(idev);
1452  }
1453  dev = net->loopback_dev;
1454  dev_hold(dev);
1455  idev = in6_dev_get(dev);
1456  if (!idev) {
1457  err = -ENODEV;
1458  goto out;
1459  }
1460  }
1461  rt->dst.output = ip6_pkt_discard_out;
1462  rt->dst.input = ip6_pkt_discard;
1464  switch (cfg->fc_type) {
1465  case RTN_BLACKHOLE:
1466  rt->dst.error = -EINVAL;
1467  break;
1468  case RTN_PROHIBIT:
1469  rt->dst.error = -EACCES;
1470  break;
1471  case RTN_THROW:
1472  rt->dst.error = -EAGAIN;
1473  break;
1474  default:
1475  rt->dst.error = -ENETUNREACH;
1476  break;
1477  }
1478  goto install_route;
1479  }
1480 
1481  if (cfg->fc_flags & RTF_GATEWAY) {
1482  const struct in6_addr *gw_addr;
1483  int gwa_type;
1484 
1485  gw_addr = &cfg->fc_gateway;
1486  rt->rt6i_gateway = *gw_addr;
1487  gwa_type = ipv6_addr_type(gw_addr);
1488 
1489  if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1490  struct rt6_info *grt;
1491 
1492  /* IPv6 strictly inhibits using not link-local
1493  addresses as nexthop address.
1494  Otherwise, router will not able to send redirects.
1495  It is very good, but in some (rare!) circumstances
1496  (SIT, PtP, NBMA NOARP links) it is handy to allow
1497  some exceptions. --ANK
1498  */
1499  err = -EINVAL;
1500  if (!(gwa_type & IPV6_ADDR_UNICAST))
1501  goto out;
1502 
1503  grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1504 
1505  err = -EHOSTUNREACH;
1506  if (!grt)
1507  goto out;
1508  if (dev) {
1509  if (dev != grt->dst.dev) {
1510  dst_release(&grt->dst);
1511  goto out;
1512  }
1513  } else {
1514  dev = grt->dst.dev;
1515  idev = grt->rt6i_idev;
1516  dev_hold(dev);
1517  in6_dev_hold(grt->rt6i_idev);
1518  }
1519  if (!(grt->rt6i_flags & RTF_GATEWAY))
1520  err = 0;
1521  dst_release(&grt->dst);
1522 
1523  if (err)
1524  goto out;
1525  }
1526  err = -EINVAL;
1527  if (!dev || (dev->flags & IFF_LOOPBACK))
1528  goto out;
1529  }
1530 
1531  err = -ENODEV;
1532  if (!dev)
1533  goto out;
1534 
1535  if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1536  if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1537  err = -EINVAL;
1538  goto out;
1539  }
1540  rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1541  rt->rt6i_prefsrc.plen = 128;
1542  } else
1543  rt->rt6i_prefsrc.plen = 0;
1544 
1545  if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1546  err = rt6_bind_neighbour(rt, dev);
1547  if (err)
1548  goto out;
1549  }
1550 
1551  rt->rt6i_flags = cfg->fc_flags;
1552 
1553 install_route:
1554  if (cfg->fc_mx) {
1555  struct nlattr *nla;
1556  int remaining;
1557 
1558  nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1559  int type = nla_type(nla);
1560 
1561  if (type) {
1562  if (type > RTAX_MAX) {
1563  err = -EINVAL;
1564  goto out;
1565  }
1566 
1567  dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1568  }
1569  }
1570  }
1571 
1572  rt->dst.dev = dev;
1573  rt->rt6i_idev = idev;
1574  rt->rt6i_table = table;
1575 
1576  cfg->fc_nlinfo.nl_net = dev_net(dev);
1577 
1578  return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1579 
1580 out:
1581  if (dev)
1582  dev_put(dev);
1583  if (idev)
1584  in6_dev_put(idev);
1585  if (rt)
1586  dst_free(&rt->dst);
1587  return err;
1588 }
1589 
1590 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1591 {
1592  int err;
1593  struct fib6_table *table;
1594  struct net *net = dev_net(rt->dst.dev);
1595 
1596  if (rt == net->ipv6.ip6_null_entry) {
1597  err = -ENOENT;
1598  goto out;
1599  }
1600 
1601  table = rt->rt6i_table;
1602  write_lock_bh(&table->tb6_lock);
1603  err = fib6_del(rt, info);
1604  write_unlock_bh(&table->tb6_lock);
1605 
1606 out:
1607  dst_release(&rt->dst);
1608  return err;
1609 }
1610 
1611 int ip6_del_rt(struct rt6_info *rt)
1612 {
1613  struct nl_info info = {
1614  .nl_net = dev_net(rt->dst.dev),
1615  };
1616  return __ip6_del_rt(rt, &info);
1617 }
1618 
1619 static int ip6_route_del(struct fib6_config *cfg)
1620 {
1621  struct fib6_table *table;
1622  struct fib6_node *fn;
1623  struct rt6_info *rt;
1624  int err = -ESRCH;
1625 
1626  table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1627  if (!table)
1628  return err;
1629 
1630  read_lock_bh(&table->tb6_lock);
1631 
1632  fn = fib6_locate(&table->tb6_root,
1633  &cfg->fc_dst, cfg->fc_dst_len,
1634  &cfg->fc_src, cfg->fc_src_len);
1635 
1636  if (fn) {
1637  for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1638  if (cfg->fc_ifindex &&
1639  (!rt->dst.dev ||
1640  rt->dst.dev->ifindex != cfg->fc_ifindex))
1641  continue;
1642  if (cfg->fc_flags & RTF_GATEWAY &&
1643  !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1644  continue;
1645  if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1646  continue;
1647  dst_hold(&rt->dst);
1648  read_unlock_bh(&table->tb6_lock);
1649 
1650  return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1651  }
1652  }
1653  read_unlock_bh(&table->tb6_lock);
1654 
1655  return err;
1656 }
1657 
1658 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1659 {
1660  struct net *net = dev_net(skb->dev);
1661  struct netevent_redirect netevent;
1662  struct rt6_info *rt, *nrt = NULL;
1663  const struct in6_addr *target;
1664  struct ndisc_options ndopts;
1665  const struct in6_addr *dest;
1666  struct neighbour *old_neigh;
1667  struct inet6_dev *in6_dev;
1668  struct neighbour *neigh;
1669  struct icmp6hdr *icmph;
1670  int optlen, on_link;
1671  u8 *lladdr;
1672 
1673  optlen = skb->tail - skb->transport_header;
1674  optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1675 
1676  if (optlen < 0) {
1677  net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1678  return;
1679  }
1680 
1681  icmph = icmp6_hdr(skb);
1682  target = (const struct in6_addr *) (icmph + 1);
1683  dest = target + 1;
1684 
1685  if (ipv6_addr_is_multicast(dest)) {
1686  net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1687  return;
1688  }
1689 
1690  on_link = 0;
1691  if (ipv6_addr_equal(dest, target)) {
1692  on_link = 1;
1693  } else if (ipv6_addr_type(target) !=
1695  net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1696  return;
1697  }
1698 
1699  in6_dev = __in6_dev_get(skb->dev);
1700  if (!in6_dev)
1701  return;
1702  if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1703  return;
1704 
1705  /* RFC2461 8.1:
1706  * The IP source address of the Redirect MUST be the same as the current
1707  * first-hop router for the specified ICMP Destination Address.
1708  */
1709 
1710  if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1711  net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1712  return;
1713  }
1714 
1715  lladdr = NULL;
1716  if (ndopts.nd_opts_tgt_lladdr) {
1717  lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1718  skb->dev);
1719  if (!lladdr) {
1720  net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1721  return;
1722  }
1723  }
1724 
1725  rt = (struct rt6_info *) dst;
1726  if (rt == net->ipv6.ip6_null_entry) {
1727  net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1728  return;
1729  }
1730 
1731  /* Redirect received -> path was valid.
1732  * Look, redirects are sent only in response to data packets,
1733  * so that this nexthop apparently is reachable. --ANK
1734  */
1735  dst_confirm(&rt->dst);
1736 
1737  neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1738  if (!neigh)
1739  return;
1740 
1741  /* Duplicate redirect: silently ignore. */
1742  old_neigh = rt->n;
1743  if (neigh == old_neigh)
1744  goto out;
1745 
1746  /*
1747  * We have finally decided to accept it.
1748  */
1749 
1750  neigh_update(neigh, lladdr, NUD_STALE,
1753  (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1755  );
1756 
1757  nrt = ip6_rt_copy(rt, dest);
1758  if (!nrt)
1759  goto out;
1760 
1762  if (on_link)
1763  nrt->rt6i_flags &= ~RTF_GATEWAY;
1764 
1765  nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1766  nrt->n = neigh_clone(neigh);
1767 
1768  if (ip6_ins_rt(nrt))
1769  goto out;
1770 
1771  netevent.old = &rt->dst;
1772  netevent.old_neigh = old_neigh;
1773  netevent.new = &nrt->dst;
1774  netevent.new_neigh = neigh;
1775  netevent.daddr = dest;
1777 
1778  if (rt->rt6i_flags & RTF_CACHE) {
1779  rt = (struct rt6_info *) dst_clone(&rt->dst);
1780  ip6_del_rt(rt);
1781  }
1782 
1783 out:
1784  neigh_release(neigh);
1785 }
1786 
1787 /*
1788  * Misc support functions
1789  */
1790 
1791 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1792  const struct in6_addr *dest)
1793 {
1794  struct net *net = dev_net(ort->dst.dev);
1795  struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1796  ort->rt6i_table);
1797 
1798  if (rt) {
1799  rt->dst.input = ort->dst.input;
1800  rt->dst.output = ort->dst.output;
1801  rt->dst.flags |= DST_HOST;
1802 
1803  rt->rt6i_dst.addr = *dest;
1804  rt->rt6i_dst.plen = 128;
1805  dst_copy_metrics(&rt->dst, &ort->dst);
1806  rt->dst.error = ort->dst.error;
1807  rt->rt6i_idev = ort->rt6i_idev;
1808  if (rt->rt6i_idev)
1809  in6_dev_hold(rt->rt6i_idev);
1810  rt->dst.lastuse = jiffies;
1811 
1812  rt->rt6i_gateway = ort->rt6i_gateway;
1813  rt->rt6i_flags = ort->rt6i_flags;
1814  if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1816  rt6_set_from(rt, ort);
1817  else
1818  rt6_clean_expires(rt);
1819  rt->rt6i_metric = 0;
1820 
1821 #ifdef CONFIG_IPV6_SUBTREES
1822  memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1823 #endif
1824  memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1825  rt->rt6i_table = ort->rt6i_table;
1826  }
1827  return rt;
1828 }
1829 
1830 #ifdef CONFIG_IPV6_ROUTE_INFO
1831 static struct rt6_info *rt6_get_route_info(struct net *net,
1832  const struct in6_addr *prefix, int prefixlen,
1833  const struct in6_addr *gwaddr, int ifindex)
1834 {
1835  struct fib6_node *fn;
1836  struct rt6_info *rt = NULL;
1837  struct fib6_table *table;
1838 
1839  table = fib6_get_table(net, RT6_TABLE_INFO);
1840  if (!table)
1841  return NULL;
1842 
1843  read_lock_bh(&table->tb6_lock);
1844  fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1845  if (!fn)
1846  goto out;
1847 
1848  for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1849  if (rt->dst.dev->ifindex != ifindex)
1850  continue;
1852  continue;
1853  if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1854  continue;
1855  dst_hold(&rt->dst);
1856  break;
1857  }
1858 out:
1859  read_unlock_bh(&table->tb6_lock);
1860  return rt;
1861 }
1862 
1863 static struct rt6_info *rt6_add_route_info(struct net *net,
1864  const struct in6_addr *prefix, int prefixlen,
1865  const struct in6_addr *gwaddr, int ifindex,
1866  unsigned int pref)
1867 {
1868  struct fib6_config cfg = {
1870  .fc_metric = IP6_RT_PRIO_USER,
1871  .fc_ifindex = ifindex,
1872  .fc_dst_len = prefixlen,
1873  .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1874  RTF_UP | RTF_PREF(pref),
1875  .fc_nlinfo.portid = 0,
1876  .fc_nlinfo.nlh = NULL,
1877  .fc_nlinfo.nl_net = net,
1878  };
1879 
1880  cfg.fc_dst = *prefix;
1881  cfg.fc_gateway = *gwaddr;
1882 
1883  /* We should treat it as a default route if prefix length is 0. */
1884  if (!prefixlen)
1885  cfg.fc_flags |= RTF_DEFAULT;
1886 
1887  ip6_route_add(&cfg);
1888 
1889  return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1890 }
1891 #endif
1892 
1893 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1894 {
1895  struct rt6_info *rt;
1896  struct fib6_table *table;
1897 
1898  table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1899  if (!table)
1900  return NULL;
1901 
1902  read_lock_bh(&table->tb6_lock);
1903  for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1904  if (dev == rt->dst.dev &&
1906  ipv6_addr_equal(&rt->rt6i_gateway, addr))
1907  break;
1908  }
1909  if (rt)
1910  dst_hold(&rt->dst);
1911  read_unlock_bh(&table->tb6_lock);
1912  return rt;
1913 }
1914 
1915 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1916  struct net_device *dev,
1917  unsigned int pref)
1918 {
1919  struct fib6_config cfg = {
1921  .fc_metric = IP6_RT_PRIO_USER,
1922  .fc_ifindex = dev->ifindex,
1923  .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1924  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1925  .fc_nlinfo.portid = 0,
1926  .fc_nlinfo.nlh = NULL,
1927  .fc_nlinfo.nl_net = dev_net(dev),
1928  };
1929 
1930  cfg.fc_gateway = *gwaddr;
1931 
1932  ip6_route_add(&cfg);
1933 
1934  return rt6_get_dflt_router(gwaddr, dev);
1935 }
1936 
1937 void rt6_purge_dflt_routers(struct net *net)
1938 {
1939  struct rt6_info *rt;
1940  struct fib6_table *table;
1941 
1942  /* NOTE: Keep consistent with rt6_get_dflt_router */
1943  table = fib6_get_table(net, RT6_TABLE_DFLT);
1944  if (!table)
1945  return;
1946 
1947 restart:
1948  read_lock_bh(&table->tb6_lock);
1949  for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1950  if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1951  dst_hold(&rt->dst);
1952  read_unlock_bh(&table->tb6_lock);
1953  ip6_del_rt(rt);
1954  goto restart;
1955  }
1956  }
1957  read_unlock_bh(&table->tb6_lock);
1958 }
1959 
1960 static void rtmsg_to_fib6_config(struct net *net,
1961  struct in6_rtmsg *rtmsg,
1962  struct fib6_config *cfg)
1963 {
1964  memset(cfg, 0, sizeof(*cfg));
1965 
1966  cfg->fc_table = RT6_TABLE_MAIN;
1967  cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1968  cfg->fc_metric = rtmsg->rtmsg_metric;
1969  cfg->fc_expires = rtmsg->rtmsg_info;
1970  cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1971  cfg->fc_src_len = rtmsg->rtmsg_src_len;
1972  cfg->fc_flags = rtmsg->rtmsg_flags;
1973 
1974  cfg->fc_nlinfo.nl_net = net;
1975 
1976  cfg->fc_dst = rtmsg->rtmsg_dst;
1977  cfg->fc_src = rtmsg->rtmsg_src;
1978  cfg->fc_gateway = rtmsg->rtmsg_gateway;
1979 }
1980 
1981 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1982 {
1983  struct fib6_config cfg;
1984  struct in6_rtmsg rtmsg;
1985  int err;
1986 
1987  switch(cmd) {
1988  case SIOCADDRT: /* Add a route */
1989  case SIOCDELRT: /* Delete a route */
1990  if (!capable(CAP_NET_ADMIN))
1991  return -EPERM;
1992  err = copy_from_user(&rtmsg, arg,
1993  sizeof(struct in6_rtmsg));
1994  if (err)
1995  return -EFAULT;
1996 
1997  rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1998 
1999  rtnl_lock();
2000  switch (cmd) {
2001  case SIOCADDRT:
2002  err = ip6_route_add(&cfg);
2003  break;
2004  case SIOCDELRT:
2005  err = ip6_route_del(&cfg);
2006  break;
2007  default:
2008  err = -EINVAL;
2009  }
2010  rtnl_unlock();
2011 
2012  return err;
2013  }
2014 
2015  return -EINVAL;
2016 }
2017 
2018 /*
2019  * Drop the packet on the floor
2020  */
2021 
2022 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2023 {
2024  int type;
2025  struct dst_entry *dst = skb_dst(skb);
2026  switch (ipstats_mib_noroutes) {
2028  type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2029  if (type == IPV6_ADDR_ANY) {
2030  IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2032  break;
2033  }
2034  /* FALLTHROUGH */
2036  IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2037  ipstats_mib_noroutes);
2038  break;
2039  }
2040  icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2041  kfree_skb(skb);
2042  return 0;
2043 }
2044 
2045 static int ip6_pkt_discard(struct sk_buff *skb)
2046 {
2047  return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2048 }
2049 
2050 static int ip6_pkt_discard_out(struct sk_buff *skb)
2051 {
2052  skb->dev = skb_dst(skb)->dev;
2053  return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2054 }
2055 
2056 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2057 
2058 static int ip6_pkt_prohibit(struct sk_buff *skb)
2059 {
2060  return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2061 }
2062 
2063 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2064 {
2065  skb->dev = skb_dst(skb)->dev;
2066  return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2067 }
2068 
2069 #endif
2070 
2071 /*
2072  * Allocate a dst for local (unicast / anycast) address.
2073  */
2074 
2076  const struct in6_addr *addr,
2077  bool anycast)
2078 {
2079  struct net *net = dev_net(idev->dev);
2080  struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2081  int err;
2082 
2083  if (!rt) {
2084  net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2085  return ERR_PTR(-ENOMEM);
2086  }
2087 
2088  in6_dev_hold(idev);
2089 
2090  rt->dst.flags |= DST_HOST;
2091  rt->dst.input = ip6_input;
2092  rt->dst.output = ip6_output;
2093  rt->rt6i_idev = idev;
2094 
2096  if (anycast)
2097  rt->rt6i_flags |= RTF_ANYCAST;
2098  else
2099  rt->rt6i_flags |= RTF_LOCAL;
2100  err = rt6_bind_neighbour(rt, rt->dst.dev);
2101  if (err) {
2102  dst_free(&rt->dst);
2103  return ERR_PTR(err);
2104  }
2105 
2106  rt->rt6i_dst.addr = *addr;
2107  rt->rt6i_dst.plen = 128;
2109 
2110  atomic_set(&rt->dst.__refcnt, 1);
2111 
2112  return rt;
2113 }
2114 
2115 int ip6_route_get_saddr(struct net *net,
2116  struct rt6_info *rt,
2117  const struct in6_addr *daddr,
2118  unsigned int prefs,
2119  struct in6_addr *saddr)
2120 {
2121  struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2122  int err = 0;
2123  if (rt->rt6i_prefsrc.plen)
2124  *saddr = rt->rt6i_prefsrc.addr;
2125  else
2126  err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2127  daddr, prefs, saddr);
2128  return err;
2129 }
2130 
2131 /* remove deleted ip from prefsrc entries */
/* Argument bundle passed to fib6_remove_prefsrc() through fib6_clean_all() */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2137 
2138 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2139 {
2140  struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2141  struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2142  struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2143 
2144  if (((void *)rt->dst.dev == dev || !dev) &&
2145  rt != net->ipv6.ip6_null_entry &&
2146  ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2147  /* remove prefsrc entry */
2148  rt->rt6i_prefsrc.plen = 0;
2149  }
2150  return 0;
2151 }
2152 
2154 {
2155  struct net *net = dev_net(ifp->idev->dev);
2156  struct arg_dev_net_ip adni = {
2157  .dev = ifp->idev->dev,
2158  .net = net,
2159  .addr = &ifp->addr,
2160  };
2161  fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2162 }
2163 
/* Argument bundle passed to fib6_ifdown() by rt6_ifdown() */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2168 
2169 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2170 {
2171  const struct arg_dev_net *adn = arg;
2172  const struct net_device *dev = adn->dev;
2173 
2174  if ((rt->dst.dev == dev || !dev) &&
2175  rt != adn->net->ipv6.ip6_null_entry)
2176  return -1;
2177 
2178  return 0;
2179 }
2180 
2181 void rt6_ifdown(struct net *net, struct net_device *dev)
2182 {
2183  struct arg_dev_net adn = {
2184  .dev = dev,
2185  .net = net,
2186  };
2187 
2188  fib6_clean_all(net, fib6_ifdown, 0, &adn);
2189  icmp6_clean_all(fib6_ifdown, &adn);
2190 }
2191 
/* Argument bundle passed to rt6_mtu_change_route() by rt6_mtu_change() */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2196 
2197 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2198 {
2199  struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2200  struct inet6_dev *idev;
2201 
2202  /* In IPv6 pmtu discovery is not optional,
2203  so that RTAX_MTU lock cannot disable it.
2204  We still use this lock to block changes
2205  caused by addrconf/ndisc.
2206  */
2207 
2208  idev = __in6_dev_get(arg->dev);
2209  if (!idev)
2210  return 0;
2211 
2212  /* For administrative MTU increase, there is no way to discover
2213  IPv6 PMTU increase, so PMTU increase should be updated here.
2214  Since RFC 1981 doesn't include administrative MTU increase
2215  update PMTU increase is a MUST. (i.e. jumbo frame)
2216  */
2217  /*
2218  If new MTU is less than route PMTU, this new MTU will be the
2219  lowest MTU in the path, update the route PMTU to reflect PMTU
2220  decreases; if new MTU is greater than route PMTU, and the
2221  old MTU is the lowest MTU in the path, update the route PMTU
2222  to reflect the increase. In this case if the other nodes' MTU
2223  also have the lowest MTU, TOO BIG MESSAGE will be lead to
2224  PMTU discouvery.
2225  */
2226  if (rt->dst.dev == arg->dev &&
2227  !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2228  (dst_mtu(&rt->dst) >= arg->mtu ||
2229  (dst_mtu(&rt->dst) < arg->mtu &&
2230  dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2231  dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2232  }
2233  return 0;
2234 }
2235 
2236 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2237 {
2238  struct rt6_mtu_change_arg arg = {
2239  .dev = dev,
2240  .mtu = mtu,
2241  };
2242 
2243  fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2244 }
2245 
2246 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2247  [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2248  [RTA_OIF] = { .type = NLA_U32 },
2249  [RTA_IIF] = { .type = NLA_U32 },
2250  [RTA_PRIORITY] = { .type = NLA_U32 },
2251  [RTA_METRICS] = { .type = NLA_NESTED },
2252 };
2253 
2254 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2255  struct fib6_config *cfg)
2256 {
2257  struct rtmsg *rtm;
2258  struct nlattr *tb[RTA_MAX+1];
2259  int err;
2260 
2261  err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2262  if (err < 0)
2263  goto errout;
2264 
2265  err = -EINVAL;
2266  rtm = nlmsg_data(nlh);
2267  memset(cfg, 0, sizeof(*cfg));
2268 
2269  cfg->fc_table = rtm->rtm_table;
2270  cfg->fc_dst_len = rtm->rtm_dst_len;
2271  cfg->fc_src_len = rtm->rtm_src_len;
2272  cfg->fc_flags = RTF_UP;
2273  cfg->fc_protocol = rtm->rtm_protocol;
2274  cfg->fc_type = rtm->rtm_type;
2275 
2276  if (rtm->rtm_type == RTN_UNREACHABLE ||
2277  rtm->rtm_type == RTN_BLACKHOLE ||
2278  rtm->rtm_type == RTN_PROHIBIT ||
2279  rtm->rtm_type == RTN_THROW)
2280  cfg->fc_flags |= RTF_REJECT;
2281 
2282  if (rtm->rtm_type == RTN_LOCAL)
2283  cfg->fc_flags |= RTF_LOCAL;
2284 
2285  cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2286  cfg->fc_nlinfo.nlh = nlh;
2287  cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2288 
2289  if (tb[RTA_GATEWAY]) {
2290  nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2291  cfg->fc_flags |= RTF_GATEWAY;
2292  }
2293 
2294  if (tb[RTA_DST]) {
2295  int plen = (rtm->rtm_dst_len + 7) >> 3;
2296 
2297  if (nla_len(tb[RTA_DST]) < plen)
2298  goto errout;
2299 
2300  nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2301  }
2302 
2303  if (tb[RTA_SRC]) {
2304  int plen = (rtm->rtm_src_len + 7) >> 3;
2305 
2306  if (nla_len(tb[RTA_SRC]) < plen)
2307  goto errout;
2308 
2309  nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2310  }
2311 
2312  if (tb[RTA_PREFSRC])
2313  nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2314 
2315  if (tb[RTA_OIF])
2316  cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2317 
2318  if (tb[RTA_PRIORITY])
2319  cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2320 
2321  if (tb[RTA_METRICS]) {
2322  cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2323  cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2324  }
2325 
2326  if (tb[RTA_TABLE])
2327  cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2328 
2329  err = 0;
2330 errout:
2331  return err;
2332 }
2333 
2334 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2335 {
2336  struct fib6_config cfg;
2337  int err;
2338 
2339  err = rtm_to_fib6_config(skb, nlh, &cfg);
2340  if (err < 0)
2341  return err;
2342 
2343  return ip6_route_del(&cfg);
2344 }
2345 
2346 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2347 {
2348  struct fib6_config cfg;
2349  int err;
2350 
2351  err = rtm_to_fib6_config(skb, nlh, &cfg);
2352  if (err < 0)
2353  return err;
2354 
2355  return ip6_route_add(&cfg);
2356 }
2357 
2358 static inline size_t rt6_nlmsg_size(void)
2359 {
2360  return NLMSG_ALIGN(sizeof(struct rtmsg))
2361  + nla_total_size(16) /* RTA_SRC */
2362  + nla_total_size(16) /* RTA_DST */
2363  + nla_total_size(16) /* RTA_GATEWAY */
2364  + nla_total_size(16) /* RTA_PREFSRC */
2365  + nla_total_size(4) /* RTA_TABLE */
2366  + nla_total_size(4) /* RTA_IIF */
2367  + nla_total_size(4) /* RTA_OIF */
2368  + nla_total_size(4) /* RTA_PRIORITY */
2369  + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2370  + nla_total_size(sizeof(struct rta_cacheinfo));
2371 }
2372 
2373 static int rt6_fill_node(struct net *net,
2374  struct sk_buff *skb, struct rt6_info *rt,
2375  struct in6_addr *dst, struct in6_addr *src,
2376  int iif, int type, u32 portid, u32 seq,
2377  int prefix, int nowait, unsigned int flags)
2378 {
2379  struct rtmsg *rtm;
2380  struct nlmsghdr *nlh;
2381  long expires;
2382  u32 table;
2383  struct neighbour *n;
2384 
2385  if (prefix) { /* user wants prefix routes only */
2386  if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2387  /* success since this is not a prefix route */
2388  return 1;
2389  }
2390  }
2391 
2392  nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2393  if (!nlh)
2394  return -EMSGSIZE;
2395 
2396  rtm = nlmsg_data(nlh);
2397  rtm->rtm_family = AF_INET6;
2398  rtm->rtm_dst_len = rt->rt6i_dst.plen;
2399  rtm->rtm_src_len = rt->rt6i_src.plen;
2400  rtm->rtm_tos = 0;
2401  if (rt->rt6i_table)
2402  table = rt->rt6i_table->tb6_id;
2403  else
2404  table = RT6_TABLE_UNSPEC;
2405  rtm->rtm_table = table;
2406  if (nla_put_u32(skb, RTA_TABLE, table))
2407  goto nla_put_failure;
2408  if (rt->rt6i_flags & RTF_REJECT) {
2409  switch (rt->dst.error) {
2410  case -EINVAL:
2411  rtm->rtm_type = RTN_BLACKHOLE;
2412  break;
2413  case -EACCES:
2414  rtm->rtm_type = RTN_PROHIBIT;
2415  break;
2416  case -EAGAIN:
2417  rtm->rtm_type = RTN_THROW;
2418  break;
2419  default:
2420  rtm->rtm_type = RTN_UNREACHABLE;
2421  break;
2422  }
2423  }
2424  else if (rt->rt6i_flags & RTF_LOCAL)
2425  rtm->rtm_type = RTN_LOCAL;
2426  else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2427  rtm->rtm_type = RTN_LOCAL;
2428  else
2429  rtm->rtm_type = RTN_UNICAST;
2430  rtm->rtm_flags = 0;
2432  rtm->rtm_protocol = rt->rt6i_protocol;
2433  if (rt->rt6i_flags & RTF_DYNAMIC)
2435  else if (rt->rt6i_flags & RTF_ADDRCONF) {
2436  if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2437  rtm->rtm_protocol = RTPROT_RA;
2438  else
2439  rtm->rtm_protocol = RTPROT_KERNEL;
2440  }
2441 
2442  if (rt->rt6i_flags & RTF_CACHE)
2443  rtm->rtm_flags |= RTM_F_CLONED;
2444 
2445  if (dst) {
2446  if (nla_put(skb, RTA_DST, 16, dst))
2447  goto nla_put_failure;
2448  rtm->rtm_dst_len = 128;
2449  } else if (rtm->rtm_dst_len)
2450  if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2451  goto nla_put_failure;
2452 #ifdef CONFIG_IPV6_SUBTREES
2453  if (src) {
2454  if (nla_put(skb, RTA_SRC, 16, src))
2455  goto nla_put_failure;
2456  rtm->rtm_src_len = 128;
2457  } else if (rtm->rtm_src_len &&
2458  nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2459  goto nla_put_failure;
2460 #endif
2461  if (iif) {
2462 #ifdef CONFIG_IPV6_MROUTE
2463  if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2464  int err = ip6mr_get_route(net, skb, rtm, nowait);
2465  if (err <= 0) {
2466  if (!nowait) {
2467  if (err == 0)
2468  return 0;
2469  goto nla_put_failure;
2470  } else {
2471  if (err == -EMSGSIZE)
2472  goto nla_put_failure;
2473  }
2474  }
2475  } else
2476 #endif
2477  if (nla_put_u32(skb, RTA_IIF, iif))
2478  goto nla_put_failure;
2479  } else if (dst) {
2480  struct in6_addr saddr_buf;
2481  if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2482  nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2483  goto nla_put_failure;
2484  }
2485 
2486  if (rt->rt6i_prefsrc.plen) {
2487  struct in6_addr saddr_buf;
2488  saddr_buf = rt->rt6i_prefsrc.addr;
2489  if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2490  goto nla_put_failure;
2491  }
2492 
2493  if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2494  goto nla_put_failure;
2495 
2496  n = rt->n;
2497  if (n) {
2498  if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2499  goto nla_put_failure;
2500  }
2501 
2502  if (rt->dst.dev &&
2503  nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2504  goto nla_put_failure;
2505  if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2506  goto nla_put_failure;
2507 
2508  expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2509 
2510  if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2511  goto nla_put_failure;
2512 
2513  return nlmsg_end(skb, nlh);
2514 
2515 nla_put_failure:
2516  nlmsg_cancel(skb, nlh);
2517  return -EMSGSIZE;
2518 }
2519 
2520 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2521 {
2522  struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2523  int prefix;
2524 
2525  if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2526  struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2527  prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2528  } else
2529  prefix = 0;
2530 
2531  return rt6_fill_node(arg->net,
2532  arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2533  NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2534  prefix, 0, NLM_F_MULTI);
2535 }
2536 
2537 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2538 {
2539  struct net *net = sock_net(in_skb->sk);
2540  struct nlattr *tb[RTA_MAX+1];
2541  struct rt6_info *rt;
2542  struct sk_buff *skb;
2543  struct rtmsg *rtm;
2544  struct flowi6 fl6;
2545  int err, iif = 0, oif = 0;
2546 
2547  err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2548  if (err < 0)
2549  goto errout;
2550 
2551  err = -EINVAL;
2552  memset(&fl6, 0, sizeof(fl6));
2553 
2554  if (tb[RTA_SRC]) {
2555  if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2556  goto errout;
2557 
2558  fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2559  }
2560 
2561  if (tb[RTA_DST]) {
2562  if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2563  goto errout;
2564 
2565  fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2566  }
2567 
2568  if (tb[RTA_IIF])
2569  iif = nla_get_u32(tb[RTA_IIF]);
2570 
2571  if (tb[RTA_OIF])
2572  oif = nla_get_u32(tb[RTA_OIF]);
2573 
2574  if (iif) {
2575  struct net_device *dev;
2576  int flags = 0;
2577 
2578  dev = __dev_get_by_index(net, iif);
2579  if (!dev) {
2580  err = -ENODEV;
2581  goto errout;
2582  }
2583 
2584  fl6.flowi6_iif = iif;
2585 
2586  if (!ipv6_addr_any(&fl6.saddr))
2587  flags |= RT6_LOOKUP_F_HAS_SADDR;
2588 
2589  rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2590  flags);
2591  } else {
2592  fl6.flowi6_oif = oif;
2593 
2594  rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2595  }
2596 
2597  skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2598  if (!skb) {
2599  dst_release(&rt->dst);
2600  err = -ENOBUFS;
2601  goto errout;
2602  }
2603 
2604  /* Reserve room for dummy headers, this skb can pass
2605  through good chunk of routing engine.
2606  */
2607  skb_reset_mac_header(skb);
2608  skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2609 
2610  skb_dst_set(skb, &rt->dst);
2611 
2612  err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2613  RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2614  nlh->nlmsg_seq, 0, 0, 0);
2615  if (err < 0) {
2616  kfree_skb(skb);
2617  goto errout;
2618  }
2619 
2620  err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2621 errout:
2622  return err;
2623 }
2624 
2625 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2626 {
2627  struct sk_buff *skb;
2628  struct net *net = info->nl_net;
2629  u32 seq;
2630  int err;
2631 
2632  err = -ENOBUFS;
2633  seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2634 
2635  skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2636  if (!skb)
2637  goto errout;
2638 
2639  err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2640  event, info->portid, seq, 0, 0, 0);
2641  if (err < 0) {
2642  /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2643  WARN_ON(err == -EMSGSIZE);
2644  kfree_skb(skb);
2645  goto errout;
2646  }
2647  rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2648  info->nlh, gfp_any());
2649  return;
2650 errout:
2651  if (err < 0)
2653 }
2654 
2655 static int ip6_route_dev_notify(struct notifier_block *this,
2656  unsigned long event, void *data)
2657 {
2658  struct net_device *dev = (struct net_device *)data;
2659  struct net *net = dev_net(dev);
2660 
2661  if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2662  net->ipv6.ip6_null_entry->dst.dev = dev;
2663  net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2664 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2665  net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2666  net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2667  net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2668  net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2669 #endif
2670  }
2671 
2672  return NOTIFY_OK;
2673 }
2674 
2675 /*
2676  * /proc
2677  */
2678 
2679 #ifdef CONFIG_PROC_FS
2680 
/*
 * Bookkeeping for a buffer-based /proc dump.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; the field meanings below are inferred from their names only.
 */
struct rt6_proc_arg {
	char *buffer;	/* presumably the output buffer */
	int offset;
	int length;
	int skip;
	int len;
};
2689 
2690 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2691 {
2692  struct seq_file *m = p_arg;
2693  struct neighbour *n;
2694 
2695  seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2696 
2697 #ifdef CONFIG_IPV6_SUBTREES
2698  seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2699 #else
2700  seq_puts(m, "00000000000000000000000000000000 00 ");
2701 #endif
2702  n = rt->n;
2703  if (n) {
2704  seq_printf(m, "%pi6", n->primary_key);
2705  } else {
2706  seq_puts(m, "00000000000000000000000000000000");
2707  }
2708  seq_printf(m, " %08x %08x %08x %08x %8s\n",
2709  rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2710  rt->dst.__use, rt->rt6i_flags,
2711  rt->dst.dev ? rt->dst.dev->name : "");
2712  return 0;
2713 }
2714 
2715 static int ipv6_route_show(struct seq_file *m, void *v)
2716 {
2717  struct net *net = (struct net *)m->private;
2718  fib6_clean_all_ro(net, rt6_info_route, 0, m);
2719  return 0;
2720 }
2721 
2722 static int ipv6_route_open(struct inode *inode, struct file *file)
2723 {
2724  return single_open_net(inode, file, ipv6_route_show);
2725 }
2726 
2727 static const struct file_operations ipv6_route_proc_fops = {
2728  .owner = THIS_MODULE,
2729  .open = ipv6_route_open,
2730  .read = seq_read,
2731  .llseek = seq_lseek,
2732  .release = single_release_net,
2733 };
2734 
2735 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2736 {
2737  struct net *net = (struct net *)seq->private;
2738  seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2739  net->ipv6.rt6_stats->fib_nodes,
2740  net->ipv6.rt6_stats->fib_route_nodes,
2741  net->ipv6.rt6_stats->fib_rt_alloc,
2742  net->ipv6.rt6_stats->fib_rt_entries,
2743  net->ipv6.rt6_stats->fib_rt_cache,
2744  dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2745  net->ipv6.rt6_stats->fib_discarded_routes);
2746 
2747  return 0;
2748 }
2749 
2750 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2751 {
2752  return single_open_net(inode, file, rt6_stats_seq_show);
2753 }
2754 
2755 static const struct file_operations rt6_stats_seq_fops = {
2756  .owner = THIS_MODULE,
2757  .open = rt6_stats_seq_open,
2758  .read = seq_read,
2759  .llseek = seq_lseek,
2760  .release = single_release_net,
2761 };
2762 #endif /* CONFIG_PROC_FS */
2763 
2764 #ifdef CONFIG_SYSCTL
2765 
2766 static
2767 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2768  void __user *buffer, size_t *lenp, loff_t *ppos)
2769 {
2770  struct net *net;
2771  int delay;
2772  if (!write)
2773  return -EINVAL;
2774 
2775  net = (struct net *)ctl->extra1;
2776  delay = net->ipv6.sysctl.flush_delay;
2777  proc_dointvec(ctl, write, buffer, lenp, ppos);
2778  fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2779  return 0;
2780 }
2781 
2782 ctl_table ipv6_route_table_template[] = {
2783  {
2784  .procname = "flush",
2785  .data = &init_net.ipv6.sysctl.flush_delay,
2786  .maxlen = sizeof(int),
2787  .mode = 0200,
2788  .proc_handler = ipv6_sysctl_rtcache_flush
2789  },
2790  {
2791  .procname = "gc_thresh",
2792  .data = &ip6_dst_ops_template.gc_thresh,
2793  .maxlen = sizeof(int),
2794  .mode = 0644,
2796  },
2797  {
2798  .procname = "max_size",
2799  .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2800  .maxlen = sizeof(int),
2801  .mode = 0644,
2803  },
2804  {
2805  .procname = "gc_min_interval",
2806  .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2807  .maxlen = sizeof(int),
2808  .mode = 0644,
2810  },
2811  {
2812  .procname = "gc_timeout",
2813  .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2814  .maxlen = sizeof(int),
2815  .mode = 0644,
2817  },
2818  {
2819  .procname = "gc_interval",
2820  .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2821  .maxlen = sizeof(int),
2822  .mode = 0644,
2824  },
2825  {
2826  .procname = "gc_elasticity",
2827  .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2828  .maxlen = sizeof(int),
2829  .mode = 0644,
2831  },
2832  {
2833  .procname = "mtu_expires",
2834  .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2835  .maxlen = sizeof(int),
2836  .mode = 0644,
2838  },
2839  {
2840  .procname = "min_adv_mss",
2841  .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2842  .maxlen = sizeof(int),
2843  .mode = 0644,
2845  },
2846  {
2847  .procname = "gc_min_interval_ms",
2848  .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2849  .maxlen = sizeof(int),
2850  .mode = 0644,
2852  },
2853  { }
2854 };
2855 
2856 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2857 {
2858  struct ctl_table *table;
2859 
2860  table = kmemdup(ipv6_route_table_template,
2861  sizeof(ipv6_route_table_template),
2862  GFP_KERNEL);
2863 
2864  if (table) {
2865  table[0].data = &net->ipv6.sysctl.flush_delay;
2866  table[0].extra1 = net;
2867  table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2868  table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2869  table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2870  table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2871  table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2872  table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2873  table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2874  table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2875  table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2876  }
2877 
2878  return table;
2879 }
2880 #endif
2881 
2882 static int __net_init ip6_route_net_init(struct net *net)
2883 {
2884  int ret = -ENOMEM;
2885 
2886  memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2887  sizeof(net->ipv6.ip6_dst_ops));
2888 
2889  if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2890  goto out_ip6_dst_ops;
2891 
2892  net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2893  sizeof(*net->ipv6.ip6_null_entry),
2894  GFP_KERNEL);
2895  if (!net->ipv6.ip6_null_entry)
2896  goto out_ip6_dst_entries;
2897  net->ipv6.ip6_null_entry->dst.path =
2898  (struct dst_entry *)net->ipv6.ip6_null_entry;
2899  net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2900  dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2901  ip6_template_metrics, true);
2902 
2903 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2904  net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2905  sizeof(*net->ipv6.ip6_prohibit_entry),
2906  GFP_KERNEL);
2907  if (!net->ipv6.ip6_prohibit_entry)
2908  goto out_ip6_null_entry;
2909  net->ipv6.ip6_prohibit_entry->dst.path =
2910  (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2911  net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2912  dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2913  ip6_template_metrics, true);
2914 
2915  net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2916  sizeof(*net->ipv6.ip6_blk_hole_entry),
2917  GFP_KERNEL);
2918  if (!net->ipv6.ip6_blk_hole_entry)
2919  goto out_ip6_prohibit_entry;
2920  net->ipv6.ip6_blk_hole_entry->dst.path =
2921  (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2922  net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2923  dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2924  ip6_template_metrics, true);
2925 #endif
2926 
2927  net->ipv6.sysctl.flush_delay = 0;
2928  net->ipv6.sysctl.ip6_rt_max_size = 4096;
2929  net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2930  net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2931  net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2932  net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2933  net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2934  net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2935 
2936  net->ipv6.ip6_rt_gc_expire = 30*HZ;
2937 
2938  ret = 0;
2939 out:
2940  return ret;
2941 
2942 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2943 out_ip6_prohibit_entry:
2944  kfree(net->ipv6.ip6_prohibit_entry);
2945 out_ip6_null_entry:
2946  kfree(net->ipv6.ip6_null_entry);
2947 #endif
2948 out_ip6_dst_entries:
2949  dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2950 out_ip6_dst_ops:
2951  goto out;
2952 }
2953 
2954 static void __net_exit ip6_route_net_exit(struct net *net)
2955 {
2956  kfree(net->ipv6.ip6_null_entry);
2957 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2958  kfree(net->ipv6.ip6_prohibit_entry);
2959  kfree(net->ipv6.ip6_blk_hole_entry);
2960 #endif
2961  dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2962 }
2963 
2964 static int __net_init ip6_route_net_init_late(struct net *net)
2965 {
2966 #ifdef CONFIG_PROC_FS
2967  proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2968  proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2969 #endif
2970  return 0;
2971 }
2972 
2973 static void __net_exit ip6_route_net_exit_late(struct net *net)
2974 {
2975 #ifdef CONFIG_PROC_FS
2976  proc_net_remove(net, "ipv6_route");
2977  proc_net_remove(net, "rt6_stats");
2978 #endif
2979 }
2980 
2981 static struct pernet_operations ip6_route_net_ops = {
2982  .init = ip6_route_net_init,
2983  .exit = ip6_route_net_exit,
2984 };
2985 
2986 static int __net_init ipv6_inetpeer_init(struct net *net)
2987 {
2988  struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2989 
2990  if (!bp)
2991  return -ENOMEM;
2992  inet_peer_base_init(bp);
2993  net->ipv6.peers = bp;
2994  return 0;
2995 }
2996 
2997 static void __net_exit ipv6_inetpeer_exit(struct net *net)
2998 {
2999  struct inet_peer_base *bp = net->ipv6.peers;
3000 
3001  net->ipv6.peers = NULL;
3003  kfree(bp);
3004 }
3005 
3006 static struct pernet_operations ipv6_inetpeer_ops = {
3007  .init = ipv6_inetpeer_init,
3008  .exit = ipv6_inetpeer_exit,
3009 };
3010 
3011 static struct pernet_operations ip6_route_net_late_ops = {
3012  .init = ip6_route_net_init_late,
3013  .exit = ip6_route_net_exit_late,
3014 };
3015 
3016 static struct notifier_block ip6_route_dev_notifier = {
3017  .notifier_call = ip6_route_dev_notify,
3018  .priority = 0,
3019 };
3020 
3022 {
3023  int ret;
3024 
3025  ret = -ENOMEM;
3026  ip6_dst_ops_template.kmem_cachep =
3027  kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3029  if (!ip6_dst_ops_template.kmem_cachep)
3030  goto out;
3031 
3032  ret = dst_entries_init(&ip6_dst_blackhole_ops);
3033  if (ret)
3034  goto out_kmem_cache;
3035 
3036  ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3037  if (ret)
3038  goto out_dst_entries;
3039 
3040  ret = register_pernet_subsys(&ip6_route_net_ops);
3041  if (ret)
3042  goto out_register_inetpeer;
3043 
3044  ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3045 
3046  /* Registering of the loopback is done before this portion of code,
3047  * the loopback reference in rt6_info will not be taken, do it
3048  * manually for init_net */
3049  init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3050  init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3051  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3052  init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3053  init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3054  init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3055  init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3056  #endif
3057  ret = fib6_init();
3058  if (ret)
3059  goto out_register_subsys;
3060 
3061  ret = xfrm6_init();
3062  if (ret)
3063  goto out_fib6_init;
3064 
3065  ret = fib6_rules_init();
3066  if (ret)
3067  goto xfrm6_init;
3068 
3069  ret = register_pernet_subsys(&ip6_route_net_late_ops);
3070  if (ret)
3071  goto fib6_rules_init;
3072 
3073  ret = -ENOBUFS;
3074  if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3075  __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3076  __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3077  goto out_register_late_subsys;
3078 
3079  ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3080  if (ret)
3081  goto out_register_late_subsys;
3082 
3083 out:
3084  return ret;
3085 
3086 out_register_late_subsys:
3087  unregister_pernet_subsys(&ip6_route_net_late_ops);
3090 xfrm6_init:
3091  xfrm6_fini();
3092 out_fib6_init:
3093  fib6_gc_cleanup();
3094 out_register_subsys:
3095  unregister_pernet_subsys(&ip6_route_net_ops);
3096 out_register_inetpeer:
3097  unregister_pernet_subsys(&ipv6_inetpeer_ops);
3098 out_dst_entries:
3099  dst_entries_destroy(&ip6_dst_blackhole_ops);
3100 out_kmem_cache:
3101  kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3102  goto out;
3103 }
3104 
3106 {
3107  unregister_netdevice_notifier(&ip6_route_dev_notifier);
3108  unregister_pernet_subsys(&ip6_route_net_late_ops);
3110  xfrm6_fini();
3111  fib6_gc_cleanup();
3112  unregister_pernet_subsys(&ipv6_inetpeer_ops);
3113  unregister_pernet_subsys(&ip6_route_net_ops);
3114  dst_entries_destroy(&ip6_dst_blackhole_ops);
3115  kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3116 }