Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
anycast.c
Go to the documentation of this file.
1 /*
2  * Anycast support for IPv6
3  * Linux INET6 implementation
4  *
5  * Authors:
6  * David L Stevens ([email protected])
7  *
8  * based heavily on net/ipv6/mcast.c
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License
12  * as published by the Free Software Foundation; either version
13  * 2 of the License, or (at your option) any later version.
14  */
15 
16 #include <linux/capability.h>
17 #include <linux/module.h>
18 #include <linux/errno.h>
19 #include <linux/types.h>
20 #include <linux/random.h>
21 #include <linux/string.h>
22 #include <linux/socket.h>
23 #include <linux/sockios.h>
24 #include <linux/net.h>
25 #include <linux/in6.h>
26 #include <linux/netdevice.h>
27 #include <linux/if_arp.h>
28 #include <linux/route.h>
29 #include <linux/init.h>
30 #include <linux/proc_fs.h>
31 #include <linux/seq_file.h>
32 #include <linux/slab.h>
33 
34 #include <net/net_namespace.h>
35 #include <net/sock.h>
36 #include <net/snmp.h>
37 
38 #include <net/ipv6.h>
39 #include <net/protocol.h>
40 #include <net/if_inet6.h>
41 #include <net/ndisc.h>
42 #include <net/addrconf.h>
43 #include <net/ip6_route.h>
44 
45 #include <net/checksum.h>
46 
/* Forward declaration: the socket-level paths below call this before it is defined. */
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);

/*
 * Single rwlock shared by all sockets; it is write-locked (with BHs
 * disabled) around every update of a socket's np->ipv6_ac_list.
 */
static DEFINE_RWLOCK(ipv6_sk_ac_lock);
51 
52 
53 /*
54  * socket join an anycast group
55  */
56 
57 int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
58 {
59  struct ipv6_pinfo *np = inet6_sk(sk);
60  struct net_device *dev = NULL;
61  struct inet6_dev *idev;
62  struct ipv6_ac_socklist *pac;
63  struct net *net = sock_net(sk);
64  int ishost = !net->ipv6.devconf_all->forwarding;
65  int err = 0;
66 
67  if (!capable(CAP_NET_ADMIN))
68  return -EPERM;
69  if (ipv6_addr_is_multicast(addr))
70  return -EINVAL;
71  if (ipv6_chk_addr(net, addr, NULL, 0))
72  return -EINVAL;
73 
74  pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
75  if (pac == NULL)
76  return -ENOMEM;
77  pac->acl_next = NULL;
78  pac->acl_addr = *addr;
79 
80  rcu_read_lock();
81  if (ifindex == 0) {
82  struct rt6_info *rt;
83 
84  rt = rt6_lookup(net, addr, NULL, 0, 0);
85  if (rt) {
86  dev = rt->dst.dev;
87  dst_release(&rt->dst);
88  } else if (ishost) {
89  err = -EADDRNOTAVAIL;
90  goto error;
91  } else {
92  /* router, no matching interface: just pick one */
93  dev = dev_get_by_flags_rcu(net, IFF_UP,
95  }
96  } else
97  dev = dev_get_by_index_rcu(net, ifindex);
98 
99  if (dev == NULL) {
100  err = -ENODEV;
101  goto error;
102  }
103 
104  idev = __in6_dev_get(dev);
105  if (!idev) {
106  if (ifindex)
107  err = -ENODEV;
108  else
109  err = -EADDRNOTAVAIL;
110  goto error;
111  }
112  /* reset ishost, now that we have a specific device */
113  ishost = !idev->cnf.forwarding;
114 
115  pac->acl_ifindex = dev->ifindex;
116 
117  /* XXX
118  * For hosts, allow link-local or matching prefix anycasts.
119  * This obviates the need for propagating anycast routes while
120  * still allowing some non-router anycast participation.
121  */
122  if (!ipv6_chk_prefix(addr, dev)) {
123  if (ishost)
124  err = -EADDRNOTAVAIL;
125  if (err)
126  goto error;
127  }
128 
129  err = ipv6_dev_ac_inc(dev, addr);
130  if (!err) {
131  write_lock_bh(&ipv6_sk_ac_lock);
132  pac->acl_next = np->ipv6_ac_list;
133  np->ipv6_ac_list = pac;
134  write_unlock_bh(&ipv6_sk_ac_lock);
135  pac = NULL;
136  }
137 
138 error:
139  rcu_read_unlock();
140  if (pac)
141  sock_kfree_s(sk, pac, sizeof(*pac));
142  return err;
143 }
144 
145 /*
146  * socket leave an anycast group
147  */
148 int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
149 {
150  struct ipv6_pinfo *np = inet6_sk(sk);
151  struct net_device *dev;
152  struct ipv6_ac_socklist *pac, *prev_pac;
153  struct net *net = sock_net(sk);
154 
155  write_lock_bh(&ipv6_sk_ac_lock);
156  prev_pac = NULL;
157  for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
158  if ((ifindex == 0 || pac->acl_ifindex == ifindex) &&
159  ipv6_addr_equal(&pac->acl_addr, addr))
160  break;
161  prev_pac = pac;
162  }
163  if (!pac) {
164  write_unlock_bh(&ipv6_sk_ac_lock);
165  return -ENOENT;
166  }
167  if (prev_pac)
168  prev_pac->acl_next = pac->acl_next;
169  else
170  np->ipv6_ac_list = pac->acl_next;
171 
172  write_unlock_bh(&ipv6_sk_ac_lock);
173 
174  rcu_read_lock();
175  dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
176  if (dev)
177  ipv6_dev_ac_dec(dev, &pac->acl_addr);
178  rcu_read_unlock();
179 
180  sock_kfree_s(sk, pac, sizeof(*pac));
181  return 0;
182 }
183 
185 {
186  struct ipv6_pinfo *np = inet6_sk(sk);
187  struct net_device *dev = NULL;
188  struct ipv6_ac_socklist *pac;
189  struct net *net = sock_net(sk);
190  int prev_index;
191 
192  write_lock_bh(&ipv6_sk_ac_lock);
193  pac = np->ipv6_ac_list;
194  np->ipv6_ac_list = NULL;
195  write_unlock_bh(&ipv6_sk_ac_lock);
196 
197  prev_index = 0;
198  rcu_read_lock();
199  while (pac) {
200  struct ipv6_ac_socklist *next = pac->acl_next;
201 
202  if (pac->acl_ifindex != prev_index) {
203  dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
204  prev_index = pac->acl_ifindex;
205  }
206  if (dev)
207  ipv6_dev_ac_dec(dev, &pac->acl_addr);
208  sock_kfree_s(sk, pac, sizeof(*pac));
209  pac = next;
210  }
211  rcu_read_unlock();
212 }
213 
214 static void aca_put(struct ifacaddr6 *ac)
215 {
216  if (atomic_dec_and_test(&ac->aca_refcnt)) {
217  in6_dev_put(ac->aca_idev);
218  dst_release(&ac->aca_rt->dst);
219  kfree(ac);
220  }
221 }
222 
223 /*
224  * device anycast group inc (add if not found)
225  */
226 int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
227 {
228  struct ifacaddr6 *aca;
229  struct inet6_dev *idev;
230  struct rt6_info *rt;
231  int err;
232 
233  idev = in6_dev_get(dev);
234 
235  if (idev == NULL)
236  return -EINVAL;
237 
238  write_lock_bh(&idev->lock);
239  if (idev->dead) {
240  err = -ENODEV;
241  goto out;
242  }
243 
244  for (aca = idev->ac_list; aca; aca = aca->aca_next) {
245  if (ipv6_addr_equal(&aca->aca_addr, addr)) {
246  aca->aca_users++;
247  err = 0;
248  goto out;
249  }
250  }
251 
252  /*
253  * not found: create a new one.
254  */
255 
256  aca = kzalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);
257 
258  if (aca == NULL) {
259  err = -ENOMEM;
260  goto out;
261  }
262 
263  rt = addrconf_dst_alloc(idev, addr, true);
264  if (IS_ERR(rt)) {
265  kfree(aca);
266  err = PTR_ERR(rt);
267  goto out;
268  }
269 
270  aca->aca_addr = *addr;
271  aca->aca_idev = idev;
272  aca->aca_rt = rt;
273  aca->aca_users = 1;
274  /* aca_tstamp should be updated upon changes */
275  aca->aca_cstamp = aca->aca_tstamp = jiffies;
276  atomic_set(&aca->aca_refcnt, 2);
277  spin_lock_init(&aca->aca_lock);
278 
279  aca->aca_next = idev->ac_list;
280  idev->ac_list = aca;
281  write_unlock_bh(&idev->lock);
282 
283  ip6_ins_rt(rt);
284 
285  addrconf_join_solict(dev, &aca->aca_addr);
286 
287  aca_put(aca);
288  return 0;
289 out:
290  write_unlock_bh(&idev->lock);
291  in6_dev_put(idev);
292  return err;
293 }
294 
295 /*
296  * device anycast group decrement
297  */
298 int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
299 {
300  struct ifacaddr6 *aca, *prev_aca;
301 
302  write_lock_bh(&idev->lock);
303  prev_aca = NULL;
304  for (aca = idev->ac_list; aca; aca = aca->aca_next) {
305  if (ipv6_addr_equal(&aca->aca_addr, addr))
306  break;
307  prev_aca = aca;
308  }
309  if (!aca) {
310  write_unlock_bh(&idev->lock);
311  return -ENOENT;
312  }
313  if (--aca->aca_users > 0) {
314  write_unlock_bh(&idev->lock);
315  return 0;
316  }
317  if (prev_aca)
318  prev_aca->aca_next = aca->aca_next;
319  else
320  idev->ac_list = aca->aca_next;
321  write_unlock_bh(&idev->lock);
322  addrconf_leave_solict(idev, &aca->aca_addr);
323 
324  dst_hold(&aca->aca_rt->dst);
325  ip6_del_rt(aca->aca_rt);
326 
327  aca_put(aca);
328  return 0;
329 }
330 
331 /* called with rcu_read_lock() */
332 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
333 {
334  struct inet6_dev *idev = __in6_dev_get(dev);
335 
336  if (idev == NULL)
337  return -ENODEV;
338  return __ipv6_dev_ac_dec(idev, addr);
339 }
340 
341 /*
342  * check if the interface has this anycast address
343  * called with rcu_read_lock()
344  */
345 static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr)
346 {
347  struct inet6_dev *idev;
348  struct ifacaddr6 *aca;
349 
350  idev = __in6_dev_get(dev);
351  if (idev) {
352  read_lock_bh(&idev->lock);
353  for (aca = idev->ac_list; aca; aca = aca->aca_next)
354  if (ipv6_addr_equal(&aca->aca_addr, addr))
355  break;
356  read_unlock_bh(&idev->lock);
357  return aca != NULL;
358  }
359  return false;
360 }
361 
362 /*
363  * check if given interface (or any, if dev==0) has this anycast address
364  */
365 bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
366  const struct in6_addr *addr)
367 {
368  bool found = false;
369 
370  rcu_read_lock();
371  if (dev)
372  found = ipv6_chk_acast_dev(dev, addr);
373  else
374  for_each_netdev_rcu(net, dev)
375  if (ipv6_chk_acast_dev(dev, addr)) {
376  found = true;
377  break;
378  }
379  rcu_read_unlock();
380  return found;
381 }
382 
383 
384 #ifdef CONFIG_PROC_FS
385 struct ac6_iter_state {
386  struct seq_net_private p;
387  struct net_device *dev;
388  struct inet6_dev *idev;
389 };
390 
391 #define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private)
392 
393 static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
394 {
395  struct ifacaddr6 *im = NULL;
396  struct ac6_iter_state *state = ac6_seq_private(seq);
397  struct net *net = seq_file_net(seq);
398 
399  state->idev = NULL;
400  for_each_netdev_rcu(net, state->dev) {
401  struct inet6_dev *idev;
402  idev = __in6_dev_get(state->dev);
403  if (!idev)
404  continue;
405  read_lock_bh(&idev->lock);
406  im = idev->ac_list;
407  if (im) {
408  state->idev = idev;
409  break;
410  }
411  read_unlock_bh(&idev->lock);
412  }
413  return im;
414 }
415 
416 static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im)
417 {
418  struct ac6_iter_state *state = ac6_seq_private(seq);
419 
420  im = im->aca_next;
421  while (!im) {
422  if (likely(state->idev != NULL))
423  read_unlock_bh(&state->idev->lock);
424 
425  state->dev = next_net_device_rcu(state->dev);
426  if (!state->dev) {
427  state->idev = NULL;
428  break;
429  }
430  state->idev = __in6_dev_get(state->dev);
431  if (!state->idev)
432  continue;
433  read_lock_bh(&state->idev->lock);
434  im = state->idev->ac_list;
435  }
436  return im;
437 }
438 
439 static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
440 {
441  struct ifacaddr6 *im = ac6_get_first(seq);
442  if (im)
443  while (pos && (im = ac6_get_next(seq, im)) != NULL)
444  --pos;
445  return pos ? NULL : im;
446 }
447 
448 static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
449  __acquires(RCU)
450 {
451  rcu_read_lock();
452  return ac6_get_idx(seq, *pos);
453 }
454 
455 static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
456 {
457  struct ifacaddr6 *im = ac6_get_next(seq, v);
458 
459  ++*pos;
460  return im;
461 }
462 
463 static void ac6_seq_stop(struct seq_file *seq, void *v)
464  __releases(RCU)
465 {
466  struct ac6_iter_state *state = ac6_seq_private(seq);
467 
468  if (likely(state->idev != NULL)) {
469  read_unlock_bh(&state->idev->lock);
470  state->idev = NULL;
471  }
472  rcu_read_unlock();
473 }
474 
475 static int ac6_seq_show(struct seq_file *seq, void *v)
476 {
477  struct ifacaddr6 *im = (struct ifacaddr6 *)v;
478  struct ac6_iter_state *state = ac6_seq_private(seq);
479 
480  seq_printf(seq, "%-4d %-15s %pi6 %5d\n",
481  state->dev->ifindex, state->dev->name,
482  &im->aca_addr, im->aca_users);
483  return 0;
484 }
485 
486 static const struct seq_operations ac6_seq_ops = {
487  .start = ac6_seq_start,
488  .next = ac6_seq_next,
489  .stop = ac6_seq_stop,
490  .show = ac6_seq_show,
491 };
492 
493 static int ac6_seq_open(struct inode *inode, struct file *file)
494 {
495  return seq_open_net(inode, file, &ac6_seq_ops,
496  sizeof(struct ac6_iter_state));
497 }
498 
499 static const struct file_operations ac6_seq_fops = {
500  .owner = THIS_MODULE,
501  .open = ac6_seq_open,
502  .read = seq_read,
503  .llseek = seq_lseek,
504  .release = seq_release_net,
505 };
506 
507 int __net_init ac6_proc_init(struct net *net)
508 {
509  if (!proc_net_fops_create(net, "anycast6", S_IRUGO, &ac6_seq_fops))
510  return -ENOMEM;
511 
512  return 0;
513 }
514 
/* Remove the "anycast6" proc entry created by ac6_proc_init() for @net. */
void ac6_proc_exit(struct net *net)
{
	proc_net_remove(net, "anycast6");
}
519 #endif
520