Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
devinet.c
Go to the documentation of this file.
1 /*
2  * NET3 IP device support routines.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version
7  * 2 of the License, or (at your option) any later version.
8  *
9  * Derived from the IP parts of dev.c 1.0.19
10  * Authors: Ross Biro
11  * Fred N. van Kempen, <[email protected]>
12  * Mark Evans, <[email protected]>
13  *
14  * Additional Authors:
15  * Alan Cox, <[email protected]>
16  * Alexey Kuznetsov, <[email protected]>
17  *
18  * Changes:
19  * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20  * lists.
21  * Cyrus Durgin: updated for kmod
22  * Matthias Andree: in devinet_ioctl, compare label and
23  * address (4.4BSD alias style support),
24  * fall back to comparing just the label
25  * if no match found.
26  */
27 
28 
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65 
66 #include "fib_lookup.h"
67 
/*
 * File-local IPv4 device configuration block.  The data[] array is indexed
 * by (IPV4_DEVCONF_* - 1); the four entries listed here start at 1 and every
 * other entry is implicitly zero.
 */
68 static struct ipv4_devconf ipv4_devconf = {
69  .data = {
70  [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
71  [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
72  [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
73  [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
74  },
75 };
76 
/*
 * Second file-local configuration block with the same initial values as
 * ipv4_devconf plus ACCEPT_SOURCE_ROUTE enabled.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt that
 * inetdev_init() copies into new in_devices -- confirm where it is installed.
 */
77 static struct ipv4_devconf ipv4_devconf_dflt = {
78  .data = {
79  [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
80  [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
81  [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
82  [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
83  [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
84  },
85 };
86 
/* Read attribute 'attr' from the default devconf block of namespace 'net'. */
87 #define IPV4_DEVCONF_DFLT(net, attr) \
88  IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
89 
/*
 * Netlink attribute policy handed to nlmsg_parse() by the address add/delete
 * handlers in this file: the three address attributes are 32-bit values and
 * the label is a string of at most IFNAMSIZ - 1 characters.
 */
90 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
91  [IFA_LOCAL] = { .type = NLA_U32 },
92  [IFA_ADDRESS] = { .type = NLA_U32 },
93  [IFA_BROADCAST] = { .type = NLA_U32 },
94  [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 };
96 
/*
 * Hash table of configured addresses: 1 << IN4_ADDR_HSIZE_SHIFT (256)
 * buckets keyed by inet_addr_hash().  Insertions and removals in this file
 * take inet_addr_hash_lock; __ip_dev_find() walks the buckets under
 * rcu_read_lock().
 */
97 #define IN4_ADDR_HSIZE_SHIFT 8
98 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
99 
100 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
101 static DEFINE_SPINLOCK(inet_addr_hash_lock);
102 
103 static u32 inet_addr_hash(struct net *net, __be32 addr)
104 {
105  u32 val = (__force u32) addr ^ net_hash_mix(net);
106 
107  return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
108 }
109 
110 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
111 {
112  u32 hash = inet_addr_hash(net, ifa->ifa_local);
113 
114  spin_lock(&inet_addr_hash_lock);
115  hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
116  spin_unlock(&inet_addr_hash_lock);
117 }
118 
119 static void inet_hash_remove(struct in_ifaddr *ifa)
120 {
121  spin_lock(&inet_addr_hash_lock);
122  hlist_del_init_rcu(&ifa->hash);
123  spin_unlock(&inet_addr_hash_lock);
124 }
125 
134 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
135 {
136  u32 hash = inet_addr_hash(net, addr);
137  struct net_device *result = NULL;
138  struct in_ifaddr *ifa;
139  struct hlist_node *node;
140 
141  rcu_read_lock();
142  hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
143  if (ifa->ifa_local == addr) {
144  struct net_device *dev = ifa->ifa_dev->dev;
145 
146  if (!net_eq(dev_net(dev), net))
147  continue;
148  result = dev;
149  break;
150  }
151  }
152  if (!result) {
153  struct flowi4 fl4 = { .daddr = addr };
154  struct fib_result res = { 0 };
155  struct fib_table *local;
156 
157  /* Fallback to FIB local table so that communication
158  * over loopback subnets work.
159  */
160  local = fib_get_table(net, RT_TABLE_LOCAL);
161  if (local &&
162  !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
163  res.type == RTN_LOCAL)
164  result = FIB_RES_DEV(res);
165  }
166  if (result && devref)
167  dev_hold(result);
168  rcu_read_unlock();
169  return result;
170 }
172 
/*
 * Forward declarations for helpers defined further down, plus the notifier
 * chain on which address NETDEV_UP / NETDEV_DOWN events are published.
 * Without CONFIG_SYSCTL the sysctl register/unregister hooks are empty
 * stubs so callers need no #ifdef.
 */
173 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
174 
175 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
176 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
177  int destroy);
178 #ifdef CONFIG_SYSCTL
179 static void devinet_sysctl_register(struct in_device *idev);
180 static void devinet_sysctl_unregister(struct in_device *idev);
181 #else
182 static void devinet_sysctl_register(struct in_device *idev)
183 {
184 }
185 static void devinet_sysctl_unregister(struct in_device *idev)
186 {
187 }
188 #endif
189 
190 /* Locks all the inet devices. */
191 
192 static struct in_ifaddr *inet_alloc_ifa(void)
193 {
194  return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
195 }
196 
197 static void inet_rcu_free_ifa(struct rcu_head *head)
198 {
199  struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
200  if (ifa->ifa_dev)
201  in_dev_put(ifa->ifa_dev);
202  kfree(ifa);
203 }
204 
205 static void inet_free_ifa(struct in_ifaddr *ifa)
206 {
207  call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
208 }
209 
210 void in_dev_finish_destroy(struct in_device *idev)
211 {
212  struct net_device *dev = idev->dev;
213 
214  WARN_ON(idev->ifa_list);
215  WARN_ON(idev->mc_list);
216 #ifdef NET_REFCNT_DEBUG
217  pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
218 #endif
219  dev_put(dev);
220  if (!idev->dead)
221  pr_err("Freeing alive in_device %p\n", idev);
222  else
223  kfree(idev);
224 }
226 
227 static struct in_device *inetdev_init(struct net_device *dev)
228 {
229  struct in_device *in_dev;
230 
231  ASSERT_RTNL();
232 
233  in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
234  if (!in_dev)
235  goto out;
236  memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
237  sizeof(in_dev->cnf));
238  in_dev->cnf.sysctl = NULL;
239  in_dev->dev = dev;
240  in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
241  if (!in_dev->arp_parms)
242  goto out_kfree;
243  if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
244  dev_disable_lro(dev);
245  /* Reference in_dev->dev */
246  dev_hold(dev);
247  /* Account for reference dev->ip_ptr (below) */
248  in_dev_hold(in_dev);
249 
250  devinet_sysctl_register(in_dev);
251  ip_mc_init_dev(in_dev);
252  if (dev->flags & IFF_UP)
253  ip_mc_up(in_dev);
254 
255  /* we can receive as soon as ip_ptr is set -- do this last */
256  rcu_assign_pointer(dev->ip_ptr, in_dev);
257 out:
258  return in_dev;
259 out_kfree:
260  kfree(in_dev);
261  in_dev = NULL;
262  goto out;
263 }
264 
265 static void in_dev_rcu_put(struct rcu_head *head)
266 {
267  struct in_device *idev = container_of(head, struct in_device, rcu_head);
268  in_dev_put(idev);
269 }
270 
271 static void inetdev_destroy(struct in_device *in_dev)
272 {
273  struct in_ifaddr *ifa;
274  struct net_device *dev;
275 
276  ASSERT_RTNL();
277 
278  dev = in_dev->dev;
279 
280  in_dev->dead = 1;
281 
282  ip_mc_destroy_dev(in_dev);
283 
284  while ((ifa = in_dev->ifa_list) != NULL) {
285  inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
286  inet_free_ifa(ifa);
287  }
288 
290 
291  devinet_sysctl_unregister(in_dev);
292  neigh_parms_release(&arp_tbl, in_dev->arp_parms);
293  arp_ifdown(dev);
294 
295  call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
296 }
297 
298 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
299 {
300  rcu_read_lock();
301  for_primary_ifa(in_dev) {
302  if (inet_ifa_match(a, ifa)) {
303  if (!b || inet_ifa_match(b, ifa)) {
304  rcu_read_unlock();
305  return 1;
306  }
307  }
308  } endfor_ifa(in_dev);
309  rcu_read_unlock();
310  return 0;
311 }
312 
/*
 * Remove the address *ifap from in_dev's address list.  RTNL must be held.
 *
 * in_dev:  device state owning the list
 * ifap:    link (previous ->ifa_next or the list head) pointing at the
 *          address to remove
 * destroy: non-zero to also free the removed address via inet_free_ifa()
 * nlh, portid: passed through to rtmsg_ifa() for the netlink notifications
 *
 * When the address is a primary, its secondaries (same mask, same subnet)
 * are either deleted as well or, if IN_DEV_PROMOTE_SECONDARIES is set, the
 * first one is promoted to primary and relinked after the last primary.
 * Every removed address is announced with RTM_DELADDR and a NETDEV_DOWN
 * event on inetaddr_chain; a promoted address gets RTM_NEWADDR/NETDEV_UP.
 */
313 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
314  int destroy, struct nlmsghdr *nlh, u32 portid)
315 {
316  struct in_ifaddr *promote = NULL;
317  struct in_ifaddr *ifa, *ifa1 = *ifap;
318  struct in_ifaddr *last_prim = in_dev->ifa_list;
319  struct in_ifaddr *prev_prom = NULL;
320  int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
321 
322  ASSERT_RTNL();
323 
324  /* 1. Deleting primary ifaddr forces deletion all secondaries
325  * unless alias promotion is set
326  **/
327 
328  if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
329  struct in_ifaddr **ifap1 = &ifa1->ifa_next;
330 
331  while ((ifa = *ifap1) != NULL) {
 /* Track the last primary whose scope is not below ifa1's. */
332  if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
333  ifa1->ifa_scope <= ifa->ifa_scope)
334  last_prim = ifa;
335 
 /* Not a secondary of ifa1's subnet: step over it. */
336  if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
337  ifa1->ifa_mask != ifa->ifa_mask ||
338  !inet_ifa_match(ifa1->ifa_address, ifa)) {
339  ifap1 = &ifa->ifa_next;
340  prev_prom = ifa;
341  continue;
342  }
343 
344  if (!do_promote) {
345  inet_hash_remove(ifa);
346  *ifap1 = ifa->ifa_next;
347 
348  rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
349  blocking_notifier_call_chain(&inetaddr_chain,
350  NETDEV_DOWN, ifa);
351  inet_free_ifa(ifa);
352  } else {
 /* First matching secondary becomes the new primary. */
353  promote = ifa;
354  break;
355  }
356  }
357  }
358 
359  /* On promotion all secondaries from subnet are changing
360  * the primary IP, we must remove all their routes silently
361  * and later to add them back with new prefsrc. Do this
362  * while all addresses are on the device list.
363  */
364  for (ifa = promote; ifa; ifa = ifa->ifa_next) {
365  if (ifa1->ifa_mask == ifa->ifa_mask &&
366  inet_ifa_match(ifa1->ifa_address, ifa))
367  fib_del_ifaddr(ifa, ifa1);
368  }
369 
370  /* 2. Unlink it */
371 
372  *ifap = ifa1->ifa_next;
373  inet_hash_remove(ifa1);
374 
375  /* 3. Announce address deletion */
376 
377  /* Send message first, then call notifier.
378  At first sight, FIB update triggered by notifier
379  will refer to already deleted ifaddr, that could confuse
380  netlink listeners. It is not true: look, gated sees
381  that route deleted and if it still thinks that ifaddr
382  is valid, it will try to restore deleted routes... Grr.
383  So that, this order is correct.
384  */
385  rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
386  blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
387 
388  if (promote) {
 /* Remember the successor before the promoted entry is relinked. */
389  struct in_ifaddr *next_sec = promote->ifa_next;
390 
391  if (prev_prom) {
 /* Move the promoted entry to just after the last primary. */
392  prev_prom->ifa_next = promote->ifa_next;
393  promote->ifa_next = last_prim->ifa_next;
394  last_prim->ifa_next = promote;
395  }
396 
397  promote->ifa_flags &= ~IFA_F_SECONDARY;
398  rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
399  blocking_notifier_call_chain(&inetaddr_chain,
400  NETDEV_UP, promote);
 /* Re-add routes of the remaining secondaries of this subnet. */
401  for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
402  if (ifa1->ifa_mask != ifa->ifa_mask ||
403  !inet_ifa_match(ifa1->ifa_address, ifa))
404  continue;
405  fib_add_ifaddr(ifa);
406  }
407 
408  }
409  if (destroy)
410  inet_free_ifa(ifa1);
411 }
412 
413 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
414  int destroy)
415 {
416  __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
417 }
418 
419 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
420  u32 portid)
421 {
422  struct in_device *in_dev = ifa->ifa_dev;
423  struct in_ifaddr *ifa1, **ifap, **last_primary;
424 
425  ASSERT_RTNL();
426 
427  if (!ifa->ifa_local) {
428  inet_free_ifa(ifa);
429  return 0;
430  }
431 
432  ifa->ifa_flags &= ~IFA_F_SECONDARY;
433  last_primary = &in_dev->ifa_list;
434 
435  for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
436  ifap = &ifa1->ifa_next) {
437  if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
438  ifa->ifa_scope <= ifa1->ifa_scope)
439  last_primary = &ifa1->ifa_next;
440  if (ifa1->ifa_mask == ifa->ifa_mask &&
441  inet_ifa_match(ifa1->ifa_address, ifa)) {
442  if (ifa1->ifa_local == ifa->ifa_local) {
443  inet_free_ifa(ifa);
444  return -EEXIST;
445  }
446  if (ifa1->ifa_scope != ifa->ifa_scope) {
447  inet_free_ifa(ifa);
448  return -EINVAL;
449  }
450  ifa->ifa_flags |= IFA_F_SECONDARY;
451  }
452  }
453 
454  if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
455  net_srandom(ifa->ifa_local);
456  ifap = last_primary;
457  }
458 
459  ifa->ifa_next = *ifap;
460  *ifap = ifa;
461 
462  inet_hash_insert(dev_net(in_dev->dev), ifa);
463 
464  /* Send message first, then call notifier.
465  Notifier will trigger FIB update, so that
466  listeners of netlink will know about new ifaddr */
467  rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
468  blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
469 
470  return 0;
471 }
472 
473 static int inet_insert_ifa(struct in_ifaddr *ifa)
474 {
475  return __inet_insert_ifa(ifa, NULL, 0);
476 }
477 
478 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
479 {
480  struct in_device *in_dev = __in_dev_get_rtnl(dev);
481 
482  ASSERT_RTNL();
483 
484  if (!in_dev) {
485  inet_free_ifa(ifa);
486  return -ENOBUFS;
487  }
488  ipv4_devconf_setall(in_dev);
489  if (ifa->ifa_dev != in_dev) {
490  WARN_ON(ifa->ifa_dev);
491  in_dev_hold(in_dev);
492  ifa->ifa_dev = in_dev;
493  }
494  if (ipv4_is_loopback(ifa->ifa_local))
495  ifa->ifa_scope = RT_SCOPE_HOST;
496  return inet_insert_ifa(ifa);
497 }
498 
499 /* Caller must hold RCU or RTNL :
500  * We dont take a reference on found in_device
501  */
502 struct in_device *inetdev_by_index(struct net *net, int ifindex)
503 {
504  struct net_device *dev;
505  struct in_device *in_dev = NULL;
506 
507  rcu_read_lock();
508  dev = dev_get_by_index_rcu(net, ifindex);
509  if (dev)
510  in_dev = rcu_dereference_rtnl(dev->ip_ptr);
511  rcu_read_unlock();
512  return in_dev;
513 }
515 
516 /* Called only from RTNL semaphored context. No locks. */
517 
518 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
519  __be32 mask)
520 {
521  ASSERT_RTNL();
522 
523  for_primary_ifa(in_dev) {
524  if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
525  return ifa;
526  } endfor_ifa(in_dev);
527  return NULL;
528 }
529 
530 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
531 {
532  struct net *net = sock_net(skb->sk);
533  struct nlattr *tb[IFA_MAX+1];
534  struct in_device *in_dev;
535  struct ifaddrmsg *ifm;
536  struct in_ifaddr *ifa, **ifap;
537  int err = -EINVAL;
538 
539  ASSERT_RTNL();
540 
541  err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
542  if (err < 0)
543  goto errout;
544 
545  ifm = nlmsg_data(nlh);
546  in_dev = inetdev_by_index(net, ifm->ifa_index);
547  if (in_dev == NULL) {
548  err = -ENODEV;
549  goto errout;
550  }
551 
552  for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
553  ifap = &ifa->ifa_next) {
554  if (tb[IFA_LOCAL] &&
555  ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
556  continue;
557 
558  if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
559  continue;
560 
561  if (tb[IFA_ADDRESS] &&
562  (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
563  !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
564  continue;
565 
566  __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
567  return 0;
568  }
569 
570  err = -EADDRNOTAVAIL;
571 errout:
572  return err;
573 }
574 
575 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
576 {
577  struct nlattr *tb[IFA_MAX+1];
578  struct in_ifaddr *ifa;
579  struct ifaddrmsg *ifm;
580  struct net_device *dev;
581  struct in_device *in_dev;
582  int err;
583 
584  err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
585  if (err < 0)
586  goto errout;
587 
588  ifm = nlmsg_data(nlh);
589  err = -EINVAL;
590  if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
591  goto errout;
592 
593  dev = __dev_get_by_index(net, ifm->ifa_index);
594  err = -ENODEV;
595  if (dev == NULL)
596  goto errout;
597 
598  in_dev = __in_dev_get_rtnl(dev);
599  err = -ENOBUFS;
600  if (in_dev == NULL)
601  goto errout;
602 
603  ifa = inet_alloc_ifa();
604  if (ifa == NULL)
605  /*
606  * A potential indev allocation can be left alive, it stays
607  * assigned to its device and is destroy with it.
608  */
609  goto errout;
610 
611  ipv4_devconf_setall(in_dev);
612  in_dev_hold(in_dev);
613 
614  if (tb[IFA_ADDRESS] == NULL)
615  tb[IFA_ADDRESS] = tb[IFA_LOCAL];
616 
617  INIT_HLIST_NODE(&ifa->hash);
618  ifa->ifa_prefixlen = ifm->ifa_prefixlen;
619  ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
620  ifa->ifa_flags = ifm->ifa_flags;
621  ifa->ifa_scope = ifm->ifa_scope;
622  ifa->ifa_dev = in_dev;
623 
624  ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
625  ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
626 
627  if (tb[IFA_BROADCAST])
628  ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
629 
630  if (tb[IFA_LABEL])
631  nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
632  else
633  memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
634 
635  return ifa;
636 
637 errout:
638  return ERR_PTR(err);
639 }
640 
641 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
642 {
643  struct net *net = sock_net(skb->sk);
644  struct in_ifaddr *ifa;
645 
646  ASSERT_RTNL();
647 
648  ifa = rtm_to_ifaddr(net, nlh);
649  if (IS_ERR(ifa))
650  return PTR_ERR(ifa);
651 
652  return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
653 }
654 
655 /*
656  * Determine a default network mask, based on the IP address.
657  */
658 
659 static int inet_abc_len(__be32 addr)
660 {
661  int rc = -1; /* Something else, probably a multicast. */
662 
663  if (ipv4_is_zeronet(addr))
664  rc = 0;
665  else {
666  __u32 haddr = ntohl(addr);
667 
668  if (IN_CLASSA(haddr))
669  rc = 8;
670  else if (IN_CLASSB(haddr))
671  rc = 16;
672  else if (IN_CLASSC(haddr))
673  rc = 24;
674  }
675 
676  return rc;
677 }
678 
679 
680 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
681 {
682  struct ifreq ifr;
683  struct sockaddr_in sin_orig;
684  struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
685  struct in_device *in_dev;
686  struct in_ifaddr **ifap = NULL;
687  struct in_ifaddr *ifa = NULL;
688  struct net_device *dev;
689  char *colon;
690  int ret = -EFAULT;
691  int tryaddrmatch = 0;
692 
693  /*
694  * Fetch the caller's info block into kernel space
695  */
696 
697  if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
698  goto out;
699  ifr.ifr_name[IFNAMSIZ - 1] = 0;
700 
701  /* save original address for comparison */
702  memcpy(&sin_orig, sin, sizeof(*sin));
703 
704  colon = strchr(ifr.ifr_name, ':');
705  if (colon)
706  *colon = 0;
707 
708  dev_load(net, ifr.ifr_name);
709 
710  switch (cmd) {
711  case SIOCGIFADDR: /* Get interface address */
712  case SIOCGIFBRDADDR: /* Get the broadcast address */
713  case SIOCGIFDSTADDR: /* Get the destination address */
714  case SIOCGIFNETMASK: /* Get the netmask for the interface */
715  /* Note that these ioctls will not sleep,
716  so that we do not impose a lock.
717  One day we will be forced to put shlock here (I mean SMP)
718  */
719  tryaddrmatch = (sin_orig.sin_family == AF_INET);
720  memset(sin, 0, sizeof(*sin));
721  sin->sin_family = AF_INET;
722  break;
723 
724  case SIOCSIFFLAGS:
725  ret = -EPERM;
726  if (!capable(CAP_NET_ADMIN))
727  goto out;
728  break;
729  case SIOCSIFADDR: /* Set interface address (and family) */
730  case SIOCSIFBRDADDR: /* Set the broadcast address */
731  case SIOCSIFDSTADDR: /* Set the destination address */
732  case SIOCSIFNETMASK: /* Set the netmask for the interface */
733  ret = -EPERM;
734  if (!capable(CAP_NET_ADMIN))
735  goto out;
736  ret = -EINVAL;
737  if (sin->sin_family != AF_INET)
738  goto out;
739  break;
740  default:
741  ret = -EINVAL;
742  goto out;
743  }
744 
745  rtnl_lock();
746 
747  ret = -ENODEV;
748  dev = __dev_get_by_name(net, ifr.ifr_name);
749  if (!dev)
750  goto done;
751 
752  if (colon)
753  *colon = ':';
754 
755  in_dev = __in_dev_get_rtnl(dev);
756  if (in_dev) {
757  if (tryaddrmatch) {
758  /* Matthias Andree */
759  /* compare label and address (4.4BSD style) */
760  /* note: we only do this for a limited set of ioctls
761  and only if the original address family was AF_INET.
762  This is checked above. */
763  for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
764  ifap = &ifa->ifa_next) {
765  if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
766  sin_orig.sin_addr.s_addr ==
767  ifa->ifa_local) {
768  break; /* found */
769  }
770  }
771  }
772  /* we didn't get a match, maybe the application is
773  4.3BSD-style and passed in junk so we fall back to
774  comparing just the label */
775  if (!ifa) {
776  for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
777  ifap = &ifa->ifa_next)
778  if (!strcmp(ifr.ifr_name, ifa->ifa_label))
779  break;
780  }
781  }
782 
783  ret = -EADDRNOTAVAIL;
784  if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
785  goto done;
786 
787  switch (cmd) {
788  case SIOCGIFADDR: /* Get interface address */
789  sin->sin_addr.s_addr = ifa->ifa_local;
790  goto rarok;
791 
792  case SIOCGIFBRDADDR: /* Get the broadcast address */
793  sin->sin_addr.s_addr = ifa->ifa_broadcast;
794  goto rarok;
795 
796  case SIOCGIFDSTADDR: /* Get the destination address */
797  sin->sin_addr.s_addr = ifa->ifa_address;
798  goto rarok;
799 
800  case SIOCGIFNETMASK: /* Get the netmask for the interface */
801  sin->sin_addr.s_addr = ifa->ifa_mask;
802  goto rarok;
803 
804  case SIOCSIFFLAGS:
805  if (colon) {
806  ret = -EADDRNOTAVAIL;
807  if (!ifa)
808  break;
809  ret = 0;
810  if (!(ifr.ifr_flags & IFF_UP))
811  inet_del_ifa(in_dev, ifap, 1);
812  break;
813  }
814  ret = dev_change_flags(dev, ifr.ifr_flags);
815  break;
816 
817  case SIOCSIFADDR: /* Set interface address (and family) */
818  ret = -EINVAL;
819  if (inet_abc_len(sin->sin_addr.s_addr) < 0)
820  break;
821 
822  if (!ifa) {
823  ret = -ENOBUFS;
824  ifa = inet_alloc_ifa();
825  INIT_HLIST_NODE(&ifa->hash);
826  if (!ifa)
827  break;
828  if (colon)
829  memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
830  else
831  memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
832  } else {
833  ret = 0;
834  if (ifa->ifa_local == sin->sin_addr.s_addr)
835  break;
836  inet_del_ifa(in_dev, ifap, 0);
837  ifa->ifa_broadcast = 0;
838  ifa->ifa_scope = 0;
839  }
840 
841  ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
842 
843  if (!(dev->flags & IFF_POINTOPOINT)) {
844  ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
845  ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
846  if ((dev->flags & IFF_BROADCAST) &&
847  ifa->ifa_prefixlen < 31)
848  ifa->ifa_broadcast = ifa->ifa_address |
849  ~ifa->ifa_mask;
850  } else {
851  ifa->ifa_prefixlen = 32;
852  ifa->ifa_mask = inet_make_mask(32);
853  }
854  ret = inet_set_ifa(dev, ifa);
855  break;
856 
857  case SIOCSIFBRDADDR: /* Set the broadcast address */
858  ret = 0;
859  if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
860  inet_del_ifa(in_dev, ifap, 0);
861  ifa->ifa_broadcast = sin->sin_addr.s_addr;
862  inet_insert_ifa(ifa);
863  }
864  break;
865 
866  case SIOCSIFDSTADDR: /* Set the destination address */
867  ret = 0;
868  if (ifa->ifa_address == sin->sin_addr.s_addr)
869  break;
870  ret = -EINVAL;
871  if (inet_abc_len(sin->sin_addr.s_addr) < 0)
872  break;
873  ret = 0;
874  inet_del_ifa(in_dev, ifap, 0);
875  ifa->ifa_address = sin->sin_addr.s_addr;
876  inet_insert_ifa(ifa);
877  break;
878 
879  case SIOCSIFNETMASK: /* Set the netmask for the interface */
880 
881  /*
882  * The mask we set must be legal.
883  */
884  ret = -EINVAL;
885  if (bad_mask(sin->sin_addr.s_addr, 0))
886  break;
887  ret = 0;
888  if (ifa->ifa_mask != sin->sin_addr.s_addr) {
889  __be32 old_mask = ifa->ifa_mask;
890  inet_del_ifa(in_dev, ifap, 0);
891  ifa->ifa_mask = sin->sin_addr.s_addr;
892  ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
893 
894  /* See if current broadcast address matches
895  * with current netmask, then recalculate
896  * the broadcast address. Otherwise it's a
897  * funny address, so don't touch it since
898  * the user seems to know what (s)he's doing...
899  */
900  if ((dev->flags & IFF_BROADCAST) &&
901  (ifa->ifa_prefixlen < 31) &&
902  (ifa->ifa_broadcast ==
903  (ifa->ifa_local|~old_mask))) {
904  ifa->ifa_broadcast = (ifa->ifa_local |
905  ~sin->sin_addr.s_addr);
906  }
907  inet_insert_ifa(ifa);
908  }
909  break;
910  }
911 done:
912  rtnl_unlock();
913 out:
914  return ret;
915 rarok:
916  rtnl_unlock();
917  ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
918  goto out;
919 }
920 
921 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
922 {
923  struct in_device *in_dev = __in_dev_get_rtnl(dev);
924  struct in_ifaddr *ifa;
925  struct ifreq ifr;
926  int done = 0;
927 
928  if (!in_dev)
929  goto out;
930 
931  for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
932  if (!buf) {
933  done += sizeof(ifr);
934  continue;
935  }
936  if (len < (int) sizeof(ifr))
937  break;
938  memset(&ifr, 0, sizeof(struct ifreq));
939  if (ifa->ifa_label)
940  strcpy(ifr.ifr_name, ifa->ifa_label);
941  else
942  strcpy(ifr.ifr_name, dev->name);
943 
944  (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
945  (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
946  ifa->ifa_local;
947 
948  if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
949  done = -EFAULT;
950  break;
951  }
952  buf += sizeof(struct ifreq);
953  len -= sizeof(struct ifreq);
954  done += sizeof(struct ifreq);
955  }
956 out:
957  return done;
958 }
959 
961 {
962  __be32 addr = 0;
963  struct in_device *in_dev;
964  struct net *net = dev_net(dev);
965 
966  rcu_read_lock();
967  in_dev = __in_dev_get_rcu(dev);
968  if (!in_dev)
969  goto no_in_dev;
970 
971  for_primary_ifa(in_dev) {
972  if (ifa->ifa_scope > scope)
973  continue;
974  if (!dst || inet_ifa_match(dst, ifa)) {
975  addr = ifa->ifa_local;
976  break;
977  }
978  if (!addr)
979  addr = ifa->ifa_local;
980  } endfor_ifa(in_dev);
981 
982  if (addr)
983  goto out_unlock;
984 no_in_dev:
985 
986  /* Not loopback addresses on loopback should be preferred
987  in this case. It is importnat that lo is the first interface
988  in dev_base list.
989  */
990  for_each_netdev_rcu(net, dev) {
991  in_dev = __in_dev_get_rcu(dev);
992  if (!in_dev)
993  continue;
994 
995  for_primary_ifa(in_dev) {
996  if (ifa->ifa_scope != RT_SCOPE_LINK &&
997  ifa->ifa_scope <= scope) {
998  addr = ifa->ifa_local;
999  goto out_unlock;
1000  }
1001  } endfor_ifa(in_dev);
1002  }
1003 out_unlock:
1004  rcu_read_unlock();
1005  return addr;
1006 }
1008 
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of in_dev tracking two things:
 *   addr - first address within 'scope' that equals 'local' (any address
 *          when local is 0)
 *   same - whether some address' subnet contains both 'local' and 'dst'
 *          (a zero value acts as a wildcard)
 * Returns addr only when a matching subnet was found, otherwise 0.
 */
1009 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1010  __be32 local, int scope)
1011 {
1012  int same = 0;
1013  __be32 addr = 0;
1014 
1015  for_ifa(in_dev) {
 /* Pick a candidate address once; stop if the subnet is confirmed. */
1016  if (!addr &&
1017  (local == ifa->ifa_local || !local) &&
1018  ifa->ifa_scope <= scope) {
1019  addr = ifa->ifa_local;
1020  if (same)
1021  break;
1022  }
1023  if (!same) {
1024  same = (!local || inet_ifa_match(local, ifa)) &&
1025  (!dst || inet_ifa_match(dst, ifa));
1026  if (same && addr) {
 /* An explicit local or a wildcard dst needs no further check. */
1027  if (local || !dst)
1028  break;
1029  /* Is the selected addr into dst subnet? */
1030  if (inet_ifa_match(addr, ifa))
1031  break;
1032  /* No, then can we use new local src? */
1033  if (ifa->ifa_scope <= scope) {
1034  addr = ifa->ifa_local;
1035  break;
1036  }
1037  /* search for large dst subnet for addr */
1038  same = 0;
1039  }
1040  }
1041  } endfor_ifa(in_dev);
1042 
1043  return same ? addr : 0;
1044 }
1045 
1046 /*
1047  * Confirm that local IP address exists using wildcards:
1048  * - in_dev: only on this interface, 0=any interface
1049  * - dst: only in the same subnet as dst, 0=any dst
1050  * - local: address, 0=autoselect the local address
1051  * - scope: maximum allowed scope value for the local address
1052  */
1053 __be32 inet_confirm_addr(struct in_device *in_dev,
1054  __be32 dst, __be32 local, int scope)
1055 {
1056  __be32 addr = 0;
1057  struct net_device *dev;
1058  struct net *net;
1059 
1060  if (scope != RT_SCOPE_LINK)
1061  return confirm_addr_indev(in_dev, dst, local, scope);
1062 
1063  net = dev_net(in_dev->dev);
1064  rcu_read_lock();
1065  for_each_netdev_rcu(net, dev) {
1066  in_dev = __in_dev_get_rcu(dev);
1067  if (in_dev) {
1068  addr = confirm_addr_indev(in_dev, dst, local, scope);
1069  if (addr)
1070  break;
1071  }
1072  }
1073  rcu_read_unlock();
1074 
1075  return addr;
1076 }
1078 
1079 /*
1080  * Device notifier
1081  */
1082 
1084 {
1085  return blocking_notifier_chain_register(&inetaddr_chain, nb);
1086 }
1088 
1090 {
1091  return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1092 }
1094 
1095 /* Rename ifa_labels for a device name change. Make some effort to preserve
1096  * existing alias numbering and to create unique labels if possible.
1097 */
1098 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1099 {
1100  struct in_ifaddr *ifa;
1101  int named = 0;
1102 
1103  for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1104  char old[IFNAMSIZ], *dot;
1105 
1106  memcpy(old, ifa->ifa_label, IFNAMSIZ);
1107  memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1108  if (named++ == 0)
1109  goto skip;
1110  dot = strchr(old, ':');
1111  if (dot == NULL) {
1112  sprintf(old, ":%d", named);
1113  dot = old;
1114  }
1115  if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1116  strcat(ifa->ifa_label, dot);
1117  else
1118  strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1119 skip:
1120  rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1121  }
1122 }
1123 
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device:
 * true for 68 and above, false below.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;
	return true;
}
1128 
1129 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1130  struct in_device *in_dev)
1131 
1132 {
1133  struct in_ifaddr *ifa;
1134 
1135  for (ifa = in_dev->ifa_list; ifa;
1136  ifa = ifa->ifa_next) {
1138  ifa->ifa_local, dev,
1139  ifa->ifa_local, NULL,
1140  dev->dev_addr, NULL);
1141  }
1142 }
1143 
1144 /* Called only under RTNL semaphore */
1145 
1146 static int inetdev_event(struct notifier_block *this, unsigned long event,
1147  void *ptr)
1148 {
1149  struct net_device *dev = ptr;
1150  struct in_device *in_dev = __in_dev_get_rtnl(dev);
1151 
1152  ASSERT_RTNL();
1153 
1154  if (!in_dev) {
1155  if (event == NETDEV_REGISTER) {
1156  in_dev = inetdev_init(dev);
1157  if (!in_dev)
1158  return notifier_from_errno(-ENOMEM);
1159  if (dev->flags & IFF_LOOPBACK) {
1160  IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1161  IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1162  }
1163  } else if (event == NETDEV_CHANGEMTU) {
1164  /* Re-enabling IP */
1165  if (inetdev_valid_mtu(dev->mtu))
1166  in_dev = inetdev_init(dev);
1167  }
1168  goto out;
1169  }
1170 
1171  switch (event) {
1172  case NETDEV_REGISTER:
1173  pr_debug("%s: bug\n", __func__);
1174  RCU_INIT_POINTER(dev->ip_ptr, NULL);
1175  break;
1176  case NETDEV_UP:
1177  if (!inetdev_valid_mtu(dev->mtu))
1178  break;
1179  if (dev->flags & IFF_LOOPBACK) {
1180  struct in_ifaddr *ifa = inet_alloc_ifa();
1181 
1182  if (ifa) {
1183  INIT_HLIST_NODE(&ifa->hash);
1184  ifa->ifa_local =
1185  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1186  ifa->ifa_prefixlen = 8;
1187  ifa->ifa_mask = inet_make_mask(8);
1188  in_dev_hold(in_dev);
1189  ifa->ifa_dev = in_dev;
1190  ifa->ifa_scope = RT_SCOPE_HOST;
1191  memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1192  inet_insert_ifa(ifa);
1193  }
1194  }
1195  ip_mc_up(in_dev);
1196  /* fall through */
1197  case NETDEV_CHANGEADDR:
1198  if (!IN_DEV_ARP_NOTIFY(in_dev))
1199  break;
1200  /* fall through */
1201  case NETDEV_NOTIFY_PEERS:
1202  /* Send gratuitous ARP to notify of link change */
1203  inetdev_send_gratuitous_arp(dev, in_dev);
1204  break;
1205  case NETDEV_DOWN:
1206  ip_mc_down(in_dev);
1207  break;
1209  ip_mc_unmap(in_dev);
1210  break;
1212  ip_mc_remap(in_dev);
1213  break;
1214  case NETDEV_CHANGEMTU:
1215  if (inetdev_valid_mtu(dev->mtu))
1216  break;
1217  /* disable IP when MTU is not enough */
1218  case NETDEV_UNREGISTER:
1219  inetdev_destroy(in_dev);
1220  break;
1221  case NETDEV_CHANGENAME:
1222  /* Do not notify about label change, this event is
1223  * not interesting to applications using netlink.
1224  */
1225  inetdev_changename(dev, in_dev);
1226 
1227  devinet_sysctl_unregister(in_dev);
1228  devinet_sysctl_register(in_dev);
1229  break;
1230  }
1231 out:
1232  return NOTIFY_DONE;
1233 }
1234 
1235 static struct notifier_block ip_netdev_notifier = {
1236  .notifier_call = inetdev_event,
1237 };
1238 
1239 static size_t inet_nlmsg_size(void)
1240 {
1241  return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1242  + nla_total_size(4) /* IFA_ADDRESS */
1243  + nla_total_size(4) /* IFA_LOCAL */
1244  + nla_total_size(4) /* IFA_BROADCAST */
1245  + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1246 }
1247 
1248 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1249  u32 portid, u32 seq, int event, unsigned int flags)
1250 {
1251  struct ifaddrmsg *ifm;
1252  struct nlmsghdr *nlh;
1253 
1254  nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1255  if (nlh == NULL)
1256  return -EMSGSIZE;
1257 
1258  ifm = nlmsg_data(nlh);
1259  ifm->ifa_family = AF_INET;
1260  ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1261  ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1262  ifm->ifa_scope = ifa->ifa_scope;
1263  ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1264 
1265  if ((ifa->ifa_address &&
1266  nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1267  (ifa->ifa_local &&
1268  nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1269  (ifa->ifa_broadcast &&
1270  nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1271  (ifa->ifa_label[0] &&
1272  nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1273  goto nla_put_failure;
1274 
1275  return nlmsg_end(skb, nlh);
1276 
1277 nla_put_failure:
1278  nlmsg_cancel(skb, nlh);
1279  return -EMSGSIZE;
1280 }
1281 
1282 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1283 {
1284  struct net *net = sock_net(skb->sk);
1285  int h, s_h;
1286  int idx, s_idx;
1287  int ip_idx, s_ip_idx;
1288  struct net_device *dev;
1289  struct in_device *in_dev;
1290  struct in_ifaddr *ifa;
1291  struct hlist_head *head;
1292  struct hlist_node *node;
1293 
1294  s_h = cb->args[0];
1295  s_idx = idx = cb->args[1];
1296  s_ip_idx = ip_idx = cb->args[2];
1297 
1298  for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1299  idx = 0;
1300  head = &net->dev_index_head[h];
1301  rcu_read_lock();
1302  hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1303  if (idx < s_idx)
1304  goto cont;
1305  if (h > s_h || idx > s_idx)
1306  s_ip_idx = 0;
1307  in_dev = __in_dev_get_rcu(dev);
1308  if (!in_dev)
1309  goto cont;
1310 
1311  for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1312  ifa = ifa->ifa_next, ip_idx++) {
1313  if (ip_idx < s_ip_idx)
1314  continue;
1315  if (inet_fill_ifaddr(skb, ifa,
1316  NETLINK_CB(cb->skb).portid,
1317  cb->nlh->nlmsg_seq,
1318  RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1319  rcu_read_unlock();
1320  goto done;
1321  }
1322  }
1323 cont:
1324  idx++;
1325  }
1326  rcu_read_unlock();
1327  }
1328 
1329 done:
1330  cb->args[0] = h;
1331  cb->args[1] = idx;
1332  cb->args[2] = ip_idx;
1333 
1334  return skb->len;
1335 }
1336 
1337 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1338  u32 portid)
1339 {
1340  struct sk_buff *skb;
1341  u32 seq = nlh ? nlh->nlmsg_seq : 0;
1342  int err = -ENOBUFS;
1343  struct net *net;
1344 
1345  net = dev_net(ifa->ifa_dev->dev);
1346  skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1347  if (skb == NULL)
1348  goto errout;
1349 
1350  err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1351  if (err < 0) {
1352  /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1353  WARN_ON(err == -EMSGSIZE);
1354  kfree_skb(skb);
1355  goto errout;
1356  }
1357  rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1358  return;
1359 errout:
1360  if (err < 0)
1362 }
1363 
1364 static size_t inet_get_link_af_size(const struct net_device *dev)
1365 {
1366  struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1367 
1368  if (!in_dev)
1369  return 0;
1370 
1371  return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1372 }
1373 
1374 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1375 {
1376  struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1377  struct nlattr *nla;
1378  int i;
1379 
1380  if (!in_dev)
1381  return -ENODATA;
1382 
1383  nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1384  if (nla == NULL)
1385  return -EMSGSIZE;
1386 
1387  for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1388  ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1389 
1390  return 0;
1391 }
1392 
1393 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1394  [IFLA_INET_CONF] = { .type = NLA_NESTED },
1395 };
1396 
1397 static int inet_validate_link_af(const struct net_device *dev,
1398  const struct nlattr *nla)
1399 {
1400  struct nlattr *a, *tb[IFLA_INET_MAX+1];
1401  int err, rem;
1402 
1403  if (dev && !__in_dev_get_rtnl(dev))
1404  return -EAFNOSUPPORT;
1405 
1406  err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1407  if (err < 0)
1408  return err;
1409 
1410  if (tb[IFLA_INET_CONF]) {
1411  nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1412  int cfgid = nla_type(a);
1413 
1414  if (nla_len(a) < 4)
1415  return -EINVAL;
1416 
1417  if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1418  return -EINVAL;
1419  }
1420  }
1421 
1422  return 0;
1423 }
1424 
1425 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1426 {
1427  struct in_device *in_dev = __in_dev_get_rtnl(dev);
1428  struct nlattr *a, *tb[IFLA_INET_MAX+1];
1429  int rem;
1430 
1431  if (!in_dev)
1432  return -EAFNOSUPPORT;
1433 
1434  if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1435  BUG();
1436 
1437  if (tb[IFLA_INET_CONF]) {
1438  nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1439  ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1440  }
1441 
1442  return 0;
1443 }
1444 
1445 #ifdef CONFIG_SYSCTL
1446 
1447 static void devinet_copy_dflt_conf(struct net *net, int i)
1448 {
1449  struct net_device *dev;
1450 
1451  rcu_read_lock();
1452  for_each_netdev_rcu(net, dev) {
1453  struct in_device *in_dev;
1454 
1455  in_dev = __in_dev_get_rcu(dev);
1456  if (in_dev && !test_bit(i, in_dev->cnf.state))
1457  in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1458  }
1459  rcu_read_unlock();
1460 }
1461 
1462 /* called with RTNL locked */
1463 static void inet_forward_change(struct net *net)
1464 {
1465  struct net_device *dev;
1466  int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1467 
1468  IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1469  IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1470 
1471  for_each_netdev(net, dev) {
1472  struct in_device *in_dev;
1473  if (on)
1474  dev_disable_lro(dev);
1475  rcu_read_lock();
1476  in_dev = __in_dev_get_rcu(dev);
1477  if (in_dev)
1478  IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1479  rcu_read_unlock();
1480  }
1481 }
1482 
1483 static int devinet_conf_proc(ctl_table *ctl, int write,
1484  void __user *buffer,
1485  size_t *lenp, loff_t *ppos)
1486 {
1487  int old_value = *(int *)ctl->data;
1488  int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1489  int new_value = *(int *)ctl->data;
1490 
1491  if (write) {
1492  struct ipv4_devconf *cnf = ctl->extra1;
1493  struct net *net = ctl->extra2;
1494  int i = (int *)ctl->data - cnf->data;
1495 
1496  set_bit(i, cnf->state);
1497 
1498  if (cnf == net->ipv4.devconf_dflt)
1499  devinet_copy_dflt_conf(net, i);
1500  if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1501  i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1502  if ((new_value == 0) && (old_value != 0))
1503  rt_cache_flush(net);
1504  }
1505 
1506  return ret;
1507 }
1508 
1509 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1510  void __user *buffer,
1511  size_t *lenp, loff_t *ppos)
1512 {
1513  int *valp = ctl->data;
1514  int val = *valp;
1515  loff_t pos = *ppos;
1516  int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1517 
1518  if (write && *valp != val) {
1519  struct net *net = ctl->extra2;
1520 
1521  if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1522  if (!rtnl_trylock()) {
1523  /* Restore the original values before restarting */
1524  *valp = val;
1525  *ppos = pos;
1526  return restart_syscall();
1527  }
1528  if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1529  inet_forward_change(net);
1530  } else if (*valp) {
1531  struct ipv4_devconf *cnf = ctl->extra1;
1532  struct in_device *idev =
1533  container_of(cnf, struct in_device, cnf);
1534  dev_disable_lro(idev->dev);
1535  }
1536  rtnl_unlock();
1537  rt_cache_flush(net);
1538  }
1539  }
1540 
1541  return ret;
1542 }
1543 
1544 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1545  void __user *buffer,
1546  size_t *lenp, loff_t *ppos)
1547 {
1548  int *valp = ctl->data;
1549  int val = *valp;
1550  int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1551  struct net *net = ctl->extra2;
1552 
1553  if (write && *valp != val)
1554  rt_cache_flush(net);
1555 
1556  return ret;
1557 }
1558 
/*
 * Initialiser for one ctl_table entry bound to ipv4_devconf.
 * IPV4_DEVCONF_<attr> ids are 1-based, hence the "- 1" when indexing data[].
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1581 
1582 static struct devinet_sysctl_table {
1583  struct ctl_table_header *sysctl_header;
1584  struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1585 } devinet_sysctl = {
1586  .devinet_vars = {
1587  DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1588  devinet_sysctl_forward),
1589  DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1590 
1591  DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1592  DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1593  DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1594  DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1595  DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1596  DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1597  "accept_source_route"),
1598  DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1599  DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1600  DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1601  DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1602  DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1603  DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1604  DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1605  DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1606  DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1607  DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1608  DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1609  DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1610  DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1611 
1612  DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1613  DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1614  DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1615  "force_igmp_version"),
1616  DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1617  "promote_secondaries"),
1618  DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
1619  "route_localnet"),
1620  },
1621 };
1622 
1623 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1624  struct ipv4_devconf *p)
1625 {
1626  int i;
1627  struct devinet_sysctl_table *t;
1628  char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1629 
1630  t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1631  if (!t)
1632  goto out;
1633 
1634  for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1635  t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1636  t->devinet_vars[i].extra1 = p;
1637  t->devinet_vars[i].extra2 = net;
1638  }
1639 
1640  snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1641 
1642  t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1643  if (!t->sysctl_header)
1644  goto free;
1645 
1646  p->sysctl = t;
1647  return 0;
1648 
1649 free:
1650  kfree(t);
1651 out:
1652  return -ENOBUFS;
1653 }
1654 
1655 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1656 {
1657  struct devinet_sysctl_table *t = cnf->sysctl;
1658 
1659  if (t == NULL)
1660  return;
1661 
1662  cnf->sysctl = NULL;
1663  unregister_net_sysctl_table(t->sysctl_header);
1664  kfree(t);
1665 }
1666 
1667 static void devinet_sysctl_register(struct in_device *idev)
1668 {
1669  neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1670  __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1671  &idev->cnf);
1672 }
1673 
1674 static void devinet_sysctl_unregister(struct in_device *idev)
1675 {
1676  __devinet_sysctl_unregister(&idev->cnf);
1677  neigh_sysctl_unregister(idev->arp_parms);
1678 }
1679 
1680 static struct ctl_table ctl_forward_entry[] = {
1681  {
1682  .procname = "ip_forward",
1683  .data = &ipv4_devconf.data[
1684  IPV4_DEVCONF_FORWARDING - 1],
1685  .maxlen = sizeof(int),
1686  .mode = 0644,
1687  .proc_handler = devinet_sysctl_forward,
1688  .extra1 = &ipv4_devconf,
1689  .extra2 = &init_net,
1690  },
1691  { },
1692 };
1693 #endif
1694 
1695 static __net_init int devinet_init_net(struct net *net)
1696 {
1697  int err;
1698  struct ipv4_devconf *all, *dflt;
1699 #ifdef CONFIG_SYSCTL
1700  struct ctl_table *tbl = ctl_forward_entry;
1701  struct ctl_table_header *forw_hdr;
1702 #endif
1703 
1704  err = -ENOMEM;
1705  all = &ipv4_devconf;
1706  dflt = &ipv4_devconf_dflt;
1707 
1708  if (!net_eq(net, &init_net)) {
1709  all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1710  if (all == NULL)
1711  goto err_alloc_all;
1712 
1713  dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1714  if (dflt == NULL)
1715  goto err_alloc_dflt;
1716 
1717 #ifdef CONFIG_SYSCTL
1718  tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1719  if (tbl == NULL)
1720  goto err_alloc_ctl;
1721 
1722  tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1723  tbl[0].extra1 = all;
1724  tbl[0].extra2 = net;
1725 #endif
1726  }
1727 
1728 #ifdef CONFIG_SYSCTL
1729  err = __devinet_sysctl_register(net, "all", all);
1730  if (err < 0)
1731  goto err_reg_all;
1732 
1733  err = __devinet_sysctl_register(net, "default", dflt);
1734  if (err < 0)
1735  goto err_reg_dflt;
1736 
1737  err = -ENOMEM;
1738  forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
1739  if (forw_hdr == NULL)
1740  goto err_reg_ctl;
1741  net->ipv4.forw_hdr = forw_hdr;
1742 #endif
1743 
1744  net->ipv4.devconf_all = all;
1745  net->ipv4.devconf_dflt = dflt;
1746  return 0;
1747 
1748 #ifdef CONFIG_SYSCTL
1749 err_reg_ctl:
1750  __devinet_sysctl_unregister(dflt);
1751 err_reg_dflt:
1752  __devinet_sysctl_unregister(all);
1753 err_reg_all:
1754  if (tbl != ctl_forward_entry)
1755  kfree(tbl);
1756 err_alloc_ctl:
1757 #endif
1758  if (dflt != &ipv4_devconf_dflt)
1759  kfree(dflt);
1760 err_alloc_dflt:
1761  if (all != &ipv4_devconf)
1762  kfree(all);
1763 err_alloc_all:
1764  return err;
1765 }
1766 
1767 static __net_exit void devinet_exit_net(struct net *net)
1768 {
1769 #ifdef CONFIG_SYSCTL
1770  struct ctl_table *tbl;
1771 
1772  tbl = net->ipv4.forw_hdr->ctl_table_arg;
1773  unregister_net_sysctl_table(net->ipv4.forw_hdr);
1774  __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1775  __devinet_sysctl_unregister(net->ipv4.devconf_all);
1776  kfree(tbl);
1777 #endif
1778  kfree(net->ipv4.devconf_dflt);
1779  kfree(net->ipv4.devconf_all);
1780 }
1781 
1782 static __net_initdata struct pernet_operations devinet_ops = {
1783  .init = devinet_init_net,
1784  .exit = devinet_exit_net,
1785 };
1786 
1787 static struct rtnl_af_ops inet_af_ops = {
1788  .family = AF_INET,
1789  .fill_link_af = inet_fill_link_af,
1790  .get_link_af_size = inet_get_link_af_size,
1791  .validate_link_af = inet_validate_link_af,
1792  .set_link_af = inet_set_link_af,
1793 };
1794 
1796 {
1797  int i;
1798 
1799  for (i = 0; i < IN4_ADDR_HSIZE; i++)
1800  INIT_HLIST_HEAD(&inet_addr_lst[i]);
1801 
1802  register_pernet_subsys(&devinet_ops);
1803 
1804  register_gifconf(PF_INET, inet_gifconf);
1805  register_netdevice_notifier(&ip_netdev_notifier);
1806 
1807  rtnl_af_register(&inet_af_ops);
1808 
1809  rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1810  rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1811  rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1812 }
1813