Linux Kernel  3.7.1
af_netlink.c
1 /*
2  * NETLINK Kernel-user communication protocol.
3  *
4  * Authors: Alan Cox <[email protected]>
5  * Alexey Kuznetsov <[email protected]>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version
10  * 2 of the License, or (at your option) any later version.
11  *
12  * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
13  * added netlink_proto_exit
14  * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <[email protected]>
15  * use nlk_sk, as sk->protinfo is on a diet 8)
16  * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <[email protected]>
17  * - inc module use count of module that owns
18  * the kernel socket in case userspace opens
19  * socket of same protocol
20  * - remove all module support, since netlink is
21  * mandatory if CONFIG_NET=y these days
22  */
23 
24 #include <linux/module.h>
25 
26 #include <linux/capability.h>
27 #include <linux/kernel.h>
28 #include <linux/init.h>
29 #include <linux/signal.h>
30 #include <linux/sched.h>
31 #include <linux/errno.h>
32 #include <linux/string.h>
33 #include <linux/stat.h>
34 #include <linux/socket.h>
35 #include <linux/un.h>
36 #include <linux/fcntl.h>
37 #include <linux/termios.h>
38 #include <linux/sockios.h>
39 #include <linux/net.h>
40 #include <linux/fs.h>
41 #include <linux/slab.h>
42 #include <asm/uaccess.h>
43 #include <linux/skbuff.h>
44 #include <linux/netdevice.h>
45 #include <linux/rtnetlink.h>
46 #include <linux/proc_fs.h>
47 #include <linux/seq_file.h>
48 #include <linux/notifier.h>
49 #include <linux/security.h>
50 #include <linux/jhash.h>
51 #include <linux/jiffies.h>
52 #include <linux/random.h>
53 #include <linux/bitops.h>
54 #include <linux/mm.h>
55 #include <linux/types.h>
56 #include <linux/audit.h>
57 #include <linux/mutex.h>
58 
59 #include <net/net_namespace.h>
60 #include <net/sock.h>
61 #include <net/scm.h>
62 #include <net/netlink.h>
63 
64 #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
65 #define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
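66 
/* Editorial note (not in the original source): a quick worked example of the
 * two macros above.  With x == 32 groups on a 64-bit kernel,
 * NLGRPSZ(32) == ALIGN(32, 64) / 8 == 8 bytes and NLGRPLONGS(32) == 1,
 * i.e. the per-socket group bitmap fits in a single unsigned long
 * (on 32-bit it is 4 bytes, still one long). */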
66 
67 struct netlink_sock {
68  /* struct sock has to be the first member of netlink_sock */
69  struct sock sk;
 70  u32 portid;
 71  u32 dst_portid;
 72  u32 dst_group;
 73  u32 flags;
 74  u32 subscriptions;
 75  u32 ngroups;
 76  unsigned long *groups;
 77  unsigned long state;
 78  wait_queue_head_t wait;
 79  struct netlink_callback *cb;
 80  struct mutex *cb_mutex;
 81  struct mutex cb_def_mutex;
 82  void (*netlink_rcv)(struct sk_buff *skb);
 83  void (*netlink_bind)(int group);
 84  struct module *module;
85 };
86 
87 struct listeners {
88  struct rcu_head rcu;
89  unsigned long masks[0];
90 };
91 
92 #define NETLINK_KERNEL_SOCKET 0x1
93 #define NETLINK_RECV_PKTINFO 0x2
94 #define NETLINK_BROADCAST_SEND_ERROR 0x4
95 #define NETLINK_RECV_NO_ENOBUFS 0x8
96 
97 static inline struct netlink_sock *nlk_sk(struct sock *sk)
98 {
99  return container_of(sk, struct netlink_sock, sk);
100 }
101 
102 static inline int netlink_is_kernel(struct sock *sk)
103 {
104  return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
105 }
106 
107 struct nl_portid_hash {
108  struct hlist_head *table;
109  unsigned long rehash_time;
110 
111  unsigned int mask;
112  unsigned int shift;
113 
114  unsigned int entries;
115  unsigned int max_shift;
116 
117  u32 rnd;
118 };
119 
120 struct netlink_table {
121  struct nl_portid_hash hash;
122  struct hlist_head mc_list;
123  struct listeners __rcu *listeners;
124  unsigned int flags;
125  unsigned int groups;
126  struct mutex *cb_mutex;
127  struct module *module;
128  void (*bind)(int group);
129  int registered;
130 };
131 
132 static struct netlink_table *nl_table;
133 
134 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
135 
136 static int netlink_dump(struct sock *sk);
137 
138 static DEFINE_RWLOCK(nl_table_lock);
139 static atomic_t nl_table_users = ATOMIC_INIT(0);
140 
141 #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
142 
143 static ATOMIC_NOTIFIER_HEAD(netlink_chain);
144 
145 static inline u32 netlink_group_mask(u32 group)
146 {
147  return group ? 1 << (group - 1) : 0;
148 }
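/* Editorial note (not in the original source): netlink multicast groups are
 * numbered from 1, so e.g. netlink_group_mask(1) == 0x1 and
 * netlink_group_mask(3) == 0x4, while group 0 ("no group") maps to 0. */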
149 
150 static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid)
151 {
152  return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
153 }
154 
155 static void netlink_destroy_callback(struct netlink_callback *cb)
156 {
157  kfree_skb(cb->skb);
158  kfree(cb);
159 }
160 
161 static void netlink_consume_callback(struct netlink_callback *cb)
162 {
163  consume_skb(cb->skb);
164  kfree(cb);
165 }
166 
167 static void netlink_sock_destruct(struct sock *sk)
168 {
169  struct netlink_sock *nlk = nlk_sk(sk);
170 
171  if (nlk->cb) {
172  if (nlk->cb->done)
173  nlk->cb->done(nlk->cb);
174 
175  module_put(nlk->cb->module);
176  netlink_destroy_callback(nlk->cb);
177  }
178 
179  skb_queue_purge(&sk->sk_receive_queue);
180 
181  if (!sock_flag(sk, SOCK_DEAD)) {
182  printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
183  return;
184  }
185 
186  WARN_ON(atomic_read(&sk->sk_rmem_alloc));
187  WARN_ON(atomic_read(&sk->sk_wmem_alloc));
188  WARN_ON(nlk_sk(sk)->groups);
189 }
190 
191 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
192  * SMP. Look, when several writers sleep and reader wakes them up, all but one
193  * immediately hit write lock and grab all the cpus. Exclusive sleep solves
194  * this, _but_ remember, it adds useless work on UP machines.
195  */
196 
197 void netlink_table_grab(void)
198  __acquires(nl_table_lock)
199 {
200  might_sleep();
201 
202  write_lock_irq(&nl_table_lock);
203 
204  if (atomic_read(&nl_table_users)) {
205  DECLARE_WAITQUEUE(wait, current);
206 
207  add_wait_queue_exclusive(&nl_table_wait, &wait);
208  for (;;) {
209  set_current_state(TASK_UNINTERRUPTIBLE);
210  if (atomic_read(&nl_table_users) == 0)
211  break;
212  write_unlock_irq(&nl_table_lock);
213  schedule();
214  write_lock_irq(&nl_table_lock);
215  }
216 
217  __set_current_state(TASK_RUNNING);
218  remove_wait_queue(&nl_table_wait, &wait);
219  }
220 }
221 
222 void netlink_table_ungrab(void)
223  __releases(nl_table_lock)
224 {
225  write_unlock_irq(&nl_table_lock);
226  wake_up(&nl_table_wait);
227 }
228 
229 static inline void
230 netlink_lock_table(void)
231 {
232  /* read_lock() synchronizes us to netlink_table_grab */
233 
234  read_lock(&nl_table_lock);
235  atomic_inc(&nl_table_users);
236  read_unlock(&nl_table_lock);
237 }
238 
239 static inline void
240 netlink_unlock_table(void)
241 {
242  if (atomic_dec_and_test(&nl_table_users))
243  wake_up(&nl_table_wait);
244 }
245 
246 static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
247 {
248  struct nl_portid_hash *hash = &nl_table[protocol].hash;
249  struct hlist_head *head;
250  struct sock *sk;
251  struct hlist_node *node;
252 
253  read_lock(&nl_table_lock);
254  head = nl_portid_hashfn(hash, portid);
255  sk_for_each(sk, node, head) {
256  if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) {
257  sock_hold(sk);
258  goto found;
259  }
260  }
261  sk = NULL;
262 found:
263  read_unlock(&nl_table_lock);
264  return sk;
265 }
266 
267 static struct hlist_head *nl_portid_hash_zalloc(size_t size)
268 {
269  if (size <= PAGE_SIZE)
270  return kzalloc(size, GFP_ATOMIC);
271  else
272  return (struct hlist_head *)
273  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
274  get_order(size));
275 }
276 
277 static void nl_portid_hash_free(struct hlist_head *table, size_t size)
278 {
279  if (size <= PAGE_SIZE)
280  kfree(table);
281  else
282  free_pages((unsigned long)table, get_order(size));
283 }
284 
285 static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow)
286 {
287  unsigned int omask, mask, shift;
288  size_t osize, size;
289  struct hlist_head *otable, *table;
290  int i;
291 
292  omask = mask = hash->mask;
293  osize = size = (mask + 1) * sizeof(*table);
294  shift = hash->shift;
295 
296  if (grow) {
297  if (++shift > hash->max_shift)
298  return 0;
299  mask = mask * 2 + 1;
300  size *= 2;
301  }
302 
303  table = nl_portid_hash_zalloc(size);
304  if (!table)
305  return 0;
306 
307  otable = hash->table;
308  hash->table = table;
309  hash->mask = mask;
310  hash->shift = shift;
311  get_random_bytes(&hash->rnd, sizeof(hash->rnd));
312 
313  for (i = 0; i <= omask; i++) {
314  struct sock *sk;
315  struct hlist_node *node, *tmp;
316 
317  sk_for_each_safe(sk, node, tmp, &otable[i])
318  __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid));
319  }
320 
321  nl_portid_hash_free(otable, osize);
322  hash->rehash_time = jiffies + 10 * 60 * HZ;
323  return 1;
324 }
325 
326 static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len)
327 {
328  int avg = hash->entries >> hash->shift;
329 
330  if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1))
331  return 1;
332 
333  if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
334  nl_portid_hash_rehash(hash, 0);
335  return 1;
336  }
337 
338  return 0;
339 }
340 
341 static const struct proto_ops netlink_ops;
342 
343 static void
344 netlink_update_listeners(struct sock *sk)
345 {
346  struct netlink_table *tbl = &nl_table[sk->sk_protocol];
347  struct hlist_node *node;
348  unsigned long mask;
349  unsigned int i;
350  struct listeners *listeners;
351 
352  listeners = nl_deref_protected(tbl->listeners);
353  if (!listeners)
354  return;
355 
356  for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
357  mask = 0;
358  sk_for_each_bound(sk, node, &tbl->mc_list) {
359  if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
360  mask |= nlk_sk(sk)->groups[i];
361  }
362  listeners->masks[i] = mask;
363  }
364  /* this function is only called with the netlink table "grabbed", which
365  * makes sure updates are visible before bind or setsockopt return. */
366 }
367 
368 static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
369 {
370  struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
371  struct hlist_head *head;
372  int err = -EADDRINUSE;
373  struct sock *osk;
374  struct hlist_node *node;
375  int len;
376 
377  netlink_table_grab();
378  head = nl_portid_hashfn(hash, portid);
379  len = 0;
380  sk_for_each(osk, node, head) {
381  if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid))
382  break;
383  len++;
384  }
385  if (node)
386  goto err;
387 
388  err = -EBUSY;
389  if (nlk_sk(sk)->portid)
390  goto err;
391 
392  err = -ENOMEM;
393  if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
394  goto err;
395 
396  if (len && nl_portid_hash_dilute(hash, len))
397  head = nl_portid_hashfn(hash, portid);
398  hash->entries++;
399  nlk_sk(sk)->portid = portid;
400  sk_add_node(sk, head);
401  err = 0;
402 
403 err:
404  netlink_table_ungrab();
405  return err;
406 }
407 
408 static void netlink_remove(struct sock *sk)
409 {
410  netlink_table_grab();
411  if (sk_del_node_init(sk))
412  nl_table[sk->sk_protocol].hash.entries--;
413  if (nlk_sk(sk)->subscriptions)
414  __sk_del_bind_node(sk);
415  netlink_table_ungrab();
416 }
417 
418 static struct proto netlink_proto = {
419  .name = "NETLINK",
420  .owner = THIS_MODULE,
421  .obj_size = sizeof(struct netlink_sock),
422 };
423 
424 static int __netlink_create(struct net *net, struct socket *sock,
425  struct mutex *cb_mutex, int protocol)
426 {
427  struct sock *sk;
428  struct netlink_sock *nlk;
429 
430  sock->ops = &netlink_ops;
431 
432  sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
433  if (!sk)
434  return -ENOMEM;
435 
436  sock_init_data(sock, sk);
437 
438  nlk = nlk_sk(sk);
439  if (cb_mutex) {
440  nlk->cb_mutex = cb_mutex;
441  } else {
442  nlk->cb_mutex = &nlk->cb_def_mutex;
443  mutex_init(nlk->cb_mutex);
444  }
445  init_waitqueue_head(&nlk->wait);
446 
447  sk->sk_destruct = netlink_sock_destruct;
448  sk->sk_protocol = protocol;
449  return 0;
450 }
451 
452 static int netlink_create(struct net *net, struct socket *sock, int protocol,
453  int kern)
454 {
455  struct module *module = NULL;
456  struct mutex *cb_mutex;
457  struct netlink_sock *nlk;
458  void (*bind)(int group);
459  int err = 0;
460 
461  sock->state = SS_UNCONNECTED;
462 
463  if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
464  return -ESOCKTNOSUPPORT;
465 
466  if (protocol < 0 || protocol >= MAX_LINKS)
467  return -EPROTONOSUPPORT;
468 
469  netlink_lock_table();
470 #ifdef CONFIG_MODULES
471  if (!nl_table[protocol].registered) {
472  netlink_unlock_table();
473  request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
474  netlink_lock_table();
475  }
476 #endif
477  if (nl_table[protocol].registered &&
478  try_module_get(nl_table[protocol].module))
479  module = nl_table[protocol].module;
480  else
481  err = -EPROTONOSUPPORT;
482  cb_mutex = nl_table[protocol].cb_mutex;
483  bind = nl_table[protocol].bind;
484  netlink_unlock_table();
485 
486  if (err < 0)
487  goto out;
488 
489  err = __netlink_create(net, sock, cb_mutex, protocol);
490  if (err < 0)
491  goto out_module;
492 
493  local_bh_disable();
494  sock_prot_inuse_add(net, &netlink_proto, 1);
495  local_bh_enable();
496 
497  nlk = nlk_sk(sock->sk);
498  nlk->module = module;
499  nlk->netlink_bind = bind;
500 out:
501  return err;
502 
503 out_module:
504  module_put(module);
505  goto out;
506 }
507 
508 static int netlink_release(struct socket *sock)
509 {
510  struct sock *sk = sock->sk;
511  struct netlink_sock *nlk;
512 
513  if (!sk)
514  return 0;
515 
516  netlink_remove(sk);
517  sock_orphan(sk);
518  nlk = nlk_sk(sk);
519 
520  /*
521  * OK. Socket is unlinked, any packets that arrive now
522  * will be purged.
523  */
524 
525  sock->sk = NULL;
526  wake_up_interruptible_all(&nlk->wait);
527 
528  skb_queue_purge(&sk->sk_write_queue);
529 
530  if (nlk->portid) {
531  struct netlink_notify n = {
532  .net = sock_net(sk),
533  .protocol = sk->sk_protocol,
534  .portid = nlk->portid,
535  };
536  atomic_notifier_call_chain(&netlink_chain,
537  NETLINK_URELEASE, &n);
538  }
539 
540  module_put(nlk->module);
541 
542  netlink_table_grab();
543  if (netlink_is_kernel(sk)) {
544  BUG_ON(nl_table[sk->sk_protocol].registered == 0);
545  if (--nl_table[sk->sk_protocol].registered == 0) {
546  struct listeners *old;
547 
548  old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
549  RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
550  kfree_rcu(old, rcu);
551  nl_table[sk->sk_protocol].module = NULL;
552  nl_table[sk->sk_protocol].bind = NULL;
553  nl_table[sk->sk_protocol].flags = 0;
554  nl_table[sk->sk_protocol].registered = 0;
555  }
556  } else if (nlk->subscriptions) {
557  netlink_update_listeners(sk);
558  }
559  netlink_table_ungrab();
560 
561  kfree(nlk->groups);
562  nlk->groups = NULL;
563 
564  local_bh_disable();
565  sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
566  local_bh_enable();
567  sock_put(sk);
568  return 0;
569 }
570 
571 static int netlink_autobind(struct socket *sock)
572 {
573  struct sock *sk = sock->sk;
574  struct net *net = sock_net(sk);
575  struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
576  struct hlist_head *head;
577  struct sock *osk;
578  struct hlist_node *node;
579  s32 portid = task_tgid_vnr(current);
580  int err;
581  static s32 rover = -4097;
582 
583 retry:
584  cond_resched();
585  netlink_table_grab();
586  head = nl_portid_hashfn(hash, portid);
587  sk_for_each(osk, node, head) {
588  if (!net_eq(sock_net(osk), net))
589  continue;
590  if (nlk_sk(osk)->portid == portid) {
591  /* Bind collision, search negative portid values. */
592  portid = rover--;
593  if (rover > -4097)
594  rover = -4097;
595  netlink_table_ungrab();
596  goto retry;
597  }
598  }
599  netlink_table_ungrab();
600 
601  err = netlink_insert(sk, net, portid);
602  if (err == -EADDRINUSE)
603  goto retry;
604 
605  /* If 2 threads race to autobind, that is fine. */
606  if (err == -EBUSY)
607  err = 0;
608 
609  return err;
610 }
611 
612 static inline int netlink_capable(const struct socket *sock, unsigned int flag)
613 {
614  return (nl_table[sock->sk->sk_protocol].flags & flag) ||
615  capable(CAP_NET_ADMIN);
616 }
617 
618 static void
619 netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
620 {
621  struct netlink_sock *nlk = nlk_sk(sk);
622 
623  if (nlk->subscriptions && !subscriptions)
624  __sk_del_bind_node(sk);
625  else if (!nlk->subscriptions && subscriptions)
626  sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
627  nlk->subscriptions = subscriptions;
628 }
629 
630 static int netlink_realloc_groups(struct sock *sk)
631 {
632  struct netlink_sock *nlk = nlk_sk(sk);
633  unsigned int groups;
634  unsigned long *new_groups;
635  int err = 0;
636 
637  netlink_table_grab();
638 
639  groups = nl_table[sk->sk_protocol].groups;
640  if (!nl_table[sk->sk_protocol].registered) {
641  err = -ENOENT;
642  goto out_unlock;
643  }
644 
645  if (nlk->ngroups >= groups)
646  goto out_unlock;
647 
648  new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
649  if (new_groups == NULL) {
650  err = -ENOMEM;
651  goto out_unlock;
652  }
653  memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
654  NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));
655 
656  nlk->groups = new_groups;
657  nlk->ngroups = groups;
658  out_unlock:
659  netlink_table_ungrab();
660  return err;
661 }
662 
663 static int netlink_bind(struct socket *sock, struct sockaddr *addr,
664  int addr_len)
665 {
666  struct sock *sk = sock->sk;
667  struct net *net = sock_net(sk);
668  struct netlink_sock *nlk = nlk_sk(sk);
669  struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
670  int err;
671 
672  if (nladdr->nl_family != AF_NETLINK)
673  return -EINVAL;
674 
675  /* Only superuser is allowed to listen to multicasts */
676  if (nladdr->nl_groups) {
677  if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
678  return -EPERM;
679  err = netlink_realloc_groups(sk);
680  if (err)
681  return err;
682  }
683 
684  if (nlk->portid) {
685  if (nladdr->nl_pid != nlk->portid)
686  return -EINVAL;
687  } else {
688  err = nladdr->nl_pid ?
689  netlink_insert(sk, net, nladdr->nl_pid) :
690  netlink_autobind(sock);
691  if (err)
692  return err;
693  }
694 
695  if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
696  return 0;
697 
698  netlink_table_grab();
699  netlink_update_subscriptions(sk, nlk->subscriptions +
700  hweight32(nladdr->nl_groups) -
701  hweight32(nlk->groups[0]));
702  nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
703  netlink_update_listeners(sk);
704  netlink_table_ungrab();
705 
706  if (nlk->netlink_bind && nlk->groups[0]) {
707  int i;
708 
709  for (i=0; i<nlk->ngroups; i++) {
710  if (test_bit(i, nlk->groups))
711  nlk->netlink_bind(i);
712  }
713  }
714 
715  return 0;
716 }
717 
718 static int netlink_connect(struct socket *sock, struct sockaddr *addr,
719  int alen, int flags)
720 {
721  int err = 0;
722  struct sock *sk = sock->sk;
723  struct netlink_sock *nlk = nlk_sk(sk);
724  struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
725 
726  if (alen < sizeof(addr->sa_family))
727  return -EINVAL;
728 
729  if (addr->sa_family == AF_UNSPEC) {
730  sk->sk_state = NETLINK_UNCONNECTED;
731  nlk->dst_portid = 0;
732  nlk->dst_group = 0;
733  return 0;
734  }
735  if (addr->sa_family != AF_NETLINK)
736  return -EINVAL;
737 
738  /* Only superuser is allowed to send multicasts */
739  if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
740  return -EPERM;
741 
742  if (!nlk->portid)
743  err = netlink_autobind(sock);
744 
745  if (err == 0) {
746  sk->sk_state = NETLINK_CONNECTED;
747  nlk->dst_portid = nladdr->nl_pid;
748  nlk->dst_group = ffs(nladdr->nl_groups);
749  }
750 
751  return err;
752 }
753 
754 static int netlink_getname(struct socket *sock, struct sockaddr *addr,
755  int *addr_len, int peer)
756 {
757  struct sock *sk = sock->sk;
758  struct netlink_sock *nlk = nlk_sk(sk);
759  DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);
760 
761  nladdr->nl_family = AF_NETLINK;
762  nladdr->nl_pad = 0;
763  *addr_len = sizeof(*nladdr);
764 
765  if (peer) {
766  nladdr->nl_pid = nlk->dst_portid;
767  nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
768  } else {
769  nladdr->nl_pid = nlk->portid;
770  nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
771  }
772  return 0;
773 }
774 
775 static void netlink_overrun(struct sock *sk)
776 {
777  struct netlink_sock *nlk = nlk_sk(sk);
778 
779  if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
780  if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
781  sk->sk_err = ENOBUFS;
782  sk->sk_error_report(sk);
783  }
784  }
785  atomic_inc(&sk->sk_drops);
786 }
787 
788 static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
789 {
790  struct sock *sock;
791  struct netlink_sock *nlk;
792 
793  sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
794  if (!sock)
795  return ERR_PTR(-ECONNREFUSED);
796 
797  /* Don't bother queuing skb if kernel socket has no input function */
798  nlk = nlk_sk(sock);
799  if (sock->sk_state == NETLINK_CONNECTED &&
800  nlk->dst_portid != nlk_sk(ssk)->portid) {
801  sock_put(sock);
802  return ERR_PTR(-ECONNREFUSED);
803  }
804  return sock;
805 }
806 
807 struct sock *netlink_getsockbyfilp(struct file *filp)
808 {
809  struct inode *inode = filp->f_path.dentry->d_inode;
810  struct sock *sock;
811 
812  if (!S_ISSOCK(inode->i_mode))
813  return ERR_PTR(-ENOTSOCK);
814 
815  sock = SOCKET_I(inode)->sk;
816  if (sock->sk_family != AF_NETLINK)
817  return ERR_PTR(-EINVAL);
818 
819  sock_hold(sock);
820  return sock;
821 }
822 
823 /*
824  * Attach a skb to a netlink socket.
825  * The caller must hold a reference to the destination socket. On error, the
826  * reference is dropped. The skb is not sent to the destination; all
827  * error checks are performed and memory in the queue is reserved.
828  * Return values:
829  * < 0: error. skb freed, reference to sock dropped.
830  * 0: continue
831  * 1: repeat lookup - reference dropped while waiting for socket memory.
832  */
833 int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
834  long *timeo, struct sock *ssk)
835 {
836  struct netlink_sock *nlk;
837 
838  nlk = nlk_sk(sk);
839 
840  if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
841  test_bit(0, &nlk->state)) {
842  DECLARE_WAITQUEUE(wait, current);
843  if (!*timeo) {
844  if (!ssk || netlink_is_kernel(ssk))
845  netlink_overrun(sk);
846  sock_put(sk);
847  kfree_skb(skb);
848  return -EAGAIN;
849  }
850 
851  __set_current_state(TASK_INTERRUPTIBLE);
852  add_wait_queue(&nlk->wait, &wait);
853 
854  if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
855  test_bit(0, &nlk->state)) &&
856  !sock_flag(sk, SOCK_DEAD))
857  *timeo = schedule_timeout(*timeo);
858 
859  __set_current_state(TASK_RUNNING);
860  remove_wait_queue(&nlk->wait, &wait);
861  sock_put(sk);
862 
863  if (signal_pending(current)) {
864  kfree_skb(skb);
865  return sock_intr_errno(*timeo);
866  }
867  return 1;
868  }
869  skb_set_owner_r(skb, sk);
870  return 0;
871 }
872 
873 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
874 {
875  int len = skb->len;
876 
877  skb_queue_tail(&sk->sk_receive_queue, skb);
878  sk->sk_data_ready(sk, len);
879  return len;
880 }
881 
882 int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
883 {
884  int len = __netlink_sendskb(sk, skb);
885 
886  sock_put(sk);
887  return len;
888 }
889 
890 void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
891 {
892  kfree_skb(skb);
893  sock_put(sk);
894 }
895 
896 static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
897 {
898  int delta;
899 
900  skb_orphan(skb);
901 
902  delta = skb->end - skb->tail;
903  if (delta * 2 < skb->truesize)
904  return skb;
905 
906  if (skb_shared(skb)) {
907  struct sk_buff *nskb = skb_clone(skb, allocation);
908  if (!nskb)
909  return skb;
910  consume_skb(skb);
911  skb = nskb;
912  }
913 
914  if (!pskb_expand_head(skb, 0, -delta, allocation))
915  skb->truesize -= delta;
916 
917  return skb;
918 }
919 
920 static void netlink_rcv_wake(struct sock *sk)
921 {
922  struct netlink_sock *nlk = nlk_sk(sk);
923 
924  if (skb_queue_empty(&sk->sk_receive_queue))
925  clear_bit(0, &nlk->state);
926  if (!test_bit(0, &nlk->state))
927  wake_up_interruptible(&nlk->wait);
928 }
929 
930 static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
931  struct sock *ssk)
932 {
933  int ret;
934  struct netlink_sock *nlk = nlk_sk(sk);
935 
936  ret = -ECONNREFUSED;
937  if (nlk->netlink_rcv != NULL) {
938  ret = skb->len;
939  skb_set_owner_r(skb, sk);
940  NETLINK_CB(skb).ssk = ssk;
941  nlk->netlink_rcv(skb);
942  consume_skb(skb);
943  } else {
944  kfree_skb(skb);
945  }
946  sock_put(sk);
947  return ret;
948 }
949 
950 int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
951  u32 portid, int nonblock)
952 {
953  struct sock *sk;
954  int err;
955  long timeo;
956 
957  skb = netlink_trim(skb, gfp_any());
958 
959  timeo = sock_sndtimeo(ssk, nonblock);
960 retry:
961  sk = netlink_getsockbyportid(ssk, portid);
962  if (IS_ERR(sk)) {
963  kfree_skb(skb);
964  return PTR_ERR(sk);
965  }
966  if (netlink_is_kernel(sk))
967  return netlink_unicast_kernel(sk, skb, ssk);
968 
969  if (sk_filter(sk, skb)) {
970  err = skb->len;
971  kfree_skb(skb);
972  sock_put(sk);
973  return err;
974  }
975 
976  err = netlink_attachskb(sk, skb, &timeo, ssk);
977  if (err == 1)
978  goto retry;
979  if (err)
980  return err;
981 
982  return netlink_sendskb(sk, skb);
983 }
985 
986 int netlink_has_listeners(struct sock *sk, unsigned int group)
987 {
988  int res = 0;
989  struct listeners *listeners;
990 
991  BUG_ON(!netlink_is_kernel(sk));
992 
993  rcu_read_lock();
994  listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
995 
996  if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
997  res = test_bit(group - 1, listeners->masks);
998 
999  rcu_read_unlock();
1000 
1001  return res;
1002 }
1004 
1005 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
1006 {
1007  struct netlink_sock *nlk = nlk_sk(sk);
1008 
1009  if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
1010  !test_bit(0, &nlk->state)) {
1011  skb_set_owner_r(skb, sk);
1012  __netlink_sendskb(sk, skb);
1013  return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
1014  }
1015  return -1;
1016 }
1017 
1018 struct netlink_broadcast_data {
1019  struct sock *exclude_sk;
1020  struct net *net;
1021  u32 portid;
1022  u32 group;
1023  int failure;
1024  int delivery_failure;
1025  int congested;
1026  int delivered;
1027  gfp_t allocation;
1028  struct sk_buff *skb, *skb2;
1029  int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
1030  void *tx_data;
1031 };
1032 
1033 static int do_one_broadcast(struct sock *sk,
1034  struct netlink_broadcast_data *p)
1035 {
1036  struct netlink_sock *nlk = nlk_sk(sk);
1037  int val;
1038 
1039  if (p->exclude_sk == sk)
1040  goto out;
1041 
1042  if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
1043  !test_bit(p->group - 1, nlk->groups))
1044  goto out;
1045 
1046  if (!net_eq(sock_net(sk), p->net))
1047  goto out;
1048 
1049  if (p->failure) {
1050  netlink_overrun(sk);
1051  goto out;
1052  }
1053 
1054  sock_hold(sk);
1055  if (p->skb2 == NULL) {
1056  if (skb_shared(p->skb)) {
1057  p->skb2 = skb_clone(p->skb, p->allocation);
1058  } else {
1059  p->skb2 = skb_get(p->skb);
1060  /*
1061  * skb ownership may have been set when
1062  * delivered to a previous socket.
1063  */
1064  skb_orphan(p->skb2);
1065  }
1066  }
1067  if (p->skb2 == NULL) {
1068  netlink_overrun(sk);
1069  /* Clone failed. Notify ALL listeners. */
1070  p->failure = 1;
1071  if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1072  p->delivery_failure = 1;
1073  } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
1074  kfree_skb(p->skb2);
1075  p->skb2 = NULL;
1076  } else if (sk_filter(sk, p->skb2)) {
1077  kfree_skb(p->skb2);
1078  p->skb2 = NULL;
1079  } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
1080  netlink_overrun(sk);
1081  if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1082  p->delivery_failure = 1;
1083  } else {
1084  p->congested |= val;
1085  p->delivered = 1;
1086  p->skb2 = NULL;
1087  }
1088  sock_put(sk);
1089 
1090 out:
1091  return 0;
1092 }
1093 
1094 int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
1095  u32 group, gfp_t allocation,
1096  int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
1097  void *filter_data)
1098 {
1099  struct net *net = sock_net(ssk);
1100  struct netlink_broadcast_data info;
1101  struct hlist_node *node;
1102  struct sock *sk;
1103 
1104  skb = netlink_trim(skb, allocation);
1105 
1106  info.exclude_sk = ssk;
1107  info.net = net;
1108  info.portid = portid;
1109  info.group = group;
1110  info.failure = 0;
1111  info.delivery_failure = 0;
1112  info.congested = 0;
1113  info.delivered = 0;
1114  info.allocation = allocation;
1115  info.skb = skb;
1116  info.skb2 = NULL;
1117  info.tx_filter = filter;
1118  info.tx_data = filter_data;
1119 
1120  /* While we sleep in clone, do not allow the socket list to change */
1121 
1122  netlink_lock_table();
1123 
1124  sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
1125  do_one_broadcast(sk, &info);
1126 
1127  consume_skb(skb);
1128 
1129  netlink_unlock_table();
1130 
1131  if (info.delivery_failure) {
1132  kfree_skb(info.skb2);
1133  return -ENOBUFS;
1134  }
1135  consume_skb(info.skb2);
1136 
1137  if (info.delivered) {
1138  if (info.congested && (allocation & __GFP_WAIT))
1139  yield();
1140  return 0;
1141  }
1142  return -ESRCH;
1143 }
1145 
1146 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
1147  u32 group, gfp_t allocation)
1148 {
1149  return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
1150  NULL, NULL);
1151 }
1153 
1154 struct netlink_set_err_data {
1155  struct sock *exclude_sk;
1156  u32 portid;
1157  u32 group;
1158  int code;
1159 };
1160 
1161 static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
1162 {
1163  struct netlink_sock *nlk = nlk_sk(sk);
1164  int ret = 0;
1165 
1166  if (sk == p->exclude_sk)
1167  goto out;
1168 
1169  if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
1170  goto out;
1171 
1172  if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
1173  !test_bit(p->group - 1, nlk->groups))
1174  goto out;
1175 
1176  if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
1177  ret = 1;
1178  goto out;
1179  }
1180 
1181  sk->sk_err = p->code;
1182  sk->sk_error_report(sk);
1183 out:
1184  return ret;
1185 }
1186 
1197 int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
1198 {
1199  struct netlink_set_err_data info;
1200  struct hlist_node *node;
1201  struct sock *sk;
1202  int ret = 0;
1203 
1204  info.exclude_sk = ssk;
1205  info.portid = portid;
1206  info.group = group;
1207  /* sk->sk_err wants a positive error value */
1208  info.code = -code;
1209 
1210  read_lock(&nl_table_lock);
1211 
1212  sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
1213  ret += do_one_set_err(sk, &info);
1214 
1215  read_unlock(&nl_table_lock);
1216  return ret;
1217 }
1219 
1220 /* must be called with netlink table grabbed */
1221 static void netlink_update_socket_mc(struct netlink_sock *nlk,
1222  unsigned int group,
1223  int is_new)
1224 {
1225  int old, new = !!is_new, subscriptions;
1226 
1227  old = test_bit(group - 1, nlk->groups);
1228  subscriptions = nlk->subscriptions - old + new;
1229  if (new)
1230  __set_bit(group - 1, nlk->groups);
1231  else
1232  __clear_bit(group - 1, nlk->groups);
1233  netlink_update_subscriptions(&nlk->sk, subscriptions);
1234  netlink_update_listeners(&nlk->sk);
1235 }
1236 
1237 static int netlink_setsockopt(struct socket *sock, int level, int optname,
1238  char __user *optval, unsigned int optlen)
1239 {
1240  struct sock *sk = sock->sk;
1241  struct netlink_sock *nlk = nlk_sk(sk);
1242  unsigned int val = 0;
1243  int err;
1244 
1245  if (level != SOL_NETLINK)
1246  return -ENOPROTOOPT;
1247 
1248  if (optlen >= sizeof(int) &&
1249  get_user(val, (unsigned int __user *)optval))
1250  return -EFAULT;
1251 
1252  switch (optname) {
1253  case NETLINK_PKTINFO:
1254  if (val)
1255  nlk->flags |= NETLINK_RECV_PKTINFO;
1256  else
1257  nlk->flags &= ~NETLINK_RECV_PKTINFO;
1258  err = 0;
1259  break;
1260  case NETLINK_ADD_MEMBERSHIP:
1261  case NETLINK_DROP_MEMBERSHIP: {
1262  if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
1263  return -EPERM;
1264  err = netlink_realloc_groups(sk);
1265  if (err)
1266  return err;
1267  if (!val || val - 1 >= nlk->ngroups)
1268  return -EINVAL;
1269  netlink_table_grab();
1270  netlink_update_socket_mc(nlk, val,
1271  optname == NETLINK_ADD_MEMBERSHIP);
1272  netlink_table_ungrab();
1273 
1274  if (nlk->netlink_bind)
1275  nlk->netlink_bind(val);
1276 
1277  err = 0;
1278  break;
1279  }
1280  case NETLINK_BROADCAST_ERROR:
1281  if (val)
1282  nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
1283  else
1284  nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
1285  err = 0;
1286  break;
1287  case NETLINK_NO_ENOBUFS:
1288  if (val) {
1289  nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
1290  clear_bit(0, &nlk->state);
1291  wake_up_interruptible(&nlk->wait);
1292  } else {
1293  nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
1294  }
1295  err = 0;
1296  break;
1297  default:
1298  err = -ENOPROTOOPT;
1299  }
1300  return err;
1301 }
1302 
1303 static int netlink_getsockopt(struct socket *sock, int level, int optname,
1304  char __user *optval, int __user *optlen)
1305 {
1306  struct sock *sk = sock->sk;
1307  struct netlink_sock *nlk = nlk_sk(sk);
1308  int len, val, err;
1309 
1310  if (level != SOL_NETLINK)
1311  return -ENOPROTOOPT;
1312 
1313  if (get_user(len, optlen))
1314  return -EFAULT;
1315  if (len < 0)
1316  return -EINVAL;
1317 
1318  switch (optname) {
1319  case NETLINK_PKTINFO:
1320  if (len < sizeof(int))
1321  return -EINVAL;
1322  len = sizeof(int);
1323  val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
1324  if (put_user(len, optlen) ||
1325  put_user(val, optval))
1326  return -EFAULT;
1327  err = 0;
1328  break;
1329  case NETLINK_BROADCAST_ERROR:
1330  if (len < sizeof(int))
1331  return -EINVAL;
1332  len = sizeof(int);
1333  val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
1334  if (put_user(len, optlen) ||
1335  put_user(val, optval))
1336  return -EFAULT;
1337  err = 0;
1338  break;
1339  case NETLINK_NO_ENOBUFS:
1340  if (len < sizeof(int))
1341  return -EINVAL;
1342  len = sizeof(int);
1343  val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
1344  if (put_user(len, optlen) ||
1345  put_user(val, optval))
1346  return -EFAULT;
1347  err = 0;
1348  break;
1349  default:
1350  err = -ENOPROTOOPT;
1351  }
1352  return err;
1353 }
1354 
1355 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
1356 {
1357  struct nl_pktinfo info;
1358 
1359  info.group = NETLINK_CB(skb).dst_group;
1360  put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
1361 }
1362 
1363 static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1364  struct msghdr *msg, size_t len)
1365 {
1366  struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1367  struct sock *sk = sock->sk;
1368  struct netlink_sock *nlk = nlk_sk(sk);
1369  struct sockaddr_nl *addr = msg->msg_name;
1370  u32 dst_portid;
1371  u32 dst_group;
1372  struct sk_buff *skb;
1373  int err;
1374  struct scm_cookie scm;
1375 
1376  if (msg->msg_flags&MSG_OOB)
1377  return -EOPNOTSUPP;
1378 
1379  if (NULL == siocb->scm)
1380  siocb->scm = &scm;
1381 
1382  err = scm_send(sock, msg, siocb->scm, true);
1383  if (err < 0)
1384  return err;
1385 
1386  if (msg->msg_namelen) {
1387  err = -EINVAL;
1388  if (addr->nl_family != AF_NETLINK)
1389  goto out;
1390  dst_portid = addr->nl_pid;
1391  dst_group = ffs(addr->nl_groups);
1392  err = -EPERM;
1393  if ((dst_group || dst_portid) &&
1394  !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
1395  goto out;
1396  } else {
1397  dst_portid = nlk->dst_portid;
1398  dst_group = nlk->dst_group;
1399  }
1400 
1401  if (!nlk->portid) {
1402  err = netlink_autobind(sock);
1403  if (err)
1404  goto out;
1405  }
1406 
1407  err = -EMSGSIZE;
1408  if (len > sk->sk_sndbuf - 32)
1409  goto out;
1410  err = -ENOBUFS;
1411  skb = alloc_skb(len, GFP_KERNEL);
1412  if (skb == NULL)
1413  goto out;
1414 
1415  NETLINK_CB(skb).portid = nlk->portid;
1416  NETLINK_CB(skb).dst_group = dst_group;
1417  NETLINK_CB(skb).creds = siocb->scm->creds;
1418 
1419  err = -EFAULT;
1420  if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
1421  kfree_skb(skb);
1422  goto out;
1423  }
1424 
1425  err = security_netlink_send(sk, skb);
1426  if (err) {
1427  kfree_skb(skb);
1428  goto out;
1429  }
1430 
1431  if (dst_group) {
1432  atomic_inc(&skb->users);
1433  netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
1434  }
1435  err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
1436 
1437 out:
1438  scm_destroy(siocb->scm);
1439  return err;
1440 }
1441 
1442 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1443  struct msghdr *msg, size_t len,
1444  int flags)
1445 {
1446  struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1447  struct scm_cookie scm;
1448  struct sock *sk = sock->sk;
1449  struct netlink_sock *nlk = nlk_sk(sk);
1450  int noblock = flags&MSG_DONTWAIT;
1451  size_t copied;
1452  struct sk_buff *skb, *data_skb;
1453  int err, ret;
1454 
1455  if (flags&MSG_OOB)
1456  return -EOPNOTSUPP;
1457 
1458  copied = 0;
1459 
1460  skb = skb_recv_datagram(sk, flags, noblock, &err);
1461  if (skb == NULL)
1462  goto out;
1463 
1464  data_skb = skb;
1465 
1466 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES
1467  if (unlikely(skb_shinfo(skb)->frag_list)) {
1468  /*
1469  * If this skb has a frag_list, then here that means that we
1470  * will have to use the frag_list skb's data for compat tasks
1471  * and the regular skb's data for normal (non-compat) tasks.
1472  *
1473  * If we need to send the compat skb, assign it to the
1474  * 'data_skb' variable so that it will be used below for data
1475  * copying. We keep 'skb' for everything else, including
1476  * freeing both later.
1477  */
1478  if (flags & MSG_CMSG_COMPAT)
1479  data_skb = skb_shinfo(skb)->frag_list;
1480  }
1481 #endif
1482 
1483  msg->msg_namelen = 0;
1484 
1485  copied = data_skb->len;
1486  if (len < copied) {
1487  msg->msg_flags |= MSG_TRUNC;
1488  copied = len;
1489  }
1490 
1491  skb_reset_transport_header(data_skb);
1492  err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);
1493 
1494  if (msg->msg_name) {
1495  struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
1496  addr->nl_family = AF_NETLINK;
1497  addr->nl_pad = 0;
1498  addr->nl_pid = NETLINK_CB(skb).portid;
1499  addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
1500  msg->msg_namelen = sizeof(*addr);
1501  }
1502 
1503  if (nlk->flags & NETLINK_RECV_PKTINFO)
1504  netlink_cmsg_recv_pktinfo(msg, skb);
1505 
1506  if (NULL == siocb->scm) {
1507  memset(&scm, 0, sizeof(scm));
1508  siocb->scm = &scm;
1509  }
1510  siocb->scm->creds = *NETLINK_CREDS(skb);
1511  if (flags & MSG_TRUNC)
1512  copied = data_skb->len;
1513 
1514  skb_free_datagram(sk, skb);
1515 
1516  if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
1517  ret = netlink_dump(sk);
1518  if (ret) {
1519  sk->sk_err = ret;
1520  sk->sk_error_report(sk);
1521  }
1522  }
1523 
1524  scm_recv(sock, msg, siocb->scm, flags);
1525 out:
1526  netlink_rcv_wake(sk);
1527  return err ? : copied;
1528 }
1529 
1530 static void netlink_data_ready(struct sock *sk, int len)
1531 {
1532  BUG();
1533 }
1534 
1535 /*
1536  * We export these functions to other modules. They provide a
1537  * complete set of kernel non-blocking support for message
1538  * queueing.
1539  */
1540 
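/*
 * Editorial example (not part of the original af_netlink.c): a minimal
 * sketch of how an out-of-tree module might use the exported kernel
 * netlink API on a 3.7 kernel.  All names below (example_nl_sk,
 * example_nl_rcv, example_nl_init, example_nl_exit) are hypothetical,
 * and NETLINK_USERSOCK is reused only to keep the sketch self-contained.
 */
#if 0	/* illustrative sketch only, never compiled as part of this file */
static struct sock *example_nl_sk;

/* Runs for every message unicast to the kernel-side socket. */
static void example_nl_rcv(struct sk_buff *skb)
{
	struct nlmsghdr *nlh = nlmsg_hdr(skb);

	pr_info("netlink: type %u from portid %u\n",
		nlh->nlmsg_type, NETLINK_CB(skb).portid);
}

static int __init example_nl_init(void)
{
	struct netlink_kernel_cfg cfg = {
		.groups	= 1,
		.input	= example_nl_rcv,
	};

	example_nl_sk = netlink_kernel_create(&init_net, NETLINK_USERSOCK, &cfg);
	return example_nl_sk ? 0 : -ENOMEM;
}
module_init(example_nl_init);

static void __exit example_nl_exit(void)
{
	netlink_kernel_release(example_nl_sk);
}
module_exit(example_nl_exit);
#endif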
1541 struct sock *
1542 __netlink_kernel_create(struct net *net, int unit, struct module *module,
1543  struct netlink_kernel_cfg *cfg)
1544 {
1545  struct socket *sock;
1546  struct sock *sk;
1547  struct netlink_sock *nlk;
1548  struct listeners *listeners = NULL;
1549  struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
1550  unsigned int groups;
1551 
1552  BUG_ON(!nl_table);
1553 
1554  if (unit < 0 || unit >= MAX_LINKS)
1555  return NULL;
1556 
1557  if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
1558  return NULL;
1559 
1560  /*
1561  * We have to just have a reference on the net from sk, but don't
1562  * get_net it. Besides, we cannot get and then put the net here.
1563  * So we create one inside init_net and then move it to net.
1564  */
1565 
1566  if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
1567  goto out_sock_release_nosk;
1568 
1569  sk = sock->sk;
1570  sk_change_net(sk, net);
1571 
1572  if (!cfg || cfg->groups < 32)
1573  groups = 32;
1574  else
1575  groups = cfg->groups;
1576 
1577  listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
1578  if (!listeners)
1579  goto out_sock_release;
1580 
1581  sk->sk_data_ready = netlink_data_ready;
1582  if (cfg && cfg->input)
1583  nlk_sk(sk)->netlink_rcv = cfg->input;
1584 
1585  if (netlink_insert(sk, net, 0))
1586  goto out_sock_release;
1587 
1588  nlk = nlk_sk(sk);
1589  nlk->flags |= NETLINK_KERNEL_SOCKET;
1590 
1591  netlink_table_grab();
1592  if (!nl_table[unit].registered) {
1593  nl_table[unit].groups = groups;
1594  rcu_assign_pointer(nl_table[unit].listeners, listeners);
1595  nl_table[unit].cb_mutex = cb_mutex;
1596  nl_table[unit].module = module;
1597  if (cfg) {
1598  nl_table[unit].bind = cfg->bind;
1599  nl_table[unit].flags = cfg->flags;
1600  }
1601  nl_table[unit].registered = 1;
1602  } else {
1603  kfree(listeners);
1604  nl_table[unit].registered++;
1605  }
1606  netlink_table_ungrab();
1607  return sk;
1608 
1609 out_sock_release:
1610  kfree(listeners);
1611  netlink_kernel_release(sk);
1612  return NULL;
1613 
1614 out_sock_release_nosk:
1615  sock_release(sock);
1616  return NULL;
1617 }
1619 
1620 void
1621 netlink_kernel_release(struct sock *sk)
1622 {
1623  sk_release_kernel(sk);
1624 }
1626 
1627 int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1628 {
1629  struct listeners *new, *old;
1630  struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1631 
1632  if (groups < 32)
1633  groups = 32;
1634 
1635  if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
1636  new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
1637  if (!new)
1638  return -ENOMEM;
1639  old = nl_deref_protected(tbl->listeners);
1640  memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1641  rcu_assign_pointer(tbl->listeners, new);
1642 
1643  kfree_rcu(old, rcu);
1644  }
1645  tbl->groups = groups;
1646 
1647  return 0;
1648 }
1649 
1662 int netlink_change_ngroups(struct sock *sk, unsigned int groups)
1663 {
1664  int err;
1665 
1666  netlink_table_grab();
1667  err = __netlink_change_ngroups(sk, groups);
1668  netlink_table_ungrab();
1669 
1670  return err;
1671 }
1672 
1673 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1674 {
1675  struct sock *sk;
1676  struct hlist_node *node;
1677  struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
1678 
1679  sk_for_each_bound(sk, node, &tbl->mc_list)
1680  netlink_update_socket_mc(nlk_sk(sk), group, 0);
1681 }
1682 
1691 void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1692 {
1693  netlink_table_grab();
1694  __netlink_clear_multicast_users(ksk, group);
1695  netlink_table_ungrab();
1696 }
1697 
1698 struct nlmsghdr *
1699 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
1700 {
1701  struct nlmsghdr *nlh;
1702  int size = NLMSG_LENGTH(len);
1703 
1704  nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
1705  nlh->nlmsg_type = type;
1706  nlh->nlmsg_len = size;
1707  nlh->nlmsg_flags = flags;
1708  nlh->nlmsg_pid = portid;
1709  nlh->nlmsg_seq = seq;
1710  if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
1711  memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
1712  return nlh;
1713 }
1715 
1716 /*
1717  * It looks a bit ugly.
1718  * It would be better to create a kernel thread.
1719  */
1720 
1721 static int netlink_dump(struct sock *sk)
1722 {
1723  struct netlink_sock *nlk = nlk_sk(sk);
1724  struct netlink_callback *cb;
1725  struct sk_buff *skb = NULL;
1726  struct nlmsghdr *nlh;
1727  int len, err = -ENOBUFS;
1728  int alloc_size;
1729 
1730  mutex_lock(nlk->cb_mutex);
1731 
1732  cb = nlk->cb;
1733  if (cb == NULL) {
1734  err = -EINVAL;
1735  goto errout_skb;
1736  }
1737 
1738  alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
1739 
1740  skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
1741  if (!skb)
1742  goto errout_skb;
1743 
1744  len = cb->dump(skb, cb);
1745 
1746  if (len > 0) {
1747  mutex_unlock(nlk->cb_mutex);
1748 
1749  if (sk_filter(sk, skb))
1750  kfree_skb(skb);
1751  else
1752  __netlink_sendskb(sk, skb);
1753  return 0;
1754  }
1755 
1756  nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1757  if (!nlh)
1758  goto errout_skb;
1759 
1760  nl_dump_check_consistent(cb, nlh);
1761 
1762  memcpy(nlmsg_data(nlh), &len, sizeof(len));
1763 
1764  if (sk_filter(sk, skb))
1765  kfree_skb(skb);
1766  else
1767  __netlink_sendskb(sk, skb);
1768 
1769  if (cb->done)
1770  cb->done(cb);
1771  nlk->cb = NULL;
1772  mutex_unlock(nlk->cb_mutex);
1773 
1774  module_put(cb->module);
1775  netlink_consume_callback(cb);
1776  return 0;
1777 
1778 errout_skb:
1779  mutex_unlock(nlk->cb_mutex);
1780  kfree_skb(skb);
1781  return err;
1782 }
1783 
1784 int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1785  const struct nlmsghdr *nlh,
1786  struct netlink_dump_control *control)
1787 {
1788  struct netlink_callback *cb;
1789  struct sock *sk;
1790  struct netlink_sock *nlk;
1791  int ret;
1792 
1793  cb = kzalloc(sizeof(*cb), GFP_KERNEL);
1794  if (cb == NULL)
1795  return -ENOBUFS;
1796 
1797  cb->dump = control->dump;
1798  cb->done = control->done;
1799  cb->nlh = nlh;
1800  cb->data = control->data;
1801  cb->module = control->module;
1802  cb->min_dump_alloc = control->min_dump_alloc;
1803  atomic_inc(&skb->users);
1804  cb->skb = skb;
1805 
1806  sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
1807  if (sk == NULL) {
1808  netlink_destroy_callback(cb);
1809  return -ECONNREFUSED;
1810  }
1811  nlk = nlk_sk(sk);
1812 
1813  mutex_lock(nlk->cb_mutex);
1814  /* A dump is in progress... */
1815  if (nlk->cb) {
1816  mutex_unlock(nlk->cb_mutex);
1817  netlink_destroy_callback(cb);
1818  ret = -EBUSY;
1819  goto out;
1820  }
1821  /* take a reference on the module that cb->dump belongs to */
1822  if (!try_module_get(cb->module)) {
1823  mutex_unlock(nlk->cb_mutex);
1824  netlink_destroy_callback(cb);
1825  ret = -EPROTONOSUPPORT;
1826  goto out;
1827  }
1828 
1829  nlk->cb = cb;
1830  mutex_unlock(nlk->cb_mutex);
1831 
1832  ret = netlink_dump(sk);
1833 out:
1834  sock_put(sk);
1835 
1836  if (ret)
1837  return ret;
1838 
1839  /* We successfully started a dump; by returning -EINTR we
1840  * signal not to send an ACK even if it was requested.
1841  */
1842  return -EINTR;
1843 }
1845 
1846 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1847 {
1848  struct sk_buff *skb;
1849  struct nlmsghdr *rep;
1850  struct nlmsgerr *errmsg;
1851  size_t payload = sizeof(*errmsg);
1852 
1853  /* error messages get the original request appended */
1854  if (err)
1855  payload += nlmsg_len(nlh);
1856 
1857  skb = nlmsg_new(payload, GFP_KERNEL);
1858  if (!skb) {
1859  struct sock *sk;
1860 
1861  sk = netlink_lookup(sock_net(in_skb->sk),
1862  in_skb->sk->sk_protocol,
1863  NETLINK_CB(in_skb).portid);
1864  if (sk) {
1865  sk->sk_err = ENOBUFS;
1866  sk->sk_error_report(sk);
1867  sock_put(sk);
1868  }
1869  return;
1870  }
1871 
1872  rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
1873  NLMSG_ERROR, payload, 0);
1874  errmsg = nlmsg_data(rep);
1875  errmsg->error = err;
1876  memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
1877  netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
1878 }
1880 
1881 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1882  struct nlmsghdr *))
1883 {
1884  struct nlmsghdr *nlh;
1885  int err;
1886 
1887  while (skb->len >= nlmsg_total_size(0)) {
1888  int msglen;
1889 
1890  nlh = nlmsg_hdr(skb);
1891  err = 0;
1892 
1893  if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
1894  return 0;
1895 
1896  /* Only requests are handled by the kernel */
1897  if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
1898  goto ack;
1899 
1900  /* Skip control messages */
1901  if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
1902  goto ack;
1903 
1904  err = cb(skb, nlh);
1905  if (err == -EINTR)
1906  goto skip;
1907 
1908 ack:
1909  if (nlh->nlmsg_flags & NLM_F_ACK || err)
1910  netlink_ack(skb, nlh, err);
1911 
1912 skip:
1913  msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1914  if (msglen > skb->len)
1915  msglen = skb->len;
1916  skb_pull(skb, msglen);
1917  }
1918 
1919  return 0;
1920 }
1922 
1932 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
1933  unsigned int group, int report, gfp_t flags)
1934 {
1935  int err = 0;
1936 
1937  if (group) {
1938  int exclude_portid = 0;
1939 
1940  if (report) {
1941  atomic_inc(&skb->users);
1942  exclude_portid = portid;
1943  }
1944 
1945  /* errors reported via destination sk->sk_err, but propagate
1946  * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
1947  err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
1948  }
1949 
1950  if (report) {
1951  int err2;
1952 
1953  err2 = nlmsg_unicast(sk, skb, portid);
1954  if (!err || err == -ESRCH)
1955  err = err2;
1956  }
1957 
1958  return err;
1959 }
1961 
1962 #ifdef CONFIG_PROC_FS
1963 struct nl_seq_iter {
1964  struct seq_net_private p;
1965  int link;
1966  int hash_idx;
1967 };
1968 
1969 static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1970 {
1971  struct nl_seq_iter *iter = seq->private;
1972  int i, j;
1973  struct sock *s;
1974  struct hlist_node *node;
1975  loff_t off = 0;
1976 
1977  for (i = 0; i < MAX_LINKS; i++) {
1978  struct nl_portid_hash *hash = &nl_table[i].hash;
1979 
1980  for (j = 0; j <= hash->mask; j++) {
1981  sk_for_each(s, node, &hash->table[j]) {
1982  if (sock_net(s) != seq_file_net(seq))
1983  continue;
1984  if (off == pos) {
1985  iter->link = i;
1986  iter->hash_idx = j;
1987  return s;
1988  }
1989  ++off;
1990  }
1991  }
1992  }
1993  return NULL;
1994 }
1995 
1996 static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
1997  __acquires(nl_table_lock)
1998 {
1999  read_lock(&nl_table_lock);
2000  return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2001 }
2002 
2003 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2004 {
2005  struct sock *s;
2006  struct nl_seq_iter *iter;
2007  int i, j;
2008 
2009  ++*pos;
2010 
2011  if (v == SEQ_START_TOKEN)
2012  return netlink_seq_socket_idx(seq, 0);
2013 
2014  iter = seq->private;
2015  s = v;
2016  do {
2017  s = sk_next(s);
2018  } while (s && sock_net(s) != seq_file_net(seq));
2019  if (s)
2020  return s;
2021 
2022  i = iter->link;
2023  j = iter->hash_idx + 1;
2024 
2025  do {
2026  struct nl_portid_hash *hash = &nl_table[i].hash;
2027 
2028  for (; j <= hash->mask; j++) {
2029  s = sk_head(&hash->table[j]);
2030  while (s && sock_net(s) != seq_file_net(seq))
2031  s = sk_next(s);
2032  if (s) {
2033  iter->link = i;
2034  iter->hash_idx = j;
2035  return s;
2036  }
2037  }
2038 
2039  j = 0;
2040  } while (++i < MAX_LINKS);
2041 
2042  return NULL;
2043 }
2044 
2045 static void netlink_seq_stop(struct seq_file *seq, void *v)
2046  __releases(nl_table_lock)
2047 {
2048  read_unlock(&nl_table_lock);
2049 }
2050 
2051 
2052 static int netlink_seq_show(struct seq_file *seq, void *v)
2053 {
2054  if (v == SEQ_START_TOKEN) {
2055  seq_puts(seq,
2056  "sk Eth Pid Groups "
2057  "Rmem Wmem Dump Locks Drops Inode\n");
2058  } else {
2059  struct sock *s = v;
2060  struct netlink_sock *nlk = nlk_sk(s);
2061 
2062  seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
2063  s,
2064  s->sk_protocol,
2065  nlk->portid,
2066  nlk->groups ? (u32)nlk->groups[0] : 0,
2067  sk_rmem_alloc_get(s),
2068  sk_wmem_alloc_get(s),
2069  nlk->cb,
2070  atomic_read(&s->sk_refcnt),
2071  atomic_read(&s->sk_drops),
2072  sock_i_ino(s)
2073  );
2074 
2075  }
2076  return 0;
2077 }
2078 
2079 static const struct seq_operations netlink_seq_ops = {
2080  .start = netlink_seq_start,
2081  .next = netlink_seq_next,
2082  .stop = netlink_seq_stop,
2083  .show = netlink_seq_show,
2084 };
2085 
2086 
2087 static int netlink_seq_open(struct inode *inode, struct file *file)
2088 {
2089  return seq_open_net(inode, file, &netlink_seq_ops,
2090  sizeof(struct nl_seq_iter));
2091 }
2092 
2093 static const struct file_operations netlink_seq_fops = {
2094  .owner = THIS_MODULE,
2095  .open = netlink_seq_open,
2096  .read = seq_read,
2097  .llseek = seq_lseek,
2098  .release = seq_release_net,
2099 };
2100 
2101 #endif
2102 
2103 int netlink_register_notifier(struct notifier_block *nb)
2104 {
2105  return atomic_notifier_chain_register(&netlink_chain, nb);
2106 }
2108 
2109 int netlink_unregister_notifier(struct notifier_block *nb)
2110 {
2111  return atomic_notifier_chain_unregister(&netlink_chain, nb);
2112 }
2114 
2115 static const struct proto_ops netlink_ops = {
2116  .family = PF_NETLINK,
2117  .owner = THIS_MODULE,
2118  .release = netlink_release,
2119  .bind = netlink_bind,
2120  .connect = netlink_connect,
2121  .socketpair = sock_no_socketpair,
2122  .accept = sock_no_accept,
2123  .getname = netlink_getname,
2124  .poll = datagram_poll,
2125  .ioctl = sock_no_ioctl,
2126  .listen = sock_no_listen,
2127  .shutdown = sock_no_shutdown,
2128  .setsockopt = netlink_setsockopt,
2129  .getsockopt = netlink_getsockopt,
2130  .sendmsg = netlink_sendmsg,
2131  .recvmsg = netlink_recvmsg,
2132  .mmap = sock_no_mmap,
2133  .sendpage = sock_no_sendpage,
2134 };
2135 
2136 static const struct net_proto_family netlink_family_ops = {
2137  .family = PF_NETLINK,
2138  .create = netlink_create,
2139  .owner = THIS_MODULE, /* for consistency 8) */
2140 };
2141 
2142 static int __net_init netlink_net_init(struct net *net)
2143 {
2144 #ifdef CONFIG_PROC_FS
2145  if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
2146  return -ENOMEM;
2147 #endif
2148  return 0;
2149 }
2150 
2151 static void __net_exit netlink_net_exit(struct net *net)
2152 {
2153 #ifdef CONFIG_PROC_FS
2154  proc_net_remove(net, "netlink");
2155 #endif
2156 }
2157 
2158 static void __init netlink_add_usersock_entry(void)
2159 {
2160  struct listeners *listeners;
2161  int groups = 32;
2162 
2163  listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2164  if (!listeners)
2165  panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2166 
2167  netlink_table_grab();
2168 
2169  nl_table[NETLINK_USERSOCK].groups = groups;
2170  rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2171  nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2172  nl_table[NETLINK_USERSOCK].registered = 1;
2173  nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;
2174 
2175  netlink_table_ungrab();
2176 }
2177 
2178 static struct pernet_operations __net_initdata netlink_net_ops = {
2179  .init = netlink_net_init,
2180  .exit = netlink_net_exit,
2181 };
2182 
2183 static int __init netlink_proto_init(void)
2184 {
2185  struct sk_buff *dummy_skb;
2186  int i;
2187  unsigned long limit;
2188  unsigned int order;
2189  int err = proto_register(&netlink_proto, 0);
2190 
2191  if (err != 0)
2192  goto out;
2193 
2194  BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));
2195 
2196  nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
2197  if (!nl_table)
2198  goto panic;
2199 
2200  if (totalram_pages >= (128 * 1024))
2201  limit = totalram_pages >> (21 - PAGE_SHIFT);
2202  else
2203  limit = totalram_pages >> (23 - PAGE_SHIFT);
2204 
2205  order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
2206  limit = (1UL << order) / sizeof(struct hlist_head);
2207  order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
2208 
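/* Editorial note (not in the original source): a worked example of the sizing
 * above, assuming a 64-bit kernel, 4 KB pages and roughly 1 GB of RAM.
 * totalram_pages is about 262144 (>= 128 * 1024), so
 * limit = 262144 >> (21 - 12) = 512; then
 * order = get_bitmask_order(512) - 1 + PAGE_SHIFT = 21,
 * limit becomes (1UL << 21) / sizeof(struct hlist_head) = 262144, and the
 * final order (stored below as hash->max_shift) is
 * get_bitmask_order(262144) - 1 = 18, i.e. each per-protocol portid hash
 * may grow to at most 2^18 buckets. */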
2209  for (i = 0; i < MAX_LINKS; i++) {
2210  struct nl_portid_hash *hash = &nl_table[i].hash;
2211 
2212  hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table));
2213  if (!hash->table) {
2214  while (i-- > 0)
2215  nl_portid_hash_free(nl_table[i].hash.table,
2216  1 * sizeof(*hash->table));
2217  kfree(nl_table);
2218  goto panic;
2219  }
2220  hash->max_shift = order;
2221  hash->shift = 0;
2222  hash->mask = 0;
2223  hash->rehash_time = jiffies;
2224  }
2225 
2226  netlink_add_usersock_entry();
2227 
2228  sock_register(&netlink_family_ops);
2229  register_pernet_subsys(&netlink_net_ops);
2230  /* The netlink device handler may be needed early. */
2231  rtnetlink_init();
2232 out:
2233  return err;
2234 panic:
2235  panic("netlink_init: Cannot allocate nl_table\n");
2236 }
2237 
2238 core_initcall(netlink_proto_init);