Linux Kernel  3.7.1
af_unix.c
1 /*
2  * NET4: Implementation of BSD Unix domain sockets.
3  *
4  * Authors: Alan Cox, <[email protected]>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  * Linus Torvalds : Assorted bug cures.
13  * Niibe Yutaka : async I/O support.
14  * Carsten Paeth : PF_UNIX check, address fixes.
15  * Alan Cox : Limit size of allocated blocks.
16  * Alan Cox : Fixed the stupid socketpair bug.
17  * Alan Cox : BSD compatibility fine tuning.
18  * Alan Cox : Fixed a bug in connect when interrupted.
19  * Alan Cox : Sorted out a proper draft version of
20  * file descriptor passing hacked up from
21  * Mike Shaver's work.
22  * Marty Leisner : Fixes to fd passing
23  * Nick Nevin : recvmsg bugfix.
24  * Alan Cox : Started proper garbage collector
25  * Heiko EiBfeldt : Missing verify_area check
26  * Alan Cox : Started POSIXisms
27  * Andreas Schwab : Replace inode by dentry for proper
28  * reference counting
29  * Kirk Petersen : Made this a module
30  * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
31  * Lots of bug fixes.
32  * Alexey Kuznetsov : Repaired (I hope) bugs introduced
33  * by above two patches.
34  * Andrea Arcangeli : If possible we block in connect(2)
35  * if the max backlog of the listen socket
36  * has been reached. This won't break
37  * old apps and it will avoid huge amounts
38  * of socks hashed (this for unix_gc()
39  * performance reasons).
40  * Security fix that limits the max
41  * number of socks to 2*max_files and
42  * the number of skb queueable in the
43  * dgram receiver.
44  * Artur Skawina : Hash function optimizations
45  * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46  * Malcolm Beattie : Set peercred for socketpair
47  * Michal Ostrowski : Module initialization cleanup.
48  * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49  * the core infrastructure is doing that
50  * for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  * [TO FIX]
56  * ECONNREFUSED is not returned from one end of a connected() socket to the
57  * other the moment one end closes.
58  * fstat() doesn't return st_dev=0, and gives the blksize as high water mark
59  * and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  * [NOT TO FIX]
61  * accept() returns a path name even if the connecting socket has closed
62  * in the meantime (BSD loses the path and gives up).
63  * accept() returns 0 length path for an unbound connector. BSD returns 16
64  * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  * BSD af_unix apparently has connect forgetting to block properly.
67  * (need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  * Bug fixes and improvements.
71  * - client shutdown killed server socket.
72  * - removed all useless cli/sti pairs.
73  *
74  * Semantic changes/extensions.
75  * - generic control message passing.
76  * - SCM_CREDENTIALS control message.
77  * - "Abstract" (not FS based) socket bindings.
78  * Abstract names are sequences of bytes (not zero terminated)
79  * starting with 0, so that this name space does not intersect
80  * with BSD names.
81  */
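For illustration only (not part of this file; error handling omitted), a
minimal userspace sketch of the two naming styles described above - a
filesystem path versus an abstract name whose first byte is zero:

    #include <stddef.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/un.h>
    #include <unistd.h>

    int main(void)
    {
        struct sockaddr_un a;
        int fs = socket(AF_UNIX, SOCK_STREAM, 0);
        int ab = socket(AF_UNIX, SOCK_STREAM, 0);

        /* Filesystem name: NUL-terminated path, visible in the FS. */
        memset(&a, 0, sizeof(a));
        a.sun_family = AF_UNIX;
        strcpy(a.sun_path, "/tmp/demo.sock");
        bind(fs, (struct sockaddr *)&a, sizeof(a));

        /* Abstract name: sun_path[0] == 0; the address length, not a
         * NUL, delimits the name, so it cannot clash with BSD paths. */
        memset(&a, 0, sizeof(a));
        a.sun_family = AF_UNIX;
        memcpy(a.sun_path, "\0demo.sock", 10);
        bind(ab, (struct sockaddr *)&a,
             offsetof(struct sockaddr_un, sun_path) + 10);

        close(fs);
        close(ab);
        unlink("/tmp/demo.sock");
        return 0;
    }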
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
118 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
119 EXPORT_SYMBOL_GPL(unix_socket_table);
120 DEFINE_SPINLOCK(unix_table_lock);
121 EXPORT_SYMBOL_GPL(unix_table_lock);
122 static atomic_long_t unix_nr_socks;
123 
124 
125 static struct hlist_head *unix_sockets_unbound(void *addr)
126 {
127  unsigned long hash = (unsigned long)addr;
128 
129  hash ^= hash >> 16;
130  hash ^= hash >> 8;
131  hash %= UNIX_HASH_SIZE;
132  return &unix_socket_table[UNIX_HASH_SIZE + hash];
133 }
134 
135 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
136 
137 #ifdef CONFIG_SECURITY_NETWORK
138 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
139 {
140  memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
141 }
142 
143 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
144 {
145  scm->secid = *UNIXSID(skb);
146 }
147 #else
148 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
149 { }
150 
151 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
152 { }
153 #endif /* CONFIG_SECURITY_NETWORK */
154 
155 /*
156  * SMP locking strategy:
157  * hash table is protected with spinlock unix_table_lock
158  * each socket state is protected by separate spin lock.
159  */
160 
161 static inline unsigned int unix_hash_fold(__wsum n)
162 {
163  unsigned int hash = (__force unsigned int)n;
164 
165  hash ^= hash>>16;
166  hash ^= hash>>8;
167  return hash&(UNIX_HASH_SIZE-1);
168 }
169 
170 #define unix_peer(sk) (unix_sk(sk)->peer)
171 
172 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
173 {
174  return unix_peer(osk) == sk;
175 }
176 
177 static inline int unix_may_send(struct sock *sk, struct sock *osk)
178 {
179  return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
180 }
181 
182 static inline int unix_recvq_full(struct sock const *sk)
183 {
184  return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
185 }
186 
187 struct sock *unix_peer_get(struct sock *s)
188 {
189  struct sock *peer;
190 
191  unix_state_lock(s);
192  peer = unix_peer(s);
193  if (peer)
194  sock_hold(peer);
195  unix_state_unlock(s);
196  return peer;
197 }
198 EXPORT_SYMBOL_GPL(unix_peer_get);
199 
200 static inline void unix_release_addr(struct unix_address *addr)
201 {
202  if (atomic_dec_and_test(&addr->refcnt))
203  kfree(addr);
204 }
205 
206 /*
207  * Check unix socket name:
208  * - it should not be zero length.
209  * - if it does not start with zero, it should be NUL terminated (FS object)
210  * - if it starts with zero, it is an abstract name.
211  */
212 
213 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
214 {
215  if (len <= sizeof(short) || len > sizeof(*sunaddr))
216  return -EINVAL;
217  if (!sunaddr || sunaddr->sun_family != AF_UNIX)
218  return -EINVAL;
219  if (sunaddr->sun_path[0]) {
220  /*
221  * This may look like an off by one error but it is a bit more
222  * subtle. 108 is the longest valid AF_UNIX path for a binding.
223  * sun_path[108] doesn't as such exist. However in kernel space
224  * we are guaranteed that it is a valid memory location in our
225  * kernel address buffer.
226  */
227  ((char *)sunaddr)[len] = 0;
228  len = strlen(sunaddr->sun_path)+1+sizeof(short);
229  return len;
230  }
231 
232  *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
233  return len;
234 }
235 
236 static void __unix_remove_socket(struct sock *sk)
237 {
238  sk_del_node_init(sk);
239 }
240 
241 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
242 {
243  WARN_ON(!sk_unhashed(sk));
244  sk_add_node(sk, list);
245 }
246 
247 static inline void unix_remove_socket(struct sock *sk)
248 {
249  spin_lock(&unix_table_lock);
250  __unix_remove_socket(sk);
251  spin_unlock(&unix_table_lock);
252 }
253 
254 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
255 {
256  spin_lock(&unix_table_lock);
257  __unix_insert_socket(list, sk);
258  spin_unlock(&unix_table_lock);
259 }
260 
261 static struct sock *__unix_find_socket_byname(struct net *net,
262  struct sockaddr_un *sunname,
263  int len, int type, unsigned int hash)
264 {
265  struct sock *s;
266  struct hlist_node *node;
267 
268  sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
269  struct unix_sock *u = unix_sk(s);
270 
271  if (!net_eq(sock_net(s), net))
272  continue;
273 
274  if (u->addr->len == len &&
275  !memcmp(u->addr->name, sunname, len))
276  goto found;
277  }
278  s = NULL;
279 found:
280  return s;
281 }
282 
283 static inline struct sock *unix_find_socket_byname(struct net *net,
284  struct sockaddr_un *sunname,
285  int len, int type,
286  unsigned int hash)
287 {
288  struct sock *s;
289 
290  spin_lock(&unix_table_lock);
291  s = __unix_find_socket_byname(net, sunname, len, type, hash);
292  if (s)
293  sock_hold(s);
294  spin_unlock(&unix_table_lock);
295  return s;
296 }
297 
298 static struct sock *unix_find_socket_byinode(struct inode *i)
299 {
300  struct sock *s;
301  struct hlist_node *node;
302 
303  spin_lock(&unix_table_lock);
304  sk_for_each(s, node,
305  &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
306  struct dentry *dentry = unix_sk(s)->path.dentry;
307 
308  if (dentry && dentry->d_inode == i) {
309  sock_hold(s);
310  goto found;
311  }
312  }
313  s = NULL;
314 found:
315  spin_unlock(&unix_table_lock);
316  return s;
317 }
318 
319 static inline int unix_writable(struct sock *sk)
320 {
321  return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
322 }
323 
324 static void unix_write_space(struct sock *sk)
325 {
326  struct socket_wq *wq;
327 
328  rcu_read_lock();
329  if (unix_writable(sk)) {
330  wq = rcu_dereference(sk->sk_wq);
331  if (wq_has_sleeper(wq))
332    wake_up_interruptible_sync_poll(&wq->wait,
333     POLLOUT | POLLWRNORM | POLLWRBAND);
334   sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
335  }
336  rcu_read_unlock();
337 }
338 
339 /* When dgram socket disconnects (or changes its peer), we clear its receive
340  * queue of packets arrived from the previous peer. First, this allows flow
341  * control based only on wmem_alloc; second, an sk connected to a peer
342  * may receive messages only from that peer. */
343 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
344 {
345  if (!skb_queue_empty(&sk->sk_receive_queue)) {
346   skb_queue_purge(&sk->sk_receive_queue);
347   wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
348 
349  /* If one link of bidirectional dgram pipe is disconnected,
350  * we signal error. Messages are lost. Do not do this
351  * when the peer was not connected to us.
352  */
353  if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
354  other->sk_err = ECONNRESET;
355  other->sk_error_report(other);
356  }
357  }
358 }
359 
360 static void unix_sock_destructor(struct sock *sk)
361 {
362  struct unix_sock *u = unix_sk(sk);
363 
364  skb_queue_purge(&sk->sk_receive_queue);
365 
366  WARN_ON(atomic_long_read(&sk->sk_wmem_alloc));
367  WARN_ON(!sk_unhashed(sk));
368  WARN_ON(sk->sk_socket);
369  if (!sock_flag(sk, SOCK_DEAD)) {
370  printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
371  return;
372  }
373 
374  if (u->addr)
375  unix_release_addr(u->addr);
376 
377  atomic_long_dec(&unix_nr_socks);
378  local_bh_disable();
379  sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
380  local_bh_enable();
381 #ifdef UNIX_REFCNT_DEBUG
382  printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
383  atomic_long_read(&unix_nr_socks));
384 #endif
385 }
386 
387 static int unix_release_sock(struct sock *sk, int embrion)
388 {
389  struct unix_sock *u = unix_sk(sk);
390  struct path path;
391  struct sock *skpair;
392  struct sk_buff *skb;
393  int state;
394 
395  unix_remove_socket(sk);
396 
397  /* Clear state */
398  unix_state_lock(sk);
399  sock_orphan(sk);
400  sk->sk_shutdown = SHUTDOWN_MASK;
401  path = u->path;
402  u->path.dentry = NULL;
403  u->path.mnt = NULL;
404  state = sk->sk_state;
405  sk->sk_state = TCP_CLOSE;
406  unix_state_unlock(sk);
407 
408  wake_up_interruptible_all(&u->peer_wait);
409 
410  skpair = unix_peer(sk);
411 
412  if (skpair != NULL) {
413  if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
414  unix_state_lock(skpair);
415  /* No more writes */
416  skpair->sk_shutdown = SHUTDOWN_MASK;
417  if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
418  skpair->sk_err = ECONNRESET;
419  unix_state_unlock(skpair);
420  skpair->sk_state_change(skpair);
421  sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
422  }
423  sock_put(skpair); /* It may now die */
424  unix_peer(sk) = NULL;
425  }
426 
427  /* Try to flush out this socket. Throw out buffers at least */
428 
429  while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
430  if (state == TCP_LISTEN)
431  unix_release_sock(skb->sk, 1);
432  /* passed fds are erased in the kfree_skb hook */
433  kfree_skb(skb);
434  }
435 
436  if (path.dentry)
437  path_put(&path);
438 
439  sock_put(sk);
440 
441  /* ---- Socket is dead now and most probably destroyed ---- */
442 
443  /*
444  * Fixme: BSD difference: In BSD all sockets connected to us get
445  * ECONNRESET and we die on the spot. In Linux we behave
446  * like files and pipes do and wait for the last
447  * dereference.
448  *
449  * Can't we simply set sock->err?
450  *
451  * What the above comment does talk about? --ANK(980817)
452  */
453 
454  if (unix_tot_inflight)
455  unix_gc(); /* Garbage collect fds */
456 
457  return 0;
458 }
459 
460 static void init_peercred(struct sock *sk)
461 {
462  put_pid(sk->sk_peer_pid);
463  if (sk->sk_peer_cred)
464  put_cred(sk->sk_peer_cred);
465  sk->sk_peer_pid = get_pid(task_tgid(current));
466  sk->sk_peer_cred = get_current_cred();
467 }
468 
469 static void copy_peercred(struct sock *sk, struct sock *peersk)
470 {
471  put_pid(sk->sk_peer_pid);
472  if (sk->sk_peer_cred)
473  put_cred(sk->sk_peer_cred);
474  sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
475  sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
476 }
477 
478 static int unix_listen(struct socket *sock, int backlog)
479 {
480  int err;
481  struct sock *sk = sock->sk;
482  struct unix_sock *u = unix_sk(sk);
483  struct pid *old_pid = NULL;
484 
485  err = -EOPNOTSUPP;
486  if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
487  goto out; /* Only stream/seqpacket sockets accept */
488  err = -EINVAL;
489  if (!u->addr)
490  goto out; /* No listens on an unbound socket */
491  unix_state_lock(sk);
492  if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
493  goto out_unlock;
494  if (backlog > sk->sk_max_ack_backlog)
495  wake_up_interruptible_all(&u->peer_wait);
496  sk->sk_max_ack_backlog = backlog;
497  sk->sk_state = TCP_LISTEN;
498  /* set credentials so connect can copy them */
499  init_peercred(sk);
500  err = 0;
501 
502 out_unlock:
503  unix_state_unlock(sk);
504  put_pid(old_pid);
505 out:
506  return err;
507 }
508 
509 static int unix_release(struct socket *);
510 static int unix_bind(struct socket *, struct sockaddr *, int);
511 static int unix_stream_connect(struct socket *, struct sockaddr *,
512  int addr_len, int flags);
513 static int unix_socketpair(struct socket *, struct socket *);
514 static int unix_accept(struct socket *, struct socket *, int);
515 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
516 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
517 static unsigned int unix_dgram_poll(struct file *, struct socket *,
518  poll_table *);
519 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
520 static int unix_shutdown(struct socket *, int);
521 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
522  struct msghdr *, size_t);
523 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
524  struct msghdr *, size_t, int);
525 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
526  struct msghdr *, size_t);
527 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
528  struct msghdr *, size_t, int);
529 static int unix_dgram_connect(struct socket *, struct sockaddr *,
530  int, int);
531 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
532  struct msghdr *, size_t);
533 static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
534  struct msghdr *, size_t, int);
535 
536 static void unix_set_peek_off(struct sock *sk, int val)
537 {
538  struct unix_sock *u = unix_sk(sk);
539 
540  mutex_lock(&u->readlock);
541  sk->sk_peek_off = val;
542  mutex_unlock(&u->readlock);
543 }
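unix_set_peek_off() stores the offset that MSG_PEEK reads consume. A sketch
of the matching userspace knob, SO_PEEK_OFF (assuming fd is a connected
AF_UNIX socket and the libc headers define SO_PEEK_OFF):

    #include <sys/socket.h>

    static void demo_peek_off(int fd)
    {
        char buf[8];
        int off = 0;

        /* Enable a peek offset; each MSG_PEEK then advances it. */
        setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
        recv(fd, buf, 4, MSG_PEEK); /* peeks bytes 0..3, offset -> 4 */
        recv(fd, buf, 4, MSG_PEEK); /* peeks bytes 4..7, offset -> 8 */
        recv(fd, buf, 8, 0);        /* consumes; offset is rewound */
    }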
544 
545 
546 static const struct proto_ops unix_stream_ops = {
547  .family = PF_UNIX,
548  .owner = THIS_MODULE,
549  .release = unix_release,
550  .bind = unix_bind,
551  .connect = unix_stream_connect,
552  .socketpair = unix_socketpair,
553  .accept = unix_accept,
554  .getname = unix_getname,
555  .poll = unix_poll,
556  .ioctl = unix_ioctl,
557  .listen = unix_listen,
558  .shutdown = unix_shutdown,
559  .setsockopt = sock_no_setsockopt,
560  .getsockopt = sock_no_getsockopt,
561  .sendmsg = unix_stream_sendmsg,
562  .recvmsg = unix_stream_recvmsg,
563  .mmap = sock_no_mmap,
564  .sendpage = sock_no_sendpage,
565  .set_peek_off = unix_set_peek_off,
566 };
567 
568 static const struct proto_ops unix_dgram_ops = {
569  .family = PF_UNIX,
570  .owner = THIS_MODULE,
571  .release = unix_release,
572  .bind = unix_bind,
573  .connect = unix_dgram_connect,
574  .socketpair = unix_socketpair,
575  .accept = sock_no_accept,
576  .getname = unix_getname,
577  .poll = unix_dgram_poll,
578  .ioctl = unix_ioctl,
579  .listen = sock_no_listen,
580  .shutdown = unix_shutdown,
581  .setsockopt = sock_no_setsockopt,
582  .getsockopt = sock_no_getsockopt,
583  .sendmsg = unix_dgram_sendmsg,
584  .recvmsg = unix_dgram_recvmsg,
585  .mmap = sock_no_mmap,
586  .sendpage = sock_no_sendpage,
587  .set_peek_off = unix_set_peek_off,
588 };
589 
590 static const struct proto_ops unix_seqpacket_ops = {
591  .family = PF_UNIX,
592  .owner = THIS_MODULE,
593  .release = unix_release,
594  .bind = unix_bind,
595  .connect = unix_stream_connect,
596  .socketpair = unix_socketpair,
597  .accept = unix_accept,
598  .getname = unix_getname,
599  .poll = unix_dgram_poll,
600  .ioctl = unix_ioctl,
601  .listen = unix_listen,
602  .shutdown = unix_shutdown,
603  .setsockopt = sock_no_setsockopt,
604  .getsockopt = sock_no_getsockopt,
605  .sendmsg = unix_seqpacket_sendmsg,
606  .recvmsg = unix_seqpacket_recvmsg,
607  .mmap = sock_no_mmap,
608  .sendpage = sock_no_sendpage,
609  .set_peek_off = unix_set_peek_off,
610 };
611 
612 static struct proto unix_proto = {
613  .name = "UNIX",
614  .owner = THIS_MODULE,
615  .obj_size = sizeof(struct unix_sock),
616 };
617 
618 /*
619  * AF_UNIX sockets do not interact with hardware, hence they
620  * don't trigger interrupts - so it's safe for them to have
621  * bh-unsafe locking for their sk_receive_queue.lock. Split off
622  * this special lock-class by reinitializing the spinlock key:
623  */
624 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
625 
626 static struct sock *unix_create1(struct net *net, struct socket *sock)
627 {
628  struct sock *sk = NULL;
629  struct unix_sock *u;
630 
631  atomic_long_inc(&unix_nr_socks);
632  if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
633  goto out;
634 
635  sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
636  if (!sk)
637  goto out;
638 
639  sock_init_data(sock, sk);
640  lockdep_set_class(&sk->sk_receive_queue.lock,
641    &af_unix_sk_receive_queue_lock_key);
642 
643  sk->sk_write_space = unix_write_space;
644  sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
645  sk->sk_destruct = unix_sock_destructor;
646  u = unix_sk(sk);
647  u->path.dentry = NULL;
648  u->path.mnt = NULL;
649  spin_lock_init(&u->lock);
650  atomic_long_set(&u->inflight, 0);
651  INIT_LIST_HEAD(&u->link);
652  mutex_init(&u->readlock); /* single task reading lock */
653  init_waitqueue_head(&u->peer_wait);
654  unix_insert_socket(unix_sockets_unbound(sk), sk);
655 out:
656  if (sk == NULL)
657  atomic_long_dec(&unix_nr_socks);
658  else {
659  local_bh_disable();
660  sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
661  local_bh_enable();
662  }
663  return sk;
664 }
665 
666 static int unix_create(struct net *net, struct socket *sock, int protocol,
667  int kern)
668 {
669  if (protocol && protocol != PF_UNIX)
670  return -EPROTONOSUPPORT;
671 
672  sock->state = SS_UNCONNECTED;
673 
674  switch (sock->type) {
675  case SOCK_STREAM:
676  sock->ops = &unix_stream_ops;
677  break;
678  /*
679  * Believe it or not BSD has AF_UNIX, SOCK_RAW though
680  * nothing uses it.
681  */
682  case SOCK_RAW:
683  sock->type = SOCK_DGRAM;
684  case SOCK_DGRAM:
685  sock->ops = &unix_dgram_ops;
686  break;
687  case SOCK_SEQPACKET:
688  sock->ops = &unix_seqpacket_ops;
689  break;
690  default:
691  return -ESOCKTNOSUPPORT;
692  }
693 
694  return unix_create1(net, sock) ? 0 : -ENOMEM;
695 }
696 
697 static int unix_release(struct socket *sock)
698 {
699  struct sock *sk = sock->sk;
700 
701  if (!sk)
702  return 0;
703 
704  sock->sk = NULL;
705 
706  return unix_release_sock(sk, 0);
707 }
708 
709 static int unix_autobind(struct socket *sock)
710 {
711  struct sock *sk = sock->sk;
712  struct net *net = sock_net(sk);
713  struct unix_sock *u = unix_sk(sk);
714  static u32 ordernum = 1;
715  struct unix_address *addr;
716  int err;
717  unsigned int retries = 0;
718 
719  mutex_lock(&u->readlock);
720 
721  err = 0;
722  if (u->addr)
723  goto out;
724 
725  err = -ENOMEM;
726  addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
727  if (!addr)
728  goto out;
729 
730  addr->name->sun_family = AF_UNIX;
731  atomic_set(&addr->refcnt, 1);
732 
733 retry:
734  addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
735  addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
736 
737  spin_lock(&unix_table_lock);
738  ordernum = (ordernum+1)&0xFFFFF;
739 
740  if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
741  addr->hash)) {
742  spin_unlock(&unix_table_lock);
743  /*
744  * __unix_find_socket_byname() may take long time if many names
745  * are already in use.
746  */
747  cond_resched();
748  /* Give up if all names seems to be in use. */
749  if (retries++ == 0xFFFFF) {
750  err = -ENOSPC;
751  kfree(addr);
752  goto out;
753  }
754  goto retry;
755  }
756  addr->hash ^= sk->sk_type;
757 
758  __unix_remove_socket(sk);
759  u->addr = addr;
760  __unix_insert_socket(&unix_socket_table[addr->hash], sk);
761  spin_unlock(&unix_table_lock);
762  err = 0;
763 
764 out: mutex_unlock(&u->readlock);
765  return err;
766 }
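A hypothetical userspace counterpart of the autobind path above: binding
with only sun_family triggers autobind, and getsockname(2) then shows the
kernel-chosen "%05x" abstract name (error handling omitted):

    #include <stdio.h>
    #include <sys/socket.h>
    #include <sys/un.h>
    #include <unistd.h>

    int main(void)
    {
        struct sockaddr_un a = { .sun_family = AF_UNIX };
        socklen_t len = sizeof(a);
        int fd = socket(AF_UNIX, SOCK_DGRAM, 0);

        bind(fd, (struct sockaddr *)&a, sizeof(sa_family_t)); /* autobind */
        getsockname(fd, (struct sockaddr *)&a, &len);
        /* a.sun_path[0] is 0; the next five bytes are the "%05x" name. */
        printf("autobound to \\0%.5s\n", a.sun_path + 1);
        close(fd);
        return 0;
    }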
767 
768 static struct sock *unix_find_other(struct net *net,
769  struct sockaddr_un *sunname, int len,
770  int type, unsigned int hash, int *error)
771 {
772  struct sock *u;
773  struct path path;
774  int err = 0;
775 
776  if (sunname->sun_path[0]) {
777  struct inode *inode;
778  err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
779  if (err)
780  goto fail;
781  inode = path.dentry->d_inode;
782  err = inode_permission(inode, MAY_WRITE);
783  if (err)
784  goto put_fail;
785 
786  err = -ECONNREFUSED;
787  if (!S_ISSOCK(inode->i_mode))
788  goto put_fail;
789  u = unix_find_socket_byinode(inode);
790  if (!u)
791  goto put_fail;
792 
793  if (u->sk_type == type)
794  touch_atime(&path);
795 
796  path_put(&path);
797 
798  err = -EPROTOTYPE;
799  if (u->sk_type != type) {
800  sock_put(u);
801  goto fail;
802  }
803  } else {
804  err = -ECONNREFUSED;
805  u = unix_find_socket_byname(net, sunname, len, type, hash);
806  if (u) {
807  struct dentry *dentry;
808  dentry = unix_sk(u)->path.dentry;
809  if (dentry)
810  touch_atime(&unix_sk(u)->path);
811  } else
812  goto fail;
813  }
814  return u;
815 
816 put_fail:
817  path_put(&path);
818 fail:
819  *error = err;
820  return NULL;
821 }
822 
823 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
824 {
825  struct dentry *dentry;
826  struct path path;
827  int err = 0;
828  /*
829  * Get the parent directory, calculate the hash for last
830  * component.
831  */
832  dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
833  err = PTR_ERR(dentry);
834  if (IS_ERR(dentry))
835  return err;
836 
837  /*
838  * All right, let's create it.
839  */
840  err = security_path_mknod(&path, dentry, mode, 0);
841  if (!err) {
842  err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
843  if (!err) {
844  res->mnt = mntget(path.mnt);
845  res->dentry = dget(dentry);
846  }
847  }
848  done_path_create(&path, dentry);
849  return err;
850 }
851 
852 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
853 {
854  struct sock *sk = sock->sk;
855  struct net *net = sock_net(sk);
856  struct unix_sock *u = unix_sk(sk);
857  struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
858  char *sun_path = sunaddr->sun_path;
859  int err;
860  unsigned int hash;
861  struct unix_address *addr;
862  struct hlist_head *list;
863 
864  err = -EINVAL;
865  if (sunaddr->sun_family != AF_UNIX)
866  goto out;
867 
868  if (addr_len == sizeof(short)) {
869  err = unix_autobind(sock);
870  goto out;
871  }
872 
873  err = unix_mkname(sunaddr, addr_len, &hash);
874  if (err < 0)
875  goto out;
876  addr_len = err;
877 
878  mutex_lock(&u->readlock);
879 
880  err = -EINVAL;
881  if (u->addr)
882  goto out_up;
883 
884  err = -ENOMEM;
885  addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
886  if (!addr)
887  goto out_up;
888 
889  memcpy(addr->name, sunaddr, addr_len);
890  addr->len = addr_len;
891  addr->hash = hash ^ sk->sk_type;
892  atomic_set(&addr->refcnt, 1);
893 
894  if (sun_path[0]) {
895  struct path path;
896  umode_t mode = S_IFSOCK |
897  (SOCK_INODE(sock)->i_mode & ~current_umask());
898  err = unix_mknod(sun_path, mode, &path);
899  if (err) {
900  if (err == -EEXIST)
901  err = -EADDRINUSE;
902  unix_release_addr(addr);
903  goto out_up;
904  }
905  addr->hash = UNIX_HASH_SIZE;
906  hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
907  spin_lock(&unix_table_lock);
908  u->path = path;
909  list = &unix_socket_table[hash];
910  } else {
911  spin_lock(&unix_table_lock);
912  err = -EADDRINUSE;
913  if (__unix_find_socket_byname(net, sunaddr, addr_len,
914  sk->sk_type, hash)) {
915  unix_release_addr(addr);
916  goto out_unlock;
917  }
918 
919  list = &unix_socket_table[addr->hash];
920  }
921 
922  err = 0;
923  __unix_remove_socket(sk);
924  u->addr = addr;
925  __unix_insert_socket(list, sk);
926 
927 out_unlock:
928  spin_unlock(&unix_table_lock);
929 out_up:
930  mutex_unlock(&u->readlock);
931 out:
932  return err;
933 }
934 
935 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
936 {
937  if (unlikely(sk1 == sk2) || !sk2) {
938  unix_state_lock(sk1);
939  return;
940  }
941  if (sk1 < sk2) {
942  unix_state_lock(sk1);
943   unix_state_lock_nested(sk2);
944  } else {
945  unix_state_lock(sk2);
946   unix_state_lock_nested(sk1);
947  }
948 }
949 
950 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
951 {
952  if (unlikely(sk1 == sk2) || !sk2) {
953  unix_state_unlock(sk1);
954  return;
955  }
956  unix_state_unlock(sk1);
957  unix_state_unlock(sk2);
958 }
959 
960 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
961  int alen, int flags)
962 {
963  struct sock *sk = sock->sk;
964  struct net *net = sock_net(sk);
965  struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
966  struct sock *other;
967  unsigned int hash;
968  int err;
969 
970  if (addr->sa_family != AF_UNSPEC) {
971  err = unix_mkname(sunaddr, alen, &hash);
972  if (err < 0)
973  goto out;
974  alen = err;
975 
976  if (test_bit(SOCK_PASSCRED, &sock->flags) &&
977  !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
978  goto out;
979 
980 restart:
981  other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
982  if (!other)
983  goto out;
984 
985  unix_state_double_lock(sk, other);
986 
987  /* Apparently VFS overslept socket death. Retry. */
988  if (sock_flag(other, SOCK_DEAD)) {
989  unix_state_double_unlock(sk, other);
990  sock_put(other);
991  goto restart;
992  }
993 
994  err = -EPERM;
995  if (!unix_may_send(sk, other))
996  goto out_unlock;
997 
998  err = security_unix_may_send(sk->sk_socket, other->sk_socket);
999  if (err)
1000  goto out_unlock;
1001 
1002  } else {
1003  /*
1004  * 1003.1g breaking connected state with AF_UNSPEC
1005  */
1006  other = NULL;
1007  unix_state_double_lock(sk, other);
1008  }
1009 
1010  /*
1011  * If it was connected, reconnect.
1012  */
1013  if (unix_peer(sk)) {
1014  struct sock *old_peer = unix_peer(sk);
1015  unix_peer(sk) = other;
1016  unix_state_double_unlock(sk, other);
1017 
1018  if (other != old_peer)
1019  unix_dgram_disconnected(sk, old_peer);
1020  sock_put(old_peer);
1021  } else {
1022  unix_peer(sk) = other;
1023  unix_state_double_unlock(sk, other);
1024  }
1025  return 0;
1026 
1027 out_unlock:
1028  unix_state_double_unlock(sk, other);
1029  sock_put(other);
1030 out:
1031  return err;
1032 }
1033 
1034 static long unix_wait_for_peer(struct sock *other, long timeo)
1035 {
1036  struct unix_sock *u = unix_sk(other);
1037  int sched;
1038  DEFINE_WAIT(wait);
1039 
1040  prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1041 
1042  sched = !sock_flag(other, SOCK_DEAD) &&
1043  !(other->sk_shutdown & RCV_SHUTDOWN) &&
1044  unix_recvq_full(other);
1045 
1046  unix_state_unlock(other);
1047 
1048  if (sched)
1049  timeo = schedule_timeout(timeo);
1050 
1051  finish_wait(&u->peer_wait, &wait);
1052  return timeo;
1053 }
1054 
1055 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1056  int addr_len, int flags)
1057 {
1058  struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1059  struct sock *sk = sock->sk;
1060  struct net *net = sock_net(sk);
1061  struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1062  struct sock *newsk = NULL;
1063  struct sock *other = NULL;
1064  struct sk_buff *skb = NULL;
1065  unsigned int hash;
1066  int st;
1067  int err;
1068  long timeo;
1069 
1070  err = unix_mkname(sunaddr, addr_len, &hash);
1071  if (err < 0)
1072  goto out;
1073  addr_len = err;
1074 
1075  if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1076  (err = unix_autobind(sock)) != 0)
1077  goto out;
1078 
1079  timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1080 
1081  /* First of all allocate resources.
1082  If we will make it after state is locked,
1083  we will have to recheck all again in any case.
1084  */
1085 
1086  err = -ENOMEM;
1087 
1088  /* create new sock for complete connection */
1089  newsk = unix_create1(sock_net(sk), NULL);
1090  if (newsk == NULL)
1091  goto out;
1092 
1093  /* Allocate skb for sending to listening sock */
1094  skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1095  if (skb == NULL)
1096  goto out;
1097 
1098 restart:
1099  /* Find listening sock. */
1100  other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1101  if (!other)
1102  goto out;
1103 
1104  /* Latch state of peer */
1105  unix_state_lock(other);
1106 
1107  /* Apparently VFS overslept socket death. Retry. */
1108  if (sock_flag(other, SOCK_DEAD)) {
1109  unix_state_unlock(other);
1110  sock_put(other);
1111  goto restart;
1112  }
1113 
1114  err = -ECONNREFUSED;
1115  if (other->sk_state != TCP_LISTEN)
1116  goto out_unlock;
1117  if (other->sk_shutdown & RCV_SHUTDOWN)
1118  goto out_unlock;
1119 
1120  if (unix_recvq_full(other)) {
1121  err = -EAGAIN;
1122  if (!timeo)
1123  goto out_unlock;
1124 
1125  timeo = unix_wait_for_peer(other, timeo);
1126 
1127  err = sock_intr_errno(timeo);
1128  if (signal_pending(current))
1129  goto out;
1130  sock_put(other);
1131  goto restart;
1132  }
1133 
1134  /* Latch our state.
1135 
1136  This is a tricky place. We need to grab our state lock and cannot
1137  drop the lock on the peer. It is dangerous because deadlock is
1138  possible. The connect-to-self case and simultaneous
1139  attempts to connect are eliminated by checking the socket
1140  state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1141  check this before attempting to grab the lock.
1142 
1143  Well, and we have to recheck the state after the socket is locked.
1144  */
1145  st = sk->sk_state;
1146 
1147  switch (st) {
1148  case TCP_CLOSE:
1149  /* This is ok... continue with connect */
1150  break;
1151  case TCP_ESTABLISHED:
1152  /* Socket is already connected */
1153  err = -EISCONN;
1154  goto out_unlock;
1155  default:
1156  err = -EINVAL;
1157  goto out_unlock;
1158  }
1159 
1159 
1160  unix_state_lock_nested(sk);
1161 
1162  if (sk->sk_state != st) {
1163  unix_state_unlock(sk);
1164  unix_state_unlock(other);
1165  sock_put(other);
1166  goto restart;
1167  }
1168 
1169  err = security_unix_stream_connect(sk, other, newsk);
1170  if (err) {
1171  unix_state_unlock(sk);
1172  goto out_unlock;
1173  }
1174 
1175  /* The way is open! Quickly set all the necessary fields... */
1176 
1177  sock_hold(sk);
1178  unix_peer(newsk) = sk;
1179  newsk->sk_state = TCP_ESTABLISHED;
1180  newsk->sk_type = sk->sk_type;
1181  init_peercred(newsk);
1182  newu = unix_sk(newsk);
1183  RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1184  otheru = unix_sk(other);
1185 
1186  /* copy address information from listening to new sock*/
1187  if (otheru->addr) {
1188  atomic_inc(&otheru->addr->refcnt);
1189  newu->addr = otheru->addr;
1190  }
1191  if (otheru->path.dentry) {
1192  path_get(&otheru->path);
1193  newu->path = otheru->path;
1194  }
1195 
1196  /* Set credentials */
1197  copy_peercred(sk, other);
1198 
1199  sock->state = SS_CONNECTED;
1200  sk->sk_state = TCP_ESTABLISHED;
1201  sock_hold(newsk);
1202 
1203  smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
1204  unix_peer(sk) = newsk;
1205 
1206  unix_state_unlock(sk);
1207 
1208  /* take ten and send info to the listening sock */
1209  spin_lock(&other->sk_receive_queue.lock);
1210  __skb_queue_tail(&other->sk_receive_queue, skb);
1211  spin_unlock(&other->sk_receive_queue.lock);
1212  unix_state_unlock(other);
1213  other->sk_data_ready(other, 0);
1214  sock_put(other);
1215  return 0;
1216 
1217 out_unlock:
1218  if (other)
1219  unix_state_unlock(other);
1220 
1221 out:
1222  kfree_skb(skb);
1223  if (newsk)
1224  unix_release_sock(newsk, 0);
1225  if (other)
1226  sock_put(other);
1227  return err;
1228 }
1229 
1230 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1231 {
1232  struct sock *ska = socka->sk, *skb = sockb->sk;
1233 
1234  /* Join our sockets back to back */
1235  sock_hold(ska);
1236  sock_hold(skb);
1237  unix_peer(ska) = skb;
1238  unix_peer(skb) = ska;
1239  init_peercred(ska);
1240  init_peercred(skb);
1241 
1242  if (ska->sk_type != SOCK_DGRAM) {
1243  ska->sk_state = TCP_ESTABLISHED;
1244  skb->sk_state = TCP_ESTABLISHED;
1245  socka->state = SS_CONNECTED;
1246  sockb->state = SS_CONNECTED;
1247  }
1248  return 0;
1249 }
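The back-to-back join above is what the classic socketpair(2) pattern
relies on; a minimal sketch (error handling omitted):

    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
        int sv[2];
        char buf[8];

        socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv);
        write(sv[0], "ping", 4);        /* lands on sv[1]'s receive queue */
        read(sv[1], buf, sizeof(buf));  /* receives "ping" */
        close(sv[0]);
        close(sv[1]);
        return 0;
    }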
1250 
1251 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1252 {
1253  struct sock *sk = sock->sk;
1254  struct sock *tsk;
1255  struct sk_buff *skb;
1256  int err;
1257 
1258  err = -EOPNOTSUPP;
1259  if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1260  goto out;
1261 
1262  err = -EINVAL;
1263  if (sk->sk_state != TCP_LISTEN)
1264  goto out;
1265 
1266  /* If socket state is TCP_LISTEN it cannot change (for now...),
1267  * so that no locks are necessary.
1268  */
1269 
1270  skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1271  if (!skb) {
1272  /* This means receive shutdown. */
1273  if (err == 0)
1274  err = -EINVAL;
1275  goto out;
1276  }
1277 
1278  tsk = skb->sk;
1279  skb_free_datagram(sk, skb);
1280  wake_up_interruptible(&unix_sk(sk)->peer_wait);
1281 
1282  /* attach accepted sock to socket */
1283  unix_state_lock(tsk);
1284  newsock->state = SS_CONNECTED;
1285  sock_graft(tsk, newsock);
1286  unix_state_unlock(tsk);
1287  return 0;
1288 
1289 out:
1290  return err;
1291 }
1292 
1293 
1294 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1295 {
1296  struct sock *sk = sock->sk;
1297  struct unix_sock *u;
1298  DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1299  int err = 0;
1300 
1301  if (peer) {
1302  sk = unix_peer_get(sk);
1303 
1304  err = -ENOTCONN;
1305  if (!sk)
1306  goto out;
1307  err = 0;
1308  } else {
1309  sock_hold(sk);
1310  }
1311 
1312  u = unix_sk(sk);
1313  unix_state_lock(sk);
1314  if (!u->addr) {
1315  sunaddr->sun_family = AF_UNIX;
1316  sunaddr->sun_path[0] = 0;
1317  *uaddr_len = sizeof(short);
1318  } else {
1319  struct unix_address *addr = u->addr;
1320 
1321  *uaddr_len = addr->len;
1322  memcpy(sunaddr, addr->name, *uaddr_len);
1323  }
1324  unix_state_unlock(sk);
1325  sock_put(sk);
1326 out:
1327  return err;
1328 }
1329 
1330 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1331 {
1332  int i;
1333 
1334  scm->fp = UNIXCB(skb).fp;
1335  UNIXCB(skb).fp = NULL;
1336 
1337  for (i = scm->fp->count-1; i >= 0; i--)
1338  unix_notinflight(scm->fp->fp[i]);
1339 }
1340 
1341 static void unix_destruct_scm(struct sk_buff *skb)
1342 {
1343  struct scm_cookie scm;
1344  memset(&scm, 0, sizeof(scm));
1345  scm.pid = UNIXCB(skb).pid;
1346  scm.cred = UNIXCB(skb).cred;
1347  if (UNIXCB(skb).fp)
1348  unix_detach_fds(&scm, skb);
1349 
1350  /* Alas, it calls VFS */
1351  /* So fscking what? fput() had been SMP-safe since the last Summer */
1352  scm_destroy(&scm);
1353  sock_wfree(skb);
1354 }
1355 
1356 #define MAX_RECURSION_LEVEL 4
1357 
1358 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1359 {
1360  int i;
1361  unsigned char max_level = 0;
1362  int unix_sock_count = 0;
1363 
1364  for (i = scm->fp->count - 1; i >= 0; i--) {
1365  struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1366 
1367  if (sk) {
1368  unix_sock_count++;
1369  max_level = max(max_level,
1370  unix_sk(sk)->recursion_level);
1371  }
1372  }
1373  if (unlikely(max_level > MAX_RECURSION_LEVEL))
1374  return -ETOOMANYREFS;
1375 
1376  /*
1377  * Need to duplicate file references for the sake of garbage
1378  * collection. Otherwise a socket in the fps might become a
1379  * candidate for GC while the skb is not yet queued.
1380  */
1381  UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1382  if (!UNIXCB(skb).fp)
1383  return -ENOMEM;
1384 
1385  if (unix_sock_count) {
1386  for (i = scm->fp->count - 1; i >= 0; i--)
1387  unix_inflight(scm->fp->fp[i]);
1388  }
1389  return max_level;
1390 }
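unix_attach_fds()/unix_detach_fds() service SCM_RIGHTS control messages
built in userspace; a sketch of the sending side (illustrative helper,
error handling omitted):

    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    static ssize_t send_fd(int sock, int fd)
    {
        char dummy = '*';
        struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
        union {
            struct cmsghdr h;
            char buf[CMSG_SPACE(sizeof(int))];
        } u;
        struct msghdr msg = {
            .msg_iov = &iov, .msg_iovlen = 1,
            .msg_control = u.buf, .msg_controllen = sizeof(u.buf),
        };
        struct cmsghdr *c = CMSG_FIRSTHDR(&msg);

        c->cmsg_level = SOL_SOCKET;
        c->cmsg_type = SCM_RIGHTS;              /* pass a file descriptor */
        c->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(c), &fd, sizeof(int));
        return sendmsg(sock, &msg, 0);
    }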
1391 
1392 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1393 {
1394  int err = 0;
1395 
1396  UNIXCB(skb).pid = get_pid(scm->pid);
1397  if (scm->cred)
1398  UNIXCB(skb).cred = get_cred(scm->cred);
1399  UNIXCB(skb).fp = NULL;
1400  if (scm->fp && send_fds)
1401  err = unix_attach_fds(scm, skb);
1402 
1403  skb->destructor = unix_destruct_scm;
1404  return err;
1405 }
1406 
1407 /*
1408  * Some apps rely on write() giving SCM_CREDENTIALS
1409  * We include credentials if source or destination socket
1410  * asserted SOCK_PASSCRED.
1411  */
1412 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1413  const struct sock *other)
1414 {
1415  if (UNIXCB(skb).cred)
1416  return;
1417  if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1418  !other->sk_socket ||
1419  test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1420  UNIXCB(skb).pid = get_pid(task_tgid(current));
1421  UNIXCB(skb).cred = get_current_cred();
1422  }
1423 }
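The receiving side opts in to these credentials with SO_PASSCRED and reads
them back as SCM_CREDENTIALS; an illustrative helper (glibc needs
_GNU_SOURCE for struct ucred):

    #define _GNU_SOURCE
    #include <string.h>
    #include <sys/socket.h>

    static int recv_creds(int sock, struct ucred *out)
    {
        int on = 1;
        char data[64];
        struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
        union {
            struct cmsghdr h;
            char buf[CMSG_SPACE(sizeof(struct ucred))];
        } u;
        struct msghdr msg = {
            .msg_iov = &iov, .msg_iovlen = 1,
            .msg_control = u.buf, .msg_controllen = sizeof(u.buf),
        };
        struct cmsghdr *c;

        setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
        if (recvmsg(sock, &msg, 0) < 0)
            return -1;
        for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c))
            if (c->cmsg_level == SOL_SOCKET &&
                c->cmsg_type == SCM_CREDENTIALS) {
                memcpy(out, CMSG_DATA(c), sizeof(*out));
                return 0;   /* out->pid/uid/gid are the sender's */
            }
        return -1;
    }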
1424 
1425 /*
1426  * Send AF_UNIX data.
1427  */
1428 
1429 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1430  struct msghdr *msg, size_t len)
1431 {
1432  struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1433  struct sock *sk = sock->sk;
1434  struct net *net = sock_net(sk);
1435  struct unix_sock *u = unix_sk(sk);
1436  struct sockaddr_un *sunaddr = msg->msg_name;
1437  struct sock *other = NULL;
1438  int namelen = 0; /* fake GCC */
1439  int err;
1440  unsigned int hash;
1441  struct sk_buff *skb;
1442  long timeo;
1443  struct scm_cookie tmp_scm;
1444  int max_level;
1445  int data_len = 0;
1446 
1447  if (NULL == siocb->scm)
1448  siocb->scm = &tmp_scm;
1449  wait_for_unix_gc();
1450  err = scm_send(sock, msg, siocb->scm, false);
1451  if (err < 0)
1452  return err;
1453 
1454  err = -EOPNOTSUPP;
1455  if (msg->msg_flags&MSG_OOB)
1456  goto out;
1457 
1458  if (msg->msg_namelen) {
1459  err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1460  if (err < 0)
1461  goto out;
1462  namelen = err;
1463  } else {
1464  sunaddr = NULL;
1465  err = -ENOTCONN;
1466  other = unix_peer_get(sk);
1467  if (!other)
1468  goto out;
1469  }
1470 
1471  if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1472  && (err = unix_autobind(sock)) != 0)
1473  goto out;
1474 
1475  err = -EMSGSIZE;
1476  if (len > sk->sk_sndbuf - 32)
1477  goto out;
1478 
1479  if (len > SKB_MAX_ALLOC)
1480  data_len = min_t(size_t,
1481  len - SKB_MAX_ALLOC,
1482     MAX_SKB_FRAGS * PAGE_SIZE);
1483 
1484  skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1485  msg->msg_flags & MSG_DONTWAIT, &err);
1486  if (skb == NULL)
1487  goto out;
1488 
1489  err = unix_scm_to_skb(siocb->scm, skb, true);
1490  if (err < 0)
1491  goto out_free;
1492  max_level = err + 1;
1493  unix_get_secdata(siocb->scm, skb);
1494 
1495  skb_put(skb, len - data_len);
1496  skb->data_len = data_len;
1497  skb->len = len;
1498  err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
1499  if (err)
1500  goto out_free;
1501 
1502  timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1503 
1504 restart:
1505  if (!other) {
1506  err = -ECONNRESET;
1507  if (sunaddr == NULL)
1508  goto out_free;
1509 
1510  other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1511  hash, &err);
1512  if (other == NULL)
1513  goto out_free;
1514  }
1515 
1516  if (sk_filter(other, skb) < 0) {
1517  /* Toss the packet but do not return any error to the sender */
1518  err = len;
1519  goto out_free;
1520  }
1521 
1522  unix_state_lock(other);
1523  err = -EPERM;
1524  if (!unix_may_send(sk, other))
1525  goto out_unlock;
1526 
1527  if (sock_flag(other, SOCK_DEAD)) {
1528  /*
1529  * Check with 1003.1g - what should
1530  * datagram error
1531  */
1532  unix_state_unlock(other);
1533  sock_put(other);
1534 
1535  err = 0;
1536  unix_state_lock(sk);
1537  if (unix_peer(sk) == other) {
1538  unix_peer(sk) = NULL;
1539  unix_state_unlock(sk);
1540 
1541  unix_dgram_disconnected(sk, other);
1542  sock_put(other);
1543  err = -ECONNREFUSED;
1544  } else {
1545  unix_state_unlock(sk);
1546  }
1547 
1548  other = NULL;
1549  if (err)
1550  goto out_free;
1551  goto restart;
1552  }
1553 
1554  err = -EPIPE;
1555  if (other->sk_shutdown & RCV_SHUTDOWN)
1556  goto out_unlock;
1557 
1558  if (sk->sk_type != SOCK_SEQPACKET) {
1559  err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1560  if (err)
1561  goto out_unlock;
1562  }
1563 
1564  if (unix_peer(other) != sk && unix_recvq_full(other)) {
1565  if (!timeo) {
1566  err = -EAGAIN;
1567  goto out_unlock;
1568  }
1569 
1570  timeo = unix_wait_for_peer(other, timeo);
1571 
1572  err = sock_intr_errno(timeo);
1573  if (signal_pending(current))
1574  goto out_free;
1575 
1576  goto restart;
1577  }
1578 
1579  if (sock_flag(other, SOCK_RCVTSTAMP))
1580  __net_timestamp(skb);
1581  maybe_add_creds(skb, sock, other);
1582  skb_queue_tail(&other->sk_receive_queue, skb);
1583  if (max_level > unix_sk(other)->recursion_level)
1584  unix_sk(other)->recursion_level = max_level;
1585  unix_state_unlock(other);
1586  other->sk_data_ready(other, len);
1587  sock_put(other);
1588  scm_destroy(siocb->scm);
1589  return len;
1590 
1591 out_unlock:
1592  unix_state_unlock(other);
1593 out_free:
1594  kfree_skb(skb);
1595 out:
1596  if (other)
1597  sock_put(other);
1598  scm_destroy(siocb->scm);
1599  return err;
1600 }
1601 
1602 
1603 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1604  struct msghdr *msg, size_t len)
1605 {
1606  struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1607  struct sock *sk = sock->sk;
1608  struct sock *other = NULL;
1609  int err, size;
1610  struct sk_buff *skb;
1611  int sent = 0;
1612  struct scm_cookie tmp_scm;
1613  bool fds_sent = false;
1614  int max_level;
1615 
1616  if (NULL == siocb->scm)
1617  siocb->scm = &tmp_scm;
1618  wait_for_unix_gc();
1619  err = scm_send(sock, msg, siocb->scm, false);
1620  if (err < 0)
1621  return err;
1622 
1623  err = -EOPNOTSUPP;
1624  if (msg->msg_flags&MSG_OOB)
1625  goto out_err;
1626 
1627  if (msg->msg_namelen) {
1628  err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1629  goto out_err;
1630  } else {
1631  err = -ENOTCONN;
1632  other = unix_peer(sk);
1633  if (!other)
1634  goto out_err;
1635  }
1636 
1637  if (sk->sk_shutdown & SEND_SHUTDOWN)
1638  goto pipe_err;
1639 
1640  while (sent < len) {
1641  /*
1642  * Optimisation for the fact that under 0.01% of X
1643  * messages typically need breaking up.
1644  */
1645 
1646  size = len-sent;
1647 
1648  /* Keep two messages in the pipe so it schedules better */
1649  if (size > ((sk->sk_sndbuf >> 1) - 64))
1650  size = (sk->sk_sndbuf >> 1) - 64;
1651 
1652  if (size > SKB_MAX_ALLOC)
1653  size = SKB_MAX_ALLOC;
1654 
1655  /*
1656  * Grab a buffer
1657  */
1658 
1659  skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1660  &err);
1661 
1662  if (skb == NULL)
1663  goto out_err;
1664 
1665  /*
1666  * If you pass two values to the sock_alloc_send_skb
1667  * it tries to grab the large buffer with GFP_NOFS
1668  * (which can fail easily), and if it fails grab the
1669  * fallback size buffer which is under a page and will
1670  * succeed. [Alan]
1671  */
1672  size = min_t(int, size, skb_tailroom(skb));
1673 
1674 
1675  /* Only send the fds in the first buffer */
1676  err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1677  if (err < 0) {
1678  kfree_skb(skb);
1679  goto out_err;
1680  }
1681  max_level = err + 1;
1682  fds_sent = true;
1683 
1684  err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1685  if (err) {
1686  kfree_skb(skb);
1687  goto out_err;
1688  }
1689 
1690  unix_state_lock(other);
1691 
1692  if (sock_flag(other, SOCK_DEAD) ||
1693  (other->sk_shutdown & RCV_SHUTDOWN))
1694  goto pipe_err_free;
1695 
1696  maybe_add_creds(skb, sock, other);
1697  skb_queue_tail(&other->sk_receive_queue, skb);
1698  if (max_level > unix_sk(other)->recursion_level)
1699  unix_sk(other)->recursion_level = max_level;
1700  unix_state_unlock(other);
1701  other->sk_data_ready(other, size);
1702  sent += size;
1703  }
1704 
1705  scm_destroy(siocb->scm);
1706  siocb->scm = NULL;
1707 
1708  return sent;
1709 
1710 pipe_err_free:
1711  unix_state_unlock(other);
1712  kfree_skb(skb);
1713 pipe_err:
1714  if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1715  send_sig(SIGPIPE, current, 0);
1716  err = -EPIPE;
1717 out_err:
1718  scm_destroy(siocb->scm);
1719  siocb->scm = NULL;
1720  return sent ? : err;
1721 }
1722 
1723 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1724  struct msghdr *msg, size_t len)
1725 {
1726  int err;
1727  struct sock *sk = sock->sk;
1728 
1729  err = sock_error(sk);
1730  if (err)
1731  return err;
1732 
1733  if (sk->sk_state != TCP_ESTABLISHED)
1734  return -ENOTCONN;
1735 
1736  if (msg->msg_namelen)
1737  msg->msg_namelen = 0;
1738 
1739  return unix_dgram_sendmsg(kiocb, sock, msg, len);
1740 }
1741 
1742 static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1743  struct msghdr *msg, size_t size,
1744  int flags)
1745 {
1746  struct sock *sk = sock->sk;
1747 
1748  if (sk->sk_state != TCP_ESTABLISHED)
1749  return -ENOTCONN;
1750 
1751  return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1752 }
1753 
1754 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1755 {
1756  struct unix_sock *u = unix_sk(sk);
1757 
1758  msg->msg_namelen = 0;
1759  if (u->addr) {
1760  msg->msg_namelen = u->addr->len;
1761  memcpy(msg->msg_name, u->addr->name, u->addr->len);
1762  }
1763 }
1764 
1765 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1766  struct msghdr *msg, size_t size,
1767  int flags)
1768 {
1769  struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1770  struct scm_cookie tmp_scm;
1771  struct sock *sk = sock->sk;
1772  struct unix_sock *u = unix_sk(sk);
1773  int noblock = flags & MSG_DONTWAIT;
1774  struct sk_buff *skb;
1775  int err;
1776  int peeked, skip;
1777 
1778  err = -EOPNOTSUPP;
1779  if (flags&MSG_OOB)
1780  goto out;
1781 
1782  msg->msg_namelen = 0;
1783 
1784  err = mutex_lock_interruptible(&u->readlock);
1785  if (err) {
1786  err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1787  goto out;
1788  }
1789 
1790  skip = sk_peek_offset(sk, flags);
1791 
1792  skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
1793  if (!skb) {
1794  unix_state_lock(sk);
1795  /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1796  if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1797  (sk->sk_shutdown & RCV_SHUTDOWN))
1798  err = 0;
1799  unix_state_unlock(sk);
1800  goto out_unlock;
1801  }
1802 
1803  wake_up_interruptible_sync_poll(&u->peer_wait,
1804      POLLOUT | POLLWRNORM | POLLWRBAND);
1805 
1806  if (msg->msg_name)
1807  unix_copy_addr(msg, skb->sk);
1808 
1809  if (size > skb->len - skip)
1810  size = skb->len - skip;
1811  else if (size < skb->len - skip)
1812  msg->msg_flags |= MSG_TRUNC;
1813 
1814  err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
1815  if (err)
1816  goto out_free;
1817 
1818  if (sock_flag(sk, SOCK_RCVTSTAMP))
1819  __sock_recv_timestamp(msg, sk, skb);
1820 
1821  if (!siocb->scm) {
1822  siocb->scm = &tmp_scm;
1823  memset(&tmp_scm, 0, sizeof(tmp_scm));
1824  }
1825  scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1826  unix_set_secdata(siocb->scm, skb);
1827 
1828  if (!(flags & MSG_PEEK)) {
1829  if (UNIXCB(skb).fp)
1830  unix_detach_fds(siocb->scm, skb);
1831 
1832  sk_peek_offset_bwd(sk, skb->len);
1833  } else {
1834  /* It is questionable: on PEEK we could:
1835  - do not return fds - good, but too simple 8)
1836  - return fds, and do not return them on read (old strategy,
1837  apparently wrong)
1838  - clone fds (I chose it for now, it is the most universal
1839  solution)
1840 
1841  POSIX 1003.1g does not actually define this clearly
1842  at all. POSIX 1003.1g doesn't define a lot of things
1843  clearly however!
1844 
1845  */
1846 
1847  sk_peek_offset_fwd(sk, size);
1848 
1849  if (UNIXCB(skb).fp)
1850  siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1851  }
1852  err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1853 
1854  scm_recv(sock, msg, siocb->scm, flags);
1855 
1856 out_free:
1857  skb_free_datagram(sk, skb);
1858 out_unlock:
1859  mutex_unlock(&u->readlock);
1860 out:
1861  return err;
1862 }
1863 
1864 /*
1865  * Sleep until data has arrived. But check for races..
1866  */
1867 
1868 static long unix_stream_data_wait(struct sock *sk, long timeo)
1869 {
1870  DEFINE_WAIT(wait);
1871 
1872  unix_state_lock(sk);
1873 
1874  for (;;) {
1875  prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1876 
1877  if (!skb_queue_empty(&sk->sk_receive_queue) ||
1878  sk->sk_err ||
1879  (sk->sk_shutdown & RCV_SHUTDOWN) ||
1880  signal_pending(current) ||
1881  !timeo)
1882  break;
1883 
1884  set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1885  unix_state_unlock(sk);
1886  timeo = schedule_timeout(timeo);
1887  unix_state_lock(sk);
1888  clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1889  }
1890 
1891  finish_wait(sk_sleep(sk), &wait);
1892  unix_state_unlock(sk);
1893  return timeo;
1894 }
1895 
1896 
1897 
1898 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1899  struct msghdr *msg, size_t size,
1900  int flags)
1901 {
1902  struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1903  struct scm_cookie tmp_scm;
1904  struct sock *sk = sock->sk;
1905  struct unix_sock *u = unix_sk(sk);
1906  struct sockaddr_un *sunaddr = msg->msg_name;
1907  int copied = 0;
1908  int check_creds = 0;
1909  int target;
1910  int err = 0;
1911  long timeo;
1912  int skip;
1913 
1914  err = -EINVAL;
1915  if (sk->sk_state != TCP_ESTABLISHED)
1916  goto out;
1917 
1918  err = -EOPNOTSUPP;
1919  if (flags&MSG_OOB)
1920  goto out;
1921 
1922  target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1923  timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1924 
1925  msg->msg_namelen = 0;
1926 
1927  /* Lock the socket to prevent queue disordering
1928  * while we sleep in memcpy_tomsg
1929  */
1930 
1931  if (!siocb->scm) {
1932  siocb->scm = &tmp_scm;
1933  memset(&tmp_scm, 0, sizeof(tmp_scm));
1934  }
1935 
1936  err = mutex_lock_interruptible(&u->readlock);
1937  if (err) {
1938  err = sock_intr_errno(timeo);
1939  goto out;
1940  }
1941 
1942  skip = sk_peek_offset(sk, flags);
1943 
1944  do {
1945  int chunk;
1946  struct sk_buff *skb;
1947 
1948  unix_state_lock(sk);
1949  skb = skb_peek(&sk->sk_receive_queue);
1950 again:
1951  if (skb == NULL) {
1952  unix_sk(sk)->recursion_level = 0;
1953  if (copied >= target)
1954  goto unlock;
1955 
1956  /*
1957  * POSIX 1003.1g mandates this order.
1958  */
1959 
1960  err = sock_error(sk);
1961  if (err)
1962  goto unlock;
1963  if (sk->sk_shutdown & RCV_SHUTDOWN)
1964  goto unlock;
1965 
1966  unix_state_unlock(sk);
1967  err = -EAGAIN;
1968  if (!timeo)
1969  break;
1970  mutex_unlock(&u->readlock);
1971 
1972  timeo = unix_stream_data_wait(sk, timeo);
1973 
1974  if (signal_pending(current)
1975      || mutex_lock_interruptible(&u->readlock)) {
1976  err = sock_intr_errno(timeo);
1977  goto out;
1978  }
1979 
1980  continue;
1981  unlock:
1982  unix_state_unlock(sk);
1983  break;
1984  }
1985 
1986  if (skip >= skb->len) {
1987  skip -= skb->len;
1988  skb = skb_peek_next(skb, &sk->sk_receive_queue);
1989  goto again;
1990  }
1991 
1992  unix_state_unlock(sk);
1993 
1994  if (check_creds) {
1995  /* Never glue messages from different writers */
1996  if ((UNIXCB(skb).pid != siocb->scm->pid) ||
1997  (UNIXCB(skb).cred != siocb->scm->cred))
1998  break;
1999  } else {
2000  /* Copy credentials */
2001  scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
2002  check_creds = 1;
2003  }
2004 
2005  /* Copy address just once */
2006  if (sunaddr) {
2007  unix_copy_addr(msg, skb->sk);
2008  sunaddr = NULL;
2009  }
2010 
2011  chunk = min_t(unsigned int, skb->len - skip, size);
2012  if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
2013  if (copied == 0)
2014  copied = -EFAULT;
2015  break;
2016  }
2017  copied += chunk;
2018  size -= chunk;
2019 
2020  /* Mark read part of skb as used */
2021  if (!(flags & MSG_PEEK)) {
2022  skb_pull(skb, chunk);
2023 
2024  sk_peek_offset_bwd(sk, chunk);
2025 
2026  if (UNIXCB(skb).fp)
2027  unix_detach_fds(siocb->scm, skb);
2028 
2029  if (skb->len)
2030  break;
2031 
2032  skb_unlink(skb, &sk->sk_receive_queue);
2033  consume_skb(skb);
2034 
2035  if (siocb->scm->fp)
2036  break;
2037  } else {
2038  /* It is questionable, see note in unix_dgram_recvmsg.
2039  */
2040  if (UNIXCB(skb).fp)
2041  siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2042 
2043  sk_peek_offset_fwd(sk, chunk);
2044 
2045  break;
2046  }
2047  } while (size);
2048 
2049  mutex_unlock(&u->readlock);
2050  scm_recv(sock, msg, siocb->scm, flags);
2051 out:
2052  return copied ? : err;
2053 }
2054 
2055 static int unix_shutdown(struct socket *sock, int mode)
2056 {
2057  struct sock *sk = sock->sk;
2058  struct sock *other;
2059 
2060  if (mode < SHUT_RD || mode > SHUT_RDWR)
2061  return -EINVAL;
2062  /* This maps:
2063  * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2064  * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2065  * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2066  */
2067  ++mode;
2068 
2069  unix_state_lock(sk);
2070  sk->sk_shutdown |= mode;
2071  other = unix_peer(sk);
2072  if (other)
2073  sock_hold(other);
2074  unix_state_unlock(sk);
2075  sk->sk_state_change(sk);
2076 
2077  if (other &&
2078  (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2079 
2080  int peer_mode = 0;
2081 
2082  if (mode&RCV_SHUTDOWN)
2083  peer_mode |= SEND_SHUTDOWN;
2084  if (mode&SEND_SHUTDOWN)
2085  peer_mode |= RCV_SHUTDOWN;
2086  unix_state_lock(other);
2087  other->sk_shutdown |= peer_mode;
2088  unix_state_unlock(other);
2089  other->sk_state_change(other);
2090  if (peer_mode == SHUTDOWN_MASK)
2091  sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2092  else if (peer_mode & RCV_SHUTDOWN)
2093  sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2094  }
2095  if (other)
2096  sock_put(other);
2097 
2098  return 0;
2099 }
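The RCV/SEND mirroring above is what gives AF_UNIX stream pairs their
half-close semantics; a sketch from the userspace side (asserts stand in
for error handling):

    #include <assert.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
        int sv[2];
        char c;

        socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
        shutdown(sv[0], SHUT_WR);          /* peer gets RCV_SHUTDOWN */
        assert(read(sv[1], &c, 1) == 0);   /* immediate EOF, no blocking */
        assert(write(sv[1], "x", 1) == 1); /* other direction still open */
        assert(read(sv[0], &c, 1) == 1);
        return 0;
    }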
2100 
2101 long unix_inq_len(struct sock *sk)
2102 {
2103  struct sk_buff *skb;
2104  long amount = 0;
2105 
2106  if (sk->sk_state == TCP_LISTEN)
2107  return -EINVAL;
2108 
2109  spin_lock(&sk->sk_receive_queue.lock);
2110  if (sk->sk_type == SOCK_STREAM ||
2111  sk->sk_type == SOCK_SEQPACKET) {
2112  skb_queue_walk(&sk->sk_receive_queue, skb)
2113  amount += skb->len;
2114  } else {
2115  skb = skb_peek(&sk->sk_receive_queue);
2116  if (skb)
2117  amount = skb->len;
2118  }
2119  spin_unlock(&sk->sk_receive_queue.lock);
2120 
2121  return amount;
2122 }
2123 EXPORT_SYMBOL_GPL(unix_inq_len);
2124 
2125 long unix_outq_len(struct sock *sk)
2126 {
2127  return sk_wmem_alloc_get(sk);
2128 }
2129 EXPORT_SYMBOL_GPL(unix_outq_len);
2130 
2131 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2132 {
2133  struct sock *sk = sock->sk;
2134  long amount = 0;
2135  int err;
2136 
2137  switch (cmd) {
2138  case SIOCOUTQ:
2139  amount = unix_outq_len(sk);
2140  err = put_user(amount, (int __user *)arg);
2141  break;
2142  case SIOCINQ:
2143  amount = unix_inq_len(sk);
2144  if (amount < 0)
2145  err = amount;
2146  else
2147  err = put_user(amount, (int __user *)arg);
2148  break;
2149  default:
2150  err = -ENOIOCTLCMD;
2151  break;
2152  }
2153  return err;
2154 }
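From userspace these two commands are reachable via ioctl(2): SIOCINQ reports unix_inq_len() (all queued bytes for stream/seqpacket sockets, the length of the first datagram otherwise), and SIOCOUTQ reports the sender's in-flight write memory. A sketch (error handling omitted):

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/sockios.h>	/* SIOCINQ, SIOCOUTQ */
#include <unistd.h>

int main(void)
{
	int sv[2], inq = 0, outq = 0;

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
	write(sv[0], "hello", 5);

	ioctl(sv[1], SIOCINQ, &inq);	/* 5: bytes waiting to be read */
	ioctl(sv[0], SIOCOUTQ, &outq);	/* nonzero (skb truesize) until the
					   peer consumes the data */
	printf("inq=%d outq=%d\n", inq, outq);
	return 0;
}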
2155 
2156 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2157 {
2158  struct sock *sk = sock->sk;
2159  unsigned int mask;
2160 
2161  sock_poll_wait(file, sk_sleep(sk), wait);
2162  mask = 0;
2163 
2164  /* exceptional events? */
2165  if (sk->sk_err)
2166  mask |= POLLERR;
2167  if (sk->sk_shutdown == SHUTDOWN_MASK)
2168  mask |= POLLHUP;
2169  if (sk->sk_shutdown & RCV_SHUTDOWN)
2170  mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2171 
2172  /* readable? */
2173  if (!skb_queue_empty(&sk->sk_receive_queue))
2174  mask |= POLLIN | POLLRDNORM;
2175 
2176  /* Connection-based sockets need to check for termination and startup */
2177  if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2178  sk->sk_state == TCP_CLOSE)
2179  mask |= POLLHUP;
2180 
2181  /*
2182  * We also report the socket as writable when the other side has
2183  * shut down the connection; this prevents stuck sockets.
2184  */
2185  if (unix_writable(sk))
2186  mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2187 
2188  return mask;
2189 }
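The shutdown bits tested above combine with unix_shutdown(): once the peer shuts down both directions, a poll()er here wakes with POLLHUP together with POLLRDHUP|POLLIN|POLLRDNORM, even before reading the EOF. A sketch (POLLRDHUP needs _GNU_SOURCE; error handling omitted):

#define _GNU_SOURCE
#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int sv[2];
	struct pollfd pfd = { .events = POLLIN | POLLRDHUP };

	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
	shutdown(sv[0], SHUT_RDWR);	/* peer's sk_shutdown becomes SHUTDOWN_MASK */

	pfd.fd = sv[1];
	poll(&pfd, 1, 0);
	/* Expect POLLHUP|POLLRDHUP|POLLIN|POLLRDNORM in revents. */
	printf("revents=%#x\n", pfd.revents);
	return 0;
}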
2190 
2191 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2192  poll_table *wait)
2193 {
2194  struct sock *sk = sock->sk, *other;
2195  unsigned int mask, writable;
2196 
2197  sock_poll_wait(file, sk_sleep(sk), wait);
2198  mask = 0;
2199 
2200  /* exceptional events? */
2201  if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2202  mask |= POLLERR;
2203  if (sk->sk_shutdown & RCV_SHUTDOWN)
2204  mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2205  if (sk->sk_shutdown == SHUTDOWN_MASK)
2206  mask |= POLLHUP;
2207 
2208  /* readable? */
2209  if (!skb_queue_empty(&sk->sk_receive_queue))
2210  mask |= POLLIN | POLLRDNORM;
2211 
2212  /* Connection-based sockets need to check for termination and startup */
2213  if (sk->sk_type == SOCK_SEQPACKET) {
2214  if (sk->sk_state == TCP_CLOSE)
2215  mask |= POLLHUP;
2216  /* connection hasn't started yet? */
2217  if (sk->sk_state == TCP_SYN_SENT)
2218  return mask;
2219  }
2220 
2221  /* No write status requested, avoid expensive OUT tests. */
2222  if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2223  return mask;
2224 
2225  writable = unix_writable(sk);
2226  other = unix_peer_get(sk);
2227  if (other) {
2228  if (unix_peer(other) != sk) {
2229  sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2230  if (unix_recvq_full(other))
2231  writable = 0;
2232  }
2233  sock_put(other);
2234  }
2235 
2236  if (writable)
2237  mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2238  else
2239  set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2240 
2241  return mask;
2242 }
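Note that the peer-queue check above only fires when the peer is not symmetrically connected back (unix_peer(other) != sk), i.e. the many-clients-to-one-bound-server datagram case, not a socketpair. In that case a full receiver queue withholds POLLOUT from the sender even though the sender still has write memory. A sketch (the abstract-namespace name is hypothetical; the queue depth is net.unix.max_dgram_qlen, 10 by default per unix_net_init() below; error handling omitted):

#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

int main(void)
{
	int srv = socket(AF_UNIX, SOCK_DGRAM, 0);
	int cli = socket(AF_UNIX, SOCK_DGRAM, 0);
	struct sockaddr_un sun = { .sun_family = AF_UNIX };
	struct pollfd pfd = { .events = POLLOUT };

	strcpy(sun.sun_path + 1, "dgram-poll-demo");	/* abstract name */
	bind(srv, (struct sockaddr *)&sun, sizeof(sun));
	connect(cli, (struct sockaddr *)&sun, sizeof(sun));

	/* The server never recv()s, so the sends stop with EAGAIN once
	   unix_recvq_full() is true on the server socket. */
	while (send(cli, "x", 1, MSG_DONTWAIT) == 1)
		;

	pfd.fd = cli;
	poll(&pfd, 1, 0);
	printf("POLLOUT %s\n", (pfd.revents & POLLOUT) ? "set" : "clear");
	return 0;
}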
2243 
2244 #ifdef CONFIG_PROC_FS
2245 
2246 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2247 
2248 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2249 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2250 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
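These macros pack a hash-bucket index and a 1-based in-bucket offset into the single loff_t cursor that the seq_file core hands back; the offset starts at 1 (see set_bucket_offset(bucket, 1) below) so that *pos == 0 stays free to mean SEQ_START_TOKEN, and the final -1 in BUCKET_SPACE keeps the packed value non-negative. As a worked example, on a 64-bit build with UNIX_HASH_BITS = 8 (the value in this tree's af_unix.h, whose table has 2 * UNIX_HASH_SIZE buckets, hence the UNIX_HASH_BITS + 1 bucket bits), BUCKET_SPACE is 64 - 9 - 1 = 54, so set_bucket_offset(3, 5) yields (3UL << 54) | 5, from which get_bucket() and get_offset() recover 3 and 5.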
2251 
2252 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2253 {
2254  unsigned long offset = get_offset(*pos);
2255  unsigned long bucket = get_bucket(*pos);
2256  struct sock *sk;
2257  unsigned long count = 0;
2258 
2259  for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2260  if (sock_net(sk) != seq_file_net(seq))
2261  continue;
2262  if (++count == offset)
2263  break;
2264  }
2265 
2266  return sk;
2267 }
2268 
2269 static struct sock *unix_next_socket(struct seq_file *seq,
2270  struct sock *sk,
2271  loff_t *pos)
2272 {
2273  unsigned long bucket;
2274 
2275  while (sk > (struct sock *)SEQ_START_TOKEN) {
2276  sk = sk_next(sk);
2277  if (!sk)
2278  goto next_bucket;
2279  if (sock_net(sk) == seq_file_net(seq))
2280  return sk;
2281  }
2282 
2283  do {
2284  sk = unix_from_bucket(seq, pos);
2285  if (sk)
2286  return sk;
2287 
2288 next_bucket:
2289  bucket = get_bucket(*pos) + 1;
2290  *pos = set_bucket_offset(bucket, 1);
2291  } while (bucket < ARRAY_SIZE(unix_socket_table));
2292 
2293  return NULL;
2294 }
2295 
2296 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2297  __acquires(unix_table_lock)
2298 {
2299  spin_lock(&unix_table_lock);
2300 
2301  if (!*pos)
2302  return SEQ_START_TOKEN;
2303 
2304  if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2305  return NULL;
2306 
2307  return unix_next_socket(seq, NULL, pos);
2308 }
2309 
2310 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2311 {
2312  ++*pos;
2313  return unix_next_socket(seq, v, pos);
2314 }
2315 
2316 static void unix_seq_stop(struct seq_file *seq, void *v)
2317  __releases(unix_table_lock)
2318 {
2319  spin_unlock(&unix_table_lock);
2320 }
2321 
2322 static int unix_seq_show(struct seq_file *seq, void *v)
2323 {
2324 
2325  if (v == SEQ_START_TOKEN)
2326  seq_puts(seq, "Num RefCount Protocol Flags Type St "
2327  "Inode Path\n");
2328  else {
2329  struct sock *s = v;
2330  struct unix_sock *u = unix_sk(s);
2331  unix_state_lock(s);
2332 
2333  seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2334  s,
2335  atomic_read(&s->sk_refcnt),
2336  0,
2337  s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2338  s->sk_type,
2339  s->sk_socket ?
2340  (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2341  (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2342  sock_i_ino(s));
2343 
2344  if (u->addr) {
2345  int i, len;
2346  seq_putc(seq, ' ');
2347 
2348  i = 0;
2349  len = u->addr->len - sizeof(short);
2350  if (!UNIX_ABSTRACT(s))
2351  len--;
2352  else {
2353  seq_putc(seq, '@');
2354  i++;
2355  }
2356  for ( ; i < len; i++)
2357  seq_putc(seq, u->addr->name->sun_path[i]);
2358  }
2359  unix_state_unlock(s);
2360  seq_putc(seq, '\n');
2361  }
2362 
2363  return 0;
2364 }
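The resulting /proc/net/unix line for, say, a listening stream socket bound to a filesystem path might look like this (illustrative values, not captured output; the %pK address prints as zeroes for unprivileged readers when kptr_restrict is set):

Num       RefCount Protocol Flags    Type St Inode Path
ffff88003ab1c000: 00000002 00000000 00010000 0001 01 12345 /run/demo.sock

Here Flags 00010000 is __SO_ACCEPTCON (the socket is listening), Type 0001 is SOCK_STREAM, and St 01 is SS_UNCONNECTED.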
2365 
2366 static const struct seq_operations unix_seq_ops = {
2367  .start = unix_seq_start,
2368  .next = unix_seq_next,
2369  .stop = unix_seq_stop,
2370  .show = unix_seq_show,
2371 };
2372 
2373 static int unix_seq_open(struct inode *inode, struct file *file)
2374 {
2375  return seq_open_net(inode, file, &unix_seq_ops,
2376  sizeof(struct seq_net_private));
2377 }
2378 
2379 static const struct file_operations unix_seq_fops = {
2380  .owner = THIS_MODULE,
2381  .open = unix_seq_open,
2382  .read = seq_read,
2383  .llseek = seq_lseek,
2384  .release = seq_release_net,
2385 };
2386 
2387 #endif
2388 
2389 static const struct net_proto_family unix_family_ops = {
2390  .family = PF_UNIX,
2391  .create = unix_create,
2392  .owner = THIS_MODULE,
2393 };
2394 
2395 
2396 static int __net_init unix_net_init(struct net *net)
2397 {
2398  int error = -ENOMEM;
2399 
2400  net->unx.sysctl_max_dgram_qlen = 10;
2401  if (unix_sysctl_register(net))
2402  goto out;
2403 
2404 #ifdef CONFIG_PROC_FS
2405  if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2406  unix_sysctl_unregister(net);
2407  goto out;
2408  }
2409 #endif
2410  error = 0;
2411 out:
2412  return error;
2413 }
2414 
2415 static void __net_exit unix_net_exit(struct net *net)
2416 {
2417  unix_sysctl_unregister(net);
2418  proc_net_remove(net, "unix");
2419 }
2420 
2421 static struct pernet_operations unix_net_ops = {
2422  .init = unix_net_init,
2423  .exit = unix_net_exit,
2424 };
2425 
2426 static int __init af_unix_init(void)
2427 {
2428  int rc = -1;
2429  struct sk_buff *dummy_skb;
2430 
2431  BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2432 
2433  rc = proto_register(&unix_proto, 1);
2434  if (rc != 0) {
2435  printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2436  __func__);
2437  goto out;
2438  }
2439 
2440  sock_register(&unix_family_ops);
2441  register_pernet_subsys(&unix_net_ops);
2442 out:
2443  return rc;
2444 }
2445 
2446 static void __exit af_unix_exit(void)
2447 {
2448  sock_unregister(PF_UNIX);
2449  proto_unregister(&unix_proto);
2450  unregister_pernet_subsys(&unix_net_ops);
2451 }
2452 
2453 /* Earlier than device_initcall() so that other drivers invoking
2454  request_module() don't end up in a loop when modprobe tries
2455  to use a UNIX socket. But later than subsys_initcall() because
2456  we depend on stuff initialised there */
2457 fs_initcall(af_unix_init);
2458 module_exit(af_unix_exit);
2459 
2460 MODULE_LICENSE("GPL");