Linux Kernel 3.7.1
datagram.c
/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Possibly a generic IP version on top of these would
 *	make sense. Not tonight however 8-).
 *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
 *	NetROM layer all have identical poll code and mostly
 *	identical recvmsg() code. So we share it here. The poll was
 *	shared before but buried in udp.c so I moved it.
 *
 *	Authors:	Alan Cox <[email protected]>. (datagram_poll() from old
 *			udp.c code)
 *
 *	Fixes:
 *		Alan Cox	:	NULL return from skb_peek_copy()
 *					understood
 *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
 *					skb_peek_copy stuff.
 *		Alan Cox	:	Added support for SOCK_SEQPACKET. IPX can no
 *					longer use the SO_TYPE hack but AX.25 now
 *					works right, and SPX is feasible.
 *		Alan Cox	:	Fixed write poll of non IP protocol crash.
 *		Florian La Roche:	Changed for my new skbuff handling.
 *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
 *		Linus Torvalds	:	BSD semantic fixes.
 *		Alan Cox	:	Datagram iovec handling
 *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
 *		Alan Cox	:	POSIXisms
 *		Pete Wyckoff	:	Unconnected accept() fix.
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>

/*
 *	Is a socket 'connection oriented' ?
 */
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
				  void *key)
{
	unsigned long bits = (unsigned long)key;

	/*
	 * Avoid a wakeup if event not interesting for us
	 */
	if (bits && !(bits & (POLLIN | POLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}
/*
 * Wait for a packet..
 */
static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (!skb_queue_empty(&sk->sk_receive_queue))
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}

struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
				    int *peeked, int *off, int *err)
{
	struct sk_buff *skb;
	long timeo;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Again only user level code calls this function, so nothing
		 * interrupt level will suddenly eat the receive_queue.
		 *
		 * Look at current nfs client by the way...
		 * However, this function was correct in any case. 8)
		 */
		unsigned long cpu_flags;
		struct sk_buff_head *queue = &sk->sk_receive_queue;

		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb_queue_walk(queue, skb) {
			*peeked = skb->peeked;
			if (flags & MSG_PEEK) {
				if (*off >= skb->len) {
					*off -= skb->len;
					continue;
				}
				skb->peeked = 1;
				atomic_inc(&skb->users);
			} else
				__skb_unlink(skb, queue);

			spin_unlock_irqrestore(&queue->lock, cpu_flags);
			return skb;
		}
		spin_unlock_irqrestore(&queue->lock, cpu_flags);

		/* User doesn't want to wait */
		error = -EAGAIN;
		if (!timeo)
			goto no_packet;

	} while (!wait_for_packet(sk, err, &timeo));

	return NULL;

no_packet:
	*err = error;
	return NULL;
}

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int noblock, int *err)
{
	int peeked, off = 0;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   &peeked, &off, err);
}
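
/*
 * Illustrative sketch (not part of datagram.c): a minimal recvmsg()
 * implementation for a hypothetical datagram protocol, showing how
 * skb_recv_datagram(), skb_copy_datagram_iovec() and skb_free_datagram()
 * are typically chained.  The function name and its wiring are made up;
 * real users (e.g. udp_recvmsg()) add address copying, cmsg handling and
 * checksum logic on top of this skeleton.
 */
static int my_proto_recvmsg(struct kiocb *iocb, struct sock *sk,
			    struct msghdr *msg, size_t len,
			    int noblock, int flags, int *addr_len)
{
	struct sk_buff *skb;
	int copied, err;

	/* Dequeue (or peek) one datagram, honouring timeouts and signals. */
	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb)
		return err;

	/* Truncate to the caller's buffer and report it via MSG_TRUNC. */
	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	/* Drop the reference taken by the receive path (or by MSG_PEEK). */
	skb_free_datagram(sk, skb);

	return err ? err : copied;
}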

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}

void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
{
	bool slow;

	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users)))
		return;

	slow = lock_sock_fast(sk);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned, can be freed outside of locked section */
	__kfree_skb(skb);
}

int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk->sk_receive_queue.lock);
		if (skb == skb_peek(&sk->sk_receive_queue)) {
			__skb_unlink(skb, &sk->sk_receive_queue);
			atomic_dec(&skb->users);
			err = 0;
		}
		spin_unlock_bh(&sk->sk_receive_queue.lock);
	}

	kfree_skb(skb);
	atomic_inc(&sk->sk_drops);
	sk_mem_reclaim_partial(sk);

	return err;
}

int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
			    struct iovec *to, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	trace_skb_copy_datagram_iovec(skb, len);

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_toiovec(to, skb->data + offset, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			int err;
			u8 *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_toiovec(to, vaddr + frag->page_offset +
					     offset - start, copy);
			kunmap(page);
			if (err)
				goto fault;
			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_iovec(frag_iter,
						    offset - start,
						    to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}

int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
				  const struct iovec *to, int to_offset,
				  int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to_offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			int err;
			u8 *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_toiovecend(to, vaddr + frag->page_offset +
						offset - start, to_offset, copy);
			kunmap(page);
			if (err)
				goto fault;
			if (!(len -= copy))
				return 0;
			offset += copy;
			to_offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_const_iovec(frag_iter,
							  offset - start,
							  to, to_offset,
							  copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to_offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}

int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
				 const struct iovec *from, int from_offset,
				 int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
					copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		from_offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			int err;
			u8 *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_fromiovecend(vaddr + frag->page_offset +
						  offset - start,
						  from, from_offset, copy);
			kunmap(page);
			if (err)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
			from_offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iovec(frag_iter,
							 offset - start,
							 from,
							 from_offset,
							 copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			from_offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
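
/*
 * Illustrative sketch (not part of datagram.c): the transmit-side
 * counterpart.  A hypothetical sendmsg() helper allocates an skb and fills
 * its linear area from the user's iovec with skb_copy_datagram_from_iovec().
 * The function name and the omitted transmit step are assumptions; real
 * callers (e.g. the tun driver and af_packet) pass non-zero offsets to skip
 * protocol headers.
 */
static struct sk_buff *my_proto_build_skb(struct sock *sk, struct msghdr *msg,
					  size_t len, int *errp)
{
	struct sk_buff *skb;

	/* Blocks (subject to SO_SNDTIMEO) unless MSG_DONTWAIT was given. */
	skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT, errp);
	if (!skb)
		return NULL;

	skb_put(skb, len);
	/* Copy 'len' bytes from offset 0 of the iovec to offset 0 of the skb. */
	if (skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len)) {
		kfree_skb(skb);
		*errp = -EFAULT;
		return NULL;
	}
	return skb;	/* caller hands this to its transmit path */
}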

static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      u8 __user *to, int len,
				      __wsum *csump)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int pos = 0;

	/* Copy header. */
	if (copy > 0) {
		int err = 0;
		if (copy > len)
			copy = len;
		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
					       *csump, &err);
		if (err)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			int err = 0;
			u8 *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			csum2 = csum_and_copy_to_user(vaddr +
						      frag->page_offset +
						      offset - start,
						      to, copy, 0, &err);
			kunmap(page);
			if (err)
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;
			if (copy > len)
				copy = len;
			if (skb_copy_and_csum_datagram(frag_iter,
						       offset - start,
						       to, copy,
						       &csum2))
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}

__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
	__sum16 sum;

	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
	return sum;
}

__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
	return __skb_checksum_complete_head(skb, skb->len);
}

int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
				     int hlen, struct iovec *iov)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	/* Skip filled elements.
	 * Pretty silly, look at memcpy_toiovec, though 8)
	 */
	while (!iov->iov_len)
		iov++;

	if (iov->iov_len < chunk) {
		if (__skb_checksum_complete(skb))
			goto csum_error;
		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
					       chunk, &csum))
			goto fault;
		if (csum_fold(csum))
			goto csum_error;
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
		iov->iov_len -= chunk;
		iov->iov_base += chunk;
	}
	return 0;
csum_error:
	return -EINVAL;
fault:
	return -EFAULT;
}
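
/*
 * Illustrative sketch (not part of datagram.c): how a receive path typically
 * chooses between the plain and the checksumming copy.  Names are made up and
 * the logic is simplified; it loosely follows the pattern in udp_recvmsg().
 * On -EINVAL (checksum failure) the caller usually discards the datagram with
 * skb_kill_datagram() and retries, rather than returning bad data to user
 * space.
 */
static int my_dgram_copy_to_user(struct sk_buff *skb, struct msghdr *msg,
				 int hlen, int copied)
{
	if (skb_csum_unnecessary(skb))
		/* Hardware (or an earlier pass) already verified the checksum. */
		return skb_copy_datagram_iovec(skb, hlen, msg->msg_iov, copied);

	/* Verify the checksum while copying, in a single pass over the data. */
	return skb_copy_and_csum_datagram_iovec(skb, hlen, msg->msg_iov);
}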

unsigned int datagram_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}
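
/*
 * Illustrative sketch (not part of datagram.c): datagram_poll() is normally
 * hooked straight into a protocol's proto_ops, so poll()/select() on such a
 * socket ends up in the code above.  The structure below is a made-up
 * fragment; a real protocol fills in the remaining handlers (often with the
 * sock_no_*() defaults) and registers the ops with its socket family.
 */
static const struct proto_ops my_dgram_ops = {
	.family	= PF_UNSPEC,		/* hypothetical placeholder */
	.owner	= THIS_MODULE,
	.poll	= datagram_poll,	/* shared datagram poll implementation */
	/* .release, .bind, .sendmsg, .recvmsg, ... intentionally omitted */
};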