net/dccp/proto.c (Linux kernel 3.7.1)
/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <[email protected]>
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

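/*
 * Usage sketch (an assumption, based on the table registered by
 * net/dccp/sysctl.c, which exposes this default under net.dccp.default):
 *
 *	# sysctl net.dccp.default.tx_qlen
 *	# sysctl -w net.dccp.default.tx_qlen=10
 */
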
#ifdef CONFIG_IP_DCCP_DEBUG
static const char *dccp_state_name(const int state)
{
	static const char *const dccp_state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}
#endif

void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}

void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
	static const char *const dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/* do not start to listen if feature negotiation setup fails */
	if (dccp_feat_finalise_settings(dp))
		return -EPROTO;
	return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
	return state != DCCP_CLOSED && state != DCCP_LISTEN &&
	       state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->inet_dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	sock_poll_wait(file, sk_sleep(sk), wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

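/*
 * Usage sketch from user space (illustrative only; dccp_fd is a hypothetical
 * connected DCCP socket descriptor):
 *
 *	struct pollfd pfd = { .fd = dccp_fd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, 1000) > 0) {
 *		if (pfd.revents & POLLIN)
 *			;	// at least one whole datagram is queued
 *		if (pfd.revents & POLLOUT)
 *			;	// enough write space for another packet
 *	}
 */
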
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

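/*
 * Usage sketch (illustrative): unlike TCP, SIOCINQ here reports only the
 * length of the packet at the head of the receive queue, since one recv()
 * consumes at most one datagram.
 *
 *	int next_len = 0;
 *
 *	if (ioctl(dccp_fd, SIOCINQ, &next_len) == 0)
 *		printf("next read returns at most %d bytes\n", next_len);
 */
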
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

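/*
 * Usage sketch (illustrative; the service code 42 is an arbitrary example):
 * a client sets its service code before connect(), a server may pass a list
 * of acceptable codes before listen().
 *
 *	__be32 service = htonl(42);
 *
 *	if (setsockopt(dccp_fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		       &service, sizeof(service)) < 0)
 *		perror("DCCP_SOCKOPT_SERVICE");
 */
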
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}

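/*
 * Usage sketch (illustrative): requesting partial checksum coverage on the
 * send side. Per RFC 4340 a CsCov value of 1 covers only the DCCP header,
 * leaving the payload unprotected, which the code above then negotiates.
 *
 *	int cscov = 1;
 *
 *	if (setsockopt(dccp_fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		       &cscov, sizeof(cscov)) < 0)
 *		perror("DCCP_SOCKOPT_SEND_CSCOV");
 */
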
static int dccp_setsockopt_ccid(struct sock *sk, int type,
				char __user *optval, unsigned int optlen)
{
	u8 *val;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	val = memdup_user(optval, optlen);
	if (IS_ERR(val))
		return PTR_ERR(val);

	lock_sock(sk);
	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
	release_sock(sk);

	kfree(val);
	return rc;
}

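/*
 * Usage sketch (illustrative): selecting a congestion-control module before
 * connect(). A single byte picks one CCID; several bytes form a preference
 * list for the feature negotiation performed above.
 *
 *	uint8_t ccid = 3;	// CCID-3, TCP-Friendly Rate Control
 *
 *	if (setsockopt(dccp_fd, SOL_DCCP, DCCP_SOCKOPT_CCID,
 *		       &ccid, sizeof(ccid)) < 0)
 *		perror("DCCP_SOCKOPT_CCID");
 */
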
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		if (sk->sk_state != DCCP_CLOSED)
			err = -EISCONN;
		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
			err = -EINVAL;
		else
			dp->dccps_qpolicy = val;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		if (val < 0)
			err = -EINVAL;
		else
			dp->dccps_tx_qlen = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		val = dp->dccps_qpolicy;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		val = dp->dccps_tx_qlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

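/*
 * Usage sketch (illustrative): querying the current maximum packet size,
 * which dccp_sendmsg() below enforces as a hard upper bound per message.
 *
 *	int mps;
 *	socklen_t len = sizeof(mps);
 *
 *	if (getsockopt(dccp_fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS,
 *		       &mps, &len) == 0)
 *		printf("largest sendable message: %d bytes\n", mps);
 */
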
int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);

	/*
	 * Assign an (opaque) qpolicy priority value to skb->priority.
	 *
	 * We are overloading this skb field for use with the qpolicy
	 * subsystem. The skb->priority is normally used for the SO_PRIORITY
	 * option, which is initialised from sk_priority. Since the assignment
	 * of sk_priority to skb->priority happens later (on layer 3), we
	 * overload this field for use with queueing priorities as long as
	 * the skb is on layer 4. The default priority value (if nothing is
	 * set) is 0.
	 */
	skb->priority = 0;

	for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {

		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;

		if (cmsg->cmsg_level != SOL_DCCP)
			continue;

		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
			return -EINVAL;

		switch (cmsg->cmsg_type) {
		case DCCP_SCM_PRIORITY:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
				return -EINVAL;
			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}

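/*
 * Usage sketch (illustrative; the priority value 7 and the data buffer are
 * arbitrary): attaching a DCCP_SCM_PRIORITY ancillary value so a
 * priority-based qpolicy can order or drop queued packets.
 *
 *	char cbuf[CMSG_SPACE(sizeof(__u32))];
 *	struct iovec iov = { .iov_base = data, .iov_len = data_len };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *			      .msg_control = cbuf,
 *			      .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
 *
 *	cm->cmsg_level = SOL_DCCP;
 *	cm->cmsg_type  = DCCP_SCM_PRIORITY;
 *	cm->cmsg_len   = CMSG_LEN(sizeof(__u32));
 *	*(__u32 *)CMSG_DATA(cm) = 7;
 *	sendmsg(dccp_fd, &msg, 0);
 */
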
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (dccp_qpolicy_full(sk)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process() works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_msghdr_parse(msg, skb);
	if (rc != 0)
		goto out_discard;

	dccp_qpolicy_push(sk, skb);
	/*
	 * The xmit_timer is set if the TX CCID is rate-based and will expire
	 * when congestion control permits to release further packets into the
	 * network. Window-based CCIDs do not use this timer.
	 */
	if (!timer_pending(&dp->dccps_xmit_timer))
		dccp_write_xmit(sk);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, false);
		}
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, false);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

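/*
 * End-to-end usage sketch (illustrative; service code 42, server_addr and
 * buffer are hypothetical, error handling trimmed). This exercises the send
 * and receive paths implemented above; each call moves one datagram.
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	__be32 service = htonl(42);
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	connect(fd, (struct sockaddr *)&server_addr, sizeof(server_addr));
 *	send(fd, "hello", 5, 0);	// one packet, at most MPS bytes
 *	recv(fd, buf, sizeof(buf), 0);	// one packet, excess is truncated
 */
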
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

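/*
 * Server-side usage sketch (illustrative; addresses hypothetical, error
 * handling trimmed). The listen() call lands in inet_dccp_listen() above.
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	__be32 service = htonl(42);	// must match the client's code
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 5);
 *	client_fd = accept(fd, NULL, NULL);
 */
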
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}

void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time),
	 * - normal termination but queue could not be flushed within time limit
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void __percpu **)dccp_statistics,
			     sizeof(struct dccp_mib),
			     __alignof__(struct dccp_mib));
}

static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void __percpu **)dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc;

	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
	rc = percpu_counter_init(&dccp_orphan_count, 0);
	if (rc)
		goto out_fail;
	rc = -ENOBUFS;
	inet_hashinfo_init(&dccp_hashinfo);
	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_free_percpu;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (totalram_pages >= (128 * 1024))
		goal = totalram_pages >> (21 - PAGE_SHIFT);
	else
		goal = totalram_pages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);

		while (hash_size & (hash_size - 1))
			hash_size--;
		dccp_hashinfo.ehash_mask = hash_size - 1;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) {
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
		INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = ccid_initialize_builtins();
	if (rc)
		goto out_sysctl_exit;

	dccp_timestamping_init();

	return 0;

out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_percpu:
	percpu_counter_destroy(&dccp_orphan_count);
out_fail:
	dccp_hashinfo.bhash = NULL;
	dccp_hashinfo.ehash = NULL;
	dccp_hashinfo.bind_bucket_cachep = NULL;
	return rc;
}
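
/*
 * Worked sizing example (illustrative arithmetic; assumes 4 KiB pages,
 * PAGE_SHIFT == 12, and a 16-byte struct inet_ehash_bucket on 64-bit):
 * with 512 MiB of RAM, totalram_pages = 131072 >= 128 * 1024, so
 * goal = 131072 >> (21 - 12) = 256 pages (1 MiB of ehash memory). The
 * smallest ehash_order with (1UL << ehash_order) >= 256 is 8, giving
 * hash_size = 256 * 4096 / 16 = 65536 chains (already a power of two),
 * hence ehash_mask = 65535.
 */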

static void __exit dccp_fini(void)
{
	ccid_cleanup_builtins();
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order((dccp_hashinfo.ehash_mask + 1) *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
	percpu_counter_destroy(&dccp_orphan_count);
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <[email protected]>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");