Linux Kernel 3.7.1
inet_connection_sock.c
1 /*
2  * INET An implementation of the TCP/IP protocol suite for the LINUX
3  * operating system. INET is implemented using the BSD Socket
4  * interface as the means of communication with the user level.
5  *
6  * Support for INET connection oriented protocols.
7  *
8  * Authors: See the TCP sources
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License
12  * as published by the Free Software Foundation; either version
13  * 2 of the License, or(at your option) any later version.
14  */
15 
16 #include <linux/module.h>
17 #include <linux/jhash.h>
18 
19 #include <net/inet_connection_sock.h>
20 #include <net/inet_hashtables.h>
21 #include <net/inet_timewait_sock.h>
22 #include <net/ip.h>
23 #include <net/route.h>
24 #include <net/tcp_states.h>
25 #include <net/xfrm.h>
26 
27 #ifdef INET_CSK_DEBUG
28 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
29 EXPORT_SYMBOL(inet_csk_timer_bug_msg);
30 #endif
31 
32 /*
33  * This struct holds the first and last local port number.
34  */
35 struct local_ports sysctl_local_ports __read_mostly = {
36  .lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock),
37  .range = { 32768, 61000 },
38 };
39 
40 unsigned long *sysctl_local_reserved_ports;
41 EXPORT_SYMBOL(sysctl_local_reserved_ports);
42 
43 void inet_get_local_port_range(int *low, int *high)
44 {
45  unsigned int seq;
46 
47  do {
48  seq = read_seqbegin(&sysctl_local_ports.lock);
49 
50  *low = sysctl_local_ports.range[0];
51  *high = sysctl_local_ports.range[1];
52  } while (read_seqretry(&sysctl_local_ports.lock, seq));
53 }
55 
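The read loop above is one half of a seqlock pairing: it retries whenever a writer raced with it, so callers never see a half-updated {low, high} pair. As a hedged sketch only (the real sysctl write handler lives outside this file, and set_local_port_range_sketch() is a hypothetical name), the matching writer would look roughly like this:

	/* Sketch: update both ends of the range atomically with respect to
	 * readers spinning in inet_get_local_port_range().
	 */
	static void set_local_port_range_sketch(int low, int high)
	{
		write_seqlock(&sysctl_local_ports.lock);
		sysctl_local_ports.range[0] = low;
		sysctl_local_ports.range[1] = high;
		write_sequnlock(&sysctl_local_ports.lock);
	}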
56 int inet_csk_bind_conflict(const struct sock *sk,
57  const struct inet_bind_bucket *tb, bool relax)
58 {
59  struct sock *sk2;
60  struct hlist_node *node;
61  int reuse = sk->sk_reuse;
62 
63  /*
64  * Unlike other sk lookup places we do not check
65  * for sk_net here, since _all_ the socks listed
66  * in tb->owners list belong to the same net - the
67  * one this bucket belongs to.
68  */
69 
70  sk_for_each_bound(sk2, node, &tb->owners) {
71  if (sk != sk2 &&
72  !inet_v6_ipv6only(sk2) &&
73  (!sk->sk_bound_dev_if ||
74  !sk2->sk_bound_dev_if ||
75  sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
76  if (!reuse || !sk2->sk_reuse ||
77  sk2->sk_state == TCP_LISTEN) {
78  const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
79  if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
80  sk2_rcv_saddr == sk_rcv_saddr(sk))
81  break;
82  }
83  if (!relax && reuse && sk2->sk_reuse &&
84  sk2->sk_state != TCP_LISTEN) {
85  const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
86 
87  if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
88  sk2_rcv_saddr == sk_rcv_saddr(sk))
89  break;
90  }
91  }
92  }
93  return node != NULL;
94 }
96 
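In userspace terms, the rules above mean: two TCP sockets may share a local port as long as their bound addresses are distinct and non-wildcard; if the addresses overlap, both sockets must have SO_REUSEADDR set and the already-bound socket must not be in LISTEN state. A hedged, illustration-only userspace sketch (bind_port() is a made-up helper, not kernel code):

	#include <arpa/inet.h>
	#include <netinet/in.h>
	#include <sys/socket.h>

	/* Returns 0 on success, -1 with errno == EADDRINUSE on a bind conflict. */
	static int bind_port(const char *addr, unsigned short port)
	{
		int one = 1;
		int fd = socket(AF_INET, SOCK_STREAM, 0);
		struct sockaddr_in sin = { .sin_family = AF_INET,
					   .sin_port = htons(port) };

		inet_pton(AF_INET, addr, &sin.sin_addr);
		setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
		return bind(fd, (struct sockaddr *)&sin, sizeof(sin));
	}

	/* bind_port("127.0.0.1", 8080) and bind_port("192.168.1.1", 8080) can both
	 * succeed (different rcv_saddr); a second bind_port("127.0.0.1", 8080) only
	 * succeeds while the first socket has not started listening.
	 */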
97 /* Obtain a reference to a local port for the given sock.
98  * If snum is zero, select any available local port.
99  */
100 int inet_csk_get_port(struct sock *sk, unsigned short snum)
101 {
102  struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
103  struct inet_bind_hashbucket *head;
104  struct hlist_node *node;
105  struct inet_bind_bucket *tb;
106  int ret, attempts = 5;
107  struct net *net = sock_net(sk);
108  int smallest_size = -1, smallest_rover;
109 
110  local_bh_disable();
111  if (!snum) {
112  int remaining, rover, low, high;
113 
114 again:
115  inet_get_local_port_range(&low, &high);
116  remaining = (high - low) + 1;
117  smallest_rover = rover = net_random() % remaining + low;
118 
119  smallest_size = -1;
120  do {
121  if (inet_is_reserved_local_port(rover))
122  goto next_nolock;
123  head = &hashinfo->bhash[inet_bhashfn(net, rover,
124  hashinfo->bhash_size)];
125  spin_lock(&head->lock);
126  inet_bind_bucket_for_each(tb, node, &head->chain)
127  if (net_eq(ib_net(tb), net) && tb->port == rover) {
128  if (tb->fastreuse > 0 &&
129  sk->sk_reuse &&
130  sk->sk_state != TCP_LISTEN &&
131  (tb->num_owners < smallest_size || smallest_size == -1)) {
132  smallest_size = tb->num_owners;
133  smallest_rover = rover;
134  if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
135  !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
136  snum = smallest_rover;
137  goto tb_found;
138  }
139  }
140  if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
141  snum = rover;
142  goto tb_found;
143  }
144  goto next;
145  }
146  break;
147  next:
148  spin_unlock(&head->lock);
149  next_nolock:
150  if (++rover > high)
151  rover = low;
152  } while (--remaining > 0);
153 
154  /* Exhausted local port range during search? It is not
155  * possible for us to be holding one of the bind hash
156  * locks if this test triggers, because if 'remaining'
157  * drops to zero, we broke out of the do/while loop at
158  * the top level, not from the 'break;' statement.
159  */
160  ret = 1;
161  if (remaining <= 0) {
162  if (smallest_size != -1) {
163  snum = smallest_rover;
164  goto have_snum;
165  }
166  goto fail;
167  }
168  /* OK, here is the one we will use. HEAD is
169  * non-NULL and we hold its mutex.
170  */
171  snum = rover;
172  } else {
173 have_snum:
174  head = &hashinfo->bhash[inet_bhashfn(net, snum,
175  hashinfo->bhash_size)];
176  spin_lock(&head->lock);
177  inet_bind_bucket_for_each(tb, node, &head->chain)
178  if (net_eq(ib_net(tb), net) && tb->port == snum)
179  goto tb_found;
180  }
181  tb = NULL;
182  goto tb_not_found;
183 tb_found:
184  if (!hlist_empty(&tb->owners)) {
185  if (sk->sk_reuse == SK_FORCE_REUSE)
186  goto success;
187 
188  if (tb->fastreuse > 0 &&
189  sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
190  smallest_size == -1) {
191  goto success;
192  } else {
193  ret = 1;
194  if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
195  if (sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
196  smallest_size != -1 && --attempts >= 0) {
197  spin_unlock(&head->lock);
198  goto again;
199  }
200 
201  goto fail_unlock;
202  }
203  }
204  }
205 tb_not_found:
206  ret = 1;
207  if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep,
208  net, head, snum)) == NULL)
209  goto fail_unlock;
210  if (hlist_empty(&tb->owners)) {
211  if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
212  tb->fastreuse = 1;
213  else
214  tb->fastreuse = 0;
215  } else if (tb->fastreuse &&
216  (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
217  tb->fastreuse = 0;
218 success:
219  if (!inet_csk(sk)->icsk_bind_hash)
220  inet_bind_hash(sk, tb, snum);
221  WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
222  ret = 0;
223 
224 fail_unlock:
225  spin_unlock(&head->lock);
226 fail:
227  local_bh_enable();
228  return ret;
229 }
231 
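Userspace reaches the snum == 0 branch above by binding port 0: the kernel then picks a free port from the range returned by inet_get_local_port_range(), and getsockname() reveals the choice. Illustration only (plain userspace C, not part of this file):

	#include <stdio.h>
	#include <netinet/in.h>
	#include <sys/socket.h>

	int main(void)
	{
		int fd = socket(AF_INET, SOCK_STREAM, 0);
		struct sockaddr_in sin = { .sin_family = AF_INET }; /* 0.0.0.0, port 0 */
		socklen_t len = sizeof(sin);

		if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) == 0 &&
		    getsockname(fd, (struct sockaddr *)&sin, &len) == 0)
			printf("kernel chose port %u\n", (unsigned)ntohs(sin.sin_port));
		return 0;
	}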
232 /*
233  * Wait for an incoming connection, avoid race conditions. This must be called
234  * with the socket locked.
235  */
236 static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
237 {
238  struct inet_connection_sock *icsk = inet_csk(sk);
239  DEFINE_WAIT(wait);
240  int err;
241 
242  /*
243  * True wake-one mechanism for incoming connections: only
244  * one process gets woken up, not the 'whole herd'.
245  * Since we do not 'race & poll' for established sockets
246  * anymore, the common case will execute the loop only once.
247  *
248  * Subtle issue: "add_wait_queue_exclusive()" will be added
249  * after any current non-exclusive waiters, and we know that
250  * it will always _stay_ after any new non-exclusive waiters
251  * because all non-exclusive waiters are added at the
252  * beginning of the wait-queue. As such, it's ok to "drop"
253  * our exclusiveness temporarily when we get woken up without
254  * having to remove and re-insert us on the wait queue.
255  */
256  for (;;) {
257  prepare_to_wait_exclusive(sk_sleep(sk), &wait,
258  TASK_INTERRUPTIBLE);
259  release_sock(sk);
260  if (reqsk_queue_empty(&icsk->icsk_accept_queue))
261  timeo = schedule_timeout(timeo);
262  lock_sock(sk);
263  err = 0;
264  if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
265  break;
266  err = -EINVAL;
267  if (sk->sk_state != TCP_LISTEN)
268  break;
269  err = sock_intr_errno(timeo);
270  if (signal_pending(current))
271  break;
272  err = -EAGAIN;
273  if (!timeo)
274  break;
275  }
276  finish_wait(sk_sleep(sk), &wait);
277  return err;
278 }
279 
280 /*
281  * This will accept the next outstanding connection.
282  */
283 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
284 {
285  struct inet_connection_sock *icsk = inet_csk(sk);
286  struct request_sock_queue *queue = &icsk->icsk_accept_queue;
287  struct sock *newsk;
288  struct request_sock *req;
289  int error;
290 
291  lock_sock(sk);
292 
293  /* We need to make sure that this socket is listening,
294  * and that it has something pending.
295  */
296  error = -EINVAL;
297  if (sk->sk_state != TCP_LISTEN)
298  goto out_err;
299 
300  /* Find already established connection */
301  if (reqsk_queue_empty(queue)) {
302  long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
303 
304  /* If this is a non-blocking socket, don't sleep */
305  error = -EAGAIN;
306  if (!timeo)
307  goto out_err;
308 
309  error = inet_csk_wait_for_connect(sk, timeo);
310  if (error)
311  goto out_err;
312  }
313  req = reqsk_queue_remove(queue);
314  newsk = req->sk;
315 
316  sk_acceptq_removed(sk);
317  if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) {
318  spin_lock_bh(&queue->fastopenq->lock);
319  if (tcp_rsk(req)->listener) {
320  /* We are still waiting for the final ACK from 3WHS
321  * so can't free req now. Instead, we set req->sk to
322  * NULL to signify that the child socket is taken
323  * so reqsk_fastopen_remove() will free the req
324  * when 3WHS finishes (or is aborted).
325  */
326  req->sk = NULL;
327  req = NULL;
328  }
329  spin_unlock_bh(&queue->fastopenq->lock);
330  }
331 out:
332  release_sock(sk);
333  if (req)
334  __reqsk_free(req);
335  return newsk;
336 out_err:
337  newsk = NULL;
338  req = NULL;
339  *err = error;
340  goto out;
341 }
343 
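For a non-blocking listener, sock_rcvtimeo() returns 0 above, so the empty-queue case surfaces to userspace as EAGAIN/EWOULDBLOCK instead of sleeping in inet_csk_wait_for_connect(). A hedged userspace illustration of the usual retry pattern (accept_retry() is a made-up helper):

	#include <errno.h>
	#include <poll.h>
	#include <sys/socket.h>

	static int accept_retry(int listen_fd)
	{
		for (;;) {
			struct pollfd p = { .fd = listen_fd, .events = POLLIN };
			int fd = accept(listen_fd, NULL, NULL);

			if (fd >= 0 || (errno != EAGAIN && errno != EWOULDBLOCK))
				return fd;	/* a new connection, or a real error */
			poll(&p, 1, -1);	/* wait until the accept queue is non-empty */
		}
	}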
344 /*
345  * Using different timers for retransmit, delayed acks and probes.
346  * We may wish to use just one timer maintaining a list of expire jiffies
347  * to optimize.
348  */
349 void inet_csk_init_xmit_timers(struct sock *sk,
350  void (*retransmit_handler)(unsigned long),
351  void (*delack_handler)(unsigned long),
352  void (*keepalive_handler)(unsigned long))
353 {
354  struct inet_connection_sock *icsk = inet_csk(sk);
355 
356  setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler,
357  (unsigned long)sk);
358  setup_timer(&icsk->icsk_delack_timer, delack_handler,
359  (unsigned long)sk);
360  setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
361  icsk->icsk_pending = icsk->icsk_ack.pending = 0;
362 }
364 
365 void inet_csk_clear_xmit_timers(struct sock *sk)
366 {
367  struct inet_connection_sock *icsk = inet_csk(sk);
368 
369  icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0;
370 
371  sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
372  sk_stop_timer(sk, &icsk->icsk_delack_timer);
373  sk_stop_timer(sk, &sk->sk_timer);
374 }
376 
377 void inet_csk_delete_keepalive_timer(struct sock *sk)
378 {
379  sk_stop_timer(sk, &sk->sk_timer);
380 }
382 
383 void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
384 {
385  sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
386 }
388 
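The helpers above are how a connection-oriented protocol plugs its timer callbacks into the icsk timers; TCP, for example, wires in its write, delayed-ACK and keepalive handlers via tcp_init_xmit_timers() in net/ipv4/tcp_timer.c. A hedged sketch with hypothetical handler names (the my_* symbols are placeholders, not real kernel functions):

	static void my_retransmit_timer(unsigned long data)
	{
		/* struct sock *sk = (struct sock *)data; ... retransmit logic ... */
	}

	static void my_delack_timer(unsigned long data)    { /* delayed-ACK logic */ }
	static void my_keepalive_timer(unsigned long data) { /* keepalive probes  */ }

	static void my_proto_init_timers(struct sock *sk)
	{
		inet_csk_init_xmit_timers(sk, my_retransmit_timer,
					  my_delack_timer, my_keepalive_timer);
	}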
389 struct dst_entry *inet_csk_route_req(struct sock *sk,
390  struct flowi4 *fl4,
391  const struct request_sock *req)
392 {
393  struct rtable *rt;
394  const struct inet_request_sock *ireq = inet_rsk(req);
395  struct ip_options_rcu *opt = inet_rsk(req)->opt;
396  struct net *net = sock_net(sk);
397  int flags = inet_sk_flowi_flags(sk);
398 
399  flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
400  RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
401  sk->sk_protocol,
402  flags,
403  (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
404  ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
405  security_req_classify_flow(req, flowi4_to_flowi(fl4));
406  rt = ip_route_output_flow(net, fl4, sk);
407  if (IS_ERR(rt))
408  goto no_route;
409  if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
410  goto route_err;
411  return &rt->dst;
412 
413 route_err:
414  ip_rt_put(rt);
415 no_route:
416  IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
417  return NULL;
418 }
420 
421 struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
422  struct sock *newsk,
423  const struct request_sock *req)
424 {
425  const struct inet_request_sock *ireq = inet_rsk(req);
426  struct inet_sock *newinet = inet_sk(newsk);
427  struct ip_options_rcu *opt;
428  struct net *net = sock_net(sk);
429  struct flowi4 *fl4;
430  struct rtable *rt;
431 
432  fl4 = &newinet->cork.fl.u.ip4;
433 
434  rcu_read_lock();
435  opt = rcu_dereference(newinet->inet_opt);
436  flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
437  RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
438  sk->sk_protocol, inet_sk_flowi_flags(sk),
439  (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr,
440  ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport);
441  security_req_classify_flow(req, flowi4_to_flowi(fl4));
442  rt = ip_route_output_flow(net, fl4, sk);
443  if (IS_ERR(rt))
444  goto no_route;
445  if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
446  goto route_err;
447  rcu_read_unlock();
448  return &rt->dst;
449 
450 route_err:
451  ip_rt_put(rt);
452 no_route:
453  rcu_read_unlock();
454  IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
455  return NULL;
456 }
458 
459 static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
460  const u32 rnd, const u32 synq_hsize)
461 {
462  return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1);
463 }
464 
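Because the SYN table size is always a power of two (reqsk_queue_alloc() rounds the listen backlog up), the mask in inet_synq_hash() is equivalent to a modulo by the table size, just cheaper. A small illustration, assuming a 512-entry table and made-up input values:

	u32 h  = jhash_2words(0x0a000001, 80, 0x12345678); /* example raddr, rport, rnd */
	u32 b1 = h & (512 - 1);	/* mask form used by inet_synq_hash() */
	u32 b2 = h % 512;	/* equivalent modulo form */
	/* b1 == b2 for any h, because 512 is a power of two */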
465 #if IS_ENABLED(CONFIG_IPV6)
466 #define AF_INET_FAMILY(fam) ((fam) == AF_INET)
467 #else
468 #define AF_INET_FAMILY(fam) 1
469 #endif
470 
471 struct request_sock *inet_csk_search_req(const struct sock *sk,
472  struct request_sock ***prevp,
473  const __be16 rport, const __be32 raddr,
474  const __be32 laddr)
475 {
476  const struct inet_connection_sock *icsk = inet_csk(sk);
477  struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
478  struct request_sock *req, **prev;
479 
480  for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd,
481  lopt->nr_table_entries)];
482  (req = *prev) != NULL;
483  prev = &req->dl_next) {
484  const struct inet_request_sock *ireq = inet_rsk(req);
485 
486  if (ireq->rmt_port == rport &&
487  ireq->rmt_addr == raddr &&
488  ireq->loc_addr == laddr &&
489  AF_INET_FAMILY(req->rsk_ops->family)) {
490  WARN_ON(req->sk);
491  *prevp = prev;
492  break;
493  }
494  }
495 
496  return req;
497 }
499 
500 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
501  unsigned long timeout)
502 {
503  struct inet_connection_sock *icsk = inet_csk(sk);
504  struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
505  const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port,
506  lopt->hash_rnd, lopt->nr_table_entries);
507 
508  reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
509  inet_csk_reqsk_queue_added(sk, timeout);
510 }
512 
513 /* Only thing we need from tcp.h */
514 extern int sysctl_tcp_synack_retries;
515 
516 
517 /* Decide when to expire the request and when to resend SYN-ACK */
518 static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
519  const int max_retries,
520  const u8 rskq_defer_accept,
521  int *expire, int *resend)
522 {
523  if (!rskq_defer_accept) {
524  *expire = req->retrans >= thresh;
525  *resend = 1;
526  return;
527  }
528  *expire = req->retrans >= thresh &&
529  (!inet_rsk(req)->acked || req->retrans >= max_retries);
530  /*
531  * Do not resend while waiting for data after ACK;
532  * start resending at the end of the deferring period to give
533  * a last chance for data or an ACK to create an established socket.
534  */
535  *resend = !inet_rsk(req)->acked ||
536  req->retrans >= rskq_defer_accept - 1;
537 }
538 
539 void inet_csk_reqsk_queue_prune(struct sock *parent,
540  const unsigned long interval,
541  const unsigned long timeout,
542  const unsigned long max_rto)
543 {
544  struct inet_connection_sock *icsk = inet_csk(parent);
545  struct request_sock_queue *queue = &icsk->icsk_accept_queue;
546  struct listen_sock *lopt = queue->listen_opt;
547  int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
548  int thresh = max_retries;
549  unsigned long now = jiffies;
550  struct request_sock **reqp, *req;
551  int i, budget;
552 
553  if (lopt == NULL || lopt->qlen == 0)
554  return;
555 
556  /* Normally all the openreqs are young and become mature
557  * (i.e. converted to an established socket) within the first timeout.
558  * If the synack was not acknowledged for 1 second, it means
559  * one of the following: the synack was lost, the ack was lost,
560  * the rtt is high, or nobody planned to ack (i.e. synflood).
561  * When the server is a bit loaded, the queue is populated with old
562  * open requests, reducing the effective size of the queue.
563  * When the server is heavily loaded, the queue size reduces to zero
564  * after several minutes of work. This is not a synflood,
565  * it is normal operation. The solution is to prune
566  * entries that are too old, overriding the normal timeout, when
567  * the situation becomes dangerous.
568  *
569  * Essentially, we reserve half of the room for young
570  * embryos, and abort old ones without pity if the old
571  * ones are about to clog our table.
572  */
573  if (lopt->qlen>>(lopt->max_qlen_log-1)) {
574  int young = (lopt->qlen_young<<1);
575 
576  while (thresh > 2) {
577  if (lopt->qlen < young)
578  break;
579  thresh--;
580  young <<= 1;
581  }
582  }
583 
584  if (queue->rskq_defer_accept)
585  max_retries = queue->rskq_defer_accept;
586 
587  budget = 2 * (lopt->nr_table_entries / (timeout / interval));
588  i = lopt->clock_hand;
589 
590  do {
591  reqp=&lopt->syn_table[i];
592  while ((req = *reqp) != NULL) {
593  if (time_after_eq(now, req->expires)) {
594  int expire = 0, resend = 0;
595 
596  syn_ack_recalc(req, thresh, max_retries,
597  queue->rskq_defer_accept,
598  &expire, &resend);
599  req->rsk_ops->syn_ack_timeout(parent, req);
600  if (!expire &&
601  (!resend ||
602  !req->rsk_ops->rtx_syn_ack(parent, req, NULL) ||
603  inet_rsk(req)->acked)) {
604  unsigned long timeo;
605 
606  if (req->retrans++ == 0)
607  lopt->qlen_young--;
608  timeo = min((timeout << req->retrans), max_rto);
609  req->expires = now + timeo;
610  reqp = &req->dl_next;
611  continue;
612  }
613 
614  /* Drop this request */
615  inet_csk_reqsk_queue_unlink(parent, req, reqp);
616  reqsk_queue_removed(queue, req);
617  reqsk_free(req);
618  continue;
619  }
620  reqp = &req->dl_next;
621  }
622 
623  i = (i + 1) & (lopt->nr_table_entries - 1);
624 
625  } while (--budget > 0);
626 
627  lopt->clock_hand = i;
628 
629  if (lopt->qlen)
630  inet_csk_reset_keepalive_timer(parent, interval);
631 }
633 
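To see what the budget above works out to in practice, here is a worked example assuming TCP's usual parameters in this kernel (interval = TCP_SYNQ_INTERVAL = HZ/5, timeout = TCP_TIMEOUT_INIT = 1*HZ) and a 512-entry SYN table; the exact numbers depend on the caller's configuration:

	/* budget = 2 * (nr_table_entries / (timeout / interval))
	 *        = 2 * (512 / (HZ / (HZ / 5)))
	 *        = 2 * (512 / 5) = 204 buckets per call
	 *
	 * With one call every HZ/5 jiffies, all 512 buckets are visited about
	 * every (512 / 204) * HZ/5 jiffies, i.e. roughly timeout / 2 = 0.5 s.
	 */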
634 /**
635  * inet_csk_clone_lock - clone an inet socket, and lock its clone
636  * @sk: the socket to clone
637  * @req: request_sock
638  * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
639  *
640  * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
641  */
642 struct sock *inet_csk_clone_lock(const struct sock *sk,
643  const struct request_sock *req,
644  const gfp_t priority)
645 {
646  struct sock *newsk = sk_clone_lock(sk, priority);
647 
648  if (newsk != NULL) {
649  struct inet_connection_sock *newicsk = inet_csk(newsk);
650 
651  newsk->sk_state = TCP_SYN_RECV;
652  newicsk->icsk_bind_hash = NULL;
653 
654  inet_sk(newsk)->inet_dport = inet_rsk(req)->rmt_port;
655  inet_sk(newsk)->inet_num = ntohs(inet_rsk(req)->loc_port);
656  inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port;
657  newsk->sk_write_space = sk_stream_write_space;
658 
659  newicsk->icsk_retransmits = 0;
660  newicsk->icsk_backoff = 0;
661  newicsk->icsk_probes_out = 0;
662 
663  /* Deinitialize accept_queue to trap illegal accesses. */
664  memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
665 
666  security_inet_csk_clone(newsk, req);
667  }
668  return newsk;
669 }
671 
672 /*
673  * At this point, there should be no process reference to this
674  * socket, and thus no user references at all. Therefore we
675  * can assume the socket waitqueue is inactive and nobody will
676  * try to jump onto it.
677  */
678 void inet_csk_destroy_sock(struct sock *sk)
679 {
680  WARN_ON(sk->sk_state != TCP_CLOSE);
681  WARN_ON(!sock_flag(sk, SOCK_DEAD));
682 
683  /* It cannot be in hash table! */
684  WARN_ON(!sk_unhashed(sk));
685 
686  /* If inet_sk(sk)->inet_num is non-zero, it must be bound */
687  WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash);
688 
689  sk->sk_prot->destroy(sk);
690 
691  sk_stream_kill_queues(sk);
692 
693  xfrm_sk_free_policy(sk);
694 
695  sk_refcnt_debug_release(sk);
696 
697  percpu_counter_dec(sk->sk_prot->orphan_count);
698  sock_put(sk);
699 }
701 
702 int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
703 {
704  struct inet_sock *inet = inet_sk(sk);
705  struct inet_connection_sock *icsk = inet_csk(sk);
706  int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
707 
708  if (rc != 0)
709  return rc;
710 
711  sk->sk_max_ack_backlog = 0;
712  sk->sk_ack_backlog = 0;
713  inet_csk_delack_init(sk);
714 
715  /* There is a race window here: we announce ourselves listening,
716  * but this transition is still not validated by get_port().
717  * It is OK, because this socket enters the hash table only
718  * after validation is complete.
719  */
720  sk->sk_state = TCP_LISTEN;
721  if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
722  inet->inet_sport = htons(inet->inet_num);
723 
724  sk_dst_reset(sk);
725  sk->sk_prot->hash(sk);
726 
727  return 0;
728  }
729 
730  sk->sk_state = TCP_CLOSE;
731  __reqsk_queue_destroy(&icsk->icsk_accept_queue);
732  return -EADDRINUSE;
733 }
735 
736 /*
737  * This routine closes sockets which have been at least partially
738  * opened, but not yet accepted.
739  */
740 void inet_csk_listen_stop(struct sock *sk)
741 {
742  struct inet_connection_sock *icsk = inet_csk(sk);
743  struct request_sock_queue *queue = &icsk->icsk_accept_queue;
744  struct request_sock *acc_req;
745  struct request_sock *req;
746 
747  inet_csk_delete_keepalive_timer(sk);
748 
749  /* make all the listen_opt local to us */
750  acc_req = reqsk_queue_yank_acceptq(queue);
751 
752  /* Following specs, it would be better either to send FIN
753  * (and enter FIN-WAIT-1, it is normal close)
754  * or to send active reset (abort).
755  * Certainly, it is pretty dangerous while synflood, but it is
756  * bad justification for our negligence 8)
757  * To be honest, we are not able to make either
758  * of the variants now. --ANK
759  */
760  reqsk_queue_destroy(queue);
761 
762  while ((req = acc_req) != NULL) {
763  struct sock *child = req->sk;
764 
765  acc_req = req->dl_next;
766 
767  local_bh_disable();
768  bh_lock_sock(child);
769  WARN_ON(sock_owned_by_user(child));
770  sock_hold(child);
771 
772  sk->sk_prot->disconnect(child, O_NONBLOCK);
773 
774  sock_orphan(child);
775 
776  percpu_counter_inc(sk->sk_prot->orphan_count);
777 
778  if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) {
779  BUG_ON(tcp_sk(child)->fastopen_rsk != req);
780  BUG_ON(sk != tcp_rsk(req)->listener);
781 
782  /* Paranoid, to prevent race condition if
783  * an inbound pkt destined for child is
784  * blocked by sock lock in tcp_v4_rcv().
785  * Also to satisfy an assertion in
786  * tcp_v4_destroy_sock().
787  */
788  tcp_sk(child)->fastopen_rsk = NULL;
789  sock_put(sk);
790  }
791  inet_csk_destroy_sock(child);
792 
793  bh_unlock_sock(child);
794  local_bh_enable();
795  sock_put(child);
796 
797  sk_acceptq_removed(sk);
798  __reqsk_free(req);
799  }
800  if (queue->fastopenq != NULL) {
801  /* Free all the reqs queued in rskq_rst_head. */
802  spin_lock_bh(&queue->fastopenq->lock);
803  acc_req = queue->fastopenq->rskq_rst_head;
804  queue->fastopenq->rskq_rst_head = NULL;
805  spin_unlock_bh(&queue->fastopenq->lock);
806  while ((req = acc_req) != NULL) {
807  acc_req = req->dl_next;
808  __reqsk_free(req);
809  }
810  }
811  WARN_ON(sk->sk_ack_backlog);
812 }
814 
815 void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
816 {
817  struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
818  const struct inet_sock *inet = inet_sk(sk);
819 
820  sin->sin_family = AF_INET;
821  sin->sin_addr.s_addr = inet->inet_daddr;
822  sin->sin_port = inet->inet_dport;
823 }
825 
826 #ifdef CONFIG_COMPAT
827 int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
828  char __user *optval, int __user *optlen)
829 {
830  const struct inet_connection_sock *icsk = inet_csk(sk);
831 
832  if (icsk->icsk_af_ops->compat_getsockopt != NULL)
833  return icsk->icsk_af_ops->compat_getsockopt(sk, level, optname,
834  optval, optlen);
835  return icsk->icsk_af_ops->getsockopt(sk, level, optname,
836  optval, optlen);
837 }
839 
840 int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
841  char __user *optval, unsigned int optlen)
842 {
843  const struct inet_connection_sock *icsk = inet_csk(sk);
844 
845  if (icsk->icsk_af_ops->compat_setsockopt != NULL)
846  return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname,
847  optval, optlen);
848  return icsk->icsk_af_ops->setsockopt(sk, level, optname,
849  optval, optlen);
850 }
852 #endif
853 
854 static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl)
855 {
856  const struct inet_sock *inet = inet_sk(sk);
857  const struct ip_options_rcu *inet_opt;
858  __be32 daddr = inet->inet_daddr;
859  struct flowi4 *fl4;
860  struct rtable *rt;
861 
862  rcu_read_lock();
863  inet_opt = rcu_dereference(inet->inet_opt);
864  if (inet_opt && inet_opt->opt.srr)
865  daddr = inet_opt->opt.faddr;
866  fl4 = &fl->u.ip4;
867  rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr,
868  inet->inet_saddr, inet->inet_dport,
869  inet->inet_sport, sk->sk_protocol,
870  RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
871  if (IS_ERR(rt))
872  rt = NULL;
873  if (rt)
874  sk_setup_caps(sk, &rt->dst);
875  rcu_read_unlock();
876 
877  return &rt->dst;
878 }
879 
880 struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
881 {
882  struct dst_entry *dst = __sk_dst_check(sk, 0);
883  struct inet_sock *inet = inet_sk(sk);
884 
885  if (!dst) {
886  dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
887  if (!dst)
888  goto out;
889  }
890  dst->ops->update_pmtu(dst, sk, NULL, mtu);
891 
892  dst = __sk_dst_check(sk, 0);
893  if (!dst)
894  dst = inet_csk_rebuild_route(sk, &inet->cork.fl);
895 out:
896  return dst;
897 }
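A hedged sketch of how a transport protocol might use inet_csk_update_pmtu() from its ICMP "fragmentation needed" handling; mtu_reduced_sketch() is a placeholder name (TCP's real handler is tcp_v4_mtu_reduced() in net/ipv4/tcp_ipv4.c):

	static void mtu_reduced_sketch(struct sock *sk, u32 mtu)
	{
		struct dst_entry *dst = inet_csk_update_pmtu(sk, mtu);

		if (!dst)
			return;		/* route could not be rebuilt */

		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
			/* The path MTU really shrank: the protocol would re-sync
			 * its segment size here (TCP uses tcp_sync_mss()).
			 */
		}
	}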