Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
transport.c
Go to the documentation of this file.
1 /* SCTP kernel implementation
2  * Copyright (c) 1999-2000 Cisco, Inc.
3  * Copyright (c) 1999-2001 Motorola, Inc.
4  * Copyright (c) 2001-2003 International Business Machines Corp.
5  * Copyright (c) 2001 Intel Corp.
6  * Copyright (c) 2001 La Monte H.P. Yarroll
7  *
8  * This file is part of the SCTP kernel implementation
9  *
10  * This module provides the abstraction for an SCTP transport representing
11  * a remote transport address. For local transport addresses, we just use
12  * union sctp_addr.
13  *
14  * This SCTP implementation is free software;
15  * you can redistribute it and/or modify it under the terms of
16  * the GNU General Public License as published by
17  * the Free Software Foundation; either version 2, or (at your option)
18  * any later version.
19  *
20  * This SCTP implementation is distributed in the hope that it
21  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
22  * ************************
23  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
24  * See the GNU General Public License for more details.
25  *
26  * You should have received a copy of the GNU General Public License
27  * along with GNU CC; see the file COPYING. If not, write to
28  * the Free Software Foundation, 59 Temple Place - Suite 330,
29  * Boston, MA 02111-1307, USA.
30  *
31  * Please send any bug reports or fixes you make to the
32  * email address(es):
33  * lksctp developers <[email protected]>
34  *
35  * Or submit a bug report through the following website:
36  * http://www.sf.net/projects/lksctp
37  *
38  * Written or modified by:
39  * La Monte H.P. Yarroll <[email protected]>
40  * Karl Knutson <[email protected]>
41  * Jon Grimm <[email protected]>
42  * Xingang Guo <[email protected]>
43  * Hui Huang <[email protected]>
44  * Sridhar Samudrala <[email protected]>
45  * Ardelle Fan <[email protected]>
46  *
47  * We will try to fix any bugs reported to us, and any fixes shared with us
48  * will be incorporated into the next SCTP release.
49  */
50 
51 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
52 
53 #include <linux/slab.h>
54 #include <linux/types.h>
55 #include <linux/random.h>
56 #include <net/sctp/sctp.h>
57 #include <net/sctp/sm.h>
58 
59 /* 1st Level Abstractions. */
60 
61 /* Initialize a new transport from provided memory.
 *
 * Fills in the caller-supplied @peer for the remote address @addr:
 *   - copies the address and looks up the address-family operations
 *     from addr->sa.sa_family;
 *   - zeroes the cached source address and the SACK generation;
 *   - seeds the RTO, pathmaxrxt and pf_retrans from the per-namespace
 *     SCTP settings reachable through @net;
 *   - initializes the transmitted/send_ready/transports list heads;
 *   - sets up the protocol-unreachable timer with @peer as its argument;
 *   - fills hb_nonce with random bytes and sets the refcount to 1.
 *
 * @gfp is not referenced by the visible code.
 *
 * Returns @peer; no failure path is visible in this body.
 *
 * NOTE(review): the embedded listing numbers skip 83, 86-87, 97 and 99,
 * so this copy has lost lines (see the notes at each gap below).  The
 * block does not compile as it stands; restore the dropped lines from
 * the pristine file before building.
 */
62 static struct sctp_transport *sctp_transport_init(struct net *net,
63  struct sctp_transport *peer,
64  const union sctp_addr *addr,
65  gfp_t gfp)
66 {
67  /* Copy in the address. */
68  peer->ipaddr = *addr;
69  peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
70  memset(&peer->saddr, 0, sizeof(union sctp_addr));
71 
72  peer->sack_generation = 0;
73 
74  /* From 6.3.1 RTO Calculation:
75  *
76  * C1) Until an RTT measurement has been made for a packet sent to the
77  * given destination transport address, set RTO to the protocol
78  * parameter 'RTO.Initial'.
79  */
80  peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
81 
82  peer->last_time_heard = jiffies;
/* NOTE(review): listing line 83 is missing here. */
84 
85  peer->param_flags = SPP_HB_DISABLE |
/* NOTE(review): listing lines 86-87, the rest of this OR-expression and
 * its terminating semicolon, are missing.
 */
88 
89  /* Initialize the default path max_retrans. */
90  peer->pathmaxrxt = net->sctp.max_retrans_path;
91  peer->pf_retrans = net->sctp.pf_retrans;
92 
93  INIT_LIST_HEAD(&peer->transmitted);
94  INIT_LIST_HEAD(&peer->send_ready);
95  INIT_LIST_HEAD(&peer->transports);
96 
/* NOTE(review): listing lines 97 and 99 are missing.  The two dangling
 * "(unsigned long)peer);" fragments below are the tails of two calls
 * whose first lines were dropped - presumably setup_timer() calls like
 * the one that follows; confirm against the pristine file.
 */
98  (unsigned long)peer);
100  (unsigned long)peer);
101  setup_timer(&peer->proto_unreach_timer,
102  sctp_generate_proto_unreach_event, (unsigned long)peer);
103 
104  /* Initialize the 64-bit random nonce sent with heartbeat. */
105  get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
106 
107  /* The initial reference belongs to whoever receives @peer. */
107  atomic_set(&peer->refcnt, 1);
108 
109  return peer;
110 }
111 
112 /* Allocate and initialize a new transport.
 *
 * Allocates a struct sctp_transport with t_new() using @gfp, then hands
 * it to sctp_transport_init() together with net, @addr and @gfp.  On
 * success the transport is flagged as malloced, the transport debug
 * object counter is incremented, and the transport is returned.
 *
 * Returns NULL when the allocation fails, or when initialization fails
 * (in which case the memory is released with kfree() first).
 *
 * NOTE(review): listing line 113 - the line carrying the return type,
 * the function name and the first parameter - is missing from this
 * copy.  The body uses `net`, so that parameter was presumably declared
 * on the dropped line; restore it from the pristine file.
 */
114  const union sctp_addr *addr,
115  gfp_t gfp)
116 {
117  struct sctp_transport *transport;
118 
119  transport = t_new(struct sctp_transport, gfp);
120  if (!transport)
121  goto fail;
122 
123  if (!sctp_transport_init(net, transport, addr, gfp))
124  goto fail_init;
125 
126  transport->malloced = 1;
127  SCTP_DBG_OBJCNT_INC(transport);
128 
129  return transport;
130 
131 fail_init:
132  kfree(transport);
133 
134 fail:
135  return NULL;
136 }
137 
138 /* This transport is no longer needed. Free it now if possible, or
139  * defer the release until its last reference is dropped.
 *
 * Marks the transport dead, cancels its three timers and finally drops
 * one transport reference.  For the heartbeat and T3-rtx timers a
 * transport reference is dropped when del_timer() returns nonzero; for
 * the protocol-unreachable timer an association reference is dropped
 * instead.  Presumably each of those references was taken when the
 * corresponding timer was armed - confirm against the arming sites.
 *
 * NOTE(review): listing line 141, the function's signature line, is
 * missing from this copy; the body refers to its argument as
 * `transport`.
140  */
142 {
143  transport->dead = 1;
144 
145  /* Try to delete the heartbeat timer. */
146  if (del_timer(&transport->hb_timer))
147  sctp_transport_put(transport);
148 
149  /* Delete the T3_rtx timer if it's active.
150  * There is no point in not doing this now and letting
151  * structure hang around in memory since we know
152  * the transport is going away.
153  */
154  if (timer_pending(&transport->T3_rtx_timer) &&
155  del_timer(&transport->T3_rtx_timer))
156  sctp_transport_put(transport);
157 
158  /* Delete the ICMP proto unreachable timer if it's active. */
159  if (timer_pending(&transport->proto_unreach_timer) &&
160  del_timer(&transport->proto_unreach_timer))
161  sctp_association_put(transport->asoc);
162 
163  /* Drop one more transport reference; sctp_transport_put() destroys
 * the transport once the count reaches zero.
 */
163  sctp_transport_put(transport);
164 }
165 
166 /* Destroy the transport data structure.
167  * Assumes there are no more users of this structure.
168  */
169 static void sctp_transport_destroy(struct sctp_transport *transport)
170 {
171  SCTP_ASSERT(transport->dead, "Transport is not dead", return);
172 
173  if (transport->asoc)
174  sctp_association_put(transport->asoc);
175 
176  sctp_packet_free(&transport->packet);
177 
178  dst_release(transport->dst);
179  kfree(transport);
180  SCTP_DBG_OBJCNT_DEC(transport);
181 }
182 
183 /* Start T3_rtx timer if it is not already running and update the heartbeat
184  * timer. This routine is called every time a DATA chunk is sent.
 *
 * Whenever mod_timer() returns zero for either timer, a transport
 * reference is taken with sctp_transport_hold().  Presumably a zero
 * return means the timer was not pending before the call, so the
 * reference accounts for the newly armed timer - confirm against the
 * timer API documentation.
 *
 * NOTE(review): listing line 186, the function's signature line, is
 * missing from this copy; the body refers to its argument as
 * `transport`.
185  */
187 {
188  /* RFC 2960 6.3.2 Retransmission Timer Rules
189  *
190  * R1) Every time a DATA chunk is sent to any address(including a
191  * retransmission), if the T3-rtx timer of that address is not running
192  * start it running so that it will expire after the RTO of that
193  * address.
194  */
195 
196  if (!timer_pending(&transport->T3_rtx_timer))
197  if (!mod_timer(&transport->T3_rtx_timer,
198  jiffies + transport->rto))
199  sctp_transport_hold(transport);
200 
201  /* When a data chunk is sent, reset the heartbeat interval.
 * The new expiry is whatever sctp_transport_timeout() computes.
 */
202  if (!mod_timer(&transport->hb_timer,
203  sctp_transport_timeout(transport)))
204  sctp_transport_hold(transport);
205 }
206 
207 /* This transport has been assigned to an association.
208  * Record the owning association in the transport and take a reference
209  * on that association.
 *
 * The visible body does nothing else: it copies no fields from the
 * association or from the socket.
 *
 * NOTE(review): listing line 211 - the line carrying the return type,
 * the function name and the first parameter - is missing from this
 * copy; the body refers to that parameter as `transport`.
210  */
212  struct sctp_association *asoc)
213 {
214  transport->asoc = asoc;
215  sctp_association_hold(asoc);
216 }
217 
218 /* Initialize the pmtu of a transport. */
219 void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
220 {
221  /* If we don't have a fresh route, look one up */
222  if (!transport->dst || transport->dst->obsolete) {
223  dst_release(transport->dst);
224  transport->af_specific->get_dst(transport, &transport->saddr,
225  &transport->fl, sk);
226  }
227 
228  if (transport->dst) {
229  transport->pathmtu = dst_mtu(transport->dst);
230  } else
231  transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
232 }
233 
/* Apply a newly reported path MTU to a transport and its route.
 *
 * A pmtu below SCTP_DEFAULT_MINSEGMENT is logged with pr_warn();
 * otherwise t->pathmtu is set to pmtu.  The cached route is then
 * validated with sctp_transport_dst_check(), and a fresh lookup through
 * the address family's get_dst() hook is made when that check returns
 * NULL.  If the check returned a route, its update_pmtu() operation is
 * called with the pmtu value as passed in, and the route is validated
 * (and, if needed, looked up) once more.
 *
 * NOTE(review): listing lines 234 (the signature), 241 (the last
 * pr_warn() argument and closing parenthesis) and 245 (the statement
 * announced by the "use default minimum" comment) are missing from this
 * copy.  The body uses `sk`, `t` and `pmtu` as its parameters.
 */
235 {
236  struct dst_entry *dst;
237 
238  if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
239  pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
240  __func__, pmtu,
/* NOTE(review): listing line 241 is missing; the pr_warn() call above
 * is left unterminated.
 */
242  /* Use default minimum segment size and disable
243  * pmtu discovery on this transport.
244  */
/* NOTE(review): listing line 245 is missing; no statement implementing
 * the comment above is visible.
 */
246  } else {
247  t->pathmtu = pmtu;
248  }
249 
250  dst = sctp_transport_dst_check(t);
251  if (!dst)
252  t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
253 
/* The local `dst` is not refreshed after the lookup above, so the
 * branch below only runs when the first check returned a route.
 */
254  if (dst) {
255  dst->ops->update_pmtu(dst, sk, NULL, pmtu);
256 
257  dst = sctp_transport_dst_check(t);
258  if (!dst)
259  t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
260  }
261 }
262 
263 /* Caches the dst entry and source address for a transport's destination
264  * address.
265  */
266 void sctp_transport_route(struct sctp_transport *transport,
267  union sctp_addr *saddr, struct sctp_sock *opt)
268 {
269  struct sctp_association *asoc = transport->asoc;
270  struct sctp_af *af = transport->af_specific;
271 
272  af->get_dst(transport, saddr, &transport->fl, sctp_opt2sk(opt));
273 
274  if (saddr)
275  memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
276  else
277  af->get_saddr(opt, transport, &transport->fl);
278 
279  if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
280  return;
281  }
282  if (transport->dst) {
283  transport->pathmtu = dst_mtu(transport->dst);
284 
285  /* Initialize sk->sk_rcv_saddr, if the transport is the
286  * association's active path for getsockname().
287  */
288  if (asoc && (!asoc->peer.primary_path ||
289  (transport == asoc->peer.active_path)))
290  opt->pf->af->to_sk_saddr(&transport->saddr,
291  asoc->base.sk);
292  } else
293  transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
294 }
295 
296 /* Hold a reference to a transport. */
297 void sctp_transport_hold(struct sctp_transport *transport)
298 {
299  atomic_inc(&transport->refcnt);
300 }
301 
302 /* Release a reference to a transport and clean up
303  * if there are no more references.
304  */
305 void sctp_transport_put(struct sctp_transport *transport)
306 {
307  if (atomic_dec_and_test(&transport->refcnt))
308  sctp_transport_destroy(transport);
309 }
310 
311 /* Update transport's RTO based on the newly calculated RTT.
 *
 * Does nothing (via SCTP_ASSERT) when tp is NULL or tp->rto_pending is
 * not set.  On the first measurement (srtt and rttvar both zero) it
 * sets srtt = rtt and rttvar = rtt / 2.  On later measurements it
 * smooths both values using the per-namespace rto_alpha / rto_beta
 * shift counts.  It then sets rto = srtt + 4 * rttvar, clamps rto to
 * the association's [rto_min, rto_max], records the raw rtt and clears
 * rto_pending.
 *
 * NOTE(review): listing lines 312 (the signature) and 349 (the body of
 * the "rttvar == 0" check) are missing from this copy.  The body uses
 * `tp` and `rtt` as its parameters.
 */
313 {
314  /* Check for valid transport. */
315  SCTP_ASSERT(tp, "NULL transport", return);
316 
317  /* We should not be doing any RTO updates unless rto_pending is set. */
318  SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return);
319 
320  if (tp->rttvar || tp->srtt) {
321  struct net *net = sock_net(tp->asoc->base.sk);
322  /* 6.3.1 C3) When a new RTT measurement R' is made, set
323  * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|
324  * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'
325  */
326 
327  /* Note: The above algorithm has been rewritten to
328  * express rto_beta and rto_alpha as inverse powers
329  * of two.
330  * For example, assuming the default value of RTO.Alpha of
331  * 1/8, rto_alpha would be expressed as 3.
332  */
333  tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta)
334  + (((__u32)abs64((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta);
335  tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha)
336  + (rtt >> net->sctp.rto_alpha);
337  } else {
338  /* 6.3.1 C2) When the first RTT measurement R is made, set
339  * SRTT <- R, RTTVAR <- R/2.
340  */
341  tp->srtt = rtt;
342  tp->rttvar = rtt >> 1;
343  }
344 
345  /* 6.3.1 G1) Whenever RTTVAR is computed, if RTTVAR = 0, then
346  * adjust RTTVAR <- G, where G is the CLOCK GRANULARITY.
347  */
348  if (tp->rttvar == 0)
/* NOTE(review): listing line 349, the statement controlled by the if
 * above, is missing.  As the text stands, the if would instead govern
 * the RTO assignment below, which is not what the G1 comment describes.
 */
350 
351  /* 6.3.1 C3) After the computation, update RTO <- SRTT + 4 * RTTVAR. */
352  tp->rto = tp->srtt + (tp->rttvar << 2);
353 
354  /* 6.3.1 C6) Whenever RTO is computed, if it is less than RTO.Min
355  * seconds then it is rounded up to RTO.Min seconds.
356  */
357  if (tp->rto < tp->asoc->rto_min)
358  tp->rto = tp->asoc->rto_min;
359 
360  /* 6.3.1 C7) A maximum value may be placed on RTO provided it is
361  * at least RTO.max seconds.
362  */
363  if (tp->rto > tp->asoc->rto_max)
364  tp->rto = tp->asoc->rto_max;
365 
366  tp->rtt = rtt;
367 
368  /* Reset rto_pending so that a new RTT measurement is started when a
369  * new data chunk is sent.
370  */
371  tp->rto_pending = 0;
372 
373  SCTP_DEBUG_PRINTK("%s: transport: %p, rtt: %d, srtt: %d "
374  "rttvar: %d, rto: %ld\n", __func__,
375  tp, rtt, tp->srtt, tp->rttvar, tp->rto);
376 }
377 
378 /* This routine updates the transport's cwnd and partial_bytes_acked
379  * parameters based on the bytes acked in the received SACK.
 *
 * @sack_ctsn is the cumulative TSN carried by the SACK and @bytes_acked
 * the number of bytes it newly acknowledged.
 *
 * Steps, in order:
 *   1. Leave fast recovery when fast_recovery_exit <= sack_ctsn.
 *   2. Return without changes when the SACK does not move past the
 *      association's ctsn_ack_point, or when flight_size < cwnd.
 *   3. Slow start (cwnd <= ssthresh): return without changes while in
 *      fast recovery, otherwise grow cwnd by min(bytes_acked, pmtu).
 *   4. Congestion avoidance (cwnd > ssthresh): add bytes_acked to the
 *      partial-bytes-acked count; once it reaches cwnd, grow cwnd by
 *      one pmtu and reduce the count by the new cwnd, clamped at 0.
 *   5. Store cwnd and partial_bytes_acked back into the transport.
 *
 * The MTU used here is the association's pathmtu, not the transport's.
 *
 * NOTE(review): listing line 381 - the line carrying the return type,
 * the function name and the first parameter - is missing from this
 * copy; the body refers to that parameter as `transport`.
380  */
382  __u32 sack_ctsn, __u32 bytes_acked)
383 {
384  struct sctp_association *asoc = transport->asoc;
385  __u32 cwnd, ssthresh, flight_size, pba, pmtu;
386 
387  cwnd = transport->cwnd;
388  flight_size = transport->flight_size;
389 
390  /* See if we need to exit Fast Recovery first */
391  if (asoc->fast_recovery &&
392  TSN_lte(asoc->fast_recovery_exit, sack_ctsn))
393  asoc->fast_recovery = 0;
394 
395  /* The appropriate cwnd increase algorithm is performed if, and only
396  * if the cumulative TSN would be advanced and the congestion window is
397  * being fully utilized.
398  */
399  if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) ||
400  (flight_size < cwnd))
401  return;
402 
403  ssthresh = transport->ssthresh;
404  pba = transport->partial_bytes_acked;
405  pmtu = transport->asoc->pathmtu;
406 
407  if (cwnd <= ssthresh) {
408  /* RFC 4960 7.2.1
409  * o When cwnd is less than or equal to ssthresh, an SCTP
410  * endpoint MUST use the slow-start algorithm to increase
411  * cwnd only if the current congestion window is being fully
412  * utilized, an incoming SACK advances the Cumulative TSN
413  * Ack Point, and the data sender is not in Fast Recovery.
414  * Only when these three conditions are met can the cwnd be
415  * increased; otherwise, the cwnd MUST not be increased.
416  * If these conditions are met, then cwnd MUST be increased
417  * by, at most, the lesser of 1) the total size of the
418  * previously outstanding DATA chunk(s) acknowledged, and
419  * 2) the destination's path MTU. This upper bound protects
420  * against the ACK-Splitting attack outlined in [SAVAGE99].
421  */
422  if (asoc->fast_recovery)
423  return;
424 
425  if (bytes_acked > pmtu)
426  cwnd += pmtu;
427  else
428  cwnd += bytes_acked;
429  SCTP_DEBUG_PRINTK("%s: SLOW START: transport: %p, "
430  "bytes_acked: %d, cwnd: %d, ssthresh: %d, "
431  "flight_size: %d, pba: %d\n",
432  __func__,
433  transport, bytes_acked, cwnd,
434  ssthresh, flight_size, pba);
435  } else {
436  /* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh,
437  * upon each SACK arrival that advances the Cumulative TSN Ack
438  * Point, increase partial_bytes_acked by the total number of
439  * bytes of all new chunks acknowledged in that SACK including
440  * chunks acknowledged by the new Cumulative TSN Ack and by
441  * Gap Ack Blocks.
442  *
443  * When partial_bytes_acked is equal to or greater than cwnd
444  * and before the arrival of the SACK the sender had cwnd or
445  * more bytes of data outstanding (i.e., before arrival of the
446  * SACK, flightsize was greater than or equal to cwnd),
447  * increase cwnd by MTU, and reset partial_bytes_acked to
448  * (partial_bytes_acked - cwnd).
449  */
450  pba += bytes_acked;
451  if (pba >= cwnd) {
452  cwnd += pmtu;
/* The subtraction below uses cwnd after it was increased by pmtu,
 * and the result is clamped at zero.
 */
453  pba = ((cwnd < pba) ? (pba - cwnd) : 0);
454  }
455  SCTP_DEBUG_PRINTK("%s: CONGESTION AVOIDANCE: "
456  "transport: %p, bytes_acked: %d, cwnd: %d, "
457  "ssthresh: %d, flight_size: %d, pba: %d\n",
458  __func__,
459  transport, bytes_acked, cwnd,
460  ssthresh, flight_size, pba);
461  }
462 
463  transport->cwnd = cwnd;
464  transport->partial_bytes_acked = pba;
465 }
466 
467 /* This routine is used to lower the transport's cwnd when congestion is
468  * detected.
 *
 * The adjustment depends on `reason`; the four visible arms each use
 * max(cwnd / 2, 4 * pathmtu) with the association's pathmtu:
 *   - first arm:  ssthresh = that maximum, cwnd = one pathmtu, and
 *                 fast recovery is cleared;
 *   - second arm: returns immediately if already in fast recovery,
 *                 otherwise enters fast recovery (exit point
 *                 next_tsn - 1) and sets ssthresh = cwnd = that maximum;
 *   - third arm:  sets ssthresh = cwnd = that maximum, but only if more
 *                 than transport->rtt has elapsed since
 *                 last_time_ecne_reduced, which it then updates;
 *   - fourth arm: sets cwnd = that maximum and leaves ssthresh alone.
 * Every path that does not return early also zeroes
 * partial_bytes_acked.
 *
 * NOTE(review): listing lines 470-471 (the signature) and 476, 492, 517
 * and 539 (presumably the four case labels of the switch) are missing
 * from this copy.  Which `reason` value selects which arm can only be
 * inferred from the comments inside each arm; restore the labels from
 * the pristine file.
469  */
472 {
473  struct sctp_association *asoc = transport->asoc;
474 
475  switch (reason) {
/* NOTE(review): listing line 476 is missing; per the comment below this
 * arm handles T3-rtx timer expiry.
 */
477  /* RFC 2960 Section 7.2.3, sctpimpguide
478  * When the T3-rtx timer expires on an address, SCTP should
479  * perform slow start by:
480  * ssthresh = max(cwnd/2, 4*MTU)
481  * cwnd = 1*MTU
482  * partial_bytes_acked = 0
483  */
484  transport->ssthresh = max(transport->cwnd/2,
485  4*asoc->pathmtu);
486  transport->cwnd = asoc->pathmtu;
487 
488  /* T3-rtx also clears fast recovery */
489  asoc->fast_recovery = 0;
490  break;
491 
/* NOTE(review): listing line 492 is missing; per the comment below this
 * arm handles packet loss detected from SACKs.
 */
493  /* RFC 2960 7.2.4 Adjust the ssthresh and cwnd of the
494  * destination address(es) to which the missing DATA chunks
495  * were last sent, according to the formula described in
496  * Section 7.2.3.
497  *
498  * RFC 2960 7.2.3, sctpimpguide Upon detection of packet
499  * losses from SACK (see Section 7.2.4), An endpoint
500  * should do the following:
501  * ssthresh = max(cwnd/2, 4*MTU)
502  * cwnd = ssthresh
503  * partial_bytes_acked = 0
504  */
505  if (asoc->fast_recovery)
506  return;
507 
508  /* Mark Fast recovery */
509  asoc->fast_recovery = 1;
510  asoc->fast_recovery_exit = asoc->next_tsn - 1;
511 
512  transport->ssthresh = max(transport->cwnd/2,
513  4*asoc->pathmtu);
514  transport->cwnd = transport->ssthresh;
515  break;
516 
/* NOTE(review): listing line 517 is missing; per the comment below this
 * arm handles an ECN-Echo indication.
 */
518  /* RFC 2481 Section 6.1.2.
519  * If the sender receives an ECN-Echo ACK packet
520  * then the sender knows that congestion was encountered in the
521  * network on the path from the sender to the receiver. The
522  * indication of congestion should be treated just as a
523  * congestion loss in non-ECN Capable TCP. That is, the TCP
524  * source halves the congestion window "cwnd" and reduces the
525  * slow start threshold "ssthresh".
526  * A critical condition is that TCP does not react to
527  * congestion indications more than once every window of
528  * data (or more loosely more than once every round-trip time).
529  */
530  if (time_after(jiffies, transport->last_time_ecne_reduced +
531  transport->rtt)) {
532  transport->ssthresh = max(transport->cwnd/2,
533  4*asoc->pathmtu);
534  transport->cwnd = transport->ssthresh;
535  transport->last_time_ecne_reduced = jiffies;
536  }
537  break;
538 
/* NOTE(review): listing line 539 is missing; per the comment below this
 * arm handles an idle transport.
 */
540  /* RFC 2960 Section 7.2.1, sctpimpguide
541  * When the endpoint does not transmit data on a given
542  * transport address, the cwnd of the transport address
543  * should be adjusted to max(cwnd/2, 4*MTU) per RTO.
544  * NOTE: Although the draft recommends that this check needs
545  * to be done every RTO interval, we do it every heartbeat
546  * interval.
547  */
548  transport->cwnd = max(transport->cwnd/2,
549  4*asoc->pathmtu);
550  break;
551  }
552 
553  transport->partial_bytes_acked = 0;
554  SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: "
555  "%d ssthresh: %d\n", __func__,
556  transport, reason,
557  transport->cwnd, transport->ssthresh);
558 }
559 
560 /* Apply Max.Burst limit to the congestion window:
561  * sctpimpguide-05 2.14.2
562  * D) When the time comes for the sender to
563  * transmit new DATA chunks, the protocol parameter Max.Burst MUST
564  * first be applied to limit how many new DATA chunks may be sent.
565  * The limit is applied by adjusting cwnd as follows:
566  * if ((flightsize+ Max.Burst * MTU) < cwnd)
567  * cwnd = flightsize + Max.Burst * MTU
 *
 * t->burst_limited serves both as the "limit is active" flag and as
 * storage for the cwnd value in effect before the limit was applied.
 * Nothing happens if it is already nonzero.  Max.Burst and the MTU are
 * read from the association (max_burst, pathmtu).
 *
 * NOTE(review): listing line 570, the function's signature line, is
 * missing from this copy; the body refers to its argument as `t`.
568  */
569 
571 {
572  struct sctp_association *asoc = t->asoc;
573  u32 old_cwnd = t->cwnd;
574  u32 max_burst_bytes;
575 
576  /* A limit is already in force; keep the cwnd saved earlier. */
576  if (t->burst_limited)
577  return;
578 
579  max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu);
580  if (max_burst_bytes < old_cwnd) {
581  t->cwnd = max_burst_bytes;
582  t->burst_limited = old_cwnd;
583  }
584 }
585 
586 /* Restore the old cwnd congestion window, after the burst had its
587  * desired effect.
 *
 * If t->burst_limited is nonzero, its value is copied back into t->cwnd
 * and the field is cleared; otherwise nothing changes.
 *
 * NOTE(review): listing line 589, the function's signature line, is
 * missing from this copy; the body refers to its argument as `t`.
588  */
590 {
591  if (t->burst_limited) {
592  t->cwnd = t->burst_limited;
593  t->burst_limited = 0;
594  }
595 }
596 
597 /* What is the next timeout value for this transport?
 *
 * Returns an absolute expiry: jiffies + rto + sctp_jitter(rto), plus
 * the heartbeat interval unless the transport state is
 * SCTP_UNCONFIRMED or SCTP_PF.
 *
 * NOTE(review): listing line 598, the function's signature line, is
 * missing from this copy; the body refers to its argument as `t` and
 * returns an unsigned long.
 */
599 {
600  unsigned long timeout;
601  timeout = t->rto + sctp_jitter(t->rto);
602  if ((t->state != SCTP_UNCONFIRMED) &&
603  (t->state != SCTP_PF))
604  timeout += t->hbinterval;
605  timeout += jiffies;
606  return timeout;
607 }
608 
609 /* Reset transport variables to their initial values
 *
 * Re-derives cwnd, ssthresh and rto from the owning association and
 * zeroes the RTT estimators, the in-flight and error accounting, the
 * burst-limit state and the SFR-CACC changeover state.
 *
 * NOTE(review): listing line 610, the function's signature line, is
 * missing from this copy; the body refers to its argument as `t`.
 */
611 {
612  struct sctp_association *asoc = t->asoc;
613 
614  /* RFC 2960 (bis), Section 5.2.4
615  * All the congestion control parameters (e.g., cwnd, ssthresh)
616  * related to this peer MUST be reset to their initial values
617  * (see Section 6.2.1)
618  */
/* cwnd = min(4 * MTU, max(2 * MTU, 4380)), using the association MTU. */
619  t->cwnd = min(4*asoc->pathmtu, max_t(__u32, 2*asoc->pathmtu, 4380));
620  t->burst_limited = 0;
621  t->ssthresh = asoc->peer.i.a_rwnd;
622  t->rto = asoc->rto_initial;
623  t->rtt = 0;
624  t->srtt = 0;
625  t->rttvar = 0;
626 
627  /* Reset these additional variables so that we have a clean
628  * slate.
629  */
630  t->partial_bytes_acked = 0;
631  t->flight_size = 0;
632  t->error_count = 0;
633  t->rto_pending = 0;
634  t->hb_sent = 0;
635 
636  /* Initialize the state information for SFR-CACC */
637  t->cacc.changeover_active = 0;
638  t->cacc.cycling_changeover = 0;
639  t->cacc.next_tsn_at_change = 0;
640  t->cacc.cacc_saw_newack = 0;
641 }
642 
643 /* Schedule retransmission on the given transport
 *
 * Calls sctp_retransmit() on the association's outqueue for this
 * transport with reason SCTP_RTXR_T3_RTX, then re-arms the T3-rtx timer
 * to expire after the transport's RTO if it is not pending afterwards.
 *
 * NOTE(review): listing lines 644 (the signature), 648-649 (the body of
 * the "stop pending timer" branch) and 654 (the statement controlled by
 * the inner mod_timer() check) are missing from this copy.  The body
 * refers to its argument as `t`.
 */
645 {
646  /* Stop pending T3_rtx_timer */
647  if (timer_pending(&t->T3_rtx_timer)) {
/* NOTE(review): listing lines 648-649 are missing; nothing in the
 * visible text actually stops the timer.
 */
650  }
651  sctp_retransmit(&t->asoc->outqueue, t, SCTP_RTXR_T3_RTX);
652  if (!timer_pending(&t->T3_rtx_timer)) {
653  if (!mod_timer(&t->T3_rtx_timer, jiffies + t->rto))
/* NOTE(review): listing line 654, the statement controlled by the if
 * above, is missing.
 */
655  }
656  return;
657 }