Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
ccid2.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2005, 2006 Andrea Bittau <[email protected]>
3  *
4  * Changes to meet Linux coding standards, and DCCP infrastructure fixes.
5  *
6  * Copyright (c) 2006 Arnaldo Carvalho de Melo <[email protected]>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  */
22 
23 /*
24  * This implementation should follow RFC 4341
25  */
26 #include <linux/slab.h>
27 #include "../feat.h"
28 #include "ccid2.h"
29 
30 
/*
 * Debug tracing for this file. With CONFIG_IP_DCCP_CCID2_DEBUG defined,
 * ccid2_pr_debug() forwards its arguments to DCCP_PR_DEBUG(), gated by the
 * file-local ccid2_debug flag; without it the macro expands to nothing, so
 * call sites need no #ifdef of their own.
 */
31 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
32 static bool ccid2_debug;
33 #define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
34 #else
35 #define ccid2_pr_debug(format, a...)
36 #endif
37 
38 static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
39 {
40  struct ccid2_seq *seqp;
41  int i;
42 
43  /* check if we have space to preserve the pointer to the buffer */
44  if (hc->tx_seqbufc >= (sizeof(hc->tx_seqbuf) /
45  sizeof(struct ccid2_seq *)))
46  return -ENOMEM;
47 
48  /* allocate buffer and initialize linked list */
49  seqp = kmalloc(CCID2_SEQBUF_LEN * sizeof(struct ccid2_seq), gfp_any());
50  if (seqp == NULL)
51  return -ENOMEM;
52 
53  for (i = 0; i < (CCID2_SEQBUF_LEN - 1); i++) {
54  seqp[i].ccid2s_next = &seqp[i + 1];
55  seqp[i + 1].ccid2s_prev = &seqp[i];
56  }
57  seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = seqp;
58  seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
59 
60  /* This is the first allocation. Initiate the head and tail. */
61  if (hc->tx_seqbufc == 0)
62  hc->tx_seqh = hc->tx_seqt = seqp;
63  else {
64  /* link the existing list with the one we just created */
65  hc->tx_seqh->ccid2s_next = seqp;
66  seqp->ccid2s_prev = hc->tx_seqh;
67 
68  hc->tx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
69  seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hc->tx_seqt;
70  }
71 
72  /* store the original pointer to the buffer so we can free it */
73  hc->tx_seqbuf[hc->tx_seqbufc] = seqp;
74  hc->tx_seqbufc++;
75 
76  return 0;
77 }
78 
/*
 * CCID-2 hook installed as .ccid_hc_tx_send_packet in the operations table
 * of this file. The only logic visible here is a test of
 * ccid2_cwnd_network_limited() on the socket's CCID-2 sender state.
 *
 * NOTE(review): the embedded listing numbers jump from 81 to 84, so the
 * statements governed by this `if' (presumably the function's two return
 * values) are missing from this copy and the body does not compile as
 * shown. TODO: restore them from a pristine copy of the file.
 */
79 static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
80 {
81  if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
84 }
85 
/*
 * Limit a proposed local Ack Ratio @val before it is applied: a value of 0
 * or anything above ceil(cwnd / 2) is replaced by ceil(cwnd / 2), with a
 * warning. The result is further capped at DCCPF_ACK_RATIO_MAX when it is
 * handed on (see the review note at the end of the body).
 */
86 static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
87 {
88  u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2);
89 
90  /*
91  * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
92  * RFC 4341, 6.1.2. We ignore the statement that Ack Ratio 2 is always
93  * acceptable since this causes starvation/deadlock whenever cwnd < 2.
94  * The same problem arises when Ack Ratio is 0 (ie. Ack Ratio disabled).
95  */
96  if (val == 0 || val > max_ratio) {
97  DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
98  val = max_ratio;
99  }
/*
 * NOTE(review): listing line 100 is missing from this copy. The line below
 * is only the tail of a call whose last argument is
 * min(val, DCCPF_ACK_RATIO_MAX); presumably the call signals the new Ack
 * Ratio to feature negotiation - TODO restore it from a pristine copy.
 */
101  min_t(u32, val, DCCPF_ACK_RATIO_MAX));
102 }
103 
/*
 * Re-validate the local Ack Ratio against the current congestion window.
 * Called in this file after cwnd has been reduced (application-limited
 * period, restart after idle, congestion event).
 */
104 static void ccid2_check_l_ack_ratio(struct sock *sk)
105 {
106  struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
107 
108  /*
109  * After a loss, idle period, application limited period, or RTO we
110  * need to check that the ack ratio is still less than the congestion
111  * window. Otherwise, we will send an entire congestion window of
112  * packets and got no response because we haven't sent ack ratio
113  * packets yet.
114  * If the ack ratio does need to be reduced, we reduce it to half of
115  * the congestion window (or 1 if that's zero) instead of to the
116  * congestion window. This prevents problems if one ack is lost.
117  */
/*
 * NOTE(review): listing line 118 is missing from this copy. Going by the
 * comment above it was presumably the condition comparing the current Ack
 * Ratio with cwnd; as shown here the call below is unconditional. TODO:
 * restore the line from a pristine copy.
 */
119  ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U);
120 }
121 
/*
 * Change the local sequence window to @val. Callers in this file pass
 * double or half of dp->dccps_l_seq_win.
 *
 * NOTE(review): listing lines 124-125 are missing from this copy; only the
 * tail of the call (its DCCPF_SEQ_WMAX upper-bound argument) survives, so
 * the body does not compile as shown. Presumably @val is clamped to a
 * [min, DCCPF_SEQ_WMAX] range and signalled to feature negotiation - TODO
 * restore the lines from a pristine copy.
 */
122 static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
123 {
126  DCCPF_SEQ_WMAX));
127 }
128 
129 static void ccid2_hc_tx_rto_expire(unsigned long data)
130 {
131  struct sock *sk = (struct sock *)data;
132  struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
133  const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
134 
135  bh_lock_sock(sk);
136  if (sock_owned_by_user(sk)) {
137  sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + HZ / 5);
138  goto out;
139  }
140 
141  ccid2_pr_debug("RTO_EXPIRE\n");
142 
143  /* back-off timer */
144  hc->tx_rto <<= 1;
145  if (hc->tx_rto > DCCP_RTO_MAX)
146  hc->tx_rto = DCCP_RTO_MAX;
147 
148  /* adjust pipe, cwnd etc */
149  hc->tx_ssthresh = hc->tx_cwnd / 2;
150  if (hc->tx_ssthresh < 2)
151  hc->tx_ssthresh = 2;
152  hc->tx_cwnd = 1;
153  hc->tx_pipe = 0;
154 
155  /* clear state about stuff we sent */
156  hc->tx_seqt = hc->tx_seqh;
157  hc->tx_packets_acked = 0;
158 
159  /* clear ack ratio state. */
160  hc->tx_rpseq = 0;
161  hc->tx_rpdupack = -1;
162  ccid2_change_l_ack_ratio(sk, 1);
163 
164  /* if we were blocked before, we may now send cwnd=1 packet */
165  if (sender_was_blocked)
166  tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
167  /* restart backed-off timer */
168  sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
169 out:
170  bh_unlock_sock(sk);
171  sock_put(sk);
172 }
173 
174 /*
175  * Congestion window validation (RFC 2861).
176  */
/*
 * Module parameter (mode 0644), enabled by default. The send path in
 * ccid2_hc_tx_packet_sent() consults it before calling ccid2_cwnd_restart()
 * or ccid2_cwnd_application_limited().
 */
177 static bool ccid2_do_cwv = true;
178 module_param(ccid2_do_cwv, bool, 0644);
179 MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation");
180 
188 static void ccid2_update_used_window(struct ccid2_hc_tx_sock *hc, u32 new_wnd)
189 {
190  hc->tx_expected_wnd = (3 * hc->tx_expected_wnd + new_wnd) / 4;
191 }
192 
193 /* This borrows the code of tcp_cwnd_application_limited() */
194 static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now)
195 {
196  struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
197  /* don't reduce cwnd below the initial window (IW) */
198  u32 init_win = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache),
199  win_used = max(hc->tx_cwnd_used, init_win);
200 
201  if (win_used < hc->tx_cwnd) {
202  hc->tx_ssthresh = max(hc->tx_ssthresh,
203  (hc->tx_cwnd >> 1) + (hc->tx_cwnd >> 2));
204  hc->tx_cwnd = (hc->tx_cwnd + win_used) >> 1;
205  }
206  hc->tx_cwnd_used = 0;
207  hc->tx_cwnd_stamp = now;
208 
209  ccid2_check_l_ack_ratio(sk);
210 }
211 
212 /* This borrows the code of tcp_cwnd_restart() */
213 static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
214 {
215  struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
216  u32 cwnd = hc->tx_cwnd, restart_cwnd,
217  iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache);
218 
219  hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2));
220 
221  /* don't reduce cwnd below the initial window (IW) */
222  restart_cwnd = min(cwnd, iwnd);
223  cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto;
224  hc->tx_cwnd = max(cwnd, restart_cwnd);
225 
226  hc->tx_cwnd_stamp = now;
227  hc->tx_cwnd_used = 0;
228 
229  ccid2_check_l_ack_ratio(sk);
230 }
231 
232 static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
233 {
234  struct dccp_sock *dp = dccp_sk(sk);
235  struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
236  const u32 now = ccid2_time_stamp;
237  struct ccid2_seq *next;
238 
239  /* slow-start after idle periods (RFC 2581, RFC 2861) */
240  if (ccid2_do_cwv && !hc->tx_pipe &&
241  (s32)(now - hc->tx_lsndtime) >= hc->tx_rto)
242  ccid2_cwnd_restart(sk, now);
243 
244  hc->tx_lsndtime = now;
245  hc->tx_pipe += 1;
246 
247  /* see whether cwnd was fully used (RFC 2861), update expected window */
248  if (ccid2_cwnd_network_limited(hc)) {
249  ccid2_update_used_window(hc, hc->tx_cwnd);
250  hc->tx_cwnd_used = 0;
251  hc->tx_cwnd_stamp = now;
252  } else {
253  if (hc->tx_pipe > hc->tx_cwnd_used)
254  hc->tx_cwnd_used = hc->tx_pipe;
255 
256  ccid2_update_used_window(hc, hc->tx_cwnd_used);
257 
258  if (ccid2_do_cwv && (s32)(now - hc->tx_cwnd_stamp) >= hc->tx_rto)
259  ccid2_cwnd_application_limited(sk, now);
260  }
261 
262  hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
263  hc->tx_seqh->ccid2s_acked = 0;
264  hc->tx_seqh->ccid2s_sent = now;
265 
266  next = hc->tx_seqh->ccid2s_next;
267  /* check if we need to alloc more space */
268  if (next == hc->tx_seqt) {
269  if (ccid2_hc_tx_alloc_seq(hc)) {
270  DCCP_CRIT("packet history - out of memory!");
271  /* FIXME: find a more graceful way to bail out */
272  return;
273  }
274  next = hc->tx_seqh->ccid2s_next;
275  BUG_ON(next == hc->tx_seqt);
276  }
277  hc->tx_seqh = next;
278 
279  ccid2_pr_debug("cwnd=%d pipe=%d\n", hc->tx_cwnd, hc->tx_pipe);
280 
281  /*
282  * FIXME: The code below is broken and the variables have been removed
283  * from the socket struct. The `ackloss' variable was always set to 0,
284  * and with arsent there are several problems:
285  * (i) it doesn't just count the number of Acks, but all sent packets;
286  * (ii) it is expressed in # of packets, not # of windows, so the
287  * comparison below uses the wrong formula: Appendix A of RFC 4341
288  * comes up with the number K = cwnd / (R^2 - R) of consecutive windows
289  * of data with no lost or marked Ack packets. If arsent were the # of
290  * consecutive Acks received without loss, then Ack Ratio needs to be
291  * decreased by 1 when
292  * arsent >= K * cwnd / R = cwnd^2 / (R^3 - R^2)
293  * where cwnd / R is the number of Acks received per window of data
294  * (cf. RFC 4341, App. A). The problems are that
295  * - arsent counts other packets as well;
296  * - the comparison uses a formula different from RFC 4341;
297  * - computing a cubic/quadratic equation each time is too complicated.
298  * Hence a different algorithm is needed.
299  */
300 #if 0
301  /* Ack Ratio. Need to maintain a concept of how many windows we sent */
302  hc->tx_arsent++;
303  /* We had an ack loss in this window... */
304  if (hc->tx_ackloss) {
305  if (hc->tx_arsent >= hc->tx_cwnd) {
306  hc->tx_arsent = 0;
307  hc->tx_ackloss = 0;
308  }
309  } else {
310  /* No acks lost up to now... */
311  /* decrease ack ratio if enough packets were sent */
312  if (dp->dccps_l_ack_ratio > 1) {
313  /* XXX don't calculate denominator each time */
314  int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio -
315  dp->dccps_l_ack_ratio;
316 
317  denom = hc->tx_cwnd * hc->tx_cwnd / denom;
318 
319  if (hc->tx_arsent >= denom) {
320  ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1);
321  hc->tx_arsent = 0;
322  }
323  } else {
324  /* we can't increase ack ratio further [1] */
325  hc->tx_arsent = 0; /* or maybe set it to cwnd*/
326  }
327  }
328 #endif
329 
330  sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
331 
332 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
333  do {
334  struct ccid2_seq *seqp = hc->tx_seqt;
335 
336  while (seqp != hc->tx_seqh) {
337  ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
338  (unsigned long long)seqp->ccid2s_seq,
339  seqp->ccid2s_acked, seqp->ccid2s_sent);
340  seqp = seqp->ccid2s_next;
341  }
342  } while (0);
343  ccid2_pr_debug("=========\n");
344 #endif
345 }
346 
356 static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
357 {
358  struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
359  long m = mrtt ? : 1;
360 
361  if (hc->tx_srtt == 0) {
362  /* First measurement m */
363  hc->tx_srtt = m << 3;
364  hc->tx_mdev = m << 1;
365 
366  hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
367  hc->tx_rttvar = hc->tx_mdev_max;
368 
369  hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
370  } else {
371  /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
372  m -= (hc->tx_srtt >> 3);
373  hc->tx_srtt += m;
374 
375  /* Similarly, update scaled mdev with regard to |m| */
376  if (m < 0) {
377  m = -m;
378  m -= (hc->tx_mdev >> 2);
379  /*
380  * This neutralises RTO increase when RTT < SRTT - mdev
381  * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
382  * in Linux TCP", USENIX 2002, pp. 49-62).
383  */
384  if (m > 0)
385  m >>= 3;
386  } else {
387  m -= (hc->tx_mdev >> 2);
388  }
389  hc->tx_mdev += m;
390 
391  if (hc->tx_mdev > hc->tx_mdev_max) {
392  hc->tx_mdev_max = hc->tx_mdev;
393  if (hc->tx_mdev_max > hc->tx_rttvar)
394  hc->tx_rttvar = hc->tx_mdev_max;
395  }
396 
397  /*
398  * Decay RTTVAR at most once per flight, exploiting that
399  * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
400  * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
401  * GAR is a useful bound for FlightSize = pipe.
402  * AWL is probably too low here, as it over-estimates pipe.
403  */
404  if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
405  if (hc->tx_mdev_max < hc->tx_rttvar)
406  hc->tx_rttvar -= (hc->tx_rttvar -
407  hc->tx_mdev_max) >> 2;
408  hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
409  hc->tx_mdev_max = tcp_rto_min(sk);
410  }
411  }
412 
413  /*
414  * Set RTO from SRTT and RTTVAR
415  * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
416  * This agrees with RFC 4341, 5:
417  * "Because DCCP does not retransmit data, DCCP does not require
418  * TCP's recommended minimum timeout of one second".
419  */
420  hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
421 
422  if (hc->tx_rto > DCCP_RTO_MAX)
423  hc->tx_rto = DCCP_RTO_MAX;
424 }
425 
/*
 * Process one newly acknowledged history entry @seqp (called from
 * ccid2_hc_tx_packet_recv() for entries that were received and not
 * ECN-marked).
 *
 * cwnd may only grow while it is below the local sequence window and
 * cwnd / Ack Ratio is below the remote sequence window:
 *  - below ssthresh: +1 for every second acked packet, consuming one unit
 *    of the caller's *@maxincr budget each time (no growth once it is 0);
 *  - otherwise: +1 once tx_packets_acked reaches cwnd.
 * Afterwards the Ack Ratio and the local sequence window are adjusted, and
 * the time since the packet was sent is fed to the RTT estimator.
 */
426 static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
427  unsigned int *maxincr)
428 {
429  struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
430  struct dccp_sock *dp = dccp_sk(sk);
431  int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio;
432 
433  if (hc->tx_cwnd < dp->dccps_l_seq_win &&
434  r_seq_used < dp->dccps_r_seq_win) {
435  if (hc->tx_cwnd < hc->tx_ssthresh) {
436  if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) {
437  hc->tx_cwnd += 1;
438  *maxincr -= 1;
439  hc->tx_packets_acked = 0;
440  }
441  } else if (++hc->tx_packets_acked >= hc->tx_cwnd) {
442  hc->tx_cwnd += 1;
443  hc->tx_packets_acked = 0;
444  }
445  }
446 
447  /*
448  * Adjust the local sequence window and the ack ratio to allow about
449  * 5 times the number of packets in the network (RFC 4340 7.5.2)
450  */
451  if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win)
452  ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2);
453  else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2)
454  ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U);
455 
/*
 * NOTE(review): listing line 456 is missing from this copy. It must have
 * been the `if' that the `else if' two lines below pairs with - presumably
 * the mirror-image test of cwnd * CCID2_WIN_CHANGE_FACTOR against
 * dccps_l_seq_win. TODO: restore it from a pristine copy; as shown the
 * `else' has no matching `if'.
 */
457  ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2);
458  else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2)
459  ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2);
460 
461  /*
462  * FIXME: RTT is sampled several times per acknowledgment (for each
463  * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
464  * This causes the RTT to be over-estimated, since the older entries
465  * in the Ack Vector have earlier sending times.
466  * The cleanest solution is to not use the ccid2s_sent field at all
467  * and instead use DCCP timestamps: requires changes in other places.
468  */
469  ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
470 }
471 
/*
 * React to a congestion indication (loss or ECN mark) for the packet
 * recorded in @seqp: halve cwnd (but keep it at least 1), set ssthresh to
 * max(new cwnd, 2) and re-check the Ack Ratio against the smaller window.
 * Packets sent before the last congestion event (tx_last_cong) are ignored,
 * so several losses within one round trip count once.
 */
472 static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
473 {
474  struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
475 
476  if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
477  ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
478  return;
479  }
480 
/*
 * NOTE(review): listing line 481 is missing from this copy. Presumably it
 * records the time of this event in tx_last_cong (nothing else visible in
 * this function updates the value tested above) - TODO restore it from a
 * pristine copy.
 */
482 
483  hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
484  hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
485 
486  ccid2_check_l_ack_ratio(sk);
487 }
488 
489 static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
490  u8 option, u8 *optval, u8 optlen)
491 {
492  struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
493 
494  switch (option) {
495  case DCCPO_ACK_VECTOR_0:
496  case DCCPO_ACK_VECTOR_1:
497  return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen,
498  option - DCCPO_ACK_VECTOR_0);
499  }
500  return 0;
501 }
502 
/*
 * Sender-side processing of a received packet @skb.
 *
 *  1. Reverse path: track the sequence numbers of incoming packets in
 *     tx_rpseq / tx_rpdupack. The resulting Ack Ratio doubling is compiled
 *     in only when __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__
 *     is defined.
 *  2. Forward path (only for packets that carry an acknowledgment): walk
 *     the parsed Ack Vector data and mark matching history entries as
 *     acked, calling ccid2_congestion_event() for ECN-marked packets and
 *     ccid2_new_ack() for the others; tx_pipe is decremented per entry.
 *  3. Loss detection: once NUMDUPACK acked entries are found at or below
 *     tx_high_ack, every unacked entry from there down to the tail is
 *     treated as lost.
 *  4. Trim acked entries from the tail, stop the RTO timer when the pipe is
 *     empty (re-arm it otherwise) and schedule the transmit tasklet if the
 *     sender was blocked on entry and no longer is.
 *
 * NOTE(review): listing lines 581 and 707 are missing from this copy; see
 * the notes at the respective positions below.
 */
503 static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
504 {
505  struct dccp_sock *dp = dccp_sk(sk);
506  struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
507  const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
508  struct dccp_ackvec_parsed *avp;
509  u64 ackno, seqno;
510  struct ccid2_seq *seqp;
511  int done = 0;
512  unsigned int maxincr = 0;
513 
514  /* check reverse path congestion */
515  seqno = DCCP_SKB_CB(skb)->dccpd_seq;
516 
517  /* XXX this whole "algorithm" is broken. Need to fix it to keep track
518  * of the seqnos of the dupacks so that rpseq and rpdupack are correct
519  * -sorbo.
520  */
521  /* need to bootstrap */
522  if (hc->tx_rpdupack == -1) {
523  hc->tx_rpdupack = 0;
524  hc->tx_rpseq = seqno;
525  } else {
526  /* check if packet is consecutive */
527  if (dccp_delta_seqno(hc->tx_rpseq, seqno) == 1)
528  hc->tx_rpseq = seqno;
529  /* it's a later packet */
530  else if (after48(seqno, hc->tx_rpseq)) {
531  hc->tx_rpdupack++;
532 
533  /* check if we got enough dupacks */
534  if (hc->tx_rpdupack >= NUMDUPACK) {
535  hc->tx_rpdupack = -1; /* XXX lame */
536  hc->tx_rpseq = 0;
537 #ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__
538  /*
539  * FIXME: Ack Congestion Control is broken; in
540  * the current state instabilities occurred with
541  * Ack Ratios greater than 1; causing hang-ups
542  * and long RTO timeouts. This needs to be fixed
543  * before opening up dynamic changes. -- gerrit
544  */
545  ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
546 #endif
547  }
548  }
549  }
550 
551  /* check forward path congestion */
552  if (dccp_packet_without_ack(skb))
553  return;
554 
555  /* still didn't send out new data packets */
556  if (hc->tx_seqh == hc->tx_seqt)
557  goto done;
558 
559  ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
560  if (after48(ackno, hc->tx_high_ack))
561  hc->tx_high_ack = ackno;
562 
 /*
  * Position seqp on the first history entry that is not before ackno,
  * or on the newest used entry if the walk reaches the head.
  */
563  seqp = hc->tx_seqt;
564  while (before48(seqp->ccid2s_seq, ackno)) {
565  seqp = seqp->ccid2s_next;
566  if (seqp == hc->tx_seqh) {
567  seqp = hc->tx_seqh->ccid2s_prev;
568  break;
569  }
570  }
571 
572  /*
573  * In slow-start, cwnd can increase up to a maximum of Ack Ratio/2
574  * packets per acknowledgement. Rounding up avoids that cwnd is not
575  * advanced when Ack Ratio is 1 and gives a slight edge otherwise.
576  */
577  if (hc->tx_cwnd < hc->tx_ssthresh)
578  maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
579 
580  /* go through all ack vectors */
/*
 * NOTE(review): listing line 581 is missing from this copy. It must open
 * the outer loop that binds `avp' (presumably an iteration over the parsed
 * chunks queued on hc->tx_av_chunks) and is closed by the `}' at listing
 * line 639. TODO: restore it from a pristine copy; as shown `avp' is used
 * uninitialised and the braces do not balance.
 */
582  /* go through this ack vector */
583  for (; avp->len--; avp->vec++) {
584  u64 ackno_end_rl = SUB48(ackno,
585  dccp_ackvec_runlen(avp->vec));
586 
587  ccid2_pr_debug("ackvec %llu |%u,%u|\n",
588  (unsigned long long)ackno,
589  dccp_ackvec_state(avp->vec) >> 6,
590  dccp_ackvec_runlen(avp->vec));
591  /* if the seqno we are analyzing is larger than the
592  * current ackno, then move towards the tail of our
593  * seqnos.
594  */
595  while (after48(seqp->ccid2s_seq, ackno)) {
596  if (seqp == hc->tx_seqt) {
597  done = 1;
598  break;
599  }
600  seqp = seqp->ccid2s_prev;
601  }
602  if (done)
603  break;
604 
605  /* check all seqnos in the range of the vector
606  * run length
607  */
608  while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
609  const u8 state = dccp_ackvec_state(avp->vec);
610 
611  /* new packet received or marked */
612  if (state != DCCPAV_NOT_RECEIVED &&
613  !seqp->ccid2s_acked) {
614  if (state == DCCPAV_ECN_MARKED)
615  ccid2_congestion_event(sk,
616  seqp);
617  else
618  ccid2_new_ack(sk, seqp,
619  &maxincr);
620 
621  seqp->ccid2s_acked = 1;
622  ccid2_pr_debug("Got ack for %llu\n",
623  (unsigned long long)seqp->ccid2s_seq);
624  hc->tx_pipe--;
625  }
626  if (seqp == hc->tx_seqt) {
627  done = 1;
628  break;
629  }
630  seqp = seqp->ccid2s_prev;
631  }
632  if (done)
633  break;
634 
 /* the next run covers the sequence numbers just below this one */
635  ackno = SUB48(ackno_end_rl, 1);
636  }
637  if (done)
638  break;
639  }
640 
641  /* The state about what is acked should be correct now
642  * Check for NUMDUPACK
643  */
644  seqp = hc->tx_seqt;
645  while (before48(seqp->ccid2s_seq, hc->tx_high_ack)) {
646  seqp = seqp->ccid2s_next;
647  if (seqp == hc->tx_seqh) {
648  seqp = hc->tx_seqh->ccid2s_prev;
649  break;
650  }
651  }
 /* from here on `done' is reused as a counter of acked entries */
652  done = 0;
653  while (1) {
654  if (seqp->ccid2s_acked) {
655  done++;
656  if (done == NUMDUPACK)
657  break;
658  }
659  if (seqp == hc->tx_seqt)
660  break;
661  seqp = seqp->ccid2s_prev;
662  }
663 
664  /* If there are at least 3 acknowledgements, anything unacknowledged
665  * below the last sequence number is considered lost
666  */
667  if (done == NUMDUPACK) {
668  struct ccid2_seq *last_acked = seqp;
669 
670  /* check for lost packets */
671  while (1) {
672  if (!seqp->ccid2s_acked) {
673  ccid2_pr_debug("Packet lost: %llu\n",
674  (unsigned long long)seqp->ccid2s_seq);
675  /* XXX need to traverse from tail -> head in
676  * order to detect multiple congestion events in
677  * one ack vector.
678  */
679  ccid2_congestion_event(sk, seqp);
680  hc->tx_pipe--;
681  }
682  if (seqp == hc->tx_seqt)
683  break;
684  seqp = seqp->ccid2s_prev;
685  }
686 
 /* everything older than last_acked has now been accounted for */
687  hc->tx_seqt = last_acked;
688  }
689 
690  /* trim acked packets in tail */
691  while (hc->tx_seqt != hc->tx_seqh) {
692  if (!hc->tx_seqt->ccid2s_acked)
693  break;
694 
695  hc->tx_seqt = hc->tx_seqt->ccid2s_next;
696  }
697 
698  /* restart RTO timer if not all outstanding data has been acked */
699  if (hc->tx_pipe == 0)
700  sk_stop_timer(sk, &hc->tx_rtotimer);
701  else
702  sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
703 done:
704  /* check if incoming Acks allow pending packets to be sent */
705  if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
706  tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
/*
 * NOTE(review): listing line 707 is missing from this copy. Presumably it
 * releases the parsed Ack Vector chunks queued on hc->tx_av_chunks by
 * ccid2_hc_tx_parse_options(); nothing visible in this function frees
 * them. TODO: restore it from a pristine copy.
 */
708 }
709 
/*
 * Initialise the CCID-2 sender state stored in @ccid for socket @sk:
 * ssthresh starts at the maximum value, cwnd at the RFC 3390 initial window
 * for the cached MSS, the local Ack Ratio is forced into
 * [1, ceil(cwnd / 2)], the first packet-history chunk is allocated and the
 * RTO timer is set up with the socket as its argument.
 *
 * Returns 0 on success or -ENOMEM if the history chunk cannot be allocated.
 */
710 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
711 {
712  struct ccid2_hc_tx_sock *hc = ccid_priv(ccid);
713  struct dccp_sock *dp = dccp_sk(sk);
714  u32 max_ratio;
715 
716  /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
717  hc->tx_ssthresh = ~0U;
718 
719  /* Use larger initial windows (RFC 4341, section 5). */
720  hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
721  hc->tx_expected_wnd = hc->tx_cwnd;
722 
723  /* Make sure that Ack Ratio is enabled and within bounds. */
724  max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
725  if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio)
726  dp->dccps_l_ack_ratio = max_ratio;
727 
728  /* XXX init ~ to window size... */
729  if (ccid2_hc_tx_alloc_seq(hc))
730  return -ENOMEM;
731 
/*
 * NOTE(review): listing lines 732 and 734 are missing from this copy.
 * Nothing visible in this function initialises tx_rto, tx_last_cong,
 * tx_lsndtime or tx_cwnd_stamp, so those assignments were presumably on
 * the lost lines - TODO restore them from a pristine copy.
 */
 /* -1 makes ccid2_hc_tx_packet_recv() bootstrap its reverse-path state */
733  hc->tx_rpdupack = -1;
735  hc->tx_cwnd_used = 0;
736  setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
737  (unsigned long)sk);
738  INIT_LIST_HEAD(&hc->tx_av_chunks);
739  return 0;
740 }
741 
742 static void ccid2_hc_tx_exit(struct sock *sk)
743 {
744  struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
745  int i;
746 
747  sk_stop_timer(sk, &hc->tx_rtotimer);
748 
749  for (i = 0; i < hc->tx_seqbufc; i++)
750  kfree(hc->tx_seqbuf[i]);
751  hc->tx_seqbufc = 0;
752 }
753 
754 static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
755 {
756  struct ccid2_hc_rx_sock *hc = ccid2_hc_rx_sk(sk);
757 
758  if (!dccp_data_packet(skb))
759  return;
760 
761  if (++hc->rx_num_data_pkts >= dccp_sk(sk)->dccps_r_ack_ratio) {
762  dccp_send_ack(sk);
763  hc->rx_num_data_pkts = 0;
764  }
765 }
766 
/*
 * Operations table wiring the CCID-2 ("TCP-like") handlers of this file
 * into the CCID interface: sender init/exit, send decision, post-send
 * bookkeeping, option parsing and packet reception, plus the receiver's
 * packet handler, together with the sizes of the per-socket sender and
 * receiver state.
 *
 * NOTE(review): listing line 767, which opens this initialiser (presumably
 * the declaration of the operations structure), is missing from this copy -
 * TODO restore it from a pristine copy.
 */
768  .ccid_id = DCCPC_CCID2,
769  .ccid_name = "TCP-like",
770  .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
771  .ccid_hc_tx_init = ccid2_hc_tx_init,
772  .ccid_hc_tx_exit = ccid2_hc_tx_exit,
773  .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
774  .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
775  .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
776  .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
777  .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
778  .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
779 };
780 
/*
 * Expose the ccid2_debug flag as a module parameter (mode 0644). It exists
 * only in CONFIG_IP_DCCP_CCID2_DEBUG builds, where it gates the output of
 * ccid2_pr_debug().
 */
781 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
782 module_param(ccid2_debug, bool, 0644);
783 MODULE_PARM_DESC(ccid2_debug, "Enable CCID-2 debug messages");
784 #endif