Linux Kernel
3.7.1
Main Page
Related Pages
Modules
Namespaces
Data Structures
Files
File List
Globals
All
Data Structures
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
Pages
net
ipv4
tcp_vegas.c
Go to the documentation of this file.
1
/*
 * TCP Vegas congestion control
 *
 * This is based on the congestion detection/avoidance scheme described in
 *    Lawrence S. Brakmo and Larry L. Peterson.
 *    "TCP Vegas: End to end congestion avoidance on a global internet."
 *    IEEE Journal on Selected Areas in Communication, 13(8):1465--1480,
 *    October 1995. Available from:
 *	ftp://ftp.cs.arizona.edu/xkernel/Papers/jsac.ps
 *
 * See http://www.cs.arizona.edu/xkernel/ for their implementation.
 * The main aspects that distinguish this implementation from the
 * Arizona Vegas implementation are:
 *   o We do not change the loss detection or recovery mechanisms of
 *     Linux in any way. Linux already recovers from losses quite well,
 *     using fine-grained timers, NewReno, and FACK.
 *   o To avoid the performance penalty imposed by increasing cwnd
 *     only every-other RTT during slow start, we increase during
 *     every RTT during slow start, just like Reno.
 *   o Largely to allow continuous cwnd growth during slow start,
 *     we use the rate at which ACKs come back as the "actual"
 *     rate, rather than the rate at which data is sent.
 *   o To speed convergence to the right rate, we set the cwnd
 *     to achieve the right ("actual") rate when we exit slow start.
 *   o To filter out the noise caused by delayed ACKs, we use the
 *     minimum RTT sample observed during the last RTT to calculate
 *     the actual rate.
 *   o When the sender re-starts from idle, it waits until it has
 *     received ACKs for an entire flight of new data before making
 *     a cwnd adjustment decision. The original Vegas implementation
 *     assumed senders never went idle.
 */
33
34
#include <
linux/mm.h
>
35
#include <linux/module.h>
36
#include <
linux/skbuff.h
>
37
#include <linux/inet_diag.h>
38
39
#include <
net/tcp.h
>
40
41
#include "
tcp_vegas.h
"
42
43
static
int
alpha
= 2;
44
static
int
beta = 4;
45
static
int
gamma
= 1;
46
47
module_param
(
alpha
,
int
, 0644);
48
MODULE_PARM_DESC
(
alpha
,
"lower bound of packets in network"
);
49
module_param
(beta,
int
, 0644);
50
MODULE_PARM_DESC
(beta,
"upper bound of packets in network"
);
51
module_param
(
gamma
,
int
, 0644);
52
MODULE_PARM_DESC
(
gamma
,
"limit on increase (scale by 2)"
);
53
54
55
/* There are several situations when we must "re-start" Vegas:
56
*
57
* o when a connection is established
58
* o after an RTO
59
* o after fast recovery
60
* o when we send a packet and there is no outstanding
61
* unacknowledged data (restarting an idle connection)
62
*
63
* In these circumstances we cannot do a Vegas calculation at the
64
* end of the first RTT, because any calculation we do is using
65
* stale info -- both the saved cwnd and congestion feedback are
66
* stale.
67
*
68
* Instead we must wait until the completion of an RTT during
69
* which we actually receive ACKs.
70
*/
71
static
void
vegas_enable(
struct
sock
*
sk
)
72
{
73
const
struct
tcp_sock
*tp = tcp_sk(sk);
74
struct
vegas
*
vegas
= inet_csk_ca(sk);
75
76
/* Begin taking Vegas samples next time we send something. */
77
vegas->
doing_vegas_now
= 1;
78
79
/* Set the beginning of the next send window. */
80
vegas->
beg_snd_nxt
= tp->
snd_nxt
;
81
82
vegas->
cntRTT
= 0;
83
vegas->
minRTT
= 0x7fffffff;
84
}
85
86
/* Stop taking Vegas samples for now. */
87
static
inline
void
vegas_disable(
struct
sock
*
sk
)
88
{
89
struct
vegas
*
vegas
= inet_csk_ca(sk);
90
91
vegas->
doing_vegas_now
= 0;
92
}
93
94
void
tcp_vegas_init
(
struct
sock
*
sk
)
95
{
96
struct
vegas
*
vegas
= inet_csk_ca(sk);
97
98
vegas->
baseRTT
= 0x7fffffff;
99
vegas_enable(sk);
100
}
101
EXPORT_SYMBOL_GPL
(
tcp_vegas_init
);
102
103
/* Do RTT sampling needed for Vegas.
104
* Basically we:
105
* o min-filter RTT samples from within an RTT to get the current
106
* propagation delay + queuing delay (we are min-filtering to try to
107
* avoid the effects of delayed ACKs)
108
* o min-filter RTT samples from a much longer window (forever for now)
109
* to find the propagation delay (baseRTT)
110
*/
111
void
tcp_vegas_pkts_acked
(
struct
sock
*
sk
,
u32
cnt
,
s32
rtt_us)
112
{
113
struct
vegas
*
vegas
= inet_csk_ca(sk);
114
u32
vrtt;
115
116
if
(rtt_us < 0)
117
return
;
118
119
/* Never allow zero rtt or baseRTT */
120
vrtt = rtt_us + 1;
121
122
/* Filter to find propagation delay: */
123
if
(vrtt < vegas->
baseRTT
)
124
vegas->
baseRTT
= vrtt;
125
126
/* Find the min RTT during the last RTT to find
127
* the current prop. delay + queuing delay:
128
*/
129
vegas->
minRTT
=
min
(vegas->
minRTT
, vrtt);
130
vegas->
cntRTT
++;
131
}
132
EXPORT_SYMBOL_GPL
(
tcp_vegas_pkts_acked
);
133
134
/* Congestion-state change hook: Vegas sampling is only active while
 * the connection is in TCP_CA_Open; any other state disables it.
 */
void tcp_vegas_state(struct sock *sk, u8 ca_state)
{
	if (ca_state != TCP_CA_Open) {
		vegas_disable(sk);
		return;
	}

	vegas_enable(sk);
}
EXPORT_SYMBOL_GPL(tcp_vegas_state);
143
144
/*
145
* If the connection is idle and we are restarting,
146
* then we don't want to do any Vegas calculations
147
* until we get fresh RTT samples. So when we
148
* restart, we reset our Vegas state to a clean
149
* slate. After we get acks for this flight of
150
* packets, _then_ we can make Vegas calculations
151
* again.
152
*/
153
void
tcp_vegas_cwnd_event
(
struct
sock
*
sk
,
enum
tcp_ca_event
event
)
154
{
155
if
(event ==
CA_EVENT_CWND_RESTART
||
156
event ==
CA_EVENT_TX_START
)
157
tcp_vegas_init
(sk);
158
}
159
EXPORT_SYMBOL_GPL
(
tcp_vegas_cwnd_event
);
160
161
static
inline
u32
tcp_vegas_ssthresh(
struct
tcp_sock
*tp)
162
{
163
return
min
(tp->
snd_ssthresh
, tp->
snd_cwnd
-1);
164
}
165
166
/* Vegas congestion-avoidance hook.
 *
 * While Vegas sampling is disabled this defers entirely to Reno.
 * Otherwise, once per RTT (detected by @ack passing beg_snd_nxt), the
 * RTT samples gathered since the previous adjustment are used to move
 * cwnd up or down; between those points only normal slow start is
 * performed.
 *
 * @sk:        socket being serviced
 * @ack:       sequence number being acknowledged
 * @in_flight: passed through unchanged to tcp_reno_cong_avoid()
 */
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct vegas *vegas = inet_csk_ca(sk);

	if (!vegas->doing_vegas_now) {
		tcp_reno_cong_avoid(sk, ack, in_flight);
		return;
	}

	if (after(ack, vegas->beg_snd_nxt)) {
		/* Do the Vegas once-per-RTT cwnd adjustment. */

		/* Save the extent of the current window so we can use this
		 * at the end of the next RTT.
		 */
		vegas->beg_snd_nxt = tp->snd_nxt;

		/* We do the Vegas calculations only if we got enough RTT
		 * samples that we can be reasonably sure that we got
		 * at least one RTT sample that wasn't from a delayed ACK.
		 * If we only had 2 samples total,
		 * then that means we're getting only 1 ACK per RTT, which
		 * means they're almost certainly delayed ACKs.
		 * If we have 3 samples, we should be OK.
		 */
		if (vegas->cntRTT <= 2) {
			/* We don't have enough RTT samples to do the Vegas
			 * calculation, so we'll behave like Reno.
			 */
			tcp_reno_cong_avoid(sk, ack, in_flight);
		} else {
			u32 rtt, diff;
			u64 target_cwnd;

			/* We have enough RTT samples, so, using the Vegas
			 * algorithm, we determine if we should increase or
			 * decrease cwnd, and by how much.
			 */

			/* Pluck out the RTT we are using for the Vegas
			 * calculations. This is the min RTT seen during the
			 * last RTT. Taking the min filters out the effects
			 * of delayed ACKs, at the cost of noticing congestion
			 * a bit later.
			 */
			rtt = vegas->minRTT;

			/* Calculate the cwnd we should have, if we weren't
			 * going too fast.
			 *
			 * This is:
			 *     (actual rate in segments) * baseRTT
			 *
			 * The product of two u32 values can exceed 32 bits,
			 * so widen before multiplying; do_div() is used for
			 * the 64-by-32 division so that this also builds on
			 * 32-bit architectures.
			 */
			target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT;
			do_div(target_cwnd, rtt);

			/* Calculate the difference between the window we had,
			 * and the window we would like to have. This quantity
			 * is the "Diff" from the Arizona Vegas papers.
			 */
			diff = tp->snd_cwnd * (rtt - vegas->baseRTT) / vegas->baseRTT;

			if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) {
				/* Going too fast. Time to slow down
				 * and switch to congestion avoidance.
				 */

				/* Set cwnd to match the actual rate
				 * exactly:
				 *   cwnd = (actual rate) * baseRTT
				 * Then we add 1 because the integer
				 * truncation robs us of full link
				 * utilization.
				 */
				tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd + 1);
				tp->snd_ssthresh = tcp_vegas_ssthresh(tp);

			} else if (tp->snd_cwnd <= tp->snd_ssthresh) {
				/* Slow start. */
				tcp_slow_start(tp);
			} else {
				/* Congestion avoidance. */

				/* Figure out where we would like cwnd
				 * to be.
				 */
				if (diff > beta) {
					/* The old window was too fast, so
					 * we slow down.
					 */
					tp->snd_cwnd--;
					tp->snd_ssthresh
						= tcp_vegas_ssthresh(tp);
				} else if (diff < alpha) {
					/* We don't have enough extra packets
					 * in the network, so speed up.
					 */
					tp->snd_cwnd++;
				} else {
					/* Sending just as fast as we
					 * should be.
					 */
				}
			}

			/* Keep cwnd within [2, snd_cwnd_clamp]. */
			if (tp->snd_cwnd < 2)
				tp->snd_cwnd = 2;
			else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
				tp->snd_cwnd = tp->snd_cwnd_clamp;

			tp->snd_ssthresh = tcp_current_ssthresh(sk);
		}

		/* Wipe the slate clean for the next RTT. */
		vegas->cntRTT = 0;
		vegas->minRTT = 0x7fffffff;
	} else if (tp->snd_cwnd <= tp->snd_ssthresh) {
		/* Use normal slow start */
		tcp_slow_start(tp);
	}
}
289
290
/* Extract info for Tcp socket info provided via netlink. */
291
void
tcp_vegas_get_info
(
struct
sock
*sk,
u32
ext
,
struct
sk_buff
*
skb
)
292
{
293
const
struct
vegas *
ca
= inet_csk_ca(sk);
294
if
(ext & (1 << (
INET_DIAG_VEGASINFO
- 1))) {
295
struct
tcpvegas_info
info = {
296
.
tcpv_enabled
= ca->
doing_vegas_now
,
297
.tcpv_rttcnt = ca->
cntRTT
,
298
.tcpv_rtt = ca->
baseRTT
,
299
.tcpv_minrtt = ca->
minRTT
,
300
};
301
302
nla_put
(skb,
INET_DIAG_VEGASINFO
,
sizeof
(info), &info);
303
}
304
}
305
EXPORT_SYMBOL_GPL
(
tcp_vegas_get_info
);
306
307
/* Congestion-control operations table for the "vegas" algorithm.
 * The ssthresh and min_cwnd callbacks are the Reno ones; the remaining
 * callbacks are the Vegas functions defined in this file.
 */
static struct tcp_congestion_ops tcp_vegas __read_mostly = {
	.name		= "vegas",
	.owner		= THIS_MODULE,
	.flags		= TCP_CONG_RTT_STAMP,

	.init		= tcp_vegas_init,
	.set_state	= tcp_vegas_state,
	.cwnd_event	= tcp_vegas_cwnd_event,
	.pkts_acked	= tcp_vegas_pkts_acked,
	.cong_avoid	= tcp_vegas_cong_avoid,
	.get_info	= tcp_vegas_get_info,

	.ssthresh	= tcp_reno_ssthresh,
	.min_cwnd	= tcp_reno_min_cwnd,
};
321
322
/* Module entry point: register the Vegas operations table.
 *
 * The build-time check ensures struct vegas fits in the per-socket
 * private congestion-control area (ICSK_CA_PRIV_SIZE).
 *
 * Returns the result of tcp_register_congestion_control() so that a
 * failed registration also fails the module load, instead of leaving a
 * loaded module whose exit path would unregister an entry that was
 * never added.
 */
static int __init tcp_vegas_register(void)
{
	BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE);

	return tcp_register_congestion_control(&tcp_vegas);
}
328
329
/* Module exit point: remove the Vegas operations table again. */
static void __exit tcp_vegas_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_vegas);
}
333
334
module_init
(tcp_vegas_register);
335
module_exit
(tcp_vegas_unregister);
336
337
MODULE_AUTHOR
(
"Stephen Hemminger"
);
338
MODULE_LICENSE
(
"GPL"
);
339
MODULE_DESCRIPTION
(
"TCP Vegas"
);
Generated on Thu Jan 10 2013 14:58:59 for Linux Kernel by
1.8.2