64 #define pr_fmt(fmt) "TCP: " fmt
67 #include <linux/slab.h>
68 #include <linux/module.h>
69 #include <linux/sysctl.h>
70 #include <linux/kernel.h>
75 #include <asm/unaligned.h>
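/* Flags describing what an incoming ACK carried; they are set while the
 * ACK is processed and consumed by tcp_ack() and the congestion state
 * machine below.
 */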
106 #define FLAG_DATA 0x01
107 #define FLAG_WIN_UPDATE 0x02
108 #define FLAG_DATA_ACKED 0x04
109 #define FLAG_RETRANS_DATA_ACKED 0x08
110 #define FLAG_SYN_ACKED 0x10
111 #define FLAG_DATA_SACKED 0x20
112 #define FLAG_ECE 0x40
113 #define FLAG_SLOWPATH 0x100
114 #define FLAG_ONLY_ORIG_SACKED 0x200
115 #define FLAG_SND_UNA_ADVANCED 0x400
116 #define FLAG_DSACKING_ACK 0x800
117 #define FLAG_NONHEAD_RETRANS_ACKED 0x1000
118 #define FLAG_SACK_RENEGING 0x2000
120 #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
121 #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
122 #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
123 #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
124 #define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
126 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
127 #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
135 const unsigned int lss = icsk->icsk_ack.last_seg_size;
143 len = skb_shinfo(skb)->gso_size ? : skb->len;
144 if (len >= icsk->icsk_ack.rcv_mss) {
152 len += skb->data - skb_transport_header(skb);
165 len -= tcp_sk(sk)->tcp_header_len;
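/* Quick-ack mode temporarily disables delayed ACKs so that every segment
 * is acknowledged immediately, e.g. at connection start or when data
 * arrives out of order.
 */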
178 static void tcp_incr_quickack(struct sock *sk)
181 unsigned int quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
185 if (quickacks > icsk->icsk_ack.quick)
189 static void tcp_enter_quickack_mode(struct sock *sk)
192 tcp_incr_quickack(sk);
201 static inline bool tcp_in_quickack_mode(const struct sock *sk)
208 static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
216 if (tcp_hdr(skb)->cwr)
220 static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)
225 static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
237 tcp_enter_quickack_mode((struct sock *)tp);
242 tcp_enter_quickack_mode((struct sock *)tp);
251 static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
257 static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
263 static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
275 static void tcp_fixup_sndbuf(struct sock *sk)
310 static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
314 int truesize = tcp_win_from_space(skb->truesize) >> 1;
318 if (truesize <= skb->len)
319 return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
327 static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
334 !sk_under_memory_pressure(sk)) {
343 incr = __tcp_grow_window(sk, skb);
346 incr = max_t(int, incr, 2 * skb->len);
349 inet_csk(sk)->icsk_ack.quick |= 1;
356 static void tcp_fixup_rcvbuf(struct sock *sk)
358 u32 mss = tcp_sk(sk)->advmss;
369 while (tcp_win_from_space(rcvmem) < mss)
387 tcp_fixup_rcvbuf(sk);
389 tcp_fixup_sndbuf(sk);
393 maxwin = tcp_full_space(sk);
415 static void tcp_clamp_window(struct sock *sk)
424 !sk_under_memory_pressure(sk) &&
425 sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
442 const struct tcp_sock *tp = tcp_sk(sk);
449 inet_csk(sk)->icsk_ack.rcv_mss = hint;
472 if (new_sample != 0) {
484 m -= (new_sample >> 3);
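/* Receiver-side RTT estimation (from in-order arrivals or the timestamp
 * option); it feeds receive-buffer auto-tuning, not the retransmit timer.
 */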
500 static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
513 static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
517 if (tp->rx_opt.rcv_tsecr &&
551 int new_clamp = space;
561 while (tcp_win_from_space(rcvmem) < tp->advmss)
589 static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
595 inet_csk_schedule_ack(sk);
597 tcp_measure_rcv_mss(sk, skb);
599 tcp_rcv_rtt_measure(tp);
607 tcp_incr_quickack(sk);
610 int m = now - icsk->icsk_ack.lrcvtime;
615 } else if (m < icsk->icsk_ack.ato) {
623 tcp_incr_quickack(sk);
629 TCP_ECN_check_ce(tp, skb);
632 tcp_grow_window(sk, skb);
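/* Classic Jacobson/Karels smoothing (see RFC 6298), kept in scaled
 * fixed-point form: roughly srtt = 7/8 * srtt + 1/8 * m and
 * mdev = 3/4 * mdev + 1/4 * |m - srtt|; the RTO then derives from
 * srtt plus four times the variance estimate.
 */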
644 static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
668 m -= (tp->srtt >> 3);
672 m -= (tp->mdev >> 2);
684 m -= (tp->mdev >> 2);
712 const struct tcp_sock *tp = tcp_sk(sk);
723 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
759 static void tcp_dsack_seen(struct tcp_sock *tp)
764 static void tcp_update_reordering(struct sock *sk, const int metric,
776 else if (tcp_is_reno(tp))
778 else if (tcp_is_fack(tp))
784 #if FASTRETRANS_DEBUG > 1
785 pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
786 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
796 tcp_disable_early_retrans(tp);
800 static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
812 static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
815 tcp_verify_retransmit_hint(tp, skb);
817 tp->lost_out += tcp_skb_pcount(skb);
822 static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp,
825 tcp_verify_retransmit_hint(tp, skb);
828 tp->lost_out += tcp_skb_pcount(skb);
927 static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
928 u32 start_seq, u32 end_seq)
931 if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq))
935 if (!before(start_seq, tp->snd_nxt))
961 return !before(start_seq, end_seq - tp->max_window);
973 static void tcp_mark_lost_retrans(struct sock *sk)
980 u32 received_upto = tcp_highest_sack_seq(tp);
990 if (skb == tcp_send_head(sk))
1011 if (after(received_upto, ack_seq)) {
1012 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1015 tcp_skb_mark_lost_uncond_verify(tp, skb);
1018 if (before(ack_seq, new_low_seq))
1019 new_low_seq = ack_seq;
1020 cnt += tcp_skb_pcount(skb);
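/* RFC 2883 D-SACK detection: the first SACK block reports a duplicate if
 * it lies below the cumulative ACK or is fully covered by the second block.
 */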
1028 static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1035 bool dup_sack = false;
1037 if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
1041 } else if (num_sacks > 1) {
1045 if (!after(end_seq_0, end_seq_1) &&
1046 !before(start_seq_0, start_seq_1)) {
1056 !after(end_seq_0, prior_snd_una) &&
1077 static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
1078 u32 start_seq, u32 end_seq)
1088 if (tcp_skb_pcount(skb) > 1 && !in_sack &&
1090 mss = tcp_skb_mss(skb);
1106 if (pkt_len > mss) {
1107 unsigned int new_len = (pkt_len / mss) * mss;
1108 if (!in_sack && new_len < pkt_len) {
1110 if (new_len > skb->len)
1124 static u8 tcp_sacktag_one(struct sock *sk,
1126 u32 start_seq, u32 end_seq,
1127 bool dup_sack, int pcount)
1146 if (sacked & TCPCB_SACKED_RETRANS) {
1157 if (!(sacked & TCPCB_RETRANS)) {
1161 if (before(start_seq,
1162 tcp_highest_sack_seq(tp)))
1172 sacked &= ~TCPCB_LOST;
1181 fack_count += pcount;
1196 if (dup_sack && (sacked & TCPCB_SACKED_RETRANS)) {
1197 sacked &= ~TCPCB_SACKED_RETRANS;
1207 static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
1209 unsigned int pcount, int shifted, int mss,
1213 struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
1215 u32 end_seq = start_seq + shifted;
1225 tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
1226 start_seq, end_seq, dup_sack, pcount);
1234 skb_shinfo(prev)->gso_segs += pcount;
1235 BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
1236 skb_shinfo(skb)->gso_segs -= pcount;
1243 if (!skb_shinfo(prev)->gso_size) {
1244 skb_shinfo(prev)->gso_size = mss;
1249 if (skb_shinfo(skb)->gso_segs <= 1) {
1250 skb_shinfo(skb)->gso_size = 0;
1251 skb_shinfo(skb)->gso_type = 0;
1258 BUG_ON(!tcp_skb_pcount(skb));
1275 if (skb == tcp_highest_sack(sk))
1276 tcp_advance_highest_sack(sk, skb);
1278 tcp_unlink_write_queue(skb, sk);
1279 sk_wmem_free_skb(sk, skb);
1289 static int tcp_skb_seglen(const struct sk_buff *skb)
1291 return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
1295 static int skb_can_shift(const struct sk_buff *skb)
1297 return !skb_headlen(skb) && skb_is_nonlinear(skb);
1305 u32 start_seq, u32 end_seq,
1315 if (!sk_can_gso(sk))
1322 if (!skb_can_shift(skb))
1329 if (unlikely(skb == tcp_write_queue_head(sk)))
1331 prev = tcp_write_queue_prev(sk, skb);
1341 pcount = tcp_skb_pcount(skb);
1342 mss = tcp_skb_seglen(skb);
1347 if (mss != tcp_skb_seglen(prev))
1356 if (tcp_skb_pcount(skb) <= 1)
1383 mss = tcp_skb_mss(skb);
1388 if (mss != tcp_skb_seglen(prev))
1393 } else if (len < mss) {
1407 if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
1413 if (prev == tcp_write_queue_tail(sk))
1415 skb = tcp_write_queue_next(sk, prev);
1417 if (!skb_can_shift(skb) ||
1418 (skb == tcp_send_head(sk)) ||
1419 ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
1420 (mss != tcp_skb_seglen(skb)))
1425 pcount += tcp_skb_pcount(skb);
1426 tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
1444 u32 start_seq, u32 end_seq,
1452 bool dup_sack = dup_sack_in;
1454 if (skb == tcp_send_head(sk))
1461 if ((next_dup != NULL) &&
1463 in_sack = tcp_match_skb_to_sack(sk, skb,
1475 tmp = tcp_shift_skb_data(sk, skb, state,
1476 start_seq, end_seq, dup_sack);
1485 in_sack = tcp_match_skb_to_sack(sk, skb,
1502 tcp_skb_pcount(skb));
1505 tcp_highest_sack_seq(tp)))
1506 tcp_advance_highest_sack(sk, skb);
1522 if (skb == tcp_send_head(sk))
1533 static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
1539 if (next_dup == NULL)
1542 if (before(next_dup->start_seq, skip_to_seq)) {
1543 skb = tcp_sacktag_skip(skb, sk, state, next_dup->start_seq);
1544 skb = tcp_sacktag_walk(skb, sk, NULL, state,
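/* Walk the retransmit queue and tag the skbs covered by the SACK blocks
 * carried in this ACK, updating the sacked_out/lost_out/retrans_out
 * bookkeeping as we go.
 */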
1558 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
1563 const unsigned char *ptr = (skb_transport_header(ack_skb) +
1572 bool found_dup_sack = false;
1574 int first_sack_index;
1582 tcp_highest_sack_reset(sk);
1585 found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
1586 num_sacks, prior_snd_una);
1601 first_sack_index = 0;
1602 for (i = 0; i < num_sacks; i++) {
1603 bool dup_sack = !i && found_dup_sack;
1608 if (!tcp_is_sackblock_valid(tp, dup_sack,
1609 sp[used_sacks].start_seq,
1610 sp[used_sacks].end_seq)) {
1628 first_sack_index = -1;
1633 if (!after(sp[used_sacks].end_seq, prior_snd_una))
1640 for (i = used_sacks - 1; i > 0; i--) {
1641 for (j = 0; j < i; j++) {
1642 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1643 swap(sp[j], sp[j + 1]);
1646 if (j == first_sack_index)
1647 first_sack_index = j + 1;
1652 skb = tcp_write_queue_head(sk);
1662 while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
1667 while (i < used_sacks) {
1670 bool dup_sack = (found_dup_sack && (i == first_sack_index));
1673 if (found_dup_sack && ((i + 1) == first_sack_index))
1674 next_dup = &sp[i + 1];
1677 while (tcp_sack_cache_ok(tp, cache) &&
1678 !before(start_seq, cache->end_seq))
1682 if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
1686 if (before(start_seq, cache->start_seq)) {
1687 skb = tcp_sacktag_skip(skb, sk, &state,
1689 skb = tcp_sacktag_walk(skb, sk, next_dup,
1700 skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
1705 if (tcp_highest_sack_seq(tp) == cache->end_seq) {
1707 skb = tcp_highest_sack(sk);
1715 skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
1721 if (!before(start_seq, tcp_highest_sack_seq(tp))) {
1722 skb = tcp_highest_sack(sk);
1727 skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
1730 skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
1731 start_seq, end_seq, dup_sack);
1748 for (j = 0; j < used_sacks; j++)
1751 tcp_mark_lost_retrans(sk);
1762 #if FASTRETRANS_DEBUG > 0
1766 WARN_ON((int)tcp_packets_in_flight(tp) < 0);
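/* NewReno path: without SACK, duplicate ACKs are counted in sacked_out to
 * emulate SACK accounting; keep that estimate within packets_out.
 */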
1774 static bool tcp_limit_reno_sacked(struct tcp_sock *tp)
1792 static void tcp_check_reno_reordering(struct sock *sk, const int addend)
1795 if (tcp_limit_reno_sacked(tp))
1796 tcp_update_reordering(sk, tp->packets_out + addend, 0);
1801 static void tcp_add_reno_sack(struct sock *sk)
1805 tcp_check_reno_reordering(sk, 0);
1811 static void tcp_remove_reno_sacks(struct sock *sk, int acked)
1822 tcp_check_reno_reordering(sk, acked);
1826 static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
1831 static int tcp_is_sackfrto(const struct tcp_sock *tp)
1833 return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
1841 const struct tcp_sock *tp = tcp_sk(sk);
1852 if (tcp_is_sackfrto(tp))
1859 skb = tcp_write_queue_head(sk);
1860 if (tcp_skb_is_last(sk, skb))
1862 skb = tcp_write_queue_next(sk, skb);
1864 if (skb == tcp_send_head(sk))
1869 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
1929 skb = tcp_write_queue_head(sk);
1932 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
1933 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
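/* F-RTO (RFC 4138): when the ACKs following an RTO show the timeout was
 * not spurious, fall back to conventional RTO loss recovery.
 */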
1960 static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
1967 if (tcp_is_reno(tp))
1968 tcp_reset_reno_sack(tp);
1971 if (skb == tcp_send_head(sk))
1981 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
1988 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2000 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2002 tp->lost_out += tcp_skb_pcount(skb);
2008 tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
2015 sysctl_tcp_reordering);
2018 TCP_ECN_queue_cwr(tp);
2020 tcp_clear_all_retrans_hints(tp);
2023 static void tcp_clear_retrans_partial(struct tcp_sock *tp)
2034 tcp_clear_retrans_partial(tp);
2062 tcp_clear_retrans_partial(tp);
2064 if (tcp_is_reno(tp))
2065 tcp_reset_reno_sack(tp);
2075 tcp_clear_all_retrans_hints(tp);
2078 if (skb == tcp_send_head(sk))
2083 TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
2084 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
2085 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
2087 tp->lost_out += tcp_skb_pcount(skb);
2097 TCP_ECN_queue_cwr(tp);
2108 static bool tcp_check_sack_reneging(struct sock *sk, int flag)
2124 static inline int tcp_fackets_out(const struct tcp_sock *tp)
2144 static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2149 static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
2152 unsigned long delay;
2158 if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt)
2162 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
2170 static inline int tcp_skb_timedout(const struct sock *sk,
2176 static inline int tcp_head_timedout(const struct sock *sk)
2178 const struct tcp_sock *tp = tcp_sk(sk);
2181 tcp_skb_timedout(sk, tcp_write_queue_head(sk));
2277 static bool tcp_time_to_recover(struct sock *sk, int flag)
2291 if (tcp_dupack_heuristics(tp) > tp->reordering)
2297 if (tcp_is_fack(tp) && tcp_head_timedout(sk))
2318 if ((tp->thin_dupack || sysctl_tcp_thin_dupack) &&
2319 tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 &&
2320 tcp_is_sack(tp) && !tcp_send_head(sk))
2331 return !tcp_pause_early_retransmit(sk, flag);
2348 static void tcp_timeout_skbs(struct sock *sk)
2353 if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
2358 skb = tcp_write_queue_head(sk);
2361 if (skb == tcp_send_head(sk))
2363 if (!tcp_skb_timedout(sk, skb))
2366 tcp_skb_mark_lost(tp, skb);
2380 static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
2395 if (mark_head && skb != tcp_write_queue_head(sk))
2398 skb = tcp_write_queue_head(sk);
2403 if (skb == tcp_send_head(sk))
2414 if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
2415 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
2416 cnt += tcp_skb_pcount(skb);
2418 if (cnt > packets) {
2419 if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
2420 (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
2421 (oldcnt >= packets))
2424 mss = skb_shinfo(skb)->gso_size;
2425 err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
2431 tcp_skb_mark_lost(tp, skb);
2441 static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2445 if (tcp_is_reno(tp)) {
2446 tcp_mark_head_lost(sk, 1, 1);
2447 } else if (tcp_is_fack(tp)) {
2451 tcp_mark_head_lost(sk, lost, 0);
2454 if (sacked_upto >= 0)
2455 tcp_mark_head_lost(sk, sacked_upto, 0);
2456 else if (fast_rexmit)
2457 tcp_mark_head_lost(sk, 1, 1);
2460 tcp_timeout_skbs(sk);
2466 static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
2469 tcp_packets_in_flight(tp) + tcp_max_burst(tp));
2476 static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
2485 #if FASTRETRANS_DEBUG > 1
2491 if (sk->sk_family == AF_INET) {
2492 pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2499 #if IS_ENABLED(CONFIG_IPV6)
2500 else if (sk->sk_family == AF_INET6) {
2502 pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2512 #define DBGUNDO(x...) do { } while (0)
2515 static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
2529 TCP_ECN_withdraw_cwr(tp);
2537 static inline bool tcp_may_undo(const struct tcp_sock *tp)
2543 static bool tcp_try_undo_recovery(struct sock *sk)
2547 if (tcp_may_undo(tp)) {
2554 tcp_undo_cwr(sk, true);
2567 tcp_moderate_cwnd(tp);
2575 static void tcp_try_undo_dsack(struct sock *sk)
2581 tcp_undo_cwr(sk, true);
2601 static bool tcp_any_retrans_done(const struct sock *sk)
2603 const struct tcp_sock *tp = tcp_sk(sk);
2609 skb = tcp_write_queue_head(sk);
2618 static int tcp_try_undo_partial(struct sock *sk, int acked)
2622 int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
2624 if (tcp_may_undo(tp)) {
2628 if (!tcp_any_retrans_done(sk))
2631 tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
2634 tcp_undo_cwr(sk, false);
2647 static bool tcp_try_undo_loss(struct sock *sk)
2651 if (tcp_may_undo(tp)) {
2654 if (skb == tcp_send_head(sk))
2659 tcp_clear_all_retrans_hints(tp);
2663 tcp_undo_cwr(sk, true);
2665 inet_csk(sk)->icsk_retransmits = 0;
2667 if (tcp_is_sack(tp))
2684 static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
2695 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
2696 TCP_ECN_queue_cwr(tp);
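/* Proportional Rate Reduction: during CWR/Recovery, reduce cwnd gradually
 * toward ssthresh, pacing the reduction against newly delivered packets.
 */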
2699 static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
2712 sndcnt = min_t(int, delta,
2714 newly_acked_sacked) + 1);
2717 sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
2718 tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
2721 static inline void tcp_end_cwnd_reduction(struct sock *sk)
2726 if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
2741 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2743 tcp_init_cwnd_reduction(sk, set_ssthresh);
2748 static void tcp_try_keep_open(struct sock *sk)
2753 if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
2756 if (inet_csk(sk)->icsk_ca_state != state) {
2757 tcp_set_ca_state(sk, state);
2762 static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
2771 if (flag & FLAG_ECE)
2774 if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
2775 tcp_try_keep_open(sk);
2777 tcp_moderate_cwnd(tp);
2779 tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
2783 static void tcp_mtup_probe_failed(struct sock *sk)
2791 static void tcp_mtup_probe_success(struct sock *sk)
2823 if (skb == tcp_send_head(sk))
2825 if (tcp_skb_seglen(skb) > mss &&
2826 !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
2827 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2828 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
2831 tcp_skb_mark_lost_uncond_verify(tp, skb);
2835 tcp_clear_retrans_hints_partial(tp);
2840 if (tcp_is_reno(tp))
2841 tcp_limit_reno_sacked(tp);
2861 static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2866 if (tcp_is_reno(tp))
2877 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2880 tcp_init_cwnd_reduction(sk, true);
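/* Main loss-recovery state machine: examines a "dubious" ACK and moves the
 * socket between the Open, Disorder, CWR, Recovery and Loss CA states.
 */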
2896 static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
2897 int prior_sacked, bool is_dupack,
2904 int newly_acked_sacked = 0;
2905 int fast_rexmit = 0;
2914 if (flag & FLAG_ECE)
2918 if (tcp_check_sack_reneging(sk, flag))
2933 if (tcp_try_undo_recovery(sk))
2941 tcp_end_cwnd_reduction(sk);
2947 if (tcp_is_reno(tp))
2948 tcp_reset_reno_sack(tp);
2949 if (tcp_try_undo_recovery(sk))
2951 tcp_end_cwnd_reduction(sk);
2960 if (tcp_is_reno(tp) && is_dupack)
2961 tcp_add_reno_sack(sk);
2963 do_lost = tcp_try_undo_partial(sk, pkts_acked);
2964 newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
2967 if (flag & FLAG_DATA_ACKED)
2969 if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
2970 tcp_reset_reno_sack(tp);
2971 if (!tcp_try_undo_loss(sk)) {
2972 tcp_moderate_cwnd(tp);
2980 if (tcp_is_reno(tp)) {
2981 if (flag & FLAG_SND_UNA_ADVANCED)
2982 tcp_reset_reno_sack(tp);
2984 tcp_add_reno_sack(sk);
2986 newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
2989 tcp_try_undo_dsack(sk);
2991 if (!tcp_time_to_recover(sk, flag)) {
2992 tcp_try_to_open(sk, flag, newly_acked_sacked);
3000 tcp_mtup_probe_failed(sk);
3008 tcp_enter_recovery(sk, (flag & FLAG_ECE));
3012 if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
3013 tcp_update_scoreboard(sk, fast_rexmit);
3014 tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
3020 tcp_rtt_estimator(sk, seq_rtt);
3022 inet_csk(sk)->icsk_backoff = 0;
3029 static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
3051 static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
3068 static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
3071 const struct tcp_sock *tp = tcp_sk(sk);
3074 tcp_ack_saw_tstamp(sk, flag);
3075 else if (seq_rtt >= 0)
3076 tcp_ack_no_tstamp(sk, seq_rtt, flag);
3079 static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
3082 icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
3102 u32 rto = inet_csk(sk)->icsk_rto;
3105 struct sk_buff *skb = tcp_write_queue_head(sk);
3133 tcp_enter_recovery(sk, false);
3134 tcp_update_scoreboard(sk, 1);
3146 packets_acked = tcp_skb_pcount(skb);
3149 packets_acked -= tcp_skb_pcount(skb);
3151 if (packets_acked) {
3152 BUG_ON(tcp_skb_pcount(skb) == 0);
3156 return packets_acked;
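/* Remove fully acknowledged skbs from the retransmission queue and collect
 * RTT samples and reordering hints from them.
 */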
3163 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3170 int fully_acked = true;
3176 s32 ca_seq_rtt = -1;
3177 ktime_t last_ackt = net_invalid_timestamp();
3179 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3186 if (tcp_skb_pcount(skb) == 1 ||
3190 acked_pcount = tcp_tso_acked(sk, skb);
3194 fully_acked = false;
3196 acked_pcount = tcp_skb_pcount(skb);
3199 if (sacked & TCPCB_RETRANS) {
3200 if (sacked & TCPCB_SACKED_RETRANS)
3205 if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
3208 ca_seq_rtt = now - scb->when;
3211 seq_rtt = ca_seq_rtt;
3213 if (!(sacked & TCPCB_SACKED_ACKED))
3214 reord = min(pkts_acked, reord);
3217 if (sacked & TCPCB_SACKED_ACKED)
3223 pkts_acked += acked_pcount;
3242 tcp_unlink_write_queue(skb, sk);
3243 sk_wmem_free_skb(sk, skb);
3254 if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
3259 = inet_csk(sk)->icsk_ca_ops;
3263 tcp_mtup_probe_success(sk);
3266 tcp_ack_update_rtt(sk, flag, seq_rtt);
3269 if (tcp_is_reno(tp)) {
3270 tcp_remove_reno_sacks(sk, pkts_acked);
3275 if (reord < prior_fackets)
3276 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3278 delta = tcp_is_fack(tp) ? pkts_acked :
3292 !ktime_equal(last_ackt,
3293 net_invalid_timestamp()))
3296 else if (ca_seq_rtt >= 0)
3304 #if FASTRETRANS_DEBUG > 0
3309 icsk = inet_csk(sk);
3330 static void tcp_ack_probe(struct sock *sk)
3332 const struct tcp_sock *tp = tcp_sk(sk);
3337 if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
3350 static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
3356 static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
3358 const struct tcp_sock *tp = tcp_sk(sk);
3360 !tcp_in_cwnd_reduction(sk);
3366 static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3367 const u32 ack, const u32 ack_seq,
3380 static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3385 u32 nwin = ntohs(tcp_hdr(skb)->window);
3388 nwin <<= tp->rx_opt.snd_wscale;
3390 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3392 tcp_update_wl(tp, ack_seq);
3401 tcp_fast_path_check(sk);
3418 static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
3423 TCP_ECN_queue_cwr(tp);
3424 tcp_moderate_cwnd(tp);
3430 static void tcp_cwr_spur_to_response(struct sock *sk)
3435 static void tcp_undo_spur_to_response(struct sock *sk, int flag)
3437 if (flag & FLAG_ECE)
3438 tcp_cwr_spur_to_response(sk);
3440 tcp_undo_cwr(sk, true);
3473 static bool tcp_process_frto(struct sock *sk, int flag)
3480 if (flag & FLAG_DATA_ACKED)
3481 inet_csk(sk)->icsk_retransmits = 0;
3484 ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
3488 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
3492 if (!tcp_is_sackfrto(tp)) {
3500 if (!(flag & FLAG_DATA_ACKED)) {
3501 tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
3506 if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
3509 tcp_packets_in_flight(tp));
3518 if (!(flag & FLAG_FORWARD_PROGRESS) &&
3519 (flag & FLAG_NOT_DUP))
3522 tcp_enter_frto_loss(sk, 3, flag);
3529 tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
3533 tcp_enter_frto_loss(sk, 2, flag);
3537 switch (sysctl_tcp_frto_response) {
3539 tcp_undo_spur_to_response(sk, flag);
3542 tcp_conservative_spur_to_response(tp);
3545 tcp_cwr_spur_to_response(sk);
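/* Entry point for incoming ACK processing: window update, SACK tagging,
 * retransmit-queue cleanup, congestion control and loss recovery.
 */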
3556 static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3563 bool is_dupack = false;
3564 u32 prior_in_flight;
3569 bool frto_cwnd = false;
3574 if (before(ack, prior_snd_una))
3586 if (after(ack, prior_snd_una))
3589 if (sysctl_tcp_abc) {
3599 prior_in_flight = tcp_packets_in_flight(tp);
3606 tcp_update_wl(tp, ack_seq);
3619 flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3622 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3624 if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
3641 flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3646 frto_cwnd = tcp_process_frto(sk, flag);
3651 if (tcp_ack_is_dubious(sk, flag)) {
3653 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
3654 tcp_may_raise_cwnd(sk, flag))
3655 tcp_cong_avoid(sk, ack, prior_in_flight);
3656 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3657 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3660 if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
3661 tcp_cong_avoid(sk, ack, prior_in_flight);
3664 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3674 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3680 if (tcp_send_head(sk))
3693 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3694 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3707 const u8 **hvpp, int estab,
3710 const unsigned char *ptr;
3711 const struct tcphdr *th = tcp_hdr(skb);
3714 ptr = (const unsigned char *)(th + 1);
3717 while (length > 0) {
3731 if (opsize > length)
3736 u16 in_mss = get_unaligned_be16(ptr);
3750 if (snd_wscale > 14) {
3772 tcp_sack_reset(opt_rx);
3780 TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
3783 #ifdef CONFIG_TCP_MD5SIG
3823 foc == NULL || !th->syn || (opsize & 1))
3829 else if (foc->len != 0)
3841 static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
3847 tp->rx_opt.saw_tstamp = 1;
3860 static bool tcp_fast_parse_options(const struct sk_buff *skb,
3867 if (th->doff == (sizeof(*th) / 4)) {
3868 tp->rx_opt.saw_tstamp = 0;
3870 } else if (tp->rx_opt.tstamp_ok &&
3872 if (tcp_parse_aligned_timestamp(tp, th))
3879 #ifdef CONFIG_TCP_MD5SIG
3885 int length = (th->doff << 2) - sizeof(*th);
3886 const u8 *ptr = (const u8 *)(th + 1);
3892 while (length > 0) {
3904 if (opsize < 2 || opsize > length)
3917 static inline void tcp_store_ts_recent(struct tcp_sock *tp)
3923 static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3933 if (tcp_paws_check(&tp->rx_opt, 0))
3934 tcp_store_ts_recent(tp);
3961 static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
3963 const struct tcp_sock *tp = tcp_sk(sk);
3964 const struct tcphdr *th = tcp_hdr(skb);
3975 !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) &&
3978 (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
3981 static inline bool tcp_paws_discard(const struct sock *sk,
3984 const struct tcp_sock *tp = tcp_sk(sk);
3987 !tcp_disordered_ack(sk, skb);
4003 static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
4005 return !before(end_seq, tp->rcv_wup) &&
4013 switch (sk->sk_state) {
4048 static void tcp_fin(struct sock *sk)
4052 inet_csk_schedule_ack(sk);
4057 switch (sk->sk_state) {
4062 inet_csk(sk)->icsk_ack.pingpong = 1;
4092 pr_err("%s: Impossible, sk->sk_state=%d\n",
4093 __func__, sk->sk_state);
4101 if (tcp_is_sack(tp))
4102 tcp_sack_reset(&tp->rx_opt);
4130 static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4134 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4150 static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4155 tcp_dsack_set(sk, seq, end_seq);
4160 static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4167 tcp_enter_quickack_mode(sk);
4169 if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
4174 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
4184 static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4193 for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
4201 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4205 this_sack++, swalk++;
4209 static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4213 int cur_sacks = tp->rx_opt.num_sacks;
4219 for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
4220 if (tcp_sack_extend(sp, seq, end_seq)) {
4222 for (; this_sack > 0; this_sack--, sp--)
4223 swap(*sp, *(sp - 1));
4225 tcp_sack_maybe_coalesce(tp);
4241 for (; this_sack > 0; this_sack--, sp--)
4253 static void tcp_sack_remove(struct tcp_sock *tp)
4256 int num_sacks = tp->rx_opt.num_sacks;
4261 tp->rx_opt.num_sacks = 0;
4265 for (this_sack = 0; this_sack < num_sacks;) {
4274 for (i=this_sack+1; i < num_sacks; i++)
4282 tp->rx_opt.num_sacks = num_sacks;
4288 static void tcp_ofo_queue(struct sock *sk)
4298 if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
4299 __u32 dsack = dsack_high;
4300 if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
4302 tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
4306 SOCK_DEBUG(sk, "ofo packet was already received\n");
4311 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
4318 if (tcp_hdr(skb)->fin)
4323 static bool tcp_prune_ofo_queue(struct sock *sk);
4324 static int tcp_prune_queue(struct sock *sk);
4326 static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4330 !sk_rmem_schedule(sk, skb, size)) {
4332 if (tcp_prune_queue(sk) < 0)
4335 if (!sk_rmem_schedule(sk, skb, size)) {
4336 if (!tcp_prune_ofo_queue(sk))
4339 if (!sk_rmem_schedule(sk, skb, size))
4359 static bool tcp_try_coalesce(struct sock *sk,
4366 *fragstolen = false;
4368 if (tcp_hdr(from)->fin)
4379 sk_mem_charge(sk, delta);
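/* Queue an out-of-order segment: coalesce with neighbours where possible
 * and keep the SACK blocks advertised to the sender up to date.
 */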
4386 static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
4392 TCP_ECN_check_ce(tp, skb);
4402 inet_csk_schedule_ack(sk);
4405 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
4411 if (tcp_is_sack(tp)) {
4412 tp->rx_opt.num_sacks = 1;
4427 if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
4434 if (!tp->rx_opt.num_sacks ||
4455 if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
4461 tcp_dsack_set(sk, seq, end_seq);
4466 tcp_dsack_set(sk, seq,
4473 skb1 = skb_queue_prev(
4489 if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
4502 if (tcp_is_sack(tp))
4503 tcp_sack_new_ofo_skb(sk, seq, end_seq);
4506 skb_set_owner_r(skb, sk);
4515 __skb_pull(skb, hdrlen);
4517 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
4518 tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4521 skb_set_owner_r(skb, sk);
4539 if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th)))
4543 skb_reset_transport_header(skb);
4544 memset(th, 0, sizeof(*th));
4551 TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
4553 if (tcp_queue_rcv(sk, skb, sizeof(*th), &fragstolen)) {
4565 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4567 const struct tcphdr *th = tcp_hdr(skb);
4570 bool fragstolen = false;
4576 __skb_pull(skb, th->doff * 4);
4578 TCP_ECN_accept_cwr(tp, skb);
4587 if (tcp_receive_window(tp) == 0)
4603 eaten = (chunk == skb->len);
4612 tcp_try_rmem_schedule(sk, skb, skb->truesize))
4615 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
4619 tcp_event_data_recv(sk, skb);
4630 inet_csk(sk)->icsk_ack.pingpong = 0;
4633 if (tp->rx_opt.num_sacks)
4634 tcp_sack_remove(tp);
4636 tcp_fast_path_check(sk);
4651 tcp_enter_quickack_mode(sk);
4652 inet_csk_schedule_ack(sk);
4662 tcp_enter_quickack_mode(sk);
4666 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4675 if (!tcp_receive_window(tp))
4680 tcp_data_queue_ofo(sk, skb);
4688 if (!skb_queue_is_last(list, skb))
4689 next = skb_queue_next(list, skb);
4691 __skb_unlink(skb, list);
4719 skb_queue_walk_from_safe(list, skb, n) {
4723 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4724 skb = tcp_collapse_one(sk, skb, list);
4735 if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
4738 end_of_skbs = false;
4742 if (!skb_queue_is_last(list, skb)) {
4743 struct sk_buff *next = skb_queue_next(list, skb);
4746 end_of_skbs = false;
4754 if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
4757 while (before(start, end)) {
4759 unsigned int header = skb_headroom(skb);
4765 if (end - start < copy)
4771 skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
4772 skb_set_network_header(nskb, (skb_network_header(skb) -
4774 skb_set_transport_header(nskb, (skb_transport_header(skb) -
4776 skb_reserve(nskb, header);
4778 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4780 __skb_queue_before(list, skb, nskb);
4781 skb_set_owner_r(nskb, sk);
4790 size = min(copy, size);
4797 if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
4798 skb = tcp_collapse_one(sk, skb, list);
4801 tcp_hdr(skb)->syn ||
4812 static void tcp_collapse_ofo_queue(struct sock *sk)
4839 head, skb, start, end);
4859 static bool tcp_prune_ofo_queue(struct sock *sk)
4874 tcp_sack_reset(&tp->rx_opt);
4888 static int tcp_prune_queue(struct sock *sk)
4897 tcp_clamp_window(sk);
4898 else if (sk_under_memory_pressure(sk))
4901 tcp_collapse_ofo_queue(sk);
4915 tcp_prune_ofo_queue(sk);
4953 static bool tcp_should_expand_sndbuf(const struct sock *sk)
4955 const struct tcp_sock *tp = tcp_sk(sk);
4964 if (sk_under_memory_pressure(sk))
4968 if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
4984 static void tcp_new_space(struct sock *sk)
4988 if (tcp_should_expand_sndbuf(sk)) {
4995 sndmem *= 2 * demanded;
5004 static void tcp_check_space(struct sock *sk)
5014 static inline void tcp_data_snd_check(struct sock *sk)
5016 tcp_push_pending_frames(sk);
5017 tcp_check_space(sk);
5023 static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
5028 if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
5034 tcp_in_quickack_mode(sk) ||
5045 static inline void tcp_ack_snd_check(struct sock *sk)
5047 if (!inet_csk_ack_scheduled(sk)) {
5051 __tcp_ack_snd_check(sk, 1);
5064 static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
5069 if (ptr && !sysctl_tcp_stdurg)
5130 static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
5136 tcp_check_urg(sk, th);
5144 if (ptr < skb->len) {
5155 static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
5158 int chunk = skb->len - hlen;
5162 if (skb_csum_unnecessary(skb))
5178 static __sum16 __tcp_checksum_complete_user(struct sock *sk,
5185 result = __tcp_checksum_complete(skb);
5188 result = __tcp_checksum_complete(skb);
5193 static inline bool tcp_checksum_complete_user(struct sock *sk,
5196 return !skb_csum_unnecessary(skb) &&
5197 __tcp_checksum_complete_user(sk, skb);
5200 #ifdef CONFIG_NET_DMA
5201 static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
5205 int chunk = skb->len - hlen;
5207 bool copied_early = false;
5209 if (tp->ucopy.wakeup)
5212 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
5215 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
5219 tp->ucopy.iov, chunk,
5220 tp->ucopy.pinned_list);
5225 tp->ucopy.dma_cookie = dma_cookie;
5226 copied_early = true;
5232 if ((tp->ucopy.len == 0) ||
5235 tp->ucopy.wakeup = 1;
5238 } else if (chunk > 0) {
5239 tp->ucopy.wakeup = 1;
5243 return copied_early;
5247 static void tcp_send_challenge_ack(struct sock *sk)
5250 static u32 challenge_timestamp;
5251 static unsigned int challenge_count;
5254 if (now != challenge_timestamp) {
5255 challenge_timestamp = now;
5256 challenge_count = 0;
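/* Shared validation for incoming segments: PAWS, sequence-number range,
 * RST handling and challenge ACKs, used by the tcp_rcv_established() slow
 * path and by tcp_rcv_state_process().
 */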
5267 static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5268 const struct tcphdr *th, int syn_inerr)
5270 const u8 *hash_location;
5274 if (tcp_fast_parse_options(skb, th, tp, &hash_location) &&
5276 tcp_paws_discard(sk, skb)) {
5279 tcp_send_dupack(sk, skb);
5296 tcp_send_dupack(sk, skb);
5312 tcp_send_challenge_ack(sk);
5326 tcp_send_challenge_ack(sk);
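/* tcp_rcv_established(): Van Jacobson style header prediction splits
 * processing into a fast path for in-order data and pure ACKs and a slow
 * path for everything else.
 */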
5361 const struct tcphdr *th, unsigned int len)
5366 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
5382 tp->rx_opt.saw_tstamp = 0;
5406 if (!tcp_parse_aligned_timestamp(tp, th))
5420 if (len <= tcp_header_len) {
5422 if (len == tcp_header_len) {
5427 if (tcp_header_len ==
5430 tcp_store_ts_recent(tp);
5435 tcp_ack(sk, skb, 0);
5437 tcp_data_snd_check(sk);
5445 int copied_early = 0;
5446 bool fragstolen = false;
5449 len - tcp_header_len <= tp->ucopy.len) {
5450 #ifdef CONFIG_NET_DMA
5453 tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
5462 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
5470 if (tcp_header_len ==
5474 tcp_store_ts_recent(tp);
5476 tcp_rcv_rtt_measure_ts(sk, skb);
5478 __skb_pull(skb, tcp_header_len);
5486 if (tcp_checksum_complete_user(sk, skb))
5493 if (tcp_header_len ==
5496 tcp_store_ts_recent(tp);
5498 tcp_rcv_rtt_measure_ts(sk, skb);
5506 eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
5510 tcp_event_data_recv(sk, skb);
5515 tcp_data_snd_check(sk);
5516 if (!inet_csk_ack_scheduled(sk))
5521 __tcp_ack_snd_check(sk, 0);
5523 #ifdef CONFIG_NET_DMA
5525 __skb_queue_tail(&sk->sk_async_wait_queue, skb);
5536 if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
5543 if (!tcp_validate_incoming(sk, skb, th, 1))
5547 if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
5553 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5555 tcp_rcv_rtt_measure_ts(sk, skb);
5558 tcp_urg(sk, skb, th);
5561 tcp_data_queue(sk, skb);
5563 tcp_data_snd_check(sk);
5564 tcp_ack_snd_check(sk);
5585 security_inet_conn_established(sk, skb);
5605 if (!tp->rx_opt.snd_wscale)
5606 __tcp_fast_path_on(tp, tp->snd_wnd);
5616 static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5624 if (mss == tp->rx_opt.user_mss) {
5626 const u8 *hash_location;
5629 tcp_clear_options(&opt);
5630 opt.user_mss = opt.mss_clamp = 0;
5632 mss = opt.mss_clamp;
5642 syn_drop = (cookie->len <= 0 && data &&
5643 inet_csk(sk)->icsk_retransmits);
5649 if (data == tcp_send_head(sk) ||
5660 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5661 const struct tcphdr *th, unsigned int len)
5663 const u8 *hash_location;
5668 int saved_clamp = tp->rx_opt.mss_clamp;
5683 goto reset_and_undo;
5689 goto reset_and_undo;
5713 goto discard_and_undo;
5722 TCP_ECN_rcv_synack(tp, th);
5725 tcp_ack(sk, skb, FLAG_SLOWPATH);
5738 if (!tp->rx_opt.wscale_ok) {
5743 if (tp->rx_opt.saw_tstamp) {
5744 tp->rx_opt.tstamp_ok = 1;
5748 tcp_store_ts_recent(tp);
5753 if (tcp_is_sack(tp) && sysctl_tcp_fack)
5754 tcp_enable_fack(tp);
5767 tp->rx_opt.cookie_plus > 0) {
5768 int cookie_size = tp->rx_opt.cookie_plus
5770 int cookie_pair_size = cookie_size
5782 if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
5784 hash_location, cookie_size);
5794 tcp_rcv_fastopen_synack(sk, skb, &foc))
5807 inet_csk_schedule_ack(sk);
5809 tcp_enter_quickack_mode(sk);
5831 goto discard_and_undo;
5835 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
5836 tcp_paws_reject(&tp->rx_opt, 0))
5837 goto discard_and_undo;
5846 if (tp->rx_opt.saw_tstamp) {
5847 tp->rx_opt.tstamp_ok = 1;
5848 tcp_store_ts_recent(tp);
5865 TCP_ECN_rcv_syn(tp, th);
5894 tcp_clear_options(&tp->rx_opt);
5895 tp->rx_opt.mss_clamp = saved_clamp;
5899 tcp_clear_options(&tp->rx_opt);
5900 tp->rx_opt.mss_clamp = saved_clamp;
5912 const struct tcphdr *th, unsigned int len)
5919 tp->rx_opt.saw_tstamp = 0;
5921 switch (sk->sk_state) {
5961 queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
5966 tcp_urg(sk, skb, th);
5968 tcp_data_snd_check(sk);
5980 if (!tcp_validate_incoming(sk, skb, th, 0))
5985 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;
5987 switch (sk->sk_state) {
5994 tcp_synack_rtt_meas(sk, req);
6027 if (tp->rx_opt.tstamp_ok)
6051 tcp_fast_path_on(tp);
6082 dst = __sk_dst_get(sk);
6100 tmo = tcp_fin_time(sk);
6140 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
6143 tcp_urg(sk, skb, th);
6146 switch (sk->sk_state) {
6168 tcp_data_queue(sk, skb);
6175 tcp_data_snd_check(sk);
6176 tcp_ack_snd_check(sk);