// SPDX-License-Identifier: GPL-2.0-only
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */
211da177e4SLinus Torvalds
221da177e4SLinus Torvalds #include <linux/module.h>
235a0e3ad6STejun Heo #include <linux/gfp.h>
241da177e4SLinus Torvalds #include <net/tcp.h>
255691276bSJason Xing #include <net/rstreason.h>
261da177e4SLinus Torvalds
tcp_clamp_rto_to_user_timeout(const struct sock * sk)27b701a99eSJon Maxwell static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
28b701a99eSJon Maxwell {
2958169ec9SEric Dumazet const struct inet_connection_sock *icsk = inet_csk(sk);
30614e8316SEric Dumazet const struct tcp_sock *tp = tcp_sk(sk);
31614e8316SEric Dumazet u32 elapsed, user_timeout;
329efdda4eSEric Dumazet s32 remaining;
33b701a99eSJon Maxwell
34d58f2e15SEric Dumazet user_timeout = READ_ONCE(icsk->icsk_user_timeout);
35d58f2e15SEric Dumazet if (!user_timeout)
36b701a99eSJon Maxwell return icsk->icsk_rto;
37614e8316SEric Dumazet
38614e8316SEric Dumazet elapsed = tcp_time_stamp_ts(tp) - tp->retrans_stamp;
39614e8316SEric Dumazet if (tp->tcp_usec_ts)
40614e8316SEric Dumazet elapsed /= USEC_PER_MSEC;
41614e8316SEric Dumazet
42d58f2e15SEric Dumazet remaining = user_timeout - elapsed;
439efdda4eSEric Dumazet if (remaining <= 0)
44b701a99eSJon Maxwell return 1; /* user timeout has passed; fire ASAP */
459efdda4eSEric Dumazet
469efdda4eSEric Dumazet return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
47b701a99eSJon Maxwell }
48b701a99eSJon Maxwell
/* Bound a zero-window probe interval @when (jiffies) by what is left
 * of the TCP_USER_TIMEOUT budget, never going below TCP_TIMEOUT_MIN.
 */
u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	u32 budget, user_timeout;
	s32 elapsed;

	user_timeout = READ_ONCE(icsk->icsk_user_timeout);
	if (!user_timeout || !icsk->icsk_probes_tstamp)
		return when;

	/* Jiffies spent probing so far; guard against a stale stamp. */
	elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
	if (unlikely(elapsed < 0))
		elapsed = 0;
	budget = msecs_to_jiffies(user_timeout) - elapsed;
	budget = max_t(u32, budget, TCP_TIMEOUT_MIN);

	return min_t(u32, budget, when);
}
67344db93aSEnke Chen
68c380d37eSRichard Sailer /**
69c380d37eSRichard Sailer * tcp_write_err() - close socket and save error info
70c380d37eSRichard Sailer * @sk: The socket the error has appeared on.
71c380d37eSRichard Sailer *
72c380d37eSRichard Sailer * Returns: Nothing (void)
73c380d37eSRichard Sailer */
74c380d37eSRichard Sailer
tcp_write_err(struct sock * sk)751da177e4SLinus Torvalds static void tcp_write_err(struct sock *sk)
761da177e4SLinus Torvalds {
77853c3bd7SEric Dumazet tcp_done_with_error(sk, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT);
7802a1d6e7SEric Dumazet __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
791da177e4SLinus Torvalds }
801da177e4SLinus Torvalds
81c380d37eSRichard Sailer /**
82c380d37eSRichard Sailer * tcp_out_of_resources() - Close socket if out of resources
83c380d37eSRichard Sailer * @sk: pointer to current socket
84c380d37eSRichard Sailer * @do_reset: send a last packet with reset flag
85c380d37eSRichard Sailer *
86c380d37eSRichard Sailer * Do not allow orphaned sockets to eat all our resources.
871da177e4SLinus Torvalds * This is direct violation of TCP specs, but it is required
881da177e4SLinus Torvalds * to prevent DoS attacks. It is called when a retransmission timeout
891da177e4SLinus Torvalds * or zero probe timeout occurs on orphaned socket.
901da177e4SLinus Torvalds *
914ee806d5SDan Streetman * Also close if our net namespace is exiting; in that case there is no
924ee806d5SDan Streetman * hope of ever communicating again since all netns interfaces are already
934ee806d5SDan Streetman * down (or about to be down), and we need to release our dst references,
944ee806d5SDan Streetman * which have been moved to the netns loopback interface, so the namespace
954ee806d5SDan Streetman * can finish exiting. This condition is only possible if we are a kernel
964ee806d5SDan Streetman * socket, as those do not hold references to the namespace.
974ee806d5SDan Streetman *
98caa20d9aSStephen Hemminger * Criteria is still not confirmed experimentally and may change.
991da177e4SLinus Torvalds * We kill the socket, if:
1001da177e4SLinus Torvalds * 1. If number of orphaned sockets exceeds an administratively configured
1011da177e4SLinus Torvalds * limit.
1021da177e4SLinus Torvalds * 2. If we have strong memory pressure.
1034ee806d5SDan Streetman * 3. If our net namespace is exiting.
1041da177e4SLinus Torvalds */
tcp_out_of_resources(struct sock * sk,bool do_reset)105b248230cSYuchung Cheng static int tcp_out_of_resources(struct sock *sk, bool do_reset)
1061da177e4SLinus Torvalds {
1071da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk);
108ad1af0feSDavid S. Miller int shift = 0;
1091da177e4SLinus Torvalds
1101da177e4SLinus Torvalds /* If peer does not open window for long time, or did not transmit
1111da177e4SLinus Torvalds * anything for long time, penalize it. */
112d635fbe2SEric Dumazet if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
113ad1af0feSDavid S. Miller shift++;
1141da177e4SLinus Torvalds
1151da177e4SLinus Torvalds /* If some dubious ICMP arrived, penalize even more. */
116cee1af82SEric Dumazet if (READ_ONCE(sk->sk_err_soft))
117ad1af0feSDavid S. Miller shift++;
1181da177e4SLinus Torvalds
119efcdbf24SArun Sharma if (tcp_check_oom(sk, shift)) {
1201da177e4SLinus Torvalds /* Catch exceptional cases, when connection requires reset.
1211da177e4SLinus Torvalds * 1. Last segment was sent recently. */
122d635fbe2SEric Dumazet if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
1231da177e4SLinus Torvalds /* 2. Window is closed. */
1241da177e4SLinus Torvalds (!tp->snd_wnd && !tp->packets_out))
125b248230cSYuchung Cheng do_reset = true;
1261da177e4SLinus Torvalds if (do_reset)
1275691276bSJason Xing tcp_send_active_reset(sk, GFP_ATOMIC,
1288407994fSJason Xing SK_RST_REASON_TCP_ABORT_ON_MEMORY);
1291da177e4SLinus Torvalds tcp_done(sk);
13002a1d6e7SEric Dumazet __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
1311da177e4SLinus Torvalds return 1;
1321da177e4SLinus Torvalds }
1334ee806d5SDan Streetman
1344ee806d5SDan Streetman if (!check_net(sock_net(sk))) {
1354ee806d5SDan Streetman /* Not possible to send reset; just close */
1364ee806d5SDan Streetman tcp_done(sk);
1374ee806d5SDan Streetman return 1;
1384ee806d5SDan Streetman }
1394ee806d5SDan Streetman
1401da177e4SLinus Torvalds return 0;
1411da177e4SLinus Torvalds }
1421da177e4SLinus Torvalds
143c380d37eSRichard Sailer /**
144c380d37eSRichard Sailer * tcp_orphan_retries() - Returns maximal number of retries on an orphaned socket
145c380d37eSRichard Sailer * @sk: Pointer to the current socket.
146c380d37eSRichard Sailer * @alive: bool, socket alive state
147c380d37eSRichard Sailer */
tcp_orphan_retries(struct sock * sk,bool alive)1487533ce30SRichard Sailer static int tcp_orphan_retries(struct sock *sk, bool alive)
1491da177e4SLinus Torvalds {
15039e24435SKuniyuki Iwashima int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
1511da177e4SLinus Torvalds
1521da177e4SLinus Torvalds /* We know from an ICMP that something is wrong. */
153cee1af82SEric Dumazet if (READ_ONCE(sk->sk_err_soft) && !alive)
1541da177e4SLinus Torvalds retries = 0;
1551da177e4SLinus Torvalds
1561da177e4SLinus Torvalds /* However, if socket sent something recently, select some safe
1571da177e4SLinus Torvalds * number of retries. 8 corresponds to >100 seconds with minimal
1581da177e4SLinus Torvalds * RTO of 200msec. */
1591da177e4SLinus Torvalds if (retries == 0 && alive)
1601da177e4SLinus Torvalds retries = 8;
1611da177e4SLinus Torvalds return retries;
1621da177e4SLinus Torvalds }
1631da177e4SLinus Torvalds
tcp_mtu_probing(struct inet_connection_sock * icsk,struct sock * sk)164ce55dd36SEric Dumazet static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
165ce55dd36SEric Dumazet {
166d0f36847SEric Dumazet const struct net *net = sock_net(sk);
167d0f36847SEric Dumazet int mss;
168b0f9ca53SFan Du
169ce55dd36SEric Dumazet /* Black hole detection */
170f47d00e0SKuniyuki Iwashima if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
171d0f36847SEric Dumazet return;
172d0f36847SEric Dumazet
173ce55dd36SEric Dumazet if (!icsk->icsk_mtup.enabled) {
174ce55dd36SEric Dumazet icsk->icsk_mtup.enabled = 1;
175c74df29aSEric Dumazet icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
176ce55dd36SEric Dumazet } else {
1778beb5c5fSEric Dumazet mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
17888d78bc0SKuniyuki Iwashima mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
1798e92d442SKuniyuki Iwashima mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor));
18078eb166cSKuniyuki Iwashima mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
181ce55dd36SEric Dumazet icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
182d0f36847SEric Dumazet }
183ce55dd36SEric Dumazet tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
184ce55dd36SEric Dumazet }
185ce55dd36SEric Dumazet
tcp_model_timeout(struct sock * sk,unsigned int boundary,unsigned int rto_base)18601a523b0SYuchung Cheng static unsigned int tcp_model_timeout(struct sock *sk,
18701a523b0SYuchung Cheng unsigned int boundary,
18801a523b0SYuchung Cheng unsigned int rto_base)
18901a523b0SYuchung Cheng {
19001a523b0SYuchung Cheng unsigned int linear_backoff_thresh, timeout;
191c380d37eSRichard Sailer
19201a523b0SYuchung Cheng linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base);
19301a523b0SYuchung Cheng if (boundary <= linear_backoff_thresh)
19401a523b0SYuchung Cheng timeout = ((2 << boundary) - 1) * rto_base;
19501a523b0SYuchung Cheng else
19601a523b0SYuchung Cheng timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
19701a523b0SYuchung Cheng (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
19801a523b0SYuchung Cheng return jiffies_to_msecs(timeout);
19901a523b0SYuchung Cheng }
200c380d37eSRichard Sailer /**
201c380d37eSRichard Sailer * retransmits_timed_out() - returns true if this connection has timed out
202c380d37eSRichard Sailer * @sk: The current socket
203c380d37eSRichard Sailer * @boundary: max number of retransmissions
204c380d37eSRichard Sailer * @timeout: A custom timeout value.
205c380d37eSRichard Sailer * If set to 0 the default timeout is calculated and used.
206c380d37eSRichard Sailer * Using TCP_RTO_MIN and the number of unsuccessful retransmits.
207c380d37eSRichard Sailer *
208c380d37eSRichard Sailer * The default "timeout" value this function can calculate and use
209c380d37eSRichard Sailer * is equivalent to the timeout of a TCP Connection
210c380d37eSRichard Sailer * after "boundary" unsuccessful, exponentially backed-off
211ce682ef6SEric Dumazet * retransmissions with an initial RTO of TCP_RTO_MIN.
2122f7de571SDamian Lukowski */
retransmits_timed_out(struct sock * sk,unsigned int boundary,unsigned int timeout)2132f7de571SDamian Lukowski static bool retransmits_timed_out(struct sock *sk,
214dca43c75SJerry Chu unsigned int boundary,
215ce682ef6SEric Dumazet unsigned int timeout)
2162f7de571SDamian Lukowski {
217614e8316SEric Dumazet struct tcp_sock *tp = tcp_sk(sk);
218614e8316SEric Dumazet unsigned int start_ts, delta;
2192f7de571SDamian Lukowski
2202f7de571SDamian Lukowski if (!inet_csk(sk)->icsk_retransmits)
2212f7de571SDamian Lukowski return false;
2222f7de571SDamian Lukowski
223614e8316SEric Dumazet start_ts = tp->retrans_stamp;
2243256a2d6SEric Dumazet if (likely(timeout == 0)) {
2253256a2d6SEric Dumazet unsigned int rto_base = TCP_RTO_MIN;
2263256a2d6SEric Dumazet
2273256a2d6SEric Dumazet if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
2283256a2d6SEric Dumazet rto_base = tcp_timeout_init(sk);
2293256a2d6SEric Dumazet timeout = tcp_model_timeout(sk, boundary, rto_base);
2303256a2d6SEric Dumazet }
2312f7de571SDamian Lukowski
232614e8316SEric Dumazet if (tp->tcp_usec_ts) {
233614e8316SEric Dumazet /* delta maybe off up to a jiffy due to timer granularity. */
234614e8316SEric Dumazet delta = tp->tcp_mstamp - start_ts + jiffies_to_usecs(1);
235614e8316SEric Dumazet return (s32)(delta - timeout * USEC_PER_MSEC) >= 0;
236614e8316SEric Dumazet }
237614e8316SEric Dumazet return (s32)(tcp_time_stamp_ts(tp) - start_ts - timeout) >= 0;
2382f7de571SDamian Lukowski }
2392f7de571SDamian Lukowski
2401da177e4SLinus Torvalds /* A write timeout has occurred. Process the after effects. */
tcp_write_timeout(struct sock * sk)2411da177e4SLinus Torvalds static int tcp_write_timeout(struct sock *sk)
2421da177e4SLinus Torvalds {
2435d424d5aSJohn Heffner struct inet_connection_sock *icsk = inet_csk(sk);
244c968601dSYuchung Cheng struct tcp_sock *tp = tcp_sk(sk);
2456fa25166SNikolay Borisov struct net *net = sock_net(sk);
246a41e8a88SEric Dumazet bool expired = false, do_reset;
247ccce324dSDavid Morley int retry_until, max_retransmits;
2481da177e4SLinus Torvalds
2491da177e4SLinus Torvalds if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
2509c30ae83SYuchung Cheng if (icsk->icsk_retransmits)
2519c30ae83SYuchung Cheng __dst_negative_advice(sk);
252d44fd4a7SEric Dumazet /* Paired with WRITE_ONCE() in tcp_sock_set_syncnt() */
253d44fd4a7SEric Dumazet retry_until = READ_ONCE(icsk->icsk_syn_retries) ? :
25420a3b1c0SKuniyuki Iwashima READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
255ccce324dSDavid Morley
256ccce324dSDavid Morley max_retransmits = retry_until;
257ccce324dSDavid Morley if (sk->sk_state == TCP_SYN_SENT)
258ccce324dSDavid Morley max_retransmits += READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts);
259ccce324dSDavid Morley
260ccce324dSDavid Morley expired = icsk->icsk_retransmits >= max_retransmits;
2611da177e4SLinus Torvalds } else {
26239e24435SKuniyuki Iwashima if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
2635d424d5aSJohn Heffner /* Black hole detection */
264ce55dd36SEric Dumazet tcp_mtu_probing(icsk, sk);
2651da177e4SLinus Torvalds
2669c30ae83SYuchung Cheng __dst_negative_advice(sk);
2671da177e4SLinus Torvalds }
2681da177e4SLinus Torvalds
26939e24435SKuniyuki Iwashima retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
2701da177e4SLinus Torvalds if (sock_flag(sk, SOCK_DEAD)) {
2717533ce30SRichard Sailer const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
2721da177e4SLinus Torvalds
2731da177e4SLinus Torvalds retry_until = tcp_orphan_retries(sk, alive);
2746fa12c85SDamian Lukowski do_reset = alive ||
275ce682ef6SEric Dumazet !retransmits_timed_out(sk, retry_until, 0);
2761da177e4SLinus Torvalds
2776fa12c85SDamian Lukowski if (tcp_out_of_resources(sk, do_reset))
2781da177e4SLinus Torvalds return 1;
2791da177e4SLinus Torvalds }
280a41e8a88SEric Dumazet }
281a41e8a88SEric Dumazet if (!expired)
282ce682ef6SEric Dumazet expired = retransmits_timed_out(sk, retry_until,
283d58f2e15SEric Dumazet READ_ONCE(icsk->icsk_user_timeout));
2847268586bSYuchung Cheng tcp_fastopen_active_detect_blackhole(sk, expired);
285*6982826fSMatthieu Baerts (NGI0) mptcp_active_detect_blackhole(sk, expired);
286f89013f6SLawrence Brakmo
287f89013f6SLawrence Brakmo if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
288f89013f6SLawrence Brakmo tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
289f89013f6SLawrence Brakmo icsk->icsk_retransmits,
290f89013f6SLawrence Brakmo icsk->icsk_rto, (int)expired);
291f89013f6SLawrence Brakmo
292ce682ef6SEric Dumazet if (expired) {
2931da177e4SLinus Torvalds /* Has it gone just too far? */
2941da177e4SLinus Torvalds tcp_write_err(sk);
2951da177e4SLinus Torvalds return 1;
2961da177e4SLinus Torvalds }
297f89013f6SLawrence Brakmo
2989c30ae83SYuchung Cheng if (sk_rethink_txhash(sk)) {
2999c30ae83SYuchung Cheng tp->timeout_rehash++;
3009c30ae83SYuchung Cheng __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH);
3019c30ae83SYuchung Cheng }
3029c30ae83SYuchung Cheng
3031da177e4SLinus Torvalds return 0;
3041da177e4SLinus Torvalds }
3051da177e4SLinus Torvalds
306c10d9310SEric Dumazet /* Called with BH disabled */
tcp_delack_timer_handler(struct sock * sk)3076f458dfbSEric Dumazet void tcp_delack_timer_handler(struct sock *sk)
3081da177e4SLinus Torvalds {
309463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk);
31030c6f0bfSfuyuanli struct tcp_sock *tp = tcp_sk(sk);
3111da177e4SLinus Torvalds
31230c6f0bfSfuyuanli if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
31330c6f0bfSfuyuanli return;
31430c6f0bfSfuyuanli
31530c6f0bfSfuyuanli /* Handling the sack compression case */
31630c6f0bfSfuyuanli if (tp->compressed_ack) {
31730c6f0bfSfuyuanli tcp_mstamp_refresh(tp);
31830c6f0bfSfuyuanli tcp_sack_compress_send_ack(sk);
31930c6f0bfSfuyuanli return;
32030c6f0bfSfuyuanli }
32130c6f0bfSfuyuanli
32230c6f0bfSfuyuanli if (!(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
3234890b686SEric Dumazet return;
3241da177e4SLinus Torvalds
325463c84b9SArnaldo Carvalho de Melo if (time_after(icsk->icsk_ack.timeout, jiffies)) {
326463c84b9SArnaldo Carvalho de Melo sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
3274890b686SEric Dumazet return;
3281da177e4SLinus Torvalds }
329463c84b9SArnaldo Carvalho de Melo icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
3301da177e4SLinus Torvalds
331463c84b9SArnaldo Carvalho de Melo if (inet_csk_ack_scheduled(sk)) {
33231954cd8SWei Wang if (!inet_csk_in_pingpong_mode(sk)) {
3331da177e4SLinus Torvalds /* Delayed ACK missed: inflate ATO. */
33495b9a87cSDavid Morley icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, icsk->icsk_rto);
3351da177e4SLinus Torvalds } else {
3361da177e4SLinus Torvalds /* Delayed ACK missed: leave pingpong mode and
3371da177e4SLinus Torvalds * deflate ATO.
3381da177e4SLinus Torvalds */
33931954cd8SWei Wang inet_csk_exit_pingpong_mode(sk);
340463c84b9SArnaldo Carvalho de Melo icsk->icsk_ack.ato = TCP_ATO_MIN;
3411da177e4SLinus Torvalds }
34230c6f0bfSfuyuanli tcp_mstamp_refresh(tp);
3431da177e4SLinus Torvalds tcp_send_ack(sk);
34402a1d6e7SEric Dumazet __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
3451da177e4SLinus Torvalds }
3466f458dfbSEric Dumazet }
3476f458dfbSEric Dumazet
348c380d37eSRichard Sailer
349c380d37eSRichard Sailer /**
350c380d37eSRichard Sailer * tcp_delack_timer() - The TCP delayed ACK timeout handler
3513628e3cbSAndrew Lunn * @t: Pointer to the timer. (gets casted to struct sock *)
352c380d37eSRichard Sailer *
353c380d37eSRichard Sailer * This function gets (indirectly) called when the kernel timer for a TCP packet
354c380d37eSRichard Sailer * of this socket expires. Calls tcp_delack_timer_handler() to do the actual work.
355c380d37eSRichard Sailer *
356c380d37eSRichard Sailer * Returns: Nothing (void)
357c380d37eSRichard Sailer */
tcp_delack_timer(struct timer_list * t)35859f379f9SKees Cook static void tcp_delack_timer(struct timer_list *t)
3596f458dfbSEric Dumazet {
36059f379f9SKees Cook struct inet_connection_sock *icsk =
36159f379f9SKees Cook from_timer(icsk, t, icsk_delack_timer);
36259f379f9SKees Cook struct sock *sk = &icsk->icsk_inet.sk;
3636f458dfbSEric Dumazet
3646f458dfbSEric Dumazet bh_lock_sock(sk);
3656f458dfbSEric Dumazet if (!sock_owned_by_user(sk)) {
3666f458dfbSEric Dumazet tcp_delack_timer_handler(sk);
3676f458dfbSEric Dumazet } else {
36802a1d6e7SEric Dumazet __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
3696f458dfbSEric Dumazet /* deleguate our work to tcp_release_cb() */
3707aa5470cSEric Dumazet if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
371144d56e9SEric Dumazet sock_hold(sk);
3726f458dfbSEric Dumazet }
3731da177e4SLinus Torvalds bh_unlock_sock(sk);
3741da177e4SLinus Torvalds sock_put(sk);
3751da177e4SLinus Torvalds }
3761da177e4SLinus Torvalds
tcp_probe_timer(struct sock * sk)3771da177e4SLinus Torvalds static void tcp_probe_timer(struct sock *sk)
3781da177e4SLinus Torvalds {
3796687e988SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk);
38075c119afSEric Dumazet struct sk_buff *skb = tcp_send_head(sk);
3811da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk);
3821da177e4SLinus Torvalds int max_probes;
3831da177e4SLinus Torvalds
38475c119afSEric Dumazet if (tp->packets_out || !skb) {
3856687e988SArnaldo Carvalho de Melo icsk->icsk_probes_out = 0;
3869d9b1ee0SEnke Chen icsk->icsk_probes_tstamp = 0;
3871da177e4SLinus Torvalds return;
3881da177e4SLinus Torvalds }
3891da177e4SLinus Torvalds
390b248230cSYuchung Cheng /* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as
391b248230cSYuchung Cheng * long as the receiver continues to respond probes. We support this by
392b248230cSYuchung Cheng * default and reset icsk_probes_out with incoming ACKs. But if the
393b248230cSYuchung Cheng * socket is orphaned or the user specifies TCP_USER_TIMEOUT, we
394b248230cSYuchung Cheng * kill the socket when the retry count and the time exceeds the
395b248230cSYuchung Cheng * corresponding system limit. We also implement similar policy when
396b248230cSYuchung Cheng * we use RTO to probe window in tcp_retransmit_timer().
3971da177e4SLinus Torvalds */
398d58f2e15SEric Dumazet if (!icsk->icsk_probes_tstamp) {
3999d9b1ee0SEnke Chen icsk->icsk_probes_tstamp = tcp_jiffies32;
400d58f2e15SEric Dumazet } else {
401d58f2e15SEric Dumazet u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
4021da177e4SLinus Torvalds
403d58f2e15SEric Dumazet if (user_timeout &&
404d58f2e15SEric Dumazet (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
405d58f2e15SEric Dumazet msecs_to_jiffies(user_timeout))
406d58f2e15SEric Dumazet goto abort;
407d58f2e15SEric Dumazet }
40839e24435SKuniyuki Iwashima max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
4091da177e4SLinus Torvalds if (sock_flag(sk, SOCK_DEAD)) {
4107533ce30SRichard Sailer const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
4111da177e4SLinus Torvalds
4121da177e4SLinus Torvalds max_probes = tcp_orphan_retries(sk, alive);
413b248230cSYuchung Cheng if (!alive && icsk->icsk_backoff >= max_probes)
414b248230cSYuchung Cheng goto abort;
415b248230cSYuchung Cheng if (tcp_out_of_resources(sk, true))
4161da177e4SLinus Torvalds return;
4171da177e4SLinus Torvalds }
4181da177e4SLinus Torvalds
4193976535aSYuchung Cheng if (icsk->icsk_probes_out >= max_probes) {
420b248230cSYuchung Cheng abort: tcp_write_err(sk);
4211da177e4SLinus Torvalds } else {
4221da177e4SLinus Torvalds /* Only send another probe if we didn't close things up. */
4231da177e4SLinus Torvalds tcp_send_probe0(sk);
4241da177e4SLinus Torvalds }
4251da177e4SLinus Torvalds }
4261da177e4SLinus Torvalds
tcp_update_rto_stats(struct sock * sk)4273868ab0fSAananth V static void tcp_update_rto_stats(struct sock *sk)
4283868ab0fSAananth V {
4293868ab0fSAananth V struct inet_connection_sock *icsk = inet_csk(sk);
4303868ab0fSAananth V struct tcp_sock *tp = tcp_sk(sk);
4313868ab0fSAananth V
4323868ab0fSAananth V if (!icsk->icsk_retransmits) {
4333868ab0fSAananth V tp->total_rto_recoveries++;
43499d67955SEric Dumazet tp->rto_stamp = tcp_time_stamp_ms(tp);
4353868ab0fSAananth V }
4363868ab0fSAananth V icsk->icsk_retransmits++;
4373868ab0fSAananth V tp->total_rto++;
4383868ab0fSAananth V }
4393868ab0fSAananth V
4401da177e4SLinus Torvalds /*
4418336886fSJerry Chu * Timer for Fast Open socket to retransmit SYNACK. Note that the
4428336886fSJerry Chu * sk here is the child socket, not the parent (listener) socket.
4438336886fSJerry Chu */
tcp_fastopen_synack_timer(struct sock * sk,struct request_sock * req)444d983ea6fSEric Dumazet static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
4458336886fSJerry Chu {
4468336886fSJerry Chu struct inet_connection_sock *icsk = inet_csk(sk);
447c7d13c8fSYuchung Cheng struct tcp_sock *tp = tcp_sk(sk);
44820a3b1c0SKuniyuki Iwashima int max_retries;
4498336886fSJerry Chu
45042cb80a2SEric Dumazet req->rsk_ops->syn_ack_timeout(req);
4518336886fSJerry Chu
452d44fd4a7SEric Dumazet /* Add one more retry for fastopen.
453d44fd4a7SEric Dumazet * Paired with WRITE_ONCE() in tcp_sock_set_syncnt()
454d44fd4a7SEric Dumazet */
455d44fd4a7SEric Dumazet max_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
45620a3b1c0SKuniyuki Iwashima READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
45720a3b1c0SKuniyuki Iwashima
458e6c022a4SEric Dumazet if (req->num_timeout >= max_retries) {
4598336886fSJerry Chu tcp_write_err(sk);
4608336886fSJerry Chu return;
4618336886fSJerry Chu }
4628c3cfe19SYuchung Cheng /* Lower cwnd after certain SYNACK timeout like tcp_init_transfer() */
4638c3cfe19SYuchung Cheng if (icsk->icsk_retransmits == 1)
4648c3cfe19SYuchung Cheng tcp_enter_loss(sk);
4658336886fSJerry Chu /* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
4668336886fSJerry Chu * returned from rtx_syn_ack() to make it more persistent like
4678336886fSJerry Chu * regular retransmit because if the child socket has been accepted
4688336886fSJerry Chu * it's not good to give up too easily.
4698336886fSJerry Chu */
470e6c022a4SEric Dumazet inet_rtx_syn_ack(sk, req);
471e6c022a4SEric Dumazet req->num_timeout++;
4723868ab0fSAananth V tcp_update_rto_stats(sk);
473c7d13c8fSYuchung Cheng if (!tp->retrans_stamp)
4749d0c00f5SEric Dumazet tp->retrans_stamp = tcp_time_stamp_ts(tp);
4758336886fSJerry Chu inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
4768ea731d4SJie Meng req->timeout << req->num_timeout, TCP_RTO_MAX);
4778336886fSJerry Chu }
4788336886fSJerry Chu
tcp_rtx_probe0_timed_out(const struct sock * sk,const struct sk_buff * skb,u32 rtx_delta)479e89688e3SMenglong Dong static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
480614e8316SEric Dumazet const struct sk_buff *skb,
481614e8316SEric Dumazet u32 rtx_delta)
482e89688e3SMenglong Dong {
48397a90635SEric Dumazet const struct inet_connection_sock *icsk = inet_csk(sk);
48497a90635SEric Dumazet u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
485e89688e3SMenglong Dong const struct tcp_sock *tp = tcp_sk(sk);
48697a90635SEric Dumazet int timeout = TCP_RTO_MAX * 2;
48736534d3cSEric Dumazet s32 rcv_delta;
488e89688e3SMenglong Dong
48997a90635SEric Dumazet if (user_timeout) {
49097a90635SEric Dumazet /* If user application specified a TCP_USER_TIMEOUT,
49197a90635SEric Dumazet * it does not want win 0 packets to 'reset the timer'
49297a90635SEric Dumazet * while retransmits are not making progress.
49397a90635SEric Dumazet */
49497a90635SEric Dumazet if (rtx_delta > user_timeout)
49597a90635SEric Dumazet return true;
49697a90635SEric Dumazet timeout = min_t(u32, timeout, msecs_to_jiffies(user_timeout));
49797a90635SEric Dumazet }
49836534d3cSEric Dumazet /* Note: timer interrupt might have been delayed by at least one jiffy,
49936534d3cSEric Dumazet * and tp->rcv_tstamp might very well have been written recently.
50036534d3cSEric Dumazet * rcv_delta can thus be negative.
50136534d3cSEric Dumazet */
50297a90635SEric Dumazet rcv_delta = icsk->icsk_timeout - tp->rcv_tstamp;
503e89688e3SMenglong Dong if (rcv_delta <= timeout)
504e89688e3SMenglong Dong return false;
505e89688e3SMenglong Dong
506614e8316SEric Dumazet return msecs_to_jiffies(rtx_delta) > timeout;
507e89688e3SMenglong Dong }
5081da177e4SLinus Torvalds
509c380d37eSRichard Sailer /**
510c380d37eSRichard Sailer * tcp_retransmit_timer() - The TCP retransmit timeout handler
511c380d37eSRichard Sailer * @sk: Pointer to the current socket.
512c380d37eSRichard Sailer *
513c380d37eSRichard Sailer * This function gets called when the kernel timer for a TCP packet
514c380d37eSRichard Sailer * of this socket expires.
515c380d37eSRichard Sailer *
516974d8f86SZheng Yongjun * It handles retransmission, timer adjustment and other necessary measures.
517c380d37eSRichard Sailer *
518c380d37eSRichard Sailer * Returns: Nothing (void)
519c380d37eSRichard Sailer */
tcp_retransmit_timer(struct sock * sk)520f1ecd5d9SDamian Lukowski void tcp_retransmit_timer(struct sock *sk)
5211da177e4SLinus Torvalds {
5221da177e4SLinus Torvalds struct tcp_sock *tp = tcp_sk(sk);
523ae5c3f40SNikolay Borisov struct net *net = sock_net(sk);
524463c84b9SArnaldo Carvalho de Melo struct inet_connection_sock *icsk = inet_csk(sk);
525d983ea6fSEric Dumazet struct request_sock *req;
5260d580fbdSEric Dumazet struct sk_buff *skb;
5271da177e4SLinus Torvalds
528d983ea6fSEric Dumazet req = rcu_dereference_protected(tp->fastopen_rsk,
529d983ea6fSEric Dumazet lockdep_sock_is_held(sk));
530d983ea6fSEric Dumazet if (req) {
53137561f68SJerry Chu WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
5328336886fSJerry Chu sk->sk_state != TCP_FIN_WAIT1);
533d983ea6fSEric Dumazet tcp_fastopen_synack_timer(sk, req);
5348336886fSJerry Chu /* Before we receive ACK to our SYN-ACK don't retransmit
5358336886fSJerry Chu * anything else (e.g., data or FIN segments).
5368336886fSJerry Chu */
5378336886fSJerry Chu return;
5388336886fSJerry Chu }
5390d580fbdSEric Dumazet
5400d580fbdSEric Dumazet if (!tp->packets_out)
5410d580fbdSEric Dumazet return;
5420d580fbdSEric Dumazet
5430d580fbdSEric Dumazet skb = tcp_rtx_queue_head(sk);
5440d580fbdSEric Dumazet if (WARN_ON_ONCE(!skb))
54588f8598dSYuchung Cheng return;
5461da177e4SLinus Torvalds
5471da177e4SLinus Torvalds if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
5481da177e4SLinus Torvalds !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
5491da177e4SLinus Torvalds /* Receiver dastardly shrinks window. Our retransmits
5501da177e4SLinus Torvalds * become zero probes, but we should not timeout this
5511da177e4SLinus Torvalds * connection. If the socket is an orphan, time it out,
5521da177e4SLinus Torvalds * we cannot allow such beasts to hang infinitely.
5531da177e4SLinus Torvalds */
5541da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk);
555031c44b7SMenglong Dong u32 rtx_delta;
556031c44b7SMenglong Dong
557614e8316SEric Dumazet rtx_delta = tcp_time_stamp_ts(tp) - (tp->retrans_stamp ?:
558614e8316SEric Dumazet tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb));
559614e8316SEric Dumazet if (tp->tcp_usec_ts)
560614e8316SEric Dumazet rtx_delta /= USEC_PER_MSEC;
561614e8316SEric Dumazet
562569508c9SYOSHIFUJI Hideaki if (sk->sk_family == AF_INET) {
563031c44b7SMenglong Dong net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
564031c44b7SMenglong Dong &inet->inet_daddr, ntohs(inet->inet_dport),
565031c44b7SMenglong Dong inet->inet_num, tp->snd_una, tp->snd_nxt,
566031c44b7SMenglong Dong jiffies_to_msecs(jiffies - tp->rcv_tstamp),
567031c44b7SMenglong Dong rtx_delta);
5681da177e4SLinus Torvalds }
569dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
570569508c9SYOSHIFUJI Hideaki else if (sk->sk_family == AF_INET6) {
571031c44b7SMenglong Dong net_dbg_ratelimited("Probing zero-window on %pI6:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
572031c44b7SMenglong Dong &sk->sk_v6_daddr, ntohs(inet->inet_dport),
573031c44b7SMenglong Dong inet->inet_num, tp->snd_una, tp->snd_nxt,
574031c44b7SMenglong Dong jiffies_to_msecs(jiffies - tp->rcv_tstamp),
575031c44b7SMenglong Dong rtx_delta);
576569508c9SYOSHIFUJI Hideaki }
577569508c9SYOSHIFUJI Hideaki #endif
578614e8316SEric Dumazet if (tcp_rtx_probe0_timed_out(sk, skb, rtx_delta)) {
5791da177e4SLinus Torvalds tcp_write_err(sk);
5801da177e4SLinus Torvalds goto out;
5811da177e4SLinus Torvalds }
5825ae344c9SNeal Cardwell tcp_enter_loss(sk);
5830d580fbdSEric Dumazet tcp_retransmit_skb(sk, skb, 1);
5841da177e4SLinus Torvalds __sk_dst_reset(sk);
5851da177e4SLinus Torvalds goto out_reset_timer;
5861da177e4SLinus Torvalds }
5871da177e4SLinus Torvalds
588e1561fe2SYuchung Cheng __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
5891da177e4SLinus Torvalds if (tcp_write_timeout(sk))
5901da177e4SLinus Torvalds goto out;
5911da177e4SLinus Torvalds
592463c84b9SArnaldo Carvalho de Melo if (icsk->icsk_retransmits == 0) {
593e1561fe2SYuchung Cheng int mib_idx = 0;
59440b215e5SPavel Emelyanov
595c60ce4e2SIlpo Järvinen if (icsk->icsk_ca_state == TCP_CA_Recovery) {
596bc079e9eSIlpo Järvinen if (tcp_is_sack(tp))
597bc079e9eSIlpo Järvinen mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
598bc079e9eSIlpo Järvinen else
599bc079e9eSIlpo Järvinen mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
6006687e988SArnaldo Carvalho de Melo } else if (icsk->icsk_ca_state == TCP_CA_Loss) {
60140b215e5SPavel Emelyanov mib_idx = LINUX_MIB_TCPLOSSFAILURES;
602c60ce4e2SIlpo Järvinen } else if ((icsk->icsk_ca_state == TCP_CA_Disorder) ||
603c60ce4e2SIlpo Järvinen tp->sacked_out) {
604c60ce4e2SIlpo Järvinen if (tcp_is_sack(tp))
605c60ce4e2SIlpo Järvinen mib_idx = LINUX_MIB_TCPSACKFAILURES;
606c60ce4e2SIlpo Järvinen else
607c60ce4e2SIlpo Järvinen mib_idx = LINUX_MIB_TCPRENOFAILURES;
6081da177e4SLinus Torvalds }
609e1561fe2SYuchung Cheng if (mib_idx)
61002a1d6e7SEric Dumazet __NET_INC_STATS(sock_net(sk), mib_idx);
6111da177e4SLinus Torvalds }
6121da177e4SLinus Torvalds
6135ae344c9SNeal Cardwell tcp_enter_loss(sk);
6141da177e4SLinus Torvalds
6153868ab0fSAananth V tcp_update_rto_stats(sk);
61675c119afSEric Dumazet if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
6171da177e4SLinus Torvalds /* Retransmission failed because of local congestion,
618590d2026SYuchung Cheng * Let senders fight for local resources conservatively.
6191da177e4SLinus Torvalds */
620463c84b9SArnaldo Carvalho de Melo inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
621590d2026SYuchung Cheng TCP_RESOURCE_PROBE_INTERVAL,
6223f421baaSArnaldo Carvalho de Melo TCP_RTO_MAX);
6231da177e4SLinus Torvalds goto out;
6241da177e4SLinus Torvalds }
6251da177e4SLinus Torvalds
6261da177e4SLinus Torvalds /* Increase the timeout each time we retransmit. Note that
6271da177e4SLinus Torvalds * we do not increase the rtt estimate. rto is initialized
6281da177e4SLinus Torvalds * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests
6291da177e4SLinus Torvalds * that doubling rto each time is the least we can get away with.
6301da177e4SLinus Torvalds * In KA9Q, Karn uses this for the first few times, and then
6311da177e4SLinus Torvalds * goes to quadratic. netBSD doubles, but only goes up to *64,
6321da177e4SLinus Torvalds * and clamps at 1 to 64 sec afterwards. Note that 120 sec is
6331da177e4SLinus Torvalds * defined in the protocol as the maximum possible RTT. I guess
6341da177e4SLinus Torvalds * we'll have to use something other than TCP to talk to the
6351da177e4SLinus Torvalds * University of Mars.
6361da177e4SLinus Torvalds *
6371da177e4SLinus Torvalds * PAWS allows us longer timeouts and large windows, so once
6381da177e4SLinus Torvalds * implemented ftp to mars will work nicely. We will have to fix
6391da177e4SLinus Torvalds * the 120 second clamps though!
6401da177e4SLinus Torvalds */
6411da177e4SLinus Torvalds
6421da177e4SLinus Torvalds out_reset_timer:
64336e31b0aSAndreas Petlund /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
64436e31b0aSAndreas Petlund * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
64536e31b0aSAndreas Petlund * might be increased if the stream oscillates between thin and thick,
64636e31b0aSAndreas Petlund * thus the old value might already be too high compared to the value
64736e31b0aSAndreas Petlund * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
64836e31b0aSAndreas Petlund * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
64936e31b0aSAndreas Petlund * exponential backoff behaviour to avoid continue hammering
65036e31b0aSAndreas Petlund * linear-timeout retransmissions into a black hole
65136e31b0aSAndreas Petlund */
65236e31b0aSAndreas Petlund if (sk->sk_state == TCP_ESTABLISHED &&
6537c6f2a86SKuniyuki Iwashima (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
65436e31b0aSAndreas Petlund tcp_stream_is_thin(tp) &&
65536e31b0aSAndreas Petlund icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
65636e31b0aSAndreas Petlund icsk->icsk_backoff = 0;
657e4dd0d3aSJason Xing icsk->icsk_rto = clamp(__tcp_set_rto(tp),
658e4dd0d3aSJason Xing tcp_rto_min(sk),
659e4dd0d3aSJason Xing TCP_RTO_MAX);
660ccce324dSDavid Morley } else if (sk->sk_state != TCP_SYN_SENT ||
66114dd92d0SEric Dumazet tp->total_rto >
662ccce324dSDavid Morley READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
663ccce324dSDavid Morley /* Use normal (exponential) backoff unless linear timeouts are
664ccce324dSDavid Morley * activated.
665ccce324dSDavid Morley */
66614dd92d0SEric Dumazet icsk->icsk_backoff++;
667463c84b9SArnaldo Carvalho de Melo icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
66836e31b0aSAndreas Petlund }
669b701a99eSJon Maxwell inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
670b701a99eSJon Maxwell tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
67139e24435SKuniyuki Iwashima if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
6721da177e4SLinus Torvalds __sk_dst_reset(sk);
6731da177e4SLinus Torvalds
6741da177e4SLinus Torvalds out:;
6751da177e4SLinus Torvalds }
6761da177e4SLinus Torvalds
/* Called with bottom-half processing disabled.
   Called by tcp_write_timer() */
void tcp_write_timer_handler(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int event;

	/* Nothing to do if the socket is closed/listening, or if the
	 * pending timer event was cancelled before we got here.
	 */
	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
	    !icsk->icsk_pending)
		return;

	/* Timer fired before its deadline (it may have been pushed
	 * back while pending): re-arm for the remaining time instead
	 * of processing the event now.
	 */
	if (time_after(icsk->icsk_timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
		return;
	}

	/* Refresh the cached TCP clock before any RTT/RTO work below. */
	tcp_mstamp_refresh(tcp_sk(sk));
	event = icsk->icsk_pending;

	switch (event) {
	case ICSK_TIME_REO_TIMEOUT:
		tcp_rack_reo_timeout(sk);
		break;
	case ICSK_TIME_LOSS_PROBE:
		tcp_send_loss_probe(sk);
		break;
	case ICSK_TIME_RETRANS:
		/* Clear the pending event before calling the handler,
		 * which may arm a fresh timer itself.
		 */
		icsk->icsk_pending = 0;
		tcp_retransmit_timer(sk);
		break;
	case ICSK_TIME_PROBE0:
		icsk->icsk_pending = 0;
		tcp_probe_timer(sk);
		break;
	}
}
7136f458dfbSEric Dumazet
tcp_write_timer(struct timer_list * t)71459f379f9SKees Cook static void tcp_write_timer(struct timer_list *t)
7156f458dfbSEric Dumazet {
71659f379f9SKees Cook struct inet_connection_sock *icsk =
71759f379f9SKees Cook from_timer(icsk, t, icsk_retransmit_timer);
71859f379f9SKees Cook struct sock *sk = &icsk->icsk_inet.sk;
7196f458dfbSEric Dumazet
7206f458dfbSEric Dumazet bh_lock_sock(sk);
7216f458dfbSEric Dumazet if (!sock_owned_by_user(sk)) {
7226f458dfbSEric Dumazet tcp_write_timer_handler(sk);
7236f458dfbSEric Dumazet } else {
724c380d37eSRichard Sailer /* delegate our work to tcp_release_cb() */
7257aa5470cSEric Dumazet if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
726144d56e9SEric Dumazet sock_hold(sk);
7276f458dfbSEric Dumazet }
7281da177e4SLinus Torvalds bh_unlock_sock(sk);
7291da177e4SLinus Torvalds sock_put(sk);
7301da177e4SLinus Torvalds }
7311da177e4SLinus Torvalds
tcp_syn_ack_timeout(const struct request_sock * req)73242cb80a2SEric Dumazet void tcp_syn_ack_timeout(const struct request_sock *req)
73372659eccSOctavian Purdila {
73442cb80a2SEric Dumazet struct net *net = read_pnet(&inet_rsk(req)->ireq_net);
73542cb80a2SEric Dumazet
73602a1d6e7SEric Dumazet __NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS);
73772659eccSOctavian Purdila }
73872659eccSOctavian Purdila EXPORT_SYMBOL(tcp_syn_ack_timeout);
73972659eccSOctavian Purdila
tcp_set_keepalive(struct sock * sk,int val)7401da177e4SLinus Torvalds void tcp_set_keepalive(struct sock *sk, int val)
7411da177e4SLinus Torvalds {
7421da177e4SLinus Torvalds if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
7431da177e4SLinus Torvalds return;
7441da177e4SLinus Torvalds
7451da177e4SLinus Torvalds if (val && !sock_flag(sk, SOCK_KEEPOPEN))
746463c84b9SArnaldo Carvalho de Melo inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
7471da177e4SLinus Torvalds else if (!val)
748463c84b9SArnaldo Carvalho de Melo inet_csk_delete_keepalive_timer(sk);
7491da177e4SLinus Torvalds }
7504b9d07a4SUrsula Braun EXPORT_SYMBOL_GPL(tcp_set_keepalive);
7511da177e4SLinus Torvalds
7521da177e4SLinus Torvalds
/* Keepalive timer callback.  Also doubles as the FIN_WAIT2 orphan
 * timer: a dead FIN_WAIT2 socket is either moved to timewait or reset
 * here.  For live keepalive sockets it sends probes and times the
 * connection out after too many unanswered ones.
 */
static void tcp_keepalive_timer (struct timer_list *t)
{
	struct sock *sk = from_timer(sk, t, sk_timer);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	u32 elapsed;

	/* Only process if socket is not in use. */
	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		inet_csk_reset_keepalive_timer (sk, HZ/20);
		goto out;
	}

	/* Listeners should never have a keepalive timer armed. */
	if (sk->sk_state == TCP_LISTEN) {
		pr_err("Hmm... keepalive on a LISTEN ???\n");
		goto out;
	}

	tcp_mstamp_refresh(tp);
	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
		/* Orphaned FIN_WAIT2 socket: with a non-negative linger2,
		 * hand any remaining lifetime to the timewait machinery;
		 * otherwise (or when no time is left) reset the peer.
		 */
		if (READ_ONCE(tp->linger2) >= 0) {
			const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;

			if (tmo > 0) {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
		tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_TCP_STATE);
		goto death;
	}

	/* Keepalive probing only applies to established-side states with
	 * SOCK_KEEPOPEN enabled.
	 */
	if (!sock_flag(sk, SOCK_KEEPOPEN) ||
	    ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
		goto out;

	elapsed = keepalive_time_when(tp);

	/* It is alive without keepalive 8) */
	if (tp->packets_out || !tcp_write_queue_empty(sk))
		goto resched;

	elapsed = keepalive_time_elapsed(tp);

	if (elapsed >= keepalive_time_when(tp)) {
		u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);

		/* If the TCP_USER_TIMEOUT option is enabled, use that
		 * to determine when to timeout instead.
		 */
		if ((user_timeout != 0 &&
		    elapsed >= msecs_to_jiffies(user_timeout) &&
		    icsk->icsk_probes_out > 0) ||
		    (user_timeout == 0 &&
		    icsk->icsk_probes_out >= keepalive_probes(tp))) {
			tcp_send_active_reset(sk, GFP_ATOMIC,
					      SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT);
			tcp_write_err(sk);
			goto out;
		}
		if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
			/* Probe went out: count it and wait one keepalive
			 * interval before the next one.
			 */
			icsk->icsk_probes_out++;
			elapsed = keepalive_intvl_when(tp);
		} else {
			/* If keepalive was lost due to local congestion,
			 * try harder.
			 */
			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
		}
	} else {
		/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
		elapsed = keepalive_time_when(tp) - elapsed;
	}

resched:
	inet_csk_reset_keepalive_timer (sk, elapsed);
	goto out;

death:
	tcp_done(sk);

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
8406f458dfbSEric Dumazet
tcp_compressed_ack_kick(struct hrtimer * timer)8415d9f4262SEric Dumazet static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
8425d9f4262SEric Dumazet {
8435d9f4262SEric Dumazet struct tcp_sock *tp = container_of(timer, struct tcp_sock, compressed_ack_timer);
8445d9f4262SEric Dumazet struct sock *sk = (struct sock *)tp;
8455d9f4262SEric Dumazet
8465d9f4262SEric Dumazet bh_lock_sock(sk);
8475d9f4262SEric Dumazet if (!sock_owned_by_user(sk)) {
8482b195850SEric Dumazet if (tp->compressed_ack) {
8492b195850SEric Dumazet /* Since we have to send one ack finally,
850974d8f86SZheng Yongjun * subtract one from tp->compressed_ack to keep
8512b195850SEric Dumazet * LINUX_MIB_TCPACKCOMPRESSED accurate.
8522b195850SEric Dumazet */
8532b195850SEric Dumazet tp->compressed_ack--;
8545d9f4262SEric Dumazet tcp_send_ack(sk);
8552b195850SEric Dumazet }
8565d9f4262SEric Dumazet } else {
8575d9f4262SEric Dumazet if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
8585d9f4262SEric Dumazet &sk->sk_tsq_flags))
8595d9f4262SEric Dumazet sock_hold(sk);
8605d9f4262SEric Dumazet }
8615d9f4262SEric Dumazet bh_unlock_sock(sk);
8625d9f4262SEric Dumazet
8635d9f4262SEric Dumazet sock_put(sk);
8645d9f4262SEric Dumazet
8655d9f4262SEric Dumazet return HRTIMER_NORESTART;
8665d9f4262SEric Dumazet }
8675d9f4262SEric Dumazet
tcp_init_xmit_timers(struct sock * sk)8686f458dfbSEric Dumazet void tcp_init_xmit_timers(struct sock *sk)
8696f458dfbSEric Dumazet {
8706f458dfbSEric Dumazet inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
8716f458dfbSEric Dumazet &tcp_keepalive_timer);
872fb420d5dSEric Dumazet hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
87373a6bab5SEric Dumazet HRTIMER_MODE_ABS_PINNED_SOFT);
874218af599SEric Dumazet tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
8755d9f4262SEric Dumazet
8765d9f4262SEric Dumazet hrtimer_init(&tcp_sk(sk)->compressed_ack_timer, CLOCK_MONOTONIC,
8775d9f4262SEric Dumazet HRTIMER_MODE_REL_PINNED_SOFT);
8785d9f4262SEric Dumazet tcp_sk(sk)->compressed_ack_timer.function = tcp_compressed_ack_kick;
8796f458dfbSEric Dumazet }
880