xref: /linux/net/ipv4/tcp_timer.c (revision 031c44b7527aec2f22ddaae4bcd8b085ff810ec4)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
41da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
51da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  *		Implementation of the Transmission Control Protocol(TCP).
81da177e4SLinus Torvalds  *
902c30a84SJesper Juhl  * Authors:	Ross Biro
101da177e4SLinus Torvalds  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
111da177e4SLinus Torvalds  *		Mark Evans, <evansmp@uhura.aston.ac.uk>
121da177e4SLinus Torvalds  *		Corey Minyard <wf-rch!minyard@relay.EU.net>
131da177e4SLinus Torvalds  *		Florian La Roche, <flla@stud.uni-sb.de>
141da177e4SLinus Torvalds  *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
151da177e4SLinus Torvalds  *		Linus Torvalds, <torvalds@cs.helsinki.fi>
161da177e4SLinus Torvalds  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
171da177e4SLinus Torvalds  *		Matthew Dillon, <dillon@apollo.west.oic.com>
181da177e4SLinus Torvalds  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
191da177e4SLinus Torvalds  *		Jorge Cwik, <jorge@laser.satlink.net>
201da177e4SLinus Torvalds  */
211da177e4SLinus Torvalds 
221da177e4SLinus Torvalds #include <linux/module.h>
235a0e3ad6STejun Heo #include <linux/gfp.h>
241da177e4SLinus Torvalds #include <net/tcp.h>
251da177e4SLinus Torvalds 
/* Clamp the next retransmission timeout so the whole retransmit sequence
 * still fits inside the TCP_USER_TIMEOUT budget, when one was set.
 * Returns a value in jiffies: icsk_rto unchanged if no user timeout is
 * armed, 1 if the user timeout already elapsed (fire ASAP), otherwise
 * min(icsk_rto, time remaining until the user timeout).
 */
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	u32 elapsed, start_ts, user_timeout;
	s32 remaining;

	/* retrans_stamp: timestamp of the first unacked (re)transmission. */
	start_ts = tcp_sk(sk)->retrans_stamp;
	user_timeout = READ_ONCE(icsk->icsk_user_timeout);
	if (!user_timeout)
		return icsk->icsk_rto;
	/* tcp_time_stamp() and user_timeout are both in milliseconds here;
	 * the conversion back to jiffies happens below.
	 */
	elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
	remaining = user_timeout - elapsed;
	if (remaining <= 0)
		return 1; /* user timeout has passed; fire ASAP */

	return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
}
43b701a99eSJon Maxwell 
/* Clamp a zero-window probe timeout @when (jiffies) so it does not
 * overshoot the remaining TCP_USER_TIMEOUT budget.  Returns @when
 * unchanged when no user timeout is set or no probe has been sent yet
 * (icsk_probes_tstamp == 0).
 */
u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	u32 remaining, user_timeout;
	s32 elapsed;

	user_timeout = READ_ONCE(icsk->icsk_user_timeout);
	if (!user_timeout || !icsk->icsk_probes_tstamp)
		return when;

	/* Jiffies elapsed since the first unanswered probe; signed so a
	 * racy, slightly-in-the-future timestamp clamps to zero.
	 */
	elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
	if (unlikely(elapsed < 0))
		elapsed = 0;
	/* user_timeout is in ms; convert before mixing with jiffies. */
	remaining = msecs_to_jiffies(user_timeout) - elapsed;
	remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);

	return min_t(u32, remaining, when);
}
62344db93aSEnke Chen 
/**
 *  tcp_write_err() - close socket and save error info
 *  @sk:  The socket the error has appeared on.
 *
 *  Reports ETIMEDOUT (or a previously recorded soft error) to the user,
 *  purges the write queue and moves the socket to closed state via
 *  tcp_done().
 *
 *  Returns: Nothing (void)
 */

static void tcp_write_err(struct sock *sk)
{
	/* Prefer a pending soft error (e.g. set from ICMP) over a plain
	 * timeout, so userspace sees the more specific cause.
	 */
	WRITE_ONCE(sk->sk_err, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT);
	sk_error_report(sk);

	tcp_write_queue_purge(sk);
	tcp_done(sk);
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}
791da177e4SLinus Torvalds 
/**
 *  tcp_out_of_resources() - Close socket if out of resources
 *  @sk:        pointer to current socket
 *  @do_reset:  send a last packet with reset flag
 *
 *  Do not allow orphaned sockets to eat all our resources.
 *  This is direct violation of TCP specs, but it is required
 *  to prevent DoS attacks. It is called when a retransmission timeout
 *  or zero probe timeout occurs on orphaned socket.
 *
 *  Also close if our net namespace is exiting; in that case there is no
 *  hope of ever communicating again since all netns interfaces are already
 *  down (or about to be down), and we need to release our dst references,
 *  which have been moved to the netns loopback interface, so the namespace
 *  can finish exiting.  This condition is only possible if we are a kernel
 *  socket, as those do not hold references to the namespace.
 *
 *  Criteria is still not confirmed experimentally and may change.
 *  We kill the socket, if:
 *  1. If number of orphaned sockets exceeds an administratively configured
 *     limit.
 *  2. If we have strong memory pressure.
 *  3. If our net namespace is exiting.
 *
 *  Returns: 1 if the socket was killed (tcp_done() called), 0 otherwise.
 */
static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int shift = 0;

	/* If peer does not open window for long time, or did not transmit
	 * anything for long time, penalize it. */
	if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
		shift++;

	/* If some dubious ICMP arrived, penalize even more. */
	if (READ_ONCE(sk->sk_err_soft))
		shift++;

	/* A larger shift lowers the memory threshold in tcp_check_oom(). */
	if (tcp_check_oom(sk, shift)) {
		/* Catch exceptional cases, when connection requires reset.
		 *      1. Last segment was sent recently. */
		if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
		    /*  2. Window is closed. */
		    (!tp->snd_wnd && !tp->packets_out))
			do_reset = true;
		if (do_reset)
			tcp_send_active_reset(sk, GFP_ATOMIC);
		tcp_done(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
		return 1;
	}

	if (!check_net(sock_net(sk))) {
		/* Not possible to send reset; just close */
		tcp_done(sk);
		return 1;
	}

	return 0;
}
1401da177e4SLinus Torvalds 
141c380d37eSRichard Sailer /**
142c380d37eSRichard Sailer  *  tcp_orphan_retries() - Returns maximal number of retries on an orphaned socket
143c380d37eSRichard Sailer  *  @sk:    Pointer to the current socket.
144c380d37eSRichard Sailer  *  @alive: bool, socket alive state
145c380d37eSRichard Sailer  */
1467533ce30SRichard Sailer static int tcp_orphan_retries(struct sock *sk, bool alive)
1471da177e4SLinus Torvalds {
14839e24435SKuniyuki Iwashima 	int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
1491da177e4SLinus Torvalds 
1501da177e4SLinus Torvalds 	/* We know from an ICMP that something is wrong. */
151cee1af82SEric Dumazet 	if (READ_ONCE(sk->sk_err_soft) && !alive)
1521da177e4SLinus Torvalds 		retries = 0;
1531da177e4SLinus Torvalds 
1541da177e4SLinus Torvalds 	/* However, if socket sent something recently, select some safe
1551da177e4SLinus Torvalds 	 * number of retries. 8 corresponds to >100 seconds with minimal
1561da177e4SLinus Torvalds 	 * RTO of 200msec. */
1571da177e4SLinus Torvalds 	if (retries == 0 && alive)
1581da177e4SLinus Torvalds 		retries = 8;
1591da177e4SLinus Torvalds 	return retries;
1601da177e4SLinus Torvalds }
1611da177e4SLinus Torvalds 
/* Retransmission timeouts may indicate a PMTU black hole: enable MTU
 * probing on the first such event, and on subsequent ones lower the
 * probing search floor so smaller segments get a chance through.
 */
static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
{
	const struct net *net = sock_net(sk);
	int mss;

	/* Black hole detection */
	if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
		return;

	if (!icsk->icsk_mtup.enabled) {
		/* First timeout with probing configured: just turn it on. */
		icsk->icsk_mtup.enabled = 1;
		icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
	} else {
		/* Halve the current search floor, then clamp it between
		 * the configured MSS floors and sysctl_tcp_base_mss.
		 * The clamp order matters: floors are applied last.
		 */
		mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
		mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
		mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor));
		mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
	}
	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
183ce55dd36SEric Dumazet 
18401a523b0SYuchung Cheng static unsigned int tcp_model_timeout(struct sock *sk,
18501a523b0SYuchung Cheng 				      unsigned int boundary,
18601a523b0SYuchung Cheng 				      unsigned int rto_base)
18701a523b0SYuchung Cheng {
18801a523b0SYuchung Cheng 	unsigned int linear_backoff_thresh, timeout;
189c380d37eSRichard Sailer 
19001a523b0SYuchung Cheng 	linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base);
19101a523b0SYuchung Cheng 	if (boundary <= linear_backoff_thresh)
19201a523b0SYuchung Cheng 		timeout = ((2 << boundary) - 1) * rto_base;
19301a523b0SYuchung Cheng 	else
19401a523b0SYuchung Cheng 		timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
19501a523b0SYuchung Cheng 			(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
19601a523b0SYuchung Cheng 	return jiffies_to_msecs(timeout);
19701a523b0SYuchung Cheng }
/**
 *  retransmits_timed_out() - returns true if this connection has timed out
 *  @sk:       The current socket
 *  @boundary: max number of retransmissions
 *  @timeout:  A custom timeout value.
 *             If set to 0 the default timeout is calculated and used.
 *             Using TCP_RTO_MIN and the number of unsuccessful retransmits.
 *
 * The default "timeout" value this function can calculate and use
 * is equivalent to the timeout of a TCP Connection
 * after "boundary" unsuccessful, exponentially backed-off
 * retransmissions with an initial RTO of TCP_RTO_MIN.
 */
static bool retransmits_timed_out(struct sock *sk,
				  unsigned int boundary,
				  unsigned int timeout)
{
	unsigned int start_ts;

	/* No retransmission has happened: cannot have timed out. */
	if (!inet_csk(sk)->icsk_retransmits)
		return false;

	start_ts = tcp_sk(sk)->retrans_stamp;
	if (likely(timeout == 0)) {
		unsigned int rto_base = TCP_RTO_MIN;

		/* Handshake retransmits start from the (possibly
		 * BPF-provided) initial RTO rather than TCP_RTO_MIN.
		 */
		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			rto_base = tcp_timeout_init(sk);
		timeout = tcp_model_timeout(sk, boundary, rto_base);
	}

	/* Signed difference copes with timestamp wraparound. */
	return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
}
2312f7de571SDamian Lukowski 
/* A write timeout has occurred. Process the after effects.
 * Returns 1 if the socket was killed (write error or out of resources),
 * 0 if the caller should go on retransmitting.
 */
static int tcp_write_timeout(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	bool expired = false, do_reset;
	int retry_until, max_retransmits;

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		/* Still in the handshake: the limit comes from
		 * TCP_SYNCNT / sysctl_tcp_syn_retries.
		 */
		if (icsk->icsk_retransmits)
			__dst_negative_advice(sk);
		/* Paired with WRITE_ONCE() in tcp_sock_set_syncnt() */
		retry_until = READ_ONCE(icsk->icsk_syn_retries) ? :
			READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);

		max_retransmits = retry_until;
		/* Optional extra SYN timeouts at linear (non-doubling) spacing. */
		if (sk->sk_state == TCP_SYN_SENT)
			max_retransmits += READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts);

		expired = icsk->icsk_retransmits >= max_retransmits;
	} else {
		if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
			/* Black hole detection */
			tcp_mtu_probing(icsk, sk);

			__dst_negative_advice(sk);
		}

		retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
		if (sock_flag(sk, SOCK_DEAD)) {
			/* Orphaned socket: apply the stricter orphan limits
			 * and possibly free resources right away.
			 */
			const bool alive = icsk->icsk_rto < TCP_RTO_MAX;

			retry_until = tcp_orphan_retries(sk, alive);
			do_reset = alive ||
				!retransmits_timed_out(sk, retry_until, 0);

			if (tcp_out_of_resources(sk, do_reset))
				return 1;
		}
	}
	if (!expired)
		expired = retransmits_timed_out(sk, retry_until,
						READ_ONCE(icsk->icsk_user_timeout));
	/* Inform Fast Open logic so it can disable active TFO on blackholes. */
	tcp_fastopen_active_detect_blackhole(sk, expired);

	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
		tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
				  icsk->icsk_retransmits,
				  icsk->icsk_rto, (int)expired);

	if (expired) {
		/* Has it gone just too far? */
		tcp_write_err(sk);
		return 1;
	}

	/* Repeated timeouts may indicate a bad path: try a new flow hash. */
	if (sk_rethink_txhash(sk)) {
		tp->timeout_rehash++;
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH);
	}

	return 0;
}
2961da177e4SLinus Torvalds 
/* Called with BH disabled */
void tcp_delack_timer_handler(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	/* Nothing to ACK on closed or listening sockets. */
	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
		return;

	/* Handling the sack compression case */
	if (tp->compressed_ack) {
		tcp_mstamp_refresh(tp);
		tcp_sack_compress_send_ack(sk);
		return;
	}

	if (!(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
		return;

	/* Timer fired before its (rearmed) deadline: push it back out. */
	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
		return;
	}
	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;

	if (inet_csk_ack_scheduled(sk)) {
		if (!inet_csk_in_pingpong_mode(sk)) {
			/* Delayed ACK missed: inflate ATO. */
			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
		} else {
			/* Delayed ACK missed: leave pingpong mode and
			 * deflate ATO.
			 */
			inet_csk_exit_pingpong_mode(sk);
			icsk->icsk_ack.ato      = TCP_ATO_MIN;
		}
		tcp_mstamp_refresh(tp);
		tcp_send_ack(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
	}
}
3386f458dfbSEric Dumazet 

/**
 *  tcp_delack_timer() - The TCP delayed ACK timeout handler
 *  @t:  Pointer to the timer. (gets casted to struct sock *)
 *
 *  This function gets (indirectly) called when the kernel timer for a TCP packet
 *  of this socket expires. Calls tcp_delack_timer_handler() to do the actual work.
 *
 *  Returns: Nothing (void)
 */
static void tcp_delack_timer(struct timer_list *t)
{
	struct inet_connection_sock *icsk =
			from_timer(icsk, t, icsk_delack_timer);
	struct sock *sk = &icsk->icsk_inet.sk;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_delack_timer_handler(sk);
	} else {
		/* Socket is owned by process context: defer the work. */
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
		/* deleguate our work to tcp_release_cb() */
		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
			sock_hold(sk); /* extra ref released by tcp_release_cb() */
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}
3671da177e4SLinus Torvalds 
/* Zero-window probe timer: fires when the peer advertised a zero window
 * and has not answered our window probes.
 */
static void tcp_probe_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct sk_buff *skb = tcp_send_head(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int max_probes;

	/* Data already in flight, or nothing queued to send: no probing
	 * needed — clear the probe bookkeeping.
	 */
	if (tp->packets_out || !skb) {
		icsk->icsk_probes_out = 0;
		icsk->icsk_probes_tstamp = 0;
		return;
	}

	/* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as
	 * long as the receiver continues to respond probes. We support this by
	 * default and reset icsk_probes_out with incoming ACKs. But if the
	 * socket is orphaned or the user specifies TCP_USER_TIMEOUT, we
	 * kill the socket when the retry count and the time exceeds the
	 * corresponding system limit. We also implement similar policy when
	 * we use RTO to probe window in tcp_retransmit_timer().
	 */
	if (!icsk->icsk_probes_tstamp) {
		icsk->icsk_probes_tstamp = tcp_jiffies32;
	} else {
		u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);

		/* user_timeout is in ms; the elapsed delta is in jiffies. */
		if (user_timeout &&
		    (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
		     msecs_to_jiffies(user_timeout))
		goto abort;
	}
	max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
	if (sock_flag(sk, SOCK_DEAD)) {
		/* Orphaned socket: apply the stricter orphan probe limits. */
		const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;

		max_probes = tcp_orphan_retries(sk, alive);
		if (!alive && icsk->icsk_backoff >= max_probes)
			goto abort;
		if (tcp_out_of_resources(sk, true))
			return;
	}

	if (icsk->icsk_probes_out >= max_probes) {
abort:		tcp_write_err(sk);
	} else {
		/* Only send another probe if we didn't close things up. */
		tcp_send_probe0(sk);
	}
}
4171da177e4SLinus Torvalds 
/*
 *	Timer for Fast Open socket to retransmit SYNACK. Note that the
 *	sk here is the child socket, not the parent (listener) socket.
 */
static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int max_retries;

	req->rsk_ops->syn_ack_timeout(req);

	/* Add one more retry for fastopen.
	 * Paired with WRITE_ONCE() in tcp_sock_set_syncnt()
	 */
	max_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
		READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;

	if (req->num_timeout >= max_retries) {
		tcp_write_err(sk);
		return;
	}
	/* Lower cwnd after certain SYNACK timeout like tcp_init_transfer() */
	if (icsk->icsk_retransmits == 1)
		tcp_enter_loss(sk);
	/* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
	 * returned from rtx_syn_ack() to make it more persistent like
	 * regular retransmit because if the child socket has been accepted
	 * it's not good to give up too easily.
	 */
	inet_rtx_syn_ack(sk, req);
	req->num_timeout++;
	icsk->icsk_retransmits++;
	/* Record when the first retransmission happened for RTO modeling. */
	if (!tp->retrans_stamp)
		tp->retrans_stamp = tcp_time_stamp(tp);
	/* Exponential backoff on the request's base SYNACK timeout. */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
			  req->timeout << req->num_timeout, TCP_RTO_MAX);
}
4568336886fSJerry Chu 
/* Decide whether a connection in the zero-window retransmit path of
 * tcp_retransmit_timer() has been unresponsive longer than 2*TCP_RTO_MAX.
 * Both the receive side (time since last segment received) and the
 * transmit side (time since first retransmission) must exceed the limit.
 */
static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
				     const struct sk_buff *skb)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const int timeout = TCP_RTO_MAX * 2; /* jiffies */
	u32 rcv_delta, rtx_delta;

	/* icsk_timeout is the (jiffies) expiry of the current timer, so
	 * rcv_delta measures from the last received segment to that expiry.
	 */
	rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
	if (rcv_delta <= timeout)
		return false;

	/* tcp_time_stamp()/retrans_stamp are in ms: convert to jiffies
	 * before comparing against @timeout.  Falls back to the skb's
	 * timestamp if no retransmission has been recorded yet.
	 */
	rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) -
			(tp->retrans_stamp ?: tcp_skb_timestamp(skb)));

	return rtx_delta > timeout;
}
4731da177e4SLinus Torvalds 
474c380d37eSRichard Sailer /**
475c380d37eSRichard Sailer  *  tcp_retransmit_timer() - The TCP retransmit timeout handler
476c380d37eSRichard Sailer  *  @sk:  Pointer to the current socket.
477c380d37eSRichard Sailer  *
478c380d37eSRichard Sailer  *  This function gets called when the kernel timer for a TCP packet
479c380d37eSRichard Sailer  *  of this socket expires.
480c380d37eSRichard Sailer  *
481974d8f86SZheng Yongjun  *  It handles retransmission, timer adjustment and other necessary measures.
482c380d37eSRichard Sailer  *
483c380d37eSRichard Sailer  *  Returns: Nothing (void)
484c380d37eSRichard Sailer  */
485f1ecd5d9SDamian Lukowski void tcp_retransmit_timer(struct sock *sk)
4861da177e4SLinus Torvalds {
4871da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
488ae5c3f40SNikolay Borisov 	struct net *net = sock_net(sk);
489463c84b9SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
490d983ea6fSEric Dumazet 	struct request_sock *req;
4910d580fbdSEric Dumazet 	struct sk_buff *skb;
4921da177e4SLinus Torvalds 
493d983ea6fSEric Dumazet 	req = rcu_dereference_protected(tp->fastopen_rsk,
494d983ea6fSEric Dumazet 					lockdep_sock_is_held(sk));
495d983ea6fSEric Dumazet 	if (req) {
49637561f68SJerry Chu 		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
4978336886fSJerry Chu 			     sk->sk_state != TCP_FIN_WAIT1);
498d983ea6fSEric Dumazet 		tcp_fastopen_synack_timer(sk, req);
4998336886fSJerry Chu 		/* Before we receive ACK to our SYN-ACK don't retransmit
5008336886fSJerry Chu 		 * anything else (e.g., data or FIN segments).
5018336886fSJerry Chu 		 */
5028336886fSJerry Chu 		return;
5038336886fSJerry Chu 	}
5040d580fbdSEric Dumazet 
5050d580fbdSEric Dumazet 	if (!tp->packets_out)
5060d580fbdSEric Dumazet 		return;
5070d580fbdSEric Dumazet 
5080d580fbdSEric Dumazet 	skb = tcp_rtx_queue_head(sk);
5090d580fbdSEric Dumazet 	if (WARN_ON_ONCE(!skb))
51088f8598dSYuchung Cheng 		return;
5111da177e4SLinus Torvalds 
5129b717a8dSNandita Dukkipati 	tp->tlp_high_seq = 0;
5139b717a8dSNandita Dukkipati 
5141da177e4SLinus Torvalds 	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
5151da177e4SLinus Torvalds 	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
5161da177e4SLinus Torvalds 		/* Receiver dastardly shrinks window. Our retransmits
5171da177e4SLinus Torvalds 		 * become zero probes, but we should not timeout this
5181da177e4SLinus Torvalds 		 * connection. If the socket is an orphan, time it out,
5191da177e4SLinus Torvalds 		 * we cannot allow such beasts to hang infinitely.
5201da177e4SLinus Torvalds 		 */
5211da177e4SLinus Torvalds 		struct inet_sock *inet = inet_sk(sk);
522*031c44b7SMenglong Dong 		u32 rtx_delta;
523*031c44b7SMenglong Dong 
524*031c44b7SMenglong Dong 		rtx_delta = tcp_time_stamp(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp(skb));
525569508c9SYOSHIFUJI Hideaki 		if (sk->sk_family == AF_INET) {
526*031c44b7SMenglong Dong 			net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
527*031c44b7SMenglong Dong 				&inet->inet_daddr, ntohs(inet->inet_dport),
528*031c44b7SMenglong Dong 				inet->inet_num, tp->snd_una, tp->snd_nxt,
529*031c44b7SMenglong Dong 				jiffies_to_msecs(jiffies - tp->rcv_tstamp),
530*031c44b7SMenglong Dong 				rtx_delta);
5311da177e4SLinus Torvalds 		}
532dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
533569508c9SYOSHIFUJI Hideaki 		else if (sk->sk_family == AF_INET6) {
534*031c44b7SMenglong Dong 			net_dbg_ratelimited("Probing zero-window on %pI6:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
535*031c44b7SMenglong Dong 				&sk->sk_v6_daddr, ntohs(inet->inet_dport),
536*031c44b7SMenglong Dong 				inet->inet_num, tp->snd_una, tp->snd_nxt,
537*031c44b7SMenglong Dong 				jiffies_to_msecs(jiffies - tp->rcv_tstamp),
538*031c44b7SMenglong Dong 				rtx_delta);
539569508c9SYOSHIFUJI Hideaki 		}
540569508c9SYOSHIFUJI Hideaki #endif
541e89688e3SMenglong Dong 		if (tcp_rtx_probe0_timed_out(sk, skb)) {
5421da177e4SLinus Torvalds 			tcp_write_err(sk);
5431da177e4SLinus Torvalds 			goto out;
5441da177e4SLinus Torvalds 		}
5455ae344c9SNeal Cardwell 		tcp_enter_loss(sk);
5460d580fbdSEric Dumazet 		tcp_retransmit_skb(sk, skb, 1);
5471da177e4SLinus Torvalds 		__sk_dst_reset(sk);
5481da177e4SLinus Torvalds 		goto out_reset_timer;
5491da177e4SLinus Torvalds 	}
5501da177e4SLinus Torvalds 
551e1561fe2SYuchung Cheng 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
5521da177e4SLinus Torvalds 	if (tcp_write_timeout(sk))
5531da177e4SLinus Torvalds 		goto out;
5541da177e4SLinus Torvalds 
555463c84b9SArnaldo Carvalho de Melo 	if (icsk->icsk_retransmits == 0) {
556e1561fe2SYuchung Cheng 		int mib_idx = 0;
55740b215e5SPavel Emelyanov 
558c60ce4e2SIlpo Järvinen 		if (icsk->icsk_ca_state == TCP_CA_Recovery) {
559bc079e9eSIlpo Järvinen 			if (tcp_is_sack(tp))
560bc079e9eSIlpo Järvinen 				mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
561bc079e9eSIlpo Järvinen 			else
562bc079e9eSIlpo Järvinen 				mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
5636687e988SArnaldo Carvalho de Melo 		} else if (icsk->icsk_ca_state == TCP_CA_Loss) {
56440b215e5SPavel Emelyanov 			mib_idx = LINUX_MIB_TCPLOSSFAILURES;
565c60ce4e2SIlpo Järvinen 		} else if ((icsk->icsk_ca_state == TCP_CA_Disorder) ||
566c60ce4e2SIlpo Järvinen 			   tp->sacked_out) {
567c60ce4e2SIlpo Järvinen 			if (tcp_is_sack(tp))
568c60ce4e2SIlpo Järvinen 				mib_idx = LINUX_MIB_TCPSACKFAILURES;
569c60ce4e2SIlpo Järvinen 			else
570c60ce4e2SIlpo Järvinen 				mib_idx = LINUX_MIB_TCPRENOFAILURES;
5711da177e4SLinus Torvalds 		}
572e1561fe2SYuchung Cheng 		if (mib_idx)
57302a1d6e7SEric Dumazet 			__NET_INC_STATS(sock_net(sk), mib_idx);
5741da177e4SLinus Torvalds 	}
5751da177e4SLinus Torvalds 
5765ae344c9SNeal Cardwell 	tcp_enter_loss(sk);
5771da177e4SLinus Torvalds 
578590d2026SYuchung Cheng 	icsk->icsk_retransmits++;
57975c119afSEric Dumazet 	if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
5801da177e4SLinus Torvalds 		/* Retransmission failed because of local congestion,
581590d2026SYuchung Cheng 		 * Let senders fight for local resources conservatively.
5821da177e4SLinus Torvalds 		 */
583463c84b9SArnaldo Carvalho de Melo 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
584590d2026SYuchung Cheng 					  TCP_RESOURCE_PROBE_INTERVAL,
5853f421baaSArnaldo Carvalho de Melo 					  TCP_RTO_MAX);
5861da177e4SLinus Torvalds 		goto out;
5871da177e4SLinus Torvalds 	}
5881da177e4SLinus Torvalds 
5891da177e4SLinus Torvalds 	/* Increase the timeout each time we retransmit.  Note that
5901da177e4SLinus Torvalds 	 * we do not increase the rtt estimate.  rto is initialized
5911da177e4SLinus Torvalds 	 * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
5921da177e4SLinus Torvalds 	 * that doubling rto each time is the least we can get away with.
5931da177e4SLinus Torvalds 	 * In KA9Q, Karn uses this for the first few times, and then
5941da177e4SLinus Torvalds 	 * goes to quadratic.  netBSD doubles, but only goes up to *64,
5951da177e4SLinus Torvalds 	 * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
5961da177e4SLinus Torvalds 	 * defined in the protocol as the maximum possible RTT.  I guess
5971da177e4SLinus Torvalds 	 * we'll have to use something other than TCP to talk to the
5981da177e4SLinus Torvalds 	 * University of Mars.
5991da177e4SLinus Torvalds 	 *
6001da177e4SLinus Torvalds 	 * PAWS allows us longer timeouts and large windows, so once
6011da177e4SLinus Torvalds 	 * implemented ftp to mars will work nicely. We will have to fix
6021da177e4SLinus Torvalds 	 * the 120 second clamps though!
6031da177e4SLinus Torvalds 	 */
604463c84b9SArnaldo Carvalho de Melo 	icsk->icsk_backoff++;
6051da177e4SLinus Torvalds 
6061da177e4SLinus Torvalds out_reset_timer:
60736e31b0aSAndreas Petlund 	/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
60836e31b0aSAndreas Petlund 	 * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
60936e31b0aSAndreas Petlund 	 * might be increased if the stream oscillates between thin and thick,
61036e31b0aSAndreas Petlund 	 * thus the old value might already be too high compared to the value
61136e31b0aSAndreas Petlund 	 * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
61236e31b0aSAndreas Petlund 	 * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
61336e31b0aSAndreas Petlund 	 * exponential backoff behaviour to avoid continue hammering
61436e31b0aSAndreas Petlund 	 * linear-timeout retransmissions into a black hole
61536e31b0aSAndreas Petlund 	 */
61636e31b0aSAndreas Petlund 	if (sk->sk_state == TCP_ESTABLISHED &&
6177c6f2a86SKuniyuki Iwashima 	    (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
61836e31b0aSAndreas Petlund 	    tcp_stream_is_thin(tp) &&
61936e31b0aSAndreas Petlund 	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
62036e31b0aSAndreas Petlund 		icsk->icsk_backoff = 0;
62136e31b0aSAndreas Petlund 		icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
622ccce324dSDavid Morley 	} else if (sk->sk_state != TCP_SYN_SENT ||
623ccce324dSDavid Morley 		   icsk->icsk_backoff >
624ccce324dSDavid Morley 		   READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
625ccce324dSDavid Morley 		/* Use normal (exponential) backoff unless linear timeouts are
626ccce324dSDavid Morley 		 * activated.
627ccce324dSDavid Morley 		 */
628463c84b9SArnaldo Carvalho de Melo 		icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
62936e31b0aSAndreas Petlund 	}
630b701a99eSJon Maxwell 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
631b701a99eSJon Maxwell 				  tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
63239e24435SKuniyuki Iwashima 	if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
6331da177e4SLinus Torvalds 		__sk_dst_reset(sk);
6341da177e4SLinus Torvalds 
6351da177e4SLinus Torvalds out:;
6361da177e4SLinus Torvalds }
6371da177e4SLinus Torvalds 
/* Called with bottom-half processing disabled.
 * Called by tcp_write_timer().
 *
 * Dispatches the pending ICSK write-timer event for @sk: RACK reordering
 * timeout, tail loss probe, retransmission timeout, or zero-window probe.
 * If the timer fired before its deadline, it is simply re-armed.
 */
void tcp_write_timer_handler(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int event;

	/* Nothing to do on closed/listening sockets, or when no event
	 * is actually armed.
	 */
	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
	    !icsk->icsk_pending)
		return;

	/* Timer expired early (deadline still in the future): re-arm it
	 * for the real deadline instead of handling the event now.
	 */
	if (time_after(icsk->icsk_timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
		return;
	}

	/* Refresh the cached timestamp so the handlers see current time. */
	tcp_mstamp_refresh(tcp_sk(sk));
	event = icsk->icsk_pending;

	switch (event) {
	case ICSK_TIME_REO_TIMEOUT:
		tcp_rack_reo_timeout(sk);
		break;
	case ICSK_TIME_LOSS_PROBE:
		tcp_send_loss_probe(sk);
		break;
	case ICSK_TIME_RETRANS:
		/* Clear the pending event before running the handler,
		 * which may re-arm the timer itself.
		 */
		icsk->icsk_pending = 0;
		tcp_retransmit_timer(sk);
		break;
	case ICSK_TIME_PROBE0:
		icsk->icsk_pending = 0;
		tcp_probe_timer(sk);
		break;
	}
}
6746f458dfbSEric Dumazet 
67559f379f9SKees Cook static void tcp_write_timer(struct timer_list *t)
6766f458dfbSEric Dumazet {
67759f379f9SKees Cook 	struct inet_connection_sock *icsk =
67859f379f9SKees Cook 			from_timer(icsk, t, icsk_retransmit_timer);
67959f379f9SKees Cook 	struct sock *sk = &icsk->icsk_inet.sk;
6806f458dfbSEric Dumazet 
6816f458dfbSEric Dumazet 	bh_lock_sock(sk);
6826f458dfbSEric Dumazet 	if (!sock_owned_by_user(sk)) {
6836f458dfbSEric Dumazet 		tcp_write_timer_handler(sk);
6846f458dfbSEric Dumazet 	} else {
685c380d37eSRichard Sailer 		/* delegate our work to tcp_release_cb() */
6867aa5470cSEric Dumazet 		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
687144d56e9SEric Dumazet 			sock_hold(sk);
6886f458dfbSEric Dumazet 	}
6891da177e4SLinus Torvalds 	bh_unlock_sock(sk);
6901da177e4SLinus Torvalds 	sock_put(sk);
6911da177e4SLinus Torvalds }
6921da177e4SLinus Torvalds 
69342cb80a2SEric Dumazet void tcp_syn_ack_timeout(const struct request_sock *req)
69472659eccSOctavian Purdila {
69542cb80a2SEric Dumazet 	struct net *net = read_pnet(&inet_rsk(req)->ireq_net);
69642cb80a2SEric Dumazet 
69702a1d6e7SEric Dumazet 	__NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS);
69872659eccSOctavian Purdila }
69972659eccSOctavian Purdila EXPORT_SYMBOL(tcp_syn_ack_timeout);
70072659eccSOctavian Purdila 
7011da177e4SLinus Torvalds void tcp_set_keepalive(struct sock *sk, int val)
7021da177e4SLinus Torvalds {
7031da177e4SLinus Torvalds 	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
7041da177e4SLinus Torvalds 		return;
7051da177e4SLinus Torvalds 
7061da177e4SLinus Torvalds 	if (val && !sock_flag(sk, SOCK_KEEPOPEN))
707463c84b9SArnaldo Carvalho de Melo 		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
7081da177e4SLinus Torvalds 	else if (!val)
709463c84b9SArnaldo Carvalho de Melo 		inet_csk_delete_keepalive_timer(sk);
7101da177e4SLinus Torvalds }
7114b9d07a4SUrsula Braun EXPORT_SYMBOL_GPL(tcp_set_keepalive);
7121da177e4SLinus Torvalds 
7131da177e4SLinus Torvalds 
/* Keepalive timer callback.  Besides sending keepalive probes, this
 * timer also handles the FIN_WAIT2 orphan timeout (moving the socket
 * to timewait or resetting it) and enforces TCP_USER_TIMEOUT limits
 * on unanswered probes.
 */
static void tcp_keepalive_timer (struct timer_list *t)
{
	struct sock *sk = from_timer(sk, t, sk_timer);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	u32 elapsed;

	/* Only process if socket is not in use. */
	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		inet_csk_reset_keepalive_timer (sk, HZ/20);
		goto out;
	}

	/* Listeners must never have a keepalive timer armed. */
	if (sk->sk_state == TCP_LISTEN) {
		pr_err("Hmm... keepalive on a LISTEN ???\n");
		goto out;
	}

	tcp_mstamp_refresh(tp);
	/* Orphaned FIN_WAIT2 socket: either park it in timewait for the
	 * remaining linger time, or reset the connection if time is up.
	 */
	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
		if (READ_ONCE(tp->linger2) >= 0) {
			const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;

			if (tmo > 0) {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
		tcp_send_active_reset(sk, GFP_ATOMIC);
		goto death;
	}

	/* Keepalive only applies when enabled and in a data-carrying state. */
	if (!sock_flag(sk, SOCK_KEEPOPEN) ||
	    ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
		goto out;

	elapsed = keepalive_time_when(tp);

	/* It is alive without keepalive 8) */
	if (tp->packets_out || !tcp_write_queue_empty(sk))
		goto resched;

	/* Idle time since the last received segment. */
	elapsed = keepalive_time_elapsed(tp);

	if (elapsed >= keepalive_time_when(tp)) {
		u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);

		/* If the TCP_USER_TIMEOUT option is enabled, use that
		 * to determine when to timeout instead.
		 */
		if ((user_timeout != 0 &&
		    elapsed >= msecs_to_jiffies(user_timeout) &&
		    icsk->icsk_probes_out > 0) ||
		    (user_timeout == 0 &&
		    icsk->icsk_probes_out >= keepalive_probes(tp))) {
			/* Too many unanswered probes: kill the connection. */
			tcp_send_active_reset(sk, GFP_ATOMIC);
			tcp_write_err(sk);
			goto out;
		}
		if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
			/* Probe went out: count it and wait one probe interval. */
			icsk->icsk_probes_out++;
			elapsed = keepalive_intvl_when(tp);
		} else {
			/* If keepalive was lost due to local congestion,
			 * try harder.
			 */
			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
		}
	} else {
		/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
		elapsed = keepalive_time_when(tp) - elapsed;
	}

resched:
	inet_csk_reset_keepalive_timer (sk, elapsed);
	goto out;

death:
	tcp_done(sk);

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
8006f458dfbSEric Dumazet 
8015d9f4262SEric Dumazet static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
8025d9f4262SEric Dumazet {
8035d9f4262SEric Dumazet 	struct tcp_sock *tp = container_of(timer, struct tcp_sock, compressed_ack_timer);
8045d9f4262SEric Dumazet 	struct sock *sk = (struct sock *)tp;
8055d9f4262SEric Dumazet 
8065d9f4262SEric Dumazet 	bh_lock_sock(sk);
8075d9f4262SEric Dumazet 	if (!sock_owned_by_user(sk)) {
8082b195850SEric Dumazet 		if (tp->compressed_ack) {
8092b195850SEric Dumazet 			/* Since we have to send one ack finally,
810974d8f86SZheng Yongjun 			 * subtract one from tp->compressed_ack to keep
8112b195850SEric Dumazet 			 * LINUX_MIB_TCPACKCOMPRESSED accurate.
8122b195850SEric Dumazet 			 */
8132b195850SEric Dumazet 			tp->compressed_ack--;
8145d9f4262SEric Dumazet 			tcp_send_ack(sk);
8152b195850SEric Dumazet 		}
8165d9f4262SEric Dumazet 	} else {
8175d9f4262SEric Dumazet 		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
8185d9f4262SEric Dumazet 				      &sk->sk_tsq_flags))
8195d9f4262SEric Dumazet 			sock_hold(sk);
8205d9f4262SEric Dumazet 	}
8215d9f4262SEric Dumazet 	bh_unlock_sock(sk);
8225d9f4262SEric Dumazet 
8235d9f4262SEric Dumazet 	sock_put(sk);
8245d9f4262SEric Dumazet 
8255d9f4262SEric Dumazet 	return HRTIMER_NORESTART;
8265d9f4262SEric Dumazet }
8275d9f4262SEric Dumazet 
8286f458dfbSEric Dumazet void tcp_init_xmit_timers(struct sock *sk)
8296f458dfbSEric Dumazet {
8306f458dfbSEric Dumazet 	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
8316f458dfbSEric Dumazet 				  &tcp_keepalive_timer);
832fb420d5dSEric Dumazet 	hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
83373a6bab5SEric Dumazet 		     HRTIMER_MODE_ABS_PINNED_SOFT);
834218af599SEric Dumazet 	tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
8355d9f4262SEric Dumazet 
8365d9f4262SEric Dumazet 	hrtimer_init(&tcp_sk(sk)->compressed_ack_timer, CLOCK_MONOTONIC,
8375d9f4262SEric Dumazet 		     HRTIMER_MODE_REL_PINNED_SOFT);
8385d9f4262SEric Dumazet 	tcp_sk(sk)->compressed_ack_timer.function = tcp_compressed_ack_kick;
8396f458dfbSEric Dumazet }
840