xref: /linux/net/ipv4/tcp_timer.c (revision fb420d5d91c1274d5966917725e71f27ed092a85)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		Implementation of the Transmission Control Protocol(TCP).
71da177e4SLinus Torvalds  *
802c30a84SJesper Juhl  * Authors:	Ross Biro
91da177e4SLinus Torvalds  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
101da177e4SLinus Torvalds  *		Mark Evans, <evansmp@uhura.aston.ac.uk>
111da177e4SLinus Torvalds  *		Corey Minyard <wf-rch!minyard@relay.EU.net>
121da177e4SLinus Torvalds  *		Florian La Roche, <flla@stud.uni-sb.de>
131da177e4SLinus Torvalds  *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
141da177e4SLinus Torvalds  *		Linus Torvalds, <torvalds@cs.helsinki.fi>
151da177e4SLinus Torvalds  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
161da177e4SLinus Torvalds  *		Matthew Dillon, <dillon@apollo.west.oic.com>
171da177e4SLinus Torvalds  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
181da177e4SLinus Torvalds  *		Jorge Cwik, <jorge@laser.satlink.net>
191da177e4SLinus Torvalds  */
201da177e4SLinus Torvalds 
211da177e4SLinus Torvalds #include <linux/module.h>
225a0e3ad6STejun Heo #include <linux/gfp.h>
231da177e4SLinus Torvalds #include <net/tcp.h>
241da177e4SLinus Torvalds 
/* Return the timestamp of the first retransmission attempt, in the same
 * clock units as tcp_time_stamp().  When tp->retrans_stamp was never
 * recorded, fall back to the send timestamp of the head of the
 * retransmit queue; return 0 if that queue is empty as well.
 */
2555477206SWei Yongjun static u32 tcp_retransmit_stamp(const struct sock *sk)
26a7fa3770SJon Maxwell {
27a7fa3770SJon Maxwell 	u32 start_ts = tcp_sk(sk)->retrans_stamp;
28a7fa3770SJon Maxwell 
29a7fa3770SJon Maxwell 	if (unlikely(!start_ts)) {
30a7fa3770SJon Maxwell 		struct sk_buff *head = tcp_rtx_queue_head(sk);
31a7fa3770SJon Maxwell 
32a7fa3770SJon Maxwell 		if (!head)
33a7fa3770SJon Maxwell 			return 0;
34a7fa3770SJon Maxwell 		start_ts = tcp_skb_timestamp(head);
35a7fa3770SJon Maxwell 	}
36a7fa3770SJon Maxwell 	return start_ts;
37a7fa3770SJon Maxwell }
38a7fa3770SJon Maxwell 
/* Clamp the retransmit timeout so that the total retransmission time does
 * not run past the user-configured TCP_USER_TIMEOUT.  Returns the
 * (possibly shortened) RTO in jiffies; returns 1 jiffy once the user
 * timeout has already elapsed so the timer fires as soon as possible.
 */
39b701a99eSJon Maxwell static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
40b701a99eSJon Maxwell {
41b701a99eSJon Maxwell 	struct inet_connection_sock *icsk = inet_csk(sk);
42b701a99eSJon Maxwell 	u32 elapsed, start_ts;
43b701a99eSJon Maxwell 
44b701a99eSJon Maxwell 	start_ts = tcp_retransmit_stamp(sk);
	/* No user timeout configured, or no retransmission in progress:
	 * keep the unmodified RTO.
	 */
45b701a99eSJon Maxwell 	if (!icsk->icsk_user_timeout || !start_ts)
46b701a99eSJon Maxwell 		return icsk->icsk_rto;
	/* elapsed and icsk_user_timeout are both compared in msec here;
	 * the return value is converted back to jiffies below.
	 */
47b701a99eSJon Maxwell 	elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
48b701a99eSJon Maxwell 	if (elapsed >= icsk->icsk_user_timeout)
49b701a99eSJon Maxwell 		return 1; /* user timeout has passed; fire ASAP */
50b701a99eSJon Maxwell 	else
51b701a99eSJon Maxwell 		return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
52b701a99eSJon Maxwell }
53b701a99eSJon Maxwell 
54c380d37eSRichard Sailer /**
55c380d37eSRichard Sailer  *  tcp_write_err() - close socket and save error info
56c380d37eSRichard Sailer  *  @sk:  The socket the error has appeared on.
57c380d37eSRichard Sailer  *
58c380d37eSRichard Sailer  *  Returns: Nothing (void)
59c380d37eSRichard Sailer  */
60c380d37eSRichard Sailer 
611da177e4SLinus Torvalds static void tcp_write_err(struct sock *sk)
621da177e4SLinus Torvalds {
	/* Report a pending soft error if one was recorded, else ETIMEDOUT. */
631da177e4SLinus Torvalds 	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
641da177e4SLinus Torvalds 	sk->sk_error_report(sk);
651da177e4SLinus Torvalds 
	/* Drop all unsent data, tear the connection down and account the
	 * abort in the TCPABORTONTIMEOUT MIB counter.
	 */
66e05836acSSoheil Hassas Yeganeh 	tcp_write_queue_purge(sk);
671da177e4SLinus Torvalds 	tcp_done(sk);
6802a1d6e7SEric Dumazet 	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
691da177e4SLinus Torvalds }
701da177e4SLinus Torvalds 
71c380d37eSRichard Sailer /**
72c380d37eSRichard Sailer  *  tcp_out_of_resources() - Close socket if out of resources
73c380d37eSRichard Sailer  *  @sk:        pointer to current socket
74c380d37eSRichard Sailer  *  @do_reset:  send a last packet with reset flag
75c380d37eSRichard Sailer  *
76c380d37eSRichard Sailer  *  Do not allow orphaned sockets to eat all our resources.
771da177e4SLinus Torvalds  *  This is direct violation of TCP specs, but it is required
781da177e4SLinus Torvalds  *  to prevent DoS attacks. It is called when a retransmission timeout
791da177e4SLinus Torvalds  *  or zero probe timeout occurs on orphaned socket.
801da177e4SLinus Torvalds  *
814ee806d5SDan Streetman  *  Also close if our net namespace is exiting; in that case there is no
824ee806d5SDan Streetman  *  hope of ever communicating again since all netns interfaces are already
834ee806d5SDan Streetman  *  down (or about to be down), and we need to release our dst references,
844ee806d5SDan Streetman  *  which have been moved to the netns loopback interface, so the namespace
854ee806d5SDan Streetman  *  can finish exiting.  This condition is only possible if we are a kernel
864ee806d5SDan Streetman  *  socket, as those do not hold references to the namespace.
874ee806d5SDan Streetman  *
88caa20d9aSStephen Hemminger  *  Criteria is still not confirmed experimentally and may change.
891da177e4SLinus Torvalds  *  We kill the socket, if:
901da177e4SLinus Torvalds  *  1. If number of orphaned sockets exceeds an administratively configured
911da177e4SLinus Torvalds  *     limit.
921da177e4SLinus Torvalds  *  2. If we have strong memory pressure.
934ee806d5SDan Streetman  *  3. If our net namespace is exiting.
941da177e4SLinus Torvalds  *
 *  Returns: 1 when the socket was killed (caller must stop using it),
 *           0 when the socket survives.
 */
95b248230cSYuchung Cheng static int tcp_out_of_resources(struct sock *sk, bool do_reset)
961da177e4SLinus Torvalds {
971da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
	/* 'shift' escalates how aggressively tcp_check_oom() treats this
	 * socket: each penalty below adds one.
	 */
98ad1af0feSDavid S. Miller 	int shift = 0;
991da177e4SLinus Torvalds 
1001da177e4SLinus Torvalds 	/* If peer does not open window for long time, or did not transmit
1011da177e4SLinus Torvalds 	 * anything for long time, penalize it. */
102d635fbe2SEric Dumazet 	if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
103ad1af0feSDavid S. Miller 		shift++;
1041da177e4SLinus Torvalds 
1051da177e4SLinus Torvalds 	/* If some dubious ICMP arrived, penalize even more. */
1061da177e4SLinus Torvalds 	if (sk->sk_err_soft)
107ad1af0feSDavid S. Miller 		shift++;
1081da177e4SLinus Torvalds 
109efcdbf24SArun Sharma 	if (tcp_check_oom(sk, shift)) {
1101da177e4SLinus Torvalds 		/* Catch exceptional cases, when connection requires reset.
1111da177e4SLinus Torvalds 		 *      1. Last segment was sent recently. */
112d635fbe2SEric Dumazet 		if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
1131da177e4SLinus Torvalds 		    /*  2. Window is closed. */
1141da177e4SLinus Torvalds 		    (!tp->snd_wnd && !tp->packets_out))
115b248230cSYuchung Cheng 			do_reset = true;
1161da177e4SLinus Torvalds 		if (do_reset)
1171da177e4SLinus Torvalds 			tcp_send_active_reset(sk, GFP_ATOMIC);
1181da177e4SLinus Torvalds 		tcp_done(sk);
11902a1d6e7SEric Dumazet 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
1201da177e4SLinus Torvalds 		return 1;
1211da177e4SLinus Torvalds 	}
1224ee806d5SDan Streetman 
	/* check_net() fails while the namespace is being dismantled. */
1234ee806d5SDan Streetman 	if (!check_net(sock_net(sk))) {
1244ee806d5SDan Streetman 		/* Not possible to send reset; just close */
1254ee806d5SDan Streetman 		tcp_done(sk);
1264ee806d5SDan Streetman 		return 1;
1274ee806d5SDan Streetman 	}
1284ee806d5SDan Streetman 
1291da177e4SLinus Torvalds 	return 0;
1301da177e4SLinus Torvalds }
1311da177e4SLinus Torvalds 
132c380d37eSRichard Sailer /**
133c380d37eSRichard Sailer  *  tcp_orphan_retries() - Returns maximal number of retries on an orphaned socket
134c380d37eSRichard Sailer  *  @sk:    Pointer to the current socket.
135c380d37eSRichard Sailer  *  @alive: bool, socket alive state
 *
 *  Returns: the retry limit; may be 0, which means "give up immediately"
 *  (only when a soft error is pending and the socket is not alive).
136c380d37eSRichard Sailer  */
1377533ce30SRichard Sailer static int tcp_orphan_retries(struct sock *sk, bool alive)
1381da177e4SLinus Torvalds {
139c402d9beSNikolay Borisov 	int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
1401da177e4SLinus Torvalds 
1411da177e4SLinus Torvalds 	/* We know from an ICMP that something is wrong. */
1421da177e4SLinus Torvalds 	if (sk->sk_err_soft && !alive)
1431da177e4SLinus Torvalds 		retries = 0;
1441da177e4SLinus Torvalds 
1451da177e4SLinus Torvalds 	/* However, if socket sent something recently, select some safe
1461da177e4SLinus Torvalds 	 * number of retries. 8 corresponds to >100 seconds with minimal
1471da177e4SLinus Torvalds 	 * RTO of 200msec. */
1481da177e4SLinus Torvalds 	if (retries == 0 && alive)
1491da177e4SLinus Torvalds 		retries = 8;
1501da177e4SLinus Torvalds 	return retries;
1511da177e4SLinus Torvalds }
1521da177e4SLinus Torvalds 
/* Path-MTU blackhole handling on retransmission timeout.  No-op unless
 * the tcp_mtu_probing sysctl is enabled.  On the first timeout, turn on
 * MTU probing for this connection; on later timeouts, shrink the probe
 * floor (search_low) so smaller segments get tried.
 */
153ce55dd36SEric Dumazet static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
154ce55dd36SEric Dumazet {
155d0f36847SEric Dumazet 	const struct net *net = sock_net(sk);
156d0f36847SEric Dumazet 	int mss;
157b0f9ca53SFan Du 
158ce55dd36SEric Dumazet 	/* Black hole detection */
159d0f36847SEric Dumazet 	if (!net->ipv4.sysctl_tcp_mtu_probing)
160d0f36847SEric Dumazet 		return;
161d0f36847SEric Dumazet 
162ce55dd36SEric Dumazet 	if (!icsk->icsk_mtup.enabled) {
163ce55dd36SEric Dumazet 		icsk->icsk_mtup.enabled = 1;
164c74df29aSEric Dumazet 		icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
165ce55dd36SEric Dumazet 	} else {
		/* Halve the current low-end probe MSS, cap it at the
		 * tcp_base_mss sysctl, and never go below 68 bytes minus
		 * the TCP header length (68 presumably being the minimum
		 * IPv4 MTU — NOTE(review): confirm the derivation).
		 */
1668beb5c5fSEric Dumazet 		mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
167b0f9ca53SFan Du 		mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
168d0f36847SEric Dumazet 		mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
169ce55dd36SEric Dumazet 		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
170d0f36847SEric Dumazet 	}
	/* Re-derive the effective MSS from the (possibly updated) bounds. */
171ce55dd36SEric Dumazet 	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
172ce55dd36SEric Dumazet }
173ce55dd36SEric Dumazet 
174c380d37eSRichard Sailer 
175c380d37eSRichard Sailer /**
176c380d37eSRichard Sailer  *  retransmits_timed_out() - returns true if this connection has timed out
177c380d37eSRichard Sailer  *  @sk:       The current socket
178c380d37eSRichard Sailer  *  @boundary: max number of retransmissions
179c380d37eSRichard Sailer  *  @timeout:  A custom timeout value.
180c380d37eSRichard Sailer  *             If set to 0 the default timeout is calculated and used.
181c380d37eSRichard Sailer  *             Using TCP_RTO_MIN and the number of unsuccessful retransmits.
182c380d37eSRichard Sailer  *
183c380d37eSRichard Sailer  * The default "timeout" value this function can calculate and use
184c380d37eSRichard Sailer  * is equivalent to the timeout of a TCP Connection
185c380d37eSRichard Sailer  * after "boundary" unsuccessful, exponentially backed-off
186ce682ef6SEric Dumazet  * retransmissions with an initial RTO of TCP_RTO_MIN.
1872f7de571SDamian Lukowski  */
1882f7de571SDamian Lukowski static bool retransmits_timed_out(struct sock *sk,
189dca43c75SJerry Chu 				  unsigned int boundary,
190ce682ef6SEric Dumazet 				  unsigned int timeout)
1912f7de571SDamian Lukowski {
192ce682ef6SEric Dumazet 	const unsigned int rto_base = TCP_RTO_MIN;
1939a568de4SEric Dumazet 	unsigned int linear_backoff_thresh, start_ts;
1942f7de571SDamian Lukowski 
	/* Nothing has been retransmitted yet: cannot have timed out. */
1952f7de571SDamian Lukowski 	if (!inet_csk(sk)->icsk_retransmits)
1962f7de571SDamian Lukowski 		return false;
1972f7de571SDamian Lukowski 
198a7fa3770SJon Maxwell 	start_ts = tcp_retransmit_stamp(sk);
199a7fa3770SJon Maxwell 	if (!start_ts)
20075c119afSEric Dumazet 		return false;
2012f7de571SDamian Lukowski 
202dca43c75SJerry Chu 	if (likely(timeout == 0)) {
		/* Number of doublings before the backed-off RTO would
		 * saturate at TCP_RTO_MAX.
		 */
2034d22f7d3SDamian Lukowski 		linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
2042f7de571SDamian Lukowski 
		/* Sum of the exponential (and, past the threshold, linear)
		 * backoff series for 'boundary' retransmissions, in jiffies.
		 */
2052f7de571SDamian Lukowski 		if (boundary <= linear_backoff_thresh)
2064d22f7d3SDamian Lukowski 			timeout = ((2 << boundary) - 1) * rto_base;
2072f7de571SDamian Lukowski 		else
2084d22f7d3SDamian Lukowski 			timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
2092f7de571SDamian Lukowski 				(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
2109bcc66e1SJon Maxwell 		timeout = jiffies_to_msecs(timeout);
211dca43c75SJerry Chu 	}
	/* Both sides of the comparison are in msec here (tcp_time_stamp
	 * clock vs. the msec-converted timeout).
	 */
2129bcc66e1SJon Maxwell 	return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
2132f7de571SDamian Lukowski }
2142f7de571SDamian Lukowski 
2151da177e4SLinus Torvalds /* A write timeout has occurred. Process the after effects.
 *
 * Decides whether the connection has exhausted its retry budget.
 * Returns 1 when the socket has been killed (error reported or out of
 * resources), 0 when retransmission should continue.
 */
2161da177e4SLinus Torvalds static int tcp_write_timeout(struct sock *sk)
2171da177e4SLinus Torvalds {
2185d424d5aSJohn Heffner 	struct inet_connection_sock *icsk = inet_csk(sk);
219c968601dSYuchung Cheng 	struct tcp_sock *tp = tcp_sk(sk);
2206fa25166SNikolay Borisov 	struct net *net = sock_net(sk);
221ce682ef6SEric Dumazet 	bool expired, do_reset;
2221da177e4SLinus Torvalds 	int retry_until;
2231da177e4SLinus Torvalds 
	/* Connection-establishment phase: limited by (tcp_)syn_retries. */
2241da177e4SLinus Torvalds 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
225c968601dSYuchung Cheng 		if (icsk->icsk_retransmits) {
226b6c6712aSEric Dumazet 			dst_negative_advice(sk);
2273acf3ec3SLawrence Brakmo 		} else if (!tp->syn_data && !tp->syn_fastopen) {
			/* Try a different flow hash/path before retrying. */
2283acf3ec3SLawrence Brakmo 			sk_rethink_txhash(sk);
229c968601dSYuchung Cheng 		}
2306fa25166SNikolay Borisov 		retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
231ce682ef6SEric Dumazet 		expired = icsk->icsk_retransmits >= retry_until;
2321da177e4SLinus Torvalds 	} else {
		/* Established phase: after tcp_retries1 failures, start MTU
		 * blackhole probing and devalue the cached route.
		 */
233ce682ef6SEric Dumazet 		if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
2345d424d5aSJohn Heffner 			/* Black hole detection */
235ce55dd36SEric Dumazet 			tcp_mtu_probing(icsk, sk);
2361da177e4SLinus Torvalds 
237b6c6712aSEric Dumazet 			dst_negative_advice(sk);
2383acf3ec3SLawrence Brakmo 		} else {
2393acf3ec3SLawrence Brakmo 			sk_rethink_txhash(sk);
2401da177e4SLinus Torvalds 		}
2411da177e4SLinus Torvalds 
242c6214a97SNikolay Borisov 		retry_until = net->ipv4.sysctl_tcp_retries2;
2431da177e4SLinus Torvalds 		if (sock_flag(sk, SOCK_DEAD)) {
			/* Orphaned socket: tighter limits apply. */
2447533ce30SRichard Sailer 			const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
2451da177e4SLinus Torvalds 
2461da177e4SLinus Torvalds 			retry_until = tcp_orphan_retries(sk, alive);
2476fa12c85SDamian Lukowski 			do_reset = alive ||
248ce682ef6SEric Dumazet 				!retransmits_timed_out(sk, retry_until, 0);
2491da177e4SLinus Torvalds 
2506fa12c85SDamian Lukowski 			if (tcp_out_of_resources(sk, do_reset))
2511da177e4SLinus Torvalds 				return 1;
2521da177e4SLinus Torvalds 		}
		/* A non-zero TCP_USER_TIMEOUT overrides the default budget. */
253ce682ef6SEric Dumazet 		expired = retransmits_timed_out(sk, retry_until,
254ce682ef6SEric Dumazet 						icsk->icsk_user_timeout);
2551da177e4SLinus Torvalds 	}
2567268586bSYuchung Cheng 	tcp_fastopen_active_detect_blackhole(sk, expired);
257f89013f6SLawrence Brakmo 
	/* Let an attached BPF program observe every RTO event. */
258f89013f6SLawrence Brakmo 	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
259f89013f6SLawrence Brakmo 		tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
260f89013f6SLawrence Brakmo 				  icsk->icsk_retransmits,
261f89013f6SLawrence Brakmo 				  icsk->icsk_rto, (int)expired);
262f89013f6SLawrence Brakmo 
263ce682ef6SEric Dumazet 	if (expired) {
2641da177e4SLinus Torvalds 		/* Has it gone just too far? */
2651da177e4SLinus Torvalds 		tcp_write_err(sk);
2661da177e4SLinus Torvalds 		return 1;
2671da177e4SLinus Torvalds 	}
268f89013f6SLawrence Brakmo 
2691da177e4SLinus Torvalds 	return 0;
2701da177e4SLinus Torvalds }
2711da177e4SLinus Torvalds 
272c10d9310SEric Dumazet /* Called with BH disabled */
/* Handle an expired delayed-ACK timer: if an ACK is still scheduled,
 * adjust the ACK-timeout estimate (ato) and send the ACK now.  Re-arms
 * itself when invoked before the programmed timeout has been reached.
 */
2736f458dfbSEric Dumazet void tcp_delack_timer_handler(struct sock *sk)
2741da177e4SLinus Torvalds {
275463c84b9SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
2761da177e4SLinus Torvalds 
2779993e7d3SDavid S. Miller 	sk_mem_reclaim_partial(sk);
2781da177e4SLinus Torvalds 
	/* Nothing to do for closed/listening sockets, or when no
	 * delayed-ACK timer event is actually pending.
	 */
27902b2faafSEric Dumazet 	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
28002b2faafSEric Dumazet 	    !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
2811da177e4SLinus Torvalds 		goto out;
2821da177e4SLinus Torvalds 
	/* Fired early: re-arm for the remaining time. */
283463c84b9SArnaldo Carvalho de Melo 	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
284463c84b9SArnaldo Carvalho de Melo 		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
2851da177e4SLinus Torvalds 		goto out;
2861da177e4SLinus Torvalds 	}
287463c84b9SArnaldo Carvalho de Melo 	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
2881da177e4SLinus Torvalds 
289463c84b9SArnaldo Carvalho de Melo 	if (inet_csk_ack_scheduled(sk)) {
290463c84b9SArnaldo Carvalho de Melo 		if (!icsk->icsk_ack.pingpong) {
2911da177e4SLinus Torvalds 			/* Delayed ACK missed: inflate ATO. */
292463c84b9SArnaldo Carvalho de Melo 			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
2931da177e4SLinus Torvalds 		} else {
2941da177e4SLinus Torvalds 			/* Delayed ACK missed: leave pingpong mode and
2951da177e4SLinus Torvalds 			 * deflate ATO.
2961da177e4SLinus Torvalds 			 */
297463c84b9SArnaldo Carvalho de Melo 			icsk->icsk_ack.pingpong = 0;
298463c84b9SArnaldo Carvalho de Melo 			icsk->icsk_ack.ato      = TCP_ATO_MIN;
2991da177e4SLinus Torvalds 		}
3004688eb7cSEric Dumazet 		tcp_mstamp_refresh(tcp_sk(sk));
3011da177e4SLinus Torvalds 		tcp_send_ack(sk);
30202a1d6e7SEric Dumazet 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
3031da177e4SLinus Torvalds 	}
3041da177e4SLinus Torvalds 
3051da177e4SLinus Torvalds out:
306b8da51ebSEric Dumazet 	if (tcp_under_memory_pressure(sk))
3073ab224beSHideo Aoki 		sk_mem_reclaim(sk);
3086f458dfbSEric Dumazet }
3096f458dfbSEric Dumazet 
310c380d37eSRichard Sailer 
311c380d37eSRichard Sailer /**
312c380d37eSRichard Sailer  *  tcp_delack_timer() - The TCP delayed ACK timeout handler
313c380d37eSRichard Sailer  *  @data:  Pointer to the current socket. (gets casted to struct sock *)
314c380d37eSRichard Sailer  *
315c380d37eSRichard Sailer  *  This function gets (indirectly) called when the kernel timer for a TCP packet
316c380d37eSRichard Sailer  *  of this socket expires. Calls tcp_delack_timer_handler() to do the actual work.
317c380d37eSRichard Sailer  *
318c380d37eSRichard Sailer  *  Returns: Nothing (void)
319c380d37eSRichard Sailer  */
32059f379f9SKees Cook static void tcp_delack_timer(struct timer_list *t)
3216f458dfbSEric Dumazet {
32259f379f9SKees Cook 	struct inet_connection_sock *icsk =
32359f379f9SKees Cook 			from_timer(icsk, t, icsk_delack_timer);
32459f379f9SKees Cook 	struct sock *sk = &icsk->icsk_inet.sk;
3256f458dfbSEric Dumazet 
3266f458dfbSEric Dumazet 	bh_lock_sock(sk);
3276f458dfbSEric Dumazet 	if (!sock_owned_by_user(sk)) {
3286f458dfbSEric Dumazet 		tcp_delack_timer_handler(sk);
3296f458dfbSEric Dumazet 	} else {
		/* Socket is owned by user context: defer the work to
		 * tcp_release_cb() via the TCP_DELACK_TIMER_DEFERRED flag,
		 * taking an extra reference only the first time the bit is
		 * set.
		 */
33059f379f9SKees Cook 		icsk->icsk_ack.blocked = 1;
33102a1d6e7SEric Dumazet 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
3326f458dfbSEric Dumazet 		/* deleguate our work to tcp_release_cb() */
3337aa5470cSEric Dumazet 		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
334144d56e9SEric Dumazet 			sock_hold(sk);
3356f458dfbSEric Dumazet 	}
3361da177e4SLinus Torvalds 	bh_unlock_sock(sk);
3371da177e4SLinus Torvalds 	sock_put(sk);
3381da177e4SLinus Torvalds }
3391da177e4SLinus Torvalds 
3391da177e4SLinus Torvalds 
/* Zero-window probe timer: keep probing while the receiver responds,
 * but abort the connection once the probe count (and, when set, the
 * TCP_USER_TIMEOUT) has been exceeded.
 */
3401da177e4SLinus Torvalds static void tcp_probe_timer(struct sock *sk)
3411da177e4SLinus Torvalds {
3426687e988SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
34375c119afSEric Dumazet 	struct sk_buff *skb = tcp_send_head(sk);
3441da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
3451da177e4SLinus Torvalds 	int max_probes;
346b248230cSYuchung Cheng 	u32 start_ts;
3471da177e4SLinus Torvalds 
	/* Data is in flight, or nothing queued to probe with: reset. */
34875c119afSEric Dumazet 	if (tp->packets_out || !skb) {
3496687e988SArnaldo Carvalho de Melo 		icsk->icsk_probes_out = 0;
3501da177e4SLinus Torvalds 		return;
3511da177e4SLinus Torvalds 	}
3521da177e4SLinus Torvalds 
353b248230cSYuchung Cheng 	/* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as
354b248230cSYuchung Cheng 	 * long as the receiver continues to respond probes. We support this by
355b248230cSYuchung Cheng 	 * default and reset icsk_probes_out with incoming ACKs. But if the
356b248230cSYuchung Cheng 	 * socket is orphaned or the user specifies TCP_USER_TIMEOUT, we
357b248230cSYuchung Cheng 	 * kill the socket when the retry count and the time exceeds the
358b248230cSYuchung Cheng 	 * corresponding system limit. We also implement similar policy when
359b248230cSYuchung Cheng 	 * we use RTO to probe window in tcp_retransmit_timer().
3601da177e4SLinus Torvalds 	 */
36175c119afSEric Dumazet 	start_ts = tcp_skb_timestamp(skb);
	/* No timestamp yet: stamp the head skb so elapsed time can be
	 * measured on the next expiry.
	 */
362b248230cSYuchung Cheng 	if (!start_ts)
363d3edd06eSEric Dumazet 		skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
364b248230cSYuchung Cheng 	else if (icsk->icsk_user_timeout &&
3659bcc66e1SJon Maxwell 		 (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
366b248230cSYuchung Cheng 		goto abort;
3671da177e4SLinus Torvalds 
368c6214a97SNikolay Borisov 	max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
3691da177e4SLinus Torvalds 	if (sock_flag(sk, SOCK_DEAD)) {
3707533ce30SRichard Sailer 		const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
3711da177e4SLinus Torvalds 
3721da177e4SLinus Torvalds 		max_probes = tcp_orphan_retries(sk, alive);
373b248230cSYuchung Cheng 		if (!alive && icsk->icsk_backoff >= max_probes)
374b248230cSYuchung Cheng 			goto abort;
375b248230cSYuchung Cheng 		if (tcp_out_of_resources(sk, true))
3761da177e4SLinus Torvalds 			return;
3771da177e4SLinus Torvalds 	}
3781da177e4SLinus Torvalds 
3796687e988SArnaldo Carvalho de Melo 	if (icsk->icsk_probes_out > max_probes) {
380b248230cSYuchung Cheng abort:		tcp_write_err(sk);
3811da177e4SLinus Torvalds 	} else {
3821da177e4SLinus Torvalds 		/* Only send another probe if we didn't close things up. */
3831da177e4SLinus Torvalds 		tcp_send_probe0(sk);
3841da177e4SLinus Torvalds 	}
3851da177e4SLinus Torvalds }
3861da177e4SLinus Torvalds 
3861da177e4SLinus Torvalds 
3871da177e4SLinus Torvalds /*
3888336886fSJerry Chu  *	Timer for Fast Open socket to retransmit SYNACK. Note that the
3898336886fSJerry Chu  *	sk here is the child socket, not the parent (listener) socket.
3908336886fSJerry Chu  */
3918336886fSJerry Chu static void tcp_fastopen_synack_timer(struct sock *sk)
3928336886fSJerry Chu {
3938336886fSJerry Chu 	struct inet_connection_sock *icsk = inet_csk(sk);
3948336886fSJerry Chu 	int max_retries = icsk->icsk_syn_retries ? :
3957c083ecbSNikolay Borisov 	    sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
3968336886fSJerry Chu 	struct request_sock *req;
3978336886fSJerry Chu 
3988336886fSJerry Chu 	req = tcp_sk(sk)->fastopen_rsk;
39942cb80a2SEric Dumazet 	req->rsk_ops->syn_ack_timeout(req);
4008336886fSJerry Chu 
	/* Retry budget exhausted: abort the (possibly accepted) child. */
401e6c022a4SEric Dumazet 	if (req->num_timeout >= max_retries) {
4028336886fSJerry Chu 		tcp_write_err(sk);
4038336886fSJerry Chu 		return;
4048336886fSJerry Chu 	}
4058336886fSJerry Chu 	/* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
4068336886fSJerry Chu 	 * returned from rtx_syn_ack() to make it more persistent like
4078336886fSJerry Chu 	 * regular retransmit because if the child socket has been accepted
4088336886fSJerry Chu 	 * it's not good to give up too easily.
4098336886fSJerry Chu 	 */
410e6c022a4SEric Dumazet 	inet_rtx_syn_ack(sk, req);
411e6c022a4SEric Dumazet 	req->num_timeout++;
4127e32b443SYuchung Cheng 	icsk->icsk_retransmits++;
	/* Exponential backoff: TCP_TIMEOUT_INIT doubled per timeout,
	 * capped at TCP_RTO_MAX.
	 */
4138336886fSJerry Chu 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
414e6c022a4SEric Dumazet 			  TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
4158336886fSJerry Chu }
4168336886fSJerry Chu 
4171da177e4SLinus Torvalds 
418c380d37eSRichard Sailer /**
419c380d37eSRichard Sailer  *  tcp_retransmit_timer() - The TCP retransmit timeout handler
420c380d37eSRichard Sailer  *  @sk:  Pointer to the current socket.
421c380d37eSRichard Sailer  *
422c380d37eSRichard Sailer  *  This function gets called when the kernel timer for a TCP packet
423c380d37eSRichard Sailer  *  of this socket expires.
424c380d37eSRichard Sailer  *
425c380d37eSRichard Sailer  *  It handles retransmission, timer adjustment and other necesarry measures.
426c380d37eSRichard Sailer  *
427c380d37eSRichard Sailer  *  Returns: Nothing (void)
428c380d37eSRichard Sailer  */
429f1ecd5d9SDamian Lukowski void tcp_retransmit_timer(struct sock *sk)
4301da177e4SLinus Torvalds {
4311da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
432ae5c3f40SNikolay Borisov 	struct net *net = sock_net(sk);
433463c84b9SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
4341da177e4SLinus Torvalds 
	/* Fast Open child still waiting for the ACK of its SYN-ACK:
	 * delegate entirely to the SYN-ACK retransmit path.
	 */
4358336886fSJerry Chu 	if (tp->fastopen_rsk) {
43637561f68SJerry Chu 		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
4378336886fSJerry Chu 			     sk->sk_state != TCP_FIN_WAIT1);
4388336886fSJerry Chu 		tcp_fastopen_synack_timer(sk);
4398336886fSJerry Chu 		/* Before we receive ACK to our SYN-ACK don't retransmit
4408336886fSJerry Chu 		 * anything else (e.g., data or FIN segments).
4418336886fSJerry Chu 		 */
4428336886fSJerry Chu 		return;
4438336886fSJerry Chu 	}
4441da177e4SLinus Torvalds 	if (!tp->packets_out)
4451da177e4SLinus Torvalds 		goto out;
4461da177e4SLinus Torvalds 
44775c119afSEric Dumazet 	WARN_ON(tcp_rtx_queue_empty(sk));
4481da177e4SLinus Torvalds 
	/* A timeout invalidates any pending tail-loss-probe episode. */
4499b717a8dSNandita Dukkipati 	tp->tlp_high_seq = 0;
4509b717a8dSNandita Dukkipati 
4511da177e4SLinus Torvalds 	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
4521da177e4SLinus Torvalds 	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
4531da177e4SLinus Torvalds 		/* Receiver dastardly shrinks window. Our retransmits
4541da177e4SLinus Torvalds 		 * become zero probes, but we should not timeout this
4551da177e4SLinus Torvalds 		 * connection. If the socket is an orphan, time it out,
4561da177e4SLinus Torvalds 		 * we cannot allow such beasts to hang infinitely.
4571da177e4SLinus Torvalds 		 */
4581da177e4SLinus Torvalds 		struct inet_sock *inet = inet_sk(sk);
459569508c9SYOSHIFUJI Hideaki 		if (sk->sk_family == AF_INET) {
460ba7a46f1SJoe Perches 			net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
461afd46503SJoe Perches 					    &inet->inet_daddr,
462ba7a46f1SJoe Perches 					    ntohs(inet->inet_dport),
463ba7a46f1SJoe Perches 					    inet->inet_num,
464afd46503SJoe Perches 					    tp->snd_una, tp->snd_nxt);
4651da177e4SLinus Torvalds 		}
466dfd56b8bSEric Dumazet #if IS_ENABLED(CONFIG_IPV6)
467569508c9SYOSHIFUJI Hideaki 		else if (sk->sk_family == AF_INET6) {
468ba7a46f1SJoe Perches 			net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
469efe4208fSEric Dumazet 					    &sk->sk_v6_daddr,
470ba7a46f1SJoe Perches 					    ntohs(inet->inet_dport),
471ba7a46f1SJoe Perches 					    inet->inet_num,
472afd46503SJoe Perches 					    tp->snd_una, tp->snd_nxt);
473569508c9SYOSHIFUJI Hideaki 		}
474569508c9SYOSHIFUJI Hideaki #endif
		/* Give up if nothing was received for more than TCP_RTO_MAX. */
47570eabf0eSEric Dumazet 		if (tcp_jiffies32 - tp->rcv_tstamp > TCP_RTO_MAX) {
4761da177e4SLinus Torvalds 			tcp_write_err(sk);
4771da177e4SLinus Torvalds 			goto out;
4781da177e4SLinus Torvalds 		}
4795ae344c9SNeal Cardwell 		tcp_enter_loss(sk);
48075c119afSEric Dumazet 		tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1);
4811da177e4SLinus Torvalds 		__sk_dst_reset(sk);
4821da177e4SLinus Torvalds 		goto out_reset_timer;
4831da177e4SLinus Torvalds 	}
4841da177e4SLinus Torvalds 
	/* Non-zero return means the socket was killed; nothing more to do. */
4851da177e4SLinus Torvalds 	if (tcp_write_timeout(sk))
4861da177e4SLinus Torvalds 		goto out;
4871da177e4SLinus Torvalds 
	/* First timeout of this episode: account it in the MIB counter
	 * matching the congestion-avoidance state we were in.
	 */
488463c84b9SArnaldo Carvalho de Melo 	if (icsk->icsk_retransmits == 0) {
48940b215e5SPavel Emelyanov 		int mib_idx;
49040b215e5SPavel Emelyanov 
491c60ce4e2SIlpo Järvinen 		if (icsk->icsk_ca_state == TCP_CA_Recovery) {
492bc079e9eSIlpo Järvinen 			if (tcp_is_sack(tp))
493bc079e9eSIlpo Järvinen 				mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
494bc079e9eSIlpo Järvinen 			else
495bc079e9eSIlpo Järvinen 				mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
4966687e988SArnaldo Carvalho de Melo 		} else if (icsk->icsk_ca_state == TCP_CA_Loss) {
49740b215e5SPavel Emelyanov 			mib_idx = LINUX_MIB_TCPLOSSFAILURES;
498c60ce4e2SIlpo Järvinen 		} else if ((icsk->icsk_ca_state == TCP_CA_Disorder) ||
499c60ce4e2SIlpo Järvinen 			   tp->sacked_out) {
500c60ce4e2SIlpo Järvinen 			if (tcp_is_sack(tp))
501c60ce4e2SIlpo Järvinen 				mib_idx = LINUX_MIB_TCPSACKFAILURES;
502c60ce4e2SIlpo Järvinen 			else
503c60ce4e2SIlpo Järvinen 				mib_idx = LINUX_MIB_TCPRENOFAILURES;
5041da177e4SLinus Torvalds 		} else {
50540b215e5SPavel Emelyanov 			mib_idx = LINUX_MIB_TCPTIMEOUTS;
5061da177e4SLinus Torvalds 		}
50702a1d6e7SEric Dumazet 		__NET_INC_STATS(sock_net(sk), mib_idx);
5081da177e4SLinus Torvalds 	}
5091da177e4SLinus Torvalds 
5105ae344c9SNeal Cardwell 	tcp_enter_loss(sk);
5111da177e4SLinus Torvalds 
51275c119afSEric Dumazet 	if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
5131da177e4SLinus Torvalds 		/* Retransmission failed because of local congestion,
5141da177e4SLinus Torvalds 		 * do not backoff.
5151da177e4SLinus Torvalds 		 */
516463c84b9SArnaldo Carvalho de Melo 		if (!icsk->icsk_retransmits)
517463c84b9SArnaldo Carvalho de Melo 			icsk->icsk_retransmits = 1;
518463c84b9SArnaldo Carvalho de Melo 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
5193f421baaSArnaldo Carvalho de Melo 					  min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
5203f421baaSArnaldo Carvalho de Melo 					  TCP_RTO_MAX);
5211da177e4SLinus Torvalds 		goto out;
5221da177e4SLinus Torvalds 	}
5231da177e4SLinus Torvalds 
5241da177e4SLinus Torvalds 	/* Increase the timeout each time we retransmit.  Note that
5251da177e4SLinus Torvalds 	 * we do not increase the rtt estimate.  rto is initialized
5261da177e4SLinus Torvalds 	 * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
5271da177e4SLinus Torvalds 	 * that doubling rto each time is the least we can get away with.
5281da177e4SLinus Torvalds 	 * In KA9Q, Karn uses this for the first few times, and then
5291da177e4SLinus Torvalds 	 * goes to quadratic.  netBSD doubles, but only goes up to *64,
5301da177e4SLinus Torvalds 	 * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
5311da177e4SLinus Torvalds 	 * defined in the protocol as the maximum possible RTT.  I guess
5321da177e4SLinus Torvalds 	 * we'll have to use something other than TCP to talk to the
5331da177e4SLinus Torvalds 	 * University of Mars.
5341da177e4SLinus Torvalds 	 *
5351da177e4SLinus Torvalds 	 * PAWS allows us longer timeouts and large windows, so once
5361da177e4SLinus Torvalds 	 * implemented ftp to mars will work nicely. We will have to fix
5371da177e4SLinus Torvalds 	 * the 120 second clamps though!
5381da177e4SLinus Torvalds 	 */
539463c84b9SArnaldo Carvalho de Melo 	icsk->icsk_backoff++;
540463c84b9SArnaldo Carvalho de Melo 	icsk->icsk_retransmits++;
5411da177e4SLinus Torvalds 
5421da177e4SLinus Torvalds out_reset_timer:
54336e31b0aSAndreas Petlund 	/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
54436e31b0aSAndreas Petlund 	 * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
54536e31b0aSAndreas Petlund 	 * might be increased if the stream oscillates between thin and thick,
54636e31b0aSAndreas Petlund 	 * thus the old value might already be too high compared to the value
54736e31b0aSAndreas Petlund 	 * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
54836e31b0aSAndreas Petlund 	 * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
54936e31b0aSAndreas Petlund 	 * exponential backoff behaviour to avoid continue hammering
55036e31b0aSAndreas Petlund 	 * linear-timeout retransmissions into a black hole
55136e31b0aSAndreas Petlund 	 */
55236e31b0aSAndreas Petlund 	if (sk->sk_state == TCP_ESTABLISHED &&
5532c04ac8aSEric Dumazet 	    (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
55436e31b0aSAndreas Petlund 	    tcp_stream_is_thin(tp) &&
55536e31b0aSAndreas Petlund 	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
55636e31b0aSAndreas Petlund 		icsk->icsk_backoff = 0;
55736e31b0aSAndreas Petlund 		icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
55836e31b0aSAndreas Petlund 	} else {
55936e31b0aSAndreas Petlund 		/* Use normal (exponential) backoff */
560463c84b9SArnaldo Carvalho de Melo 		icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
56136e31b0aSAndreas Petlund 	}
	/* Re-arm, clamping the RTO to any user-set TCP_USER_TIMEOUT. */
562b701a99eSJon Maxwell 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
563b701a99eSJon Maxwell 				  tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
564ce682ef6SEric Dumazet 	if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
5651da177e4SLinus Torvalds 		__sk_dst_reset(sk);
5661da177e4SLinus Torvalds 
5671da177e4SLinus Torvalds out:;
5681da177e4SLinus Torvalds }
5691da177e4SLinus Torvalds 
570c380d37eSRichard Sailer /* Called with bottom-half processing disabled.
571c380d37eSRichard Sailer    Called by tcp_write_timer() */
5726f458dfbSEric Dumazet void tcp_write_timer_handler(struct sock *sk)
5731da177e4SLinus Torvalds {
574463c84b9SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
5751da177e4SLinus Torvalds 	int event;
5761da177e4SLinus Torvalds 
57702b2faafSEric Dumazet 	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
57802b2faafSEric Dumazet 	    !icsk->icsk_pending)
5791da177e4SLinus Torvalds 		goto out;
5801da177e4SLinus Torvalds 
581463c84b9SArnaldo Carvalho de Melo 	if (time_after(icsk->icsk_timeout, jiffies)) {
582463c84b9SArnaldo Carvalho de Melo 		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
5831da177e4SLinus Torvalds 		goto out;
5841da177e4SLinus Torvalds 	}
5851da177e4SLinus Torvalds 
5869a568de4SEric Dumazet 	tcp_mstamp_refresh(tcp_sk(sk));
587463c84b9SArnaldo Carvalho de Melo 	event = icsk->icsk_pending;
5881da177e4SLinus Torvalds 
5891da177e4SLinus Torvalds 	switch (event) {
59057dde7f7SYuchung Cheng 	case ICSK_TIME_REO_TIMEOUT:
59157dde7f7SYuchung Cheng 		tcp_rack_reo_timeout(sk);
59257dde7f7SYuchung Cheng 		break;
5936ba8a3b1SNandita Dukkipati 	case ICSK_TIME_LOSS_PROBE:
5946ba8a3b1SNandita Dukkipati 		tcp_send_loss_probe(sk);
5956ba8a3b1SNandita Dukkipati 		break;
596463c84b9SArnaldo Carvalho de Melo 	case ICSK_TIME_RETRANS:
5976ba8a3b1SNandita Dukkipati 		icsk->icsk_pending = 0;
5981da177e4SLinus Torvalds 		tcp_retransmit_timer(sk);
5991da177e4SLinus Torvalds 		break;
600463c84b9SArnaldo Carvalho de Melo 	case ICSK_TIME_PROBE0:
6016ba8a3b1SNandita Dukkipati 		icsk->icsk_pending = 0;
6021da177e4SLinus Torvalds 		tcp_probe_timer(sk);
6031da177e4SLinus Torvalds 		break;
6041da177e4SLinus Torvalds 	}
6051da177e4SLinus Torvalds 
6061da177e4SLinus Torvalds out:
6073ab224beSHideo Aoki 	sk_mem_reclaim(sk);
6086f458dfbSEric Dumazet }
6096f458dfbSEric Dumazet 
61059f379f9SKees Cook static void tcp_write_timer(struct timer_list *t)
6116f458dfbSEric Dumazet {
61259f379f9SKees Cook 	struct inet_connection_sock *icsk =
61359f379f9SKees Cook 			from_timer(icsk, t, icsk_retransmit_timer);
61459f379f9SKees Cook 	struct sock *sk = &icsk->icsk_inet.sk;
6156f458dfbSEric Dumazet 
6166f458dfbSEric Dumazet 	bh_lock_sock(sk);
6176f458dfbSEric Dumazet 	if (!sock_owned_by_user(sk)) {
6186f458dfbSEric Dumazet 		tcp_write_timer_handler(sk);
6196f458dfbSEric Dumazet 	} else {
620c380d37eSRichard Sailer 		/* delegate our work to tcp_release_cb() */
6217aa5470cSEric Dumazet 		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
622144d56e9SEric Dumazet 			sock_hold(sk);
6236f458dfbSEric Dumazet 	}
6241da177e4SLinus Torvalds 	bh_unlock_sock(sk);
6251da177e4SLinus Torvalds 	sock_put(sk);
6261da177e4SLinus Torvalds }
6271da177e4SLinus Torvalds 
62842cb80a2SEric Dumazet void tcp_syn_ack_timeout(const struct request_sock *req)
62972659eccSOctavian Purdila {
63042cb80a2SEric Dumazet 	struct net *net = read_pnet(&inet_rsk(req)->ireq_net);
63142cb80a2SEric Dumazet 
63202a1d6e7SEric Dumazet 	__NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS);
63372659eccSOctavian Purdila }
63472659eccSOctavian Purdila EXPORT_SYMBOL(tcp_syn_ack_timeout);
63572659eccSOctavian Purdila 
6361da177e4SLinus Torvalds void tcp_set_keepalive(struct sock *sk, int val)
6371da177e4SLinus Torvalds {
6381da177e4SLinus Torvalds 	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
6391da177e4SLinus Torvalds 		return;
6401da177e4SLinus Torvalds 
6411da177e4SLinus Torvalds 	if (val && !sock_flag(sk, SOCK_KEEPOPEN))
642463c84b9SArnaldo Carvalho de Melo 		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
6431da177e4SLinus Torvalds 	else if (!val)
644463c84b9SArnaldo Carvalho de Melo 		inet_csk_delete_keepalive_timer(sk);
6451da177e4SLinus Torvalds }
6464b9d07a4SUrsula Braun EXPORT_SYMBOL_GPL(tcp_set_keepalive);
6471da177e4SLinus Torvalds 
6481da177e4SLinus Torvalds 
64959f379f9SKees Cook static void tcp_keepalive_timer (struct timer_list *t)
6501da177e4SLinus Torvalds {
65159f379f9SKees Cook 	struct sock *sk = from_timer(sk, t, sk_timer);
6526687e988SArnaldo Carvalho de Melo 	struct inet_connection_sock *icsk = inet_csk(sk);
6531da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
6546c37e5deSFlavio Leitner 	u32 elapsed;
6551da177e4SLinus Torvalds 
6561da177e4SLinus Torvalds 	/* Only process if socket is not in use. */
6571da177e4SLinus Torvalds 	bh_lock_sock(sk);
6581da177e4SLinus Torvalds 	if (sock_owned_by_user(sk)) {
6591da177e4SLinus Torvalds 		/* Try again later. */
660463c84b9SArnaldo Carvalho de Melo 		inet_csk_reset_keepalive_timer (sk, HZ/20);
6611da177e4SLinus Torvalds 		goto out;
6621da177e4SLinus Torvalds 	}
6631da177e4SLinus Torvalds 
6641da177e4SLinus Torvalds 	if (sk->sk_state == TCP_LISTEN) {
665fa76ce73SEric Dumazet 		pr_err("Hmm... keepalive on a LISTEN ???\n");
6661da177e4SLinus Torvalds 		goto out;
6671da177e4SLinus Torvalds 	}
6681da177e4SLinus Torvalds 
6694688eb7cSEric Dumazet 	tcp_mstamp_refresh(tp);
6701da177e4SLinus Torvalds 	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
6711da177e4SLinus Torvalds 		if (tp->linger2 >= 0) {
672463c84b9SArnaldo Carvalho de Melo 			const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
6731da177e4SLinus Torvalds 
6741da177e4SLinus Torvalds 			if (tmo > 0) {
6751da177e4SLinus Torvalds 				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
6761da177e4SLinus Torvalds 				goto out;
6771da177e4SLinus Torvalds 			}
6781da177e4SLinus Torvalds 		}
6791da177e4SLinus Torvalds 		tcp_send_active_reset(sk, GFP_ATOMIC);
6801da177e4SLinus Torvalds 		goto death;
6811da177e4SLinus Torvalds 	}
6821da177e4SLinus Torvalds 
6832dda6400SEric Dumazet 	if (!sock_flag(sk, SOCK_KEEPOPEN) ||
6842dda6400SEric Dumazet 	    ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
6851da177e4SLinus Torvalds 		goto out;
6861da177e4SLinus Torvalds 
6871da177e4SLinus Torvalds 	elapsed = keepalive_time_when(tp);
6881da177e4SLinus Torvalds 
6891da177e4SLinus Torvalds 	/* It is alive without keepalive 8) */
69075c119afSEric Dumazet 	if (tp->packets_out || !tcp_write_queue_empty(sk))
6911da177e4SLinus Torvalds 		goto resched;
6921da177e4SLinus Torvalds 
6936c37e5deSFlavio Leitner 	elapsed = keepalive_time_elapsed(tp);
6941da177e4SLinus Torvalds 
6951da177e4SLinus Torvalds 	if (elapsed >= keepalive_time_when(tp)) {
696dca43c75SJerry Chu 		/* If the TCP_USER_TIMEOUT option is enabled, use that
697dca43c75SJerry Chu 		 * to determine when to timeout instead.
698dca43c75SJerry Chu 		 */
699dca43c75SJerry Chu 		if ((icsk->icsk_user_timeout != 0 &&
7009bcc66e1SJon Maxwell 		    elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
701dca43c75SJerry Chu 		    icsk->icsk_probes_out > 0) ||
702dca43c75SJerry Chu 		    (icsk->icsk_user_timeout == 0 &&
703dca43c75SJerry Chu 		    icsk->icsk_probes_out >= keepalive_probes(tp))) {
7041da177e4SLinus Torvalds 			tcp_send_active_reset(sk, GFP_ATOMIC);
7051da177e4SLinus Torvalds 			tcp_write_err(sk);
7061da177e4SLinus Torvalds 			goto out;
7071da177e4SLinus Torvalds 		}
708e520af48SEric Dumazet 		if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
7096687e988SArnaldo Carvalho de Melo 			icsk->icsk_probes_out++;
7101da177e4SLinus Torvalds 			elapsed = keepalive_intvl_when(tp);
7111da177e4SLinus Torvalds 		} else {
7121da177e4SLinus Torvalds 			/* If keepalive was lost due to local congestion,
7131da177e4SLinus Torvalds 			 * try harder.
7141da177e4SLinus Torvalds 			 */
7151da177e4SLinus Torvalds 			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
7161da177e4SLinus Torvalds 		}
7171da177e4SLinus Torvalds 	} else {
7181da177e4SLinus Torvalds 		/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
7191da177e4SLinus Torvalds 		elapsed = keepalive_time_when(tp) - elapsed;
7201da177e4SLinus Torvalds 	}
7211da177e4SLinus Torvalds 
7223ab224beSHideo Aoki 	sk_mem_reclaim(sk);
7231da177e4SLinus Torvalds 
7241da177e4SLinus Torvalds resched:
725463c84b9SArnaldo Carvalho de Melo 	inet_csk_reset_keepalive_timer (sk, elapsed);
7261da177e4SLinus Torvalds 	goto out;
7271da177e4SLinus Torvalds 
7281da177e4SLinus Torvalds death:
7291da177e4SLinus Torvalds 	tcp_done(sk);
7301da177e4SLinus Torvalds 
7311da177e4SLinus Torvalds out:
7321da177e4SLinus Torvalds 	bh_unlock_sock(sk);
7331da177e4SLinus Torvalds 	sock_put(sk);
7341da177e4SLinus Torvalds }
7356f458dfbSEric Dumazet 
7365d9f4262SEric Dumazet static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
7375d9f4262SEric Dumazet {
7385d9f4262SEric Dumazet 	struct tcp_sock *tp = container_of(timer, struct tcp_sock, compressed_ack_timer);
7395d9f4262SEric Dumazet 	struct sock *sk = (struct sock *)tp;
7405d9f4262SEric Dumazet 
7415d9f4262SEric Dumazet 	bh_lock_sock(sk);
7425d9f4262SEric Dumazet 	if (!sock_owned_by_user(sk)) {
7435d9f4262SEric Dumazet 		if (tp->compressed_ack)
7445d9f4262SEric Dumazet 			tcp_send_ack(sk);
7455d9f4262SEric Dumazet 	} else {
7465d9f4262SEric Dumazet 		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
7475d9f4262SEric Dumazet 				      &sk->sk_tsq_flags))
7485d9f4262SEric Dumazet 			sock_hold(sk);
7495d9f4262SEric Dumazet 	}
7505d9f4262SEric Dumazet 	bh_unlock_sock(sk);
7515d9f4262SEric Dumazet 
7525d9f4262SEric Dumazet 	sock_put(sk);
7535d9f4262SEric Dumazet 
7545d9f4262SEric Dumazet 	return HRTIMER_NORESTART;
7555d9f4262SEric Dumazet }
7565d9f4262SEric Dumazet 
7576f458dfbSEric Dumazet void tcp_init_xmit_timers(struct sock *sk)
7586f458dfbSEric Dumazet {
7596f458dfbSEric Dumazet 	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
7606f458dfbSEric Dumazet 				  &tcp_keepalive_timer);
761*fb420d5dSEric Dumazet 	hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
76273a6bab5SEric Dumazet 		     HRTIMER_MODE_ABS_PINNED_SOFT);
763218af599SEric Dumazet 	tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
7645d9f4262SEric Dumazet 
7655d9f4262SEric Dumazet 	hrtimer_init(&tcp_sk(sk)->compressed_ack_timer, CLOCK_MONOTONIC,
7665d9f4262SEric Dumazet 		     HRTIMER_MODE_REL_PINNED_SOFT);
7675d9f4262SEric Dumazet 	tcp_sk(sk)->compressed_ack_timer.function = tcp_compressed_ack_kick;
7686f458dfbSEric Dumazet }
769