xref: /linux/net/ipv4/tcp_timer.c (revision 218af599fa635b107cfe10acf3249c4dfe5e4123)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * INET		An implementation of the TCP/IP protocol suite for the LINUX
31da177e4SLinus Torvalds  *		operating system.  INET is implemented using the  BSD Socket
41da177e4SLinus Torvalds  *		interface as the means of communication with the user level.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *		Implementation of the Transmission Control Protocol(TCP).
71da177e4SLinus Torvalds  *
802c30a84SJesper Juhl  * Authors:	Ross Biro
91da177e4SLinus Torvalds  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
101da177e4SLinus Torvalds  *		Mark Evans, <evansmp@uhura.aston.ac.uk>
111da177e4SLinus Torvalds  *		Corey Minyard <wf-rch!minyard@relay.EU.net>
121da177e4SLinus Torvalds  *		Florian La Roche, <flla@stud.uni-sb.de>
131da177e4SLinus Torvalds  *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
141da177e4SLinus Torvalds  *		Linus Torvalds, <torvalds@cs.helsinki.fi>
151da177e4SLinus Torvalds  *		Alan Cox, <gw4pts@gw4pts.ampr.org>
161da177e4SLinus Torvalds  *		Matthew Dillon, <dillon@apollo.west.oic.com>
171da177e4SLinus Torvalds  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
181da177e4SLinus Torvalds  *		Jorge Cwik, <jorge@laser.satlink.net>
191da177e4SLinus Torvalds  */
201da177e4SLinus Torvalds 
211da177e4SLinus Torvalds #include <linux/module.h>
225a0e3ad6STejun Heo #include <linux/gfp.h>
231da177e4SLinus Torvalds #include <net/tcp.h>
241da177e4SLinus Torvalds 
/* net.ipv4.tcp_thin_linear_timeouts: when non-zero, thin streams use
 * linear (non-backed-off) retransmission timeouts for the first
 * TCP_THIN_LINEAR_RETRIES attempts; consumed in tcp_retransmit_timer().
 */
int sysctl_tcp_thin_linear_timeouts __read_mostly;
261da177e4SLinus Torvalds 
/**
 *  tcp_write_err() - close socket and save error info
 *  @sk:  The socket the error has appeared on.
 *
 *  Reports the most specific error available (a previously recorded soft
 *  error, e.g. from ICMP, otherwise ETIMEDOUT), wakes the error queue,
 *  tears the connection down and bumps the abort-on-timeout counter.
 *
 *  Returns: Nothing (void)
 */

static void tcp_write_err(struct sock *sk)
{
	/* Prefer the soft error if one was recorded; fall back to timeout. */
	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
	sk->sk_error_report(sk);

	tcp_done(sk);
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}
421da177e4SLinus Torvalds 
43c380d37eSRichard Sailer /**
44c380d37eSRichard Sailer  *  tcp_out_of_resources() - Close socket if out of resources
45c380d37eSRichard Sailer  *  @sk:        pointer to current socket
46c380d37eSRichard Sailer  *  @do_reset:  send a last packet with reset flag
47c380d37eSRichard Sailer  *
48c380d37eSRichard Sailer  *  Do not allow orphaned sockets to eat all our resources.
491da177e4SLinus Torvalds  *  This is direct violation of TCP specs, but it is required
501da177e4SLinus Torvalds  *  to prevent DoS attacks. It is called when a retransmission timeout
511da177e4SLinus Torvalds  *  or zero probe timeout occurs on orphaned socket.
521da177e4SLinus Torvalds  *
53caa20d9aSStephen Hemminger  *  Criteria is still not confirmed experimentally and may change.
541da177e4SLinus Torvalds  *  We kill the socket, if:
551da177e4SLinus Torvalds  *  1. If number of orphaned sockets exceeds an administratively configured
561da177e4SLinus Torvalds  *     limit.
571da177e4SLinus Torvalds  *  2. If we have strong memory pressure.
581da177e4SLinus Torvalds  */
59b248230cSYuchung Cheng static int tcp_out_of_resources(struct sock *sk, bool do_reset)
601da177e4SLinus Torvalds {
611da177e4SLinus Torvalds 	struct tcp_sock *tp = tcp_sk(sk);
62ad1af0feSDavid S. Miller 	int shift = 0;
631da177e4SLinus Torvalds 
641da177e4SLinus Torvalds 	/* If peer does not open window for long time, or did not transmit
651da177e4SLinus Torvalds 	 * anything for long time, penalize it. */
661da177e4SLinus Torvalds 	if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
67ad1af0feSDavid S. Miller 		shift++;
681da177e4SLinus Torvalds 
691da177e4SLinus Torvalds 	/* If some dubious ICMP arrived, penalize even more. */
701da177e4SLinus Torvalds 	if (sk->sk_err_soft)
71ad1af0feSDavid S. Miller 		shift++;
721da177e4SLinus Torvalds 
73efcdbf24SArun Sharma 	if (tcp_check_oom(sk, shift)) {
741da177e4SLinus Torvalds 		/* Catch exceptional cases, when connection requires reset.
751da177e4SLinus Torvalds 		 *      1. Last segment was sent recently. */
761da177e4SLinus Torvalds 		if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
771da177e4SLinus Torvalds 		    /*  2. Window is closed. */
781da177e4SLinus Torvalds 		    (!tp->snd_wnd && !tp->packets_out))
79b248230cSYuchung Cheng 			do_reset = true;
801da177e4SLinus Torvalds 		if (do_reset)
811da177e4SLinus Torvalds 			tcp_send_active_reset(sk, GFP_ATOMIC);
821da177e4SLinus Torvalds 		tcp_done(sk);
8302a1d6e7SEric Dumazet 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
841da177e4SLinus Torvalds 		return 1;
851da177e4SLinus Torvalds 	}
861da177e4SLinus Torvalds 	return 0;
871da177e4SLinus Torvalds }
881da177e4SLinus Torvalds 
89c380d37eSRichard Sailer /**
90c380d37eSRichard Sailer  *  tcp_orphan_retries() - Returns maximal number of retries on an orphaned socket
91c380d37eSRichard Sailer  *  @sk:    Pointer to the current socket.
92c380d37eSRichard Sailer  *  @alive: bool, socket alive state
93c380d37eSRichard Sailer  */
947533ce30SRichard Sailer static int tcp_orphan_retries(struct sock *sk, bool alive)
951da177e4SLinus Torvalds {
96c402d9beSNikolay Borisov 	int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
971da177e4SLinus Torvalds 
981da177e4SLinus Torvalds 	/* We know from an ICMP that something is wrong. */
991da177e4SLinus Torvalds 	if (sk->sk_err_soft && !alive)
1001da177e4SLinus Torvalds 		retries = 0;
1011da177e4SLinus Torvalds 
1021da177e4SLinus Torvalds 	/* However, if socket sent something recently, select some safe
1031da177e4SLinus Torvalds 	 * number of retries. 8 corresponds to >100 seconds with minimal
1041da177e4SLinus Torvalds 	 * RTO of 200msec. */
1051da177e4SLinus Torvalds 	if (retries == 0 && alive)
1061da177e4SLinus Torvalds 		retries = 8;
1071da177e4SLinus Torvalds 	return retries;
1081da177e4SLinus Torvalds }
1091da177e4SLinus Torvalds 
110ce55dd36SEric Dumazet static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
111ce55dd36SEric Dumazet {
112b0f9ca53SFan Du 	struct net *net = sock_net(sk);
113b0f9ca53SFan Du 
114ce55dd36SEric Dumazet 	/* Black hole detection */
115b0f9ca53SFan Du 	if (net->ipv4.sysctl_tcp_mtu_probing) {
116ce55dd36SEric Dumazet 		if (!icsk->icsk_mtup.enabled) {
117ce55dd36SEric Dumazet 			icsk->icsk_mtup.enabled = 1;
11805cbc0dbSFan Du 			icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
119ce55dd36SEric Dumazet 			tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
120ce55dd36SEric Dumazet 		} else {
121b0f9ca53SFan Du 			struct net *net = sock_net(sk);
122ce55dd36SEric Dumazet 			struct tcp_sock *tp = tcp_sk(sk);
123829942c1SDavid S. Miller 			int mss;
124829942c1SDavid S. Miller 
1258beb5c5fSEric Dumazet 			mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
126b0f9ca53SFan Du 			mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
127ce55dd36SEric Dumazet 			mss = max(mss, 68 - tp->tcp_header_len);
128ce55dd36SEric Dumazet 			icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
129ce55dd36SEric Dumazet 			tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
130ce55dd36SEric Dumazet 		}
131ce55dd36SEric Dumazet 	}
132ce55dd36SEric Dumazet }
133ce55dd36SEric Dumazet 
134c380d37eSRichard Sailer 
135c380d37eSRichard Sailer /**
136c380d37eSRichard Sailer  *  retransmits_timed_out() - returns true if this connection has timed out
137c380d37eSRichard Sailer  *  @sk:       The current socket
138c380d37eSRichard Sailer  *  @boundary: max number of retransmissions
139c380d37eSRichard Sailer  *  @timeout:  A custom timeout value.
140c380d37eSRichard Sailer  *             If set to 0 the default timeout is calculated and used.
141c380d37eSRichard Sailer  *             Using TCP_RTO_MIN and the number of unsuccessful retransmits.
142c380d37eSRichard Sailer  *  @syn_set:  true if the SYN Bit was set.
143c380d37eSRichard Sailer  *
144c380d37eSRichard Sailer  * The default "timeout" value this function can calculate and use
145c380d37eSRichard Sailer  * is equivalent to the timeout of a TCP Connection
146c380d37eSRichard Sailer  * after "boundary" unsuccessful, exponentially backed-off
1474d22f7d3SDamian Lukowski  * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if
1484d22f7d3SDamian Lukowski  * syn_set flag is set.
149c380d37eSRichard Sailer  *
1502f7de571SDamian Lukowski  */
1512f7de571SDamian Lukowski static bool retransmits_timed_out(struct sock *sk,
152dca43c75SJerry Chu 				  unsigned int boundary,
15321a180cdSDavid S. Miller 				  unsigned int timeout,
1544d22f7d3SDamian Lukowski 				  bool syn_set)
1552f7de571SDamian Lukowski {
156dca43c75SJerry Chu 	unsigned int linear_backoff_thresh, start_ts;
1574d22f7d3SDamian Lukowski 	unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;
1582f7de571SDamian Lukowski 
1592f7de571SDamian Lukowski 	if (!inet_csk(sk)->icsk_retransmits)
1602f7de571SDamian Lukowski 		return false;
1612f7de571SDamian Lukowski 
1622f7de571SDamian Lukowski 	start_ts = tcp_sk(sk)->retrans_stamp;
1637faee5c0SEric Dumazet 	if (unlikely(!start_ts))
1647faee5c0SEric Dumazet 		start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk));
1652f7de571SDamian Lukowski 
166dca43c75SJerry Chu 	if (likely(timeout == 0)) {
1674d22f7d3SDamian Lukowski 		linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
1682f7de571SDamian Lukowski 
1692f7de571SDamian Lukowski 		if (boundary <= linear_backoff_thresh)
1704d22f7d3SDamian Lukowski 			timeout = ((2 << boundary) - 1) * rto_base;
1712f7de571SDamian Lukowski 		else
1724d22f7d3SDamian Lukowski 			timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
1732f7de571SDamian Lukowski 				(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
174dca43c75SJerry Chu 	}
1752f7de571SDamian Lukowski 	return (tcp_time_stamp - start_ts) >= timeout;
1762f7de571SDamian Lukowski }
1772f7de571SDamian Lukowski 
/* A write timeout has occurred. Process the after effects.
 *
 * Returns 1 if the socket was closed (error reported or reclaimed as out
 * of resources), 0 if the caller should go on retransmitting.
 */
static int tcp_write_timeout(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	int retry_until;
	bool do_reset, syn_set = false;

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		/* Handshake still in progress: the SYN (or SYN-ACK) timed out. */
		if (icsk->icsk_retransmits) {
			dst_negative_advice(sk);
			/* A Fast Open attempt is being retransmitted: record
			 * the failure in the metrics cache for this peer.
			 */
			if (tp->syn_fastopen || tp->syn_data)
				tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
			if (tp->syn_data && icsk->icsk_retransmits == 1)
				NET_INC_STATS(sock_net(sk),
					      LINUX_MIB_TCPFASTOPENACTIVEFAIL);
		} else if (!tp->syn_data && !tp->syn_fastopen) {
			sk_rethink_txhash(sk);
		}
		retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
		syn_set = true;
	} else {
		if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) {
			/* Some middle-boxes may black-hole Fast Open _after_
			 * the handshake. Therefore we conservatively disable
			 * Fast Open on this path on recurring timeouts after
			 * successful Fast Open.
			 */
			if (tp->syn_data_acked) {
				tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
				if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1)
					NET_INC_STATS(sock_net(sk),
						      LINUX_MIB_TCPFASTOPENACTIVEFAIL);
			}
			/* Black hole detection */
			tcp_mtu_probing(icsk, sk);

			dst_negative_advice(sk);
		} else {
			sk_rethink_txhash(sk);
		}

		retry_until = net->ipv4.sysctl_tcp_retries2;
		if (sock_flag(sk, SOCK_DEAD)) {
			/* Orphaned socket: apply the stricter orphan retry
			 * policy and possibly reclaim it right away.
			 */
			const bool alive = icsk->icsk_rto < TCP_RTO_MAX;

			retry_until = tcp_orphan_retries(sk, alive);
			do_reset = alive ||
				!retransmits_timed_out(sk, retry_until, 0, 0);

			if (tcp_out_of_resources(sk, do_reset))
				return 1;
		}
	}

	/* For non-SYN states honor a user-set TCP_USER_TIMEOUT instead of
	 * the computed default timeout.
	 */
	if (retransmits_timed_out(sk, retry_until,
				  syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) {
		/* Has it gone just too far? */
		tcp_write_err(sk);
		return 1;
	}
	return 0;
}
2421da177e4SLinus Torvalds 
/* Called with BH disabled */
void tcp_delack_timer_handler(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	sk_mem_reclaim_partial(sk);

	/* Nothing to do if the socket is closed/listening, or no delayed
	 * ACK timer is actually pending.
	 */
	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
	    !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
		goto out;

	/* Timer fired early (deadline was pushed forward): re-arm it. */
	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
		goto out;
	}
	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;

	/* The user task failed to drain the prequeue in time; feed the
	 * queued segments through the normal receive path ourselves.
	 */
	if (!skb_queue_empty(&tp->ucopy.prequeue)) {
		struct sk_buff *skb;

		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);

		while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
			sk_backlog_rcv(sk, skb);

		tp->ucopy.memory = 0;
	}

	if (inet_csk_ack_scheduled(sk)) {
		if (!icsk->icsk_ack.pingpong) {
			/* Delayed ACK missed: inflate ATO. */
			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
		} else {
			/* Delayed ACK missed: leave pingpong mode and
			 * deflate ATO.
			 */
			icsk->icsk_ack.pingpong = 0;
			icsk->icsk_ack.ato      = TCP_ATO_MIN;
		}
		/* Send the overdue ACK now. */
		tcp_send_ack(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
	}

out:
	if (tcp_under_memory_pressure(sk))
		sk_mem_reclaim(sk);
}
2916f458dfbSEric Dumazet 
292c380d37eSRichard Sailer 
/**
 *  tcp_delack_timer() - The TCP delayed ACK timeout handler
 *  @data:  Pointer to the current socket. (gets casted to struct sock *)
 *
 *  This function gets (indirectly) called when the kernel timer for a TCP packet
 *  of this socket expires. Calls tcp_delack_timer_handler() to do the actual work.
 *
 *  Returns: Nothing (void)
 */
static void tcp_delack_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_delack_timer_handler(sk);
	} else {
		/* Socket is owned by user context: mark the ACK blocked. */
		inet_csk(sk)->icsk_ack.blocked = 1;
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
		/* delegate our work to tcp_release_cb(); take an extra
		 * reference only if the deferred bit was not already set.
		 */
		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}
3191da177e4SLinus Torvalds 
/* Zero-window probe timeout handler: decide whether to kill the
 * connection or send another window probe.
 */
static void tcp_probe_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int max_probes;
	u32 start_ts;

	/* Probing is only needed when nothing is in flight and there is
	 * queued data waiting for the window to open.
	 */
	if (tp->packets_out || !tcp_send_head(sk)) {
		icsk->icsk_probes_out = 0;
		return;
	}

	/* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as
	 * long as the receiver continues to respond probes. We support this by
	 * default and reset icsk_probes_out with incoming ACKs. But if the
	 * socket is orphaned or the user specifies TCP_USER_TIMEOUT, we
	 * kill the socket when the retry count and the time exceeds the
	 * corresponding system limit. We also implement similar policy when
	 * we use RTO to probe window in tcp_retransmit_timer().
	 */
	start_ts = tcp_skb_timestamp(tcp_send_head(sk));
	if (!start_ts)
		skb_mstamp_get(&tcp_send_head(sk)->skb_mstamp);
	else if (icsk->icsk_user_timeout &&
		 (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout)
		goto abort;

	max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
	if (sock_flag(sk, SOCK_DEAD)) {
		/* Orphaned socket: use the orphan retry policy instead. */
		const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;

		max_probes = tcp_orphan_retries(sk, alive);
		if (!alive && icsk->icsk_backoff >= max_probes)
			goto abort;
		if (tcp_out_of_resources(sk, true))
			return;
	}

	if (icsk->icsk_probes_out > max_probes) {
abort:		tcp_write_err(sk);
	} else {
		/* Only send another probe if we didn't close things up. */
		tcp_send_probe0(sk);
	}
}
3651da177e4SLinus Torvalds 
3661da177e4SLinus Torvalds /*
3678336886fSJerry Chu  *	Timer for Fast Open socket to retransmit SYNACK. Note that the
3688336886fSJerry Chu  *	sk here is the child socket, not the parent (listener) socket.
3698336886fSJerry Chu  */
3708336886fSJerry Chu static void tcp_fastopen_synack_timer(struct sock *sk)
3718336886fSJerry Chu {
3728336886fSJerry Chu 	struct inet_connection_sock *icsk = inet_csk(sk);
3738336886fSJerry Chu 	int max_retries = icsk->icsk_syn_retries ? :
3747c083ecbSNikolay Borisov 	    sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
3758336886fSJerry Chu 	struct request_sock *req;
3768336886fSJerry Chu 
3778336886fSJerry Chu 	req = tcp_sk(sk)->fastopen_rsk;
37842cb80a2SEric Dumazet 	req->rsk_ops->syn_ack_timeout(req);
3798336886fSJerry Chu 
380e6c022a4SEric Dumazet 	if (req->num_timeout >= max_retries) {
3818336886fSJerry Chu 		tcp_write_err(sk);
3828336886fSJerry Chu 		return;
3838336886fSJerry Chu 	}
3848336886fSJerry Chu 	/* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
3858336886fSJerry Chu 	 * returned from rtx_syn_ack() to make it more persistent like
3868336886fSJerry Chu 	 * regular retransmit because if the child socket has been accepted
3878336886fSJerry Chu 	 * it's not good to give up too easily.
3888336886fSJerry Chu 	 */
389e6c022a4SEric Dumazet 	inet_rtx_syn_ack(sk, req);
390e6c022a4SEric Dumazet 	req->num_timeout++;
3917e32b443SYuchung Cheng 	icsk->icsk_retransmits++;
3928336886fSJerry Chu 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
393e6c022a4SEric Dumazet 			  TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
3948336886fSJerry Chu }
3958336886fSJerry Chu 
3961da177e4SLinus Torvalds 
/**
 *  tcp_retransmit_timer() - The TCP retransmit timeout handler
 *  @sk:  Pointer to the current socket.
 *
 *  This function gets called when the kernel timer for a TCP packet
 *  of this socket expires.
 *
 *  It handles retransmission, timer adjustment and other necessary measures.
 *
 *  Returns: Nothing (void)
 */
void tcp_retransmit_timer(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (tp->fastopen_rsk) {
		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
			     sk->sk_state != TCP_FIN_WAIT1);
		tcp_fastopen_synack_timer(sk);
		/* Before we receive ACK to our SYN-ACK don't retransmit
		 * anything else (e.g., data or FIN segments).
		 */
		return;
	}
	if (!tp->packets_out)
		goto out;

	WARN_ON(tcp_write_queue_empty(sk));

	/* A timeout cancels any pending tail-loss-probe episode. */
	tp->tlp_high_seq = 0;

	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
		/* Receiver dastardly shrinks window. Our retransmits
		 * become zero probes, but we should not timeout this
		 * connection. If the socket is an orphan, time it out,
		 * we cannot allow such beasts to hang infinitely.
		 */
		struct inet_sock *inet = inet_sk(sk);
		if (sk->sk_family == AF_INET) {
			net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
					    &inet->inet_daddr,
					    ntohs(inet->inet_dport),
					    inet->inet_num,
					    tp->snd_una, tp->snd_nxt);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (sk->sk_family == AF_INET6) {
			net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
					    &sk->sk_v6_daddr,
					    ntohs(inet->inet_dport),
					    inet->inet_num,
					    tp->snd_una, tp->snd_nxt);
		}
#endif
		/* Give up once nothing has been acked for a full TCP_RTO_MAX. */
		if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
			tcp_write_err(sk);
			goto out;
		}
		tcp_enter_loss(sk);
		tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1);
		__sk_dst_reset(sk);
		goto out_reset_timer;
	}

	if (tcp_write_timeout(sk))
		goto out;

	/* First timeout of this episode: account which kind of recovery
	 * (if any) the timeout interrupted.
	 */
	if (icsk->icsk_retransmits == 0) {
		int mib_idx;

		if (icsk->icsk_ca_state == TCP_CA_Recovery) {
			if (tcp_is_sack(tp))
				mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
			else
				mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
		} else if (icsk->icsk_ca_state == TCP_CA_Loss) {
			mib_idx = LINUX_MIB_TCPLOSSFAILURES;
		} else if ((icsk->icsk_ca_state == TCP_CA_Disorder) ||
			   tp->sacked_out) {
			if (tcp_is_sack(tp))
				mib_idx = LINUX_MIB_TCPSACKFAILURES;
			else
				mib_idx = LINUX_MIB_TCPRENOFAILURES;
		} else {
			mib_idx = LINUX_MIB_TCPTIMEOUTS;
		}
		__NET_INC_STATS(sock_net(sk), mib_idx);
	}

	tcp_enter_loss(sk);

	if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) {
		/* Retransmission failed because of local congestion,
		 * do not backoff.
		 */
		if (!icsk->icsk_retransmits)
			icsk->icsk_retransmits = 1;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
					  TCP_RTO_MAX);
		goto out;
	}

	/* Increase the timeout each time we retransmit.  Note that
	 * we do not increase the rtt estimate.  rto is initialized
	 * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
	 * that doubling rto each time is the least we can get away with.
	 * In KA9Q, Karn uses this for the first few times, and then
	 * goes to quadratic.  netBSD doubles, but only goes up to *64,
	 * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
	 * defined in the protocol as the maximum possible RTT.  I guess
	 * we'll have to use something other than TCP to talk to the
	 * University of Mars.
	 *
	 * PAWS allows us longer timeouts and large windows, so once
	 * implemented ftp to mars will work nicely. We will have to fix
	 * the 120 second clamps though!
	 */
	icsk->icsk_backoff++;
	icsk->icsk_retransmits++;

out_reset_timer:
	/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
	 * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
	 * might be increased if the stream oscillates between thin and thick,
	 * thus the old value might already be too high compared to the value
	 * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
	 * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
	 * exponential backoff behaviour to avoid continue hammering
	 * linear-timeout retransmissions into a black hole
	 */
	if (sk->sk_state == TCP_ESTABLISHED &&
	    (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
	    tcp_stream_is_thin(tp) &&
	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
		icsk->icsk_backoff = 0;
		icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
	} else {
		/* Use normal (exponential) backoff */
		icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
	}
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
	if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0, 0))
		__sk_dst_reset(sk);

out:;
}
5471da177e4SLinus Torvalds 
/* Called with bottom-half processing disabled.
   Called by tcp_write_timer() */
void tcp_write_timer_handler(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int event;

	/* Nothing to do for closed/listening sockets or when no xmit
	 * timer event is pending.
	 */
	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
	    !icsk->icsk_pending)
		goto out;

	/* Timer fired early (deadline was pushed forward): re-arm it. */
	if (time_after(icsk->icsk_timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
		goto out;
	}

	event = icsk->icsk_pending;

	/* Dispatch on which logical timer this shared xmit timer encodes. */
	switch (event) {
	case ICSK_TIME_REO_TIMEOUT:
		tcp_rack_reo_timeout(sk);
		break;
	case ICSK_TIME_LOSS_PROBE:
		tcp_send_loss_probe(sk);
		break;
	case ICSK_TIME_RETRANS:
		icsk->icsk_pending = 0;
		tcp_retransmit_timer(sk);
		break;
	case ICSK_TIME_PROBE0:
		icsk->icsk_pending = 0;
		tcp_probe_timer(sk);
		break;
	}

out:
	sk_mem_reclaim(sk);
}
5866f458dfbSEric Dumazet 
5876f458dfbSEric Dumazet static void tcp_write_timer(unsigned long data)
5886f458dfbSEric Dumazet {
5896f458dfbSEric Dumazet 	struct sock *sk = (struct sock *)data;
5906f458dfbSEric Dumazet 
5916f458dfbSEric Dumazet 	bh_lock_sock(sk);
5926f458dfbSEric Dumazet 	if (!sock_owned_by_user(sk)) {
5936f458dfbSEric Dumazet 		tcp_write_timer_handler(sk);
5946f458dfbSEric Dumazet 	} else {
595c380d37eSRichard Sailer 		/* delegate our work to tcp_release_cb() */
5967aa5470cSEric Dumazet 		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
597144d56e9SEric Dumazet 			sock_hold(sk);
5986f458dfbSEric Dumazet 	}
5991da177e4SLinus Torvalds 	bh_unlock_sock(sk);
6001da177e4SLinus Torvalds 	sock_put(sk);
6011da177e4SLinus Torvalds }
6021da177e4SLinus Torvalds 
60342cb80a2SEric Dumazet void tcp_syn_ack_timeout(const struct request_sock *req)
60472659eccSOctavian Purdila {
60542cb80a2SEric Dumazet 	struct net *net = read_pnet(&inet_rsk(req)->ireq_net);
60642cb80a2SEric Dumazet 
60702a1d6e7SEric Dumazet 	__NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS);
60872659eccSOctavian Purdila }
60972659eccSOctavian Purdila EXPORT_SYMBOL(tcp_syn_ack_timeout);
61072659eccSOctavian Purdila 
6111da177e4SLinus Torvalds void tcp_set_keepalive(struct sock *sk, int val)
6121da177e4SLinus Torvalds {
6131da177e4SLinus Torvalds 	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
6141da177e4SLinus Torvalds 		return;
6151da177e4SLinus Torvalds 
6161da177e4SLinus Torvalds 	if (val && !sock_flag(sk, SOCK_KEEPOPEN))
617463c84b9SArnaldo Carvalho de Melo 		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
6181da177e4SLinus Torvalds 	else if (!val)
619463c84b9SArnaldo Carvalho de Melo 		inet_csk_delete_keepalive_timer(sk);
6201da177e4SLinus Torvalds }
6214b9d07a4SUrsula Braun EXPORT_SYMBOL_GPL(tcp_set_keepalive);
6221da177e4SLinus Torvalds 
6231da177e4SLinus Torvalds 
/* Keepalive timer callback.  Also doubles as the FIN_WAIT2 timer for
 * orphaned sockets: when the FIN_WAIT2 linger period expires it either
 * moves the socket to timewait or resets it.  For established sockets
 * it sends keepalive probes and kills the connection once the probe
 * limit (or TCP_USER_TIMEOUT, if set) is exceeded.
 */
static void tcp_keepalive_timer (unsigned long data)
{
	struct sock *sk = (struct sock *) data;
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	u32 elapsed;

	/* Only process if socket is not in use. */
	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		inet_csk_reset_keepalive_timer (sk, HZ/20);
		goto out;
	}

	/* Listening sockets never arm this timer; seeing one is a bug. */
	if (sk->sk_state == TCP_LISTEN) {
		pr_err("Hmm... keepalive on a LISTEN ???\n");
		goto out;
	}

	/* Orphaned FIN_WAIT2: hand off to timewait if linger time remains,
	 * otherwise reset the peer and destroy the socket.
	 */
	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
		if (tp->linger2 >= 0) {
			const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;

			if (tmo > 0) {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
		tcp_send_active_reset(sk, GFP_ATOMIC);
		goto death;
	}

	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
		goto out;

	elapsed = keepalive_time_when(tp);

	/* It is alive without keepalive 8) */
	if (tp->packets_out || tcp_send_head(sk))
		goto resched;

	/* Idle time since the last received segment. */
	elapsed = keepalive_time_elapsed(tp);

	if (elapsed >= keepalive_time_when(tp)) {
		/* If the TCP_USER_TIMEOUT option is enabled, use that
		 * to determine when to timeout instead.
		 */
		if ((icsk->icsk_user_timeout != 0 &&
		    elapsed >= icsk->icsk_user_timeout &&
		    icsk->icsk_probes_out > 0) ||
		    (icsk->icsk_user_timeout == 0 &&
		    icsk->icsk_probes_out >= keepalive_probes(tp))) {
			/* Probe limit exceeded: reset peer and report error. */
			tcp_send_active_reset(sk, GFP_ATOMIC);
			tcp_write_err(sk);
			goto out;
		}
		if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
			/* Probe sent (or nothing to send): wait one
			 * keepalive interval before the next probe.
			 */
			icsk->icsk_probes_out++;
			elapsed = keepalive_intvl_when(tp);
		} else {
			/* If keepalive was lost due to local congestion,
			 * try harder.
			 */
			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
		}
	} else {
		/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
		elapsed = keepalive_time_when(tp) - elapsed;
	}

	sk_mem_reclaim(sk);

resched:
	inet_csk_reset_keepalive_timer (sk, elapsed);
	goto out;

death:
	tcp_done(sk);

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
7086f458dfbSEric Dumazet 
7096f458dfbSEric Dumazet void tcp_init_xmit_timers(struct sock *sk)
7106f458dfbSEric Dumazet {
7116f458dfbSEric Dumazet 	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
7126f458dfbSEric Dumazet 				  &tcp_keepalive_timer);
713*218af599SEric Dumazet 	hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
714*218af599SEric Dumazet 		     HRTIMER_MODE_ABS_PINNED);
715*218af599SEric Dumazet 	tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
7166f458dfbSEric Dumazet }
717