// SPDX-License-Identifier: GPL-2.0-only
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

#include <linux/module.h>
#include <linux/gfp.h>
#include <net/tcp.h>

/* Clamp the next RTO so the retransmit timer never fires later than the
 * user-configured TCP_USER_TIMEOUT deadline (measured from retrans_stamp,
 * i.e. the first unacknowledged retransmission).  Returns a value in
 * jiffies; returns 1 when the user timeout has already expired so the
 * timer fires (and aborts the connection) as soon as possible.
 */
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	u32 elapsed, start_ts, user_timeout;
	s32 remaining;

	start_ts = tcp_sk(sk)->retrans_stamp;
	user_timeout = READ_ONCE(icsk->icsk_user_timeout);
	if (!user_timeout)
		return icsk->icsk_rto;	/* no user timeout set: plain RTO */
	/* icsk_user_timeout is in milliseconds, like tcp_time_stamp() */
	elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
	remaining = user_timeout - elapsed;
	if (remaining <= 0)
		return 1; /* user timeout has passed; fire ASAP */

	return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
}

/* Clamp a zero-window probe interval @when (jiffies) so the probe timer
 * does not overshoot TCP_USER_TIMEOUT, measured from icsk_probes_tstamp
 * (set when probing started).  The result is floored at TCP_TIMEOUT_MIN.
 */
u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	u32 remaining, user_timeout;
	s32 elapsed;

	user_timeout = READ_ONCE(icsk->icsk_user_timeout);
	if (!user_timeout || !icsk->icsk_probes_tstamp)
		return when;

	elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
	if (unlikely(elapsed < 0))
		elapsed = 0;
	remaining = msecs_to_jiffies(user_timeout) - elapsed;
	remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);

	return min_t(u32, remaining, when);
}

/**
 * tcp_write_err() - close socket and save error info
 * @sk:  The socket the error has appeared on.
 *
 * Reports a soft error (if one is pending) or ETIMEDOUT to the user,
 * purges the write queue and moves the socket to TCP_CLOSE.
 *
 * Returns: Nothing (void)
 */

static void tcp_write_err(struct sock *sk)
{
	/* GNU ?: elvis: reuse a pending soft error, else ETIMEDOUT */
	WRITE_ONCE(sk->sk_err, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT);
	sk_error_report(sk);

	tcp_write_queue_purge(sk);
	tcp_done(sk);
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}

/**
 * tcp_out_of_resources() - Close socket if out of resources
 * @sk:       pointer to current socket
 * @do_reset: send a last packet with reset flag
 *
 * Do not allow orphaned sockets to eat all our resources.
 * This is direct violation of TCP specs, but it is required
 * to prevent DoS attacks. It is called when a retransmission timeout
 * or zero probe timeout occurs on orphaned socket.
 *
 * Also close if our net namespace is exiting; in that case there is no
 * hope of ever communicating again since all netns interfaces are already
 * down (or about to be down), and we need to release our dst references,
 * which have been moved to the netns loopback interface, so the namespace
 * can finish exiting.  This condition is only possible if we are a kernel
 * socket, as those do not hold references to the namespace.
 *
 * Criteria is still not confirmed experimentally and may change.
 * We kill the socket, if:
 * 1. If number of orphaned sockets exceeds an administratively configured
 *    limit.
 * 2. If we have strong memory pressure.
 * 3. If our net namespace is exiting.
 */
static int tcp_out_of_resources(struct sock *sk, bool do_reset)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int shift = 0;

	/* If peer does not open window for long time, or did not transmit
	 * anything for long time, penalize it. */
	if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
		shift++;

	/* If some dubious ICMP arrived, penalize even more. */
	if (READ_ONCE(sk->sk_err_soft))
		shift++;

	/* shift tightens the OOM check: the more suspicious the socket,
	 * the more eagerly we reclaim it under memory pressure.
	 */
	if (tcp_check_oom(sk, shift)) {
		/* Catch exceptional cases, when connection requires reset.
		 *      1. Last segment was sent recently. */
		if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
		    /* 2. Window is closed. */
		    (!tp->snd_wnd && !tp->packets_out))
			do_reset = true;
		if (do_reset)
			tcp_send_active_reset(sk, GFP_ATOMIC);
		tcp_done(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
		return 1;
	}

	if (!check_net(sock_net(sk))) {
		/* Not possible to send reset; just close */
		tcp_done(sk);
		return 1;
	}

	return 0;
}

/**
 * tcp_orphan_retries() - Returns maximal number of retries on an orphaned socket
 * @sk:    Pointer to the current socket.
 * @alive: bool, socket alive state
 */
static int tcp_orphan_retries(struct sock *sk, bool alive)
{
	int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */

	/* We know from an ICMP that something is wrong. */
	if (READ_ONCE(sk->sk_err_soft) && !alive)
		retries = 0;

	/* However, if socket sent something recently, select some safe
	 * number of retries. 8 corresponds to >100 seconds with minimal
	 * RTO of 200msec.
	 */
	if (retries == 0 && alive)
		retries = 8;
	return retries;
}

/* Shrink the MSS search range for PMTU black hole detection: on repeated
 * retransmission timeouts, enable MTU probing and halve the current
 * search_low MSS (bounded by the tcp_base_mss / tcp_mtu_probe_floor /
 * tcp_min_snd_mss sysctls), then resync the socket's effective MSS.
 */
static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
{
	const struct net *net = sock_net(sk);
	int mss;

	/* Black hole detection */
	if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
		return;

	if (!icsk->icsk_mtup.enabled) {
		icsk->icsk_mtup.enabled = 1;
		icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
	} else {
		mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
		mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
		mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor));
		mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
	}
	tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
}

/* Model the total timeout (in msecs) of @boundary retransmissions with
 * exponential backoff starting at @rto_base: doubling per attempt until
 * the backoff reaches TCP_RTO_MAX, then linear in TCP_RTO_MAX steps.
 */
static unsigned int tcp_model_timeout(struct sock *sk,
				      unsigned int boundary,
				      unsigned int rto_base)
{
	unsigned int linear_backoff_thresh, timeout;

	/* number of doublings before the backoff saturates at TCP_RTO_MAX */
	linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base);
	if (boundary <= linear_backoff_thresh)
		timeout = ((2 << boundary) - 1) * rto_base;
	else
		timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
			(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
	return jiffies_to_msecs(timeout);
}
/**
 * retransmits_timed_out() - returns true if this connection has timed out
 * @sk:       The current socket
 * @boundary: max number of retransmissions
 * @timeout:  A custom timeout value.
 *            If set to 0 the default timeout is calculated and used.
 *            Using TCP_RTO_MIN and the number of unsuccessful retransmits.
 *
 * The default "timeout" value this function can calculate and use
 * is equivalent to the timeout of a TCP Connection
 * after "boundary" unsuccessful, exponentially backed-off
 * retransmissions with an initial RTO of TCP_RTO_MIN.
 */
static bool retransmits_timed_out(struct sock *sk,
				  unsigned int boundary,
				  unsigned int timeout)
{
	unsigned int start_ts;

	if (!inet_csk(sk)->icsk_retransmits)
		return false;

	start_ts = tcp_sk(sk)->retrans_stamp;
	if (likely(timeout == 0)) {
		unsigned int rto_base = TCP_RTO_MIN;

		/* SYN/SYN-ACK retransmits use the (possibly BPF-provided)
		 * initial RTO rather than TCP_RTO_MIN.
		 */
		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			rto_base = tcp_timeout_init(sk);
		timeout = tcp_model_timeout(sk, boundary, rto_base);
	}

	/* signed comparison so tcp_time_stamp() wraparound is handled */
	return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
}

/* A write timeout has occurred. Process the after effects.
 * Returns non-zero when the socket has been closed (write error or
 * out-of-resources), zero when retransmission should continue.
 */
static int tcp_write_timeout(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	bool expired = false, do_reset;
	int retry_until, max_retransmits;

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		if (icsk->icsk_retransmits)
			__dst_negative_advice(sk);
		/* Paired with WRITE_ONCE() in tcp_sock_set_syncnt() */
		retry_until = READ_ONCE(icsk->icsk_syn_retries) ? :
			READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);

		/* Linear SYN timeouts extend the budget before giving up */
		max_retransmits = retry_until;
		if (sk->sk_state == TCP_SYN_SENT)
			max_retransmits += READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts);

		expired = icsk->icsk_retransmits >= max_retransmits;
	} else {
		if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
			/* Black hole detection */
			tcp_mtu_probing(icsk, sk);

			__dst_negative_advice(sk);
		}

		retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
		if (sock_flag(sk, SOCK_DEAD)) {
			const bool alive = icsk->icsk_rto < TCP_RTO_MAX;

			retry_until = tcp_orphan_retries(sk, alive);
			do_reset = alive ||
				!retransmits_timed_out(sk, retry_until, 0);

			if (tcp_out_of_resources(sk, do_reset))
				return 1;
		}
	}
	if (!expired)
		expired = retransmits_timed_out(sk, retry_until,
						READ_ONCE(icsk->icsk_user_timeout));
	tcp_fastopen_active_detect_blackhole(sk, expired);

	/* Let an attached BPF program observe every RTO event */
	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
		tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
				  icsk->icsk_retransmits,
				  icsk->icsk_rto, (int)expired);

	if (expired) {
		/* Has it gone just too far? */
		tcp_write_err(sk);
		return 1;
	}

	/* Rehash the flow on timeout: a different path may avoid the loss */
	if (sk_rethink_txhash(sk)) {
		tp->timeout_rehash++;
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH);
	}

	return 0;
}

/* Called with BH disabled */
void tcp_delack_timer_handler(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
		return;

	/* Handling the sack compression case */
	if (tp->compressed_ack) {
		tcp_mstamp_refresh(tp);
		tcp_sack_compress_send_ack(sk);
		return;
	}

	if (!(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
		return;

	/* Timer fired early (e.g. was pushed back): re-arm for the
	 * remaining interval instead of sending now.
	 */
	if (time_after(icsk->icsk_ack.timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
		return;
	}
	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;

	if (inet_csk_ack_scheduled(sk)) {
		if (!inet_csk_in_pingpong_mode(sk)) {
			/* Delayed ACK missed: inflate ATO. */
			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
		} else {
			/* Delayed ACK missed: leave pingpong mode and
			 * deflate ATO.
			 */
			inet_csk_exit_pingpong_mode(sk);
			icsk->icsk_ack.ato = TCP_ATO_MIN;
		}
		tcp_mstamp_refresh(tp);
		tcp_send_ack(sk);
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
	}
}


/**
 * tcp_delack_timer() - The TCP delayed ACK timeout handler
 * @t:  Pointer to the timer. (gets casted to struct sock *)
 *
 * This function gets (indirectly) called when the kernel timer for a TCP packet
 * of this socket expires. Calls tcp_delack_timer_handler() to do the actual work.
 *
 * Returns: Nothing (void)
 */
static void tcp_delack_timer(struct timer_list *t)
{
	struct inet_connection_sock *icsk =
			from_timer(icsk, t, icsk_delack_timer);
	struct sock *sk = &icsk->icsk_inet.sk;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_delack_timer_handler(sk);
	} else {
		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
		/* delegate our work to tcp_release_cb() */
		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}

/* Zero-window probe timer: fired when the peer advertised a zero window
 * and we have queued data but nothing in flight.  Either sends another
 * probe or aborts the connection once retry/time limits are exceeded.
 */
static void tcp_probe_timer(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct sk_buff *skb = tcp_send_head(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int max_probes;

	/* Data in flight or nothing to send: probing is over */
	if (tp->packets_out || !skb) {
		icsk->icsk_probes_out = 0;
		icsk->icsk_probes_tstamp = 0;
		return;
	}

	/* RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as
	 * long as the receiver continues to respond probes. We support this by
	 * default and reset icsk_probes_out with incoming ACKs. But if the
	 * socket is orphaned or the user specifies TCP_USER_TIMEOUT, we
	 * kill the socket when the retry count and the time exceeds the
	 * corresponding system limit. We also implement similar policy when
	 * we use RTO to probe window in tcp_retransmit_timer().
	 */
	if (!icsk->icsk_probes_tstamp) {
		icsk->icsk_probes_tstamp = tcp_jiffies32;
	} else {
		u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);

		if (user_timeout &&
		    (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
		    msecs_to_jiffies(user_timeout))
			goto abort;
	}
	max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
	if (sock_flag(sk, SOCK_DEAD)) {
		const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;

		max_probes = tcp_orphan_retries(sk, alive);
		if (!alive && icsk->icsk_backoff >= max_probes)
			goto abort;
		if (tcp_out_of_resources(sk, true))
			return;
	}

	if (icsk->icsk_probes_out >= max_probes) {
abort:		tcp_write_err(sk);
	} else {
		/* Only send another probe if we didn't close things up. */
		tcp_send_probe0(sk);
	}
}

/*
 * Timer for Fast Open socket to retransmit SYNACK. Note that the
 * sk here is the child socket, not the parent (listener) socket.
 */
static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int max_retries;

	req->rsk_ops->syn_ack_timeout(req);

	/* Add one more retry for fastopen.
	 * Paired with WRITE_ONCE() in tcp_sock_set_syncnt()
	 */
	max_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
		READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;

	if (req->num_timeout >= max_retries) {
		tcp_write_err(sk);
		return;
	}
	/* Lower cwnd after certain SYNACK timeout like tcp_init_transfer() */
	if (icsk->icsk_retransmits == 1)
		tcp_enter_loss(sk);
	/* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
	 * returned from rtx_syn_ack() to make it more persistent like
	 * regular retransmit because if the child socket has been accepted
	 * it's not good to give up too easily.
	 */
	inet_rtx_syn_ack(sk, req);
	req->num_timeout++;
	icsk->icsk_retransmits++;
	if (!tp->retrans_stamp)
		tp->retrans_stamp = tcp_time_stamp(tp);
	/* Exponential backoff: base request timeout doubled per attempt */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
			  req->timeout << req->num_timeout, TCP_RTO_MAX);
}

/* Decide whether a connection stuck probing a zero window has been
 * unresponsive too long (both receive-side and transmit-side silence
 * exceed 2 * TCP_RTO_MAX).
 *
 * NOTE(review): rcv_delta mixes icsk_timeout (an absolute jiffies
 * deadline) with rcv_tstamp via unsigned subtraction; presumably this
 * relies on icsk_timeout >= rcv_tstamp — verify against upstream
 * follow-up fixes before reusing this pattern.
 */
static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
				     const struct sk_buff *skb)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const int timeout = TCP_RTO_MAX * 2;
	u32 rcv_delta, rtx_delta;

	rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp;
	if (rcv_delta <= timeout)
		return false;

	/* retrans_stamp may still be zero: fall back to the skb timestamp */
	rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) -
			(tp->retrans_stamp ?: tcp_skb_timestamp(skb)));

	return rtx_delta > timeout;
}

/**
 * tcp_retransmit_timer() - The TCP retransmit timeout handler
 * @sk:  Pointer to the current socket.
 *
 * This function gets called when the kernel timer for a TCP packet
 * of this socket expires.
 *
 * It handles retransmission, timer adjustment and other necessary measures.
 *
 * Returns: Nothing (void)
 */
void tcp_retransmit_timer(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct net *net = sock_net(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct request_sock *req;
	struct sk_buff *skb;

	req = rcu_dereference_protected(tp->fastopen_rsk,
					lockdep_sock_is_held(sk));
	if (req) {
		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
			     sk->sk_state != TCP_FIN_WAIT1);
		tcp_fastopen_synack_timer(sk, req);
		/* Before we receive ACK to our SYN-ACK don't retransmit
		 * anything else (e.g., data or FIN segments).
		 */
		return;
	}

	if (!tp->packets_out)
		return;

	skb = tcp_rtx_queue_head(sk);
	if (WARN_ON_ONCE(!skb))
		return;

	/* Cancel any pending tail-loss-probe accounting */
	tp->tlp_high_seq = 0;

	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
		/* Receiver dastardly shrinks window. Our retransmits
		 * become zero probes, but we should not timeout this
		 * connection. If the socket is an orphan, time it out,
		 * we cannot allow such beasts to hang infinitely.
		 */
		struct inet_sock *inet = inet_sk(sk);
		u32 rtx_delta;

		/* ms since first retransmission (or since the head skb
		 * was stamped, if nothing was retransmitted yet)
		 */
		rtx_delta = tcp_time_stamp(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp(skb));
		if (sk->sk_family == AF_INET) {
			net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
					    &inet->inet_daddr, ntohs(inet->inet_dport),
					    inet->inet_num, tp->snd_una, tp->snd_nxt,
					    jiffies_to_msecs(jiffies - tp->rcv_tstamp),
					    rtx_delta);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (sk->sk_family == AF_INET6) {
			net_dbg_ratelimited("Probing zero-window on %pI6:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n",
					    &sk->sk_v6_daddr, ntohs(inet->inet_dport),
					    inet->inet_num, tp->snd_una, tp->snd_nxt,
					    jiffies_to_msecs(jiffies - tp->rcv_tstamp),
					    rtx_delta);
		}
#endif
		if (tcp_rtx_probe0_timed_out(sk, skb)) {
			tcp_write_err(sk);
			goto out;
		}
		tcp_enter_loss(sk);
		tcp_retransmit_skb(sk, skb, 1);
		__sk_dst_reset(sk);
		goto out_reset_timer;
	}

	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
	if (tcp_write_timeout(sk))
		goto out;

	/* First timeout of this episode: account which recovery state
	 * (SACK/Reno recovery, loss, disorder) the timeout interrupted.
	 */
	if (icsk->icsk_retransmits == 0) {
		int mib_idx = 0;

		if (icsk->icsk_ca_state == TCP_CA_Recovery) {
			if (tcp_is_sack(tp))
				mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
			else
				mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
		} else if (icsk->icsk_ca_state == TCP_CA_Loss) {
			mib_idx = LINUX_MIB_TCPLOSSFAILURES;
		} else if ((icsk->icsk_ca_state == TCP_CA_Disorder) ||
			   tp->sacked_out) {
			if (tcp_is_sack(tp))
				mib_idx = LINUX_MIB_TCPSACKFAILURES;
			else
				mib_idx = LINUX_MIB_TCPRENOFAILURES;
		}
		if (mib_idx)
			__NET_INC_STATS(sock_net(sk), mib_idx);
	}

	tcp_enter_loss(sk);

	icsk->icsk_retransmits++;
	if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
		/* Retransmission failed because of local congestion,
		 * Let senders fight for local resources conservatively.
		 */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  TCP_RESOURCE_PROBE_INTERVAL,
					  TCP_RTO_MAX);
		goto out;
	}

	/* Increase the timeout each time we retransmit.  Note that
	 * we do not increase the rtt estimate.  rto is initialized
	 * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
	 * that doubling rto each time is the least we can get away with.
	 * In KA9Q, Karn uses this for the first few times, and then
	 * goes to quadratic.  netBSD doubles, but only goes up to *64,
	 * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
	 * defined in the protocol as the maximum possible RTT.  I guess
	 * we'll have to use something other than TCP to talk to the
	 * University of Mars.
	 *
	 * PAWS allows us longer timeouts and large windows, so once
	 * implemented ftp to mars will work nicely. We will have to fix
	 * the 120 second clamps though!
	 */
	icsk->icsk_backoff++;

out_reset_timer:
	/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
	 * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
	 * might be increased if the stream oscillates between thin and thick,
	 * thus the old value might already be too high compared to the value
	 * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
	 * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
	 * exponential backoff behaviour to avoid continue hammering
	 * linear-timeout retransmissions into a black hole
	 */
	if (sk->sk_state == TCP_ESTABLISHED &&
	    (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
	    tcp_stream_is_thin(tp) &&
	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
		icsk->icsk_backoff = 0;
		icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
	} else if (sk->sk_state != TCP_SYN_SENT ||
		   icsk->icsk_backoff >
		   READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
		/* Use normal (exponential) backoff unless linear timeouts are
		 * activated.
		 */
		icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
	}
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
	if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
		__sk_dst_reset(sk);

out:;
}

/* Called with bottom-half processing disabled.
   Called by tcp_write_timer() */
void tcp_write_timer_handler(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	int event;

	/* Nothing to do for closed/listening sockets, or when no timer
	 * event is pending.
	 */
	if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
	    !icsk->icsk_pending)
		return;

	/* Timer fired before its deadline: re-arm for the remaining
	 * time instead of processing now.
	 */
	if (time_after(icsk->icsk_timeout, jiffies)) {
		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
		return;
	}

	tcp_mstamp_refresh(tcp_sk(sk));
	event = icsk->icsk_pending;

	/* Dispatch on the pending event type.  RETRANS and PROBE0 clear
	 * icsk_pending before invoking their handler.
	 */
	switch (event) {
	case ICSK_TIME_REO_TIMEOUT:
		tcp_rack_reo_timeout(sk);
		break;
	case ICSK_TIME_LOSS_PROBE:
		tcp_send_loss_probe(sk);
		break;
	case ICSK_TIME_RETRANS:
		icsk->icsk_pending = 0;
		tcp_retransmit_timer(sk);
		break;
	case ICSK_TIME_PROBE0:
		icsk->icsk_pending = 0;
		tcp_probe_timer(sk);
		break;
	}
}

/* Retransmit-timer callback: run the handler directly when the socket
 * is not owned by user context, otherwise flag the work for deferred
 * execution.
 */
static void tcp_write_timer(struct timer_list *t)
{
	struct inet_connection_sock *icsk =
			from_timer(icsk, t, icsk_retransmit_timer);
	struct sock *sk = &icsk->icsk_inet.sk;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_write_timer_handler(sk);
	} else {
		/* delegate our work to tcp_release_cb() */
		/* Take an extra socket reference for the deferred work; only
		 * the first setter of the flag holds it.
		 */
		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}

/* Account a SYN-ACK (request sock) retransmission timeout in the
 * TCPTIMEOUTS MIB counter for the request's netns.
 */
void tcp_syn_ack_timeout(const struct request_sock *req)
{
	struct net *net = read_pnet(&inet_rsk(req)->ireq_net);

	__NET_INC_STATS(net, LINUX_MIB_TCPTIMEOUTS);
}
EXPORT_SYMBOL(tcp_syn_ack_timeout);

/* Enable or disable keepalive probing on @sk (SO_KEEPALIVE).  No-op on
 * closed or listening sockets; the SOCK_KEEPOPEN flag itself is managed
 * by the caller.
 */
void tcp_set_keepalive(struct sock *sk, int val)
{
	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
		return;

	if (val && !sock_flag(sk, SOCK_KEEPOPEN))
		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
	else if (!val)
		inet_csk_delete_keepalive_timer(sk);
}
EXPORT_SYMBOL_GPL(tcp_set_keepalive);


/* Keepalive timer callback; also handles orphaned FIN_WAIT2 expiry
 * (body continues below).
 */
static void tcp_keepalive_timer (struct timer_list *t)
{
	struct sock *sk = from_timer(sk, t, sk_timer);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	u32 elapsed;

	/* Only process if socket is not in use. */
	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later. */
		inet_csk_reset_keepalive_timer (sk, HZ/20);
		goto out;
	}

	if (sk->sk_state == TCP_LISTEN) {
		pr_err("Hmm... keepalive on a LISTEN ???\n");
		goto out;
	}

	tcp_mstamp_refresh(tp);
	/* Orphaned FIN_WAIT2: move to timewait for the remaining linger
	 * time if any, otherwise reset the peer and tear the socket down.
	 */
	if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
		if (READ_ONCE(tp->linger2) >= 0) {
			const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;

			if (tmo > 0) {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
		tcp_send_active_reset(sk, GFP_ATOMIC);
		goto death;
	}

	/* Keepalive only applies when enabled and in an eligible state. */
	if (!sock_flag(sk, SOCK_KEEPOPEN) ||
	    ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
		goto out;

	elapsed = keepalive_time_when(tp);

	/* It is alive without keepalive 8) */
	if (tp->packets_out || !tcp_write_queue_empty(sk))
		goto resched;

	elapsed = keepalive_time_elapsed(tp);

	if (elapsed >= keepalive_time_when(tp)) {
		u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);

		/* If the TCP_USER_TIMEOUT option is enabled, use that
		 * to determine when to timeout instead.
		 */
		if ((user_timeout != 0 &&
		    elapsed >= msecs_to_jiffies(user_timeout) &&
		    icsk->icsk_probes_out > 0) ||
		    (user_timeout == 0 &&
		    icsk->icsk_probes_out >= keepalive_probes(tp))) {
			/* Probe budget exhausted: reset the peer and report
			 * the error to the local socket.
			 */
			tcp_send_active_reset(sk, GFP_ATOMIC);
			tcp_write_err(sk);
			goto out;
		}
		if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
			/* Probe sent: count it and wait the probe interval. */
			icsk->icsk_probes_out++;
			elapsed = keepalive_intvl_when(tp);
		} else {
			/* If keepalive was lost due to local congestion,
			 * try harder.
			 */
			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
		}
	} else {
		/* It is tp->rcv_tstamp + keepalive_time_when(tp) */
		elapsed = keepalive_time_when(tp) - elapsed;
	}

resched:
	inet_csk_reset_keepalive_timer (sk, elapsed);
	goto out;

death:
	tcp_done(sk);

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

/* hrtimer callback for compressed (coalesced) acks: send the withheld
 * ack now, or defer when the socket is owned by user context
 * (body continues below).
 */
static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
{
	struct tcp_sock *tp = container_of(timer, struct tcp_sock, compressed_ack_timer);
	struct sock *sk = (struct sock *)tp;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		if (tp->compressed_ack) {
			/* Since we have to send one ack finally,
			 * subtract one from tp->compressed_ack to keep
			 * LINUX_MIB_TCPACKCOMPRESSED accurate.
			 */
			tp->compressed_ack--;
			tcp_send_ack(sk);
		}
	} else {
		/* Socket owned by user context: flag the ack for deferred
		 * processing, matching the pattern in tcp_write_timer();
		 * only the first setter of the flag takes the extra ref.
		 */
		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
				      &sk->sk_tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	/* One-shot: never restart the hrtimer from the callback itself. */
	return HRTIMER_NORESTART;
}

/* Install TCP's timer callbacks on a socket: the classic retransmit,
 * delayed-ack and keepalive timers, plus the pacing and compressed-ack
 * hrtimers (both pinned, soft-irq context).
 */
void tcp_init_xmit_timers(struct sock *sk)
{
	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
				  &tcp_keepalive_timer);
	hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_ABS_PINNED_SOFT);
	tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;

	hrtimer_init(&tcp_sk(sk)->compressed_ack_timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_REL_PINNED_SOFT);
	tcp_sk(sk)->compressed_ack_timer.function = tcp_compressed_ack_kick;
}