1c398230bSWarner Losh /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 4e79adb8eSGarrett Wollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 8df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 9df8bae1dSRodney W. Grimes * are met: 10df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 12df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 13df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 14df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 15fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 16df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 17df8bae1dSRodney W. Grimes * without specific prior written permission. 18df8bae1dSRodney W. Grimes * 19df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29df8bae1dSRodney W. Grimes * SUCH DAMAGE. 30df8bae1dSRodney W. Grimes * 31e79adb8eSGarrett Wollman * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 32df8bae1dSRodney W. Grimes */ 33df8bae1dSRodney W. Grimes 344b421e2dSMike Silbersack #include <sys/cdefs.h> 354b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 364b421e2dSMike Silbersack 37825fd1e4SNavdeep Parhar #include "opt_inet.h" 38fb59c426SYoshinobu Inoue #include "opt_inet6.h" 39883831c6SAdrian Chadd #include "opt_rss.h" 400cc12cc5SJoerg Wunsch 41df8bae1dSRodney W. Grimes #include <sys/param.h> 4298163b98SPoul-Henning Kamp #include <sys/kernel.h> 43c74af4faSBruce Evans #include <sys/lock.h> 4408517d53SMike Silbersack #include <sys/mbuf.h> 45c74af4faSBruce Evans #include <sys/mutex.h> 46c74af4faSBruce Evans #include <sys/protosw.h> 4787aedea4SKip Macy #include <sys/smp.h> 48df8bae1dSRodney W. Grimes #include <sys/socket.h> 49df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 50c74af4faSBruce Evans #include <sys/sysctl.h> 51c74af4faSBruce Evans #include <sys/systm.h> 52e79adb8eSGarrett Wollman 534b79449eSBjoern A. Zeeb #include <net/if.h> 54df8bae1dSRodney W. Grimes #include <net/route.h> 55b2bdc62aSAdrian Chadd #include <net/rss_config.h> 56530c0060SRobert Watson #include <net/vnet.h> 57883831c6SAdrian Chadd #include <net/netisr.h> 58df8bae1dSRodney W. Grimes 59df8bae1dSRodney W. Grimes #include <netinet/in.h> 605d06879aSGeorge V. Neville-Neil #include <netinet/in_kdtrace.h> 61df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 62883831c6SAdrian Chadd #include <netinet/in_rss.h> 63c74af4faSBruce Evans #include <netinet/in_systm.h> 64fb59c426SYoshinobu Inoue #ifdef INET6 65fb59c426SYoshinobu Inoue #include <netinet6/in6_pcb.h> 66fb59c426SYoshinobu Inoue #endif 67df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 682de3e790SGleb Smirnoff #include <netinet/tcp.h> 69df8bae1dSRodney W. Grimes #include <netinet/tcp_fsm.h> 70df8bae1dSRodney W. Grimes #include <netinet/tcp_timer.h> 71df8bae1dSRodney W. Grimes #include <netinet/tcp_var.h> 7269c7c811SRandall Stewart #include <netinet/tcp_log_buf.h> 7389e560f4SRandall Stewart #include <netinet/tcp_seq.h> 744644fda3SGleb Smirnoff #include <netinet/cc/cc.h> 75f6f6703fSSean Bruno #ifdef INET6 76f6f6703fSSean Bruno #include <netinet6/tcp6_var.h> 77f6f6703fSSean Bruno #endif 78df8bae1dSRodney W. Grimes #include <netinet/tcpip.h> 79df8bae1dSRodney W. Grimes 800645c604SHiren Panchasara int tcp_persmin; 817029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin, 827029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 837029da5cSPawel Biernacki &tcp_persmin, 0, sysctl_msec_to_ticks, "I", 847029da5cSPawel Biernacki "minimum persistence interval"); 850645c604SHiren Panchasara 860645c604SHiren Panchasara int tcp_persmax; 877029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax, 887029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 897029da5cSPawel Biernacki &tcp_persmax, 0, sysctl_msec_to_ticks, "I", 907029da5cSPawel Biernacki "maximum persistence interval"); 910645c604SHiren Panchasara 929b8b58e0SJonathan Lemon int tcp_keepinit; 937029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, 947029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 957029da5cSPawel Biernacki &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", 967029da5cSPawel Biernacki "time to establish connection"); 977b40aa32SPaul Traina 989b8b58e0SJonathan Lemon int tcp_keepidle; 997029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, 1007029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 1017029da5cSPawel Biernacki &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", 1027029da5cSPawel Biernacki "time before keepalive probes begin"); 10398163b98SPoul-Henning Kamp 1049b8b58e0SJonathan Lemon int tcp_keepintvl; 1057029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, 1067029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 1077029da5cSPawel Biernacki &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", 1087029da5cSPawel Biernacki "time between keepalive probes"); 10998163b98SPoul-Henning Kamp 1109b8b58e0SJonathan Lemon int tcp_delacktime; 1117029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, 1127029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 1136489fe65SAndre Oppermann &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 114ccb4d0c6SJonathan Lemon "Time before a delayed ACK is sent"); 1159b8b58e0SJonathan Lemon 116c2c8e360SAlexander V. Chernikov VNET_DEFINE(int, tcp_msl); 1177029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, 118c2c8e360SAlexander V. Chernikov CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET, 119c2c8e360SAlexander V. Chernikov &VNET_NAME(tcp_msl), 0, sysctl_msec_to_ticks, "I", 1207029da5cSPawel Biernacki "Maximum segment lifetime"); 1219b8b58e0SJonathan Lemon 1220999766dSMichael Tuexen int tcp_rexmit_initial; 1237029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_initial, 1247029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 1250999766dSMichael Tuexen &tcp_rexmit_initial, 0, sysctl_msec_to_ticks, "I", 1260999766dSMichael Tuexen "Initial Retransmission Timeout"); 1270999766dSMichael Tuexen 128701bec5aSMatthew Dillon int tcp_rexmit_min; 1297029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, 1307029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 1316489fe65SAndre Oppermann &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 1326489fe65SAndre Oppermann "Minimum Retransmission Timeout"); 133701bec5aSMatthew Dillon 134701bec5aSMatthew Dillon int tcp_rexmit_slop; 1357029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, 1367029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 1376489fe65SAndre Oppermann &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 1386489fe65SAndre Oppermann "Retransmission Timer Slop"); 139701bec5aSMatthew Dillon 140334fc582SBjoern A. Zeeb VNET_DEFINE(int, tcp_always_keepalive) = 1; 141334fc582SBjoern A. Zeeb SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_VNET|CTLFLAG_RW, 142334fc582SBjoern A. Zeeb &VNET_NAME(tcp_always_keepalive) , 0, 143334fc582SBjoern A. Zeeb "Assume SO_KEEPALIVE on all TCP connections"); 14434be9bf3SPoul-Henning Kamp 1457c72af87SMohan Srinivasan int tcp_fast_finwait2_recycle = 0; 1467c72af87SMohan Srinivasan SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 1476489fe65SAndre Oppermann &tcp_fast_finwait2_recycle, 0, 1486489fe65SAndre Oppermann "Recycle closed FIN_WAIT_2 connections faster"); 1497c72af87SMohan Srinivasan 1507c72af87SMohan Srinivasan int tcp_finwait2_timeout; 1517029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, 1527029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 1537029da5cSPawel Biernacki &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", 1547029da5cSPawel Biernacki "FIN-WAIT2 timeout"); 1557c72af87SMohan Srinivasan 1569077f387SGleb Smirnoff int tcp_keepcnt = TCPTV_KEEPCNT; 1579077f387SGleb Smirnoff SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0, 1589077f387SGleb Smirnoff "Number of keepalive probes to send"); 1597c72af87SMohan Srinivasan 1600312fbe9SPoul-Henning Kamp /* max idle probes */ 1619b8b58e0SJonathan Lemon int tcp_maxpersistidle; 162e79adb8eSGarrett Wollman 16389e560f4SRandall Stewart int tcp_rexmit_drop_options = 0; 1646c0ef895SJohn Baldwin SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW, 1656c0ef895SJohn Baldwin &tcp_rexmit_drop_options, 0, 1666c0ef895SJohn Baldwin "Drop TCP options from 3rd and later retransmitted SYN"); 1676c0ef895SJohn Baldwin 16808af8aacSRandall Stewart int tcp_maxunacktime = TCPTV_MAXUNACKTIME; 16908af8aacSRandall Stewart SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxunacktime, 17008af8aacSRandall Stewart CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_NEEDGIANT, 17108af8aacSRandall Stewart &tcp_maxunacktime, 0, sysctl_msec_to_ticks, "I", 17208af8aacSRandall Stewart "Maximum time (in ms) that a session can linger without making progress"); 17308af8aacSRandall Stewart 174e29c55e4SGleb Smirnoff VNET_DEFINE(int, tcp_pmtud_blackhole_detect); 175f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection, 176f0188618SHans Petter Selasky CTLFLAG_RW|CTLFLAG_VNET, 177f6f6703fSSean Bruno &VNET_NAME(tcp_pmtud_blackhole_detect), 0, 178f6f6703fSSean Bruno "Path MTU Discovery Black Hole Detection Enabled"); 179f6f6703fSSean Bruno 180f6f6703fSSean Bruno #ifdef INET 181e29c55e4SGleb Smirnoff VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200; 182f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss, 183f0188618SHans Petter Selasky CTLFLAG_RW|CTLFLAG_VNET, 184f6f6703fSSean Bruno &VNET_NAME(tcp_pmtud_blackhole_mss), 0, 185f6f6703fSSean Bruno "Path MTU Discovery Black Hole Detection lowered MSS"); 186f6f6703fSSean Bruno #endif 187f6f6703fSSean Bruno 188f6f6703fSSean Bruno #ifdef INET6 189e29c55e4SGleb Smirnoff VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220; 190f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss, 191f0188618SHans Petter Selasky CTLFLAG_RW|CTLFLAG_VNET, 192f6f6703fSSean Bruno &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0, 193f6f6703fSSean Bruno "Path MTU Discovery IPv6 Black Hole Detection lowered MSS"); 194f6f6703fSSean Bruno #endif 195f6f6703fSSean Bruno 1968f7e75cbSAdrian Chadd #ifdef RSS 1978f7e75cbSAdrian Chadd static int per_cpu_timers = 1; 1988f7e75cbSAdrian Chadd #else 19987aedea4SKip Macy static int per_cpu_timers = 0; 2008f7e75cbSAdrian Chadd #endif 20187aedea4SKip Macy SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW, 20287aedea4SKip Macy &per_cpu_timers , 0, "run tcp timers on all cpus"); 20387aedea4SKip Macy 204*43b117f8SRichard Scheffenegger static int 205*43b117f8SRichard Scheffenegger sysctl_net_inet_tcp_retries(SYSCTL_HANDLER_ARGS) 206*43b117f8SRichard Scheffenegger { 207*43b117f8SRichard Scheffenegger int error, new; 208*43b117f8SRichard Scheffenegger 209*43b117f8SRichard Scheffenegger new = V_tcp_retries; 210*43b117f8SRichard Scheffenegger error = sysctl_handle_int(oidp, &new, 0, req); 211*43b117f8SRichard Scheffenegger if (error == 0 && req->newptr) { 212*43b117f8SRichard Scheffenegger if ((new < 1) || (new > TCP_MAXRXTSHIFT)) 213*43b117f8SRichard Scheffenegger error = EINVAL; 214*43b117f8SRichard Scheffenegger else 215*43b117f8SRichard Scheffenegger V_tcp_retries = new; 216*43b117f8SRichard Scheffenegger } 217*43b117f8SRichard Scheffenegger return (error); 218*43b117f8SRichard Scheffenegger } 219*43b117f8SRichard Scheffenegger 220*43b117f8SRichard Scheffenegger VNET_DEFINE(int, tcp_retries) = TCP_MAXRXTSHIFT; 221*43b117f8SRichard Scheffenegger SYSCTL_PROC(_net_inet_tcp, OID_AUTO, retries, 222*43b117f8SRichard Scheffenegger CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW, 223*43b117f8SRichard Scheffenegger &VNET_NAME(tcp_retries), 0, sysctl_net_inet_tcp_retries, "I", 224*43b117f8SRichard Scheffenegger "maximum number of consecutive timer based retransmissions"); 225*43b117f8SRichard Scheffenegger 226883831c6SAdrian Chadd /* 227883831c6SAdrian Chadd * Map the given inp to a CPU id. 228883831c6SAdrian Chadd * 229883831c6SAdrian Chadd * This queries RSS if it's compiled in, else it defaults to the current 230883831c6SAdrian Chadd * CPU ID. 231883831c6SAdrian Chadd */ 23289e560f4SRandall Stewart inline int 233883831c6SAdrian Chadd inp_to_cpuid(struct inpcb *inp) 234883831c6SAdrian Chadd { 235883831c6SAdrian Chadd u_int cpuid; 236883831c6SAdrian Chadd 237883831c6SAdrian Chadd if (per_cpu_timers) { 23847ded797SFranco Fichtner #ifdef RSS 239883831c6SAdrian Chadd cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); 240883831c6SAdrian Chadd if (cpuid == NETISR_CPUID_NONE) 241883831c6SAdrian Chadd return (curcpu); /* XXX */ 242883831c6SAdrian Chadd else 243883831c6SAdrian Chadd return (cpuid); 24447ded797SFranco Fichtner #endif 245883831c6SAdrian Chadd /* 246883831c6SAdrian Chadd * We don't have a flowid -> cpuid mapping, so cheat and 247883831c6SAdrian Chadd * just map unknown cpuids to curcpu. Not the best, but 248883831c6SAdrian Chadd * apparently better than defaulting to swi 0. 249883831c6SAdrian Chadd */ 250883831c6SAdrian Chadd cpuid = inp->inp_flowid % (mp_maxid + 1); 251883831c6SAdrian Chadd if (! CPU_ABSENT(cpuid)) 252883831c6SAdrian Chadd return (cpuid); 253883831c6SAdrian Chadd return (curcpu); 25447ded797SFranco Fichtner } else { 255883831c6SAdrian Chadd return (0); 256883831c6SAdrian Chadd } 257883831c6SAdrian Chadd } 25887aedea4SKip Macy 259df8bae1dSRodney W. Grimes int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 260f058535dSJeffrey Hsu { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 261df8bae1dSRodney W. Grimes 26289e560f4SRandall Stewart int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 263e79adb8eSGarrett Wollman 264df8bae1dSRodney W. Grimes /* 265df8bae1dSRodney W. Grimes * TCP timer processing. 266446ccdd0SGleb Smirnoff * 267446ccdd0SGleb Smirnoff * Each connection has 5 timers associated with it, which can be scheduled 268446ccdd0SGleb Smirnoff * simultaneously. They all are serviced by one callout tcp_timer_enter(). 269446ccdd0SGleb Smirnoff * This function executes the next timer via tcp_timersw[] vector. Each 270446ccdd0SGleb Smirnoff * timer is supposed to return 'true' unless the connection was destroyed. 271446ccdd0SGleb Smirnoff * In the former case tcp_timer_enter() will schedule callout for next timer. 272df8bae1dSRodney W. Grimes */ 27385d94372SRobert Watson 274446ccdd0SGleb Smirnoff typedef bool tcp_timer_t(struct tcpcb *); 275446ccdd0SGleb Smirnoff static tcp_timer_t tcp_timer_delack; 276446ccdd0SGleb Smirnoff static tcp_timer_t tcp_timer_2msl; 277446ccdd0SGleb Smirnoff static tcp_timer_t tcp_timer_keep; 278446ccdd0SGleb Smirnoff static tcp_timer_t tcp_timer_persist; 279446ccdd0SGleb Smirnoff static tcp_timer_t tcp_timer_rexmt; 280446ccdd0SGleb Smirnoff 281446ccdd0SGleb Smirnoff static tcp_timer_t * const tcp_timersw[TT_N] = { 282446ccdd0SGleb Smirnoff [TT_DELACK] = tcp_timer_delack, 283446ccdd0SGleb Smirnoff [TT_REXMT] = tcp_timer_rexmt, 284446ccdd0SGleb Smirnoff [TT_PERSIST] = tcp_timer_persist, 285446ccdd0SGleb Smirnoff [TT_KEEP] = tcp_timer_keep, 286446ccdd0SGleb Smirnoff [TT_2MSL] = tcp_timer_2msl, 287446ccdd0SGleb Smirnoff }; 288446ccdd0SGleb Smirnoff 289446ccdd0SGleb Smirnoff /* 290446ccdd0SGleb Smirnoff * tcp_output_locked() s a timer specific variation of call to tcp_output(), 291446ccdd0SGleb Smirnoff * see tcp_var.h for the rest. It handles drop request from advanced stacks, 292446ccdd0SGleb Smirnoff * but keeps tcpcb locked unless tcp_drop() destroyed it. 293446ccdd0SGleb Smirnoff * Returns true if tcpcb is valid and locked. 294446ccdd0SGleb Smirnoff */ 295446ccdd0SGleb Smirnoff static inline bool 296446ccdd0SGleb Smirnoff tcp_output_locked(struct tcpcb *tp) 297446ccdd0SGleb Smirnoff { 298446ccdd0SGleb Smirnoff int rv; 299446ccdd0SGleb Smirnoff 300446ccdd0SGleb Smirnoff INP_WLOCK_ASSERT(tptoinpcb(tp)); 301446ccdd0SGleb Smirnoff 302446ccdd0SGleb Smirnoff if ((rv = tp->t_fb->tfb_tcp_output(tp)) < 0) { 303446ccdd0SGleb Smirnoff KASSERT(tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP, 304446ccdd0SGleb Smirnoff ("TCP stack %s requested tcp_drop(%p)", 305446ccdd0SGleb Smirnoff tp->t_fb->tfb_tcp_block_name, tp)); 306446ccdd0SGleb Smirnoff tp = tcp_drop(tp, rv); 307446ccdd0SGleb Smirnoff } 308446ccdd0SGleb Smirnoff 309446ccdd0SGleb Smirnoff return (tp != NULL); 310446ccdd0SGleb Smirnoff } 311446ccdd0SGleb Smirnoff 312446ccdd0SGleb Smirnoff static bool 313446ccdd0SGleb Smirnoff tcp_timer_delack(struct tcpcb *tp) 314df8bae1dSRodney W. Grimes { 315109eb549SGleb Smirnoff struct epoch_tracker et; 316446ccdd0SGleb Smirnoff #if defined(INVARIANTS) || defined(VIMAGE) 3179eb0e832SGleb Smirnoff struct inpcb *inp = tptoinpcb(tp); 318446ccdd0SGleb Smirnoff #endif 319446ccdd0SGleb Smirnoff bool rv; 3209eb0e832SGleb Smirnoff 321446ccdd0SGleb Smirnoff INP_WLOCK_ASSERT(inp); 322446ccdd0SGleb Smirnoff 3238840ae22SGleb Smirnoff CURVNET_SET(inp->inp_vnet); 3249b8b58e0SJonathan Lemon tp->t_flags |= TF_ACKNOW; 32578b50714SRobert Watson TCPSTAT_INC(tcps_delack); 326109eb549SGleb Smirnoff NET_EPOCH_ENTER(et); 327446ccdd0SGleb Smirnoff rv = tcp_output_locked(tp); 328109eb549SGleb Smirnoff NET_EPOCH_EXIT(et); 3298b615593SMarko Zec CURVNET_RESTORE(); 330446ccdd0SGleb Smirnoff 331446ccdd0SGleb Smirnoff return (rv); 3329b8b58e0SJonathan Lemon } 3339b8b58e0SJonathan Lemon 334446ccdd0SGleb Smirnoff static bool 335446ccdd0SGleb Smirnoff tcp_timer_2msl(struct tcpcb *tp) 336b07fef50SRandall Stewart { 3379eb0e832SGleb Smirnoff struct inpcb *inp = tptoinpcb(tp); 338446ccdd0SGleb Smirnoff bool close = false; 33977198a94SGleb Smirnoff 34077198a94SGleb Smirnoff INP_WLOCK_ASSERT(inp); 34177198a94SGleb Smirnoff 342446ccdd0SGleb Smirnoff TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); 3438840ae22SGleb Smirnoff CURVNET_SET(inp->inp_vnet); 344d1b07f36SRandall Stewart tcp_log_end_status(tp, TCP_EI_STATUS_2MSL); 34585d94372SRobert Watson tcp_free_sackholes(tp); 34685d94372SRobert Watson /* 347df8bae1dSRodney W. Grimes * 2 MSL timeout in shutdown went off. If we're closed but 348df8bae1dSRodney W. Grimes * still waiting for peer to close and connection has been idle 34931a7749dSJulien Charbon * too long delete connection control block. Otherwise, check 35031a7749dSJulien Charbon * again in a bit. 35131a7749dSJulien Charbon * 3527c72af87SMohan Srinivasan * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 3537c72af87SMohan Srinivasan * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 3547c72af87SMohan Srinivasan * Ignore fact that there were recent incoming segments. 355f71cb9f7SGleb Smirnoff * 356f71cb9f7SGleb Smirnoff * XXXGL: check if inp_socket shall always be !NULL here? 357df8bae1dSRodney W. Grimes */ 3580d744519SGleb Smirnoff if (tp->t_state == TCPS_TIME_WAIT) { 359446ccdd0SGleb Smirnoff close = true; 3600d744519SGleb Smirnoff } else if (tp->t_state == TCPS_FIN_WAIT_2 && 3619eb0e832SGleb Smirnoff tcp_fast_finwait2_recycle && inp->inp_socket && 3629eb0e832SGleb Smirnoff (inp->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 36378b50714SRobert Watson TCPSTAT_INC(tcps_finwait2_drops); 364446ccdd0SGleb Smirnoff close = true; 3657c72af87SMohan Srinivasan } else { 366446ccdd0SGleb Smirnoff if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) 367446ccdd0SGleb Smirnoff tcp_timer_activate(tp, TT_2MSL, TP_KEEPINTVL(tp)); 368446ccdd0SGleb Smirnoff else 369446ccdd0SGleb Smirnoff close = true; 370446ccdd0SGleb Smirnoff } 371446ccdd0SGleb Smirnoff if (close) { 372446ccdd0SGleb Smirnoff struct epoch_tracker et; 373446ccdd0SGleb Smirnoff 374446ccdd0SGleb Smirnoff NET_EPOCH_ENTER(et); 375446ccdd0SGleb Smirnoff tp = tcp_close(tp); 376446ccdd0SGleb Smirnoff NET_EPOCH_EXIT(et); 377446ccdd0SGleb Smirnoff } 37877198a94SGleb Smirnoff CURVNET_RESTORE(); 379446ccdd0SGleb Smirnoff 380446ccdd0SGleb Smirnoff return (tp != NULL); 3817c72af87SMohan Srinivasan } 382df8bae1dSRodney W. Grimes 383446ccdd0SGleb Smirnoff static bool 384446ccdd0SGleb Smirnoff tcp_timer_keep(struct tcpcb *tp) 3859b8b58e0SJonathan Lemon { 3866573d758SMatt Macy struct epoch_tracker et; 3879eb0e832SGleb Smirnoff struct inpcb *inp = tptoinpcb(tp); 3889eb0e832SGleb Smirnoff struct tcptemp *t_template; 3899b8b58e0SJonathan Lemon 390446ccdd0SGleb Smirnoff INP_WLOCK_ASSERT(inp); 3919eb0e832SGleb Smirnoff 392446ccdd0SGleb Smirnoff TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); 3938840ae22SGleb Smirnoff CURVNET_SET(inp->inp_vnet); 3946d172f58SJonathan T. Looney /* 3956d172f58SJonathan T. Looney * Because we don't regularly reset the keepalive callout in 3966d172f58SJonathan T. Looney * the ESTABLISHED state, it may be that we don't actually need 3976d172f58SJonathan T. Looney * to send a keepalive yet. If that occurs, schedule another 3986d172f58SJonathan T. Looney * call for the next time the keepalive timer might expire. 3996d172f58SJonathan T. Looney */ 4006d172f58SJonathan T. Looney if (TCPS_HAVEESTABLISHED(tp->t_state)) { 4016d172f58SJonathan T. Looney u_int idletime; 4026d172f58SJonathan T. Looney 4036d172f58SJonathan T. Looney idletime = ticks - tp->t_rcvtime; 4046d172f58SJonathan T. Looney if (idletime < TP_KEEPIDLE(tp)) { 405446ccdd0SGleb Smirnoff tcp_timer_activate(tp, TT_KEEP, 406446ccdd0SGleb Smirnoff TP_KEEPIDLE(tp) - idletime); 4076d172f58SJonathan T. Looney CURVNET_RESTORE(); 408446ccdd0SGleb Smirnoff return (true); 4096d172f58SJonathan T. Looney } 4106d172f58SJonathan T. Looney } 4116d172f58SJonathan T. Looney 4129b8b58e0SJonathan Lemon /* 4139b8b58e0SJonathan Lemon * Keep-alive timer went off; send something 4149b8b58e0SJonathan Lemon * or drop connection if idle for too long. 4159b8b58e0SJonathan Lemon */ 41678b50714SRobert Watson TCPSTAT_INC(tcps_keeptimeo); 4179b8b58e0SJonathan Lemon if (tp->t_state < TCPS_ESTABLISHED) 4189b8b58e0SJonathan Lemon goto dropit; 419334fc582SBjoern A. Zeeb if ((V_tcp_always_keepalive || 420f1798531SJohn Baldwin inp->inp_socket->so_options & SO_KEEPALIVE) && 4219b8b58e0SJonathan Lemon tp->t_state <= TCPS_CLOSING) { 4229077f387SGleb Smirnoff if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) 4239b8b58e0SJonathan Lemon goto dropit; 4249b8b58e0SJonathan Lemon /* 4259b8b58e0SJonathan Lemon * Send a packet designed to force a response 4269b8b58e0SJonathan Lemon * if the peer is up and reachable: 4279b8b58e0SJonathan Lemon * either an ACK if the connection is still alive, 4289b8b58e0SJonathan Lemon * or an RST if the peer has closed the connection 4299b8b58e0SJonathan Lemon * due to timeout or reboot. 4309b8b58e0SJonathan Lemon * Using sequence number tp->snd_una-1 4319b8b58e0SJonathan Lemon * causes the transmitted zero-length segment 4329b8b58e0SJonathan Lemon * to lie outside the receive window; 4339b8b58e0SJonathan Lemon * by the protocol spec, this requires the 4349b8b58e0SJonathan Lemon * correspondent TCP to respond. 4359b8b58e0SJonathan Lemon */ 43678b50714SRobert Watson TCPSTAT_INC(tcps_keepprobe); 43779909384SJonathan Lemon t_template = tcpip_maketemplate(inp); 43808517d53SMike Silbersack if (t_template) { 439b9555453SGleb Smirnoff NET_EPOCH_ENTER(et); 44008517d53SMike Silbersack tcp_respond(tp, t_template->tt_ipgen, 44108517d53SMike Silbersack &t_template->tt_t, (struct mbuf *)NULL, 4429b8b58e0SJonathan Lemon tp->rcv_nxt, tp->snd_una - 1, 0); 443b9555453SGleb Smirnoff NET_EPOCH_EXIT(et); 44453640b0eSRobert Watson free(t_template, M_TEMP); 44508517d53SMike Silbersack } 446446ccdd0SGleb Smirnoff tcp_timer_activate(tp, TT_KEEP, TP_KEEPINTVL(tp)); 447b07fef50SRandall Stewart } else 448446ccdd0SGleb Smirnoff tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); 4499b8b58e0SJonathan Lemon 4508b615593SMarko Zec CURVNET_RESTORE(); 451446ccdd0SGleb Smirnoff return (true); 4529b8b58e0SJonathan Lemon 4539b8b58e0SJonathan Lemon dropit: 45478b50714SRobert Watson TCPSTAT_INC(tcps_keepdrops); 45558d94bd0SGleb Smirnoff NET_EPOCH_ENTER(et); 456d1b07f36SRandall Stewart tcp_log_end_status(tp, TCP_EI_STATUS_KEEP_MAX); 45785d94372SRobert Watson tp = tcp_drop(tp, ETIMEDOUT); 45858d94bd0SGleb Smirnoff NET_EPOCH_EXIT(et); 4598b615593SMarko Zec CURVNET_RESTORE(); 460446ccdd0SGleb Smirnoff 461446ccdd0SGleb Smirnoff return (tp != NULL); 4629b8b58e0SJonathan Lemon } 4639b8b58e0SJonathan Lemon 46408af8aacSRandall Stewart /* 46508af8aacSRandall Stewart * Has this session exceeded the maximum time without seeing a substantive 46608af8aacSRandall Stewart * acknowledgement? If so, return true; otherwise false. 46708af8aacSRandall Stewart */ 46808af8aacSRandall Stewart static bool 46908af8aacSRandall Stewart tcp_maxunacktime_check(struct tcpcb *tp) 47008af8aacSRandall Stewart { 47108af8aacSRandall Stewart 47208af8aacSRandall Stewart /* Are we tracking this timer for this session? */ 47308af8aacSRandall Stewart if (TP_MAXUNACKTIME(tp) == 0) 47408af8aacSRandall Stewart return false; 47508af8aacSRandall Stewart 47608af8aacSRandall Stewart /* Do we have a current measurement. */ 47708af8aacSRandall Stewart if (tp->t_acktime == 0) 47808af8aacSRandall Stewart return false; 47908af8aacSRandall Stewart 48008af8aacSRandall Stewart /* Are we within the acceptable range? */ 48108af8aacSRandall Stewart if (TSTMP_GT(TP_MAXUNACKTIME(tp) + tp->t_acktime, (u_int)ticks)) 48208af8aacSRandall Stewart return false; 48308af8aacSRandall Stewart 48408af8aacSRandall Stewart /* We exceeded the timer. */ 48508af8aacSRandall Stewart TCPSTAT_INC(tcps_progdrops); 48608af8aacSRandall Stewart return true; 48708af8aacSRandall Stewart } 48808af8aacSRandall Stewart 489446ccdd0SGleb Smirnoff static bool 490446ccdd0SGleb Smirnoff tcp_timer_persist(struct tcpcb *tp) 4919b8b58e0SJonathan Lemon { 4926573d758SMatt Macy struct epoch_tracker et; 493446ccdd0SGleb Smirnoff #if defined(INVARIANTS) || defined(VIMAGE) 4949eb0e832SGleb Smirnoff struct inpcb *inp = tptoinpcb(tp); 4959b8b58e0SJonathan Lemon #endif 496446ccdd0SGleb Smirnoff bool progdrop, rv; 4979eb0e832SGleb Smirnoff 498446ccdd0SGleb Smirnoff INP_WLOCK_ASSERT(inp); 499446ccdd0SGleb Smirnoff 500446ccdd0SGleb Smirnoff TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); 5018840ae22SGleb Smirnoff CURVNET_SET(inp->inp_vnet); 5029b8b58e0SJonathan Lemon /* 503a4641f4eSPedro F. Giffuni * Persistence timer into zero window. 5049b8b58e0SJonathan Lemon * Force a byte to be output, if possible. 5059b8b58e0SJonathan Lemon */ 50678b50714SRobert Watson TCPSTAT_INC(tcps_persisttimeo); 5079b8b58e0SJonathan Lemon /* 5089b8b58e0SJonathan Lemon * Hack: if the peer is dead/unreachable, we do not 5099b8b58e0SJonathan Lemon * time out if the window is closed. After a full 5109b8b58e0SJonathan Lemon * backoff, drop the connection if the idle time 5119b8b58e0SJonathan Lemon * (no responses to probes) reaches the maximum 5129b8b58e0SJonathan Lemon * backoff that we would use if retransmitting. 51308af8aacSRandall Stewart * Also, drop the connection if we haven't been making 51408af8aacSRandall Stewart * progress. 5159b8b58e0SJonathan Lemon */ 51608af8aacSRandall Stewart progdrop = tcp_maxunacktime_check(tp); 517*43b117f8SRichard Scheffenegger if (progdrop || (tp->t_rxtshift >= V_tcp_retries && 5186b0c5521SJohn Baldwin (ticks - tp->t_rcvtime >= tcp_maxpersistidle || 51908af8aacSRandall Stewart ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) { 52008af8aacSRandall Stewart if (!progdrop) 52178b50714SRobert Watson TCPSTAT_INC(tcps_persistdrop); 522d1b07f36SRandall Stewart tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); 523446ccdd0SGleb Smirnoff goto dropit; 5249b8b58e0SJonathan Lemon } 525322181c9SAndre Oppermann /* 526322181c9SAndre Oppermann * If the user has closed the socket then drop a persisting 527322181c9SAndre Oppermann * connection after a much reduced timeout. 528322181c9SAndre Oppermann */ 529322181c9SAndre Oppermann if (tp->t_state > TCPS_CLOSE_WAIT && 530322181c9SAndre Oppermann (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { 531322181c9SAndre Oppermann TCPSTAT_INC(tcps_persistdrop); 532d1b07f36SRandall Stewart tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); 533446ccdd0SGleb Smirnoff goto dropit; 534322181c9SAndre Oppermann } 5359b8b58e0SJonathan Lemon tcp_setpersist(tp); 5362cdbfa66SPaul Saab tp->t_flags |= TF_FORCEDATA; 537109eb549SGleb Smirnoff NET_EPOCH_ENTER(et); 538446ccdd0SGleb Smirnoff if ((rv = tcp_output_locked(tp))) 5392cdbfa66SPaul Saab tp->t_flags &= ~TF_FORCEDATA; 540f64dc2abSGleb Smirnoff NET_EPOCH_EXIT(et); 5418b615593SMarko Zec CURVNET_RESTORE(); 542446ccdd0SGleb Smirnoff 543446ccdd0SGleb Smirnoff return (rv); 544446ccdd0SGleb Smirnoff 545446ccdd0SGleb Smirnoff dropit: 546446ccdd0SGleb Smirnoff NET_EPOCH_ENTER(et); 547446ccdd0SGleb Smirnoff tp = tcp_drop(tp, ETIMEDOUT); 548446ccdd0SGleb Smirnoff NET_EPOCH_EXIT(et); 549446ccdd0SGleb Smirnoff CURVNET_RESTORE(); 550446ccdd0SGleb Smirnoff 551446ccdd0SGleb Smirnoff return (tp != NULL); 5529b8b58e0SJonathan Lemon } 5539b8b58e0SJonathan Lemon 554446ccdd0SGleb Smirnoff static bool 555446ccdd0SGleb Smirnoff tcp_timer_rexmt(struct tcpcb *tp) 5569b8b58e0SJonathan Lemon { 5579eb0e832SGleb Smirnoff struct epoch_tracker et; 5589eb0e832SGleb Smirnoff struct inpcb *inp = tptoinpcb(tp); 559446ccdd0SGleb Smirnoff int rexmt; 560446ccdd0SGleb Smirnoff bool isipv6, rv; 5619b8b58e0SJonathan Lemon 562446ccdd0SGleb Smirnoff INP_WLOCK_ASSERT(inp); 5639eb0e832SGleb Smirnoff 564446ccdd0SGleb Smirnoff TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); 5658840ae22SGleb Smirnoff CURVNET_SET(inp->inp_vnet); 5666d90faf3SPaul Saab tcp_free_sackholes(tp); 5675105a92cSRandall Stewart if (tp->t_fb->tfb_tcp_rexmit_tmr) { 5685105a92cSRandall Stewart /* The stack has a timer action too. */ 5695105a92cSRandall Stewart (*tp->t_fb->tfb_tcp_rexmit_tmr)(tp); 5705105a92cSRandall Stewart } 571df8bae1dSRodney W. Grimes /* 572df8bae1dSRodney W. Grimes * Retransmission timer went off. Message has not 573df8bae1dSRodney W. Grimes * been acked within retransmit interval. Back off 574df8bae1dSRodney W. Grimes * to a longer retransmit interval and retransmit one segment. 57508af8aacSRandall Stewart * 57608af8aacSRandall Stewart * If we've either exceeded the maximum number of retransmissions, 57708af8aacSRandall Stewart * or we've gone long enough without making progress, then drop 57808af8aacSRandall Stewart * the session. 579df8bae1dSRodney W. Grimes */ 580*43b117f8SRichard Scheffenegger if (++tp->t_rxtshift > V_tcp_retries || tcp_maxunacktime_check(tp)) { 581*43b117f8SRichard Scheffenegger if (tp->t_rxtshift > V_tcp_retries) 58278b50714SRobert Watson TCPSTAT_INC(tcps_timeoutdrop); 583*43b117f8SRichard Scheffenegger tp->t_rxtshift = V_tcp_retries; 584d1b07f36SRandall Stewart tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN); 585446ccdd0SGleb Smirnoff NET_EPOCH_ENTER(et); 586446ccdd0SGleb Smirnoff tp = tcp_drop(tp, ETIMEDOUT); 587446ccdd0SGleb Smirnoff NET_EPOCH_EXIT(et); 58877198a94SGleb Smirnoff CURVNET_RESTORE(); 589446ccdd0SGleb Smirnoff 590446ccdd0SGleb Smirnoff return (tp != NULL); 591b07fef50SRandall Stewart } 592cf8f04f4SAndre Oppermann if (tp->t_state == TCPS_SYN_SENT) { 593cf8f04f4SAndre Oppermann /* 594cf8f04f4SAndre Oppermann * If the SYN was retransmitted, indicate CWND to be 595cf8f04f4SAndre Oppermann * limited to 1 segment in cc_conn_init(). 596cf8f04f4SAndre Oppermann */ 597cf8f04f4SAndre Oppermann tp->snd_cwnd = 1; 598cf8f04f4SAndre Oppermann } else if (tp->t_rxtshift == 1) { 5999b8b58e0SJonathan Lemon /* 6009b8b58e0SJonathan Lemon * first retransmit; record ssthresh and cwnd so they can 6019b8b58e0SJonathan Lemon * be recovered if this turns out to be a "bad" retransmit. 6029b8b58e0SJonathan Lemon * A retransmit is considered "bad" if an ACK for this 6039b8b58e0SJonathan Lemon * segment is received within RTT/2 interval; the assumption 6049b8b58e0SJonathan Lemon * here is that the ACK was already in flight. See 6059b8b58e0SJonathan Lemon * "On Estimating End-to-End Network Path Properties" by 6069b8b58e0SJonathan Lemon * Allman and Paxson for more details. 6079b8b58e0SJonathan Lemon */ 6089b8b58e0SJonathan Lemon tp->snd_cwnd_prev = tp->snd_cwnd; 6099b8b58e0SJonathan Lemon tp->snd_ssthresh_prev = tp->snd_ssthresh; 6109d11646dSJeffrey Hsu tp->snd_recover_prev = tp->snd_recover; 611dbc42409SLawrence Stewart if (IN_FASTRECOVERY(tp->t_flags)) 6129d11646dSJeffrey Hsu tp->t_flags |= TF_WASFRECOVERY; 6139d11646dSJeffrey Hsu else 6149d11646dSJeffrey Hsu tp->t_flags &= ~TF_WASFRECOVERY; 615dbc42409SLawrence Stewart if (IN_CONGRECOVERY(tp->t_flags)) 616dbc42409SLawrence Stewart tp->t_flags |= TF_WASCRECOVERY; 617dbc42409SLawrence Stewart else 618dbc42409SLawrence Stewart tp->t_flags &= ~TF_WASCRECOVERY; 61910d20c84SMatt Macy if ((tp->t_flags & TF_RCVD_TSTMP) == 0) 6209b8b58e0SJonathan Lemon tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 62110d20c84SMatt Macy /* In the event that we've negotiated timestamps 62210d20c84SMatt Macy * badrxtwin will be set to the value that we set 62310d20c84SMatt Macy * the retransmitted packet's to_tsval to by tcp_output 62410d20c84SMatt Macy */ 625672dc4aeSJohn Baldwin tp->t_flags |= TF_PREVVALID; 626672dc4aeSJohn Baldwin } else 627672dc4aeSJohn Baldwin tp->t_flags &= ~TF_PREVVALID; 62878b50714SRobert Watson TCPSTAT_INC(tcps_rexmttimeo); 629281a0fd4SPatrick Kelsey if ((tp->t_state == TCPS_SYN_SENT) || 630281a0fd4SPatrick Kelsey (tp->t_state == TCPS_SYN_RECEIVED)) 6310999766dSMichael Tuexen rexmt = tcp_rexmit_initial * tcp_backoff[tp->t_rxtshift]; 6327d42e30cSJonathan Lemon else 633df8bae1dSRodney W. Grimes rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 634df8bae1dSRodney W. Grimes TCPT_RANGESET(tp->t_rxtcur, rexmt, 635df8bae1dSRodney W. Grimes tp->t_rttmin, TCPTV_REXMTMAX); 636f6f6703fSSean Bruno 637882ac53eSSean Bruno /* 638882ac53eSSean Bruno * We enter the path for PLMTUD if connection is established or, if 639882ac53eSSean Bruno * connection is FIN_WAIT_1 status, reason for the last is that if 640882ac53eSSean Bruno * amount of data we send is very small, we could send it in couple of 641882ac53eSSean Bruno * packets and process straight to FIN. In that case we won't catch 642882ac53eSSean Bruno * ESTABLISHED state. 643882ac53eSSean Bruno */ 644f6f6703fSSean Bruno #ifdef INET6 6459eb0e832SGleb Smirnoff isipv6 = (inp->inp_vflag & INP_IPV6) ? true : false; 646413c3db1SMichael Tuexen #else 647413c3db1SMichael Tuexen isipv6 = false; 648f6f6703fSSean Bruno #endif 649413c3db1SMichael Tuexen if (((V_tcp_pmtud_blackhole_detect == 1) || 650413c3db1SMichael Tuexen (V_tcp_pmtud_blackhole_detect == 2 && !isipv6) || 651413c3db1SMichael Tuexen (V_tcp_pmtud_blackhole_detect == 3 && isipv6)) && 652413c3db1SMichael Tuexen ((tp->t_state == TCPS_ESTABLISHED) || 653413c3db1SMichael Tuexen (tp->t_state == TCPS_FIN_WAIT_1))) { 654b89af8e1SMichael Tuexen if (tp->t_rxtshift == 1) { 655adf43a92SHiren Panchasara /* 656b89af8e1SMichael Tuexen * We enter blackhole detection after the first 657b89af8e1SMichael Tuexen * unsuccessful timer based retransmission. 658b89af8e1SMichael Tuexen * Then we reduce up to two times the MSS, each 659b89af8e1SMichael Tuexen * candidate giving two tries of retransmissions. 660b89af8e1SMichael Tuexen * But we give a candidate only two tries, if it 661b89af8e1SMichael Tuexen * actually reduces the MSS. 662adf43a92SHiren Panchasara */ 663b89af8e1SMichael Tuexen tp->t_blackhole_enter = 2; 664b89af8e1SMichael Tuexen tp->t_blackhole_exit = tp->t_blackhole_enter; 665b89af8e1SMichael Tuexen if (isipv6) { 666b89af8e1SMichael Tuexen #ifdef INET6 667b89af8e1SMichael Tuexen if (tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss) 668b89af8e1SMichael Tuexen tp->t_blackhole_exit += 2; 669b89af8e1SMichael Tuexen if (tp->t_maxseg > V_tcp_v6mssdflt && 670b89af8e1SMichael Tuexen V_tcp_v6pmtud_blackhole_mss > V_tcp_v6mssdflt) 671b89af8e1SMichael Tuexen tp->t_blackhole_exit += 2; 672b89af8e1SMichael Tuexen #endif 673b89af8e1SMichael Tuexen } else { 674b89af8e1SMichael Tuexen #ifdef INET 675b89af8e1SMichael Tuexen if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss) 676b89af8e1SMichael Tuexen tp->t_blackhole_exit += 2; 677b89af8e1SMichael Tuexen if (tp->t_maxseg > V_tcp_mssdflt && 678b89af8e1SMichael Tuexen V_tcp_pmtud_blackhole_mss > V_tcp_mssdflt) 679b89af8e1SMichael Tuexen tp->t_blackhole_exit += 2; 680b89af8e1SMichael Tuexen #endif 681b89af8e1SMichael Tuexen } 682b89af8e1SMichael Tuexen } 683f6f6703fSSean Bruno if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) == 684f6f6703fSSean Bruno (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) && 685b89af8e1SMichael Tuexen (tp->t_rxtshift >= tp->t_blackhole_enter && 686b89af8e1SMichael Tuexen tp->t_rxtshift < tp->t_blackhole_exit && 687b89af8e1SMichael Tuexen (tp->t_rxtshift - tp->t_blackhole_enter) % 2 == 0)) { 688f6f6703fSSean Bruno /* 689f6f6703fSSean Bruno * Enter Path MTU Black-hole Detection mechanism: 690f6f6703fSSean Bruno * - Disable Path MTU Discovery (IP "DF" bit). 691f6f6703fSSean Bruno * - Reduce MTU to lower value than what we 692f6f6703fSSean Bruno * negotiated with peer. 693f6f6703fSSean Bruno */ 6943d5af7a1SMichael Tuexen if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) == 0) { 695f6f6703fSSean Bruno /* Record that we may have found a black hole. */ 696f6f6703fSSean Bruno tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE; 697f6f6703fSSean Bruno /* Keep track of previous MSS. */ 6980c39d38dSGleb Smirnoff tp->t_pmtud_saved_maxseg = tp->t_maxseg; 6993d5af7a1SMichael Tuexen } 700f6f6703fSSean Bruno 701f6f6703fSSean Bruno /* 702f6f6703fSSean Bruno * Reduce the MSS to blackhole value or to the default 703f6f6703fSSean Bruno * in an attempt to retransmit. 704f6f6703fSSean Bruno */ 705f6f6703fSSean Bruno #ifdef INET6 706f6f6703fSSean Bruno if (isipv6 && 707b89af8e1SMichael Tuexen tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss && 708b89af8e1SMichael Tuexen V_tcp_v6pmtud_blackhole_mss > V_tcp_v6mssdflt) { 709f6f6703fSSean Bruno /* Use the sysctl tuneable blackhole MSS. */ 7100c39d38dSGleb Smirnoff tp->t_maxseg = V_tcp_v6pmtud_blackhole_mss; 71132a04bb8SSean Bruno TCPSTAT_INC(tcps_pmtud_blackhole_activated); 712f6f6703fSSean Bruno } else if (isipv6) { 713f6f6703fSSean Bruno /* Use the default MSS. */ 7140c39d38dSGleb Smirnoff tp->t_maxseg = V_tcp_v6mssdflt; 715f6f6703fSSean Bruno /* 716f6f6703fSSean Bruno * Disable Path MTU Discovery when we switch to 717f6f6703fSSean Bruno * minmss. 718f6f6703fSSean Bruno */ 719f6f6703fSSean Bruno tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; 72032a04bb8SSean Bruno TCPSTAT_INC(tcps_pmtud_blackhole_activated_min_mss); 721f6f6703fSSean Bruno } 722f6f6703fSSean Bruno #endif 723f6f6703fSSean Bruno #if defined(INET6) && defined(INET) 724f6f6703fSSean Bruno else 725f6f6703fSSean Bruno #endif 726f6f6703fSSean Bruno #ifdef INET 727b89af8e1SMichael Tuexen if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss && 728b89af8e1SMichael Tuexen V_tcp_pmtud_blackhole_mss > V_tcp_mssdflt) { 729f6f6703fSSean Bruno /* Use the sysctl tuneable blackhole MSS. */ 7300c39d38dSGleb Smirnoff tp->t_maxseg = V_tcp_pmtud_blackhole_mss; 73132a04bb8SSean Bruno TCPSTAT_INC(tcps_pmtud_blackhole_activated); 732f6f6703fSSean Bruno } else { 733f6f6703fSSean Bruno /* Use the default MSS. */ 7340c39d38dSGleb Smirnoff tp->t_maxseg = V_tcp_mssdflt; 735f6f6703fSSean Bruno /* 736f6f6703fSSean Bruno * Disable Path MTU Discovery when we switch to 737f6f6703fSSean Bruno * minmss. 738f6f6703fSSean Bruno */ 739f6f6703fSSean Bruno tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; 74032a04bb8SSean Bruno TCPSTAT_INC(tcps_pmtud_blackhole_activated_min_mss); 741f6f6703fSSean Bruno } 742f6f6703fSSean Bruno #endif 743f6f6703fSSean Bruno /* 744f6f6703fSSean Bruno * Reset the slow-start flight size 745f6f6703fSSean Bruno * as it may depend on the new MSS. 746f6f6703fSSean Bruno */ 747f6f6703fSSean Bruno if (CC_ALGO(tp)->conn_init != NULL) 748e68b3792SGleb Smirnoff CC_ALGO(tp)->conn_init(&tp->t_ccv); 749f6f6703fSSean Bruno } else { 750f6f6703fSSean Bruno /* 751f6f6703fSSean Bruno * If further retransmissions are still unsuccessful 752f6f6703fSSean Bruno * with a lowered MTU, maybe this isn't a blackhole and 753f6f6703fSSean Bruno * we restore the previous MSS and blackhole detection 754f6f6703fSSean Bruno * flags. 755f6f6703fSSean Bruno */ 756f6f6703fSSean Bruno if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) && 757b89af8e1SMichael Tuexen (tp->t_rxtshift >= tp->t_blackhole_exit)) { 758f6f6703fSSean Bruno tp->t_flags2 |= TF2_PLPMTU_PMTUD; 759f6f6703fSSean Bruno tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; 7600c39d38dSGleb Smirnoff tp->t_maxseg = tp->t_pmtud_saved_maxseg; 76132a04bb8SSean Bruno TCPSTAT_INC(tcps_pmtud_blackhole_failed); 762f6f6703fSSean Bruno /* 763f6f6703fSSean Bruno * Reset the slow-start flight size as it 764f6f6703fSSean Bruno * may depend on the new MSS. 765f6f6703fSSean Bruno */ 766f6f6703fSSean Bruno if (CC_ALGO(tp)->conn_init != NULL) 767e68b3792SGleb Smirnoff CC_ALGO(tp)->conn_init(&tp->t_ccv); 768f6f6703fSSean Bruno } 769f6f6703fSSean Bruno } 770f6f6703fSSean Bruno } 771f6f6703fSSean Bruno 772df8bae1dSRodney W. Grimes /* 77377339e1cSAndre Oppermann * Disable RFC1323 and SACK if we haven't got any response to 7747ceb7783SJesper Skriver * our third SYN to work-around some broken terminal servers 7757ceb7783SJesper Skriver * (most of which have hopefully been retired) that have bad VJ 7767ceb7783SJesper Skriver * header compression code which trashes TCP segments containing 7777ceb7783SJesper Skriver * unknown-to-them TCP options. 7787ceb7783SJesper Skriver */ 7796c0ef895SJohn Baldwin if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) && 7806c0ef895SJohn Baldwin (tp->t_rxtshift == 3)) 781c4ab59c1SAndre Oppermann tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT); 7827ceb7783SJesper Skriver /* 7835ede40dcSRyan Stone * If we backed off this far, notify the L3 protocol that we're having 7845ede40dcSRyan Stone * connection problems. 785df8bae1dSRodney W. Grimes */ 7865ede40dcSRyan Stone if (tp->t_rxtshift > TCP_RTT_INVALIDATE) { 787fb59c426SYoshinobu Inoue #ifdef INET6 7889eb0e832SGleb Smirnoff if ((inp->inp_vflag & INP_IPV6) != 0) 7899eb0e832SGleb Smirnoff in6_losing(inp); 79084cc0778SGeorge V. Neville-Neil else 791fb59c426SYoshinobu Inoue #endif 7929eb0e832SGleb Smirnoff in_losing(inp); 793df8bae1dSRodney W. Grimes } 794df8bae1dSRodney W. Grimes tp->snd_nxt = tp->snd_una; 7959d11646dSJeffrey Hsu tp->snd_recover = tp->snd_max; 79646f58482SJonathan Lemon /* 79774b48c1dSAndras Olah * Force a segment to be sent. 79874b48c1dSAndras Olah */ 79974b48c1dSAndras Olah tp->t_flags |= TF_ACKNOW; 80074b48c1dSAndras Olah /* 801df8bae1dSRodney W. Grimes * If timing a segment in this window, stop the timer. 802df8bae1dSRodney W. Grimes */ 8039b8b58e0SJonathan Lemon tp->t_rtttime = 0; 804dbc42409SLawrence Stewart 805b5af1b88SLawrence Stewart cc_cong_signal(tp, NULL, CC_RTO); 806109eb549SGleb Smirnoff NET_EPOCH_ENTER(et); 807446ccdd0SGleb Smirnoff rv = tcp_output_locked(tp); 808f64dc2abSGleb Smirnoff NET_EPOCH_EXIT(et); 8098b615593SMarko Zec CURVNET_RESTORE(); 810446ccdd0SGleb Smirnoff 811446ccdd0SGleb Smirnoff return (rv); 81285d94372SRobert Watson } 81385d94372SRobert Watson 81476578d60SMichael Tuexen static void 81576578d60SMichael Tuexen tcp_bblog_timer(struct tcpcb *tp, tt_which which, tt_what what, uint32_t ticks) 81676578d60SMichael Tuexen { 81776578d60SMichael Tuexen struct tcp_log_buffer *lgb; 81876578d60SMichael Tuexen uint64_t ms; 81976578d60SMichael Tuexen 82076578d60SMichael Tuexen INP_WLOCK_ASSERT(tptoinpcb(tp)); 82169c7c811SRandall Stewart if (tcp_bblogging_on(tp)) 82269c7c811SRandall Stewart lgb = tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_RTO, 0, 0, 82376578d60SMichael Tuexen NULL, false, NULL, NULL, 0, NULL); 82476578d60SMichael Tuexen else 82576578d60SMichael Tuexen lgb = NULL; 82676578d60SMichael Tuexen if (lgb != NULL) { 82776578d60SMichael Tuexen lgb->tlb_flex1 = (what << 8) | which; 82876578d60SMichael Tuexen if (what == TT_STARTING) { 82976578d60SMichael Tuexen /* Convert ticks to ms and store it in tlb_flex2. */ 83076578d60SMichael Tuexen if (hz == 1000) 83176578d60SMichael Tuexen lgb->tlb_flex2 = ticks; 83276578d60SMichael Tuexen else { 83376578d60SMichael Tuexen ms = (((uint64_t)ticks * 1000) + (hz - 1)) / hz; 83476578d60SMichael Tuexen if (ms > UINT32_MAX) 83576578d60SMichael Tuexen lgb->tlb_flex2 = UINT32_MAX; 83676578d60SMichael Tuexen else 83776578d60SMichael Tuexen lgb->tlb_flex2 = (uint32_t)ms; 83876578d60SMichael Tuexen } 83976578d60SMichael Tuexen } 84076578d60SMichael Tuexen } 84176578d60SMichael Tuexen } 84276578d60SMichael Tuexen 843446ccdd0SGleb Smirnoff static inline tt_which 844446ccdd0SGleb Smirnoff tcp_timer_next(struct tcpcb *tp, sbintime_t *precision) 84585d94372SRobert Watson { 846446ccdd0SGleb Smirnoff tt_which i, rv; 847446ccdd0SGleb Smirnoff sbintime_t after, before; 848446ccdd0SGleb Smirnoff 849446ccdd0SGleb Smirnoff for (i = 0, rv = TT_N, after = before = SBT_MAX; i < TT_N; i++) { 850446ccdd0SGleb Smirnoff if (tp->t_timers[i] < after) { 851446ccdd0SGleb Smirnoff after = tp->t_timers[i]; 852446ccdd0SGleb Smirnoff rv = i; 853446ccdd0SGleb Smirnoff } 854446ccdd0SGleb Smirnoff before = MIN(before, tp->t_timers[i] + tp->t_precisions[i]); 855446ccdd0SGleb Smirnoff } 856446ccdd0SGleb Smirnoff if (precision != NULL) 857446ccdd0SGleb Smirnoff *precision = before - after; 858446ccdd0SGleb Smirnoff 859446ccdd0SGleb Smirnoff return (rv); 860446ccdd0SGleb Smirnoff } 861446ccdd0SGleb Smirnoff 862446ccdd0SGleb Smirnoff static void 863446ccdd0SGleb Smirnoff tcp_timer_enter(void *xtp) 864446ccdd0SGleb Smirnoff { 865446ccdd0SGleb Smirnoff struct tcpcb *tp = xtp; 8669eb0e832SGleb Smirnoff struct inpcb *inp = tptoinpcb(tp); 867446ccdd0SGleb Smirnoff sbintime_t precision; 868446ccdd0SGleb Smirnoff tt_which which; 86976578d60SMichael Tuexen bool tp_valid; 870446ccdd0SGleb Smirnoff 871446ccdd0SGleb Smirnoff INP_WLOCK_ASSERT(inp); 872446ccdd0SGleb Smirnoff MPASS((curthread->td_pflags & TDP_INTCPCALLOUT) == 0); 873446ccdd0SGleb Smirnoff 874446ccdd0SGleb Smirnoff curthread->td_pflags |= TDP_INTCPCALLOUT; 875446ccdd0SGleb Smirnoff 876446ccdd0SGleb Smirnoff which = tcp_timer_next(tp, NULL); 877446ccdd0SGleb Smirnoff MPASS(which < TT_N); 878446ccdd0SGleb Smirnoff tp->t_timers[which] = SBT_MAX; 879446ccdd0SGleb Smirnoff tp->t_precisions[which] = 0; 880446ccdd0SGleb Smirnoff 88176578d60SMichael Tuexen tcp_bblog_timer(tp, which, TT_PROCESSING, 0); 88276578d60SMichael Tuexen tp_valid = tcp_timersw[which](tp); 88376578d60SMichael Tuexen if (tp_valid) { 88476578d60SMichael Tuexen tcp_bblog_timer(tp, which, TT_PROCESSED, 0); 885446ccdd0SGleb Smirnoff if ((which = tcp_timer_next(tp, &precision)) != TT_N) { 886446ccdd0SGleb Smirnoff callout_reset_sbt_on(&tp->t_callout, 887446ccdd0SGleb Smirnoff tp->t_timers[which], precision, tcp_timer_enter, 888446ccdd0SGleb Smirnoff tp, inp_to_cpuid(inp), C_ABSOLUTE); 889446ccdd0SGleb Smirnoff } 890446ccdd0SGleb Smirnoff INP_WUNLOCK(inp); 891446ccdd0SGleb Smirnoff } 892446ccdd0SGleb Smirnoff 893446ccdd0SGleb Smirnoff curthread->td_pflags &= ~TDP_INTCPCALLOUT; 894446ccdd0SGleb Smirnoff } 895446ccdd0SGleb Smirnoff 896446ccdd0SGleb Smirnoff /* 897446ccdd0SGleb Smirnoff * Activate or stop (delta == 0) a TCP timer. 898446ccdd0SGleb Smirnoff */ 899446ccdd0SGleb Smirnoff void 900446ccdd0SGleb Smirnoff tcp_timer_activate(struct tcpcb *tp, tt_which which, u_int delta) 901446ccdd0SGleb Smirnoff { 902446ccdd0SGleb Smirnoff struct inpcb *inp = tptoinpcb(tp); 903446ccdd0SGleb Smirnoff sbintime_t precision; 90476578d60SMichael Tuexen tt_what what; 90585d94372SRobert Watson 90609fe6320SNavdeep Parhar #ifdef TCP_OFFLOAD 90709fe6320SNavdeep Parhar if (tp->t_flags & TF_TOE) 90809fe6320SNavdeep Parhar return; 90909fe6320SNavdeep Parhar #endif 91009fe6320SNavdeep Parhar 911446ccdd0SGleb Smirnoff INP_WLOCK_ASSERT(inp); 9125571f9cfSJulien Charbon 91376578d60SMichael Tuexen if (delta > 0) { 91476578d60SMichael Tuexen what = TT_STARTING; 915446ccdd0SGleb Smirnoff callout_when(tick_sbt * delta, 0, C_HARDCLOCK, 916446ccdd0SGleb Smirnoff &tp->t_timers[which], &tp->t_precisions[which]); 91776578d60SMichael Tuexen } else { 91876578d60SMichael Tuexen what = TT_STOPPING; 919446ccdd0SGleb Smirnoff tp->t_timers[which] = SBT_MAX; 92076578d60SMichael Tuexen } 92176578d60SMichael Tuexen tcp_bblog_timer(tp, which, what, delta); 922446ccdd0SGleb Smirnoff 923446ccdd0SGleb Smirnoff if ((which = tcp_timer_next(tp, &precision)) != TT_N) 924446ccdd0SGleb Smirnoff callout_reset_sbt_on(&tp->t_callout, tp->t_timers[which], 925446ccdd0SGleb Smirnoff precision, tcp_timer_enter, tp, inp_to_cpuid(inp), 926446ccdd0SGleb Smirnoff C_ABSOLUTE); 927446ccdd0SGleb Smirnoff else 928446ccdd0SGleb Smirnoff callout_stop(&tp->t_callout); 92985d94372SRobert Watson } 93085d94372SRobert Watson 931446ccdd0SGleb Smirnoff bool 932446ccdd0SGleb Smirnoff tcp_timer_active(struct tcpcb *tp, tt_which which) 93385d94372SRobert Watson { 93485d94372SRobert Watson 935446ccdd0SGleb Smirnoff INP_WLOCK_ASSERT(tptoinpcb(tp)); 936446ccdd0SGleb Smirnoff 937446ccdd0SGleb Smirnoff return (tp->t_timers[which] != SBT_MAX); 938df8bae1dSRodney W. Grimes } 939b8614722SMike Silbersack 940446ccdd0SGleb Smirnoff /* 941446ccdd0SGleb Smirnoff * Stop all timers associated with tcpcb. 942446ccdd0SGleb Smirnoff * 943446ccdd0SGleb Smirnoff * Called only on tcpcb destruction. The tcpcb shall already be dropped from 944446ccdd0SGleb Smirnoff * the pcb lookup database and socket is not losing the last reference. 945446ccdd0SGleb Smirnoff * 946446ccdd0SGleb Smirnoff * XXXGL: unfortunately our callout(9) is not able to fully stop a locked 947446ccdd0SGleb Smirnoff * callout even when only two threads are involved: the callout itself and the 948446ccdd0SGleb Smirnoff * thread that does callout_stop(). See where softclock_call_cc() swaps the 949446ccdd0SGleb Smirnoff * callwheel lock to callout lock and then checks cc_exec_cancel(). This is 950446ccdd0SGleb Smirnoff * the race window. If it happens, the tcp_timer_enter() won't be executed, 951446ccdd0SGleb Smirnoff * however pcb lock will be locked and released, hence we can't free memory. 952446ccdd0SGleb Smirnoff * Until callout(9) is improved, just keep retrying. In my profiling I've seen 953446ccdd0SGleb Smirnoff * such event happening less than 1 time per hour with 20-30 Gbit/s of traffic. 954446ccdd0SGleb Smirnoff */ 955446ccdd0SGleb Smirnoff void 956446ccdd0SGleb Smirnoff tcp_timer_stop(struct tcpcb *tp) 957ff945008SGleb Smirnoff { 9589eb0e832SGleb Smirnoff struct inpcb *inp = tptoinpcb(tp); 959ff945008SGleb Smirnoff 960446ccdd0SGleb Smirnoff INP_WLOCK_ASSERT(inp); 9619eb0e832SGleb Smirnoff 962446ccdd0SGleb Smirnoff if (curthread->td_pflags & TDP_INTCPCALLOUT) { 963446ccdd0SGleb Smirnoff int stopped __diagused; 964446ccdd0SGleb Smirnoff 965446ccdd0SGleb Smirnoff stopped = callout_stop(&tp->t_callout); 966446ccdd0SGleb Smirnoff MPASS(stopped == 0); 967446ccdd0SGleb Smirnoff } else while(__predict_false(callout_stop(&tp->t_callout) == 0)) { 968ff945008SGleb Smirnoff INP_WUNLOCK(inp); 969446ccdd0SGleb Smirnoff kern_yield(PRI_UNCHANGED); 970446ccdd0SGleb Smirnoff INP_WLOCK(inp); 9715571f9cfSJulien Charbon } 9725571f9cfSJulien Charbon } 973