/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"
#include "opt_rss.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/if.h>
#include <net/route.h>
#include <net/rss_config.h>
#include <net/vnet.h>
#include <net/netisr.h>

#include <netinet/in.h>
#include <netinet/in_kdtrace.h>
#include <netinet/in_pcb.h>
#include <netinet/in_rss.h>
#include <netinet/in_systm.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/cc/cc.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif

/*
 * Run-time tunables for the TCP timers, registered under the
 * _net_inet_tcp sysctl node.  Every SYSCTL_PROC entry below uses the
 * sysctl_msec_to_ticks handler, which is defined outside this file.
 * NOTE(review): presumably that handler exposes the tick-valued variable
 * to userland in milliseconds -- confirm against its definition.
 */
int	tcp_persmin;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_persmin, 0, sysctl_msec_to_ticks, "I", "minimum persistence interval");

int	tcp_persmax;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_persmax, 0, sysctl_msec_to_ticks, "I", "maximum persistence interval");

int	tcp_keepinit;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");

int	tcp_keepidle;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");

int	tcp_keepintvl;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");

int	tcp_delacktime;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
    "Time before a delayed ACK is sent");

int	tcp_msl;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");

int	tcp_rexmit_min;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
    "Minimum Retransmission Timeout");

int	tcp_rexmit_slop;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
    "Retransmission Timer Slop");

/*
 * When non-zero, tcp_timer_keep() sends keepalive probes on every
 * connection, not only on sockets that set SO_KEEPALIVE.
 */
static int	always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");

/* Read by tcp_timer_2msl() to decide whether to close FIN_WAIT_2 early. */
int	tcp_fast_finwait2_recycle = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
    &tcp_fast_finwait2_recycle, 0,
    "Recycle closed FIN_WAIT_2 connections faster");

int	tcp_finwait2_timeout;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");

int	tcp_keepcnt = TCPTV_KEEPCNT;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
    "Number of keepalive probes to send");

	/* max idle probes */
int	tcp_maxpersistidle;

static int	tcp_rexmit_drop_options = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
    &tcp_rexmit_drop_options, 0,
    "Drop TCP options from 3rd and later retransmitted SYN");

/*
 * Path MTU black hole detection knobs.  These are declared with
 * VNET_DEFINE and registered with CTLFLAG_VNET, i.e. one instance per vnet.
 */
static VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
#define	V_tcp_pmtud_blackhole_detect	VNET(tcp_pmtud_blackhole_detect)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
    CTLFLAG_RW|CTLFLAG_VNET,
    &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
    "Path MTU Discovery Black Hole Detection Enabled");

#ifdef INET
static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
#define	V_tcp_pmtud_blackhole_mss	VNET(tcp_pmtud_blackhole_mss)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
    CTLFLAG_RW|CTLFLAG_VNET,
    &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
    "Path MTU Discovery Black Hole Detection lowered MSS");
#endif

#ifdef INET6
static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
#define	V_tcp_v6pmtud_blackhole_mss	VNET(tcp_v6pmtud_blackhole_mss)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
    CTLFLAG_RW|CTLFLAG_VNET,
    &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
    "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
#endif

/*
 * Selects whether inp_to_cpuid() spreads connections over CPUs (non-zero)
 * or always answers CPU 0 (zero).  The default is 1 for kernels built with
 * RSS and 0 otherwise.
 */
#ifdef RSS
static int	per_cpu_timers = 1;
#else
static int	per_cpu_timers = 0;
#endif
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
    &per_cpu_timers , 0, "run tcp timers on all cpus");

/*
 * Compiled out.  The non-RSS branch of inp_to_cpuid() below computes the
 * same flowid -> CPU mapping as this macro did.
 */
#if 0
#define	INP_CPU(inp)	(per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
#endif

/*
 * Map the given inp to a CPU id.
 *
 * This queries RSS if it's compiled in, else it defaults to the current
 * CPU ID.
187883831c6SAdrian Chadd */ 188883831c6SAdrian Chadd static inline int 189883831c6SAdrian Chadd inp_to_cpuid(struct inpcb *inp) 190883831c6SAdrian Chadd { 191883831c6SAdrian Chadd u_int cpuid; 192883831c6SAdrian Chadd 193883831c6SAdrian Chadd #ifdef RSS 194883831c6SAdrian Chadd if (per_cpu_timers) { 195883831c6SAdrian Chadd cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); 196883831c6SAdrian Chadd if (cpuid == NETISR_CPUID_NONE) 197883831c6SAdrian Chadd return (curcpu); /* XXX */ 198883831c6SAdrian Chadd else 199883831c6SAdrian Chadd return (cpuid); 200883831c6SAdrian Chadd } 201883831c6SAdrian Chadd #else 202883831c6SAdrian Chadd /* Legacy, pre-RSS behaviour */ 203883831c6SAdrian Chadd if (per_cpu_timers) { 204883831c6SAdrian Chadd /* 205883831c6SAdrian Chadd * We don't have a flowid -> cpuid mapping, so cheat and 206883831c6SAdrian Chadd * just map unknown cpuids to curcpu. Not the best, but 207883831c6SAdrian Chadd * apparently better than defaulting to swi 0. 208883831c6SAdrian Chadd */ 209883831c6SAdrian Chadd cpuid = inp->inp_flowid % (mp_maxid + 1); 210883831c6SAdrian Chadd if (! CPU_ABSENT(cpuid)) 211883831c6SAdrian Chadd return (cpuid); 212883831c6SAdrian Chadd return (curcpu); 213883831c6SAdrian Chadd } 214883831c6SAdrian Chadd #endif 215883831c6SAdrian Chadd /* Default for RSS and non-RSS - cpuid 0 */ 216883831c6SAdrian Chadd else { 217883831c6SAdrian Chadd return (0); 218883831c6SAdrian Chadd } 219883831c6SAdrian Chadd } 22087aedea4SKip Macy 221df8bae1dSRodney W. Grimes /* 222df8bae1dSRodney W. Grimes * Tcp protocol timeout routine called every 500 ms. 2239b8b58e0SJonathan Lemon * Updates timestamps used for TCP 224df8bae1dSRodney W. Grimes * causes finite state machine actions if timers expire. 225df8bae1dSRodney W. Grimes */ 226df8bae1dSRodney W. Grimes void 227e2f2059fSMike Silbersack tcp_slowtimo(void) 228df8bae1dSRodney W. 
Grimes { 2298b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 23015bd2b43SDavid Greenman 2315ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 2328b615593SMarko Zec VNET_FOREACH(vnet_iter) { 2338b615593SMarko Zec CURVNET_SET(vnet_iter); 234cea40c48SJulien Charbon (void) tcp_tw_2msl_scan(0); 2358b615593SMarko Zec CURVNET_RESTORE(); 2368b615593SMarko Zec } 2375ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 238df8bae1dSRodney W. Grimes } 239df8bae1dSRodney W. Grimes 2407d42e30cSJonathan Lemon int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 2417d42e30cSJonathan Lemon { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 2427d42e30cSJonathan Lemon 243df8bae1dSRodney W. Grimes int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 244f058535dSJeffrey Hsu { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 245df8bae1dSRodney W. Grimes 246f058535dSJeffrey Hsu static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 247e79adb8eSGarrett Wollman 248df8bae1dSRodney W. Grimes /* 249df8bae1dSRodney W. Grimes * TCP timer processing. 250df8bae1dSRodney W. Grimes */ 25185d94372SRobert Watson 25285d94372SRobert Watson void 25385d94372SRobert Watson tcp_timer_delack(void *xtp) 254df8bae1dSRodney W. 
Grimes { 25585d94372SRobert Watson struct tcpcb *tp = xtp; 25685d94372SRobert Watson struct inpcb *inp; 2578b615593SMarko Zec CURVNET_SET(tp->t_vnet); 25885d94372SRobert Watson 25985d94372SRobert Watson inp = tp->t_inpcb; 2605571f9cfSJulien Charbon KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); 2618501a69cSRobert Watson INP_WLOCK(inp); 262655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_delack) || 263655f934bSMikolaj Golub !callout_active(&tp->t_timers->tt_delack)) { 2648501a69cSRobert Watson INP_WUNLOCK(inp); 2658b615593SMarko Zec CURVNET_RESTORE(); 26685d94372SRobert Watson return; 26785d94372SRobert Watson } 268e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_delack); 269655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 270655f934bSMikolaj Golub INP_WUNLOCK(inp); 271655f934bSMikolaj Golub CURVNET_RESTORE(); 272655f934bSMikolaj Golub return; 273655f934bSMikolaj Golub } 2749b8b58e0SJonathan Lemon tp->t_flags |= TF_ACKNOW; 27578b50714SRobert Watson TCPSTAT_INC(tcps_delack); 27655bceb1eSRandall Stewart (void) tp->t_fb->tfb_tcp_output(tp); 2778501a69cSRobert Watson INP_WUNLOCK(inp); 2788b615593SMarko Zec CURVNET_RESTORE(); 2799b8b58e0SJonathan Lemon } 2809b8b58e0SJonathan Lemon 2810fa047b9SRandall Stewart /* 2820fa047b9SRandall Stewart * When a timer wants to remove a TCB it must 2830fa047b9SRandall Stewart * hold the INP_INFO_RLOCK(). The timer function 2840fa047b9SRandall Stewart * should only have grabbed the INP_WLOCK() when 2850fa047b9SRandall Stewart * it entered. To safely switch to holding both the 2860fa047b9SRandall Stewart * INP_INFO_RLOCK() and the INP_WLOCK() we must first 287eadd00f8SRandall Stewart * grab a reference on the inp, which will hold the inp 288eadd00f8SRandall Stewart * so that it can't be removed. We then unlock the INP_WLOCK(), 289eadd00f8SRandall Stewart * and grab the INP_INFO_RLOCK() lock. 
Once we have the INP_INFO_RLOCK() 290eadd00f8SRandall Stewart * we proceed again to get the INP_WLOCK() (this preserves proper 291eadd00f8SRandall Stewart * lock order). After acquiring the INP_WLOCK we must check if someone 292eadd00f8SRandall Stewart * else deleted the pcb i.e. the inp_flags check. 293eadd00f8SRandall Stewart * If so we return 1 otherwise we return 0. 2940fa047b9SRandall Stewart * 295eadd00f8SRandall Stewart * No matter what the tcp_inpinfo_lock_add() function 2960fa047b9SRandall Stewart * returns the caller must afterwards call tcp_inpinfo_lock_del() 2970fa047b9SRandall Stewart * to drop the locks and reference properly. 2980fa047b9SRandall Stewart */ 2990fa047b9SRandall Stewart 300b07fef50SRandall Stewart int 301b07fef50SRandall Stewart tcp_inpinfo_lock_add(struct inpcb *inp) 302b07fef50SRandall Stewart { 303b07fef50SRandall Stewart in_pcbref(inp); 304b07fef50SRandall Stewart INP_WUNLOCK(inp); 305b07fef50SRandall Stewart INP_INFO_RLOCK(&V_tcbinfo); 306b07fef50SRandall Stewart INP_WLOCK(inp); 307b07fef50SRandall Stewart if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { 308b07fef50SRandall Stewart return(1); 309b07fef50SRandall Stewart } 310b07fef50SRandall Stewart return(0); 311b07fef50SRandall Stewart 312b07fef50SRandall Stewart } 313b07fef50SRandall Stewart 314b07fef50SRandall Stewart void 315b07fef50SRandall Stewart tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp) 316b07fef50SRandall Stewart { 317b07fef50SRandall Stewart INP_INFO_RUNLOCK(&V_tcbinfo); 318b07fef50SRandall Stewart if (inp && (tp == NULL)) { 319b07fef50SRandall Stewart /* 320b07fef50SRandall Stewart * If tcp_close/drop() gets called and tp 321b07fef50SRandall Stewart * returns NULL, then the function dropped 322b07fef50SRandall Stewart * the inp lock, we hold a reference keeping 323b07fef50SRandall Stewart * this around, so we must re-aquire the 324b07fef50SRandall Stewart * INP_WLOCK() in order to proceed with 325b07fef50SRandall Stewart * our dropping the inp 
reference. 326b07fef50SRandall Stewart */ 327b07fef50SRandall Stewart INP_WLOCK(inp); 328b07fef50SRandall Stewart } 329b07fef50SRandall Stewart if (inp && in_pcbrele_wlocked(inp) == 0) 330b07fef50SRandall Stewart INP_WUNLOCK(inp); 331b07fef50SRandall Stewart } 332b07fef50SRandall Stewart 33385d94372SRobert Watson void 33485d94372SRobert Watson tcp_timer_2msl(void *xtp) 3359b8b58e0SJonathan Lemon { 33685d94372SRobert Watson struct tcpcb *tp = xtp; 33785d94372SRobert Watson struct inpcb *inp; 3388b615593SMarko Zec CURVNET_SET(tp->t_vnet); 3399b8b58e0SJonathan Lemon #ifdef TCPDEBUG 3409b8b58e0SJonathan Lemon int ostate; 3419b8b58e0SJonathan Lemon 3429b8b58e0SJonathan Lemon ostate = tp->t_state; 3439b8b58e0SJonathan Lemon #endif 34485d94372SRobert Watson inp = tp->t_inpcb; 3455571f9cfSJulien Charbon KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); 3468501a69cSRobert Watson INP_WLOCK(inp); 34785d94372SRobert Watson tcp_free_sackholes(tp); 348655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_2msl) || 349e2f2059fSMike Silbersack !callout_active(&tp->t_timers->tt_2msl)) { 3508501a69cSRobert Watson INP_WUNLOCK(tp->t_inpcb); 3518b615593SMarko Zec CURVNET_RESTORE(); 35285d94372SRobert Watson return; 35385d94372SRobert Watson } 354e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_2msl); 355655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 356655f934bSMikolaj Golub INP_WUNLOCK(inp); 357655f934bSMikolaj Golub CURVNET_RESTORE(); 358655f934bSMikolaj Golub return; 359655f934bSMikolaj Golub } 3605571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, 3615571f9cfSJulien Charbon ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); 36285d94372SRobert Watson /* 363df8bae1dSRodney W. Grimes * 2 MSL timeout in shutdown went off. If we're closed but 364df8bae1dSRodney W. 
Grimes * still waiting for peer to close and connection has been idle 36531a7749dSJulien Charbon * too long delete connection control block. Otherwise, check 36631a7749dSJulien Charbon * again in a bit. 36731a7749dSJulien Charbon * 36831a7749dSJulien Charbon * If in TIME_WAIT state just ignore as this timeout is handled in 36931a7749dSJulien Charbon * tcp_tw_2msl_scan(). 3707c72af87SMohan Srinivasan * 3717c72af87SMohan Srinivasan * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 3727c72af87SMohan Srinivasan * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 3737c72af87SMohan Srinivasan * Ignore fact that there were recent incoming segments. 374df8bae1dSRodney W. Grimes */ 37531a7749dSJulien Charbon if ((inp->inp_flags & INP_TIMEWAIT) != 0) { 37631a7749dSJulien Charbon INP_WUNLOCK(inp); 37731a7749dSJulien Charbon CURVNET_RESTORE(); 37831a7749dSJulien Charbon return; 37931a7749dSJulien Charbon } 3807c72af87SMohan Srinivasan if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 38185d94372SRobert Watson tp->t_inpcb && tp->t_inpcb->inp_socket && 3827c72af87SMohan Srinivasan (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 38378b50714SRobert Watson TCPSTAT_INC(tcps_finwait2_drops); 384b07fef50SRandall Stewart if (tcp_inpinfo_lock_add(inp)) { 385b07fef50SRandall Stewart tcp_inpinfo_lock_del(inp, tp); 386b07fef50SRandall Stewart goto out; 387b07fef50SRandall Stewart } 38885d94372SRobert Watson tp = tcp_close(tp); 389b07fef50SRandall Stewart tcp_inpinfo_lock_del(inp, tp); 390b07fef50SRandall Stewart goto out; 3917c72af87SMohan Srinivasan } else { 392d6de19acSJulien Charbon if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) { 393b07fef50SRandall Stewart callout_reset(&tp->t_timers->tt_2msl, 394b07fef50SRandall Stewart TP_KEEPINTVL(tp), tcp_timer_2msl, tp); 395b07fef50SRandall Stewart } else { 396b07fef50SRandall Stewart if (tcp_inpinfo_lock_add(inp)) { 397b07fef50SRandall Stewart tcp_inpinfo_lock_del(inp, tp); 
398b07fef50SRandall Stewart goto out; 399d6de19acSJulien Charbon } 40085d94372SRobert Watson tp = tcp_close(tp); 401b07fef50SRandall Stewart tcp_inpinfo_lock_del(inp, tp); 402b07fef50SRandall Stewart goto out; 403b07fef50SRandall Stewart } 4047c72af87SMohan Srinivasan } 405df8bae1dSRodney W. Grimes 4069b8b58e0SJonathan Lemon #ifdef TCPDEBUG 407586b4a0eSKonstantin Belousov if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 408fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 4099b8b58e0SJonathan Lemon PRU_SLOWTIMO); 4109b8b58e0SJonathan Lemon #endif 4115d06879aSGeorge V. Neville-Neil TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); 4125d06879aSGeorge V. Neville-Neil 41385d94372SRobert Watson if (tp != NULL) 4148501a69cSRobert Watson INP_WUNLOCK(inp); 415b07fef50SRandall Stewart out: 4168b615593SMarko Zec CURVNET_RESTORE(); 4179b8b58e0SJonathan Lemon } 4189b8b58e0SJonathan Lemon 41985d94372SRobert Watson void 42085d94372SRobert Watson tcp_timer_keep(void *xtp) 4219b8b58e0SJonathan Lemon { 42285d94372SRobert Watson struct tcpcb *tp = xtp; 42308517d53SMike Silbersack struct tcptemp *t_template; 42485d94372SRobert Watson struct inpcb *inp; 4258b615593SMarko Zec CURVNET_SET(tp->t_vnet); 4269b8b58e0SJonathan Lemon #ifdef TCPDEBUG 4279b8b58e0SJonathan Lemon int ostate; 4289b8b58e0SJonathan Lemon 4299b8b58e0SJonathan Lemon ostate = tp->t_state; 4309b8b58e0SJonathan Lemon #endif 43185d94372SRobert Watson inp = tp->t_inpcb; 4325571f9cfSJulien Charbon KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); 4338501a69cSRobert Watson INP_WLOCK(inp); 434655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_keep) || 435655f934bSMikolaj Golub !callout_active(&tp->t_timers->tt_keep)) { 4368501a69cSRobert Watson INP_WUNLOCK(inp); 4378b615593SMarko Zec CURVNET_RESTORE(); 43885d94372SRobert Watson return; 43985d94372SRobert Watson } 440e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_keep); 
441655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 442655f934bSMikolaj Golub INP_WUNLOCK(inp); 443655f934bSMikolaj Golub CURVNET_RESTORE(); 444655f934bSMikolaj Golub return; 445655f934bSMikolaj Golub } 4465571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, 4475571f9cfSJulien Charbon ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); 4486d172f58SJonathan T. Looney 4496d172f58SJonathan T. Looney /* 4506d172f58SJonathan T. Looney * Because we don't regularly reset the keepalive callout in 4516d172f58SJonathan T. Looney * the ESTABLISHED state, it may be that we don't actually need 4526d172f58SJonathan T. Looney * to send a keepalive yet. If that occurs, schedule another 4536d172f58SJonathan T. Looney * call for the next time the keepalive timer might expire. 4546d172f58SJonathan T. Looney */ 4556d172f58SJonathan T. Looney if (TCPS_HAVEESTABLISHED(tp->t_state)) { 4566d172f58SJonathan T. Looney u_int idletime; 4576d172f58SJonathan T. Looney 4586d172f58SJonathan T. Looney idletime = ticks - tp->t_rcvtime; 4596d172f58SJonathan T. Looney if (idletime < TP_KEEPIDLE(tp)) { 4606d172f58SJonathan T. Looney callout_reset(&tp->t_timers->tt_keep, 4616d172f58SJonathan T. Looney TP_KEEPIDLE(tp) - idletime, tcp_timer_keep, tp); 4626d172f58SJonathan T. Looney INP_WUNLOCK(inp); 4636d172f58SJonathan T. Looney CURVNET_RESTORE(); 4646d172f58SJonathan T. Looney return; 4656d172f58SJonathan T. Looney } 4666d172f58SJonathan T. Looney } 4676d172f58SJonathan T. Looney 4689b8b58e0SJonathan Lemon /* 4699b8b58e0SJonathan Lemon * Keep-alive timer went off; send something 4709b8b58e0SJonathan Lemon * or drop connection if idle for too long. 
4719b8b58e0SJonathan Lemon */ 47278b50714SRobert Watson TCPSTAT_INC(tcps_keeptimeo); 4739b8b58e0SJonathan Lemon if (tp->t_state < TCPS_ESTABLISHED) 4749b8b58e0SJonathan Lemon goto dropit; 4752a074620SSam Leffler if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && 4769b8b58e0SJonathan Lemon tp->t_state <= TCPS_CLOSING) { 4779077f387SGleb Smirnoff if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) 4789b8b58e0SJonathan Lemon goto dropit; 4799b8b58e0SJonathan Lemon /* 4809b8b58e0SJonathan Lemon * Send a packet designed to force a response 4819b8b58e0SJonathan Lemon * if the peer is up and reachable: 4829b8b58e0SJonathan Lemon * either an ACK if the connection is still alive, 4839b8b58e0SJonathan Lemon * or an RST if the peer has closed the connection 4849b8b58e0SJonathan Lemon * due to timeout or reboot. 4859b8b58e0SJonathan Lemon * Using sequence number tp->snd_una-1 4869b8b58e0SJonathan Lemon * causes the transmitted zero-length segment 4879b8b58e0SJonathan Lemon * to lie outside the receive window; 4889b8b58e0SJonathan Lemon * by the protocol spec, this requires the 4899b8b58e0SJonathan Lemon * correspondent TCP to respond. 
4909b8b58e0SJonathan Lemon */ 49178b50714SRobert Watson TCPSTAT_INC(tcps_keepprobe); 49279909384SJonathan Lemon t_template = tcpip_maketemplate(inp); 49308517d53SMike Silbersack if (t_template) { 49408517d53SMike Silbersack tcp_respond(tp, t_template->tt_ipgen, 49508517d53SMike Silbersack &t_template->tt_t, (struct mbuf *)NULL, 4969b8b58e0SJonathan Lemon tp->rcv_nxt, tp->snd_una - 1, 0); 49753640b0eSRobert Watson free(t_template, M_TEMP); 49808517d53SMike Silbersack } 499b07fef50SRandall Stewart callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), 500b07fef50SRandall Stewart tcp_timer_keep, tp); 501b07fef50SRandall Stewart } else 502b07fef50SRandall Stewart callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), 503b07fef50SRandall Stewart tcp_timer_keep, tp); 5049b8b58e0SJonathan Lemon 5059b8b58e0SJonathan Lemon #ifdef TCPDEBUG 5062a074620SSam Leffler if (inp->inp_socket->so_options & SO_DEBUG) 507fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 5089b8b58e0SJonathan Lemon PRU_SLOWTIMO); 5099b8b58e0SJonathan Lemon #endif 5105d06879aSGeorge V. 
Neville-Neil TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); 5118501a69cSRobert Watson INP_WUNLOCK(inp); 5128b615593SMarko Zec CURVNET_RESTORE(); 51385d94372SRobert Watson return; 5149b8b58e0SJonathan Lemon 5159b8b58e0SJonathan Lemon dropit: 51678b50714SRobert Watson TCPSTAT_INC(tcps_keepdrops); 517b07fef50SRandall Stewart 518b07fef50SRandall Stewart if (tcp_inpinfo_lock_add(inp)) { 519b07fef50SRandall Stewart tcp_inpinfo_lock_del(inp, tp); 520b07fef50SRandall Stewart goto out; 521b07fef50SRandall Stewart } 52285d94372SRobert Watson tp = tcp_drop(tp, ETIMEDOUT); 52385d94372SRobert Watson 52485d94372SRobert Watson #ifdef TCPDEBUG 52585d94372SRobert Watson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 52685d94372SRobert Watson tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 52785d94372SRobert Watson PRU_SLOWTIMO); 52885d94372SRobert Watson #endif 5295d06879aSGeorge V. Neville-Neil TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); 530b07fef50SRandall Stewart tcp_inpinfo_lock_del(inp, tp); 531b07fef50SRandall Stewart out: 5328b615593SMarko Zec CURVNET_RESTORE(); 5339b8b58e0SJonathan Lemon } 5349b8b58e0SJonathan Lemon 53585d94372SRobert Watson void 53685d94372SRobert Watson tcp_timer_persist(void *xtp) 5379b8b58e0SJonathan Lemon { 53885d94372SRobert Watson struct tcpcb *tp = xtp; 53985d94372SRobert Watson struct inpcb *inp; 5408b615593SMarko Zec CURVNET_SET(tp->t_vnet); 5419b8b58e0SJonathan Lemon #ifdef TCPDEBUG 5429b8b58e0SJonathan Lemon int ostate; 5439b8b58e0SJonathan Lemon 5449b8b58e0SJonathan Lemon ostate = tp->t_state; 5459b8b58e0SJonathan Lemon #endif 54685d94372SRobert Watson inp = tp->t_inpcb; 5475571f9cfSJulien Charbon KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); 5488501a69cSRobert Watson INP_WLOCK(inp); 549655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_persist) || 550655f934bSMikolaj Golub !callout_active(&tp->t_timers->tt_persist)) { 5518501a69cSRobert Watson INP_WUNLOCK(inp); 5528b615593SMarko 
Zec CURVNET_RESTORE(); 55385d94372SRobert Watson return; 55485d94372SRobert Watson } 555e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_persist); 556655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 557655f934bSMikolaj Golub INP_WUNLOCK(inp); 558655f934bSMikolaj Golub CURVNET_RESTORE(); 559655f934bSMikolaj Golub return; 560655f934bSMikolaj Golub } 5615571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, 5625571f9cfSJulien Charbon ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); 5639b8b58e0SJonathan Lemon /* 564a4641f4eSPedro F. Giffuni * Persistence timer into zero window. 5659b8b58e0SJonathan Lemon * Force a byte to be output, if possible. 5669b8b58e0SJonathan Lemon */ 56778b50714SRobert Watson TCPSTAT_INC(tcps_persisttimeo); 5689b8b58e0SJonathan Lemon /* 5699b8b58e0SJonathan Lemon * Hack: if the peer is dead/unreachable, we do not 5709b8b58e0SJonathan Lemon * time out if the window is closed. After a full 5719b8b58e0SJonathan Lemon * backoff, drop the connection if the idle time 5729b8b58e0SJonathan Lemon * (no responses to probes) reaches the maximum 5739b8b58e0SJonathan Lemon * backoff that we would use if retransmitting. 
5749b8b58e0SJonathan Lemon */ 5759b8b58e0SJonathan Lemon if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 5766b0c5521SJohn Baldwin (ticks - tp->t_rcvtime >= tcp_maxpersistidle || 5776b0c5521SJohn Baldwin ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 57878b50714SRobert Watson TCPSTAT_INC(tcps_persistdrop); 579b07fef50SRandall Stewart if (tcp_inpinfo_lock_add(inp)) { 580b07fef50SRandall Stewart tcp_inpinfo_lock_del(inp, tp); 581b07fef50SRandall Stewart goto out; 582b07fef50SRandall Stewart } 58385d94372SRobert Watson tp = tcp_drop(tp, ETIMEDOUT); 584b07fef50SRandall Stewart tcp_inpinfo_lock_del(inp, tp); 58585d94372SRobert Watson goto out; 5869b8b58e0SJonathan Lemon } 587322181c9SAndre Oppermann /* 588322181c9SAndre Oppermann * If the user has closed the socket then drop a persisting 589322181c9SAndre Oppermann * connection after a much reduced timeout. 590322181c9SAndre Oppermann */ 591322181c9SAndre Oppermann if (tp->t_state > TCPS_CLOSE_WAIT && 592322181c9SAndre Oppermann (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { 593322181c9SAndre Oppermann TCPSTAT_INC(tcps_persistdrop); 594b07fef50SRandall Stewart if (tcp_inpinfo_lock_add(inp)) { 595b07fef50SRandall Stewart tcp_inpinfo_lock_del(inp, tp); 596b07fef50SRandall Stewart goto out; 597b07fef50SRandall Stewart } 598322181c9SAndre Oppermann tp = tcp_drop(tp, ETIMEDOUT); 599b07fef50SRandall Stewart tcp_inpinfo_lock_del(inp, tp); 600322181c9SAndre Oppermann goto out; 601322181c9SAndre Oppermann } 6029b8b58e0SJonathan Lemon tcp_setpersist(tp); 6032cdbfa66SPaul Saab tp->t_flags |= TF_FORCEDATA; 60455bceb1eSRandall Stewart (void) tp->t_fb->tfb_tcp_output(tp); 6052cdbfa66SPaul Saab tp->t_flags &= ~TF_FORCEDATA; 6069b8b58e0SJonathan Lemon 6079b8b58e0SJonathan Lemon #ifdef TCPDEBUG 608ffb761f6SGleb Smirnoff if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 609ffb761f6SGleb Smirnoff tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 6109b8b58e0SJonathan Lemon #endif 6115d06879aSGeorge V. 
Neville-Neil TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); 6128501a69cSRobert Watson INP_WUNLOCK(inp); 613b07fef50SRandall Stewart out: 6148b615593SMarko Zec CURVNET_RESTORE(); 6159b8b58e0SJonathan Lemon } 6169b8b58e0SJonathan Lemon 61785d94372SRobert Watson void 61885d94372SRobert Watson tcp_timer_rexmt(void * xtp) 6199b8b58e0SJonathan Lemon { 62085d94372SRobert Watson struct tcpcb *tp = xtp; 6218b615593SMarko Zec CURVNET_SET(tp->t_vnet); 6229b8b58e0SJonathan Lemon int rexmt; 62385d94372SRobert Watson struct inpcb *inp; 6249b8b58e0SJonathan Lemon #ifdef TCPDEBUG 6259b8b58e0SJonathan Lemon int ostate; 6269b8b58e0SJonathan Lemon 6279b8b58e0SJonathan Lemon ostate = tp->t_state; 6289b8b58e0SJonathan Lemon #endif 62985d94372SRobert Watson inp = tp->t_inpcb; 6305571f9cfSJulien Charbon KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); 6318501a69cSRobert Watson INP_WLOCK(inp); 632655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_rexmt) || 633655f934bSMikolaj Golub !callout_active(&tp->t_timers->tt_rexmt)) { 6348501a69cSRobert Watson INP_WUNLOCK(inp); 6358b615593SMarko Zec CURVNET_RESTORE(); 63685d94372SRobert Watson return; 63785d94372SRobert Watson } 638e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_rexmt); 639655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 640655f934bSMikolaj Golub INP_WUNLOCK(inp); 641655f934bSMikolaj Golub CURVNET_RESTORE(); 642655f934bSMikolaj Golub return; 643655f934bSMikolaj Golub } 6445571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, 6455571f9cfSJulien Charbon ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); 6466d90faf3SPaul Saab tcp_free_sackholes(tp); 6475105a92cSRandall Stewart if (tp->t_fb->tfb_tcp_rexmit_tmr) { 6485105a92cSRandall Stewart /* The stack has a timer action too. */ 6495105a92cSRandall Stewart (*tp->t_fb->tfb_tcp_rexmit_tmr)(tp); 6505105a92cSRandall Stewart } 651df8bae1dSRodney W. Grimes /* 652df8bae1dSRodney W. 
Grimes * Retransmission timer went off. Message has not 653df8bae1dSRodney W. Grimes * been acked within retransmit interval. Back off 654df8bae1dSRodney W. Grimes * to a longer retransmit interval and retransmit one segment. 655df8bae1dSRodney W. Grimes */ 656df8bae1dSRodney W. Grimes if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 657df8bae1dSRodney W. Grimes tp->t_rxtshift = TCP_MAXRXTSHIFT; 65878b50714SRobert Watson TCPSTAT_INC(tcps_timeoutdrop); 659b07fef50SRandall Stewart if (tcp_inpinfo_lock_add(inp)) { 660b07fef50SRandall Stewart tcp_inpinfo_lock_del(inp, tp); 66185d94372SRobert Watson goto out; 6629b8b58e0SJonathan Lemon } 663b07fef50SRandall Stewart tp = tcp_drop(tp, tp->t_softerror ? 664b07fef50SRandall Stewart tp->t_softerror : ETIMEDOUT); 665b07fef50SRandall Stewart tcp_inpinfo_lock_del(inp, tp); 666b07fef50SRandall Stewart goto out; 667b07fef50SRandall Stewart } 668cf8f04f4SAndre Oppermann if (tp->t_state == TCPS_SYN_SENT) { 669cf8f04f4SAndre Oppermann /* 670cf8f04f4SAndre Oppermann * If the SYN was retransmitted, indicate CWND to be 671cf8f04f4SAndre Oppermann * limited to 1 segment in cc_conn_init(). 672cf8f04f4SAndre Oppermann */ 673cf8f04f4SAndre Oppermann tp->snd_cwnd = 1; 674cf8f04f4SAndre Oppermann } else if (tp->t_rxtshift == 1) { 6759b8b58e0SJonathan Lemon /* 6769b8b58e0SJonathan Lemon * first retransmit; record ssthresh and cwnd so they can 6779b8b58e0SJonathan Lemon * be recovered if this turns out to be a "bad" retransmit. 6789b8b58e0SJonathan Lemon * A retransmit is considered "bad" if an ACK for this 6799b8b58e0SJonathan Lemon * segment is received within RTT/2 interval; the assumption 6809b8b58e0SJonathan Lemon * here is that the ACK was already in flight. See 6819b8b58e0SJonathan Lemon * "On Estimating End-to-End Network Path Properties" by 6829b8b58e0SJonathan Lemon * Allman and Paxson for more details. 
6839b8b58e0SJonathan Lemon */ 6849b8b58e0SJonathan Lemon tp->snd_cwnd_prev = tp->snd_cwnd; 6859b8b58e0SJonathan Lemon tp->snd_ssthresh_prev = tp->snd_ssthresh; 6869d11646dSJeffrey Hsu tp->snd_recover_prev = tp->snd_recover; 687dbc42409SLawrence Stewart if (IN_FASTRECOVERY(tp->t_flags)) 6889d11646dSJeffrey Hsu tp->t_flags |= TF_WASFRECOVERY; 6899d11646dSJeffrey Hsu else 6909d11646dSJeffrey Hsu tp->t_flags &= ~TF_WASFRECOVERY; 691dbc42409SLawrence Stewart if (IN_CONGRECOVERY(tp->t_flags)) 692dbc42409SLawrence Stewart tp->t_flags |= TF_WASCRECOVERY; 693dbc42409SLawrence Stewart else 694dbc42409SLawrence Stewart tp->t_flags &= ~TF_WASCRECOVERY; 6959b8b58e0SJonathan Lemon tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 696672dc4aeSJohn Baldwin tp->t_flags |= TF_PREVVALID; 697672dc4aeSJohn Baldwin } else 698672dc4aeSJohn Baldwin tp->t_flags &= ~TF_PREVVALID; 69978b50714SRobert Watson TCPSTAT_INC(tcps_rexmttimeo); 700281a0fd4SPatrick Kelsey if ((tp->t_state == TCPS_SYN_SENT) || 701281a0fd4SPatrick Kelsey (tp->t_state == TCPS_SYN_RECEIVED)) 702f4748ef5SAndre Oppermann rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift]; 7037d42e30cSJonathan Lemon else 704df8bae1dSRodney W. Grimes rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 705df8bae1dSRodney W. Grimes TCPT_RANGESET(tp->t_rxtcur, rexmt, 706df8bae1dSRodney W. Grimes tp->t_rttmin, TCPTV_REXMTMAX); 707f6f6703fSSean Bruno 708882ac53eSSean Bruno /* 709882ac53eSSean Bruno * We enter the path for PLMTUD if connection is established or, if 710882ac53eSSean Bruno * connection is FIN_WAIT_1 status, reason for the last is that if 711882ac53eSSean Bruno * amount of data we send is very small, we could send it in couple of 712882ac53eSSean Bruno * packets and process straight to FIN. In that case we won't catch 713882ac53eSSean Bruno * ESTABLISHED state. 
714882ac53eSSean Bruno */ 715882ac53eSSean Bruno if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED)) 716882ac53eSSean Bruno || (tp->t_state == TCPS_FIN_WAIT_1))) { 717f6f6703fSSean Bruno #ifdef INET6 718f6f6703fSSean Bruno int isipv6; 719f6f6703fSSean Bruno #endif 720f6f6703fSSean Bruno 721adf43a92SHiren Panchasara /* 722adf43a92SHiren Panchasara * Idea here is that at each stage of mtu probe (usually, 1448 723adf43a92SHiren Panchasara * -> 1188 -> 524) should be given 2 chances to recover before 724adf43a92SHiren Panchasara * further clamping down. 'tp->t_rxtshift % 2 == 0' should 725adf43a92SHiren Panchasara * take care of that. 726adf43a92SHiren Panchasara */ 727f6f6703fSSean Bruno if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) == 728f6f6703fSSean Bruno (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) && 729adf43a92SHiren Panchasara (tp->t_rxtshift >= 2 && tp->t_rxtshift % 2 == 0)) { 730f6f6703fSSean Bruno /* 731f6f6703fSSean Bruno * Enter Path MTU Black-hole Detection mechanism: 732f6f6703fSSean Bruno * - Disable Path MTU Discovery (IP "DF" bit). 733f6f6703fSSean Bruno * - Reduce MTU to lower value than what we 734f6f6703fSSean Bruno * negotiated with peer. 735f6f6703fSSean Bruno */ 736f6f6703fSSean Bruno /* Record that we may have found a black hole. */ 737f6f6703fSSean Bruno tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE; 738f6f6703fSSean Bruno 739f6f6703fSSean Bruno /* Keep track of previous MSS. */ 7400c39d38dSGleb Smirnoff tp->t_pmtud_saved_maxseg = tp->t_maxseg; 741f6f6703fSSean Bruno 742f6f6703fSSean Bruno /* 743f6f6703fSSean Bruno * Reduce the MSS to blackhole value or to the default 744f6f6703fSSean Bruno * in an attempt to retransmit. 745f6f6703fSSean Bruno */ 746f6f6703fSSean Bruno #ifdef INET6 747f6f6703fSSean Bruno isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 
1 : 0; 748f6f6703fSSean Bruno if (isipv6 && 7490c39d38dSGleb Smirnoff tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss) { 750f6f6703fSSean Bruno /* Use the sysctl tuneable blackhole MSS. */ 7510c39d38dSGleb Smirnoff tp->t_maxseg = V_tcp_v6pmtud_blackhole_mss; 752*32a04bb8SSean Bruno TCPSTAT_INC(tcps_pmtud_blackhole_activated); 753f6f6703fSSean Bruno } else if (isipv6) { 754f6f6703fSSean Bruno /* Use the default MSS. */ 7550c39d38dSGleb Smirnoff tp->t_maxseg = V_tcp_v6mssdflt; 756f6f6703fSSean Bruno /* 757f6f6703fSSean Bruno * Disable Path MTU Discovery when we switch to 758f6f6703fSSean Bruno * minmss. 759f6f6703fSSean Bruno */ 760f6f6703fSSean Bruno tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; 761*32a04bb8SSean Bruno TCPSTAT_INC(tcps_pmtud_blackhole_activated_min_mss); 762f6f6703fSSean Bruno } 763f6f6703fSSean Bruno #endif 764f6f6703fSSean Bruno #if defined(INET6) && defined(INET) 765f6f6703fSSean Bruno else 766f6f6703fSSean Bruno #endif 767f6f6703fSSean Bruno #ifdef INET 7680c39d38dSGleb Smirnoff if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss) { 769f6f6703fSSean Bruno /* Use the sysctl tuneable blackhole MSS. */ 7700c39d38dSGleb Smirnoff tp->t_maxseg = V_tcp_pmtud_blackhole_mss; 771*32a04bb8SSean Bruno TCPSTAT_INC(tcps_pmtud_blackhole_activated); 772f6f6703fSSean Bruno } else { 773f6f6703fSSean Bruno /* Use the default MSS. */ 7740c39d38dSGleb Smirnoff tp->t_maxseg = V_tcp_mssdflt; 775f6f6703fSSean Bruno /* 776f6f6703fSSean Bruno * Disable Path MTU Discovery when we switch to 777f6f6703fSSean Bruno * minmss. 778f6f6703fSSean Bruno */ 779f6f6703fSSean Bruno tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; 780*32a04bb8SSean Bruno TCPSTAT_INC(tcps_pmtud_blackhole_activated_min_mss); 781f6f6703fSSean Bruno } 782f6f6703fSSean Bruno #endif 783f6f6703fSSean Bruno /* 784f6f6703fSSean Bruno * Reset the slow-start flight size 785f6f6703fSSean Bruno * as it may depend on the new MSS. 
786f6f6703fSSean Bruno */ 787f6f6703fSSean Bruno if (CC_ALGO(tp)->conn_init != NULL) 788f6f6703fSSean Bruno CC_ALGO(tp)->conn_init(tp->ccv); 789f6f6703fSSean Bruno } else { 790f6f6703fSSean Bruno /* 791f6f6703fSSean Bruno * If further retransmissions are still unsuccessful 792f6f6703fSSean Bruno * with a lowered MTU, maybe this isn't a blackhole and 793f6f6703fSSean Bruno * we restore the previous MSS and blackhole detection 794f6f6703fSSean Bruno * flags. 795adf43a92SHiren Panchasara * The limit '6' is determined by giving each probe 796adf43a92SHiren Panchasara * stage (1448, 1188, 524) 2 chances to recover. 797f6f6703fSSean Bruno */ 798f6f6703fSSean Bruno if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) && 799adf43a92SHiren Panchasara (tp->t_rxtshift > 6)) { 800f6f6703fSSean Bruno tp->t_flags2 |= TF2_PLPMTU_PMTUD; 801f6f6703fSSean Bruno tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; 8020c39d38dSGleb Smirnoff tp->t_maxseg = tp->t_pmtud_saved_maxseg; 803*32a04bb8SSean Bruno TCPSTAT_INC(tcps_pmtud_blackhole_failed); 804f6f6703fSSean Bruno /* 805f6f6703fSSean Bruno * Reset the slow-start flight size as it 806f6f6703fSSean Bruno * may depend on the new MSS. 807f6f6703fSSean Bruno */ 808f6f6703fSSean Bruno if (CC_ALGO(tp)->conn_init != NULL) 809f6f6703fSSean Bruno CC_ALGO(tp)->conn_init(tp->ccv); 810f6f6703fSSean Bruno } 811f6f6703fSSean Bruno } 812f6f6703fSSean Bruno } 813f6f6703fSSean Bruno 814df8bae1dSRodney W. Grimes /* 81577339e1cSAndre Oppermann * Disable RFC1323 and SACK if we haven't got any response to 8167ceb7783SJesper Skriver * our third SYN to work-around some broken terminal servers 8177ceb7783SJesper Skriver * (most of which have hopefully been retired) that have bad VJ 8187ceb7783SJesper Skriver * header compression code which trashes TCP segments containing 8197ceb7783SJesper Skriver * unknown-to-them TCP options. 
8207ceb7783SJesper Skriver */ 8216c0ef895SJohn Baldwin if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) && 8226c0ef895SJohn Baldwin (tp->t_rxtshift == 3)) 823c4ab59c1SAndre Oppermann tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT); 8247ceb7783SJesper Skriver /* 8255ede40dcSRyan Stone * If we backed off this far, notify the L3 protocol that we're having 8265ede40dcSRyan Stone * connection problems. 827df8bae1dSRodney W. Grimes */ 8285ede40dcSRyan Stone if (tp->t_rxtshift > TCP_RTT_INVALIDATE) { 829fb59c426SYoshinobu Inoue #ifdef INET6 830fb59c426SYoshinobu Inoue if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) 831fb59c426SYoshinobu Inoue in6_losing(tp->t_inpcb); 83284cc0778SGeorge V. Neville-Neil else 833fb59c426SYoshinobu Inoue #endif 83484cc0778SGeorge V. Neville-Neil in_losing(tp->t_inpcb); 835df8bae1dSRodney W. Grimes } 836df8bae1dSRodney W. Grimes tp->snd_nxt = tp->snd_una; 8379d11646dSJeffrey Hsu tp->snd_recover = tp->snd_max; 83846f58482SJonathan Lemon /* 83974b48c1dSAndras Olah * Force a segment to be sent. 84074b48c1dSAndras Olah */ 84174b48c1dSAndras Olah tp->t_flags |= TF_ACKNOW; 84274b48c1dSAndras Olah /* 843df8bae1dSRodney W. Grimes * If timing a segment in this window, stop the timer. 844df8bae1dSRodney W. Grimes */ 8459b8b58e0SJonathan Lemon tp->t_rtttime = 0; 846dbc42409SLawrence Stewart 847b5af1b88SLawrence Stewart cc_cong_signal(tp, NULL, CC_RTO); 848dbc42409SLawrence Stewart 84955bceb1eSRandall Stewart (void) tp->t_fb->tfb_tcp_output(tp); 850df8bae1dSRodney W. Grimes 8519b8b58e0SJonathan Lemon #ifdef TCPDEBUG 8521c53f806SRobert Watson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 853fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 8549b8b58e0SJonathan Lemon PRU_SLOWTIMO); 855df8bae1dSRodney W. Grimes #endif 8565d06879aSGeorge V. 
Neville-Neil TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); 8578501a69cSRobert Watson INP_WUNLOCK(inp); 858b07fef50SRandall Stewart out: 8598b615593SMarko Zec CURVNET_RESTORE(); 86085d94372SRobert Watson } 86185d94372SRobert Watson 86285d94372SRobert Watson void 8635571f9cfSJulien Charbon tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta) 86485d94372SRobert Watson { 86585d94372SRobert Watson struct callout *t_callout; 86618832f1fSJulien Charbon timeout_t *f_callout; 86787aedea4SKip Macy struct inpcb *inp = tp->t_inpcb; 868883831c6SAdrian Chadd int cpu = inp_to_cpuid(inp); 86985d94372SRobert Watson 87009fe6320SNavdeep Parhar #ifdef TCP_OFFLOAD 87109fe6320SNavdeep Parhar if (tp->t_flags & TF_TOE) 87209fe6320SNavdeep Parhar return; 87309fe6320SNavdeep Parhar #endif 87409fe6320SNavdeep Parhar 8755571f9cfSJulien Charbon if (tp->t_timers->tt_flags & TT_STOPPED) 8765571f9cfSJulien Charbon return; 8775571f9cfSJulien Charbon 87885d94372SRobert Watson switch (timer_type) { 87985d94372SRobert Watson case TT_DELACK: 880e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_delack; 88185d94372SRobert Watson f_callout = tcp_timer_delack; 88285d94372SRobert Watson break; 88385d94372SRobert Watson case TT_REXMT: 884e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_rexmt; 88585d94372SRobert Watson f_callout = tcp_timer_rexmt; 88685d94372SRobert Watson break; 88785d94372SRobert Watson case TT_PERSIST: 888e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_persist; 88985d94372SRobert Watson f_callout = tcp_timer_persist; 89085d94372SRobert Watson break; 89185d94372SRobert Watson case TT_KEEP: 892e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_keep; 89385d94372SRobert Watson f_callout = tcp_timer_keep; 89485d94372SRobert Watson break; 89585d94372SRobert Watson case TT_2MSL: 896e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_2msl; 89785d94372SRobert Watson f_callout = tcp_timer_2msl; 89885d94372SRobert Watson break; 89985d94372SRobert Watson 
default: 90055bceb1eSRandall Stewart if (tp->t_fb->tfb_tcp_timer_activate) { 90155bceb1eSRandall Stewart tp->t_fb->tfb_tcp_timer_activate(tp, timer_type, delta); 90255bceb1eSRandall Stewart return; 90355bceb1eSRandall Stewart } 90403374917SJulien Charbon panic("tp %p bad timer_type %#x", tp, timer_type); 90585d94372SRobert Watson } 90685d94372SRobert Watson if (delta == 0) { 907b07fef50SRandall Stewart callout_stop(t_callout); 90885d94372SRobert Watson } else { 90987aedea4SKip Macy callout_reset_on(t_callout, delta, f_callout, tp, cpu); 91085d94372SRobert Watson } 91185d94372SRobert Watson } 91285d94372SRobert Watson 91385d94372SRobert Watson int 9145571f9cfSJulien Charbon tcp_timer_active(struct tcpcb *tp, uint32_t timer_type) 91585d94372SRobert Watson { 91685d94372SRobert Watson struct callout *t_callout; 91785d94372SRobert Watson 91885d94372SRobert Watson switch (timer_type) { 91985d94372SRobert Watson case TT_DELACK: 920e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_delack; 92185d94372SRobert Watson break; 92285d94372SRobert Watson case TT_REXMT: 923e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_rexmt; 92485d94372SRobert Watson break; 92585d94372SRobert Watson case TT_PERSIST: 926e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_persist; 92785d94372SRobert Watson break; 92885d94372SRobert Watson case TT_KEEP: 929e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_keep; 93085d94372SRobert Watson break; 93185d94372SRobert Watson case TT_2MSL: 932e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_2msl; 93385d94372SRobert Watson break; 93485d94372SRobert Watson default: 93555bceb1eSRandall Stewart if (tp->t_fb->tfb_tcp_timer_active) { 93655bceb1eSRandall Stewart return(tp->t_fb->tfb_tcp_timer_active(tp, timer_type)); 93755bceb1eSRandall Stewart } 93803374917SJulien Charbon panic("tp %p bad timer_type %#x", tp, timer_type); 93985d94372SRobert Watson } 94085d94372SRobert Watson return callout_active(t_callout); 941df8bae1dSRodney W. 
Grimes } 942b8614722SMike Silbersack 9435571f9cfSJulien Charbon void 9445571f9cfSJulien Charbon tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type) 9455571f9cfSJulien Charbon { 9465571f9cfSJulien Charbon struct callout *t_callout; 9475571f9cfSJulien Charbon 9485571f9cfSJulien Charbon tp->t_timers->tt_flags |= TT_STOPPED; 9495571f9cfSJulien Charbon switch (timer_type) { 9505571f9cfSJulien Charbon case TT_DELACK: 9515571f9cfSJulien Charbon t_callout = &tp->t_timers->tt_delack; 9525571f9cfSJulien Charbon break; 9535571f9cfSJulien Charbon case TT_REXMT: 9545571f9cfSJulien Charbon t_callout = &tp->t_timers->tt_rexmt; 9555571f9cfSJulien Charbon break; 9565571f9cfSJulien Charbon case TT_PERSIST: 9575571f9cfSJulien Charbon t_callout = &tp->t_timers->tt_persist; 9585571f9cfSJulien Charbon break; 9595571f9cfSJulien Charbon case TT_KEEP: 9605571f9cfSJulien Charbon t_callout = &tp->t_timers->tt_keep; 9615571f9cfSJulien Charbon break; 9625571f9cfSJulien Charbon case TT_2MSL: 9635571f9cfSJulien Charbon t_callout = &tp->t_timers->tt_2msl; 9645571f9cfSJulien Charbon break; 9655571f9cfSJulien Charbon default: 96655bceb1eSRandall Stewart if (tp->t_fb->tfb_tcp_timer_stop) { 96755bceb1eSRandall Stewart /* 96855bceb1eSRandall Stewart * XXXrrs we need to look at this with the 96955bceb1eSRandall Stewart * stop case below (flags). 97055bceb1eSRandall Stewart */ 97155bceb1eSRandall Stewart tp->t_fb->tfb_tcp_timer_stop(tp, timer_type); 97255bceb1eSRandall Stewart return; 97355bceb1eSRandall Stewart } 9745571f9cfSJulien Charbon panic("tp %p bad timer_type %#x", tp, timer_type); 9755571f9cfSJulien Charbon } 9765571f9cfSJulien Charbon 977e5ad6456SRandall Stewart if (callout_async_drain(t_callout, tcp_timer_discard) == 0) { 9785571f9cfSJulien Charbon /* 9795571f9cfSJulien Charbon * Can't stop the callout, defer tcpcb actual deletion 980e5ad6456SRandall Stewart * to the last one. 
We do this using the async drain 981e5ad6456SRandall Stewart * function and incrementing the count in 9825571f9cfSJulien Charbon */ 983e5ad6456SRandall Stewart tp->t_timers->tt_draincnt++; 9845571f9cfSJulien Charbon } 9855571f9cfSJulien Charbon } 986