1c398230bSWarner Losh /*- 2e79adb8eSGarrett Wollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 29e79adb8eSGarrett Wollman * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 30df8bae1dSRodney W. Grimes */ 31df8bae1dSRodney W. Grimes 324b421e2dSMike Silbersack #include <sys/cdefs.h> 334b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 344b421e2dSMike Silbersack 35825fd1e4SNavdeep Parhar #include "opt_inet.h" 36fb59c426SYoshinobu Inoue #include "opt_inet6.h" 370cc12cc5SJoerg Wunsch #include "opt_tcpdebug.h" 38883831c6SAdrian Chadd #include "opt_rss.h" 390cc12cc5SJoerg Wunsch 40df8bae1dSRodney W. Grimes #include <sys/param.h> 4198163b98SPoul-Henning Kamp #include <sys/kernel.h> 42c74af4faSBruce Evans #include <sys/lock.h> 4308517d53SMike Silbersack #include <sys/mbuf.h> 44c74af4faSBruce Evans #include <sys/mutex.h> 45c74af4faSBruce Evans #include <sys/protosw.h> 4687aedea4SKip Macy #include <sys/smp.h> 47df8bae1dSRodney W. Grimes #include <sys/socket.h> 48df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 49c74af4faSBruce Evans #include <sys/sysctl.h> 50c74af4faSBruce Evans #include <sys/systm.h> 51e79adb8eSGarrett Wollman 524b79449eSBjoern A. Zeeb #include <net/if.h> 53df8bae1dSRodney W. Grimes #include <net/route.h> 54b2bdc62aSAdrian Chadd #include <net/rss_config.h> 55530c0060SRobert Watson #include <net/vnet.h> 56883831c6SAdrian Chadd #include <net/netisr.h> 57df8bae1dSRodney W. Grimes 58dbc42409SLawrence Stewart #include <netinet/cc.h> 59df8bae1dSRodney W. Grimes #include <netinet/in.h> 60df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 61883831c6SAdrian Chadd #include <netinet/in_rss.h> 62c74af4faSBruce Evans #include <netinet/in_systm.h> 63fb59c426SYoshinobu Inoue #ifdef INET6 64fb59c426SYoshinobu Inoue #include <netinet6/in6_pcb.h> 65fb59c426SYoshinobu Inoue #endif 66df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 67df8bae1dSRodney W. Grimes #include <netinet/tcp_fsm.h> 68df8bae1dSRodney W. Grimes #include <netinet/tcp_timer.h> 69df8bae1dSRodney W. Grimes #include <netinet/tcp_var.h> 70f6f6703fSSean Bruno #ifdef INET6 71f6f6703fSSean Bruno #include <netinet6/tcp6_var.h> 72f6f6703fSSean Bruno #endif 73df8bae1dSRodney W. Grimes #include <netinet/tcpip.h> 74af7a2999SDavid Greenman #ifdef TCPDEBUG 75af7a2999SDavid Greenman #include <netinet/tcp_debug.h> 76af7a2999SDavid Greenman #endif 77df8bae1dSRodney W. Grimes 789b8b58e0SJonathan Lemon int tcp_keepinit; 79ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 8041698ebfSTom Rhodes &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection"); 817b40aa32SPaul Traina 829b8b58e0SJonathan Lemon int tcp_keepidle; 83ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 8441698ebfSTom Rhodes &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin"); 8598163b98SPoul-Henning Kamp 869b8b58e0SJonathan Lemon int tcp_keepintvl; 87ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 8841698ebfSTom Rhodes &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes"); 8998163b98SPoul-Henning Kamp 909b8b58e0SJonathan Lemon int tcp_delacktime; 916489fe65SAndre Oppermann SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, 926489fe65SAndre Oppermann &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 93ccb4d0c6SJonathan Lemon "Time before a delayed ACK is sent"); 949b8b58e0SJonathan Lemon 959b8b58e0SJonathan Lemon int tcp_msl; 96ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 97ccb4d0c6SJonathan Lemon &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 989b8b58e0SJonathan Lemon 99701bec5aSMatthew Dillon int tcp_rexmit_min; 100701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 1016489fe65SAndre Oppermann &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 1026489fe65SAndre Oppermann "Minimum Retransmission Timeout"); 103701bec5aSMatthew Dillon 104701bec5aSMatthew Dillon int tcp_rexmit_slop; 105701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 1066489fe65SAndre Oppermann &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 1076489fe65SAndre Oppermann "Retransmission Timer Slop"); 108701bec5aSMatthew Dillon 109c39a614eSRobert Watson static int always_keepalive = 1; 1103d177f46SBill Fumerola SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 1113d177f46SBill Fumerola &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 11234be9bf3SPoul-Henning Kamp 1137c72af87SMohan Srinivasan int tcp_fast_finwait2_recycle = 0; 1147c72af87SMohan Srinivasan SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 1156489fe65SAndre Oppermann &tcp_fast_finwait2_recycle, 0, 1166489fe65SAndre Oppermann "Recycle closed FIN_WAIT_2 connections faster"); 1177c72af87SMohan Srinivasan 1187c72af87SMohan Srinivasan int tcp_finwait2_timeout; 1197c72af87SMohan Srinivasan SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, 1206489fe65SAndre Oppermann &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); 1217c72af87SMohan Srinivasan 1229077f387SGleb Smirnoff int tcp_keepcnt = TCPTV_KEEPCNT; 1239077f387SGleb Smirnoff SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0, 1249077f387SGleb Smirnoff "Number of keepalive probes to send"); 1257c72af87SMohan Srinivasan 1260312fbe9SPoul-Henning Kamp /* max idle probes */ 1279b8b58e0SJonathan Lemon int tcp_maxpersistidle; 128e79adb8eSGarrett Wollman 1296c0ef895SJohn Baldwin static int tcp_rexmit_drop_options = 0; 1306c0ef895SJohn Baldwin SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW, 1316c0ef895SJohn Baldwin &tcp_rexmit_drop_options, 0, 1326c0ef895SJohn Baldwin "Drop TCP options from 3rd and later retransmitted SYN"); 1336c0ef895SJohn Baldwin 134f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_detect); 135f6f6703fSSean Bruno #define V_tcp_pmtud_blackhole_detect VNET(tcp_pmtud_blackhole_detect) 136f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection, 137f0188618SHans Petter Selasky CTLFLAG_RW|CTLFLAG_VNET, 138f6f6703fSSean Bruno &VNET_NAME(tcp_pmtud_blackhole_detect), 0, 139f6f6703fSSean Bruno "Path MTU Discovery Black Hole Detection Enabled"); 140f6f6703fSSean Bruno 141f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_activated); 142f6f6703fSSean Bruno #define V_tcp_pmtud_blackhole_activated \ 143f6f6703fSSean Bruno VNET(tcp_pmtud_blackhole_activated) 144f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated, 145f0188618SHans Petter Selasky CTLFLAG_RD|CTLFLAG_VNET, 146f6f6703fSSean Bruno &VNET_NAME(tcp_pmtud_blackhole_activated), 0, 147f6f6703fSSean Bruno "Path MTU Discovery Black Hole Detection, Activation Count"); 148f6f6703fSSean Bruno 149f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss); 150f6f6703fSSean Bruno #define V_tcp_pmtud_blackhole_activated_min_mss \ 151f6f6703fSSean Bruno VNET(tcp_pmtud_blackhole_activated_min_mss) 152f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss, 153f0188618SHans Petter Selasky CTLFLAG_RD|CTLFLAG_VNET, 154f6f6703fSSean Bruno &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0, 155f6f6703fSSean Bruno "Path MTU Discovery Black Hole Detection, Activation Count at min MSS"); 156f6f6703fSSean Bruno 157f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_failed); 158f6f6703fSSean Bruno #define V_tcp_pmtud_blackhole_failed VNET(tcp_pmtud_blackhole_failed) 159f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed, 160f0188618SHans Petter Selasky CTLFLAG_RD|CTLFLAG_VNET, 161f6f6703fSSean Bruno &VNET_NAME(tcp_pmtud_blackhole_failed), 0, 162f6f6703fSSean Bruno "Path MTU Discovery Black Hole Detection, Failure Count"); 163f6f6703fSSean Bruno 164f6f6703fSSean Bruno #ifdef INET 165f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200; 166f6f6703fSSean Bruno #define V_tcp_pmtud_blackhole_mss VNET(tcp_pmtud_blackhole_mss) 167f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss, 168f0188618SHans Petter Selasky CTLFLAG_RW|CTLFLAG_VNET, 169f6f6703fSSean Bruno &VNET_NAME(tcp_pmtud_blackhole_mss), 0, 170f6f6703fSSean Bruno "Path MTU Discovery Black Hole Detection lowered MSS"); 171f6f6703fSSean Bruno #endif 172f6f6703fSSean Bruno 173f6f6703fSSean Bruno #ifdef INET6 174f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220; 175f6f6703fSSean Bruno #define V_tcp_v6pmtud_blackhole_mss VNET(tcp_v6pmtud_blackhole_mss) 176f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss, 177f0188618SHans Petter Selasky CTLFLAG_RW|CTLFLAG_VNET, 178f6f6703fSSean Bruno &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0, 179f6f6703fSSean Bruno "Path MTU Discovery IPv6 Black Hole Detection lowered MSS"); 180f6f6703fSSean Bruno #endif 181f6f6703fSSean Bruno 1828f7e75cbSAdrian Chadd #ifdef RSS 1838f7e75cbSAdrian Chadd static int per_cpu_timers = 1; 1848f7e75cbSAdrian Chadd #else 18587aedea4SKip Macy static int per_cpu_timers = 0; 1868f7e75cbSAdrian Chadd #endif 18787aedea4SKip Macy SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW, 18887aedea4SKip Macy &per_cpu_timers , 0, "run tcp timers on all cpus"); 18987aedea4SKip Macy 190883831c6SAdrian Chadd #if 0 19187aedea4SKip Macy #define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \ 19287aedea4SKip Macy ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0) 193883831c6SAdrian Chadd #endif 194883831c6SAdrian Chadd 195883831c6SAdrian Chadd /* 196883831c6SAdrian Chadd * Map the given inp to a CPU id. 197883831c6SAdrian Chadd * 198883831c6SAdrian Chadd * This queries RSS if it's compiled in, else it defaults to the current 199883831c6SAdrian Chadd * CPU ID. 200883831c6SAdrian Chadd */ 201883831c6SAdrian Chadd static inline int 202883831c6SAdrian Chadd inp_to_cpuid(struct inpcb *inp) 203883831c6SAdrian Chadd { 204883831c6SAdrian Chadd u_int cpuid; 205883831c6SAdrian Chadd 206883831c6SAdrian Chadd #ifdef RSS 207883831c6SAdrian Chadd if (per_cpu_timers) { 208883831c6SAdrian Chadd cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); 209883831c6SAdrian Chadd if (cpuid == NETISR_CPUID_NONE) 210883831c6SAdrian Chadd return (curcpu); /* XXX */ 211883831c6SAdrian Chadd else 212883831c6SAdrian Chadd return (cpuid); 213883831c6SAdrian Chadd } 214883831c6SAdrian Chadd #else 215883831c6SAdrian Chadd /* Legacy, pre-RSS behaviour */ 216883831c6SAdrian Chadd if (per_cpu_timers) { 217883831c6SAdrian Chadd /* 218883831c6SAdrian Chadd * We don't have a flowid -> cpuid mapping, so cheat and 219883831c6SAdrian Chadd * just map unknown cpuids to curcpu. Not the best, but 220883831c6SAdrian Chadd * apparently better than defaulting to swi 0. 221883831c6SAdrian Chadd */ 222883831c6SAdrian Chadd cpuid = inp->inp_flowid % (mp_maxid + 1); 223883831c6SAdrian Chadd if (! CPU_ABSENT(cpuid)) 224883831c6SAdrian Chadd return (cpuid); 225883831c6SAdrian Chadd return (curcpu); 226883831c6SAdrian Chadd } 227883831c6SAdrian Chadd #endif 228883831c6SAdrian Chadd /* Default for RSS and non-RSS - cpuid 0 */ 229883831c6SAdrian Chadd else { 230883831c6SAdrian Chadd return (0); 231883831c6SAdrian Chadd } 232883831c6SAdrian Chadd } 23387aedea4SKip Macy 234df8bae1dSRodney W. Grimes /* 235df8bae1dSRodney W. Grimes * Tcp protocol timeout routine called every 500 ms. 2369b8b58e0SJonathan Lemon * Updates timestamps used for TCP 237df8bae1dSRodney W. Grimes * causes finite state machine actions if timers expire. 238df8bae1dSRodney W. Grimes */ 239df8bae1dSRodney W. Grimes void 240e2f2059fSMike Silbersack tcp_slowtimo(void) 241df8bae1dSRodney W. Grimes { 2428b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 24315bd2b43SDavid Greenman 2445ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 2458b615593SMarko Zec VNET_FOREACH(vnet_iter) { 2468b615593SMarko Zec CURVNET_SET(vnet_iter); 247cea40c48SJulien Charbon (void) tcp_tw_2msl_scan(0); 2488b615593SMarko Zec CURVNET_RESTORE(); 2498b615593SMarko Zec } 2505ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 251df8bae1dSRodney W. Grimes } 252df8bae1dSRodney W. Grimes 2537d42e30cSJonathan Lemon int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 2547d42e30cSJonathan Lemon { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 2557d42e30cSJonathan Lemon 256df8bae1dSRodney W. Grimes int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 257f058535dSJeffrey Hsu { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 258df8bae1dSRodney W. Grimes 259f058535dSJeffrey Hsu static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 260e79adb8eSGarrett Wollman 261df8bae1dSRodney W. Grimes /* 262df8bae1dSRodney W. Grimes * TCP timer processing. 263df8bae1dSRodney W. Grimes */ 26485d94372SRobert Watson 26585d94372SRobert Watson void 26685d94372SRobert Watson tcp_timer_delack(void *xtp) 267df8bae1dSRodney W. Grimes { 26885d94372SRobert Watson struct tcpcb *tp = xtp; 26985d94372SRobert Watson struct inpcb *inp; 2708b615593SMarko Zec CURVNET_SET(tp->t_vnet); 27185d94372SRobert Watson 27285d94372SRobert Watson inp = tp->t_inpcb; 273*5571f9cfSJulien Charbon KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); 2748501a69cSRobert Watson INP_WLOCK(inp); 275655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_delack) || 276655f934bSMikolaj Golub !callout_active(&tp->t_timers->tt_delack)) { 2778501a69cSRobert Watson INP_WUNLOCK(inp); 2788b615593SMarko Zec CURVNET_RESTORE(); 27985d94372SRobert Watson return; 28085d94372SRobert Watson } 281e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_delack); 282655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 283655f934bSMikolaj Golub INP_WUNLOCK(inp); 284655f934bSMikolaj Golub CURVNET_RESTORE(); 285655f934bSMikolaj Golub return; 286655f934bSMikolaj Golub } 287*5571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, 288*5571f9cfSJulien Charbon ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); 289*5571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0, 290*5571f9cfSJulien Charbon ("%s: tp %p delack callout should be running", __func__, tp)); 291df8bae1dSRodney W. Grimes 2929b8b58e0SJonathan Lemon tp->t_flags |= TF_ACKNOW; 29378b50714SRobert Watson TCPSTAT_INC(tcps_delack); 2949b8b58e0SJonathan Lemon (void) tcp_output(tp); 2958501a69cSRobert Watson INP_WUNLOCK(inp); 2968b615593SMarko Zec CURVNET_RESTORE(); 2979b8b58e0SJonathan Lemon } 2989b8b58e0SJonathan Lemon 29985d94372SRobert Watson void 30085d94372SRobert Watson tcp_timer_2msl(void *xtp) 3019b8b58e0SJonathan Lemon { 30285d94372SRobert Watson struct tcpcb *tp = xtp; 30385d94372SRobert Watson struct inpcb *inp; 3048b615593SMarko Zec CURVNET_SET(tp->t_vnet); 3059b8b58e0SJonathan Lemon #ifdef TCPDEBUG 3069b8b58e0SJonathan Lemon int ostate; 3079b8b58e0SJonathan Lemon 3089b8b58e0SJonathan Lemon ostate = tp->t_state; 3099b8b58e0SJonathan Lemon #endif 310603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_tcbinfo); 31185d94372SRobert Watson inp = tp->t_inpcb; 312*5571f9cfSJulien Charbon KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); 3138501a69cSRobert Watson INP_WLOCK(inp); 31485d94372SRobert Watson tcp_free_sackholes(tp); 315655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_2msl) || 316e2f2059fSMike Silbersack !callout_active(&tp->t_timers->tt_2msl)) { 3178501a69cSRobert Watson INP_WUNLOCK(tp->t_inpcb); 318603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 3198b615593SMarko Zec CURVNET_RESTORE(); 32085d94372SRobert Watson return; 32185d94372SRobert Watson } 322e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_2msl); 323655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 324655f934bSMikolaj Golub INP_WUNLOCK(inp); 325655f934bSMikolaj Golub INP_INFO_WUNLOCK(&V_tcbinfo); 326655f934bSMikolaj Golub CURVNET_RESTORE(); 327655f934bSMikolaj Golub return; 328655f934bSMikolaj Golub } 329*5571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, 330*5571f9cfSJulien Charbon ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); 331*5571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0, 332*5571f9cfSJulien Charbon ("%s: tp %p 2msl callout should be running", __func__, tp)); 33385d94372SRobert Watson /* 334df8bae1dSRodney W. Grimes * 2 MSL timeout in shutdown went off. If we're closed but 335df8bae1dSRodney W. Grimes * still waiting for peer to close and connection has been idle 336df8bae1dSRodney W. Grimes * too long, or if 2MSL time is up from TIME_WAIT, delete connection 337df8bae1dSRodney W. Grimes * control block. Otherwise, check again in a bit. 3387c72af87SMohan Srinivasan * 3397c72af87SMohan Srinivasan * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 3407c72af87SMohan Srinivasan * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 3417c72af87SMohan Srinivasan * Ignore fact that there were recent incoming segments. 342df8bae1dSRodney W. Grimes */ 3437c72af87SMohan Srinivasan if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 34485d94372SRobert Watson tp->t_inpcb && tp->t_inpcb->inp_socket && 3457c72af87SMohan Srinivasan (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 34678b50714SRobert Watson TCPSTAT_INC(tcps_finwait2_drops); 34785d94372SRobert Watson tp = tcp_close(tp); 3487c72af87SMohan Srinivasan } else { 349df8bae1dSRodney W. Grimes if (tp->t_state != TCPS_TIME_WAIT && 3509077f387SGleb Smirnoff ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) 3519077f387SGleb Smirnoff callout_reset_on(&tp->t_timers->tt_2msl, 352883831c6SAdrian Chadd TP_KEEPINTVL(tp), tcp_timer_2msl, tp, 353883831c6SAdrian Chadd inp_to_cpuid(inp)); 354df8bae1dSRodney W. Grimes else 35585d94372SRobert Watson tp = tcp_close(tp); 3567c72af87SMohan Srinivasan } 357df8bae1dSRodney W. Grimes 3589b8b58e0SJonathan Lemon #ifdef TCPDEBUG 359586b4a0eSKonstantin Belousov if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 360fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 3619b8b58e0SJonathan Lemon PRU_SLOWTIMO); 3629b8b58e0SJonathan Lemon #endif 36385d94372SRobert Watson if (tp != NULL) 3648501a69cSRobert Watson INP_WUNLOCK(inp); 365603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 3668b615593SMarko Zec CURVNET_RESTORE(); 3679b8b58e0SJonathan Lemon } 3689b8b58e0SJonathan Lemon 36985d94372SRobert Watson void 37085d94372SRobert Watson tcp_timer_keep(void *xtp) 3719b8b58e0SJonathan Lemon { 37285d94372SRobert Watson struct tcpcb *tp = xtp; 37308517d53SMike Silbersack struct tcptemp *t_template; 37485d94372SRobert Watson struct inpcb *inp; 3758b615593SMarko Zec CURVNET_SET(tp->t_vnet); 3769b8b58e0SJonathan Lemon #ifdef TCPDEBUG 3779b8b58e0SJonathan Lemon int ostate; 3789b8b58e0SJonathan Lemon 3799b8b58e0SJonathan Lemon ostate = tp->t_state; 3809b8b58e0SJonathan Lemon #endif 381603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_tcbinfo); 38285d94372SRobert Watson inp = tp->t_inpcb; 383*5571f9cfSJulien Charbon KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); 3848501a69cSRobert Watson INP_WLOCK(inp); 385655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_keep) || 386655f934bSMikolaj Golub !callout_active(&tp->t_timers->tt_keep)) { 3878501a69cSRobert Watson INP_WUNLOCK(inp); 388603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 3898b615593SMarko Zec CURVNET_RESTORE(); 39085d94372SRobert Watson return; 39185d94372SRobert Watson } 392e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_keep); 393655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 394655f934bSMikolaj Golub INP_WUNLOCK(inp); 395655f934bSMikolaj Golub INP_INFO_WUNLOCK(&V_tcbinfo); 396655f934bSMikolaj Golub CURVNET_RESTORE(); 397655f934bSMikolaj Golub return; 398655f934bSMikolaj Golub } 399*5571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, 400*5571f9cfSJulien Charbon ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); 401*5571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_KEEP) != 0, 402*5571f9cfSJulien Charbon ("%s: tp %p keep callout should be running", __func__, tp)); 4039b8b58e0SJonathan Lemon /* 4049b8b58e0SJonathan Lemon * Keep-alive timer went off; send something 4059b8b58e0SJonathan Lemon * or drop connection if idle for too long. 4069b8b58e0SJonathan Lemon */ 40778b50714SRobert Watson TCPSTAT_INC(tcps_keeptimeo); 4089b8b58e0SJonathan Lemon if (tp->t_state < TCPS_ESTABLISHED) 4099b8b58e0SJonathan Lemon goto dropit; 4102a074620SSam Leffler if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && 4119b8b58e0SJonathan Lemon tp->t_state <= TCPS_CLOSING) { 4129077f387SGleb Smirnoff if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) 4139b8b58e0SJonathan Lemon goto dropit; 4149b8b58e0SJonathan Lemon /* 4159b8b58e0SJonathan Lemon * Send a packet designed to force a response 4169b8b58e0SJonathan Lemon * if the peer is up and reachable: 4179b8b58e0SJonathan Lemon * either an ACK if the connection is still alive, 4189b8b58e0SJonathan Lemon * or an RST if the peer has closed the connection 4199b8b58e0SJonathan Lemon * due to timeout or reboot. 4209b8b58e0SJonathan Lemon * Using sequence number tp->snd_una-1 4219b8b58e0SJonathan Lemon * causes the transmitted zero-length segment 4229b8b58e0SJonathan Lemon * to lie outside the receive window; 4239b8b58e0SJonathan Lemon * by the protocol spec, this requires the 4249b8b58e0SJonathan Lemon * correspondent TCP to respond. 4259b8b58e0SJonathan Lemon */ 42678b50714SRobert Watson TCPSTAT_INC(tcps_keepprobe); 42779909384SJonathan Lemon t_template = tcpip_maketemplate(inp); 42808517d53SMike Silbersack if (t_template) { 42908517d53SMike Silbersack tcp_respond(tp, t_template->tt_ipgen, 43008517d53SMike Silbersack &t_template->tt_t, (struct mbuf *)NULL, 4319b8b58e0SJonathan Lemon tp->rcv_nxt, tp->snd_una - 1, 0); 43253640b0eSRobert Watson free(t_template, M_TEMP); 43308517d53SMike Silbersack } 4349077f387SGleb Smirnoff callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), 435883831c6SAdrian Chadd tcp_timer_keep, tp, inp_to_cpuid(inp)); 4364cc20ab1SSeigo Tanimura } else 4379077f387SGleb Smirnoff callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), 438883831c6SAdrian Chadd tcp_timer_keep, tp, inp_to_cpuid(inp)); 4399b8b58e0SJonathan Lemon 4409b8b58e0SJonathan Lemon #ifdef TCPDEBUG 4412a074620SSam Leffler if (inp->inp_socket->so_options & SO_DEBUG) 442fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 4439b8b58e0SJonathan Lemon PRU_SLOWTIMO); 4449b8b58e0SJonathan Lemon #endif 4458501a69cSRobert Watson INP_WUNLOCK(inp); 446603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 4478b615593SMarko Zec CURVNET_RESTORE(); 44885d94372SRobert Watson return; 4499b8b58e0SJonathan Lemon 4509b8b58e0SJonathan Lemon dropit: 45178b50714SRobert Watson TCPSTAT_INC(tcps_keepdrops); 45285d94372SRobert Watson tp = tcp_drop(tp, ETIMEDOUT); 45385d94372SRobert Watson 45485d94372SRobert Watson #ifdef TCPDEBUG 45585d94372SRobert Watson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 45685d94372SRobert Watson tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 45785d94372SRobert Watson PRU_SLOWTIMO); 45885d94372SRobert Watson #endif 45985d94372SRobert Watson if (tp != NULL) 4608501a69cSRobert Watson INP_WUNLOCK(tp->t_inpcb); 461603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 4628b615593SMarko Zec CURVNET_RESTORE(); 4639b8b58e0SJonathan Lemon } 4649b8b58e0SJonathan Lemon 46585d94372SRobert Watson void 46685d94372SRobert Watson tcp_timer_persist(void *xtp) 4679b8b58e0SJonathan Lemon { 46885d94372SRobert Watson struct tcpcb *tp = xtp; 46985d94372SRobert Watson struct inpcb *inp; 4708b615593SMarko Zec CURVNET_SET(tp->t_vnet); 4719b8b58e0SJonathan Lemon #ifdef TCPDEBUG 4729b8b58e0SJonathan Lemon int ostate; 4739b8b58e0SJonathan Lemon 4749b8b58e0SJonathan Lemon ostate = tp->t_state; 4759b8b58e0SJonathan Lemon #endif 476603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_tcbinfo); 47785d94372SRobert Watson inp = tp->t_inpcb; 478*5571f9cfSJulien Charbon KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); 4798501a69cSRobert Watson INP_WLOCK(inp); 480655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_persist) || 481655f934bSMikolaj Golub !callout_active(&tp->t_timers->tt_persist)) { 4828501a69cSRobert Watson INP_WUNLOCK(inp); 483603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 4848b615593SMarko Zec CURVNET_RESTORE(); 48585d94372SRobert Watson return; 48685d94372SRobert Watson } 487e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_persist); 488655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 489655f934bSMikolaj Golub INP_WUNLOCK(inp); 490655f934bSMikolaj Golub INP_INFO_WUNLOCK(&V_tcbinfo); 491655f934bSMikolaj Golub CURVNET_RESTORE(); 492655f934bSMikolaj Golub return; 493655f934bSMikolaj Golub } 494*5571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, 495*5571f9cfSJulien Charbon ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); 496*5571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_PERSIST) != 0, 497*5571f9cfSJulien Charbon ("%s: tp %p persist callout should be running", __func__, tp)); 4989b8b58e0SJonathan Lemon /* 4999b8b58e0SJonathan Lemon * Persistance timer into zero window. 5009b8b58e0SJonathan Lemon * Force a byte to be output, if possible. 5019b8b58e0SJonathan Lemon */ 50278b50714SRobert Watson TCPSTAT_INC(tcps_persisttimeo); 5039b8b58e0SJonathan Lemon /* 5049b8b58e0SJonathan Lemon * Hack: if the peer is dead/unreachable, we do not 5059b8b58e0SJonathan Lemon * time out if the window is closed. After a full 5069b8b58e0SJonathan Lemon * backoff, drop the connection if the idle time 5079b8b58e0SJonathan Lemon * (no responses to probes) reaches the maximum 5089b8b58e0SJonathan Lemon * backoff that we would use if retransmitting. 5099b8b58e0SJonathan Lemon */ 5109b8b58e0SJonathan Lemon if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 5116b0c5521SJohn Baldwin (ticks - tp->t_rcvtime >= tcp_maxpersistidle || 5126b0c5521SJohn Baldwin ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 51378b50714SRobert Watson TCPSTAT_INC(tcps_persistdrop); 51485d94372SRobert Watson tp = tcp_drop(tp, ETIMEDOUT); 51585d94372SRobert Watson goto out; 5169b8b58e0SJonathan Lemon } 517322181c9SAndre Oppermann /* 518322181c9SAndre Oppermann * If the user has closed the socket then drop a persisting 519322181c9SAndre Oppermann * connection after a much reduced timeout. 520322181c9SAndre Oppermann */ 521322181c9SAndre Oppermann if (tp->t_state > TCPS_CLOSE_WAIT && 522322181c9SAndre Oppermann (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { 523322181c9SAndre Oppermann TCPSTAT_INC(tcps_persistdrop); 524322181c9SAndre Oppermann tp = tcp_drop(tp, ETIMEDOUT); 525322181c9SAndre Oppermann goto out; 526322181c9SAndre Oppermann } 5279b8b58e0SJonathan Lemon tcp_setpersist(tp); 5282cdbfa66SPaul Saab tp->t_flags |= TF_FORCEDATA; 5299b8b58e0SJonathan Lemon (void) tcp_output(tp); 5302cdbfa66SPaul Saab tp->t_flags &= ~TF_FORCEDATA; 5319b8b58e0SJonathan Lemon 53285d94372SRobert Watson out: 5339b8b58e0SJonathan Lemon #ifdef TCPDEBUG 534ffb761f6SGleb Smirnoff if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 535ffb761f6SGleb Smirnoff tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 5369b8b58e0SJonathan Lemon #endif 53785d94372SRobert Watson if (tp != NULL) 5388501a69cSRobert Watson INP_WUNLOCK(inp); 539603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 5408b615593SMarko Zec CURVNET_RESTORE(); 5419b8b58e0SJonathan Lemon } 5429b8b58e0SJonathan Lemon 54385d94372SRobert Watson void 54485d94372SRobert Watson tcp_timer_rexmt(void * xtp) 5459b8b58e0SJonathan Lemon { 54685d94372SRobert Watson struct tcpcb *tp = xtp; 5478b615593SMarko Zec CURVNET_SET(tp->t_vnet); 5489b8b58e0SJonathan Lemon int rexmt; 54985d94372SRobert Watson int headlocked; 55085d94372SRobert Watson struct inpcb *inp; 5519b8b58e0SJonathan Lemon #ifdef TCPDEBUG 5529b8b58e0SJonathan Lemon int ostate; 5539b8b58e0SJonathan Lemon 5549b8b58e0SJonathan Lemon ostate = tp->t_state; 5559b8b58e0SJonathan Lemon #endif 556f6f6703fSSean Bruno 55787aedea4SKip Macy INP_INFO_RLOCK(&V_tcbinfo); 55885d94372SRobert Watson inp = tp->t_inpcb; 559*5571f9cfSJulien Charbon KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); 5608501a69cSRobert Watson INP_WLOCK(inp); 561655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_rexmt) || 562655f934bSMikolaj Golub !callout_active(&tp->t_timers->tt_rexmt)) { 5638501a69cSRobert Watson INP_WUNLOCK(inp); 56487aedea4SKip Macy INP_INFO_RUNLOCK(&V_tcbinfo); 5658b615593SMarko Zec CURVNET_RESTORE(); 56685d94372SRobert Watson return; 56785d94372SRobert Watson } 568e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_rexmt); 569655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 570655f934bSMikolaj Golub INP_WUNLOCK(inp); 571655f934bSMikolaj Golub INP_INFO_RUNLOCK(&V_tcbinfo); 572655f934bSMikolaj Golub CURVNET_RESTORE(); 573655f934bSMikolaj Golub return; 574655f934bSMikolaj Golub } 575*5571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, 576*5571f9cfSJulien Charbon ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); 577*5571f9cfSJulien Charbon KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0, 578*5571f9cfSJulien Charbon ("%s: tp %p rexmt callout should be running", __func__, tp)); 5796d90faf3SPaul Saab tcp_free_sackholes(tp); 580df8bae1dSRodney W. Grimes /* 581df8bae1dSRodney W. Grimes * Retransmission timer went off. Message has not 582df8bae1dSRodney W. Grimes * been acked within retransmit interval. Back off 583df8bae1dSRodney W. Grimes * to a longer retransmit interval and retransmit one segment. 584df8bae1dSRodney W. Grimes */ 585df8bae1dSRodney W. Grimes if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 586df8bae1dSRodney W. Grimes tp->t_rxtshift = TCP_MAXRXTSHIFT; 58778b50714SRobert Watson TCPSTAT_INC(tcps_timeoutdrop); 58887aedea4SKip Macy in_pcbref(inp); 58987aedea4SKip Macy INP_INFO_RUNLOCK(&V_tcbinfo); 59087aedea4SKip Macy INP_WUNLOCK(inp); 59187aedea4SKip Macy INP_INFO_WLOCK(&V_tcbinfo); 59287aedea4SKip Macy INP_WLOCK(inp); 593fa046d87SRobert Watson if (in_pcbrele_wlocked(inp)) { 59487aedea4SKip Macy INP_INFO_WUNLOCK(&V_tcbinfo); 59587aedea4SKip Macy CURVNET_RESTORE(); 59687aedea4SKip Macy return; 59787aedea4SKip Macy } 598aa4b09c5SNavdeep Parhar if (inp->inp_flags & INP_DROPPED) { 599aa4b09c5SNavdeep Parhar INP_WUNLOCK(inp); 600aa4b09c5SNavdeep Parhar INP_INFO_WUNLOCK(&V_tcbinfo); 601aa4b09c5SNavdeep Parhar CURVNET_RESTORE(); 602aa4b09c5SNavdeep Parhar return; 603aa4b09c5SNavdeep Parhar } 604aa4b09c5SNavdeep Parhar 60585d94372SRobert Watson tp = tcp_drop(tp, tp->t_softerror ? 60685d94372SRobert Watson tp->t_softerror : ETIMEDOUT); 60787aedea4SKip Macy headlocked = 1; 60885d94372SRobert Watson goto out; 6099b8b58e0SJonathan Lemon } 61087aedea4SKip Macy INP_INFO_RUNLOCK(&V_tcbinfo); 61185d94372SRobert Watson headlocked = 0; 612cf8f04f4SAndre Oppermann if (tp->t_state == TCPS_SYN_SENT) { 613cf8f04f4SAndre Oppermann /* 614cf8f04f4SAndre Oppermann * If the SYN was retransmitted, indicate CWND to be 615cf8f04f4SAndre Oppermann * limited to 1 segment in cc_conn_init(). 616cf8f04f4SAndre Oppermann */ 617cf8f04f4SAndre Oppermann tp->snd_cwnd = 1; 618cf8f04f4SAndre Oppermann } else if (tp->t_rxtshift == 1) { 6199b8b58e0SJonathan Lemon /* 6209b8b58e0SJonathan Lemon * first retransmit; record ssthresh and cwnd so they can 6219b8b58e0SJonathan Lemon * be recovered if this turns out to be a "bad" retransmit. 6229b8b58e0SJonathan Lemon * A retransmit is considered "bad" if an ACK for this 6239b8b58e0SJonathan Lemon * segment is received within RTT/2 interval; the assumption 6249b8b58e0SJonathan Lemon * here is that the ACK was already in flight. See 6259b8b58e0SJonathan Lemon * "On Estimating End-to-End Network Path Properties" by 6269b8b58e0SJonathan Lemon * Allman and Paxson for more details. 6279b8b58e0SJonathan Lemon */ 6289b8b58e0SJonathan Lemon tp->snd_cwnd_prev = tp->snd_cwnd; 6299b8b58e0SJonathan Lemon tp->snd_ssthresh_prev = tp->snd_ssthresh; 6309d11646dSJeffrey Hsu tp->snd_recover_prev = tp->snd_recover; 631dbc42409SLawrence Stewart if (IN_FASTRECOVERY(tp->t_flags)) 6329d11646dSJeffrey Hsu tp->t_flags |= TF_WASFRECOVERY; 6339d11646dSJeffrey Hsu else 6349d11646dSJeffrey Hsu tp->t_flags &= ~TF_WASFRECOVERY; 635dbc42409SLawrence Stewart if (IN_CONGRECOVERY(tp->t_flags)) 636dbc42409SLawrence Stewart tp->t_flags |= TF_WASCRECOVERY; 637dbc42409SLawrence Stewart else 638dbc42409SLawrence Stewart tp->t_flags &= ~TF_WASCRECOVERY; 6399b8b58e0SJonathan Lemon tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 640672dc4aeSJohn Baldwin tp->t_flags |= TF_PREVVALID; 641672dc4aeSJohn Baldwin } else 642672dc4aeSJohn Baldwin tp->t_flags &= ~TF_PREVVALID; 64378b50714SRobert Watson TCPSTAT_INC(tcps_rexmttimeo); 6447d42e30cSJonathan Lemon if (tp->t_state == TCPS_SYN_SENT) 645f4748ef5SAndre Oppermann rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift]; 6467d42e30cSJonathan Lemon else 647df8bae1dSRodney W. Grimes rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 648df8bae1dSRodney W. Grimes TCPT_RANGESET(tp->t_rxtcur, rexmt, 649df8bae1dSRodney W. Grimes tp->t_rttmin, TCPTV_REXMTMAX); 650f6f6703fSSean Bruno 651882ac53eSSean Bruno /* 652882ac53eSSean Bruno * We enter the path for PLMTUD if connection is established or, if 653882ac53eSSean Bruno * connection is FIN_WAIT_1 status, reason for the last is that if 654882ac53eSSean Bruno * amount of data we send is very small, we could send it in couple of 655882ac53eSSean Bruno * packets and process straight to FIN. In that case we won't catch 656882ac53eSSean Bruno * ESTABLISHED state. 657882ac53eSSean Bruno */ 658882ac53eSSean Bruno if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED)) 659882ac53eSSean Bruno || (tp->t_state == TCPS_FIN_WAIT_1))) { 660f6f6703fSSean Bruno int optlen; 661f6f6703fSSean Bruno #ifdef INET6 662f6f6703fSSean Bruno int isipv6; 663f6f6703fSSean Bruno #endif 664f6f6703fSSean Bruno 665f6f6703fSSean Bruno if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) == 666f6f6703fSSean Bruno (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) && 667f6f6703fSSean Bruno (tp->t_rxtshift <= 2)) { 668f6f6703fSSean Bruno /* 669f6f6703fSSean Bruno * Enter Path MTU Black-hole Detection mechanism: 670f6f6703fSSean Bruno * - Disable Path MTU Discovery (IP "DF" bit). 671f6f6703fSSean Bruno * - Reduce MTU to lower value than what we 672f6f6703fSSean Bruno * negotiated with peer. 673f6f6703fSSean Bruno */ 674f6f6703fSSean Bruno /* Record that we may have found a black hole. */ 675f6f6703fSSean Bruno tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE; 676f6f6703fSSean Bruno 677f6f6703fSSean Bruno /* Keep track of previous MSS. */ 678f6f6703fSSean Bruno optlen = tp->t_maxopd - tp->t_maxseg; 679f6f6703fSSean Bruno tp->t_pmtud_saved_maxopd = tp->t_maxopd; 680f6f6703fSSean Bruno 681f6f6703fSSean Bruno /* 682f6f6703fSSean Bruno * Reduce the MSS to blackhole value or to the default 683f6f6703fSSean Bruno * in an attempt to retransmit. 684f6f6703fSSean Bruno */ 685f6f6703fSSean Bruno #ifdef INET6 686f6f6703fSSean Bruno isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0; 687f6f6703fSSean Bruno if (isipv6 && 688f6f6703fSSean Bruno tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) { 689f6f6703fSSean Bruno /* Use the sysctl tuneable blackhole MSS. */ 690f6f6703fSSean Bruno tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss; 691f6f6703fSSean Bruno V_tcp_pmtud_blackhole_activated++; 692f6f6703fSSean Bruno } else if (isipv6) { 693f6f6703fSSean Bruno /* Use the default MSS. */ 694f6f6703fSSean Bruno tp->t_maxopd = V_tcp_v6mssdflt; 695f6f6703fSSean Bruno /* 696f6f6703fSSean Bruno * Disable Path MTU Discovery when we switch to 697f6f6703fSSean Bruno * minmss. 698f6f6703fSSean Bruno */ 699f6f6703fSSean Bruno tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; 700f6f6703fSSean Bruno V_tcp_pmtud_blackhole_activated_min_mss++; 701f6f6703fSSean Bruno } 702f6f6703fSSean Bruno #endif 703f6f6703fSSean Bruno #if defined(INET6) && defined(INET) 704f6f6703fSSean Bruno else 705f6f6703fSSean Bruno #endif 706f6f6703fSSean Bruno #ifdef INET 707f6f6703fSSean Bruno if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) { 708f6f6703fSSean Bruno /* Use the sysctl tuneable blackhole MSS. */ 709f6f6703fSSean Bruno tp->t_maxopd = V_tcp_pmtud_blackhole_mss; 710f6f6703fSSean Bruno V_tcp_pmtud_blackhole_activated++; 711f6f6703fSSean Bruno } else { 712f6f6703fSSean Bruno /* Use the default MSS. */ 713f6f6703fSSean Bruno tp->t_maxopd = V_tcp_mssdflt; 714f6f6703fSSean Bruno /* 715f6f6703fSSean Bruno * Disable Path MTU Discovery when we switch to 716f6f6703fSSean Bruno * minmss. 717f6f6703fSSean Bruno */ 718f6f6703fSSean Bruno tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; 719f6f6703fSSean Bruno V_tcp_pmtud_blackhole_activated_min_mss++; 720f6f6703fSSean Bruno } 721f6f6703fSSean Bruno #endif 722f6f6703fSSean Bruno tp->t_maxseg = tp->t_maxopd - optlen; 723f6f6703fSSean Bruno /* 724f6f6703fSSean Bruno * Reset the slow-start flight size 725f6f6703fSSean Bruno * as it may depend on the new MSS. 726f6f6703fSSean Bruno */ 727f6f6703fSSean Bruno if (CC_ALGO(tp)->conn_init != NULL) 728f6f6703fSSean Bruno CC_ALGO(tp)->conn_init(tp->ccv); 729f6f6703fSSean Bruno } else { 730f6f6703fSSean Bruno /* 731f6f6703fSSean Bruno * If further retransmissions are still unsuccessful 732f6f6703fSSean Bruno * with a lowered MTU, maybe this isn't a blackhole and 733f6f6703fSSean Bruno * we restore the previous MSS and blackhole detection 734f6f6703fSSean Bruno * flags. 735f6f6703fSSean Bruno */ 736f6f6703fSSean Bruno if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) && 737f6f6703fSSean Bruno (tp->t_rxtshift > 4)) { 738f6f6703fSSean Bruno tp->t_flags2 |= TF2_PLPMTU_PMTUD; 739f6f6703fSSean Bruno tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; 740f6f6703fSSean Bruno optlen = tp->t_maxopd - tp->t_maxseg; 741f6f6703fSSean Bruno tp->t_maxopd = tp->t_pmtud_saved_maxopd; 742f6f6703fSSean Bruno tp->t_maxseg = tp->t_maxopd - optlen; 743f6f6703fSSean Bruno V_tcp_pmtud_blackhole_failed++; 744f6f6703fSSean Bruno /* 745f6f6703fSSean Bruno * Reset the slow-start flight size as it 746f6f6703fSSean Bruno * may depend on the new MSS. 747f6f6703fSSean Bruno */ 748f6f6703fSSean Bruno if (CC_ALGO(tp)->conn_init != NULL) 749f6f6703fSSean Bruno CC_ALGO(tp)->conn_init(tp->ccv); 750f6f6703fSSean Bruno } 751f6f6703fSSean Bruno } 752f6f6703fSSean Bruno } 753f6f6703fSSean Bruno 754df8bae1dSRodney W. Grimes /* 75577339e1cSAndre Oppermann * Disable RFC1323 and SACK if we haven't got any response to 7567ceb7783SJesper Skriver * our third SYN to work-around some broken terminal servers 7577ceb7783SJesper Skriver * (most of which have hopefully been retired) that have bad VJ 7587ceb7783SJesper Skriver * header compression code which trashes TCP segments containing 7597ceb7783SJesper Skriver * unknown-to-them TCP options. 7607ceb7783SJesper Skriver */ 7616c0ef895SJohn Baldwin if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) && 7626c0ef895SJohn Baldwin (tp->t_rxtshift == 3)) 763c4ab59c1SAndre Oppermann tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT); 7647ceb7783SJesper Skriver /* 76597d8d152SAndre Oppermann * If we backed off this far, our srtt estimate is probably bogus. 76697d8d152SAndre Oppermann * Clobber it so we'll take the next rtt measurement as our srtt; 767df8bae1dSRodney W. Grimes * move the current srtt into rttvar to keep the current 768df8bae1dSRodney W. Grimes * retransmit times until then. 769df8bae1dSRodney W. Grimes */ 770df8bae1dSRodney W. Grimes if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 771fb59c426SYoshinobu Inoue #ifdef INET6 772fb59c426SYoshinobu Inoue if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) 773fb59c426SYoshinobu Inoue in6_losing(tp->t_inpcb); 774fb59c426SYoshinobu Inoue #endif 775df8bae1dSRodney W. Grimes tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 776df8bae1dSRodney W. Grimes tp->t_srtt = 0; 777df8bae1dSRodney W. Grimes } 778df8bae1dSRodney W. Grimes tp->snd_nxt = tp->snd_una; 7799d11646dSJeffrey Hsu tp->snd_recover = tp->snd_max; 78046f58482SJonathan Lemon /* 78174b48c1dSAndras Olah * Force a segment to be sent. 78274b48c1dSAndras Olah */ 78374b48c1dSAndras Olah tp->t_flags |= TF_ACKNOW; 78474b48c1dSAndras Olah /* 785df8bae1dSRodney W. Grimes * If timing a segment in this window, stop the timer. 786df8bae1dSRodney W. Grimes */ 7879b8b58e0SJonathan Lemon tp->t_rtttime = 0; 788dbc42409SLawrence Stewart 789b5af1b88SLawrence Stewart cc_cong_signal(tp, NULL, CC_RTO); 790dbc42409SLawrence Stewart 791df8bae1dSRodney W. Grimes (void) tcp_output(tp); 792df8bae1dSRodney W. Grimes 79385d94372SRobert Watson out: 7949b8b58e0SJonathan Lemon #ifdef TCPDEBUG 7951c53f806SRobert Watson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 796fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 7979b8b58e0SJonathan Lemon PRU_SLOWTIMO); 798df8bae1dSRodney W. Grimes #endif 79985d94372SRobert Watson if (tp != NULL) 8008501a69cSRobert Watson INP_WUNLOCK(inp); 80185d94372SRobert Watson if (headlocked) 802603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 8038b615593SMarko Zec CURVNET_RESTORE(); 80485d94372SRobert Watson } 80585d94372SRobert Watson 80685d94372SRobert Watson void 807*5571f9cfSJulien Charbon tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta) 80885d94372SRobert Watson { 80985d94372SRobert Watson struct callout *t_callout; 81018832f1fSJulien Charbon timeout_t *f_callout; 81187aedea4SKip Macy struct inpcb *inp = tp->t_inpcb; 812883831c6SAdrian Chadd int cpu = inp_to_cpuid(inp); 81385d94372SRobert Watson 81409fe6320SNavdeep Parhar #ifdef TCP_OFFLOAD 81509fe6320SNavdeep Parhar if (tp->t_flags & TF_TOE) 81609fe6320SNavdeep Parhar return; 81709fe6320SNavdeep Parhar #endif 81809fe6320SNavdeep Parhar 819*5571f9cfSJulien Charbon if (tp->t_timers->tt_flags & TT_STOPPED) 820*5571f9cfSJulien Charbon return; 821*5571f9cfSJulien Charbon 82285d94372SRobert Watson switch (timer_type) { 82385d94372SRobert Watson case TT_DELACK: 824e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_delack; 82585d94372SRobert Watson f_callout = tcp_timer_delack; 82685d94372SRobert Watson break; 82785d94372SRobert Watson case TT_REXMT: 828e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_rexmt; 82985d94372SRobert Watson f_callout = tcp_timer_rexmt; 83085d94372SRobert Watson break; 83185d94372SRobert Watson case TT_PERSIST: 832e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_persist; 83385d94372SRobert Watson f_callout = tcp_timer_persist; 83485d94372SRobert Watson break; 83585d94372SRobert Watson case TT_KEEP: 836e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_keep; 83785d94372SRobert Watson f_callout = tcp_timer_keep; 83885d94372SRobert Watson break; 83985d94372SRobert Watson case TT_2MSL: 840e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_2msl; 84185d94372SRobert Watson f_callout = tcp_timer_2msl; 84285d94372SRobert Watson break; 84385d94372SRobert Watson default: 84403374917SJulien Charbon panic("tp %p bad timer_type %#x", tp, timer_type); 84585d94372SRobert Watson } 84685d94372SRobert Watson if (delta == 0) { 847*5571f9cfSJulien Charbon if ((tp->t_timers->tt_flags & timer_type) && 848*5571f9cfSJulien Charbon callout_stop(t_callout)) { 849*5571f9cfSJulien Charbon tp->t_timers->tt_flags &= ~timer_type; 850*5571f9cfSJulien Charbon } 85185d94372SRobert Watson } else { 852*5571f9cfSJulien Charbon if ((tp->t_timers->tt_flags & timer_type) == 0) { 853*5571f9cfSJulien Charbon tp->t_timers->tt_flags |= timer_type; 85487aedea4SKip Macy callout_reset_on(t_callout, delta, f_callout, tp, cpu); 855*5571f9cfSJulien Charbon } else { 856*5571f9cfSJulien Charbon /* Reset already running callout on the same CPU. */ 857*5571f9cfSJulien Charbon callout_reset(t_callout, delta, f_callout, tp); 858*5571f9cfSJulien Charbon } 85985d94372SRobert Watson } 86085d94372SRobert Watson } 86185d94372SRobert Watson 86285d94372SRobert Watson int 863*5571f9cfSJulien Charbon tcp_timer_active(struct tcpcb *tp, uint32_t timer_type) 86485d94372SRobert Watson { 86585d94372SRobert Watson struct callout *t_callout; 86685d94372SRobert Watson 86785d94372SRobert Watson switch (timer_type) { 86885d94372SRobert Watson case TT_DELACK: 869e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_delack; 87085d94372SRobert Watson break; 87185d94372SRobert Watson case TT_REXMT: 872e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_rexmt; 87385d94372SRobert Watson break; 87485d94372SRobert Watson case TT_PERSIST: 875e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_persist; 87685d94372SRobert Watson break; 87785d94372SRobert Watson case TT_KEEP: 878e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_keep; 87985d94372SRobert Watson break; 88085d94372SRobert Watson case TT_2MSL: 881e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_2msl; 88285d94372SRobert Watson break; 88385d94372SRobert Watson default: 88403374917SJulien Charbon panic("tp %p bad timer_type %#x", tp, timer_type); 88585d94372SRobert Watson } 88685d94372SRobert Watson return callout_active(t_callout); 887df8bae1dSRodney W. Grimes } 888b8614722SMike Silbersack 889*5571f9cfSJulien Charbon void 890*5571f9cfSJulien Charbon tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type) 891*5571f9cfSJulien Charbon { 892*5571f9cfSJulien Charbon struct callout *t_callout; 893*5571f9cfSJulien Charbon timeout_t *f_callout; 894*5571f9cfSJulien Charbon 895*5571f9cfSJulien Charbon tp->t_timers->tt_flags |= TT_STOPPED; 896*5571f9cfSJulien Charbon 897*5571f9cfSJulien Charbon switch (timer_type) { 898*5571f9cfSJulien Charbon case TT_DELACK: 899*5571f9cfSJulien Charbon t_callout = &tp->t_timers->tt_delack; 900*5571f9cfSJulien Charbon f_callout = tcp_timer_delack_discard; 901*5571f9cfSJulien Charbon break; 902*5571f9cfSJulien Charbon case TT_REXMT: 903*5571f9cfSJulien Charbon t_callout = &tp->t_timers->tt_rexmt; 904*5571f9cfSJulien Charbon f_callout = tcp_timer_rexmt_discard; 905*5571f9cfSJulien Charbon break; 906*5571f9cfSJulien Charbon case TT_PERSIST: 907*5571f9cfSJulien Charbon t_callout = &tp->t_timers->tt_persist; 908*5571f9cfSJulien Charbon f_callout = tcp_timer_persist_discard; 909*5571f9cfSJulien Charbon break; 910*5571f9cfSJulien Charbon case TT_KEEP: 911*5571f9cfSJulien Charbon t_callout = &tp->t_timers->tt_keep; 912*5571f9cfSJulien Charbon f_callout = tcp_timer_keep_discard; 913*5571f9cfSJulien Charbon break; 914*5571f9cfSJulien Charbon case TT_2MSL: 915*5571f9cfSJulien Charbon t_callout = &tp->t_timers->tt_2msl; 916*5571f9cfSJulien Charbon f_callout = tcp_timer_2msl_discard; 917*5571f9cfSJulien Charbon break; 918*5571f9cfSJulien Charbon default: 919*5571f9cfSJulien Charbon panic("tp %p bad timer_type %#x", tp, timer_type); 920*5571f9cfSJulien Charbon } 921*5571f9cfSJulien Charbon 922*5571f9cfSJulien Charbon if (tp->t_timers->tt_flags & timer_type) { 923*5571f9cfSJulien Charbon if (callout_stop(t_callout)) { 924*5571f9cfSJulien Charbon tp->t_timers->tt_flags &= ~timer_type; 925*5571f9cfSJulien Charbon } else { 926*5571f9cfSJulien Charbon /* 927*5571f9cfSJulien Charbon * Can't stop the callout, defer tcpcb actual deletion 928*5571f9cfSJulien Charbon * to the last tcp timer discard callout. 929*5571f9cfSJulien Charbon * The TT_STOPPED flag will ensure that no tcp timer 930*5571f9cfSJulien Charbon * callouts can be restarted on our behalf, and 931*5571f9cfSJulien Charbon * past this point currently running callouts waiting 932*5571f9cfSJulien Charbon * on inp lock will return right away after the 933*5571f9cfSJulien Charbon * classical check for callout reset/stop events: 934*5571f9cfSJulien Charbon * callout_pending() || !callout_active() 935*5571f9cfSJulien Charbon */ 936*5571f9cfSJulien Charbon callout_reset(t_callout, 1, f_callout, tp); 937*5571f9cfSJulien Charbon } 938*5571f9cfSJulien Charbon } 939*5571f9cfSJulien Charbon } 940*5571f9cfSJulien Charbon 941b8614722SMike Silbersack #define ticks_to_msecs(t) (1000*(t) / hz) 942b8614722SMike Silbersack 943b8614722SMike Silbersack void 9445b999a6bSDavide Italiano tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, 9455b999a6bSDavide Italiano struct xtcp_timer *xtimer) 946b8614722SMike Silbersack { 9475b999a6bSDavide Italiano sbintime_t now; 9485b999a6bSDavide Italiano 9495b999a6bSDavide Italiano bzero(xtimer, sizeof(*xtimer)); 950b8614722SMike Silbersack if (timer == NULL) 951b8614722SMike Silbersack return; 9525b999a6bSDavide Italiano now = getsbinuptime(); 953b8614722SMike Silbersack if (callout_active(&timer->tt_delack)) 9545b999a6bSDavide Italiano xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS; 955b8614722SMike Silbersack if (callout_active(&timer->tt_rexmt)) 9565b999a6bSDavide Italiano xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS; 957b8614722SMike Silbersack if (callout_active(&timer->tt_persist)) 9585b999a6bSDavide Italiano xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS; 959b8614722SMike Silbersack if (callout_active(&timer->tt_keep)) 9605b999a6bSDavide Italiano xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS; 961b8614722SMike Silbersack if (callout_active(&timer->tt_2msl)) 9625b999a6bSDavide Italiano xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS; 963b8614722SMike Silbersack xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime); 964b8614722SMike Silbersack } 965