1c398230bSWarner Losh /*- 2e79adb8eSGarrett Wollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 29e79adb8eSGarrett Wollman * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 30df8bae1dSRodney W. Grimes */ 31df8bae1dSRodney W. Grimes 324b421e2dSMike Silbersack #include <sys/cdefs.h> 334b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 344b421e2dSMike Silbersack 35825fd1e4SNavdeep Parhar #include "opt_inet.h" 36fb59c426SYoshinobu Inoue #include "opt_inet6.h" 370cc12cc5SJoerg Wunsch #include "opt_tcpdebug.h" 38883831c6SAdrian Chadd #include "opt_rss.h" 390cc12cc5SJoerg Wunsch 40df8bae1dSRodney W. Grimes #include <sys/param.h> 4198163b98SPoul-Henning Kamp #include <sys/kernel.h> 42c74af4faSBruce Evans #include <sys/lock.h> 4308517d53SMike Silbersack #include <sys/mbuf.h> 44c74af4faSBruce Evans #include <sys/mutex.h> 45c74af4faSBruce Evans #include <sys/protosw.h> 4687aedea4SKip Macy #include <sys/smp.h> 47df8bae1dSRodney W. Grimes #include <sys/socket.h> 48df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 49c74af4faSBruce Evans #include <sys/sysctl.h> 50c74af4faSBruce Evans #include <sys/systm.h> 51e79adb8eSGarrett Wollman 524b79449eSBjoern A. Zeeb #include <net/if.h> 53df8bae1dSRodney W. Grimes #include <net/route.h> 54530c0060SRobert Watson #include <net/vnet.h> 55883831c6SAdrian Chadd #include <net/netisr.h> 56df8bae1dSRodney W. Grimes 57dbc42409SLawrence Stewart #include <netinet/cc.h> 58df8bae1dSRodney W. Grimes #include <netinet/in.h> 59df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 60883831c6SAdrian Chadd #include <netinet/in_rss.h> 61c74af4faSBruce Evans #include <netinet/in_systm.h> 62fb59c426SYoshinobu Inoue #ifdef INET6 63fb59c426SYoshinobu Inoue #include <netinet6/in6_pcb.h> 64fb59c426SYoshinobu Inoue #endif 65df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 66df8bae1dSRodney W. Grimes #include <netinet/tcp_fsm.h> 67df8bae1dSRodney W. Grimes #include <netinet/tcp_timer.h> 68df8bae1dSRodney W. Grimes #include <netinet/tcp_var.h> 69f6f6703fSSean Bruno #ifdef INET6 70f6f6703fSSean Bruno #include <netinet6/tcp6_var.h> 71f6f6703fSSean Bruno #endif 72df8bae1dSRodney W. Grimes #include <netinet/tcpip.h> 73af7a2999SDavid Greenman #ifdef TCPDEBUG 74af7a2999SDavid Greenman #include <netinet/tcp_debug.h> 75af7a2999SDavid Greenman #endif 76df8bae1dSRodney W. Grimes 779b8b58e0SJonathan Lemon int tcp_keepinit; 78ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 7941698ebfSTom Rhodes &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection"); 807b40aa32SPaul Traina 819b8b58e0SJonathan Lemon int tcp_keepidle; 82ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 8341698ebfSTom Rhodes &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin"); 8498163b98SPoul-Henning Kamp 859b8b58e0SJonathan Lemon int tcp_keepintvl; 86ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 8741698ebfSTom Rhodes &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes"); 8898163b98SPoul-Henning Kamp 899b8b58e0SJonathan Lemon int tcp_delacktime; 906489fe65SAndre Oppermann SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, 916489fe65SAndre Oppermann &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 92ccb4d0c6SJonathan Lemon "Time before a delayed ACK is sent"); 939b8b58e0SJonathan Lemon 949b8b58e0SJonathan Lemon int tcp_msl; 95ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 96ccb4d0c6SJonathan Lemon &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 979b8b58e0SJonathan Lemon 98701bec5aSMatthew Dillon int tcp_rexmit_min; 99701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 1006489fe65SAndre Oppermann &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 1016489fe65SAndre Oppermann "Minimum Retransmission Timeout"); 102701bec5aSMatthew Dillon 103701bec5aSMatthew Dillon int tcp_rexmit_slop; 104701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 1056489fe65SAndre Oppermann &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 1066489fe65SAndre Oppermann "Retransmission Timer Slop"); 107701bec5aSMatthew Dillon 108c39a614eSRobert Watson static int always_keepalive = 1; 1093d177f46SBill Fumerola SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 1103d177f46SBill Fumerola &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 11134be9bf3SPoul-Henning Kamp 1127c72af87SMohan Srinivasan int tcp_fast_finwait2_recycle = 0; 1137c72af87SMohan Srinivasan SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 1146489fe65SAndre Oppermann &tcp_fast_finwait2_recycle, 0, 1156489fe65SAndre Oppermann "Recycle closed FIN_WAIT_2 connections faster"); 1167c72af87SMohan Srinivasan 1177c72af87SMohan Srinivasan int tcp_finwait2_timeout; 1187c72af87SMohan Srinivasan SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, 1196489fe65SAndre Oppermann &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); 1207c72af87SMohan Srinivasan 1219077f387SGleb Smirnoff int tcp_keepcnt = TCPTV_KEEPCNT; 1229077f387SGleb Smirnoff SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0, 1239077f387SGleb Smirnoff "Number of keepalive probes to send"); 1247c72af87SMohan Srinivasan 1250312fbe9SPoul-Henning Kamp /* max idle probes */ 1269b8b58e0SJonathan Lemon int tcp_maxpersistidle; 127e79adb8eSGarrett Wollman 1286c0ef895SJohn Baldwin static int tcp_rexmit_drop_options = 0; 1296c0ef895SJohn Baldwin SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW, 1306c0ef895SJohn Baldwin &tcp_rexmit_drop_options, 0, 1316c0ef895SJohn Baldwin "Drop TCP options from 3rd and later retransmitted SYN"); 1326c0ef895SJohn Baldwin 133f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_detect); 134f6f6703fSSean Bruno #define V_tcp_pmtud_blackhole_detect VNET(tcp_pmtud_blackhole_detect) 135f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection, 136f6f6703fSSean Bruno CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_VNET, 137f6f6703fSSean Bruno &VNET_NAME(tcp_pmtud_blackhole_detect), 0, 138f6f6703fSSean Bruno "Path MTU Discovery Black Hole Detection Enabled"); 139f6f6703fSSean Bruno 140f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_activated); 141f6f6703fSSean Bruno #define V_tcp_pmtud_blackhole_activated \ 142f6f6703fSSean Bruno VNET(tcp_pmtud_blackhole_activated) 143f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated, 144f6f6703fSSean Bruno CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_VNET, 145f6f6703fSSean Bruno &VNET_NAME(tcp_pmtud_blackhole_activated), 0, 146f6f6703fSSean Bruno "Path MTU Discovery Black Hole Detection, Activation Count"); 147f6f6703fSSean Bruno 148f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss); 149f6f6703fSSean Bruno #define V_tcp_pmtud_blackhole_activated_min_mss \ 150f6f6703fSSean Bruno VNET(tcp_pmtud_blackhole_activated_min_mss) 151f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss, 152f6f6703fSSean Bruno CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_VNET, 153f6f6703fSSean Bruno &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0, 154f6f6703fSSean Bruno "Path MTU Discovery Black Hole Detection, Activation Count at min MSS"); 155f6f6703fSSean Bruno 156f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_failed); 157f6f6703fSSean Bruno #define V_tcp_pmtud_blackhole_failed VNET(tcp_pmtud_blackhole_failed) 158f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed, 159f6f6703fSSean Bruno CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_VNET, 160f6f6703fSSean Bruno &VNET_NAME(tcp_pmtud_blackhole_failed), 0, 161f6f6703fSSean Bruno "Path MTU Discovery Black Hole Detection, Failure Count"); 162f6f6703fSSean Bruno 163f6f6703fSSean Bruno #ifdef INET 164f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200; 165f6f6703fSSean Bruno #define V_tcp_pmtud_blackhole_mss VNET(tcp_pmtud_blackhole_mss) 166f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss, 167f6f6703fSSean Bruno CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_VNET, 168f6f6703fSSean Bruno &VNET_NAME(tcp_pmtud_blackhole_mss), 0, 169f6f6703fSSean Bruno "Path MTU Discovery Black Hole Detection lowered MSS"); 170f6f6703fSSean Bruno #endif 171f6f6703fSSean Bruno 172f6f6703fSSean Bruno #ifdef INET6 173f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220; 174f6f6703fSSean Bruno #define V_tcp_v6pmtud_blackhole_mss VNET(tcp_v6pmtud_blackhole_mss) 175f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss, 176f6f6703fSSean Bruno CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_VNET, 177f6f6703fSSean Bruno &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0, 178f6f6703fSSean Bruno "Path MTU Discovery IPv6 Black Hole Detection lowered MSS"); 179f6f6703fSSean Bruno #endif 180f6f6703fSSean Bruno 1818f7e75cbSAdrian Chadd #ifdef RSS 1828f7e75cbSAdrian Chadd static int per_cpu_timers = 1; 1838f7e75cbSAdrian Chadd #else 18487aedea4SKip Macy static int per_cpu_timers = 0; 1858f7e75cbSAdrian Chadd #endif 18687aedea4SKip Macy SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW, 18787aedea4SKip Macy &per_cpu_timers , 0, "run tcp timers on all cpus"); 18887aedea4SKip Macy 189883831c6SAdrian Chadd #if 0 19087aedea4SKip Macy #define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \ 19187aedea4SKip Macy ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0) 192883831c6SAdrian Chadd #endif 193883831c6SAdrian Chadd 194883831c6SAdrian Chadd /* 195883831c6SAdrian Chadd * Map the given inp to a CPU id. 196883831c6SAdrian Chadd * 197883831c6SAdrian Chadd * This queries RSS if it's compiled in, else it defaults to the current 198883831c6SAdrian Chadd * CPU ID. 199883831c6SAdrian Chadd */ 200883831c6SAdrian Chadd static inline int 201883831c6SAdrian Chadd inp_to_cpuid(struct inpcb *inp) 202883831c6SAdrian Chadd { 203883831c6SAdrian Chadd u_int cpuid; 204883831c6SAdrian Chadd 205883831c6SAdrian Chadd #ifdef RSS 206883831c6SAdrian Chadd if (per_cpu_timers) { 207883831c6SAdrian Chadd cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); 208883831c6SAdrian Chadd if (cpuid == NETISR_CPUID_NONE) 209883831c6SAdrian Chadd return (curcpu); /* XXX */ 210883831c6SAdrian Chadd else 211883831c6SAdrian Chadd return (cpuid); 212883831c6SAdrian Chadd } 213883831c6SAdrian Chadd #else 214883831c6SAdrian Chadd /* Legacy, pre-RSS behaviour */ 215883831c6SAdrian Chadd if (per_cpu_timers) { 216883831c6SAdrian Chadd /* 217883831c6SAdrian Chadd * We don't have a flowid -> cpuid mapping, so cheat and 218883831c6SAdrian Chadd * just map unknown cpuids to curcpu. Not the best, but 219883831c6SAdrian Chadd * apparently better than defaulting to swi 0. 220883831c6SAdrian Chadd */ 221883831c6SAdrian Chadd cpuid = inp->inp_flowid % (mp_maxid + 1); 222883831c6SAdrian Chadd if (! CPU_ABSENT(cpuid)) 223883831c6SAdrian Chadd return (cpuid); 224883831c6SAdrian Chadd return (curcpu); 225883831c6SAdrian Chadd } 226883831c6SAdrian Chadd #endif 227883831c6SAdrian Chadd /* Default for RSS and non-RSS - cpuid 0 */ 228883831c6SAdrian Chadd else { 229883831c6SAdrian Chadd return (0); 230883831c6SAdrian Chadd } 231883831c6SAdrian Chadd } 23287aedea4SKip Macy 233df8bae1dSRodney W. Grimes /* 234df8bae1dSRodney W. Grimes * Tcp protocol timeout routine called every 500 ms. 2359b8b58e0SJonathan Lemon * Updates timestamps used for TCP 236df8bae1dSRodney W. Grimes * causes finite state machine actions if timers expire. 237df8bae1dSRodney W. Grimes */ 238df8bae1dSRodney W. Grimes void 239e2f2059fSMike Silbersack tcp_slowtimo(void) 240df8bae1dSRodney W. Grimes { 2418b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 24215bd2b43SDavid Greenman 2435ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 2448b615593SMarko Zec VNET_FOREACH(vnet_iter) { 2458b615593SMarko Zec CURVNET_SET(vnet_iter); 24666eefb1eSJohn Baldwin tcp_tw_2msl_scan(); 2478b615593SMarko Zec CURVNET_RESTORE(); 2488b615593SMarko Zec } 2495ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 250df8bae1dSRodney W. Grimes } 251df8bae1dSRodney W. Grimes 2527d42e30cSJonathan Lemon int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 2537d42e30cSJonathan Lemon { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 2547d42e30cSJonathan Lemon 255df8bae1dSRodney W. Grimes int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 256f058535dSJeffrey Hsu { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 257df8bae1dSRodney W. Grimes 258f058535dSJeffrey Hsu static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 259e79adb8eSGarrett Wollman 260623dce13SRobert Watson static int tcp_timer_race; 261623dce13SRobert Watson SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race, 262623dce13SRobert Watson 0, "Count of t_inpcb races on tcp_discardcb"); 263623dce13SRobert Watson 264df8bae1dSRodney W. Grimes /* 265df8bae1dSRodney W. Grimes * TCP timer processing. 266df8bae1dSRodney W. Grimes */ 26785d94372SRobert Watson 26885d94372SRobert Watson void 26985d94372SRobert Watson tcp_timer_delack(void *xtp) 270df8bae1dSRodney W. Grimes { 27185d94372SRobert Watson struct tcpcb *tp = xtp; 27285d94372SRobert Watson struct inpcb *inp; 2738b615593SMarko Zec CURVNET_SET(tp->t_vnet); 27485d94372SRobert Watson 27585d94372SRobert Watson inp = tp->t_inpcb; 27685d94372SRobert Watson /* 27785d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 27885d94372SRobert Watson * tear-down mean we need it as a work-around for races between 27985d94372SRobert Watson * timers and tcp_discardcb(). 28085d94372SRobert Watson * 28185d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL")); 28285d94372SRobert Watson */ 28385d94372SRobert Watson if (inp == NULL) { 28485d94372SRobert Watson tcp_timer_race++; 2858b615593SMarko Zec CURVNET_RESTORE(); 28685d94372SRobert Watson return; 28785d94372SRobert Watson } 2888501a69cSRobert Watson INP_WLOCK(inp); 289655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_delack) || 290655f934bSMikolaj Golub !callout_active(&tp->t_timers->tt_delack)) { 2918501a69cSRobert Watson INP_WUNLOCK(inp); 2928b615593SMarko Zec CURVNET_RESTORE(); 29385d94372SRobert Watson return; 29485d94372SRobert Watson } 295e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_delack); 296655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 297655f934bSMikolaj Golub INP_WUNLOCK(inp); 298655f934bSMikolaj Golub CURVNET_RESTORE(); 299655f934bSMikolaj Golub return; 300655f934bSMikolaj Golub } 301df8bae1dSRodney W. Grimes 3029b8b58e0SJonathan Lemon tp->t_flags |= TF_ACKNOW; 30378b50714SRobert Watson TCPSTAT_INC(tcps_delack); 3049b8b58e0SJonathan Lemon (void) tcp_output(tp); 3058501a69cSRobert Watson INP_WUNLOCK(inp); 3068b615593SMarko Zec CURVNET_RESTORE(); 3079b8b58e0SJonathan Lemon } 3089b8b58e0SJonathan Lemon 30985d94372SRobert Watson void 31085d94372SRobert Watson tcp_timer_2msl(void *xtp) 3119b8b58e0SJonathan Lemon { 31285d94372SRobert Watson struct tcpcb *tp = xtp; 31385d94372SRobert Watson struct inpcb *inp; 3148b615593SMarko Zec CURVNET_SET(tp->t_vnet); 3159b8b58e0SJonathan Lemon #ifdef TCPDEBUG 3169b8b58e0SJonathan Lemon int ostate; 3179b8b58e0SJonathan Lemon 3189b8b58e0SJonathan Lemon ostate = tp->t_state; 3199b8b58e0SJonathan Lemon #endif 320623dce13SRobert Watson /* 32185d94372SRobert Watson * XXXRW: Does this actually happen? 32285d94372SRobert Watson */ 323603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_tcbinfo); 32485d94372SRobert Watson inp = tp->t_inpcb; 32585d94372SRobert Watson /* 32685d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 32785d94372SRobert Watson * tear-down mean we need it as a work-around for races between 32885d94372SRobert Watson * timers and tcp_discardcb(). 32985d94372SRobert Watson * 33085d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL")); 33185d94372SRobert Watson */ 33285d94372SRobert Watson if (inp == NULL) { 33385d94372SRobert Watson tcp_timer_race++; 334603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 3358b615593SMarko Zec CURVNET_RESTORE(); 33685d94372SRobert Watson return; 33785d94372SRobert Watson } 3388501a69cSRobert Watson INP_WLOCK(inp); 33985d94372SRobert Watson tcp_free_sackholes(tp); 340655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_2msl) || 341e2f2059fSMike Silbersack !callout_active(&tp->t_timers->tt_2msl)) { 3428501a69cSRobert Watson INP_WUNLOCK(tp->t_inpcb); 343603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 3448b615593SMarko Zec CURVNET_RESTORE(); 34585d94372SRobert Watson return; 34685d94372SRobert Watson } 347e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_2msl); 348655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 349655f934bSMikolaj Golub INP_WUNLOCK(inp); 350655f934bSMikolaj Golub INP_INFO_WUNLOCK(&V_tcbinfo); 351655f934bSMikolaj Golub CURVNET_RESTORE(); 352655f934bSMikolaj Golub return; 353655f934bSMikolaj Golub } 35485d94372SRobert Watson /* 355df8bae1dSRodney W. Grimes * 2 MSL timeout in shutdown went off. If we're closed but 356df8bae1dSRodney W. Grimes * still waiting for peer to close and connection has been idle 357df8bae1dSRodney W. Grimes * too long, or if 2MSL time is up from TIME_WAIT, delete connection 358df8bae1dSRodney W. Grimes * control block. Otherwise, check again in a bit. 3597c72af87SMohan Srinivasan * 3607c72af87SMohan Srinivasan * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 3617c72af87SMohan Srinivasan * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 3627c72af87SMohan Srinivasan * Ignore fact that there were recent incoming segments. 363df8bae1dSRodney W. Grimes */ 3647c72af87SMohan Srinivasan if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 36585d94372SRobert Watson tp->t_inpcb && tp->t_inpcb->inp_socket && 3667c72af87SMohan Srinivasan (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 36778b50714SRobert Watson TCPSTAT_INC(tcps_finwait2_drops); 36885d94372SRobert Watson tp = tcp_close(tp); 3697c72af87SMohan Srinivasan } else { 370df8bae1dSRodney W. Grimes if (tp->t_state != TCPS_TIME_WAIT && 3719077f387SGleb Smirnoff ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) 3729077f387SGleb Smirnoff callout_reset_on(&tp->t_timers->tt_2msl, 373883831c6SAdrian Chadd TP_KEEPINTVL(tp), tcp_timer_2msl, tp, 374883831c6SAdrian Chadd inp_to_cpuid(inp)); 375df8bae1dSRodney W. Grimes else 37685d94372SRobert Watson tp = tcp_close(tp); 3777c72af87SMohan Srinivasan } 378df8bae1dSRodney W. Grimes 3799b8b58e0SJonathan Lemon #ifdef TCPDEBUG 380586b4a0eSKonstantin Belousov if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 381fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 3829b8b58e0SJonathan Lemon PRU_SLOWTIMO); 3839b8b58e0SJonathan Lemon #endif 38485d94372SRobert Watson if (tp != NULL) 3858501a69cSRobert Watson INP_WUNLOCK(inp); 386603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 3878b615593SMarko Zec CURVNET_RESTORE(); 3889b8b58e0SJonathan Lemon } 3899b8b58e0SJonathan Lemon 39085d94372SRobert Watson void 39185d94372SRobert Watson tcp_timer_keep(void *xtp) 3929b8b58e0SJonathan Lemon { 39385d94372SRobert Watson struct tcpcb *tp = xtp; 39408517d53SMike Silbersack struct tcptemp *t_template; 39585d94372SRobert Watson struct inpcb *inp; 3968b615593SMarko Zec CURVNET_SET(tp->t_vnet); 3979b8b58e0SJonathan Lemon #ifdef TCPDEBUG 3989b8b58e0SJonathan Lemon int ostate; 3999b8b58e0SJonathan Lemon 4009b8b58e0SJonathan Lemon ostate = tp->t_state; 4019b8b58e0SJonathan Lemon #endif 402603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_tcbinfo); 40385d94372SRobert Watson inp = tp->t_inpcb; 40485d94372SRobert Watson /* 40585d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 40685d94372SRobert Watson * tear-down mean we need it as a work-around for races between 40785d94372SRobert Watson * timers and tcp_discardcb(). 40885d94372SRobert Watson * 40985d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL")); 41085d94372SRobert Watson */ 41185d94372SRobert Watson if (inp == NULL) { 41285d94372SRobert Watson tcp_timer_race++; 413603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 4148b615593SMarko Zec CURVNET_RESTORE(); 41585d94372SRobert Watson return; 41685d94372SRobert Watson } 4178501a69cSRobert Watson INP_WLOCK(inp); 418655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_keep) || 419655f934bSMikolaj Golub !callout_active(&tp->t_timers->tt_keep)) { 4208501a69cSRobert Watson INP_WUNLOCK(inp); 421603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 4228b615593SMarko Zec CURVNET_RESTORE(); 42385d94372SRobert Watson return; 42485d94372SRobert Watson } 425e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_keep); 426655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 427655f934bSMikolaj Golub INP_WUNLOCK(inp); 428655f934bSMikolaj Golub INP_INFO_WUNLOCK(&V_tcbinfo); 429655f934bSMikolaj Golub CURVNET_RESTORE(); 430655f934bSMikolaj Golub return; 431655f934bSMikolaj Golub } 4329b8b58e0SJonathan Lemon /* 4339b8b58e0SJonathan Lemon * Keep-alive timer went off; send something 4349b8b58e0SJonathan Lemon * or drop connection if idle for too long. 4359b8b58e0SJonathan Lemon */ 43678b50714SRobert Watson TCPSTAT_INC(tcps_keeptimeo); 4379b8b58e0SJonathan Lemon if (tp->t_state < TCPS_ESTABLISHED) 4389b8b58e0SJonathan Lemon goto dropit; 4392a074620SSam Leffler if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && 4409b8b58e0SJonathan Lemon tp->t_state <= TCPS_CLOSING) { 4419077f387SGleb Smirnoff if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) 4429b8b58e0SJonathan Lemon goto dropit; 4439b8b58e0SJonathan Lemon /* 4449b8b58e0SJonathan Lemon * Send a packet designed to force a response 4459b8b58e0SJonathan Lemon * if the peer is up and reachable: 4469b8b58e0SJonathan Lemon * either an ACK if the connection is still alive, 4479b8b58e0SJonathan Lemon * or an RST if the peer has closed the connection 4489b8b58e0SJonathan Lemon * due to timeout or reboot. 4499b8b58e0SJonathan Lemon * Using sequence number tp->snd_una-1 4509b8b58e0SJonathan Lemon * causes the transmitted zero-length segment 4519b8b58e0SJonathan Lemon * to lie outside the receive window; 4529b8b58e0SJonathan Lemon * by the protocol spec, this requires the 4539b8b58e0SJonathan Lemon * correspondent TCP to respond. 4549b8b58e0SJonathan Lemon */ 45578b50714SRobert Watson TCPSTAT_INC(tcps_keepprobe); 45679909384SJonathan Lemon t_template = tcpip_maketemplate(inp); 45708517d53SMike Silbersack if (t_template) { 45808517d53SMike Silbersack tcp_respond(tp, t_template->tt_ipgen, 45908517d53SMike Silbersack &t_template->tt_t, (struct mbuf *)NULL, 4609b8b58e0SJonathan Lemon tp->rcv_nxt, tp->snd_una - 1, 0); 46153640b0eSRobert Watson free(t_template, M_TEMP); 46208517d53SMike Silbersack } 4639077f387SGleb Smirnoff callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), 464883831c6SAdrian Chadd tcp_timer_keep, tp, inp_to_cpuid(inp)); 4654cc20ab1SSeigo Tanimura } else 4669077f387SGleb Smirnoff callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), 467883831c6SAdrian Chadd tcp_timer_keep, tp, inp_to_cpuid(inp)); 4689b8b58e0SJonathan Lemon 4699b8b58e0SJonathan Lemon #ifdef TCPDEBUG 4702a074620SSam Leffler if (inp->inp_socket->so_options & SO_DEBUG) 471fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 4729b8b58e0SJonathan Lemon PRU_SLOWTIMO); 4739b8b58e0SJonathan Lemon #endif 4748501a69cSRobert Watson INP_WUNLOCK(inp); 475603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 4768b615593SMarko Zec CURVNET_RESTORE(); 47785d94372SRobert Watson return; 4789b8b58e0SJonathan Lemon 4799b8b58e0SJonathan Lemon dropit: 48078b50714SRobert Watson TCPSTAT_INC(tcps_keepdrops); 48185d94372SRobert Watson tp = tcp_drop(tp, ETIMEDOUT); 48285d94372SRobert Watson 48385d94372SRobert Watson #ifdef TCPDEBUG 48485d94372SRobert Watson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 48585d94372SRobert Watson tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 48685d94372SRobert Watson PRU_SLOWTIMO); 48785d94372SRobert Watson #endif 48885d94372SRobert Watson if (tp != NULL) 4898501a69cSRobert Watson INP_WUNLOCK(tp->t_inpcb); 490603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 4918b615593SMarko Zec CURVNET_RESTORE(); 4929b8b58e0SJonathan Lemon } 4939b8b58e0SJonathan Lemon 49485d94372SRobert Watson void 49585d94372SRobert Watson tcp_timer_persist(void *xtp) 4969b8b58e0SJonathan Lemon { 49785d94372SRobert Watson struct tcpcb *tp = xtp; 49885d94372SRobert Watson struct inpcb *inp; 4998b615593SMarko Zec CURVNET_SET(tp->t_vnet); 5009b8b58e0SJonathan Lemon #ifdef TCPDEBUG 5019b8b58e0SJonathan Lemon int ostate; 5029b8b58e0SJonathan Lemon 5039b8b58e0SJonathan Lemon ostate = tp->t_state; 5049b8b58e0SJonathan Lemon #endif 505603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_tcbinfo); 50685d94372SRobert Watson inp = tp->t_inpcb; 50785d94372SRobert Watson /* 50885d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 50985d94372SRobert Watson * tear-down mean we need it as a work-around for races between 51085d94372SRobert Watson * timers and tcp_discardcb(). 51185d94372SRobert Watson * 51285d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL")); 51385d94372SRobert Watson */ 51485d94372SRobert Watson if (inp == NULL) { 51585d94372SRobert Watson tcp_timer_race++; 516603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 5178b615593SMarko Zec CURVNET_RESTORE(); 51885d94372SRobert Watson return; 51985d94372SRobert Watson } 5208501a69cSRobert Watson INP_WLOCK(inp); 521655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_persist) || 522655f934bSMikolaj Golub !callout_active(&tp->t_timers->tt_persist)) { 5238501a69cSRobert Watson INP_WUNLOCK(inp); 524603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 5258b615593SMarko Zec CURVNET_RESTORE(); 52685d94372SRobert Watson return; 52785d94372SRobert Watson } 528e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_persist); 529655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 530655f934bSMikolaj Golub INP_WUNLOCK(inp); 531655f934bSMikolaj Golub INP_INFO_WUNLOCK(&V_tcbinfo); 532655f934bSMikolaj Golub CURVNET_RESTORE(); 533655f934bSMikolaj Golub return; 534655f934bSMikolaj Golub } 5359b8b58e0SJonathan Lemon /* 5369b8b58e0SJonathan Lemon * Persistance timer into zero window. 5379b8b58e0SJonathan Lemon * Force a byte to be output, if possible. 5389b8b58e0SJonathan Lemon */ 53978b50714SRobert Watson TCPSTAT_INC(tcps_persisttimeo); 5409b8b58e0SJonathan Lemon /* 5419b8b58e0SJonathan Lemon * Hack: if the peer is dead/unreachable, we do not 5429b8b58e0SJonathan Lemon * time out if the window is closed. After a full 5439b8b58e0SJonathan Lemon * backoff, drop the connection if the idle time 5449b8b58e0SJonathan Lemon * (no responses to probes) reaches the maximum 5459b8b58e0SJonathan Lemon * backoff that we would use if retransmitting. 5469b8b58e0SJonathan Lemon */ 5479b8b58e0SJonathan Lemon if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 5486b0c5521SJohn Baldwin (ticks - tp->t_rcvtime >= tcp_maxpersistidle || 5496b0c5521SJohn Baldwin ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 55078b50714SRobert Watson TCPSTAT_INC(tcps_persistdrop); 55185d94372SRobert Watson tp = tcp_drop(tp, ETIMEDOUT); 55285d94372SRobert Watson goto out; 5539b8b58e0SJonathan Lemon } 554322181c9SAndre Oppermann /* 555322181c9SAndre Oppermann * If the user has closed the socket then drop a persisting 556322181c9SAndre Oppermann * connection after a much reduced timeout. 557322181c9SAndre Oppermann */ 558322181c9SAndre Oppermann if (tp->t_state > TCPS_CLOSE_WAIT && 559322181c9SAndre Oppermann (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { 560322181c9SAndre Oppermann TCPSTAT_INC(tcps_persistdrop); 561322181c9SAndre Oppermann tp = tcp_drop(tp, ETIMEDOUT); 562322181c9SAndre Oppermann goto out; 563322181c9SAndre Oppermann } 5649b8b58e0SJonathan Lemon tcp_setpersist(tp); 5652cdbfa66SPaul Saab tp->t_flags |= TF_FORCEDATA; 5669b8b58e0SJonathan Lemon (void) tcp_output(tp); 5672cdbfa66SPaul Saab tp->t_flags &= ~TF_FORCEDATA; 5689b8b58e0SJonathan Lemon 56985d94372SRobert Watson out: 5709b8b58e0SJonathan Lemon #ifdef TCPDEBUG 571ffb761f6SGleb Smirnoff if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 572ffb761f6SGleb Smirnoff tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 5739b8b58e0SJonathan Lemon #endif 57485d94372SRobert Watson if (tp != NULL) 5758501a69cSRobert Watson INP_WUNLOCK(inp); 576603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 5778b615593SMarko Zec CURVNET_RESTORE(); 5789b8b58e0SJonathan Lemon } 5799b8b58e0SJonathan Lemon 58085d94372SRobert Watson void 58185d94372SRobert Watson tcp_timer_rexmt(void * xtp) 5829b8b58e0SJonathan Lemon { 58385d94372SRobert Watson struct tcpcb *tp = xtp; 5848b615593SMarko Zec CURVNET_SET(tp->t_vnet); 5859b8b58e0SJonathan Lemon int rexmt; 58685d94372SRobert Watson int headlocked; 58785d94372SRobert Watson struct inpcb *inp; 5889b8b58e0SJonathan Lemon #ifdef TCPDEBUG 5899b8b58e0SJonathan Lemon int ostate; 5909b8b58e0SJonathan Lemon 5919b8b58e0SJonathan Lemon ostate = tp->t_state; 5929b8b58e0SJonathan Lemon #endif 593f6f6703fSSean Bruno 59487aedea4SKip Macy INP_INFO_RLOCK(&V_tcbinfo); 59585d94372SRobert Watson inp = tp->t_inpcb; 59685d94372SRobert Watson /* 59785d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 59885d94372SRobert Watson * tear-down mean we need it as a work-around for races between 59985d94372SRobert Watson * timers and tcp_discardcb(). 60085d94372SRobert Watson * 60185d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL")); 60285d94372SRobert Watson */ 60385d94372SRobert Watson if (inp == NULL) { 60485d94372SRobert Watson tcp_timer_race++; 60587aedea4SKip Macy INP_INFO_RUNLOCK(&V_tcbinfo); 6068b615593SMarko Zec CURVNET_RESTORE(); 60785d94372SRobert Watson return; 60885d94372SRobert Watson } 6098501a69cSRobert Watson INP_WLOCK(inp); 610655f934bSMikolaj Golub if (callout_pending(&tp->t_timers->tt_rexmt) || 611655f934bSMikolaj Golub !callout_active(&tp->t_timers->tt_rexmt)) { 6128501a69cSRobert Watson INP_WUNLOCK(inp); 61387aedea4SKip Macy INP_INFO_RUNLOCK(&V_tcbinfo); 6148b615593SMarko Zec CURVNET_RESTORE(); 61585d94372SRobert Watson return; 61685d94372SRobert Watson } 617e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_rexmt); 618655f934bSMikolaj Golub if ((inp->inp_flags & INP_DROPPED) != 0) { 619655f934bSMikolaj Golub INP_WUNLOCK(inp); 620655f934bSMikolaj Golub INP_INFO_RUNLOCK(&V_tcbinfo); 621655f934bSMikolaj Golub CURVNET_RESTORE(); 622655f934bSMikolaj Golub return; 623655f934bSMikolaj Golub } 6246d90faf3SPaul Saab tcp_free_sackholes(tp); 625df8bae1dSRodney W. Grimes /* 626df8bae1dSRodney W. Grimes * Retransmission timer went off. Message has not 627df8bae1dSRodney W. Grimes * been acked within retransmit interval. Back off 628df8bae1dSRodney W. Grimes * to a longer retransmit interval and retransmit one segment. 629df8bae1dSRodney W. Grimes */ 630df8bae1dSRodney W. Grimes if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 631df8bae1dSRodney W. Grimes tp->t_rxtshift = TCP_MAXRXTSHIFT; 63278b50714SRobert Watson TCPSTAT_INC(tcps_timeoutdrop); 63387aedea4SKip Macy in_pcbref(inp); 63487aedea4SKip Macy INP_INFO_RUNLOCK(&V_tcbinfo); 63587aedea4SKip Macy INP_WUNLOCK(inp); 63687aedea4SKip Macy INP_INFO_WLOCK(&V_tcbinfo); 63787aedea4SKip Macy INP_WLOCK(inp); 638fa046d87SRobert Watson if (in_pcbrele_wlocked(inp)) { 63987aedea4SKip Macy INP_INFO_WUNLOCK(&V_tcbinfo); 64087aedea4SKip Macy CURVNET_RESTORE(); 64187aedea4SKip Macy return; 64287aedea4SKip Macy } 643aa4b09c5SNavdeep Parhar if (inp->inp_flags & INP_DROPPED) { 644aa4b09c5SNavdeep Parhar INP_WUNLOCK(inp); 645aa4b09c5SNavdeep Parhar INP_INFO_WUNLOCK(&V_tcbinfo); 646aa4b09c5SNavdeep Parhar CURVNET_RESTORE(); 647aa4b09c5SNavdeep Parhar return; 648aa4b09c5SNavdeep Parhar } 649aa4b09c5SNavdeep Parhar 65085d94372SRobert Watson tp = tcp_drop(tp, tp->t_softerror ? 65185d94372SRobert Watson tp->t_softerror : ETIMEDOUT); 65287aedea4SKip Macy headlocked = 1; 65385d94372SRobert Watson goto out; 6549b8b58e0SJonathan Lemon } 65587aedea4SKip Macy INP_INFO_RUNLOCK(&V_tcbinfo); 65685d94372SRobert Watson headlocked = 0; 657cf8f04f4SAndre Oppermann if (tp->t_state == TCPS_SYN_SENT) { 658cf8f04f4SAndre Oppermann /* 659cf8f04f4SAndre Oppermann * If the SYN was retransmitted, indicate CWND to be 660cf8f04f4SAndre Oppermann * limited to 1 segment in cc_conn_init(). 661cf8f04f4SAndre Oppermann */ 662cf8f04f4SAndre Oppermann tp->snd_cwnd = 1; 663cf8f04f4SAndre Oppermann } else if (tp->t_rxtshift == 1) { 6649b8b58e0SJonathan Lemon /* 6659b8b58e0SJonathan Lemon * first retransmit; record ssthresh and cwnd so they can 6669b8b58e0SJonathan Lemon * be recovered if this turns out to be a "bad" retransmit. 6679b8b58e0SJonathan Lemon * A retransmit is considered "bad" if an ACK for this 6689b8b58e0SJonathan Lemon * segment is received within RTT/2 interval; the assumption 6699b8b58e0SJonathan Lemon * here is that the ACK was already in flight. See 6709b8b58e0SJonathan Lemon * "On Estimating End-to-End Network Path Properties" by 6719b8b58e0SJonathan Lemon * Allman and Paxson for more details. 6729b8b58e0SJonathan Lemon */ 6739b8b58e0SJonathan Lemon tp->snd_cwnd_prev = tp->snd_cwnd; 6749b8b58e0SJonathan Lemon tp->snd_ssthresh_prev = tp->snd_ssthresh; 6759d11646dSJeffrey Hsu tp->snd_recover_prev = tp->snd_recover; 676dbc42409SLawrence Stewart if (IN_FASTRECOVERY(tp->t_flags)) 6779d11646dSJeffrey Hsu tp->t_flags |= TF_WASFRECOVERY; 6789d11646dSJeffrey Hsu else 6799d11646dSJeffrey Hsu tp->t_flags &= ~TF_WASFRECOVERY; 680dbc42409SLawrence Stewart if (IN_CONGRECOVERY(tp->t_flags)) 681dbc42409SLawrence Stewart tp->t_flags |= TF_WASCRECOVERY; 682dbc42409SLawrence Stewart else 683dbc42409SLawrence Stewart tp->t_flags &= ~TF_WASCRECOVERY; 6849b8b58e0SJonathan Lemon tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 685672dc4aeSJohn Baldwin tp->t_flags |= TF_PREVVALID; 686672dc4aeSJohn Baldwin } else 687672dc4aeSJohn Baldwin tp->t_flags &= ~TF_PREVVALID; 68878b50714SRobert Watson TCPSTAT_INC(tcps_rexmttimeo); 6897d42e30cSJonathan Lemon if (tp->t_state == TCPS_SYN_SENT) 690f4748ef5SAndre Oppermann rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift]; 6917d42e30cSJonathan Lemon else 692df8bae1dSRodney W. Grimes rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 693df8bae1dSRodney W. Grimes TCPT_RANGESET(tp->t_rxtcur, rexmt, 694df8bae1dSRodney W. Grimes tp->t_rttmin, TCPTV_REXMTMAX); 695f6f6703fSSean Bruno 696*882ac53eSSean Bruno /* 697*882ac53eSSean Bruno * We enter the path for PLMTUD if connection is established or, if 698*882ac53eSSean Bruno * connection is FIN_WAIT_1 status, reason for the last is that if 699*882ac53eSSean Bruno * amount of data we send is very small, we could send it in couple of 700*882ac53eSSean Bruno * packets and process straight to FIN. In that case we won't catch 701*882ac53eSSean Bruno * ESTABLISHED state. 702*882ac53eSSean Bruno */ 703*882ac53eSSean Bruno if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED)) 704*882ac53eSSean Bruno || (tp->t_state == TCPS_FIN_WAIT_1))) { 705f6f6703fSSean Bruno int optlen; 706f6f6703fSSean Bruno #ifdef INET6 707f6f6703fSSean Bruno int isipv6; 708f6f6703fSSean Bruno #endif 709f6f6703fSSean Bruno 710f6f6703fSSean Bruno if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) == 711f6f6703fSSean Bruno (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) && 712f6f6703fSSean Bruno (tp->t_rxtshift <= 2)) { 713f6f6703fSSean Bruno /* 714f6f6703fSSean Bruno * Enter Path MTU Black-hole Detection mechanism: 715f6f6703fSSean Bruno * - Disable Path MTU Discovery (IP "DF" bit). 716f6f6703fSSean Bruno * - Reduce MTU to lower value than what we 717f6f6703fSSean Bruno * negotiated with peer. 718f6f6703fSSean Bruno */ 719f6f6703fSSean Bruno /* Record that we may have found a black hole. */ 720f6f6703fSSean Bruno tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE; 721f6f6703fSSean Bruno 722f6f6703fSSean Bruno /* Keep track of previous MSS. */ 723f6f6703fSSean Bruno optlen = tp->t_maxopd - tp->t_maxseg; 724f6f6703fSSean Bruno tp->t_pmtud_saved_maxopd = tp->t_maxopd; 725f6f6703fSSean Bruno 726f6f6703fSSean Bruno /* 727f6f6703fSSean Bruno * Reduce the MSS to blackhole value or to the default 728f6f6703fSSean Bruno * in an attempt to retransmit. 729f6f6703fSSean Bruno */ 730f6f6703fSSean Bruno #ifdef INET6 731f6f6703fSSean Bruno isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0; 732f6f6703fSSean Bruno if (isipv6 && 733f6f6703fSSean Bruno tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) { 734f6f6703fSSean Bruno /* Use the sysctl tuneable blackhole MSS. */ 735f6f6703fSSean Bruno tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss; 736f6f6703fSSean Bruno V_tcp_pmtud_blackhole_activated++; 737f6f6703fSSean Bruno } else if (isipv6) { 738f6f6703fSSean Bruno /* Use the default MSS. */ 739f6f6703fSSean Bruno tp->t_maxopd = V_tcp_v6mssdflt; 740f6f6703fSSean Bruno /* 741f6f6703fSSean Bruno * Disable Path MTU Discovery when we switch to 742f6f6703fSSean Bruno * minmss. 743f6f6703fSSean Bruno */ 744f6f6703fSSean Bruno tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; 745f6f6703fSSean Bruno V_tcp_pmtud_blackhole_activated_min_mss++; 746f6f6703fSSean Bruno } 747f6f6703fSSean Bruno #endif 748f6f6703fSSean Bruno #if defined(INET6) && defined(INET) 749f6f6703fSSean Bruno else 750f6f6703fSSean Bruno #endif 751f6f6703fSSean Bruno #ifdef INET 752f6f6703fSSean Bruno if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) { 753f6f6703fSSean Bruno /* Use the sysctl tuneable blackhole MSS. */ 754f6f6703fSSean Bruno tp->t_maxopd = V_tcp_pmtud_blackhole_mss; 755f6f6703fSSean Bruno V_tcp_pmtud_blackhole_activated++; 756f6f6703fSSean Bruno } else { 757f6f6703fSSean Bruno /* Use the default MSS. */ 758f6f6703fSSean Bruno tp->t_maxopd = V_tcp_mssdflt; 759f6f6703fSSean Bruno /* 760f6f6703fSSean Bruno * Disable Path MTU Discovery when we switch to 761f6f6703fSSean Bruno * minmss. 762f6f6703fSSean Bruno */ 763f6f6703fSSean Bruno tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; 764f6f6703fSSean Bruno V_tcp_pmtud_blackhole_activated_min_mss++; 765f6f6703fSSean Bruno } 766f6f6703fSSean Bruno #endif 767f6f6703fSSean Bruno tp->t_maxseg = tp->t_maxopd - optlen; 768f6f6703fSSean Bruno /* 769f6f6703fSSean Bruno * Reset the slow-start flight size 770f6f6703fSSean Bruno * as it may depend on the new MSS. 771f6f6703fSSean Bruno */ 772f6f6703fSSean Bruno if (CC_ALGO(tp)->conn_init != NULL) 773f6f6703fSSean Bruno CC_ALGO(tp)->conn_init(tp->ccv); 774f6f6703fSSean Bruno } else { 775f6f6703fSSean Bruno /* 776f6f6703fSSean Bruno * If further retransmissions are still unsuccessful 777f6f6703fSSean Bruno * with a lowered MTU, maybe this isn't a blackhole and 778f6f6703fSSean Bruno * we restore the previous MSS and blackhole detection 779f6f6703fSSean Bruno * flags. 780f6f6703fSSean Bruno */ 781f6f6703fSSean Bruno if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) && 782f6f6703fSSean Bruno (tp->t_rxtshift > 4)) { 783f6f6703fSSean Bruno tp->t_flags2 |= TF2_PLPMTU_PMTUD; 784f6f6703fSSean Bruno tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; 785f6f6703fSSean Bruno optlen = tp->t_maxopd - tp->t_maxseg; 786f6f6703fSSean Bruno tp->t_maxopd = tp->t_pmtud_saved_maxopd; 787f6f6703fSSean Bruno tp->t_maxseg = tp->t_maxopd - optlen; 788f6f6703fSSean Bruno V_tcp_pmtud_blackhole_failed++; 789f6f6703fSSean Bruno /* 790f6f6703fSSean Bruno * Reset the slow-start flight size as it 791f6f6703fSSean Bruno * may depend on the new MSS. 792f6f6703fSSean Bruno */ 793f6f6703fSSean Bruno if (CC_ALGO(tp)->conn_init != NULL) 794f6f6703fSSean Bruno CC_ALGO(tp)->conn_init(tp->ccv); 795f6f6703fSSean Bruno } 796f6f6703fSSean Bruno } 797f6f6703fSSean Bruno } 798f6f6703fSSean Bruno 799df8bae1dSRodney W. Grimes /* 80077339e1cSAndre Oppermann * Disable RFC1323 and SACK if we haven't got any response to 8017ceb7783SJesper Skriver * our third SYN to work-around some broken terminal servers 8027ceb7783SJesper Skriver * (most of which have hopefully been retired) that have bad VJ 8037ceb7783SJesper Skriver * header compression code which trashes TCP segments containing 8047ceb7783SJesper Skriver * unknown-to-them TCP options. 8057ceb7783SJesper Skriver */ 8066c0ef895SJohn Baldwin if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) && 8076c0ef895SJohn Baldwin (tp->t_rxtshift == 3)) 808c4ab59c1SAndre Oppermann tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT); 8097ceb7783SJesper Skriver /* 81097d8d152SAndre Oppermann * If we backed off this far, our srtt estimate is probably bogus. 81197d8d152SAndre Oppermann * Clobber it so we'll take the next rtt measurement as our srtt; 812df8bae1dSRodney W. Grimes * move the current srtt into rttvar to keep the current 813df8bae1dSRodney W. Grimes * retransmit times until then. 814df8bae1dSRodney W. Grimes */ 815df8bae1dSRodney W. Grimes if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 816fb59c426SYoshinobu Inoue #ifdef INET6 817fb59c426SYoshinobu Inoue if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) 818fb59c426SYoshinobu Inoue in6_losing(tp->t_inpcb); 819fb59c426SYoshinobu Inoue #endif 820df8bae1dSRodney W. Grimes tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 821df8bae1dSRodney W. Grimes tp->t_srtt = 0; 822df8bae1dSRodney W. Grimes } 823df8bae1dSRodney W. Grimes tp->snd_nxt = tp->snd_una; 8249d11646dSJeffrey Hsu tp->snd_recover = tp->snd_max; 82546f58482SJonathan Lemon /* 82674b48c1dSAndras Olah * Force a segment to be sent. 82774b48c1dSAndras Olah */ 82874b48c1dSAndras Olah tp->t_flags |= TF_ACKNOW; 82974b48c1dSAndras Olah /* 830df8bae1dSRodney W. Grimes * If timing a segment in this window, stop the timer. 831df8bae1dSRodney W. Grimes */ 8329b8b58e0SJonathan Lemon tp->t_rtttime = 0; 833dbc42409SLawrence Stewart 834b5af1b88SLawrence Stewart cc_cong_signal(tp, NULL, CC_RTO); 835dbc42409SLawrence Stewart 836df8bae1dSRodney W. Grimes (void) tcp_output(tp); 837df8bae1dSRodney W. Grimes 83885d94372SRobert Watson out: 8399b8b58e0SJonathan Lemon #ifdef TCPDEBUG 8401c53f806SRobert Watson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 841fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 8429b8b58e0SJonathan Lemon PRU_SLOWTIMO); 843df8bae1dSRodney W. Grimes #endif 84485d94372SRobert Watson if (tp != NULL) 8458501a69cSRobert Watson INP_WUNLOCK(inp); 84685d94372SRobert Watson if (headlocked) 847603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 8488b615593SMarko Zec CURVNET_RESTORE(); 84985d94372SRobert Watson } 85085d94372SRobert Watson 85185d94372SRobert Watson void 85285d94372SRobert Watson tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta) 85385d94372SRobert Watson { 85485d94372SRobert Watson struct callout *t_callout; 85585d94372SRobert Watson void *f_callout; 85687aedea4SKip Macy struct inpcb *inp = tp->t_inpcb; 857883831c6SAdrian Chadd int cpu = inp_to_cpuid(inp); 85885d94372SRobert Watson 85909fe6320SNavdeep Parhar #ifdef TCP_OFFLOAD 86009fe6320SNavdeep Parhar if (tp->t_flags & TF_TOE) 86109fe6320SNavdeep Parhar return; 86209fe6320SNavdeep Parhar #endif 86309fe6320SNavdeep Parhar 86485d94372SRobert Watson switch (timer_type) { 86585d94372SRobert Watson case TT_DELACK: 866e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_delack; 86785d94372SRobert Watson f_callout = tcp_timer_delack; 86885d94372SRobert Watson break; 86985d94372SRobert Watson case TT_REXMT: 870e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_rexmt; 87185d94372SRobert Watson f_callout = tcp_timer_rexmt; 87285d94372SRobert Watson break; 87385d94372SRobert Watson case TT_PERSIST: 874e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_persist; 87585d94372SRobert Watson f_callout = tcp_timer_persist; 87685d94372SRobert Watson break; 87785d94372SRobert Watson case TT_KEEP: 878e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_keep; 87985d94372SRobert Watson f_callout = tcp_timer_keep; 88085d94372SRobert Watson break; 88185d94372SRobert Watson case TT_2MSL: 882e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_2msl; 88385d94372SRobert Watson f_callout = tcp_timer_2msl; 88485d94372SRobert Watson break; 88585d94372SRobert Watson default: 88685d94372SRobert Watson panic("bad timer_type"); 88785d94372SRobert Watson } 88885d94372SRobert Watson if (delta == 0) { 88985d94372SRobert Watson callout_stop(t_callout); 89085d94372SRobert Watson } else { 89187aedea4SKip Macy callout_reset_on(t_callout, delta, f_callout, tp, cpu); 89285d94372SRobert Watson } 89385d94372SRobert Watson } 89485d94372SRobert Watson 89585d94372SRobert Watson int 89685d94372SRobert Watson tcp_timer_active(struct tcpcb *tp, int timer_type) 89785d94372SRobert Watson { 89885d94372SRobert Watson struct callout *t_callout; 89985d94372SRobert Watson 90085d94372SRobert Watson switch (timer_type) { 90185d94372SRobert Watson case TT_DELACK: 902e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_delack; 90385d94372SRobert Watson break; 90485d94372SRobert Watson case TT_REXMT: 905e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_rexmt; 90685d94372SRobert Watson break; 90785d94372SRobert Watson case TT_PERSIST: 908e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_persist; 90985d94372SRobert Watson break; 91085d94372SRobert Watson case TT_KEEP: 911e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_keep; 91285d94372SRobert Watson break; 91385d94372SRobert Watson case TT_2MSL: 914e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_2msl; 91585d94372SRobert Watson break; 91685d94372SRobert Watson default: 91785d94372SRobert Watson panic("bad timer_type"); 91885d94372SRobert Watson } 91985d94372SRobert Watson return callout_active(t_callout); 920df8bae1dSRodney W. Grimes } 921b8614722SMike Silbersack 922b8614722SMike Silbersack #define ticks_to_msecs(t) (1000*(t) / hz) 923b8614722SMike Silbersack 924b8614722SMike Silbersack void 9255b999a6bSDavide Italiano tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, 9265b999a6bSDavide Italiano struct xtcp_timer *xtimer) 927b8614722SMike Silbersack { 9285b999a6bSDavide Italiano sbintime_t now; 9295b999a6bSDavide Italiano 9305b999a6bSDavide Italiano bzero(xtimer, sizeof(*xtimer)); 931b8614722SMike Silbersack if (timer == NULL) 932b8614722SMike Silbersack return; 9335b999a6bSDavide Italiano now = getsbinuptime(); 934b8614722SMike Silbersack if (callout_active(&timer->tt_delack)) 9355b999a6bSDavide Italiano xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS; 936b8614722SMike Silbersack if (callout_active(&timer->tt_rexmt)) 9375b999a6bSDavide Italiano xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS; 938b8614722SMike Silbersack if (callout_active(&timer->tt_persist)) 9395b999a6bSDavide Italiano xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS; 940b8614722SMike Silbersack if (callout_active(&timer->tt_keep)) 9415b999a6bSDavide Italiano xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS; 942b8614722SMike Silbersack if (callout_active(&timer->tt_2msl)) 9435b999a6bSDavide Italiano xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS; 944b8614722SMike Silbersack xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime); 945b8614722SMike Silbersack } 946