1c398230bSWarner Losh /*- 2e79adb8eSGarrett Wollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 29e79adb8eSGarrett Wollman * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 30c3aac50fSPeter Wemm * $FreeBSD$ 31df8bae1dSRodney W. Grimes */ 32df8bae1dSRodney W. Grimes 33fb59c426SYoshinobu Inoue #include "opt_inet6.h" 340cc12cc5SJoerg Wunsch #include "opt_tcpdebug.h" 356d90faf3SPaul Saab #include "opt_tcp_sack.h" 360cc12cc5SJoerg Wunsch 37df8bae1dSRodney W. Grimes #include <sys/param.h> 3898163b98SPoul-Henning Kamp #include <sys/kernel.h> 39c74af4faSBruce Evans #include <sys/lock.h> 4008517d53SMike Silbersack #include <sys/mbuf.h> 41c74af4faSBruce Evans #include <sys/mutex.h> 42c74af4faSBruce Evans #include <sys/protosw.h> 43df8bae1dSRodney W. Grimes #include <sys/socket.h> 44df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 45c74af4faSBruce Evans #include <sys/sysctl.h> 46c74af4faSBruce Evans #include <sys/systm.h> 47e79adb8eSGarrett Wollman 48df8bae1dSRodney W. Grimes #include <net/route.h> 49df8bae1dSRodney W. Grimes 50df8bae1dSRodney W. Grimes #include <netinet/in.h> 51df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 52c74af4faSBruce Evans #include <netinet/in_systm.h> 53fb59c426SYoshinobu Inoue #ifdef INET6 54fb59c426SYoshinobu Inoue #include <netinet6/in6_pcb.h> 55fb59c426SYoshinobu Inoue #endif 56df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 57df8bae1dSRodney W. Grimes #include <netinet/tcp.h> 58df8bae1dSRodney W. Grimes #include <netinet/tcp_fsm.h> 59df8bae1dSRodney W. Grimes #include <netinet/tcp_timer.h> 60df8bae1dSRodney W. Grimes #include <netinet/tcp_var.h> 61df8bae1dSRodney W. Grimes #include <netinet/tcpip.h> 62af7a2999SDavid Greenman #ifdef TCPDEBUG 63af7a2999SDavid Greenman #include <netinet/tcp_debug.h> 64af7a2999SDavid Greenman #endif 65df8bae1dSRodney W. Grimes 669b8b58e0SJonathan Lemon int tcp_keepinit; 67ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 68ccb4d0c6SJonathan Lemon &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", ""); 697b40aa32SPaul Traina 709b8b58e0SJonathan Lemon int tcp_keepidle; 71ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 72ccb4d0c6SJonathan Lemon &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", ""); 7398163b98SPoul-Henning Kamp 749b8b58e0SJonathan Lemon int tcp_keepintvl; 75ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 76ccb4d0c6SJonathan Lemon &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", ""); 7798163b98SPoul-Henning Kamp 789b8b58e0SJonathan Lemon int tcp_delacktime; 796489fe65SAndre Oppermann SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, 806489fe65SAndre Oppermann &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 81ccb4d0c6SJonathan Lemon "Time before a delayed ACK is sent"); 829b8b58e0SJonathan Lemon 839b8b58e0SJonathan Lemon int tcp_msl; 84ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 85ccb4d0c6SJonathan Lemon &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 869b8b58e0SJonathan Lemon 87701bec5aSMatthew Dillon int tcp_rexmit_min; 88701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 896489fe65SAndre Oppermann &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 906489fe65SAndre Oppermann "Minimum Retransmission Timeout"); 91701bec5aSMatthew Dillon 92701bec5aSMatthew Dillon int tcp_rexmit_slop; 93701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 946489fe65SAndre Oppermann &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 956489fe65SAndre Oppermann "Retransmission Timer Slop"); 96701bec5aSMatthew Dillon 97c39a614eSRobert Watson static int always_keepalive = 1; 983d177f46SBill Fumerola SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 993d177f46SBill Fumerola &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 10034be9bf3SPoul-Henning Kamp 1017c72af87SMohan Srinivasan int tcp_fast_finwait2_recycle = 0; 1027c72af87SMohan Srinivasan SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 1036489fe65SAndre Oppermann &tcp_fast_finwait2_recycle, 0, 1046489fe65SAndre Oppermann "Recycle closed FIN_WAIT_2 connections faster"); 1057c72af87SMohan Srinivasan 1067c72af87SMohan Srinivasan int tcp_finwait2_timeout; 1077c72af87SMohan Srinivasan SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, 1086489fe65SAndre Oppermann &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); 1097c72af87SMohan Srinivasan 1107c72af87SMohan Srinivasan 1110312fbe9SPoul-Henning Kamp static int tcp_keepcnt = TCPTV_KEEPCNT; 1120312fbe9SPoul-Henning Kamp /* max idle probes */ 1139b8b58e0SJonathan Lemon int tcp_maxpersistidle; 1140312fbe9SPoul-Henning Kamp /* max idle time in persist */ 115df8bae1dSRodney W. Grimes int tcp_maxidle; 116e79adb8eSGarrett Wollman 117df8bae1dSRodney W. Grimes /* 118df8bae1dSRodney W. Grimes * Tcp protocol timeout routine called every 500 ms. 1199b8b58e0SJonathan Lemon * Updates timestamps used for TCP 120df8bae1dSRodney W. Grimes * causes finite state machine actions if timers expire. 121df8bae1dSRodney W. Grimes */ 122df8bae1dSRodney W. Grimes void 123df8bae1dSRodney W. Grimes tcp_slowtimo() 124df8bae1dSRodney W. Grimes { 12515bd2b43SDavid Greenman 126e79adb8eSGarrett Wollman tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 127607b0b0cSJonathan Lemon INP_INFO_WLOCK(&tcbinfo); 128607b0b0cSJonathan Lemon (void) tcp_timer_2msl_tw(0); 129607b0b0cSJonathan Lemon INP_INFO_WUNLOCK(&tcbinfo); 130df8bae1dSRodney W. Grimes } 131df8bae1dSRodney W. Grimes 1327d42e30cSJonathan Lemon int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 1337d42e30cSJonathan Lemon { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 1347d42e30cSJonathan Lemon 135df8bae1dSRodney W. Grimes int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 136f058535dSJeffrey Hsu { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 137df8bae1dSRodney W. Grimes 138f058535dSJeffrey Hsu static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 139e79adb8eSGarrett Wollman 140623dce13SRobert Watson static int tcp_timer_race; 141623dce13SRobert Watson SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race, 142623dce13SRobert Watson 0, "Count of t_inpcb races on tcp_discardcb"); 143623dce13SRobert Watson 144df8bae1dSRodney W. Grimes /* 145df8bae1dSRodney W. Grimes * TCP timer processing. 146df8bae1dSRodney W. Grimes */ 147f76fcf6dSJeffrey Hsu 1489b8b58e0SJonathan Lemon void 1499b8b58e0SJonathan Lemon tcp_timer_delack(xtp) 1509b8b58e0SJonathan Lemon void *xtp; 151df8bae1dSRodney W. Grimes { 1529b8b58e0SJonathan Lemon struct tcpcb *tp = xtp; 153f76fcf6dSJeffrey Hsu struct inpcb *inp; 154df8bae1dSRodney W. Grimes 155f76fcf6dSJeffrey Hsu INP_INFO_RLOCK(&tcbinfo); 156f76fcf6dSJeffrey Hsu inp = tp->t_inpcb; 157623dce13SRobert Watson /* 158623dce13SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 159623dce13SRobert Watson * tear-down mean we need it as a work-around for races between 160623dce13SRobert Watson * timers and tcp_discardcb(). 161623dce13SRobert Watson * 162623dce13SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL")); 163623dce13SRobert Watson */ 164b42ff86eSRobert Watson if (inp == NULL) { 165623dce13SRobert Watson tcp_timer_race++; 166abe239cfSJeffrey Hsu INP_INFO_RUNLOCK(&tcbinfo); 167abe239cfSJeffrey Hsu return; 168abe239cfSJeffrey Hsu } 169f76fcf6dSJeffrey Hsu INP_LOCK(inp); 170f76fcf6dSJeffrey Hsu INP_INFO_RUNLOCK(&tcbinfo); 171d8ab0ec6SRobert Watson if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_delack) 172d8ab0ec6SRobert Watson || !callout_active(tp->tt_delack)) { 173f76fcf6dSJeffrey Hsu INP_UNLOCK(inp); 1749b8b58e0SJonathan Lemon return; 1759b8b58e0SJonathan Lemon } 1769b8b58e0SJonathan Lemon callout_deactivate(tp->tt_delack); 177df8bae1dSRodney W. Grimes 1789b8b58e0SJonathan Lemon tp->t_flags |= TF_ACKNOW; 1799b8b58e0SJonathan Lemon tcpstat.tcps_delack++; 1809b8b58e0SJonathan Lemon (void) tcp_output(tp); 181f76fcf6dSJeffrey Hsu INP_UNLOCK(inp); 1829b8b58e0SJonathan Lemon } 1839b8b58e0SJonathan Lemon 1849b8b58e0SJonathan Lemon void 1859b8b58e0SJonathan Lemon tcp_timer_2msl(xtp) 1869b8b58e0SJonathan Lemon void *xtp; 1879b8b58e0SJonathan Lemon { 1889b8b58e0SJonathan Lemon struct tcpcb *tp = xtp; 189f76fcf6dSJeffrey Hsu struct inpcb *inp; 1909b8b58e0SJonathan Lemon #ifdef TCPDEBUG 1919b8b58e0SJonathan Lemon int ostate; 1929b8b58e0SJonathan Lemon 1939b8b58e0SJonathan Lemon ostate = tp->t_state; 1949b8b58e0SJonathan Lemon #endif 195623dce13SRobert Watson /* 196623dce13SRobert Watson * XXXRW: Does this actually happen? 197623dce13SRobert Watson */ 198f76fcf6dSJeffrey Hsu INP_INFO_WLOCK(&tcbinfo); 199f76fcf6dSJeffrey Hsu inp = tp->t_inpcb; 200623dce13SRobert Watson /* 201623dce13SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 202623dce13SRobert Watson * tear-down mean we need it as a work-around for races between 203623dce13SRobert Watson * timers and tcp_discardcb(). 204623dce13SRobert Watson * 205623dce13SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL")); 206623dce13SRobert Watson */ 207b42ff86eSRobert Watson if (inp == NULL) { 208623dce13SRobert Watson tcp_timer_race++; 209d8ab0ec6SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 210abe239cfSJeffrey Hsu return; 211abe239cfSJeffrey Hsu } 212f76fcf6dSJeffrey Hsu INP_LOCK(inp); 2136d90faf3SPaul Saab tcp_free_sackholes(tp); 214d8ab0ec6SRobert Watson if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_2msl) || 215d8ab0ec6SRobert Watson !callout_active(tp->tt_2msl)) { 216f76fcf6dSJeffrey Hsu INP_UNLOCK(tp->t_inpcb); 217f76fcf6dSJeffrey Hsu INP_INFO_WUNLOCK(&tcbinfo); 2189b8b58e0SJonathan Lemon return; 2199b8b58e0SJonathan Lemon } 2209b8b58e0SJonathan Lemon callout_deactivate(tp->tt_2msl); 221df8bae1dSRodney W. Grimes /* 222df8bae1dSRodney W. Grimes * 2 MSL timeout in shutdown went off. If we're closed but 223df8bae1dSRodney W. Grimes * still waiting for peer to close and connection has been idle 224df8bae1dSRodney W. Grimes * too long, or if 2MSL time is up from TIME_WAIT, delete connection 225df8bae1dSRodney W. Grimes * control block. Otherwise, check again in a bit. 2267c72af87SMohan Srinivasan * 2277c72af87SMohan Srinivasan * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 2287c72af87SMohan Srinivasan * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 2297c72af87SMohan Srinivasan * Ignore fact that there were recent incoming segments. 230df8bae1dSRodney W. Grimes */ 2317c72af87SMohan Srinivasan if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 2327c72af87SMohan Srinivasan tp->t_inpcb && tp->t_inpcb->inp_socket && 2337c72af87SMohan Srinivasan (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 2347c72af87SMohan Srinivasan tcpstat.tcps_finwait2_drops++; 2357c72af87SMohan Srinivasan tp = tcp_close(tp); 2367c72af87SMohan Srinivasan } else { 237df8bae1dSRodney W. Grimes if (tp->t_state != TCPS_TIME_WAIT && 2389b8b58e0SJonathan Lemon (ticks - tp->t_rcvtime) <= tcp_maxidle) 2399b8b58e0SJonathan Lemon callout_reset(tp->tt_2msl, tcp_keepintvl, 2409b8b58e0SJonathan Lemon tcp_timer_2msl, tp); 241df8bae1dSRodney W. Grimes else 242df8bae1dSRodney W. Grimes tp = tcp_close(tp); 2437c72af87SMohan Srinivasan } 244df8bae1dSRodney W. Grimes 2459b8b58e0SJonathan Lemon #ifdef TCPDEBUG 2461c53f806SRobert Watson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 247fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 2489b8b58e0SJonathan Lemon PRU_SLOWTIMO); 2499b8b58e0SJonathan Lemon #endif 2501c53f806SRobert Watson if (tp != NULL) 251f76fcf6dSJeffrey Hsu INP_UNLOCK(inp); 252f76fcf6dSJeffrey Hsu INP_INFO_WUNLOCK(&tcbinfo); 2539b8b58e0SJonathan Lemon } 2549b8b58e0SJonathan Lemon 255cce83ffbSRobert Watson /* 256751dea29SRuslan Ermilov * The timed wait queue contains references to each of the TCP sessions 257751dea29SRuslan Ermilov * currently in the TIME_WAIT state. The queue pointers, including the 258751dea29SRuslan Ermilov * queue pointers in each tcptw structure, are protected using the global 259751dea29SRuslan Ermilov * tcbinfo lock, which must be held over queue iteration and modification. 260cce83ffbSRobert Watson */ 261751dea29SRuslan Ermilov static TAILQ_HEAD(, tcptw) twq_2msl; 262340c35deSJonathan Lemon 263607b0b0cSJonathan Lemon void 264607b0b0cSJonathan Lemon tcp_timer_init(void) 265607b0b0cSJonathan Lemon { 266607b0b0cSJonathan Lemon 267751dea29SRuslan Ermilov TAILQ_INIT(&twq_2msl); 268607b0b0cSJonathan Lemon } 269607b0b0cSJonathan Lemon 270607b0b0cSJonathan Lemon void 271751dea29SRuslan Ermilov tcp_timer_2msl_reset(struct tcptw *tw, int rearm) 272607b0b0cSJonathan Lemon { 273607b0b0cSJonathan Lemon 274cce83ffbSRobert Watson INP_INFO_WLOCK_ASSERT(&tcbinfo); 27579a9e59cSRobert Watson INP_LOCK_ASSERT(tw->tw_inpcb); 276464469c7SMohan Srinivasan if (rearm) 277751dea29SRuslan Ermilov TAILQ_REMOVE(&twq_2msl, tw, tw_2msl); 278751dea29SRuslan Ermilov tw->tw_time = ticks + 2 * tcp_msl; 279751dea29SRuslan Ermilov TAILQ_INSERT_TAIL(&twq_2msl, tw, tw_2msl); 280607b0b0cSJonathan Lemon } 281607b0b0cSJonathan Lemon 282607b0b0cSJonathan Lemon void 283607b0b0cSJonathan Lemon tcp_timer_2msl_stop(struct tcptw *tw) 284607b0b0cSJonathan Lemon { 285607b0b0cSJonathan Lemon 286cce83ffbSRobert Watson INP_INFO_WLOCK_ASSERT(&tcbinfo); 287751dea29SRuslan Ermilov TAILQ_REMOVE(&twq_2msl, tw, tw_2msl); 288607b0b0cSJonathan Lemon } 289607b0b0cSJonathan Lemon 290607b0b0cSJonathan Lemon struct tcptw * 291607b0b0cSJonathan Lemon tcp_timer_2msl_tw(int reuse) 292607b0b0cSJonathan Lemon { 293751dea29SRuslan Ermilov struct tcptw *tw; 294607b0b0cSJonathan Lemon 295cce83ffbSRobert Watson INP_INFO_WLOCK_ASSERT(&tcbinfo); 2962c857a9bSGleb Smirnoff for (;;) { 297751dea29SRuslan Ermilov tw = TAILQ_FIRST(&twq_2msl); 298751dea29SRuslan Ermilov if (tw == NULL || (!reuse && tw->tw_time > ticks)) 2992c857a9bSGleb Smirnoff break; 300340c35deSJonathan Lemon INP_LOCK(tw->tw_inpcb); 301623dce13SRobert Watson tcp_twclose(tw, reuse); 3022c857a9bSGleb Smirnoff if (reuse) 3032c857a9bSGleb Smirnoff return (tw); 3042c857a9bSGleb Smirnoff } 305607b0b0cSJonathan Lemon return (NULL); 306340c35deSJonathan Lemon } 307340c35deSJonathan Lemon 308340c35deSJonathan Lemon void 3099b8b58e0SJonathan Lemon tcp_timer_keep(xtp) 3109b8b58e0SJonathan Lemon void *xtp; 3119b8b58e0SJonathan Lemon { 3129b8b58e0SJonathan Lemon struct tcpcb *tp = xtp; 31308517d53SMike Silbersack struct tcptemp *t_template; 314f76fcf6dSJeffrey Hsu struct inpcb *inp; 3159b8b58e0SJonathan Lemon #ifdef TCPDEBUG 3169b8b58e0SJonathan Lemon int ostate; 3179b8b58e0SJonathan Lemon 3189b8b58e0SJonathan Lemon ostate = tp->t_state; 3199b8b58e0SJonathan Lemon #endif 320f76fcf6dSJeffrey Hsu INP_INFO_WLOCK(&tcbinfo); 321f76fcf6dSJeffrey Hsu inp = tp->t_inpcb; 322623dce13SRobert Watson /* 323623dce13SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 324623dce13SRobert Watson * tear-down mean we need it as a work-around for races between 325623dce13SRobert Watson * timers and tcp_discardcb(). 326623dce13SRobert Watson * 327623dce13SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL")); 328623dce13SRobert Watson */ 329623dce13SRobert Watson if (inp == NULL) { 330623dce13SRobert Watson tcp_timer_race++; 331d8ab0ec6SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 332abe239cfSJeffrey Hsu return; 333abe239cfSJeffrey Hsu } 334f76fcf6dSJeffrey Hsu INP_LOCK(inp); 335d8ab0ec6SRobert Watson if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_keep) 336d8ab0ec6SRobert Watson || !callout_active(tp->tt_keep)) { 337f76fcf6dSJeffrey Hsu INP_UNLOCK(inp); 338f76fcf6dSJeffrey Hsu INP_INFO_WUNLOCK(&tcbinfo); 3399b8b58e0SJonathan Lemon return; 3409b8b58e0SJonathan Lemon } 3419b8b58e0SJonathan Lemon callout_deactivate(tp->tt_keep); 3429b8b58e0SJonathan Lemon /* 3439b8b58e0SJonathan Lemon * Keep-alive timer went off; send something 3449b8b58e0SJonathan Lemon * or drop connection if idle for too long. 3459b8b58e0SJonathan Lemon */ 3469b8b58e0SJonathan Lemon tcpstat.tcps_keeptimeo++; 3479b8b58e0SJonathan Lemon if (tp->t_state < TCPS_ESTABLISHED) 3489b8b58e0SJonathan Lemon goto dropit; 3492a074620SSam Leffler if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && 3509b8b58e0SJonathan Lemon tp->t_state <= TCPS_CLOSING) { 3519b8b58e0SJonathan Lemon if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle) 3529b8b58e0SJonathan Lemon goto dropit; 3539b8b58e0SJonathan Lemon /* 3549b8b58e0SJonathan Lemon * Send a packet designed to force a response 3559b8b58e0SJonathan Lemon * if the peer is up and reachable: 3569b8b58e0SJonathan Lemon * either an ACK if the connection is still alive, 3579b8b58e0SJonathan Lemon * or an RST if the peer has closed the connection 3589b8b58e0SJonathan Lemon * due to timeout or reboot. 3599b8b58e0SJonathan Lemon * Using sequence number tp->snd_una-1 3609b8b58e0SJonathan Lemon * causes the transmitted zero-length segment 3619b8b58e0SJonathan Lemon * to lie outside the receive window; 3629b8b58e0SJonathan Lemon * by the protocol spec, this requires the 3639b8b58e0SJonathan Lemon * correspondent TCP to respond. 3649b8b58e0SJonathan Lemon */ 3659b8b58e0SJonathan Lemon tcpstat.tcps_keepprobe++; 36679909384SJonathan Lemon t_template = tcpip_maketemplate(inp); 36708517d53SMike Silbersack if (t_template) { 36808517d53SMike Silbersack tcp_respond(tp, t_template->tt_ipgen, 36908517d53SMike Silbersack &t_template->tt_t, (struct mbuf *)NULL, 3709b8b58e0SJonathan Lemon tp->rcv_nxt, tp->snd_una - 1, 0); 37108517d53SMike Silbersack (void) m_free(dtom(t_template)); 37208517d53SMike Silbersack } 3739b8b58e0SJonathan Lemon callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp); 3744cc20ab1SSeigo Tanimura } else 3759b8b58e0SJonathan Lemon callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp); 3769b8b58e0SJonathan Lemon 3779b8b58e0SJonathan Lemon #ifdef TCPDEBUG 3782a074620SSam Leffler if (inp->inp_socket->so_options & SO_DEBUG) 379fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 3809b8b58e0SJonathan Lemon PRU_SLOWTIMO); 3819b8b58e0SJonathan Lemon #endif 382f76fcf6dSJeffrey Hsu INP_UNLOCK(inp); 383f76fcf6dSJeffrey Hsu INP_INFO_WUNLOCK(&tcbinfo); 3849b8b58e0SJonathan Lemon return; 3859b8b58e0SJonathan Lemon 3869b8b58e0SJonathan Lemon dropit: 3879b8b58e0SJonathan Lemon tcpstat.tcps_keepdrops++; 3889b8b58e0SJonathan Lemon tp = tcp_drop(tp, ETIMEDOUT); 3899b8b58e0SJonathan Lemon 3909b8b58e0SJonathan Lemon #ifdef TCPDEBUG 3911c53f806SRobert Watson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 392fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 3939b8b58e0SJonathan Lemon PRU_SLOWTIMO); 3949b8b58e0SJonathan Lemon #endif 3951c53f806SRobert Watson if (tp != NULL) 396f76fcf6dSJeffrey Hsu INP_UNLOCK(tp->t_inpcb); 397f76fcf6dSJeffrey Hsu INP_INFO_WUNLOCK(&tcbinfo); 3989b8b58e0SJonathan Lemon } 3999b8b58e0SJonathan Lemon 4009b8b58e0SJonathan Lemon void 4019b8b58e0SJonathan Lemon tcp_timer_persist(xtp) 4029b8b58e0SJonathan Lemon void *xtp; 4039b8b58e0SJonathan Lemon { 4049b8b58e0SJonathan Lemon struct tcpcb *tp = xtp; 405f76fcf6dSJeffrey Hsu struct inpcb *inp; 4069b8b58e0SJonathan Lemon #ifdef TCPDEBUG 4079b8b58e0SJonathan Lemon int ostate; 4089b8b58e0SJonathan Lemon 4099b8b58e0SJonathan Lemon ostate = tp->t_state; 4109b8b58e0SJonathan Lemon #endif 411f76fcf6dSJeffrey Hsu INP_INFO_WLOCK(&tcbinfo); 412f76fcf6dSJeffrey Hsu inp = tp->t_inpcb; 413623dce13SRobert Watson /* 414623dce13SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 415623dce13SRobert Watson * tear-down mean we need it as a work-around for races between 416623dce13SRobert Watson * timers and tcp_discardcb(). 417623dce13SRobert Watson * 418623dce13SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL")); 419623dce13SRobert Watson */ 420623dce13SRobert Watson if (inp == NULL) { 421623dce13SRobert Watson tcp_timer_race++; 422d8ab0ec6SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 423abe239cfSJeffrey Hsu return; 424abe239cfSJeffrey Hsu } 425f76fcf6dSJeffrey Hsu INP_LOCK(inp); 426d8ab0ec6SRobert Watson if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_persist) 427d8ab0ec6SRobert Watson || !callout_active(tp->tt_persist)) { 428f76fcf6dSJeffrey Hsu INP_UNLOCK(inp); 429f76fcf6dSJeffrey Hsu INP_INFO_WUNLOCK(&tcbinfo); 4309b8b58e0SJonathan Lemon return; 4319b8b58e0SJonathan Lemon } 4329b8b58e0SJonathan Lemon callout_deactivate(tp->tt_persist); 4339b8b58e0SJonathan Lemon /* 4349b8b58e0SJonathan Lemon * Persistance timer into zero window. 4359b8b58e0SJonathan Lemon * Force a byte to be output, if possible. 4369b8b58e0SJonathan Lemon */ 4379b8b58e0SJonathan Lemon tcpstat.tcps_persisttimeo++; 4389b8b58e0SJonathan Lemon /* 4399b8b58e0SJonathan Lemon * Hack: if the peer is dead/unreachable, we do not 4409b8b58e0SJonathan Lemon * time out if the window is closed. After a full 4419b8b58e0SJonathan Lemon * backoff, drop the connection if the idle time 4429b8b58e0SJonathan Lemon * (no responses to probes) reaches the maximum 4439b8b58e0SJonathan Lemon * backoff that we would use if retransmitting. 4449b8b58e0SJonathan Lemon */ 4459b8b58e0SJonathan Lemon if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 4469b8b58e0SJonathan Lemon ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle || 4479b8b58e0SJonathan Lemon (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 4489b8b58e0SJonathan Lemon tcpstat.tcps_persistdrop++; 4499b8b58e0SJonathan Lemon tp = tcp_drop(tp, ETIMEDOUT); 4509b8b58e0SJonathan Lemon goto out; 4519b8b58e0SJonathan Lemon } 4529b8b58e0SJonathan Lemon tcp_setpersist(tp); 4532cdbfa66SPaul Saab tp->t_flags |= TF_FORCEDATA; 4549b8b58e0SJonathan Lemon (void) tcp_output(tp); 4552cdbfa66SPaul Saab tp->t_flags &= ~TF_FORCEDATA; 4569b8b58e0SJonathan Lemon 4579b8b58e0SJonathan Lemon out: 4589b8b58e0SJonathan Lemon #ifdef TCPDEBUG 459ffb761f6SGleb Smirnoff if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 460ffb761f6SGleb Smirnoff tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 4619b8b58e0SJonathan Lemon #endif 4621c53f806SRobert Watson if (tp != NULL) 463f76fcf6dSJeffrey Hsu INP_UNLOCK(inp); 464f76fcf6dSJeffrey Hsu INP_INFO_WUNLOCK(&tcbinfo); 4659b8b58e0SJonathan Lemon } 4669b8b58e0SJonathan Lemon 4679b8b58e0SJonathan Lemon void 4689b8b58e0SJonathan Lemon tcp_timer_rexmt(xtp) 4699b8b58e0SJonathan Lemon void *xtp; 4709b8b58e0SJonathan Lemon { 4719b8b58e0SJonathan Lemon struct tcpcb *tp = xtp; 4729b8b58e0SJonathan Lemon int rexmt; 473f76fcf6dSJeffrey Hsu int headlocked; 474f76fcf6dSJeffrey Hsu struct inpcb *inp; 4759b8b58e0SJonathan Lemon #ifdef TCPDEBUG 4769b8b58e0SJonathan Lemon int ostate; 4779b8b58e0SJonathan Lemon 4789b8b58e0SJonathan Lemon ostate = tp->t_state; 4799b8b58e0SJonathan Lemon #endif 480f76fcf6dSJeffrey Hsu INP_INFO_WLOCK(&tcbinfo); 481f76fcf6dSJeffrey Hsu headlocked = 1; 482f76fcf6dSJeffrey Hsu inp = tp->t_inpcb; 483623dce13SRobert Watson /* 484623dce13SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 485623dce13SRobert Watson * tear-down mean we need it as a work-around for races between 486623dce13SRobert Watson * timers and tcp_discardcb(). 487623dce13SRobert Watson * 488623dce13SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL")); 489623dce13SRobert Watson */ 490623dce13SRobert Watson if (inp == NULL) { 491623dce13SRobert Watson tcp_timer_race++; 492d8ab0ec6SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 493abe239cfSJeffrey Hsu return; 494abe239cfSJeffrey Hsu } 495f76fcf6dSJeffrey Hsu INP_LOCK(inp); 496d8ab0ec6SRobert Watson if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_rexmt) 497d8ab0ec6SRobert Watson || !callout_active(tp->tt_rexmt)) { 498f76fcf6dSJeffrey Hsu INP_UNLOCK(inp); 499f76fcf6dSJeffrey Hsu INP_INFO_WUNLOCK(&tcbinfo); 5009b8b58e0SJonathan Lemon return; 5019b8b58e0SJonathan Lemon } 5029b8b58e0SJonathan Lemon callout_deactivate(tp->tt_rexmt); 5036d90faf3SPaul Saab tcp_free_sackholes(tp); 504df8bae1dSRodney W. Grimes /* 505df8bae1dSRodney W. Grimes * Retransmission timer went off. Message has not 506df8bae1dSRodney W. Grimes * been acked within retransmit interval. Back off 507df8bae1dSRodney W. Grimes * to a longer retransmit interval and retransmit one segment. 508df8bae1dSRodney W. Grimes */ 509df8bae1dSRodney W. Grimes if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 510df8bae1dSRodney W. Grimes tp->t_rxtshift = TCP_MAXRXTSHIFT; 511df8bae1dSRodney W. Grimes tcpstat.tcps_timeoutdrop++; 512df8bae1dSRodney W. Grimes tp = tcp_drop(tp, tp->t_softerror ? 513df8bae1dSRodney W. Grimes tp->t_softerror : ETIMEDOUT); 5149b8b58e0SJonathan Lemon goto out; 5159b8b58e0SJonathan Lemon } 516f76fcf6dSJeffrey Hsu INP_INFO_WUNLOCK(&tcbinfo); 517f76fcf6dSJeffrey Hsu headlocked = 0; 5189b8b58e0SJonathan Lemon if (tp->t_rxtshift == 1) { 5199b8b58e0SJonathan Lemon /* 5209b8b58e0SJonathan Lemon * first retransmit; record ssthresh and cwnd so they can 5219b8b58e0SJonathan Lemon * be recovered if this turns out to be a "bad" retransmit. 5229b8b58e0SJonathan Lemon * A retransmit is considered "bad" if an ACK for this 5239b8b58e0SJonathan Lemon * segment is received within RTT/2 interval; the assumption 5249b8b58e0SJonathan Lemon * here is that the ACK was already in flight. See 5259b8b58e0SJonathan Lemon * "On Estimating End-to-End Network Path Properties" by 5269b8b58e0SJonathan Lemon * Allman and Paxson for more details. 5279b8b58e0SJonathan Lemon */ 5289b8b58e0SJonathan Lemon tp->snd_cwnd_prev = tp->snd_cwnd; 5299b8b58e0SJonathan Lemon tp->snd_ssthresh_prev = tp->snd_ssthresh; 5309d11646dSJeffrey Hsu tp->snd_recover_prev = tp->snd_recover; 5319d11646dSJeffrey Hsu if (IN_FASTRECOVERY(tp)) 5329d11646dSJeffrey Hsu tp->t_flags |= TF_WASFRECOVERY; 5339d11646dSJeffrey Hsu else 5349d11646dSJeffrey Hsu tp->t_flags &= ~TF_WASFRECOVERY; 5359b8b58e0SJonathan Lemon tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 536df8bae1dSRodney W. Grimes } 537df8bae1dSRodney W. Grimes tcpstat.tcps_rexmttimeo++; 5387d42e30cSJonathan Lemon if (tp->t_state == TCPS_SYN_SENT) 5397d42e30cSJonathan Lemon rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; 5407d42e30cSJonathan Lemon else 541df8bae1dSRodney W. Grimes rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 542df8bae1dSRodney W. Grimes TCPT_RANGESET(tp->t_rxtcur, rexmt, 543df8bae1dSRodney W. Grimes tp->t_rttmin, TCPTV_REXMTMAX); 544df8bae1dSRodney W. Grimes /* 545c94c54e4SAndre Oppermann * Disable rfc1323 if we havn't got any response to 5467ceb7783SJesper Skriver * our third SYN to work-around some broken terminal servers 5477ceb7783SJesper Skriver * (most of which have hopefully been retired) that have bad VJ 5487ceb7783SJesper Skriver * header compression code which trashes TCP segments containing 5497ceb7783SJesper Skriver * unknown-to-them TCP options. 5507ceb7783SJesper Skriver */ 5517ceb7783SJesper Skriver if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) 552c94c54e4SAndre Oppermann tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP); 5537ceb7783SJesper Skriver /* 55497d8d152SAndre Oppermann * If we backed off this far, our srtt estimate is probably bogus. 55597d8d152SAndre Oppermann * Clobber it so we'll take the next rtt measurement as our srtt; 556df8bae1dSRodney W. Grimes * move the current srtt into rttvar to keep the current 557df8bae1dSRodney W. Grimes * retransmit times until then. 558df8bae1dSRodney W. Grimes */ 559df8bae1dSRodney W. Grimes if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 560fb59c426SYoshinobu Inoue #ifdef INET6 561fb59c426SYoshinobu Inoue if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) 562fb59c426SYoshinobu Inoue in6_losing(tp->t_inpcb); 563fb59c426SYoshinobu Inoue else 564fb59c426SYoshinobu Inoue #endif 565df8bae1dSRodney W. Grimes tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 566df8bae1dSRodney W. Grimes tp->t_srtt = 0; 567df8bae1dSRodney W. Grimes } 568df8bae1dSRodney W. Grimes tp->snd_nxt = tp->snd_una; 5699d11646dSJeffrey Hsu tp->snd_recover = tp->snd_max; 57046f58482SJonathan Lemon /* 57174b48c1dSAndras Olah * Force a segment to be sent. 57274b48c1dSAndras Olah */ 57374b48c1dSAndras Olah tp->t_flags |= TF_ACKNOW; 57474b48c1dSAndras Olah /* 575df8bae1dSRodney W. Grimes * If timing a segment in this window, stop the timer. 576df8bae1dSRodney W. Grimes */ 5779b8b58e0SJonathan Lemon tp->t_rtttime = 0; 578df8bae1dSRodney W. Grimes /* 579df8bae1dSRodney W. Grimes * Close the congestion window down to one segment 580df8bae1dSRodney W. Grimes * (we'll open it by one segment for each ack we get). 581df8bae1dSRodney W. Grimes * Since we probably have a window's worth of unacked 582df8bae1dSRodney W. Grimes * data accumulated, this "slow start" keeps us from 583df8bae1dSRodney W. Grimes * dumping all that data as back-to-back packets (which 584df8bae1dSRodney W. Grimes * might overwhelm an intermediate gateway). 585df8bae1dSRodney W. Grimes * 586df8bae1dSRodney W. Grimes * There are two phases to the opening: Initially we 587df8bae1dSRodney W. Grimes * open by one mss on each ack. This makes the window 588df8bae1dSRodney W. Grimes * size increase exponentially with time. If the 589df8bae1dSRodney W. Grimes * window is larger than the path can handle, this 590df8bae1dSRodney W. Grimes * exponential growth results in dropped packet(s) 591df8bae1dSRodney W. Grimes * almost immediately. To get more time between 592df8bae1dSRodney W. Grimes * drops but still "push" the network to take advantage 593df8bae1dSRodney W. Grimes * of improving conditions, we switch from exponential 594df8bae1dSRodney W. Grimes * to linear window opening at some threshhold size. 595df8bae1dSRodney W. Grimes * For a threshhold, we use half the current window 596df8bae1dSRodney W. Grimes * size, truncated to a multiple of the mss. 597df8bae1dSRodney W. Grimes * 598df8bae1dSRodney W. Grimes * (the minimum cwnd that will give us exponential 599df8bae1dSRodney W. Grimes * growth is 2 mss. We don't allow the threshhold 600df8bae1dSRodney W. Grimes * to go below this.) 601df8bae1dSRodney W. Grimes */ 602df8bae1dSRodney W. Grimes { 603df8bae1dSRodney W. Grimes u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; 604df8bae1dSRodney W. Grimes if (win < 2) 605df8bae1dSRodney W. Grimes win = 2; 606df8bae1dSRodney W. Grimes tp->snd_cwnd = tp->t_maxseg; 607df8bae1dSRodney W. Grimes tp->snd_ssthresh = win * tp->t_maxseg; 608df8bae1dSRodney W. Grimes tp->t_dupacks = 0; 609df8bae1dSRodney W. Grimes } 6109d11646dSJeffrey Hsu EXIT_FASTRECOVERY(tp); 611df8bae1dSRodney W. Grimes (void) tcp_output(tp); 612df8bae1dSRodney W. Grimes 6139b8b58e0SJonathan Lemon out: 6149b8b58e0SJonathan Lemon #ifdef TCPDEBUG 6151c53f806SRobert Watson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 616fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 6179b8b58e0SJonathan Lemon PRU_SLOWTIMO); 618df8bae1dSRodney W. Grimes #endif 6191c53f806SRobert Watson if (tp != NULL) 620f76fcf6dSJeffrey Hsu INP_UNLOCK(inp); 621f76fcf6dSJeffrey Hsu if (headlocked) 622f76fcf6dSJeffrey Hsu INP_INFO_WUNLOCK(&tcbinfo); 623df8bae1dSRodney W. Grimes } 624