1c398230bSWarner Losh /*- 2e79adb8eSGarrett Wollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 29e79adb8eSGarrett Wollman * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 30df8bae1dSRodney W. Grimes */ 31df8bae1dSRodney W. Grimes 324b421e2dSMike Silbersack #include <sys/cdefs.h> 334b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 344b421e2dSMike Silbersack 35fb59c426SYoshinobu Inoue #include "opt_inet6.h" 360cc12cc5SJoerg Wunsch #include "opt_tcpdebug.h" 370cc12cc5SJoerg Wunsch 38df8bae1dSRodney W. Grimes #include <sys/param.h> 3998163b98SPoul-Henning Kamp #include <sys/kernel.h> 40c74af4faSBruce Evans #include <sys/lock.h> 4108517d53SMike Silbersack #include <sys/mbuf.h> 42c74af4faSBruce Evans #include <sys/mutex.h> 43c74af4faSBruce Evans #include <sys/protosw.h> 44df8bae1dSRodney W. Grimes #include <sys/socket.h> 45df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 46c74af4faSBruce Evans #include <sys/sysctl.h> 47c74af4faSBruce Evans #include <sys/systm.h> 48603724d3SBjoern A. Zeeb #include <sys/vimage.h> 49e79adb8eSGarrett Wollman 50df8bae1dSRodney W. Grimes #include <net/route.h> 51df8bae1dSRodney W. Grimes 52df8bae1dSRodney W. Grimes #include <netinet/in.h> 53df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 54c74af4faSBruce Evans #include <netinet/in_systm.h> 55fb59c426SYoshinobu Inoue #ifdef INET6 56fb59c426SYoshinobu Inoue #include <netinet6/in6_pcb.h> 57fb59c426SYoshinobu Inoue #endif 58df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 59df8bae1dSRodney W. Grimes #include <netinet/tcp.h> 60df8bae1dSRodney W. Grimes #include <netinet/tcp_fsm.h> 61df8bae1dSRodney W. Grimes #include <netinet/tcp_timer.h> 62df8bae1dSRodney W. Grimes #include <netinet/tcp_var.h> 63df8bae1dSRodney W. Grimes #include <netinet/tcpip.h> 64af7a2999SDavid Greenman #ifdef TCPDEBUG 65af7a2999SDavid Greenman #include <netinet/tcp_debug.h> 66af7a2999SDavid Greenman #endif 67df8bae1dSRodney W. Grimes 689b8b58e0SJonathan Lemon int tcp_keepinit; 69ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 7041698ebfSTom Rhodes &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection"); 717b40aa32SPaul Traina 729b8b58e0SJonathan Lemon int tcp_keepidle; 73ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 7441698ebfSTom Rhodes &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin"); 7598163b98SPoul-Henning Kamp 769b8b58e0SJonathan Lemon int tcp_keepintvl; 77ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 7841698ebfSTom Rhodes &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes"); 7998163b98SPoul-Henning Kamp 809b8b58e0SJonathan Lemon int tcp_delacktime; 816489fe65SAndre Oppermann SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, 826489fe65SAndre Oppermann &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 83ccb4d0c6SJonathan Lemon "Time before a delayed ACK is sent"); 849b8b58e0SJonathan Lemon 859b8b58e0SJonathan Lemon int tcp_msl; 86ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 87ccb4d0c6SJonathan Lemon &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 889b8b58e0SJonathan Lemon 89701bec5aSMatthew Dillon int tcp_rexmit_min; 90701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 916489fe65SAndre Oppermann &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 926489fe65SAndre Oppermann "Minimum Retransmission Timeout"); 93701bec5aSMatthew Dillon 94701bec5aSMatthew Dillon int tcp_rexmit_slop; 95701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 966489fe65SAndre Oppermann &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 976489fe65SAndre Oppermann "Retransmission Timer Slop"); 98701bec5aSMatthew Dillon 99c39a614eSRobert Watson static int always_keepalive = 1; 1003d177f46SBill Fumerola SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 1013d177f46SBill Fumerola &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 10234be9bf3SPoul-Henning Kamp 1037c72af87SMohan Srinivasan int tcp_fast_finwait2_recycle = 0; 1047c72af87SMohan Srinivasan SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 1056489fe65SAndre Oppermann &tcp_fast_finwait2_recycle, 0, 1066489fe65SAndre Oppermann "Recycle closed FIN_WAIT_2 connections faster"); 1077c72af87SMohan Srinivasan 1087c72af87SMohan Srinivasan int tcp_finwait2_timeout; 1097c72af87SMohan Srinivasan SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, 1106489fe65SAndre Oppermann &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); 1117c72af87SMohan Srinivasan 1127c72af87SMohan Srinivasan 1130312fbe9SPoul-Henning Kamp static int tcp_keepcnt = TCPTV_KEEPCNT; 1140312fbe9SPoul-Henning Kamp /* max idle probes */ 1159b8b58e0SJonathan Lemon int tcp_maxpersistidle; 1160312fbe9SPoul-Henning Kamp /* max idle time in persist */ 117df8bae1dSRodney W. Grimes int tcp_maxidle; 118e79adb8eSGarrett Wollman 119df8bae1dSRodney W. Grimes /* 120df8bae1dSRodney W. Grimes * Tcp protocol timeout routine called every 500 ms. 1219b8b58e0SJonathan Lemon * Updates timestamps used for TCP 122df8bae1dSRodney W. Grimes * causes finite state machine actions if timers expire. 123df8bae1dSRodney W. Grimes */ 124df8bae1dSRodney W. Grimes void 125e2f2059fSMike Silbersack tcp_slowtimo(void) 126df8bae1dSRodney W. Grimes { 1278b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 12815bd2b43SDavid Greenman 1298b615593SMarko Zec VNET_LIST_RLOCK(); 1308b615593SMarko Zec VNET_FOREACH(vnet_iter) { 1318b615593SMarko Zec CURVNET_SET(vnet_iter); 1328b615593SMarko Zec INIT_VNET_INET(vnet_iter); 133e79adb8eSGarrett Wollman tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 134603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_tcbinfo); 1352104448fSAndre Oppermann (void) tcp_tw_2msl_scan(0); 136603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 1378b615593SMarko Zec CURVNET_RESTORE(); 1388b615593SMarko Zec } 1398b615593SMarko Zec VNET_LIST_RUNLOCK(); 140df8bae1dSRodney W. Grimes } 141df8bae1dSRodney W. Grimes 1427d42e30cSJonathan Lemon int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 1437d42e30cSJonathan Lemon { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 1447d42e30cSJonathan Lemon 145df8bae1dSRodney W. Grimes int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 146f058535dSJeffrey Hsu { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 147df8bae1dSRodney W. Grimes 148f058535dSJeffrey Hsu static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 149e79adb8eSGarrett Wollman 150623dce13SRobert Watson static int tcp_timer_race; 151623dce13SRobert Watson SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race, 152623dce13SRobert Watson 0, "Count of t_inpcb races on tcp_discardcb"); 153623dce13SRobert Watson 154df8bae1dSRodney W. Grimes /* 155df8bae1dSRodney W. Grimes * TCP timer processing. 156df8bae1dSRodney W. Grimes */ 15785d94372SRobert Watson 15885d94372SRobert Watson void 15985d94372SRobert Watson tcp_timer_delack(void *xtp) 160df8bae1dSRodney W. Grimes { 16185d94372SRobert Watson struct tcpcb *tp = xtp; 16285d94372SRobert Watson struct inpcb *inp; 1638b615593SMarko Zec CURVNET_SET(tp->t_vnet); 1648b615593SMarko Zec INIT_VNET_INET(tp->t_vnet); 16585d94372SRobert Watson 166603724d3SBjoern A. Zeeb INP_INFO_RLOCK(&V_tcbinfo); 16785d94372SRobert Watson inp = tp->t_inpcb; 16885d94372SRobert Watson /* 16985d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 17085d94372SRobert Watson * tear-down mean we need it as a work-around for races between 17185d94372SRobert Watson * timers and tcp_discardcb(). 17285d94372SRobert Watson * 17385d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL")); 17485d94372SRobert Watson */ 17585d94372SRobert Watson if (inp == NULL) { 17685d94372SRobert Watson tcp_timer_race++; 177603724d3SBjoern A. Zeeb INP_INFO_RUNLOCK(&V_tcbinfo); 1788b615593SMarko Zec CURVNET_RESTORE(); 17985d94372SRobert Watson return; 18085d94372SRobert Watson } 1818501a69cSRobert Watson INP_WLOCK(inp); 182603724d3SBjoern A. Zeeb INP_INFO_RUNLOCK(&V_tcbinfo); 183e2f2059fSMike Silbersack if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_delack) 184e2f2059fSMike Silbersack || !callout_active(&tp->t_timers->tt_delack)) { 1858501a69cSRobert Watson INP_WUNLOCK(inp); 1868b615593SMarko Zec CURVNET_RESTORE(); 18785d94372SRobert Watson return; 18885d94372SRobert Watson } 189e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_delack); 190df8bae1dSRodney W. Grimes 1919b8b58e0SJonathan Lemon tp->t_flags |= TF_ACKNOW; 192603724d3SBjoern A. Zeeb V_tcpstat.tcps_delack++; 1939b8b58e0SJonathan Lemon (void) tcp_output(tp); 1948501a69cSRobert Watson INP_WUNLOCK(inp); 1958b615593SMarko Zec CURVNET_RESTORE(); 1969b8b58e0SJonathan Lemon } 1979b8b58e0SJonathan Lemon 19885d94372SRobert Watson void 19985d94372SRobert Watson tcp_timer_2msl(void *xtp) 2009b8b58e0SJonathan Lemon { 20185d94372SRobert Watson struct tcpcb *tp = xtp; 20285d94372SRobert Watson struct inpcb *inp; 2038b615593SMarko Zec CURVNET_SET(tp->t_vnet); 2048b615593SMarko Zec INIT_VNET_INET(tp->t_vnet); 2059b8b58e0SJonathan Lemon #ifdef TCPDEBUG 2069b8b58e0SJonathan Lemon int ostate; 2079b8b58e0SJonathan Lemon 2089b8b58e0SJonathan Lemon ostate = tp->t_state; 2099b8b58e0SJonathan Lemon #endif 210623dce13SRobert Watson /* 21185d94372SRobert Watson * XXXRW: Does this actually happen? 21285d94372SRobert Watson */ 213603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_tcbinfo); 21485d94372SRobert Watson inp = tp->t_inpcb; 21585d94372SRobert Watson /* 21685d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 21785d94372SRobert Watson * tear-down mean we need it as a work-around for races between 21885d94372SRobert Watson * timers and tcp_discardcb(). 21985d94372SRobert Watson * 22085d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL")); 22185d94372SRobert Watson */ 22285d94372SRobert Watson if (inp == NULL) { 22385d94372SRobert Watson tcp_timer_race++; 224603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 2258b615593SMarko Zec CURVNET_RESTORE(); 22685d94372SRobert Watson return; 22785d94372SRobert Watson } 2288501a69cSRobert Watson INP_WLOCK(inp); 22985d94372SRobert Watson tcp_free_sackholes(tp); 230e2f2059fSMike Silbersack if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_2msl) || 231e2f2059fSMike Silbersack !callout_active(&tp->t_timers->tt_2msl)) { 2328501a69cSRobert Watson INP_WUNLOCK(tp->t_inpcb); 233603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 2348b615593SMarko Zec CURVNET_RESTORE(); 23585d94372SRobert Watson return; 23685d94372SRobert Watson } 237e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_2msl); 23885d94372SRobert Watson /* 239df8bae1dSRodney W. Grimes * 2 MSL timeout in shutdown went off. If we're closed but 240df8bae1dSRodney W. Grimes * still waiting for peer to close and connection has been idle 241df8bae1dSRodney W. Grimes * too long, or if 2MSL time is up from TIME_WAIT, delete connection 242df8bae1dSRodney W. Grimes * control block. Otherwise, check again in a bit. 2437c72af87SMohan Srinivasan * 2447c72af87SMohan Srinivasan * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 2457c72af87SMohan Srinivasan * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 2467c72af87SMohan Srinivasan * Ignore fact that there were recent incoming segments. 247df8bae1dSRodney W. Grimes */ 2487c72af87SMohan Srinivasan if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 24985d94372SRobert Watson tp->t_inpcb && tp->t_inpcb->inp_socket && 2507c72af87SMohan Srinivasan (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 251603724d3SBjoern A. Zeeb V_tcpstat.tcps_finwait2_drops++; 25285d94372SRobert Watson tp = tcp_close(tp); 2537c72af87SMohan Srinivasan } else { 254df8bae1dSRodney W. Grimes if (tp->t_state != TCPS_TIME_WAIT && 2559b8b58e0SJonathan Lemon (ticks - tp->t_rcvtime) <= tcp_maxidle) 256e2f2059fSMike Silbersack callout_reset(&tp->t_timers->tt_2msl, tcp_keepintvl, 25785d94372SRobert Watson tcp_timer_2msl, tp); 258df8bae1dSRodney W. Grimes else 25985d94372SRobert Watson tp = tcp_close(tp); 2607c72af87SMohan Srinivasan } 261df8bae1dSRodney W. Grimes 2629b8b58e0SJonathan Lemon #ifdef TCPDEBUG 263586b4a0eSKonstantin Belousov if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 264fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 2659b8b58e0SJonathan Lemon PRU_SLOWTIMO); 2669b8b58e0SJonathan Lemon #endif 26785d94372SRobert Watson if (tp != NULL) 2688501a69cSRobert Watson INP_WUNLOCK(inp); 269603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 2708b615593SMarko Zec CURVNET_RESTORE(); 2719b8b58e0SJonathan Lemon } 2729b8b58e0SJonathan Lemon 27385d94372SRobert Watson void 27485d94372SRobert Watson tcp_timer_keep(void *xtp) 2759b8b58e0SJonathan Lemon { 27685d94372SRobert Watson struct tcpcb *tp = xtp; 27708517d53SMike Silbersack struct tcptemp *t_template; 27885d94372SRobert Watson struct inpcb *inp; 2798b615593SMarko Zec CURVNET_SET(tp->t_vnet); 2808b615593SMarko Zec INIT_VNET_INET(tp->t_vnet); 2819b8b58e0SJonathan Lemon #ifdef TCPDEBUG 2829b8b58e0SJonathan Lemon int ostate; 2839b8b58e0SJonathan Lemon 2849b8b58e0SJonathan Lemon ostate = tp->t_state; 2859b8b58e0SJonathan Lemon #endif 286603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_tcbinfo); 28785d94372SRobert Watson inp = tp->t_inpcb; 28885d94372SRobert Watson /* 28985d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 29085d94372SRobert Watson * tear-down mean we need it as a work-around for races between 29185d94372SRobert Watson * timers and tcp_discardcb(). 29285d94372SRobert Watson * 29385d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL")); 29485d94372SRobert Watson */ 29585d94372SRobert Watson if (inp == NULL) { 29685d94372SRobert Watson tcp_timer_race++; 297603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 2988b615593SMarko Zec CURVNET_RESTORE(); 29985d94372SRobert Watson return; 30085d94372SRobert Watson } 3018501a69cSRobert Watson INP_WLOCK(inp); 302e2f2059fSMike Silbersack if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_keep) 303e2f2059fSMike Silbersack || !callout_active(&tp->t_timers->tt_keep)) { 3048501a69cSRobert Watson INP_WUNLOCK(inp); 305603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 3068b615593SMarko Zec CURVNET_RESTORE(); 30785d94372SRobert Watson return; 30885d94372SRobert Watson } 309e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_keep); 3109b8b58e0SJonathan Lemon /* 3119b8b58e0SJonathan Lemon * Keep-alive timer went off; send something 3129b8b58e0SJonathan Lemon * or drop connection if idle for too long. 3139b8b58e0SJonathan Lemon */ 314603724d3SBjoern A. Zeeb V_tcpstat.tcps_keeptimeo++; 3159b8b58e0SJonathan Lemon if (tp->t_state < TCPS_ESTABLISHED) 3169b8b58e0SJonathan Lemon goto dropit; 3172a074620SSam Leffler if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && 3189b8b58e0SJonathan Lemon tp->t_state <= TCPS_CLOSING) { 3199b8b58e0SJonathan Lemon if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle) 3209b8b58e0SJonathan Lemon goto dropit; 3219b8b58e0SJonathan Lemon /* 3229b8b58e0SJonathan Lemon * Send a packet designed to force a response 3239b8b58e0SJonathan Lemon * if the peer is up and reachable: 3249b8b58e0SJonathan Lemon * either an ACK if the connection is still alive, 3259b8b58e0SJonathan Lemon * or an RST if the peer has closed the connection 3269b8b58e0SJonathan Lemon * due to timeout or reboot. 3279b8b58e0SJonathan Lemon * Using sequence number tp->snd_una-1 3289b8b58e0SJonathan Lemon * causes the transmitted zero-length segment 3299b8b58e0SJonathan Lemon * to lie outside the receive window; 3309b8b58e0SJonathan Lemon * by the protocol spec, this requires the 3319b8b58e0SJonathan Lemon * correspondent TCP to respond. 3329b8b58e0SJonathan Lemon */ 333603724d3SBjoern A. Zeeb V_tcpstat.tcps_keepprobe++; 33479909384SJonathan Lemon t_template = tcpip_maketemplate(inp); 33508517d53SMike Silbersack if (t_template) { 33608517d53SMike Silbersack tcp_respond(tp, t_template->tt_ipgen, 33708517d53SMike Silbersack &t_template->tt_t, (struct mbuf *)NULL, 3389b8b58e0SJonathan Lemon tp->rcv_nxt, tp->snd_una - 1, 0); 33953640b0eSRobert Watson free(t_template, M_TEMP); 34008517d53SMike Silbersack } 341e2f2059fSMike Silbersack callout_reset(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp); 3424cc20ab1SSeigo Tanimura } else 343e2f2059fSMike Silbersack callout_reset(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp); 3449b8b58e0SJonathan Lemon 3459b8b58e0SJonathan Lemon #ifdef TCPDEBUG 3462a074620SSam Leffler if (inp->inp_socket->so_options & SO_DEBUG) 347fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 3489b8b58e0SJonathan Lemon PRU_SLOWTIMO); 3499b8b58e0SJonathan Lemon #endif 3508501a69cSRobert Watson INP_WUNLOCK(inp); 351603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 3528b615593SMarko Zec CURVNET_RESTORE(); 35385d94372SRobert Watson return; 3549b8b58e0SJonathan Lemon 3559b8b58e0SJonathan Lemon dropit: 356603724d3SBjoern A. Zeeb V_tcpstat.tcps_keepdrops++; 35785d94372SRobert Watson tp = tcp_drop(tp, ETIMEDOUT); 35885d94372SRobert Watson 35985d94372SRobert Watson #ifdef TCPDEBUG 36085d94372SRobert Watson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 36185d94372SRobert Watson tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 36285d94372SRobert Watson PRU_SLOWTIMO); 36385d94372SRobert Watson #endif 36485d94372SRobert Watson if (tp != NULL) 3658501a69cSRobert Watson INP_WUNLOCK(tp->t_inpcb); 366603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 3678b615593SMarko Zec CURVNET_RESTORE(); 3689b8b58e0SJonathan Lemon } 3699b8b58e0SJonathan Lemon 37085d94372SRobert Watson void 37185d94372SRobert Watson tcp_timer_persist(void *xtp) 3729b8b58e0SJonathan Lemon { 37385d94372SRobert Watson struct tcpcb *tp = xtp; 37485d94372SRobert Watson struct inpcb *inp; 3758b615593SMarko Zec CURVNET_SET(tp->t_vnet); 3768b615593SMarko Zec INIT_VNET_INET(tp->t_vnet); 3779b8b58e0SJonathan Lemon #ifdef TCPDEBUG 3789b8b58e0SJonathan Lemon int ostate; 3799b8b58e0SJonathan Lemon 3809b8b58e0SJonathan Lemon ostate = tp->t_state; 3819b8b58e0SJonathan Lemon #endif 382603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_tcbinfo); 38385d94372SRobert Watson inp = tp->t_inpcb; 38485d94372SRobert Watson /* 38585d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 38685d94372SRobert Watson * tear-down mean we need it as a work-around for races between 38785d94372SRobert Watson * timers and tcp_discardcb(). 38885d94372SRobert Watson * 38985d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL")); 39085d94372SRobert Watson */ 39185d94372SRobert Watson if (inp == NULL) { 39285d94372SRobert Watson tcp_timer_race++; 393603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 3948b615593SMarko Zec CURVNET_RESTORE(); 39585d94372SRobert Watson return; 39685d94372SRobert Watson } 3978501a69cSRobert Watson INP_WLOCK(inp); 398e2f2059fSMike Silbersack if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_persist) 399e2f2059fSMike Silbersack || !callout_active(&tp->t_timers->tt_persist)) { 4008501a69cSRobert Watson INP_WUNLOCK(inp); 401603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 4028b615593SMarko Zec CURVNET_RESTORE(); 40385d94372SRobert Watson return; 40485d94372SRobert Watson } 405e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_persist); 4069b8b58e0SJonathan Lemon /* 4079b8b58e0SJonathan Lemon * Persistance timer into zero window. 4089b8b58e0SJonathan Lemon * Force a byte to be output, if possible. 4099b8b58e0SJonathan Lemon */ 410603724d3SBjoern A. Zeeb V_tcpstat.tcps_persisttimeo++; 4119b8b58e0SJonathan Lemon /* 4129b8b58e0SJonathan Lemon * Hack: if the peer is dead/unreachable, we do not 4139b8b58e0SJonathan Lemon * time out if the window is closed. After a full 4149b8b58e0SJonathan Lemon * backoff, drop the connection if the idle time 4159b8b58e0SJonathan Lemon * (no responses to probes) reaches the maximum 4169b8b58e0SJonathan Lemon * backoff that we would use if retransmitting. 4179b8b58e0SJonathan Lemon */ 4189b8b58e0SJonathan Lemon if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 4199b8b58e0SJonathan Lemon ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle || 4209b8b58e0SJonathan Lemon (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 421603724d3SBjoern A. Zeeb V_tcpstat.tcps_persistdrop++; 42285d94372SRobert Watson tp = tcp_drop(tp, ETIMEDOUT); 42385d94372SRobert Watson goto out; 4249b8b58e0SJonathan Lemon } 4259b8b58e0SJonathan Lemon tcp_setpersist(tp); 4262cdbfa66SPaul Saab tp->t_flags |= TF_FORCEDATA; 4279b8b58e0SJonathan Lemon (void) tcp_output(tp); 4282cdbfa66SPaul Saab tp->t_flags &= ~TF_FORCEDATA; 4299b8b58e0SJonathan Lemon 43085d94372SRobert Watson out: 4319b8b58e0SJonathan Lemon #ifdef TCPDEBUG 432ffb761f6SGleb Smirnoff if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 433ffb761f6SGleb Smirnoff tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 4349b8b58e0SJonathan Lemon #endif 43585d94372SRobert Watson if (tp != NULL) 4368501a69cSRobert Watson INP_WUNLOCK(inp); 437603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 4388b615593SMarko Zec CURVNET_RESTORE(); 4399b8b58e0SJonathan Lemon } 4409b8b58e0SJonathan Lemon 44185d94372SRobert Watson void 44285d94372SRobert Watson tcp_timer_rexmt(void * xtp) 4439b8b58e0SJonathan Lemon { 44485d94372SRobert Watson struct tcpcb *tp = xtp; 4458b615593SMarko Zec CURVNET_SET(tp->t_vnet); 4468b615593SMarko Zec INIT_VNET_INET(tp->t_vnet); 4479b8b58e0SJonathan Lemon int rexmt; 44885d94372SRobert Watson int headlocked; 44985d94372SRobert Watson struct inpcb *inp; 4509b8b58e0SJonathan Lemon #ifdef TCPDEBUG 4519b8b58e0SJonathan Lemon int ostate; 4529b8b58e0SJonathan Lemon 4539b8b58e0SJonathan Lemon ostate = tp->t_state; 4549b8b58e0SJonathan Lemon #endif 455603724d3SBjoern A. Zeeb INP_INFO_WLOCK(&V_tcbinfo); 45685d94372SRobert Watson headlocked = 1; 45785d94372SRobert Watson inp = tp->t_inpcb; 45885d94372SRobert Watson /* 45985d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 46085d94372SRobert Watson * tear-down mean we need it as a work-around for races between 46185d94372SRobert Watson * timers and tcp_discardcb(). 46285d94372SRobert Watson * 46385d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL")); 46485d94372SRobert Watson */ 46585d94372SRobert Watson if (inp == NULL) { 46685d94372SRobert Watson tcp_timer_race++; 467603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 4688b615593SMarko Zec CURVNET_RESTORE(); 46985d94372SRobert Watson return; 47085d94372SRobert Watson } 4718501a69cSRobert Watson INP_WLOCK(inp); 472e2f2059fSMike Silbersack if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_rexmt) 473e2f2059fSMike Silbersack || !callout_active(&tp->t_timers->tt_rexmt)) { 4748501a69cSRobert Watson INP_WUNLOCK(inp); 475603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 4768b615593SMarko Zec CURVNET_RESTORE(); 47785d94372SRobert Watson return; 47885d94372SRobert Watson } 479e2f2059fSMike Silbersack callout_deactivate(&tp->t_timers->tt_rexmt); 4806d90faf3SPaul Saab tcp_free_sackholes(tp); 481df8bae1dSRodney W. Grimes /* 482df8bae1dSRodney W. Grimes * Retransmission timer went off. Message has not 483df8bae1dSRodney W. Grimes * been acked within retransmit interval. Back off 484df8bae1dSRodney W. Grimes * to a longer retransmit interval and retransmit one segment. 485df8bae1dSRodney W. Grimes */ 486df8bae1dSRodney W. Grimes if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 487df8bae1dSRodney W. Grimes tp->t_rxtshift = TCP_MAXRXTSHIFT; 488603724d3SBjoern A. Zeeb V_tcpstat.tcps_timeoutdrop++; 48985d94372SRobert Watson tp = tcp_drop(tp, tp->t_softerror ? 49085d94372SRobert Watson tp->t_softerror : ETIMEDOUT); 49185d94372SRobert Watson goto out; 4929b8b58e0SJonathan Lemon } 493603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 49485d94372SRobert Watson headlocked = 0; 4959b8b58e0SJonathan Lemon if (tp->t_rxtshift == 1) { 4969b8b58e0SJonathan Lemon /* 4979b8b58e0SJonathan Lemon * first retransmit; record ssthresh and cwnd so they can 4989b8b58e0SJonathan Lemon * be recovered if this turns out to be a "bad" retransmit. 4999b8b58e0SJonathan Lemon * A retransmit is considered "bad" if an ACK for this 5009b8b58e0SJonathan Lemon * segment is received within RTT/2 interval; the assumption 5019b8b58e0SJonathan Lemon * here is that the ACK was already in flight. See 5029b8b58e0SJonathan Lemon * "On Estimating End-to-End Network Path Properties" by 5039b8b58e0SJonathan Lemon * Allman and Paxson for more details. 5049b8b58e0SJonathan Lemon */ 5059b8b58e0SJonathan Lemon tp->snd_cwnd_prev = tp->snd_cwnd; 5069b8b58e0SJonathan Lemon tp->snd_ssthresh_prev = tp->snd_ssthresh; 5079d11646dSJeffrey Hsu tp->snd_recover_prev = tp->snd_recover; 5089d11646dSJeffrey Hsu if (IN_FASTRECOVERY(tp)) 5099d11646dSJeffrey Hsu tp->t_flags |= TF_WASFRECOVERY; 5109d11646dSJeffrey Hsu else 5119d11646dSJeffrey Hsu tp->t_flags &= ~TF_WASFRECOVERY; 5129b8b58e0SJonathan Lemon tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 513df8bae1dSRodney W. Grimes } 514603724d3SBjoern A. Zeeb V_tcpstat.tcps_rexmttimeo++; 5157d42e30cSJonathan Lemon if (tp->t_state == TCPS_SYN_SENT) 5167d42e30cSJonathan Lemon rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; 5177d42e30cSJonathan Lemon else 518df8bae1dSRodney W. Grimes rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 519df8bae1dSRodney W. Grimes TCPT_RANGESET(tp->t_rxtcur, rexmt, 520df8bae1dSRodney W. Grimes tp->t_rttmin, TCPTV_REXMTMAX); 521df8bae1dSRodney W. Grimes /* 522c94c54e4SAndre Oppermann * Disable rfc1323 if we havn't got any response to 5237ceb7783SJesper Skriver * our third SYN to work-around some broken terminal servers 5247ceb7783SJesper Skriver * (most of which have hopefully been retired) that have bad VJ 5257ceb7783SJesper Skriver * header compression code which trashes TCP segments containing 5267ceb7783SJesper Skriver * unknown-to-them TCP options. 5277ceb7783SJesper Skriver */ 5287ceb7783SJesper Skriver if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) 529c94c54e4SAndre Oppermann tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP); 5307ceb7783SJesper Skriver /* 53197d8d152SAndre Oppermann * If we backed off this far, our srtt estimate is probably bogus. 53297d8d152SAndre Oppermann * Clobber it so we'll take the next rtt measurement as our srtt; 533df8bae1dSRodney W. Grimes * move the current srtt into rttvar to keep the current 534df8bae1dSRodney W. Grimes * retransmit times until then. 535df8bae1dSRodney W. Grimes */ 536df8bae1dSRodney W. Grimes if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 537fb59c426SYoshinobu Inoue #ifdef INET6 538fb59c426SYoshinobu Inoue if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) 539fb59c426SYoshinobu Inoue in6_losing(tp->t_inpcb); 540fb59c426SYoshinobu Inoue else 541fb59c426SYoshinobu Inoue #endif 542df8bae1dSRodney W. Grimes tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 543df8bae1dSRodney W. Grimes tp->t_srtt = 0; 544df8bae1dSRodney W. Grimes } 545df8bae1dSRodney W. Grimes tp->snd_nxt = tp->snd_una; 5469d11646dSJeffrey Hsu tp->snd_recover = tp->snd_max; 54746f58482SJonathan Lemon /* 54874b48c1dSAndras Olah * Force a segment to be sent. 54974b48c1dSAndras Olah */ 55074b48c1dSAndras Olah tp->t_flags |= TF_ACKNOW; 55174b48c1dSAndras Olah /* 552df8bae1dSRodney W. Grimes * If timing a segment in this window, stop the timer. 553df8bae1dSRodney W. Grimes */ 5549b8b58e0SJonathan Lemon tp->t_rtttime = 0; 555df8bae1dSRodney W. Grimes /* 556df8bae1dSRodney W. Grimes * Close the congestion window down to one segment 557df8bae1dSRodney W. Grimes * (we'll open it by one segment for each ack we get). 558df8bae1dSRodney W. Grimes * Since we probably have a window's worth of unacked 559df8bae1dSRodney W. Grimes * data accumulated, this "slow start" keeps us from 560df8bae1dSRodney W. Grimes * dumping all that data as back-to-back packets (which 561df8bae1dSRodney W. Grimes * might overwhelm an intermediate gateway). 562df8bae1dSRodney W. Grimes * 563df8bae1dSRodney W. Grimes * There are two phases to the opening: Initially we 564df8bae1dSRodney W. Grimes * open by one mss on each ack. This makes the window 565df8bae1dSRodney W. Grimes * size increase exponentially with time. If the 566df8bae1dSRodney W. Grimes * window is larger than the path can handle, this 567df8bae1dSRodney W. Grimes * exponential growth results in dropped packet(s) 568df8bae1dSRodney W. Grimes * almost immediately. To get more time between 569df8bae1dSRodney W. Grimes * drops but still "push" the network to take advantage 570df8bae1dSRodney W. Grimes * of improving conditions, we switch from exponential 571df8bae1dSRodney W. Grimes * to linear window opening at some threshhold size. 572df8bae1dSRodney W. Grimes * For a threshhold, we use half the current window 573df8bae1dSRodney W. Grimes * size, truncated to a multiple of the mss. 574df8bae1dSRodney W. Grimes * 575df8bae1dSRodney W. Grimes * (the minimum cwnd that will give us exponential 576df8bae1dSRodney W. Grimes * growth is 2 mss. We don't allow the threshhold 577df8bae1dSRodney W. Grimes * to go below this.) 578df8bae1dSRodney W. Grimes */ 579df8bae1dSRodney W. Grimes { 580df8bae1dSRodney W. Grimes u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; 581df8bae1dSRodney W. Grimes if (win < 2) 582df8bae1dSRodney W. Grimes win = 2; 583df8bae1dSRodney W. Grimes tp->snd_cwnd = tp->t_maxseg; 584df8bae1dSRodney W. Grimes tp->snd_ssthresh = win * tp->t_maxseg; 585df8bae1dSRodney W. Grimes tp->t_dupacks = 0; 586df8bae1dSRodney W. Grimes } 5879d11646dSJeffrey Hsu EXIT_FASTRECOVERY(tp); 588df8bae1dSRodney W. Grimes (void) tcp_output(tp); 589df8bae1dSRodney W. Grimes 59085d94372SRobert Watson out: 5919b8b58e0SJonathan Lemon #ifdef TCPDEBUG 5921c53f806SRobert Watson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 593fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 5949b8b58e0SJonathan Lemon PRU_SLOWTIMO); 595df8bae1dSRodney W. Grimes #endif 59685d94372SRobert Watson if (tp != NULL) 5978501a69cSRobert Watson INP_WUNLOCK(inp); 59885d94372SRobert Watson if (headlocked) 599603724d3SBjoern A. Zeeb INP_INFO_WUNLOCK(&V_tcbinfo); 6008b615593SMarko Zec CURVNET_RESTORE(); 60185d94372SRobert Watson } 60285d94372SRobert Watson 60385d94372SRobert Watson void 60485d94372SRobert Watson tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta) 60585d94372SRobert Watson { 60685d94372SRobert Watson struct callout *t_callout; 60785d94372SRobert Watson void *f_callout; 60885d94372SRobert Watson 60985d94372SRobert Watson switch (timer_type) { 61085d94372SRobert Watson case TT_DELACK: 611e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_delack; 61285d94372SRobert Watson f_callout = tcp_timer_delack; 61385d94372SRobert Watson break; 61485d94372SRobert Watson case TT_REXMT: 615e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_rexmt; 61685d94372SRobert Watson f_callout = tcp_timer_rexmt; 61785d94372SRobert Watson break; 61885d94372SRobert Watson case TT_PERSIST: 619e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_persist; 62085d94372SRobert Watson f_callout = tcp_timer_persist; 62185d94372SRobert Watson break; 62285d94372SRobert Watson case TT_KEEP: 623e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_keep; 62485d94372SRobert Watson f_callout = tcp_timer_keep; 62585d94372SRobert Watson break; 62685d94372SRobert Watson case TT_2MSL: 627e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_2msl; 62885d94372SRobert Watson f_callout = tcp_timer_2msl; 62985d94372SRobert Watson break; 63085d94372SRobert Watson default: 63185d94372SRobert Watson panic("bad timer_type"); 63285d94372SRobert Watson } 63385d94372SRobert Watson if (delta == 0) { 63485d94372SRobert Watson callout_stop(t_callout); 63585d94372SRobert Watson } else { 63685d94372SRobert Watson callout_reset(t_callout, delta, f_callout, tp); 63785d94372SRobert Watson } 63885d94372SRobert Watson } 63985d94372SRobert Watson 64085d94372SRobert Watson int 64185d94372SRobert Watson tcp_timer_active(struct tcpcb *tp, int timer_type) 64285d94372SRobert Watson { 64385d94372SRobert Watson struct callout *t_callout; 64485d94372SRobert Watson 64585d94372SRobert Watson switch (timer_type) { 64685d94372SRobert Watson case TT_DELACK: 647e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_delack; 64885d94372SRobert Watson break; 64985d94372SRobert Watson case TT_REXMT: 650e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_rexmt; 65185d94372SRobert Watson break; 65285d94372SRobert Watson case TT_PERSIST: 653e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_persist; 65485d94372SRobert Watson break; 65585d94372SRobert Watson case TT_KEEP: 656e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_keep; 65785d94372SRobert Watson break; 65885d94372SRobert Watson case TT_2MSL: 659e2f2059fSMike Silbersack t_callout = &tp->t_timers->tt_2msl; 66085d94372SRobert Watson break; 66185d94372SRobert Watson default: 66285d94372SRobert Watson panic("bad timer_type"); 66385d94372SRobert Watson } 66485d94372SRobert Watson return callout_active(t_callout); 665df8bae1dSRodney W. Grimes } 666