1c398230bSWarner Losh /*- 2e79adb8eSGarrett Wollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 29e79adb8eSGarrett Wollman * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 30c3aac50fSPeter Wemm * $FreeBSD$ 31df8bae1dSRodney W. Grimes */ 32df8bae1dSRodney W. Grimes 33fb59c426SYoshinobu Inoue #include "opt_inet6.h" 340cc12cc5SJoerg Wunsch #include "opt_tcpdebug.h" 350cc12cc5SJoerg Wunsch 36df8bae1dSRodney W. Grimes #include <sys/param.h> 3798163b98SPoul-Henning Kamp #include <sys/kernel.h> 38c74af4faSBruce Evans #include <sys/lock.h> 3908517d53SMike Silbersack #include <sys/mbuf.h> 40c74af4faSBruce Evans #include <sys/mutex.h> 41c74af4faSBruce Evans #include <sys/protosw.h> 42df8bae1dSRodney W. Grimes #include <sys/socket.h> 43df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 44c74af4faSBruce Evans #include <sys/sysctl.h> 45c74af4faSBruce Evans #include <sys/systm.h> 46e79adb8eSGarrett Wollman 47df8bae1dSRodney W. Grimes #include <net/route.h> 48df8bae1dSRodney W. Grimes 49df8bae1dSRodney W. Grimes #include <netinet/in.h> 50df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 51c74af4faSBruce Evans #include <netinet/in_systm.h> 52fb59c426SYoshinobu Inoue #ifdef INET6 53fb59c426SYoshinobu Inoue #include <netinet6/in6_pcb.h> 54fb59c426SYoshinobu Inoue #endif 55df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 56df8bae1dSRodney W. Grimes #include <netinet/tcp.h> 57df8bae1dSRodney W. Grimes #include <netinet/tcp_fsm.h> 58df8bae1dSRodney W. Grimes #include <netinet/tcp_timer.h> 59df8bae1dSRodney W. Grimes #include <netinet/tcp_var.h> 60df8bae1dSRodney W. Grimes #include <netinet/tcpip.h> 61af7a2999SDavid Greenman #ifdef TCPDEBUG 62af7a2999SDavid Greenman #include <netinet/tcp_debug.h> 63af7a2999SDavid Greenman #endif 64df8bae1dSRodney W. Grimes 659b8b58e0SJonathan Lemon int tcp_keepinit; 66ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 67ccb4d0c6SJonathan Lemon &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", ""); 687b40aa32SPaul Traina 699b8b58e0SJonathan Lemon int tcp_keepidle; 70ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 71ccb4d0c6SJonathan Lemon &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", ""); 7298163b98SPoul-Henning Kamp 739b8b58e0SJonathan Lemon int tcp_keepintvl; 74ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 75ccb4d0c6SJonathan Lemon &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", ""); 7698163b98SPoul-Henning Kamp 779b8b58e0SJonathan Lemon int tcp_delacktime; 786489fe65SAndre Oppermann SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, 796489fe65SAndre Oppermann &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 80ccb4d0c6SJonathan Lemon "Time before a delayed ACK is sent"); 819b8b58e0SJonathan Lemon 829b8b58e0SJonathan Lemon int tcp_msl; 83ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 84ccb4d0c6SJonathan Lemon &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 859b8b58e0SJonathan Lemon 86701bec5aSMatthew Dillon int tcp_rexmit_min; 87701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 886489fe65SAndre Oppermann &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 896489fe65SAndre Oppermann "Minimum Retransmission Timeout"); 90701bec5aSMatthew Dillon 91701bec5aSMatthew Dillon int tcp_rexmit_slop; 92701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 936489fe65SAndre Oppermann &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 946489fe65SAndre Oppermann "Retransmission Timer Slop"); 95701bec5aSMatthew Dillon 96c39a614eSRobert Watson static int always_keepalive = 1; 973d177f46SBill Fumerola SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 983d177f46SBill Fumerola &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 9934be9bf3SPoul-Henning Kamp 1007c72af87SMohan Srinivasan int tcp_fast_finwait2_recycle = 0; 1017c72af87SMohan Srinivasan SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 1026489fe65SAndre Oppermann &tcp_fast_finwait2_recycle, 0, 1036489fe65SAndre Oppermann "Recycle closed FIN_WAIT_2 connections faster"); 1047c72af87SMohan Srinivasan 1057c72af87SMohan Srinivasan int tcp_finwait2_timeout; 1067c72af87SMohan Srinivasan SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, 1076489fe65SAndre Oppermann &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); 1087c72af87SMohan Srinivasan 1097c72af87SMohan Srinivasan 1100312fbe9SPoul-Henning Kamp static int tcp_keepcnt = TCPTV_KEEPCNT; 1110312fbe9SPoul-Henning Kamp /* max idle probes */ 1129b8b58e0SJonathan Lemon int tcp_maxpersistidle; 1130312fbe9SPoul-Henning Kamp /* max idle time in persist */ 114df8bae1dSRodney W. Grimes int tcp_maxidle; 115e79adb8eSGarrett Wollman 116df8bae1dSRodney W. Grimes /* 117df8bae1dSRodney W. Grimes * Tcp protocol timeout routine called every 500 ms. 1189b8b58e0SJonathan Lemon * Updates timestamps used for TCP 119df8bae1dSRodney W. Grimes * causes finite state machine actions if timers expire. 120df8bae1dSRodney W. Grimes */ 121df8bae1dSRodney W. Grimes void 12285d94372SRobert Watson tcp_slowtimo() 123df8bae1dSRodney W. Grimes { 12415bd2b43SDavid Greenman 125e79adb8eSGarrett Wollman tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 126607b0b0cSJonathan Lemon INP_INFO_WLOCK(&tcbinfo); 1272104448fSAndre Oppermann (void) tcp_tw_2msl_scan(0); 128607b0b0cSJonathan Lemon INP_INFO_WUNLOCK(&tcbinfo); 129df8bae1dSRodney W. Grimes } 130df8bae1dSRodney W. Grimes 1317d42e30cSJonathan Lemon int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 1327d42e30cSJonathan Lemon { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 1337d42e30cSJonathan Lemon 134df8bae1dSRodney W. Grimes int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 135f058535dSJeffrey Hsu { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 136df8bae1dSRodney W. Grimes 137f058535dSJeffrey Hsu static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 138e79adb8eSGarrett Wollman 139623dce13SRobert Watson static int tcp_timer_race; 140623dce13SRobert Watson SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race, 141623dce13SRobert Watson 0, "Count of t_inpcb races on tcp_discardcb"); 142623dce13SRobert Watson 143df8bae1dSRodney W. Grimes /* 144df8bae1dSRodney W. Grimes * TCP timer processing. 145df8bae1dSRodney W. Grimes */ 14685d94372SRobert Watson 14785d94372SRobert Watson void 14885d94372SRobert Watson tcp_timer_delack(void *xtp) 149df8bae1dSRodney W. Grimes { 15085d94372SRobert Watson struct tcpcb *tp = xtp; 15185d94372SRobert Watson struct inpcb *inp; 15285d94372SRobert Watson 15385d94372SRobert Watson INP_INFO_RLOCK(&tcbinfo); 15485d94372SRobert Watson inp = tp->t_inpcb; 15585d94372SRobert Watson /* 15685d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 15785d94372SRobert Watson * tear-down mean we need it as a work-around for races between 15885d94372SRobert Watson * timers and tcp_discardcb(). 15985d94372SRobert Watson * 16085d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL")); 16185d94372SRobert Watson */ 16285d94372SRobert Watson if (inp == NULL) { 16385d94372SRobert Watson tcp_timer_race++; 16485d94372SRobert Watson INP_INFO_RUNLOCK(&tcbinfo); 16585d94372SRobert Watson return; 16685d94372SRobert Watson } 16785d94372SRobert Watson INP_LOCK(inp); 16885d94372SRobert Watson INP_INFO_RUNLOCK(&tcbinfo); 16985d94372SRobert Watson if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_delack) 17085d94372SRobert Watson || !callout_active(tp->tt_delack)) { 17185d94372SRobert Watson INP_UNLOCK(inp); 17285d94372SRobert Watson return; 17385d94372SRobert Watson } 17485d94372SRobert Watson callout_deactivate(tp->tt_delack); 175df8bae1dSRodney W. Grimes 1769b8b58e0SJonathan Lemon tp->t_flags |= TF_ACKNOW; 1779b8b58e0SJonathan Lemon tcpstat.tcps_delack++; 1789b8b58e0SJonathan Lemon (void) tcp_output(tp); 17985d94372SRobert Watson INP_UNLOCK(inp); 1809b8b58e0SJonathan Lemon } 1819b8b58e0SJonathan Lemon 18285d94372SRobert Watson void 18385d94372SRobert Watson tcp_timer_2msl(void *xtp) 1849b8b58e0SJonathan Lemon { 18585d94372SRobert Watson struct tcpcb *tp = xtp; 18685d94372SRobert Watson struct inpcb *inp; 1879b8b58e0SJonathan Lemon #ifdef TCPDEBUG 1889b8b58e0SJonathan Lemon int ostate; 1899b8b58e0SJonathan Lemon 1909b8b58e0SJonathan Lemon ostate = tp->t_state; 1919b8b58e0SJonathan Lemon #endif 192623dce13SRobert Watson /* 19385d94372SRobert Watson * XXXRW: Does this actually happen? 19485d94372SRobert Watson */ 19585d94372SRobert Watson INP_INFO_WLOCK(&tcbinfo); 19685d94372SRobert Watson inp = tp->t_inpcb; 19785d94372SRobert Watson /* 19885d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 19985d94372SRobert Watson * tear-down mean we need it as a work-around for races between 20085d94372SRobert Watson * timers and tcp_discardcb(). 20185d94372SRobert Watson * 20285d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL")); 20385d94372SRobert Watson */ 20485d94372SRobert Watson if (inp == NULL) { 20585d94372SRobert Watson tcp_timer_race++; 20685d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 20785d94372SRobert Watson return; 20885d94372SRobert Watson } 20985d94372SRobert Watson INP_LOCK(inp); 21085d94372SRobert Watson tcp_free_sackholes(tp); 21185d94372SRobert Watson if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_2msl) || 21285d94372SRobert Watson !callout_active(tp->tt_2msl)) { 21385d94372SRobert Watson INP_UNLOCK(tp->t_inpcb); 21485d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 21585d94372SRobert Watson return; 21685d94372SRobert Watson } 21785d94372SRobert Watson callout_deactivate(tp->tt_2msl); 21885d94372SRobert Watson /* 219df8bae1dSRodney W. Grimes * 2 MSL timeout in shutdown went off. If we're closed but 220df8bae1dSRodney W. Grimes * still waiting for peer to close and connection has been idle 221df8bae1dSRodney W. Grimes * too long, or if 2MSL time is up from TIME_WAIT, delete connection 222df8bae1dSRodney W. Grimes * control block. Otherwise, check again in a bit. 2237c72af87SMohan Srinivasan * 2247c72af87SMohan Srinivasan * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 2257c72af87SMohan Srinivasan * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 2267c72af87SMohan Srinivasan * Ignore fact that there were recent incoming segments. 227df8bae1dSRodney W. Grimes */ 2287c72af87SMohan Srinivasan if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 22985d94372SRobert Watson tp->t_inpcb && tp->t_inpcb->inp_socket && 2307c72af87SMohan Srinivasan (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 2317c72af87SMohan Srinivasan tcpstat.tcps_finwait2_drops++; 23285d94372SRobert Watson tp = tcp_close(tp); 2337c72af87SMohan Srinivasan } else { 234df8bae1dSRodney W. Grimes if (tp->t_state != TCPS_TIME_WAIT && 2359b8b58e0SJonathan Lemon (ticks - tp->t_rcvtime) <= tcp_maxidle) 23685d94372SRobert Watson callout_reset(tp->tt_2msl, tcp_keepintvl, 23785d94372SRobert Watson tcp_timer_2msl, tp); 238df8bae1dSRodney W. Grimes else 23985d94372SRobert Watson tp = tcp_close(tp); 2407c72af87SMohan Srinivasan } 241df8bae1dSRodney W. Grimes 2429b8b58e0SJonathan Lemon #ifdef TCPDEBUG 243c214db75SRobert Watson if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 244fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 2459b8b58e0SJonathan Lemon PRU_SLOWTIMO); 2469b8b58e0SJonathan Lemon #endif 24785d94372SRobert Watson if (tp != NULL) 24885d94372SRobert Watson INP_UNLOCK(inp); 24985d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 2509b8b58e0SJonathan Lemon } 2519b8b58e0SJonathan Lemon 25285d94372SRobert Watson void 25385d94372SRobert Watson tcp_timer_keep(void *xtp) 2549b8b58e0SJonathan Lemon { 25585d94372SRobert Watson struct tcpcb *tp = xtp; 25608517d53SMike Silbersack struct tcptemp *t_template; 25785d94372SRobert Watson struct inpcb *inp; 2589b8b58e0SJonathan Lemon #ifdef TCPDEBUG 2599b8b58e0SJonathan Lemon int ostate; 2609b8b58e0SJonathan Lemon 2619b8b58e0SJonathan Lemon ostate = tp->t_state; 2629b8b58e0SJonathan Lemon #endif 26385d94372SRobert Watson INP_INFO_WLOCK(&tcbinfo); 26485d94372SRobert Watson inp = tp->t_inpcb; 26585d94372SRobert Watson /* 26685d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 26785d94372SRobert Watson * tear-down mean we need it as a work-around for races between 26885d94372SRobert Watson * timers and tcp_discardcb(). 26985d94372SRobert Watson * 27085d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL")); 27185d94372SRobert Watson */ 27285d94372SRobert Watson if (inp == NULL) { 27385d94372SRobert Watson tcp_timer_race++; 27485d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 27585d94372SRobert Watson return; 27685d94372SRobert Watson } 27785d94372SRobert Watson INP_LOCK(inp); 27885d94372SRobert Watson if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_keep) 27985d94372SRobert Watson || !callout_active(tp->tt_keep)) { 28085d94372SRobert Watson INP_UNLOCK(inp); 28185d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 28285d94372SRobert Watson return; 28385d94372SRobert Watson } 28485d94372SRobert Watson callout_deactivate(tp->tt_keep); 2859b8b58e0SJonathan Lemon /* 2869b8b58e0SJonathan Lemon * Keep-alive timer went off; send something 2879b8b58e0SJonathan Lemon * or drop connection if idle for too long. 2889b8b58e0SJonathan Lemon */ 2899b8b58e0SJonathan Lemon tcpstat.tcps_keeptimeo++; 2909b8b58e0SJonathan Lemon if (tp->t_state < TCPS_ESTABLISHED) 2919b8b58e0SJonathan Lemon goto dropit; 2922a074620SSam Leffler if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && 2939b8b58e0SJonathan Lemon tp->t_state <= TCPS_CLOSING) { 2949b8b58e0SJonathan Lemon if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle) 2959b8b58e0SJonathan Lemon goto dropit; 2969b8b58e0SJonathan Lemon /* 2979b8b58e0SJonathan Lemon * Send a packet designed to force a response 2989b8b58e0SJonathan Lemon * if the peer is up and reachable: 2999b8b58e0SJonathan Lemon * either an ACK if the connection is still alive, 3009b8b58e0SJonathan Lemon * or an RST if the peer has closed the connection 3019b8b58e0SJonathan Lemon * due to timeout or reboot. 3029b8b58e0SJonathan Lemon * Using sequence number tp->snd_una-1 3039b8b58e0SJonathan Lemon * causes the transmitted zero-length segment 3049b8b58e0SJonathan Lemon * to lie outside the receive window; 3059b8b58e0SJonathan Lemon * by the protocol spec, this requires the 3069b8b58e0SJonathan Lemon * correspondent TCP to respond. 3079b8b58e0SJonathan Lemon */ 3089b8b58e0SJonathan Lemon tcpstat.tcps_keepprobe++; 30979909384SJonathan Lemon t_template = tcpip_maketemplate(inp); 31008517d53SMike Silbersack if (t_template) { 31108517d53SMike Silbersack tcp_respond(tp, t_template->tt_ipgen, 31208517d53SMike Silbersack &t_template->tt_t, (struct mbuf *)NULL, 3139b8b58e0SJonathan Lemon tp->rcv_nxt, tp->snd_una - 1, 0); 31408517d53SMike Silbersack (void) m_free(dtom(t_template)); 31508517d53SMike Silbersack } 31685d94372SRobert Watson callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp); 3174cc20ab1SSeigo Tanimura } else 31885d94372SRobert Watson callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp); 3199b8b58e0SJonathan Lemon 3209b8b58e0SJonathan Lemon #ifdef TCPDEBUG 3212a074620SSam Leffler if (inp->inp_socket->so_options & SO_DEBUG) 322fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 3239b8b58e0SJonathan Lemon PRU_SLOWTIMO); 3249b8b58e0SJonathan Lemon #endif 32585d94372SRobert Watson INP_UNLOCK(inp); 32685d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 32785d94372SRobert Watson return; 3289b8b58e0SJonathan Lemon 3299b8b58e0SJonathan Lemon dropit: 3309b8b58e0SJonathan Lemon tcpstat.tcps_keepdrops++; 33185d94372SRobert Watson tp = tcp_drop(tp, ETIMEDOUT); 33285d94372SRobert Watson 33385d94372SRobert Watson #ifdef TCPDEBUG 33485d94372SRobert Watson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 33585d94372SRobert Watson tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 33685d94372SRobert Watson PRU_SLOWTIMO); 33785d94372SRobert Watson #endif 33885d94372SRobert Watson if (tp != NULL) 33985d94372SRobert Watson INP_UNLOCK(tp->t_inpcb); 34085d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 3419b8b58e0SJonathan Lemon } 3429b8b58e0SJonathan Lemon 34385d94372SRobert Watson void 34485d94372SRobert Watson tcp_timer_persist(void *xtp) 3459b8b58e0SJonathan Lemon { 34685d94372SRobert Watson struct tcpcb *tp = xtp; 34785d94372SRobert Watson struct inpcb *inp; 3489b8b58e0SJonathan Lemon #ifdef TCPDEBUG 3499b8b58e0SJonathan Lemon int ostate; 3509b8b58e0SJonathan Lemon 3519b8b58e0SJonathan Lemon ostate = tp->t_state; 3529b8b58e0SJonathan Lemon #endif 35385d94372SRobert Watson INP_INFO_WLOCK(&tcbinfo); 35485d94372SRobert Watson inp = tp->t_inpcb; 35585d94372SRobert Watson /* 35685d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 35785d94372SRobert Watson * tear-down mean we need it as a work-around for races between 35885d94372SRobert Watson * timers and tcp_discardcb(). 35985d94372SRobert Watson * 36085d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL")); 36185d94372SRobert Watson */ 36285d94372SRobert Watson if (inp == NULL) { 36385d94372SRobert Watson tcp_timer_race++; 36485d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 36585d94372SRobert Watson return; 36685d94372SRobert Watson } 36785d94372SRobert Watson INP_LOCK(inp); 36885d94372SRobert Watson if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_persist) 36985d94372SRobert Watson || !callout_active(tp->tt_persist)) { 37085d94372SRobert Watson INP_UNLOCK(inp); 37185d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 37285d94372SRobert Watson return; 37385d94372SRobert Watson } 37485d94372SRobert Watson callout_deactivate(tp->tt_persist); 3759b8b58e0SJonathan Lemon /* 3769b8b58e0SJonathan Lemon * Persistance timer into zero window. 3779b8b58e0SJonathan Lemon * Force a byte to be output, if possible. 3789b8b58e0SJonathan Lemon */ 3799b8b58e0SJonathan Lemon tcpstat.tcps_persisttimeo++; 3809b8b58e0SJonathan Lemon /* 3819b8b58e0SJonathan Lemon * Hack: if the peer is dead/unreachable, we do not 3829b8b58e0SJonathan Lemon * time out if the window is closed. After a full 3839b8b58e0SJonathan Lemon * backoff, drop the connection if the idle time 3849b8b58e0SJonathan Lemon * (no responses to probes) reaches the maximum 3859b8b58e0SJonathan Lemon * backoff that we would use if retransmitting. 3869b8b58e0SJonathan Lemon */ 3879b8b58e0SJonathan Lemon if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 3889b8b58e0SJonathan Lemon ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle || 3899b8b58e0SJonathan Lemon (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 3909b8b58e0SJonathan Lemon tcpstat.tcps_persistdrop++; 39185d94372SRobert Watson tp = tcp_drop(tp, ETIMEDOUT); 39285d94372SRobert Watson goto out; 3939b8b58e0SJonathan Lemon } 3949b8b58e0SJonathan Lemon tcp_setpersist(tp); 3952cdbfa66SPaul Saab tp->t_flags |= TF_FORCEDATA; 3969b8b58e0SJonathan Lemon (void) tcp_output(tp); 3972cdbfa66SPaul Saab tp->t_flags &= ~TF_FORCEDATA; 3989b8b58e0SJonathan Lemon 39985d94372SRobert Watson out: 4009b8b58e0SJonathan Lemon #ifdef TCPDEBUG 401ffb761f6SGleb Smirnoff if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 402ffb761f6SGleb Smirnoff tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 4039b8b58e0SJonathan Lemon #endif 40485d94372SRobert Watson if (tp != NULL) 40585d94372SRobert Watson INP_UNLOCK(inp); 40685d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 4079b8b58e0SJonathan Lemon } 4089b8b58e0SJonathan Lemon 40985d94372SRobert Watson void 41085d94372SRobert Watson tcp_timer_rexmt(void * xtp) 4119b8b58e0SJonathan Lemon { 41285d94372SRobert Watson struct tcpcb *tp = xtp; 4139b8b58e0SJonathan Lemon int rexmt; 41485d94372SRobert Watson int headlocked; 41585d94372SRobert Watson struct inpcb *inp; 4169b8b58e0SJonathan Lemon #ifdef TCPDEBUG 4179b8b58e0SJonathan Lemon int ostate; 4189b8b58e0SJonathan Lemon 4199b8b58e0SJonathan Lemon ostate = tp->t_state; 4209b8b58e0SJonathan Lemon #endif 42185d94372SRobert Watson INP_INFO_WLOCK(&tcbinfo); 42285d94372SRobert Watson headlocked = 1; 42385d94372SRobert Watson inp = tp->t_inpcb; 42485d94372SRobert Watson /* 42585d94372SRobert Watson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 42685d94372SRobert Watson * tear-down mean we need it as a work-around for races between 42785d94372SRobert Watson * timers and tcp_discardcb(). 42885d94372SRobert Watson * 42985d94372SRobert Watson * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL")); 43085d94372SRobert Watson */ 43185d94372SRobert Watson if (inp == NULL) { 43285d94372SRobert Watson tcp_timer_race++; 43385d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 43485d94372SRobert Watson return; 43585d94372SRobert Watson } 43685d94372SRobert Watson INP_LOCK(inp); 43785d94372SRobert Watson if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_rexmt) 43885d94372SRobert Watson || !callout_active(tp->tt_rexmt)) { 43985d94372SRobert Watson INP_UNLOCK(inp); 44085d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 44185d94372SRobert Watson return; 44285d94372SRobert Watson } 44385d94372SRobert Watson callout_deactivate(tp->tt_rexmt); 4446d90faf3SPaul Saab tcp_free_sackholes(tp); 445df8bae1dSRodney W. Grimes /* 446df8bae1dSRodney W. Grimes * Retransmission timer went off. Message has not 447df8bae1dSRodney W. Grimes * been acked within retransmit interval. Back off 448df8bae1dSRodney W. Grimes * to a longer retransmit interval and retransmit one segment. 449df8bae1dSRodney W. Grimes */ 450df8bae1dSRodney W. Grimes if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 451df8bae1dSRodney W. Grimes tp->t_rxtshift = TCP_MAXRXTSHIFT; 452df8bae1dSRodney W. Grimes tcpstat.tcps_timeoutdrop++; 45385d94372SRobert Watson tp = tcp_drop(tp, tp->t_softerror ? 45485d94372SRobert Watson tp->t_softerror : ETIMEDOUT); 45585d94372SRobert Watson goto out; 4569b8b58e0SJonathan Lemon } 45785d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 45885d94372SRobert Watson headlocked = 0; 4599b8b58e0SJonathan Lemon if (tp->t_rxtshift == 1) { 4609b8b58e0SJonathan Lemon /* 4619b8b58e0SJonathan Lemon * first retransmit; record ssthresh and cwnd so they can 4629b8b58e0SJonathan Lemon * be recovered if this turns out to be a "bad" retransmit. 4639b8b58e0SJonathan Lemon * A retransmit is considered "bad" if an ACK for this 4649b8b58e0SJonathan Lemon * segment is received within RTT/2 interval; the assumption 4659b8b58e0SJonathan Lemon * here is that the ACK was already in flight. See 4669b8b58e0SJonathan Lemon * "On Estimating End-to-End Network Path Properties" by 4679b8b58e0SJonathan Lemon * Allman and Paxson for more details. 4689b8b58e0SJonathan Lemon */ 4699b8b58e0SJonathan Lemon tp->snd_cwnd_prev = tp->snd_cwnd; 4709b8b58e0SJonathan Lemon tp->snd_ssthresh_prev = tp->snd_ssthresh; 4719d11646dSJeffrey Hsu tp->snd_recover_prev = tp->snd_recover; 4729d11646dSJeffrey Hsu if (IN_FASTRECOVERY(tp)) 4739d11646dSJeffrey Hsu tp->t_flags |= TF_WASFRECOVERY; 4749d11646dSJeffrey Hsu else 4759d11646dSJeffrey Hsu tp->t_flags &= ~TF_WASFRECOVERY; 4769b8b58e0SJonathan Lemon tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 477df8bae1dSRodney W. Grimes } 478df8bae1dSRodney W. Grimes tcpstat.tcps_rexmttimeo++; 4797d42e30cSJonathan Lemon if (tp->t_state == TCPS_SYN_SENT) 4807d42e30cSJonathan Lemon rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; 4817d42e30cSJonathan Lemon else 482df8bae1dSRodney W. Grimes rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 483df8bae1dSRodney W. Grimes TCPT_RANGESET(tp->t_rxtcur, rexmt, 484df8bae1dSRodney W. Grimes tp->t_rttmin, TCPTV_REXMTMAX); 485df8bae1dSRodney W. Grimes /* 486c94c54e4SAndre Oppermann * Disable rfc1323 if we havn't got any response to 4877ceb7783SJesper Skriver * our third SYN to work-around some broken terminal servers 4887ceb7783SJesper Skriver * (most of which have hopefully been retired) that have bad VJ 4897ceb7783SJesper Skriver * header compression code which trashes TCP segments containing 4907ceb7783SJesper Skriver * unknown-to-them TCP options. 4917ceb7783SJesper Skriver */ 4927ceb7783SJesper Skriver if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) 493c94c54e4SAndre Oppermann tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP); 4947ceb7783SJesper Skriver /* 49597d8d152SAndre Oppermann * If we backed off this far, our srtt estimate is probably bogus. 49697d8d152SAndre Oppermann * Clobber it so we'll take the next rtt measurement as our srtt; 497df8bae1dSRodney W. Grimes * move the current srtt into rttvar to keep the current 498df8bae1dSRodney W. Grimes * retransmit times until then. 499df8bae1dSRodney W. Grimes */ 500df8bae1dSRodney W. Grimes if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 501fb59c426SYoshinobu Inoue #ifdef INET6 502fb59c426SYoshinobu Inoue if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) 503fb59c426SYoshinobu Inoue in6_losing(tp->t_inpcb); 504fb59c426SYoshinobu Inoue else 505fb59c426SYoshinobu Inoue #endif 506df8bae1dSRodney W. Grimes tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 507df8bae1dSRodney W. Grimes tp->t_srtt = 0; 508df8bae1dSRodney W. Grimes } 509df8bae1dSRodney W. Grimes tp->snd_nxt = tp->snd_una; 5109d11646dSJeffrey Hsu tp->snd_recover = tp->snd_max; 51146f58482SJonathan Lemon /* 51274b48c1dSAndras Olah * Force a segment to be sent. 51374b48c1dSAndras Olah */ 51474b48c1dSAndras Olah tp->t_flags |= TF_ACKNOW; 51574b48c1dSAndras Olah /* 516df8bae1dSRodney W. Grimes * If timing a segment in this window, stop the timer. 517df8bae1dSRodney W. Grimes */ 5189b8b58e0SJonathan Lemon tp->t_rtttime = 0; 519df8bae1dSRodney W. Grimes /* 520df8bae1dSRodney W. Grimes * Close the congestion window down to one segment 521df8bae1dSRodney W. Grimes * (we'll open it by one segment for each ack we get). 522df8bae1dSRodney W. Grimes * Since we probably have a window's worth of unacked 523df8bae1dSRodney W. Grimes * data accumulated, this "slow start" keeps us from 524df8bae1dSRodney W. Grimes * dumping all that data as back-to-back packets (which 525df8bae1dSRodney W. Grimes * might overwhelm an intermediate gateway). 526df8bae1dSRodney W. Grimes * 527df8bae1dSRodney W. Grimes * There are two phases to the opening: Initially we 528df8bae1dSRodney W. Grimes * open by one mss on each ack. This makes the window 529df8bae1dSRodney W. Grimes * size increase exponentially with time. If the 530df8bae1dSRodney W. Grimes * window is larger than the path can handle, this 531df8bae1dSRodney W. Grimes * exponential growth results in dropped packet(s) 532df8bae1dSRodney W. Grimes * almost immediately. To get more time between 533df8bae1dSRodney W. Grimes * drops but still "push" the network to take advantage 534df8bae1dSRodney W. Grimes * of improving conditions, we switch from exponential 535df8bae1dSRodney W. Grimes * to linear window opening at some threshhold size. 536df8bae1dSRodney W. Grimes * For a threshhold, we use half the current window 537df8bae1dSRodney W. Grimes * size, truncated to a multiple of the mss. 538df8bae1dSRodney W. Grimes * 539df8bae1dSRodney W. Grimes * (the minimum cwnd that will give us exponential 540df8bae1dSRodney W. Grimes * growth is 2 mss. We don't allow the threshhold 541df8bae1dSRodney W. Grimes * to go below this.) 542df8bae1dSRodney W. Grimes */ 543df8bae1dSRodney W. Grimes { 544df8bae1dSRodney W. Grimes u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; 545df8bae1dSRodney W. Grimes if (win < 2) 546df8bae1dSRodney W. Grimes win = 2; 547df8bae1dSRodney W. Grimes tp->snd_cwnd = tp->t_maxseg; 548df8bae1dSRodney W. Grimes tp->snd_ssthresh = win * tp->t_maxseg; 549df8bae1dSRodney W. Grimes tp->t_dupacks = 0; 550df8bae1dSRodney W. Grimes } 5519d11646dSJeffrey Hsu EXIT_FASTRECOVERY(tp); 552df8bae1dSRodney W. Grimes (void) tcp_output(tp); 553df8bae1dSRodney W. Grimes 55485d94372SRobert Watson out: 5559b8b58e0SJonathan Lemon #ifdef TCPDEBUG 5561c53f806SRobert Watson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 557fb59c426SYoshinobu Inoue tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 5589b8b58e0SJonathan Lemon PRU_SLOWTIMO); 559df8bae1dSRodney W. Grimes #endif 56085d94372SRobert Watson if (tp != NULL) 56185d94372SRobert Watson INP_UNLOCK(inp); 56285d94372SRobert Watson if (headlocked) 56385d94372SRobert Watson INP_INFO_WUNLOCK(&tcbinfo); 56485d94372SRobert Watson } 56585d94372SRobert Watson 56685d94372SRobert Watson void 56785d94372SRobert Watson tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta) 56885d94372SRobert Watson { 56985d94372SRobert Watson struct callout *t_callout; 57085d94372SRobert Watson void *f_callout; 57185d94372SRobert Watson 57285d94372SRobert Watson switch (timer_type) { 57385d94372SRobert Watson case TT_DELACK: 57485d94372SRobert Watson t_callout = tp->tt_delack; 57585d94372SRobert Watson f_callout = tcp_timer_delack; 57685d94372SRobert Watson break; 57785d94372SRobert Watson case TT_REXMT: 57885d94372SRobert Watson t_callout = tp->tt_rexmt; 57985d94372SRobert Watson f_callout = tcp_timer_rexmt; 58085d94372SRobert Watson break; 58185d94372SRobert Watson case TT_PERSIST: 58285d94372SRobert Watson t_callout = tp->tt_persist; 58385d94372SRobert Watson f_callout = tcp_timer_persist; 58485d94372SRobert Watson break; 58585d94372SRobert Watson case TT_KEEP: 58685d94372SRobert Watson t_callout = tp->tt_keep; 58785d94372SRobert Watson f_callout = tcp_timer_keep; 58885d94372SRobert Watson break; 58985d94372SRobert Watson case TT_2MSL: 59085d94372SRobert Watson t_callout = tp->tt_2msl; 59185d94372SRobert Watson f_callout = tcp_timer_2msl; 59285d94372SRobert Watson break; 59385d94372SRobert Watson default: 59485d94372SRobert Watson panic("bad timer_type"); 59585d94372SRobert Watson } 59685d94372SRobert Watson if (delta == 0) { 59785d94372SRobert Watson callout_stop(t_callout); 59885d94372SRobert Watson } else { 59985d94372SRobert Watson callout_reset(t_callout, delta, f_callout, tp); 60085d94372SRobert Watson } 60185d94372SRobert Watson } 60285d94372SRobert Watson 60385d94372SRobert Watson int 60485d94372SRobert Watson tcp_timer_active(struct tcpcb *tp, int timer_type) 60585d94372SRobert Watson { 60685d94372SRobert Watson struct callout *t_callout; 60785d94372SRobert Watson 60885d94372SRobert Watson switch (timer_type) { 60985d94372SRobert Watson case TT_DELACK: 61085d94372SRobert Watson t_callout = tp->tt_delack; 61185d94372SRobert Watson break; 61285d94372SRobert Watson case TT_REXMT: 61385d94372SRobert Watson t_callout = tp->tt_rexmt; 61485d94372SRobert Watson break; 61585d94372SRobert Watson case TT_PERSIST: 61685d94372SRobert Watson t_callout = tp->tt_persist; 61785d94372SRobert Watson break; 61885d94372SRobert Watson case TT_KEEP: 61985d94372SRobert Watson t_callout = tp->tt_keep; 62085d94372SRobert Watson break; 62185d94372SRobert Watson case TT_2MSL: 62285d94372SRobert Watson t_callout = tp->tt_2msl; 62385d94372SRobert Watson break; 62485d94372SRobert Watson default: 62585d94372SRobert Watson panic("bad timer_type"); 62685d94372SRobert Watson } 62785d94372SRobert Watson return callout_active(t_callout); 628df8bae1dSRodney W. Grimes } 629