xref: /freebsd/sys/netinet/tcp_timer.c (revision 4b421e2daaa3e23db6b5a51f8ca5b2da0d894a56)
/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 */
31df8bae1dSRodney W. Grimes 
324b421e2dSMike Silbersack #include <sys/cdefs.h>
334b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
344b421e2dSMike Silbersack 
35fb59c426SYoshinobu Inoue #include "opt_inet6.h"
360cc12cc5SJoerg Wunsch #include "opt_tcpdebug.h"
370cc12cc5SJoerg Wunsch 
38df8bae1dSRodney W. Grimes #include <sys/param.h>
3998163b98SPoul-Henning Kamp #include <sys/kernel.h>
40c74af4faSBruce Evans #include <sys/lock.h>
4108517d53SMike Silbersack #include <sys/mbuf.h>
42c74af4faSBruce Evans #include <sys/mutex.h>
43c74af4faSBruce Evans #include <sys/protosw.h>
44df8bae1dSRodney W. Grimes #include <sys/socket.h>
45df8bae1dSRodney W. Grimes #include <sys/socketvar.h>
46c74af4faSBruce Evans #include <sys/sysctl.h>
47c74af4faSBruce Evans #include <sys/systm.h>
48e79adb8eSGarrett Wollman 
49df8bae1dSRodney W. Grimes #include <net/route.h>
50df8bae1dSRodney W. Grimes 
51df8bae1dSRodney W. Grimes #include <netinet/in.h>
52df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
53c74af4faSBruce Evans #include <netinet/in_systm.h>
54fb59c426SYoshinobu Inoue #ifdef INET6
55fb59c426SYoshinobu Inoue #include <netinet6/in6_pcb.h>
56fb59c426SYoshinobu Inoue #endif
57df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
58df8bae1dSRodney W. Grimes #include <netinet/tcp.h>
59df8bae1dSRodney W. Grimes #include <netinet/tcp_fsm.h>
60df8bae1dSRodney W. Grimes #include <netinet/tcp_timer.h>
61df8bae1dSRodney W. Grimes #include <netinet/tcp_var.h>
62df8bae1dSRodney W. Grimes #include <netinet/tcpip.h>
63af7a2999SDavid Greenman #ifdef TCPDEBUG
64af7a2999SDavid Greenman #include <netinet/tcp_debug.h>
65af7a2999SDavid Greenman #endif
66df8bae1dSRodney W. Grimes 
679b8b58e0SJonathan Lemon int	tcp_keepinit;
68ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
69ccb4d0c6SJonathan Lemon     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "");
707b40aa32SPaul Traina 
719b8b58e0SJonathan Lemon int	tcp_keepidle;
72ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
73ccb4d0c6SJonathan Lemon     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "");
7498163b98SPoul-Henning Kamp 
759b8b58e0SJonathan Lemon int	tcp_keepintvl;
76ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
77ccb4d0c6SJonathan Lemon     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "");
7898163b98SPoul-Henning Kamp 
799b8b58e0SJonathan Lemon int	tcp_delacktime;
806489fe65SAndre Oppermann SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
816489fe65SAndre Oppermann     &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
82ccb4d0c6SJonathan Lemon     "Time before a delayed ACK is sent");
839b8b58e0SJonathan Lemon 
849b8b58e0SJonathan Lemon int	tcp_msl;
85ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
86ccb4d0c6SJonathan Lemon     &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
879b8b58e0SJonathan Lemon 
88701bec5aSMatthew Dillon int	tcp_rexmit_min;
89701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
906489fe65SAndre Oppermann     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
916489fe65SAndre Oppermann     "Minimum Retransmission Timeout");
92701bec5aSMatthew Dillon 
93701bec5aSMatthew Dillon int	tcp_rexmit_slop;
94701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
956489fe65SAndre Oppermann     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
966489fe65SAndre Oppermann     "Retransmission Timer Slop");
97701bec5aSMatthew Dillon 
98c39a614eSRobert Watson static int	always_keepalive = 1;
993d177f46SBill Fumerola SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
1003d177f46SBill Fumerola     &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
10134be9bf3SPoul-Henning Kamp 
1027c72af87SMohan Srinivasan int    tcp_fast_finwait2_recycle = 0;
1037c72af87SMohan Srinivasan SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
1046489fe65SAndre Oppermann     &tcp_fast_finwait2_recycle, 0,
1056489fe65SAndre Oppermann     "Recycle closed FIN_WAIT_2 connections faster");
1067c72af87SMohan Srinivasan 
1077c72af87SMohan Srinivasan int    tcp_finwait2_timeout;
1087c72af87SMohan Srinivasan SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
1096489fe65SAndre Oppermann     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
1107c72af87SMohan Srinivasan 
1117c72af87SMohan Srinivasan 
1120312fbe9SPoul-Henning Kamp static int	tcp_keepcnt = TCPTV_KEEPCNT;
1130312fbe9SPoul-Henning Kamp 	/* max idle probes */
1149b8b58e0SJonathan Lemon int	tcp_maxpersistidle;
1150312fbe9SPoul-Henning Kamp 	/* max idle time in persist */
116df8bae1dSRodney W. Grimes int	tcp_maxidle;
117e79adb8eSGarrett Wollman 
118df8bae1dSRodney W. Grimes /*
119df8bae1dSRodney W. Grimes  * Tcp protocol timeout routine called every 500 ms.
1209b8b58e0SJonathan Lemon  * Updates timestamps used for TCP
121df8bae1dSRodney W. Grimes  * causes finite state machine actions if timers expire.
122df8bae1dSRodney W. Grimes  */
123df8bae1dSRodney W. Grimes void
124e2f2059fSMike Silbersack tcp_slowtimo(void)
125df8bae1dSRodney W. Grimes {
12615bd2b43SDavid Greenman 
127e79adb8eSGarrett Wollman 	tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
128607b0b0cSJonathan Lemon 	INP_INFO_WLOCK(&tcbinfo);
1292104448fSAndre Oppermann 	(void) tcp_tw_2msl_scan(0);
130607b0b0cSJonathan Lemon 	INP_INFO_WUNLOCK(&tcbinfo);
131df8bae1dSRodney W. Grimes }
132df8bae1dSRodney W. Grimes 
1337d42e30cSJonathan Lemon int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
1347d42e30cSJonathan Lemon     { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
1357d42e30cSJonathan Lemon 
136df8bae1dSRodney W. Grimes int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
137f058535dSJeffrey Hsu     { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
138df8bae1dSRodney W. Grimes 
139f058535dSJeffrey Hsu static int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
140e79adb8eSGarrett Wollman 
141623dce13SRobert Watson static int tcp_timer_race;
142623dce13SRobert Watson SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
143623dce13SRobert Watson     0, "Count of t_inpcb races on tcp_discardcb");
144623dce13SRobert Watson 
/*
 * TCP timer processing: one callout handler per timer type.
 */
14885d94372SRobert Watson 
14985d94372SRobert Watson void
15085d94372SRobert Watson tcp_timer_delack(void *xtp)
151df8bae1dSRodney W. Grimes {
15285d94372SRobert Watson 	struct tcpcb *tp = xtp;
15385d94372SRobert Watson 	struct inpcb *inp;
15485d94372SRobert Watson 
15585d94372SRobert Watson 	INP_INFO_RLOCK(&tcbinfo);
15685d94372SRobert Watson 	inp = tp->t_inpcb;
15785d94372SRobert Watson 	/*
15885d94372SRobert Watson 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
15985d94372SRobert Watson 	 * tear-down mean we need it as a work-around for races between
16085d94372SRobert Watson 	 * timers and tcp_discardcb().
16185d94372SRobert Watson 	 *
16285d94372SRobert Watson 	 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
16385d94372SRobert Watson 	 */
16485d94372SRobert Watson 	if (inp == NULL) {
16585d94372SRobert Watson 		tcp_timer_race++;
16685d94372SRobert Watson 		INP_INFO_RUNLOCK(&tcbinfo);
16785d94372SRobert Watson 		return;
16885d94372SRobert Watson 	}
16985d94372SRobert Watson 	INP_LOCK(inp);
17085d94372SRobert Watson 	INP_INFO_RUNLOCK(&tcbinfo);
171e2f2059fSMike Silbersack 	if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_delack)
172e2f2059fSMike Silbersack 	    || !callout_active(&tp->t_timers->tt_delack)) {
17385d94372SRobert Watson 		INP_UNLOCK(inp);
17485d94372SRobert Watson 		return;
17585d94372SRobert Watson 	}
176e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_delack);
177df8bae1dSRodney W. Grimes 
1789b8b58e0SJonathan Lemon 	tp->t_flags |= TF_ACKNOW;
1799b8b58e0SJonathan Lemon 	tcpstat.tcps_delack++;
1809b8b58e0SJonathan Lemon 	(void) tcp_output(tp);
18185d94372SRobert Watson 	INP_UNLOCK(inp);
1829b8b58e0SJonathan Lemon }
1839b8b58e0SJonathan Lemon 
18485d94372SRobert Watson void
18585d94372SRobert Watson tcp_timer_2msl(void *xtp)
1869b8b58e0SJonathan Lemon {
18785d94372SRobert Watson 	struct tcpcb *tp = xtp;
18885d94372SRobert Watson 	struct inpcb *inp;
1899b8b58e0SJonathan Lemon #ifdef TCPDEBUG
1909b8b58e0SJonathan Lemon 	int ostate;
1919b8b58e0SJonathan Lemon 
1929b8b58e0SJonathan Lemon 	ostate = tp->t_state;
1939b8b58e0SJonathan Lemon #endif
194623dce13SRobert Watson 	/*
19585d94372SRobert Watson 	 * XXXRW: Does this actually happen?
19685d94372SRobert Watson 	 */
19785d94372SRobert Watson 	INP_INFO_WLOCK(&tcbinfo);
19885d94372SRobert Watson 	inp = tp->t_inpcb;
19985d94372SRobert Watson 	/*
20085d94372SRobert Watson 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
20185d94372SRobert Watson 	 * tear-down mean we need it as a work-around for races between
20285d94372SRobert Watson 	 * timers and tcp_discardcb().
20385d94372SRobert Watson 	 *
20485d94372SRobert Watson 	 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
20585d94372SRobert Watson 	 */
20685d94372SRobert Watson 	if (inp == NULL) {
20785d94372SRobert Watson 		tcp_timer_race++;
20885d94372SRobert Watson 		INP_INFO_WUNLOCK(&tcbinfo);
20985d94372SRobert Watson 		return;
21085d94372SRobert Watson 	}
21185d94372SRobert Watson 	INP_LOCK(inp);
21285d94372SRobert Watson 	tcp_free_sackholes(tp);
213e2f2059fSMike Silbersack 	if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_2msl) ||
214e2f2059fSMike Silbersack 	    !callout_active(&tp->t_timers->tt_2msl)) {
21585d94372SRobert Watson 		INP_UNLOCK(tp->t_inpcb);
21685d94372SRobert Watson 		INP_INFO_WUNLOCK(&tcbinfo);
21785d94372SRobert Watson 		return;
21885d94372SRobert Watson 	}
219e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_2msl);
22085d94372SRobert Watson 	/*
221df8bae1dSRodney W. Grimes 	 * 2 MSL timeout in shutdown went off.  If we're closed but
222df8bae1dSRodney W. Grimes 	 * still waiting for peer to close and connection has been idle
223df8bae1dSRodney W. Grimes 	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
224df8bae1dSRodney W. Grimes 	 * control block.  Otherwise, check again in a bit.
2257c72af87SMohan Srinivasan 	 *
2267c72af87SMohan Srinivasan 	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
2277c72af87SMohan Srinivasan 	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
2287c72af87SMohan Srinivasan 	 * Ignore fact that there were recent incoming segments.
229df8bae1dSRodney W. Grimes 	 */
2307c72af87SMohan Srinivasan 	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
23185d94372SRobert Watson 	    tp->t_inpcb && tp->t_inpcb->inp_socket &&
2327c72af87SMohan Srinivasan 	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
2337c72af87SMohan Srinivasan 		tcpstat.tcps_finwait2_drops++;
23485d94372SRobert Watson 		tp = tcp_close(tp);
2357c72af87SMohan Srinivasan 	} else {
236df8bae1dSRodney W. Grimes 		if (tp->t_state != TCPS_TIME_WAIT &&
2379b8b58e0SJonathan Lemon 		   (ticks - tp->t_rcvtime) <= tcp_maxidle)
238e2f2059fSMike Silbersack 		       callout_reset(&tp->t_timers->tt_2msl, tcp_keepintvl,
23985d94372SRobert Watson 				     tcp_timer_2msl, tp);
240df8bae1dSRodney W. Grimes 	       else
24185d94372SRobert Watson 		       tp = tcp_close(tp);
2427c72af87SMohan Srinivasan        }
243df8bae1dSRodney W. Grimes 
2449b8b58e0SJonathan Lemon #ifdef TCPDEBUG
245586b4a0eSKonstantin Belousov 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
246fb59c426SYoshinobu Inoue 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
2479b8b58e0SJonathan Lemon 			  PRU_SLOWTIMO);
2489b8b58e0SJonathan Lemon #endif
24985d94372SRobert Watson 	if (tp != NULL)
25085d94372SRobert Watson 		INP_UNLOCK(inp);
25185d94372SRobert Watson 	INP_INFO_WUNLOCK(&tcbinfo);
2529b8b58e0SJonathan Lemon }
2539b8b58e0SJonathan Lemon 
25485d94372SRobert Watson void
25585d94372SRobert Watson tcp_timer_keep(void *xtp)
2569b8b58e0SJonathan Lemon {
25785d94372SRobert Watson 	struct tcpcb *tp = xtp;
25808517d53SMike Silbersack 	struct tcptemp *t_template;
25985d94372SRobert Watson 	struct inpcb *inp;
2609b8b58e0SJonathan Lemon #ifdef TCPDEBUG
2619b8b58e0SJonathan Lemon 	int ostate;
2629b8b58e0SJonathan Lemon 
2639b8b58e0SJonathan Lemon 	ostate = tp->t_state;
2649b8b58e0SJonathan Lemon #endif
26585d94372SRobert Watson 	INP_INFO_WLOCK(&tcbinfo);
26685d94372SRobert Watson 	inp = tp->t_inpcb;
26785d94372SRobert Watson 	/*
26885d94372SRobert Watson 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
26985d94372SRobert Watson 	 * tear-down mean we need it as a work-around for races between
27085d94372SRobert Watson 	 * timers and tcp_discardcb().
27185d94372SRobert Watson 	 *
27285d94372SRobert Watson 	 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
27385d94372SRobert Watson 	 */
27485d94372SRobert Watson 	if (inp == NULL) {
27585d94372SRobert Watson 		tcp_timer_race++;
27685d94372SRobert Watson 		INP_INFO_WUNLOCK(&tcbinfo);
27785d94372SRobert Watson 		return;
27885d94372SRobert Watson 	}
27985d94372SRobert Watson 	INP_LOCK(inp);
280e2f2059fSMike Silbersack 	if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_keep)
281e2f2059fSMike Silbersack 	    || !callout_active(&tp->t_timers->tt_keep)) {
28285d94372SRobert Watson 		INP_UNLOCK(inp);
28385d94372SRobert Watson 		INP_INFO_WUNLOCK(&tcbinfo);
28485d94372SRobert Watson 		return;
28585d94372SRobert Watson 	}
286e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_keep);
2879b8b58e0SJonathan Lemon 	/*
2889b8b58e0SJonathan Lemon 	 * Keep-alive timer went off; send something
2899b8b58e0SJonathan Lemon 	 * or drop connection if idle for too long.
2909b8b58e0SJonathan Lemon 	 */
2919b8b58e0SJonathan Lemon 	tcpstat.tcps_keeptimeo++;
2929b8b58e0SJonathan Lemon 	if (tp->t_state < TCPS_ESTABLISHED)
2939b8b58e0SJonathan Lemon 		goto dropit;
2942a074620SSam Leffler 	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
2959b8b58e0SJonathan Lemon 	    tp->t_state <= TCPS_CLOSING) {
2969b8b58e0SJonathan Lemon 		if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle)
2979b8b58e0SJonathan Lemon 			goto dropit;
2989b8b58e0SJonathan Lemon 		/*
2999b8b58e0SJonathan Lemon 		 * Send a packet designed to force a response
3009b8b58e0SJonathan Lemon 		 * if the peer is up and reachable:
3019b8b58e0SJonathan Lemon 		 * either an ACK if the connection is still alive,
3029b8b58e0SJonathan Lemon 		 * or an RST if the peer has closed the connection
3039b8b58e0SJonathan Lemon 		 * due to timeout or reboot.
3049b8b58e0SJonathan Lemon 		 * Using sequence number tp->snd_una-1
3059b8b58e0SJonathan Lemon 		 * causes the transmitted zero-length segment
3069b8b58e0SJonathan Lemon 		 * to lie outside the receive window;
3079b8b58e0SJonathan Lemon 		 * by the protocol spec, this requires the
3089b8b58e0SJonathan Lemon 		 * correspondent TCP to respond.
3099b8b58e0SJonathan Lemon 		 */
3109b8b58e0SJonathan Lemon 		tcpstat.tcps_keepprobe++;
31179909384SJonathan Lemon 		t_template = tcpip_maketemplate(inp);
31208517d53SMike Silbersack 		if (t_template) {
31308517d53SMike Silbersack 			tcp_respond(tp, t_template->tt_ipgen,
31408517d53SMike Silbersack 				    &t_template->tt_t, (struct mbuf *)NULL,
3159b8b58e0SJonathan Lemon 				    tp->rcv_nxt, tp->snd_una - 1, 0);
31608517d53SMike Silbersack 			(void) m_free(dtom(t_template));
31708517d53SMike Silbersack 		}
318e2f2059fSMike Silbersack 		callout_reset(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
3194cc20ab1SSeigo Tanimura 	} else
320e2f2059fSMike Silbersack 		callout_reset(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
3219b8b58e0SJonathan Lemon 
3229b8b58e0SJonathan Lemon #ifdef TCPDEBUG
3232a074620SSam Leffler 	if (inp->inp_socket->so_options & SO_DEBUG)
324fb59c426SYoshinobu Inoue 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
3259b8b58e0SJonathan Lemon 			  PRU_SLOWTIMO);
3269b8b58e0SJonathan Lemon #endif
32785d94372SRobert Watson 	INP_UNLOCK(inp);
32885d94372SRobert Watson 	INP_INFO_WUNLOCK(&tcbinfo);
32985d94372SRobert Watson 	return;
3309b8b58e0SJonathan Lemon 
3319b8b58e0SJonathan Lemon dropit:
3329b8b58e0SJonathan Lemon 	tcpstat.tcps_keepdrops++;
33385d94372SRobert Watson 	tp = tcp_drop(tp, ETIMEDOUT);
33485d94372SRobert Watson 
33585d94372SRobert Watson #ifdef TCPDEBUG
33685d94372SRobert Watson 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
33785d94372SRobert Watson 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
33885d94372SRobert Watson 			  PRU_SLOWTIMO);
33985d94372SRobert Watson #endif
34085d94372SRobert Watson 	if (tp != NULL)
34185d94372SRobert Watson 		INP_UNLOCK(tp->t_inpcb);
34285d94372SRobert Watson 	INP_INFO_WUNLOCK(&tcbinfo);
3439b8b58e0SJonathan Lemon }
3449b8b58e0SJonathan Lemon 
34585d94372SRobert Watson void
34685d94372SRobert Watson tcp_timer_persist(void *xtp)
3479b8b58e0SJonathan Lemon {
34885d94372SRobert Watson 	struct tcpcb *tp = xtp;
34985d94372SRobert Watson 	struct inpcb *inp;
3509b8b58e0SJonathan Lemon #ifdef TCPDEBUG
3519b8b58e0SJonathan Lemon 	int ostate;
3529b8b58e0SJonathan Lemon 
3539b8b58e0SJonathan Lemon 	ostate = tp->t_state;
3549b8b58e0SJonathan Lemon #endif
35585d94372SRobert Watson 	INP_INFO_WLOCK(&tcbinfo);
35685d94372SRobert Watson 	inp = tp->t_inpcb;
35785d94372SRobert Watson 	/*
35885d94372SRobert Watson 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
35985d94372SRobert Watson 	 * tear-down mean we need it as a work-around for races between
36085d94372SRobert Watson 	 * timers and tcp_discardcb().
36185d94372SRobert Watson 	 *
36285d94372SRobert Watson 	 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
36385d94372SRobert Watson 	 */
36485d94372SRobert Watson 	if (inp == NULL) {
36585d94372SRobert Watson 		tcp_timer_race++;
36685d94372SRobert Watson 		INP_INFO_WUNLOCK(&tcbinfo);
36785d94372SRobert Watson 		return;
36885d94372SRobert Watson 	}
36985d94372SRobert Watson 	INP_LOCK(inp);
370e2f2059fSMike Silbersack 	if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_persist)
371e2f2059fSMike Silbersack 	    || !callout_active(&tp->t_timers->tt_persist)) {
37285d94372SRobert Watson 		INP_UNLOCK(inp);
37385d94372SRobert Watson 		INP_INFO_WUNLOCK(&tcbinfo);
37485d94372SRobert Watson 		return;
37585d94372SRobert Watson 	}
376e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_persist);
3779b8b58e0SJonathan Lemon 	/*
3789b8b58e0SJonathan Lemon 	 * Persistance timer into zero window.
3799b8b58e0SJonathan Lemon 	 * Force a byte to be output, if possible.
3809b8b58e0SJonathan Lemon 	 */
3819b8b58e0SJonathan Lemon 	tcpstat.tcps_persisttimeo++;
3829b8b58e0SJonathan Lemon 	/*
3839b8b58e0SJonathan Lemon 	 * Hack: if the peer is dead/unreachable, we do not
3849b8b58e0SJonathan Lemon 	 * time out if the window is closed.  After a full
3859b8b58e0SJonathan Lemon 	 * backoff, drop the connection if the idle time
3869b8b58e0SJonathan Lemon 	 * (no responses to probes) reaches the maximum
3879b8b58e0SJonathan Lemon 	 * backoff that we would use if retransmitting.
3889b8b58e0SJonathan Lemon 	 */
3899b8b58e0SJonathan Lemon 	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
3909b8b58e0SJonathan Lemon 	    ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle ||
3919b8b58e0SJonathan Lemon 	     (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
3929b8b58e0SJonathan Lemon 		tcpstat.tcps_persistdrop++;
39385d94372SRobert Watson 		tp = tcp_drop(tp, ETIMEDOUT);
39485d94372SRobert Watson 		goto out;
3959b8b58e0SJonathan Lemon 	}
3969b8b58e0SJonathan Lemon 	tcp_setpersist(tp);
3972cdbfa66SPaul Saab 	tp->t_flags |= TF_FORCEDATA;
3989b8b58e0SJonathan Lemon 	(void) tcp_output(tp);
3992cdbfa66SPaul Saab 	tp->t_flags &= ~TF_FORCEDATA;
4009b8b58e0SJonathan Lemon 
40185d94372SRobert Watson out:
4029b8b58e0SJonathan Lemon #ifdef TCPDEBUG
403ffb761f6SGleb Smirnoff 	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
404ffb761f6SGleb Smirnoff 		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
4059b8b58e0SJonathan Lemon #endif
40685d94372SRobert Watson 	if (tp != NULL)
40785d94372SRobert Watson 		INP_UNLOCK(inp);
40885d94372SRobert Watson 	INP_INFO_WUNLOCK(&tcbinfo);
4099b8b58e0SJonathan Lemon }
4109b8b58e0SJonathan Lemon 
41185d94372SRobert Watson void
41285d94372SRobert Watson tcp_timer_rexmt(void * xtp)
4139b8b58e0SJonathan Lemon {
41485d94372SRobert Watson 	struct tcpcb *tp = xtp;
4159b8b58e0SJonathan Lemon 	int rexmt;
41685d94372SRobert Watson 	int headlocked;
41785d94372SRobert Watson 	struct inpcb *inp;
4189b8b58e0SJonathan Lemon #ifdef TCPDEBUG
4199b8b58e0SJonathan Lemon 	int ostate;
4209b8b58e0SJonathan Lemon 
4219b8b58e0SJonathan Lemon 	ostate = tp->t_state;
4229b8b58e0SJonathan Lemon #endif
42385d94372SRobert Watson 	INP_INFO_WLOCK(&tcbinfo);
42485d94372SRobert Watson 	headlocked = 1;
42585d94372SRobert Watson 	inp = tp->t_inpcb;
42685d94372SRobert Watson 	/*
42785d94372SRobert Watson 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
42885d94372SRobert Watson 	 * tear-down mean we need it as a work-around for races between
42985d94372SRobert Watson 	 * timers and tcp_discardcb().
43085d94372SRobert Watson 	 *
43185d94372SRobert Watson 	 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
43285d94372SRobert Watson 	 */
43385d94372SRobert Watson 	if (inp == NULL) {
43485d94372SRobert Watson 		tcp_timer_race++;
43585d94372SRobert Watson 		INP_INFO_WUNLOCK(&tcbinfo);
43685d94372SRobert Watson 		return;
43785d94372SRobert Watson 	}
43885d94372SRobert Watson 	INP_LOCK(inp);
439e2f2059fSMike Silbersack 	if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_rexmt)
440e2f2059fSMike Silbersack 	    || !callout_active(&tp->t_timers->tt_rexmt)) {
44185d94372SRobert Watson 		INP_UNLOCK(inp);
44285d94372SRobert Watson 		INP_INFO_WUNLOCK(&tcbinfo);
44385d94372SRobert Watson 		return;
44485d94372SRobert Watson 	}
445e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_rexmt);
4466d90faf3SPaul Saab 	tcp_free_sackholes(tp);
447df8bae1dSRodney W. Grimes 	/*
448df8bae1dSRodney W. Grimes 	 * Retransmission timer went off.  Message has not
449df8bae1dSRodney W. Grimes 	 * been acked within retransmit interval.  Back off
450df8bae1dSRodney W. Grimes 	 * to a longer retransmit interval and retransmit one segment.
451df8bae1dSRodney W. Grimes 	 */
452df8bae1dSRodney W. Grimes 	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
453df8bae1dSRodney W. Grimes 		tp->t_rxtshift = TCP_MAXRXTSHIFT;
454df8bae1dSRodney W. Grimes 		tcpstat.tcps_timeoutdrop++;
45585d94372SRobert Watson 		tp = tcp_drop(tp, tp->t_softerror ?
45685d94372SRobert Watson 			      tp->t_softerror : ETIMEDOUT);
45785d94372SRobert Watson 		goto out;
4589b8b58e0SJonathan Lemon 	}
45985d94372SRobert Watson 	INP_INFO_WUNLOCK(&tcbinfo);
46085d94372SRobert Watson 	headlocked = 0;
4619b8b58e0SJonathan Lemon 	if (tp->t_rxtshift == 1) {
4629b8b58e0SJonathan Lemon 		/*
4639b8b58e0SJonathan Lemon 		 * first retransmit; record ssthresh and cwnd so they can
4649b8b58e0SJonathan Lemon 		 * be recovered if this turns out to be a "bad" retransmit.
4659b8b58e0SJonathan Lemon 		 * A retransmit is considered "bad" if an ACK for this
4669b8b58e0SJonathan Lemon 		 * segment is received within RTT/2 interval; the assumption
4679b8b58e0SJonathan Lemon 		 * here is that the ACK was already in flight.  See
4689b8b58e0SJonathan Lemon 		 * "On Estimating End-to-End Network Path Properties" by
4699b8b58e0SJonathan Lemon 		 * Allman and Paxson for more details.
4709b8b58e0SJonathan Lemon 		 */
4719b8b58e0SJonathan Lemon 		tp->snd_cwnd_prev = tp->snd_cwnd;
4729b8b58e0SJonathan Lemon 		tp->snd_ssthresh_prev = tp->snd_ssthresh;
4739d11646dSJeffrey Hsu 		tp->snd_recover_prev = tp->snd_recover;
4749d11646dSJeffrey Hsu 		if (IN_FASTRECOVERY(tp))
4759d11646dSJeffrey Hsu 		  tp->t_flags |= TF_WASFRECOVERY;
4769d11646dSJeffrey Hsu 		else
4779d11646dSJeffrey Hsu 		  tp->t_flags &= ~TF_WASFRECOVERY;
4789b8b58e0SJonathan Lemon 		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
479df8bae1dSRodney W. Grimes 	}
480df8bae1dSRodney W. Grimes 	tcpstat.tcps_rexmttimeo++;
4817d42e30cSJonathan Lemon 	if (tp->t_state == TCPS_SYN_SENT)
4827d42e30cSJonathan Lemon 		rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
4837d42e30cSJonathan Lemon 	else
484df8bae1dSRodney W. Grimes 		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
485df8bae1dSRodney W. Grimes 	TCPT_RANGESET(tp->t_rxtcur, rexmt,
486df8bae1dSRodney W. Grimes 		      tp->t_rttmin, TCPTV_REXMTMAX);
487df8bae1dSRodney W. Grimes 	/*
488c94c54e4SAndre Oppermann 	 * Disable rfc1323 if we havn't got any response to
4897ceb7783SJesper Skriver 	 * our third SYN to work-around some broken terminal servers
4907ceb7783SJesper Skriver 	 * (most of which have hopefully been retired) that have bad VJ
4917ceb7783SJesper Skriver 	 * header compression code which trashes TCP segments containing
4927ceb7783SJesper Skriver 	 * unknown-to-them TCP options.
4937ceb7783SJesper Skriver 	 */
4947ceb7783SJesper Skriver 	if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
495c94c54e4SAndre Oppermann 		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
4967ceb7783SJesper Skriver 	/*
49797d8d152SAndre Oppermann 	 * If we backed off this far, our srtt estimate is probably bogus.
49897d8d152SAndre Oppermann 	 * Clobber it so we'll take the next rtt measurement as our srtt;
499df8bae1dSRodney W. Grimes 	 * move the current srtt into rttvar to keep the current
500df8bae1dSRodney W. Grimes 	 * retransmit times until then.
501df8bae1dSRodney W. Grimes 	 */
502df8bae1dSRodney W. Grimes 	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
503fb59c426SYoshinobu Inoue #ifdef INET6
504fb59c426SYoshinobu Inoue 		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
505fb59c426SYoshinobu Inoue 			in6_losing(tp->t_inpcb);
506fb59c426SYoshinobu Inoue 		else
507fb59c426SYoshinobu Inoue #endif
508df8bae1dSRodney W. Grimes 		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
509df8bae1dSRodney W. Grimes 		tp->t_srtt = 0;
510df8bae1dSRodney W. Grimes 	}
511df8bae1dSRodney W. Grimes 	tp->snd_nxt = tp->snd_una;
5129d11646dSJeffrey Hsu 	tp->snd_recover = tp->snd_max;
51346f58482SJonathan Lemon 	/*
51474b48c1dSAndras Olah 	 * Force a segment to be sent.
51574b48c1dSAndras Olah 	 */
51674b48c1dSAndras Olah 	tp->t_flags |= TF_ACKNOW;
51774b48c1dSAndras Olah 	/*
518df8bae1dSRodney W. Grimes 	 * If timing a segment in this window, stop the timer.
519df8bae1dSRodney W. Grimes 	 */
5209b8b58e0SJonathan Lemon 	tp->t_rtttime = 0;
521df8bae1dSRodney W. Grimes 	/*
522df8bae1dSRodney W. Grimes 	 * Close the congestion window down to one segment
523df8bae1dSRodney W. Grimes 	 * (we'll open it by one segment for each ack we get).
524df8bae1dSRodney W. Grimes 	 * Since we probably have a window's worth of unacked
525df8bae1dSRodney W. Grimes 	 * data accumulated, this "slow start" keeps us from
526df8bae1dSRodney W. Grimes 	 * dumping all that data as back-to-back packets (which
527df8bae1dSRodney W. Grimes 	 * might overwhelm an intermediate gateway).
528df8bae1dSRodney W. Grimes 	 *
529df8bae1dSRodney W. Grimes 	 * There are two phases to the opening: Initially we
530df8bae1dSRodney W. Grimes 	 * open by one mss on each ack.  This makes the window
531df8bae1dSRodney W. Grimes 	 * size increase exponentially with time.  If the
532df8bae1dSRodney W. Grimes 	 * window is larger than the path can handle, this
533df8bae1dSRodney W. Grimes 	 * exponential growth results in dropped packet(s)
534df8bae1dSRodney W. Grimes 	 * almost immediately.  To get more time between
535df8bae1dSRodney W. Grimes 	 * drops but still "push" the network to take advantage
536df8bae1dSRodney W. Grimes 	 * of improving conditions, we switch from exponential
537df8bae1dSRodney W. Grimes 	 * to linear window opening at some threshhold size.
538df8bae1dSRodney W. Grimes 	 * For a threshhold, we use half the current window
539df8bae1dSRodney W. Grimes 	 * size, truncated to a multiple of the mss.
540df8bae1dSRodney W. Grimes 	 *
541df8bae1dSRodney W. Grimes 	 * (the minimum cwnd that will give us exponential
542df8bae1dSRodney W. Grimes 	 * growth is 2 mss.  We don't allow the threshhold
543df8bae1dSRodney W. Grimes 	 * to go below this.)
544df8bae1dSRodney W. Grimes 	 */
545df8bae1dSRodney W. Grimes 	{
546df8bae1dSRodney W. Grimes 		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
547df8bae1dSRodney W. Grimes 		if (win < 2)
548df8bae1dSRodney W. Grimes 			win = 2;
549df8bae1dSRodney W. Grimes 		tp->snd_cwnd = tp->t_maxseg;
550df8bae1dSRodney W. Grimes 		tp->snd_ssthresh = win * tp->t_maxseg;
551df8bae1dSRodney W. Grimes 		tp->t_dupacks = 0;
552df8bae1dSRodney W. Grimes 	}
5539d11646dSJeffrey Hsu 	EXIT_FASTRECOVERY(tp);
554df8bae1dSRodney W. Grimes 	(void) tcp_output(tp);
555df8bae1dSRodney W. Grimes 
55685d94372SRobert Watson out:
5579b8b58e0SJonathan Lemon #ifdef TCPDEBUG
5581c53f806SRobert Watson 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
559fb59c426SYoshinobu Inoue 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
5609b8b58e0SJonathan Lemon 			  PRU_SLOWTIMO);
561df8bae1dSRodney W. Grimes #endif
56285d94372SRobert Watson 	if (tp != NULL)
56385d94372SRobert Watson 		INP_UNLOCK(inp);
56485d94372SRobert Watson 	if (headlocked)
56585d94372SRobert Watson 		INP_INFO_WUNLOCK(&tcbinfo);
56685d94372SRobert Watson }
56785d94372SRobert Watson 
56885d94372SRobert Watson void
56985d94372SRobert Watson tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
57085d94372SRobert Watson {
57185d94372SRobert Watson 	struct callout *t_callout;
57285d94372SRobert Watson 	void *f_callout;
57385d94372SRobert Watson 
57485d94372SRobert Watson 	switch (timer_type) {
57585d94372SRobert Watson 		case TT_DELACK:
576e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_delack;
57785d94372SRobert Watson 			f_callout = tcp_timer_delack;
57885d94372SRobert Watson 			break;
57985d94372SRobert Watson 		case TT_REXMT:
580e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_rexmt;
58185d94372SRobert Watson 			f_callout = tcp_timer_rexmt;
58285d94372SRobert Watson 			break;
58385d94372SRobert Watson 		case TT_PERSIST:
584e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_persist;
58585d94372SRobert Watson 			f_callout = tcp_timer_persist;
58685d94372SRobert Watson 			break;
58785d94372SRobert Watson 		case TT_KEEP:
588e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_keep;
58985d94372SRobert Watson 			f_callout = tcp_timer_keep;
59085d94372SRobert Watson 			break;
59185d94372SRobert Watson 		case TT_2MSL:
592e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_2msl;
59385d94372SRobert Watson 			f_callout = tcp_timer_2msl;
59485d94372SRobert Watson 			break;
59585d94372SRobert Watson 		default:
59685d94372SRobert Watson 			panic("bad timer_type");
59785d94372SRobert Watson 		}
59885d94372SRobert Watson 	if (delta == 0) {
59985d94372SRobert Watson 		callout_stop(t_callout);
60085d94372SRobert Watson 	} else {
60185d94372SRobert Watson 		callout_reset(t_callout, delta, f_callout, tp);
60285d94372SRobert Watson 	}
60385d94372SRobert Watson }
60485d94372SRobert Watson 
60585d94372SRobert Watson int
60685d94372SRobert Watson tcp_timer_active(struct tcpcb *tp, int timer_type)
60785d94372SRobert Watson {
60885d94372SRobert Watson 	struct callout *t_callout;
60985d94372SRobert Watson 
61085d94372SRobert Watson 	switch (timer_type) {
61185d94372SRobert Watson 		case TT_DELACK:
612e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_delack;
61385d94372SRobert Watson 			break;
61485d94372SRobert Watson 		case TT_REXMT:
615e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_rexmt;
61685d94372SRobert Watson 			break;
61785d94372SRobert Watson 		case TT_PERSIST:
618e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_persist;
61985d94372SRobert Watson 			break;
62085d94372SRobert Watson 		case TT_KEEP:
621e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_keep;
62285d94372SRobert Watson 			break;
62385d94372SRobert Watson 		case TT_2MSL:
624e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_2msl;
62585d94372SRobert Watson 			break;
62685d94372SRobert Watson 		default:
62785d94372SRobert Watson 			panic("bad timer_type");
62885d94372SRobert Watson 		}
62985d94372SRobert Watson 	return callout_active(t_callout);
630df8bae1dSRodney W. Grimes }
631