xref: /freebsd/sys/netinet/tcp_timer.c (revision 882ac53ed7ba31847423ef9ae7edee134b05cf47)
/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 */
31df8bae1dSRodney W. Grimes 
324b421e2dSMike Silbersack #include <sys/cdefs.h>
334b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
344b421e2dSMike Silbersack 
35825fd1e4SNavdeep Parhar #include "opt_inet.h"
36fb59c426SYoshinobu Inoue #include "opt_inet6.h"
370cc12cc5SJoerg Wunsch #include "opt_tcpdebug.h"
38883831c6SAdrian Chadd #include "opt_rss.h"
390cc12cc5SJoerg Wunsch 
40df8bae1dSRodney W. Grimes #include <sys/param.h>
4198163b98SPoul-Henning Kamp #include <sys/kernel.h>
42c74af4faSBruce Evans #include <sys/lock.h>
4308517d53SMike Silbersack #include <sys/mbuf.h>
44c74af4faSBruce Evans #include <sys/mutex.h>
45c74af4faSBruce Evans #include <sys/protosw.h>
4687aedea4SKip Macy #include <sys/smp.h>
47df8bae1dSRodney W. Grimes #include <sys/socket.h>
48df8bae1dSRodney W. Grimes #include <sys/socketvar.h>
49c74af4faSBruce Evans #include <sys/sysctl.h>
50c74af4faSBruce Evans #include <sys/systm.h>
51e79adb8eSGarrett Wollman 
524b79449eSBjoern A. Zeeb #include <net/if.h>
53df8bae1dSRodney W. Grimes #include <net/route.h>
54530c0060SRobert Watson #include <net/vnet.h>
55883831c6SAdrian Chadd #include <net/netisr.h>
56df8bae1dSRodney W. Grimes 
57dbc42409SLawrence Stewart #include <netinet/cc.h>
58df8bae1dSRodney W. Grimes #include <netinet/in.h>
59df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
60883831c6SAdrian Chadd #include <netinet/in_rss.h>
61c74af4faSBruce Evans #include <netinet/in_systm.h>
62fb59c426SYoshinobu Inoue #ifdef INET6
63fb59c426SYoshinobu Inoue #include <netinet6/in6_pcb.h>
64fb59c426SYoshinobu Inoue #endif
65df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
66df8bae1dSRodney W. Grimes #include <netinet/tcp_fsm.h>
67df8bae1dSRodney W. Grimes #include <netinet/tcp_timer.h>
68df8bae1dSRodney W. Grimes #include <netinet/tcp_var.h>
69f6f6703fSSean Bruno #ifdef INET6
70f6f6703fSSean Bruno #include <netinet6/tcp6_var.h>
71f6f6703fSSean Bruno #endif
72df8bae1dSRodney W. Grimes #include <netinet/tcpip.h>
73af7a2999SDavid Greenman #ifdef TCPDEBUG
74af7a2999SDavid Greenman #include <netinet/tcp_debug.h>
75af7a2999SDavid Greenman #endif
76df8bae1dSRodney W. Grimes 
779b8b58e0SJonathan Lemon int	tcp_keepinit;
78ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
7941698ebfSTom Rhodes     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
807b40aa32SPaul Traina 
819b8b58e0SJonathan Lemon int	tcp_keepidle;
82ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
8341698ebfSTom Rhodes     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
8498163b98SPoul-Henning Kamp 
859b8b58e0SJonathan Lemon int	tcp_keepintvl;
86ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
8741698ebfSTom Rhodes     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
8898163b98SPoul-Henning Kamp 
899b8b58e0SJonathan Lemon int	tcp_delacktime;
906489fe65SAndre Oppermann SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
916489fe65SAndre Oppermann     &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
92ccb4d0c6SJonathan Lemon     "Time before a delayed ACK is sent");
939b8b58e0SJonathan Lemon 
949b8b58e0SJonathan Lemon int	tcp_msl;
95ccb4d0c6SJonathan Lemon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
96ccb4d0c6SJonathan Lemon     &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
979b8b58e0SJonathan Lemon 
98701bec5aSMatthew Dillon int	tcp_rexmit_min;
99701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
1006489fe65SAndre Oppermann     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
1016489fe65SAndre Oppermann     "Minimum Retransmission Timeout");
102701bec5aSMatthew Dillon 
103701bec5aSMatthew Dillon int	tcp_rexmit_slop;
104701bec5aSMatthew Dillon SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
1056489fe65SAndre Oppermann     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
1066489fe65SAndre Oppermann     "Retransmission Timer Slop");
107701bec5aSMatthew Dillon 
108c39a614eSRobert Watson static int	always_keepalive = 1;
1093d177f46SBill Fumerola SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
1103d177f46SBill Fumerola     &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
11134be9bf3SPoul-Henning Kamp 
1127c72af87SMohan Srinivasan int    tcp_fast_finwait2_recycle = 0;
1137c72af87SMohan Srinivasan SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
1146489fe65SAndre Oppermann     &tcp_fast_finwait2_recycle, 0,
1156489fe65SAndre Oppermann     "Recycle closed FIN_WAIT_2 connections faster");
1167c72af87SMohan Srinivasan 
1177c72af87SMohan Srinivasan int    tcp_finwait2_timeout;
1187c72af87SMohan Srinivasan SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
1196489fe65SAndre Oppermann     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
1207c72af87SMohan Srinivasan 
1219077f387SGleb Smirnoff int	tcp_keepcnt = TCPTV_KEEPCNT;
1229077f387SGleb Smirnoff SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
1239077f387SGleb Smirnoff     "Number of keepalive probes to send");
1247c72af87SMohan Srinivasan 
1250312fbe9SPoul-Henning Kamp 	/* max idle probes */
1269b8b58e0SJonathan Lemon int	tcp_maxpersistidle;
127e79adb8eSGarrett Wollman 
1286c0ef895SJohn Baldwin static int	tcp_rexmit_drop_options = 0;
1296c0ef895SJohn Baldwin SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
1306c0ef895SJohn Baldwin     &tcp_rexmit_drop_options, 0,
1316c0ef895SJohn Baldwin     "Drop TCP options from 3rd and later retransmitted SYN");
1326c0ef895SJohn Baldwin 
133f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
134f6f6703fSSean Bruno #define	V_tcp_pmtud_blackhole_detect	VNET(tcp_pmtud_blackhole_detect)
135f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
136f6f6703fSSean Bruno     CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_VNET,
137f6f6703fSSean Bruno     &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
138f6f6703fSSean Bruno     "Path MTU Discovery Black Hole Detection Enabled");
139f6f6703fSSean Bruno 
140f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
141f6f6703fSSean Bruno #define	V_tcp_pmtud_blackhole_activated \
142f6f6703fSSean Bruno     VNET(tcp_pmtud_blackhole_activated)
143f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
144f6f6703fSSean Bruno     CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_VNET,
145f6f6703fSSean Bruno     &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
146f6f6703fSSean Bruno     "Path MTU Discovery Black Hole Detection, Activation Count");
147f6f6703fSSean Bruno 
148f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
149f6f6703fSSean Bruno #define	V_tcp_pmtud_blackhole_activated_min_mss \
150f6f6703fSSean Bruno     VNET(tcp_pmtud_blackhole_activated_min_mss)
151f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
152f6f6703fSSean Bruno     CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_VNET,
153f6f6703fSSean Bruno     &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
154f6f6703fSSean Bruno     "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
155f6f6703fSSean Bruno 
156f6f6703fSSean Bruno static VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
157f6f6703fSSean Bruno #define	V_tcp_pmtud_blackhole_failed	VNET(tcp_pmtud_blackhole_failed)
158f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
159f6f6703fSSean Bruno     CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_VNET,
160f6f6703fSSean Bruno     &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
161f6f6703fSSean Bruno     "Path MTU Discovery Black Hole Detection, Failure Count");
162f6f6703fSSean Bruno 
/*
 * Lowered MSS values associated with PMTUD black hole detection, one per
 * address family and per vnet: 1200 for IPv4 and 1220 for IPv6 by default.
 * SYSCTL_INT() supplies the integer type itself, so only access and vnet
 * flags are passed.
 */
#ifdef INET
static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
#define	V_tcp_pmtud_blackhole_mss	VNET(tcp_pmtud_blackhole_mss)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
    CTLFLAG_RW|CTLFLAG_VNET,
    &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
    "Path MTU Discovery Black Hole Detection lowered MSS");
#endif

#ifdef INET6
static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
#define	V_tcp_v6pmtud_blackhole_mss	VNET(tcp_v6pmtud_blackhole_mss)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
    CTLFLAG_RW|CTLFLAG_VNET,
    &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
    "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
#endif
180f6f6703fSSean Bruno 
1818f7e75cbSAdrian Chadd #ifdef	RSS
1828f7e75cbSAdrian Chadd static int	per_cpu_timers = 1;
1838f7e75cbSAdrian Chadd #else
18487aedea4SKip Macy static int	per_cpu_timers = 0;
1858f7e75cbSAdrian Chadd #endif
18687aedea4SKip Macy SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
18787aedea4SKip Macy     &per_cpu_timers , 0, "run tcp timers on all cpus");
18887aedea4SKip Macy 
189883831c6SAdrian Chadd #if 0
19087aedea4SKip Macy #define	INP_CPU(inp)	(per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
19187aedea4SKip Macy 		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
192883831c6SAdrian Chadd #endif
193883831c6SAdrian Chadd 
194883831c6SAdrian Chadd /*
195883831c6SAdrian Chadd  * Map the given inp to a CPU id.
196883831c6SAdrian Chadd  *
197883831c6SAdrian Chadd  * This queries RSS if it's compiled in, else it defaults to the current
198883831c6SAdrian Chadd  * CPU ID.
199883831c6SAdrian Chadd  */
200883831c6SAdrian Chadd static inline int
201883831c6SAdrian Chadd inp_to_cpuid(struct inpcb *inp)
202883831c6SAdrian Chadd {
203883831c6SAdrian Chadd 	u_int cpuid;
204883831c6SAdrian Chadd 
205883831c6SAdrian Chadd #ifdef	RSS
206883831c6SAdrian Chadd 	if (per_cpu_timers) {
207883831c6SAdrian Chadd 		cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
208883831c6SAdrian Chadd 		if (cpuid == NETISR_CPUID_NONE)
209883831c6SAdrian Chadd 			return (curcpu);	/* XXX */
210883831c6SAdrian Chadd 		else
211883831c6SAdrian Chadd 			return (cpuid);
212883831c6SAdrian Chadd 	}
213883831c6SAdrian Chadd #else
214883831c6SAdrian Chadd 	/* Legacy, pre-RSS behaviour */
215883831c6SAdrian Chadd 	if (per_cpu_timers) {
216883831c6SAdrian Chadd 		/*
217883831c6SAdrian Chadd 		 * We don't have a flowid -> cpuid mapping, so cheat and
218883831c6SAdrian Chadd 		 * just map unknown cpuids to curcpu.  Not the best, but
219883831c6SAdrian Chadd 		 * apparently better than defaulting to swi 0.
220883831c6SAdrian Chadd 		 */
221883831c6SAdrian Chadd 		cpuid = inp->inp_flowid % (mp_maxid + 1);
222883831c6SAdrian Chadd 		if (! CPU_ABSENT(cpuid))
223883831c6SAdrian Chadd 			return (cpuid);
224883831c6SAdrian Chadd 		return (curcpu);
225883831c6SAdrian Chadd 	}
226883831c6SAdrian Chadd #endif
227883831c6SAdrian Chadd 	/* Default for RSS and non-RSS - cpuid 0 */
228883831c6SAdrian Chadd 	else {
229883831c6SAdrian Chadd 		return (0);
230883831c6SAdrian Chadd 	}
231883831c6SAdrian Chadd }
23287aedea4SKip Macy 
233df8bae1dSRodney W. Grimes /*
234df8bae1dSRodney W. Grimes  * Tcp protocol timeout routine called every 500 ms.
2359b8b58e0SJonathan Lemon  * Updates timestamps used for TCP
236df8bae1dSRodney W. Grimes  * causes finite state machine actions if timers expire.
237df8bae1dSRodney W. Grimes  */
238df8bae1dSRodney W. Grimes void
239e2f2059fSMike Silbersack tcp_slowtimo(void)
240df8bae1dSRodney W. Grimes {
2418b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
24215bd2b43SDavid Greenman 
2435ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
2448b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
2458b615593SMarko Zec 		CURVNET_SET(vnet_iter);
24666eefb1eSJohn Baldwin 		tcp_tw_2msl_scan();
2478b615593SMarko Zec 		CURVNET_RESTORE();
2488b615593SMarko Zec 	}
2495ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
250df8bae1dSRodney W. Grimes }
251df8bae1dSRodney W. Grimes 
2527d42e30cSJonathan Lemon int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
2537d42e30cSJonathan Lemon     { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
2547d42e30cSJonathan Lemon 
255df8bae1dSRodney W. Grimes int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
256f058535dSJeffrey Hsu     { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
257df8bae1dSRodney W. Grimes 
258f058535dSJeffrey Hsu static int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
259e79adb8eSGarrett Wollman 
260623dce13SRobert Watson static int tcp_timer_race;
261623dce13SRobert Watson SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
262623dce13SRobert Watson     0, "Count of t_inpcb races on tcp_discardcb");
263623dce13SRobert Watson 
264df8bae1dSRodney W. Grimes /*
265df8bae1dSRodney W. Grimes  * TCP timer processing.
266df8bae1dSRodney W. Grimes  */
26785d94372SRobert Watson 
26885d94372SRobert Watson void
26985d94372SRobert Watson tcp_timer_delack(void *xtp)
270df8bae1dSRodney W. Grimes {
27185d94372SRobert Watson 	struct tcpcb *tp = xtp;
27285d94372SRobert Watson 	struct inpcb *inp;
2738b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
27485d94372SRobert Watson 
27585d94372SRobert Watson 	inp = tp->t_inpcb;
27685d94372SRobert Watson 	/*
27785d94372SRobert Watson 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
27885d94372SRobert Watson 	 * tear-down mean we need it as a work-around for races between
27985d94372SRobert Watson 	 * timers and tcp_discardcb().
28085d94372SRobert Watson 	 *
28185d94372SRobert Watson 	 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
28285d94372SRobert Watson 	 */
28385d94372SRobert Watson 	if (inp == NULL) {
28485d94372SRobert Watson 		tcp_timer_race++;
2858b615593SMarko Zec 		CURVNET_RESTORE();
28685d94372SRobert Watson 		return;
28785d94372SRobert Watson 	}
2888501a69cSRobert Watson 	INP_WLOCK(inp);
289655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_delack) ||
290655f934bSMikolaj Golub 	    !callout_active(&tp->t_timers->tt_delack)) {
2918501a69cSRobert Watson 		INP_WUNLOCK(inp);
2928b615593SMarko Zec 		CURVNET_RESTORE();
29385d94372SRobert Watson 		return;
29485d94372SRobert Watson 	}
295e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_delack);
296655f934bSMikolaj Golub 	if ((inp->inp_flags & INP_DROPPED) != 0) {
297655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
298655f934bSMikolaj Golub 		CURVNET_RESTORE();
299655f934bSMikolaj Golub 		return;
300655f934bSMikolaj Golub 	}
301df8bae1dSRodney W. Grimes 
3029b8b58e0SJonathan Lemon 	tp->t_flags |= TF_ACKNOW;
30378b50714SRobert Watson 	TCPSTAT_INC(tcps_delack);
3049b8b58e0SJonathan Lemon 	(void) tcp_output(tp);
3058501a69cSRobert Watson 	INP_WUNLOCK(inp);
3068b615593SMarko Zec 	CURVNET_RESTORE();
3079b8b58e0SJonathan Lemon }
3089b8b58e0SJonathan Lemon 
30985d94372SRobert Watson void
31085d94372SRobert Watson tcp_timer_2msl(void *xtp)
3119b8b58e0SJonathan Lemon {
31285d94372SRobert Watson 	struct tcpcb *tp = xtp;
31385d94372SRobert Watson 	struct inpcb *inp;
3148b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
3159b8b58e0SJonathan Lemon #ifdef TCPDEBUG
3169b8b58e0SJonathan Lemon 	int ostate;
3179b8b58e0SJonathan Lemon 
3189b8b58e0SJonathan Lemon 	ostate = tp->t_state;
3199b8b58e0SJonathan Lemon #endif
320623dce13SRobert Watson 	/*
32185d94372SRobert Watson 	 * XXXRW: Does this actually happen?
32285d94372SRobert Watson 	 */
323603724d3SBjoern A. Zeeb 	INP_INFO_WLOCK(&V_tcbinfo);
32485d94372SRobert Watson 	inp = tp->t_inpcb;
32585d94372SRobert Watson 	/*
32685d94372SRobert Watson 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
32785d94372SRobert Watson 	 * tear-down mean we need it as a work-around for races between
32885d94372SRobert Watson 	 * timers and tcp_discardcb().
32985d94372SRobert Watson 	 *
33085d94372SRobert Watson 	 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
33185d94372SRobert Watson 	 */
33285d94372SRobert Watson 	if (inp == NULL) {
33385d94372SRobert Watson 		tcp_timer_race++;
334603724d3SBjoern A. Zeeb 		INP_INFO_WUNLOCK(&V_tcbinfo);
3358b615593SMarko Zec 		CURVNET_RESTORE();
33685d94372SRobert Watson 		return;
33785d94372SRobert Watson 	}
3388501a69cSRobert Watson 	INP_WLOCK(inp);
33985d94372SRobert Watson 	tcp_free_sackholes(tp);
340655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_2msl) ||
341e2f2059fSMike Silbersack 	    !callout_active(&tp->t_timers->tt_2msl)) {
3428501a69cSRobert Watson 		INP_WUNLOCK(tp->t_inpcb);
343603724d3SBjoern A. Zeeb 		INP_INFO_WUNLOCK(&V_tcbinfo);
3448b615593SMarko Zec 		CURVNET_RESTORE();
34585d94372SRobert Watson 		return;
34685d94372SRobert Watson 	}
347e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_2msl);
348655f934bSMikolaj Golub 	if ((inp->inp_flags & INP_DROPPED) != 0) {
349655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
350655f934bSMikolaj Golub 		INP_INFO_WUNLOCK(&V_tcbinfo);
351655f934bSMikolaj Golub 		CURVNET_RESTORE();
352655f934bSMikolaj Golub 		return;
353655f934bSMikolaj Golub 	}
35485d94372SRobert Watson 	/*
355df8bae1dSRodney W. Grimes 	 * 2 MSL timeout in shutdown went off.  If we're closed but
356df8bae1dSRodney W. Grimes 	 * still waiting for peer to close and connection has been idle
357df8bae1dSRodney W. Grimes 	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
358df8bae1dSRodney W. Grimes 	 * control block.  Otherwise, check again in a bit.
3597c72af87SMohan Srinivasan 	 *
3607c72af87SMohan Srinivasan 	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
3617c72af87SMohan Srinivasan 	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
3627c72af87SMohan Srinivasan 	 * Ignore fact that there were recent incoming segments.
363df8bae1dSRodney W. Grimes 	 */
3647c72af87SMohan Srinivasan 	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
36585d94372SRobert Watson 	    tp->t_inpcb && tp->t_inpcb->inp_socket &&
3667c72af87SMohan Srinivasan 	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
36778b50714SRobert Watson 		TCPSTAT_INC(tcps_finwait2_drops);
36885d94372SRobert Watson 		tp = tcp_close(tp);
3697c72af87SMohan Srinivasan 	} else {
370df8bae1dSRodney W. Grimes 		if (tp->t_state != TCPS_TIME_WAIT &&
3719077f387SGleb Smirnoff 		   ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
3729077f387SGleb Smirnoff 		       callout_reset_on(&tp->t_timers->tt_2msl,
373883831c6SAdrian Chadd 			   TP_KEEPINTVL(tp), tcp_timer_2msl, tp,
374883831c6SAdrian Chadd 			   inp_to_cpuid(inp));
375df8bae1dSRodney W. Grimes 	       else
37685d94372SRobert Watson 		       tp = tcp_close(tp);
3777c72af87SMohan Srinivasan        }
378df8bae1dSRodney W. Grimes 
3799b8b58e0SJonathan Lemon #ifdef TCPDEBUG
380586b4a0eSKonstantin Belousov 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
381fb59c426SYoshinobu Inoue 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
3829b8b58e0SJonathan Lemon 			  PRU_SLOWTIMO);
3839b8b58e0SJonathan Lemon #endif
38485d94372SRobert Watson 	if (tp != NULL)
3858501a69cSRobert Watson 		INP_WUNLOCK(inp);
386603724d3SBjoern A. Zeeb 	INP_INFO_WUNLOCK(&V_tcbinfo);
3878b615593SMarko Zec 	CURVNET_RESTORE();
3889b8b58e0SJonathan Lemon }
3899b8b58e0SJonathan Lemon 
39085d94372SRobert Watson void
39185d94372SRobert Watson tcp_timer_keep(void *xtp)
3929b8b58e0SJonathan Lemon {
39385d94372SRobert Watson 	struct tcpcb *tp = xtp;
39408517d53SMike Silbersack 	struct tcptemp *t_template;
39585d94372SRobert Watson 	struct inpcb *inp;
3968b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
3979b8b58e0SJonathan Lemon #ifdef TCPDEBUG
3989b8b58e0SJonathan Lemon 	int ostate;
3999b8b58e0SJonathan Lemon 
4009b8b58e0SJonathan Lemon 	ostate = tp->t_state;
4019b8b58e0SJonathan Lemon #endif
402603724d3SBjoern A. Zeeb 	INP_INFO_WLOCK(&V_tcbinfo);
40385d94372SRobert Watson 	inp = tp->t_inpcb;
40485d94372SRobert Watson 	/*
40585d94372SRobert Watson 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
40685d94372SRobert Watson 	 * tear-down mean we need it as a work-around for races between
40785d94372SRobert Watson 	 * timers and tcp_discardcb().
40885d94372SRobert Watson 	 *
40985d94372SRobert Watson 	 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
41085d94372SRobert Watson 	 */
41185d94372SRobert Watson 	if (inp == NULL) {
41285d94372SRobert Watson 		tcp_timer_race++;
413603724d3SBjoern A. Zeeb 		INP_INFO_WUNLOCK(&V_tcbinfo);
4148b615593SMarko Zec 		CURVNET_RESTORE();
41585d94372SRobert Watson 		return;
41685d94372SRobert Watson 	}
4178501a69cSRobert Watson 	INP_WLOCK(inp);
418655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_keep) ||
419655f934bSMikolaj Golub 	    !callout_active(&tp->t_timers->tt_keep)) {
4208501a69cSRobert Watson 		INP_WUNLOCK(inp);
421603724d3SBjoern A. Zeeb 		INP_INFO_WUNLOCK(&V_tcbinfo);
4228b615593SMarko Zec 		CURVNET_RESTORE();
42385d94372SRobert Watson 		return;
42485d94372SRobert Watson 	}
425e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_keep);
426655f934bSMikolaj Golub 	if ((inp->inp_flags & INP_DROPPED) != 0) {
427655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
428655f934bSMikolaj Golub 		INP_INFO_WUNLOCK(&V_tcbinfo);
429655f934bSMikolaj Golub 		CURVNET_RESTORE();
430655f934bSMikolaj Golub 		return;
431655f934bSMikolaj Golub 	}
4329b8b58e0SJonathan Lemon 	/*
4339b8b58e0SJonathan Lemon 	 * Keep-alive timer went off; send something
4349b8b58e0SJonathan Lemon 	 * or drop connection if idle for too long.
4359b8b58e0SJonathan Lemon 	 */
43678b50714SRobert Watson 	TCPSTAT_INC(tcps_keeptimeo);
4379b8b58e0SJonathan Lemon 	if (tp->t_state < TCPS_ESTABLISHED)
4389b8b58e0SJonathan Lemon 		goto dropit;
4392a074620SSam Leffler 	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
4409b8b58e0SJonathan Lemon 	    tp->t_state <= TCPS_CLOSING) {
4419077f387SGleb Smirnoff 		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
4429b8b58e0SJonathan Lemon 			goto dropit;
4439b8b58e0SJonathan Lemon 		/*
4449b8b58e0SJonathan Lemon 		 * Send a packet designed to force a response
4459b8b58e0SJonathan Lemon 		 * if the peer is up and reachable:
4469b8b58e0SJonathan Lemon 		 * either an ACK if the connection is still alive,
4479b8b58e0SJonathan Lemon 		 * or an RST if the peer has closed the connection
4489b8b58e0SJonathan Lemon 		 * due to timeout or reboot.
4499b8b58e0SJonathan Lemon 		 * Using sequence number tp->snd_una-1
4509b8b58e0SJonathan Lemon 		 * causes the transmitted zero-length segment
4519b8b58e0SJonathan Lemon 		 * to lie outside the receive window;
4529b8b58e0SJonathan Lemon 		 * by the protocol spec, this requires the
4539b8b58e0SJonathan Lemon 		 * correspondent TCP to respond.
4549b8b58e0SJonathan Lemon 		 */
45578b50714SRobert Watson 		TCPSTAT_INC(tcps_keepprobe);
45679909384SJonathan Lemon 		t_template = tcpip_maketemplate(inp);
45708517d53SMike Silbersack 		if (t_template) {
45808517d53SMike Silbersack 			tcp_respond(tp, t_template->tt_ipgen,
45908517d53SMike Silbersack 				    &t_template->tt_t, (struct mbuf *)NULL,
4609b8b58e0SJonathan Lemon 				    tp->rcv_nxt, tp->snd_una - 1, 0);
46153640b0eSRobert Watson 			free(t_template, M_TEMP);
46208517d53SMike Silbersack 		}
4639077f387SGleb Smirnoff 		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
464883831c6SAdrian Chadd 		    tcp_timer_keep, tp, inp_to_cpuid(inp));
4654cc20ab1SSeigo Tanimura 	} else
4669077f387SGleb Smirnoff 		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
467883831c6SAdrian Chadd 		    tcp_timer_keep, tp, inp_to_cpuid(inp));
4689b8b58e0SJonathan Lemon 
4699b8b58e0SJonathan Lemon #ifdef TCPDEBUG
4702a074620SSam Leffler 	if (inp->inp_socket->so_options & SO_DEBUG)
471fb59c426SYoshinobu Inoue 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
4729b8b58e0SJonathan Lemon 			  PRU_SLOWTIMO);
4739b8b58e0SJonathan Lemon #endif
4748501a69cSRobert Watson 	INP_WUNLOCK(inp);
475603724d3SBjoern A. Zeeb 	INP_INFO_WUNLOCK(&V_tcbinfo);
4768b615593SMarko Zec 	CURVNET_RESTORE();
47785d94372SRobert Watson 	return;
4789b8b58e0SJonathan Lemon 
4799b8b58e0SJonathan Lemon dropit:
48078b50714SRobert Watson 	TCPSTAT_INC(tcps_keepdrops);
48185d94372SRobert Watson 	tp = tcp_drop(tp, ETIMEDOUT);
48285d94372SRobert Watson 
48385d94372SRobert Watson #ifdef TCPDEBUG
48485d94372SRobert Watson 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
48585d94372SRobert Watson 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
48685d94372SRobert Watson 			  PRU_SLOWTIMO);
48785d94372SRobert Watson #endif
48885d94372SRobert Watson 	if (tp != NULL)
4898501a69cSRobert Watson 		INP_WUNLOCK(tp->t_inpcb);
490603724d3SBjoern A. Zeeb 	INP_INFO_WUNLOCK(&V_tcbinfo);
4918b615593SMarko Zec 	CURVNET_RESTORE();
4929b8b58e0SJonathan Lemon }
4939b8b58e0SJonathan Lemon 
49485d94372SRobert Watson void
49585d94372SRobert Watson tcp_timer_persist(void *xtp)
4969b8b58e0SJonathan Lemon {
49785d94372SRobert Watson 	struct tcpcb *tp = xtp;
49885d94372SRobert Watson 	struct inpcb *inp;
4998b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
5009b8b58e0SJonathan Lemon #ifdef TCPDEBUG
5019b8b58e0SJonathan Lemon 	int ostate;
5029b8b58e0SJonathan Lemon 
5039b8b58e0SJonathan Lemon 	ostate = tp->t_state;
5049b8b58e0SJonathan Lemon #endif
505603724d3SBjoern A. Zeeb 	INP_INFO_WLOCK(&V_tcbinfo);
50685d94372SRobert Watson 	inp = tp->t_inpcb;
50785d94372SRobert Watson 	/*
50885d94372SRobert Watson 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
50985d94372SRobert Watson 	 * tear-down mean we need it as a work-around for races between
51085d94372SRobert Watson 	 * timers and tcp_discardcb().
51185d94372SRobert Watson 	 *
51285d94372SRobert Watson 	 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
51385d94372SRobert Watson 	 */
51485d94372SRobert Watson 	if (inp == NULL) {
51585d94372SRobert Watson 		tcp_timer_race++;
516603724d3SBjoern A. Zeeb 		INP_INFO_WUNLOCK(&V_tcbinfo);
5178b615593SMarko Zec 		CURVNET_RESTORE();
51885d94372SRobert Watson 		return;
51985d94372SRobert Watson 	}
5208501a69cSRobert Watson 	INP_WLOCK(inp);
521655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_persist) ||
522655f934bSMikolaj Golub 	    !callout_active(&tp->t_timers->tt_persist)) {
5238501a69cSRobert Watson 		INP_WUNLOCK(inp);
524603724d3SBjoern A. Zeeb 		INP_INFO_WUNLOCK(&V_tcbinfo);
5258b615593SMarko Zec 		CURVNET_RESTORE();
52685d94372SRobert Watson 		return;
52785d94372SRobert Watson 	}
528e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_persist);
529655f934bSMikolaj Golub 	if ((inp->inp_flags & INP_DROPPED) != 0) {
530655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
531655f934bSMikolaj Golub 		INP_INFO_WUNLOCK(&V_tcbinfo);
532655f934bSMikolaj Golub 		CURVNET_RESTORE();
533655f934bSMikolaj Golub 		return;
534655f934bSMikolaj Golub 	}
5359b8b58e0SJonathan Lemon 	/*
5369b8b58e0SJonathan Lemon 	 * Persistance timer into zero window.
5379b8b58e0SJonathan Lemon 	 * Force a byte to be output, if possible.
5389b8b58e0SJonathan Lemon 	 */
53978b50714SRobert Watson 	TCPSTAT_INC(tcps_persisttimeo);
5409b8b58e0SJonathan Lemon 	/*
5419b8b58e0SJonathan Lemon 	 * Hack: if the peer is dead/unreachable, we do not
5429b8b58e0SJonathan Lemon 	 * time out if the window is closed.  After a full
5439b8b58e0SJonathan Lemon 	 * backoff, drop the connection if the idle time
5449b8b58e0SJonathan Lemon 	 * (no responses to probes) reaches the maximum
5459b8b58e0SJonathan Lemon 	 * backoff that we would use if retransmitting.
5469b8b58e0SJonathan Lemon 	 */
5479b8b58e0SJonathan Lemon 	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
5486b0c5521SJohn Baldwin 	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
5496b0c5521SJohn Baldwin 	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
55078b50714SRobert Watson 		TCPSTAT_INC(tcps_persistdrop);
55185d94372SRobert Watson 		tp = tcp_drop(tp, ETIMEDOUT);
55285d94372SRobert Watson 		goto out;
5539b8b58e0SJonathan Lemon 	}
554322181c9SAndre Oppermann 	/*
555322181c9SAndre Oppermann 	 * If the user has closed the socket then drop a persisting
556322181c9SAndre Oppermann 	 * connection after a much reduced timeout.
557322181c9SAndre Oppermann 	 */
558322181c9SAndre Oppermann 	if (tp->t_state > TCPS_CLOSE_WAIT &&
559322181c9SAndre Oppermann 	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
560322181c9SAndre Oppermann 		TCPSTAT_INC(tcps_persistdrop);
561322181c9SAndre Oppermann 		tp = tcp_drop(tp, ETIMEDOUT);
562322181c9SAndre Oppermann 		goto out;
563322181c9SAndre Oppermann 	}
5649b8b58e0SJonathan Lemon 	tcp_setpersist(tp);
5652cdbfa66SPaul Saab 	tp->t_flags |= TF_FORCEDATA;
5669b8b58e0SJonathan Lemon 	(void) tcp_output(tp);
5672cdbfa66SPaul Saab 	tp->t_flags &= ~TF_FORCEDATA;
5689b8b58e0SJonathan Lemon 
56985d94372SRobert Watson out:
5709b8b58e0SJonathan Lemon #ifdef TCPDEBUG
571ffb761f6SGleb Smirnoff 	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
572ffb761f6SGleb Smirnoff 		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
5739b8b58e0SJonathan Lemon #endif
57485d94372SRobert Watson 	if (tp != NULL)
5758501a69cSRobert Watson 		INP_WUNLOCK(inp);
576603724d3SBjoern A. Zeeb 	INP_INFO_WUNLOCK(&V_tcbinfo);
5778b615593SMarko Zec 	CURVNET_RESTORE();
5789b8b58e0SJonathan Lemon }
5799b8b58e0SJonathan Lemon 
58085d94372SRobert Watson void
58185d94372SRobert Watson tcp_timer_rexmt(void * xtp)
5829b8b58e0SJonathan Lemon {
58385d94372SRobert Watson 	struct tcpcb *tp = xtp;
5848b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
5859b8b58e0SJonathan Lemon 	int rexmt;
58685d94372SRobert Watson 	int headlocked;
58785d94372SRobert Watson 	struct inpcb *inp;
5889b8b58e0SJonathan Lemon #ifdef TCPDEBUG
5899b8b58e0SJonathan Lemon 	int ostate;
5909b8b58e0SJonathan Lemon 
5919b8b58e0SJonathan Lemon 	ostate = tp->t_state;
5929b8b58e0SJonathan Lemon #endif
593f6f6703fSSean Bruno 
59487aedea4SKip Macy 	INP_INFO_RLOCK(&V_tcbinfo);
59585d94372SRobert Watson 	inp = tp->t_inpcb;
59685d94372SRobert Watson 	/*
59785d94372SRobert Watson 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
59885d94372SRobert Watson 	 * tear-down mean we need it as a work-around for races between
59985d94372SRobert Watson 	 * timers and tcp_discardcb().
60085d94372SRobert Watson 	 *
60185d94372SRobert Watson 	 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
60285d94372SRobert Watson 	 */
60385d94372SRobert Watson 	if (inp == NULL) {
60485d94372SRobert Watson 		tcp_timer_race++;
60587aedea4SKip Macy 		INP_INFO_RUNLOCK(&V_tcbinfo);
6068b615593SMarko Zec 		CURVNET_RESTORE();
60785d94372SRobert Watson 		return;
60885d94372SRobert Watson 	}
6098501a69cSRobert Watson 	INP_WLOCK(inp);
610655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_rexmt) ||
611655f934bSMikolaj Golub 	    !callout_active(&tp->t_timers->tt_rexmt)) {
6128501a69cSRobert Watson 		INP_WUNLOCK(inp);
61387aedea4SKip Macy 		INP_INFO_RUNLOCK(&V_tcbinfo);
6148b615593SMarko Zec 		CURVNET_RESTORE();
61585d94372SRobert Watson 		return;
61685d94372SRobert Watson 	}
617e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_rexmt);
618655f934bSMikolaj Golub 	if ((inp->inp_flags & INP_DROPPED) != 0) {
619655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
620655f934bSMikolaj Golub 		INP_INFO_RUNLOCK(&V_tcbinfo);
621655f934bSMikolaj Golub 		CURVNET_RESTORE();
622655f934bSMikolaj Golub 		return;
623655f934bSMikolaj Golub 	}
6246d90faf3SPaul Saab 	tcp_free_sackholes(tp);
625df8bae1dSRodney W. Grimes 	/*
626df8bae1dSRodney W. Grimes 	 * Retransmission timer went off.  Message has not
627df8bae1dSRodney W. Grimes 	 * been acked within retransmit interval.  Back off
628df8bae1dSRodney W. Grimes 	 * to a longer retransmit interval and retransmit one segment.
629df8bae1dSRodney W. Grimes 	 */
630df8bae1dSRodney W. Grimes 	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
631df8bae1dSRodney W. Grimes 		tp->t_rxtshift = TCP_MAXRXTSHIFT;
63278b50714SRobert Watson 		TCPSTAT_INC(tcps_timeoutdrop);
63387aedea4SKip Macy 		in_pcbref(inp);
63487aedea4SKip Macy 		INP_INFO_RUNLOCK(&V_tcbinfo);
63587aedea4SKip Macy 		INP_WUNLOCK(inp);
63687aedea4SKip Macy 		INP_INFO_WLOCK(&V_tcbinfo);
63787aedea4SKip Macy 		INP_WLOCK(inp);
638fa046d87SRobert Watson 		if (in_pcbrele_wlocked(inp)) {
63987aedea4SKip Macy 			INP_INFO_WUNLOCK(&V_tcbinfo);
64087aedea4SKip Macy 			CURVNET_RESTORE();
64187aedea4SKip Macy 			return;
64287aedea4SKip Macy 		}
643aa4b09c5SNavdeep Parhar 		if (inp->inp_flags & INP_DROPPED) {
644aa4b09c5SNavdeep Parhar 			INP_WUNLOCK(inp);
645aa4b09c5SNavdeep Parhar 			INP_INFO_WUNLOCK(&V_tcbinfo);
646aa4b09c5SNavdeep Parhar 			CURVNET_RESTORE();
647aa4b09c5SNavdeep Parhar 			return;
648aa4b09c5SNavdeep Parhar 		}
649aa4b09c5SNavdeep Parhar 
65085d94372SRobert Watson 		tp = tcp_drop(tp, tp->t_softerror ?
65185d94372SRobert Watson 			      tp->t_softerror : ETIMEDOUT);
65287aedea4SKip Macy 		headlocked = 1;
65385d94372SRobert Watson 		goto out;
6549b8b58e0SJonathan Lemon 	}
65587aedea4SKip Macy 	INP_INFO_RUNLOCK(&V_tcbinfo);
65685d94372SRobert Watson 	headlocked = 0;
657cf8f04f4SAndre Oppermann 	if (tp->t_state == TCPS_SYN_SENT) {
658cf8f04f4SAndre Oppermann 		/*
659cf8f04f4SAndre Oppermann 		 * If the SYN was retransmitted, indicate CWND to be
660cf8f04f4SAndre Oppermann 		 * limited to 1 segment in cc_conn_init().
661cf8f04f4SAndre Oppermann 		 */
662cf8f04f4SAndre Oppermann 		tp->snd_cwnd = 1;
663cf8f04f4SAndre Oppermann 	} else if (tp->t_rxtshift == 1) {
6649b8b58e0SJonathan Lemon 		/*
6659b8b58e0SJonathan Lemon 		 * first retransmit; record ssthresh and cwnd so they can
6669b8b58e0SJonathan Lemon 		 * be recovered if this turns out to be a "bad" retransmit.
6679b8b58e0SJonathan Lemon 		 * A retransmit is considered "bad" if an ACK for this
6689b8b58e0SJonathan Lemon 		 * segment is received within RTT/2 interval; the assumption
6699b8b58e0SJonathan Lemon 		 * here is that the ACK was already in flight.  See
6709b8b58e0SJonathan Lemon 		 * "On Estimating End-to-End Network Path Properties" by
6719b8b58e0SJonathan Lemon 		 * Allman and Paxson for more details.
6729b8b58e0SJonathan Lemon 		 */
6739b8b58e0SJonathan Lemon 		tp->snd_cwnd_prev = tp->snd_cwnd;
6749b8b58e0SJonathan Lemon 		tp->snd_ssthresh_prev = tp->snd_ssthresh;
6759d11646dSJeffrey Hsu 		tp->snd_recover_prev = tp->snd_recover;
676dbc42409SLawrence Stewart 		if (IN_FASTRECOVERY(tp->t_flags))
6779d11646dSJeffrey Hsu 			tp->t_flags |= TF_WASFRECOVERY;
6789d11646dSJeffrey Hsu 		else
6799d11646dSJeffrey Hsu 			tp->t_flags &= ~TF_WASFRECOVERY;
680dbc42409SLawrence Stewart 		if (IN_CONGRECOVERY(tp->t_flags))
681dbc42409SLawrence Stewart 			tp->t_flags |= TF_WASCRECOVERY;
682dbc42409SLawrence Stewart 		else
683dbc42409SLawrence Stewart 			tp->t_flags &= ~TF_WASCRECOVERY;
6849b8b58e0SJonathan Lemon 		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
685672dc4aeSJohn Baldwin 		tp->t_flags |= TF_PREVVALID;
686672dc4aeSJohn Baldwin 	} else
687672dc4aeSJohn Baldwin 		tp->t_flags &= ~TF_PREVVALID;
68878b50714SRobert Watson 	TCPSTAT_INC(tcps_rexmttimeo);
6897d42e30cSJonathan Lemon 	if (tp->t_state == TCPS_SYN_SENT)
690f4748ef5SAndre Oppermann 		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
6917d42e30cSJonathan Lemon 	else
692df8bae1dSRodney W. Grimes 		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
693df8bae1dSRodney W. Grimes 	TCPT_RANGESET(tp->t_rxtcur, rexmt,
694df8bae1dSRodney W. Grimes 		      tp->t_rttmin, TCPTV_REXMTMAX);
695f6f6703fSSean Bruno 
696*882ac53eSSean Bruno 	/*
697*882ac53eSSean Bruno 	 * We enter the path for PLMTUD if connection is established or, if
698*882ac53eSSean Bruno 	 * connection is FIN_WAIT_1 status, reason for the last is that if
699*882ac53eSSean Bruno 	 * amount of data we send is very small, we could send it in couple of
700*882ac53eSSean Bruno 	 * packets and process straight to FIN. In that case we won't catch
701*882ac53eSSean Bruno 	 * ESTABLISHED state.
702*882ac53eSSean Bruno 	 */
703*882ac53eSSean Bruno 	if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
704*882ac53eSSean Bruno 	    || (tp->t_state == TCPS_FIN_WAIT_1))) {
705f6f6703fSSean Bruno 		int optlen;
706f6f6703fSSean Bruno #ifdef INET6
707f6f6703fSSean Bruno 		int isipv6;
708f6f6703fSSean Bruno #endif
709f6f6703fSSean Bruno 
710f6f6703fSSean Bruno 		if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
711f6f6703fSSean Bruno 		    (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
712f6f6703fSSean Bruno 		    (tp->t_rxtshift <= 2)) {
713f6f6703fSSean Bruno 			/*
714f6f6703fSSean Bruno 			 * Enter Path MTU Black-hole Detection mechanism:
715f6f6703fSSean Bruno 			 * - Disable Path MTU Discovery (IP "DF" bit).
716f6f6703fSSean Bruno 			 * - Reduce MTU to lower value than what we
717f6f6703fSSean Bruno 			 *   negotiated with peer.
718f6f6703fSSean Bruno 			 */
719f6f6703fSSean Bruno 			/* Record that we may have found a black hole. */
720f6f6703fSSean Bruno 			tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
721f6f6703fSSean Bruno 
722f6f6703fSSean Bruno 			/* Keep track of previous MSS. */
723f6f6703fSSean Bruno 			optlen = tp->t_maxopd - tp->t_maxseg;
724f6f6703fSSean Bruno 			tp->t_pmtud_saved_maxopd = tp->t_maxopd;
725f6f6703fSSean Bruno 
726f6f6703fSSean Bruno 			/*
727f6f6703fSSean Bruno 			 * Reduce the MSS to blackhole value or to the default
728f6f6703fSSean Bruno 			 * in an attempt to retransmit.
729f6f6703fSSean Bruno 			 */
730f6f6703fSSean Bruno #ifdef INET6
731f6f6703fSSean Bruno 			isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
732f6f6703fSSean Bruno 			if (isipv6 &&
733f6f6703fSSean Bruno 			    tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
734f6f6703fSSean Bruno 				/* Use the sysctl tuneable blackhole MSS. */
735f6f6703fSSean Bruno 				tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
736f6f6703fSSean Bruno 				V_tcp_pmtud_blackhole_activated++;
737f6f6703fSSean Bruno 			} else if (isipv6) {
738f6f6703fSSean Bruno 				/* Use the default MSS. */
739f6f6703fSSean Bruno 				tp->t_maxopd = V_tcp_v6mssdflt;
740f6f6703fSSean Bruno 				/*
741f6f6703fSSean Bruno 				 * Disable Path MTU Discovery when we switch to
742f6f6703fSSean Bruno 				 * minmss.
743f6f6703fSSean Bruno 				 */
744f6f6703fSSean Bruno 				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
745f6f6703fSSean Bruno 				V_tcp_pmtud_blackhole_activated_min_mss++;
746f6f6703fSSean Bruno 			}
747f6f6703fSSean Bruno #endif
748f6f6703fSSean Bruno #if defined(INET6) && defined(INET)
749f6f6703fSSean Bruno 			else
750f6f6703fSSean Bruno #endif
751f6f6703fSSean Bruno #ifdef INET
752f6f6703fSSean Bruno 			if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
753f6f6703fSSean Bruno 				/* Use the sysctl tuneable blackhole MSS. */
754f6f6703fSSean Bruno 				tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
755f6f6703fSSean Bruno 				V_tcp_pmtud_blackhole_activated++;
756f6f6703fSSean Bruno 			} else {
757f6f6703fSSean Bruno 				/* Use the default MSS. */
758f6f6703fSSean Bruno 				tp->t_maxopd = V_tcp_mssdflt;
759f6f6703fSSean Bruno 				/*
760f6f6703fSSean Bruno 				 * Disable Path MTU Discovery when we switch to
761f6f6703fSSean Bruno 				 * minmss.
762f6f6703fSSean Bruno 				 */
763f6f6703fSSean Bruno 				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
764f6f6703fSSean Bruno 				V_tcp_pmtud_blackhole_activated_min_mss++;
765f6f6703fSSean Bruno 			}
766f6f6703fSSean Bruno #endif
767f6f6703fSSean Bruno 			tp->t_maxseg = tp->t_maxopd - optlen;
768f6f6703fSSean Bruno 			/*
769f6f6703fSSean Bruno 			 * Reset the slow-start flight size
770f6f6703fSSean Bruno 			 * as it may depend on the new MSS.
771f6f6703fSSean Bruno 			 */
772f6f6703fSSean Bruno 			if (CC_ALGO(tp)->conn_init != NULL)
773f6f6703fSSean Bruno 				CC_ALGO(tp)->conn_init(tp->ccv);
774f6f6703fSSean Bruno 		} else {
775f6f6703fSSean Bruno 			/*
776f6f6703fSSean Bruno 			 * If further retransmissions are still unsuccessful
777f6f6703fSSean Bruno 			 * with a lowered MTU, maybe this isn't a blackhole and
778f6f6703fSSean Bruno 			 * we restore the previous MSS and blackhole detection
779f6f6703fSSean Bruno 			 * flags.
780f6f6703fSSean Bruno 			 */
781f6f6703fSSean Bruno 			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
782f6f6703fSSean Bruno 			    (tp->t_rxtshift > 4)) {
783f6f6703fSSean Bruno 				tp->t_flags2 |= TF2_PLPMTU_PMTUD;
784f6f6703fSSean Bruno 				tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
785f6f6703fSSean Bruno 				optlen = tp->t_maxopd - tp->t_maxseg;
786f6f6703fSSean Bruno 				tp->t_maxopd = tp->t_pmtud_saved_maxopd;
787f6f6703fSSean Bruno 				tp->t_maxseg = tp->t_maxopd - optlen;
788f6f6703fSSean Bruno 				V_tcp_pmtud_blackhole_failed++;
789f6f6703fSSean Bruno 				/*
790f6f6703fSSean Bruno 				 * Reset the slow-start flight size as it
791f6f6703fSSean Bruno 				 * may depend on the new MSS.
792f6f6703fSSean Bruno 				 */
793f6f6703fSSean Bruno 				if (CC_ALGO(tp)->conn_init != NULL)
794f6f6703fSSean Bruno 					CC_ALGO(tp)->conn_init(tp->ccv);
795f6f6703fSSean Bruno 			}
796f6f6703fSSean Bruno 		}
797f6f6703fSSean Bruno 	}
798f6f6703fSSean Bruno 
799df8bae1dSRodney W. Grimes 	/*
80077339e1cSAndre Oppermann 	 * Disable RFC1323 and SACK if we haven't got any response to
8017ceb7783SJesper Skriver 	 * our third SYN to work-around some broken terminal servers
8027ceb7783SJesper Skriver 	 * (most of which have hopefully been retired) that have bad VJ
8037ceb7783SJesper Skriver 	 * header compression code which trashes TCP segments containing
8047ceb7783SJesper Skriver 	 * unknown-to-them TCP options.
8057ceb7783SJesper Skriver 	 */
8066c0ef895SJohn Baldwin 	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
8076c0ef895SJohn Baldwin 	    (tp->t_rxtshift == 3))
808c4ab59c1SAndre Oppermann 		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
8097ceb7783SJesper Skriver 	/*
81097d8d152SAndre Oppermann 	 * If we backed off this far, our srtt estimate is probably bogus.
81197d8d152SAndre Oppermann 	 * Clobber it so we'll take the next rtt measurement as our srtt;
812df8bae1dSRodney W. Grimes 	 * move the current srtt into rttvar to keep the current
813df8bae1dSRodney W. Grimes 	 * retransmit times until then.
814df8bae1dSRodney W. Grimes 	 */
815df8bae1dSRodney W. Grimes 	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
816fb59c426SYoshinobu Inoue #ifdef INET6
817fb59c426SYoshinobu Inoue 		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
818fb59c426SYoshinobu Inoue 			in6_losing(tp->t_inpcb);
819fb59c426SYoshinobu Inoue #endif
820df8bae1dSRodney W. Grimes 		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
821df8bae1dSRodney W. Grimes 		tp->t_srtt = 0;
822df8bae1dSRodney W. Grimes 	}
823df8bae1dSRodney W. Grimes 	tp->snd_nxt = tp->snd_una;
8249d11646dSJeffrey Hsu 	tp->snd_recover = tp->snd_max;
82546f58482SJonathan Lemon 	/*
82674b48c1dSAndras Olah 	 * Force a segment to be sent.
82774b48c1dSAndras Olah 	 */
82874b48c1dSAndras Olah 	tp->t_flags |= TF_ACKNOW;
82974b48c1dSAndras Olah 	/*
830df8bae1dSRodney W. Grimes 	 * If timing a segment in this window, stop the timer.
831df8bae1dSRodney W. Grimes 	 */
8329b8b58e0SJonathan Lemon 	tp->t_rtttime = 0;
833dbc42409SLawrence Stewart 
834b5af1b88SLawrence Stewart 	cc_cong_signal(tp, NULL, CC_RTO);
835dbc42409SLawrence Stewart 
836df8bae1dSRodney W. Grimes 	(void) tcp_output(tp);
837df8bae1dSRodney W. Grimes 
83885d94372SRobert Watson out:
8399b8b58e0SJonathan Lemon #ifdef TCPDEBUG
8401c53f806SRobert Watson 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
841fb59c426SYoshinobu Inoue 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
8429b8b58e0SJonathan Lemon 			  PRU_SLOWTIMO);
843df8bae1dSRodney W. Grimes #endif
84485d94372SRobert Watson 	if (tp != NULL)
8458501a69cSRobert Watson 		INP_WUNLOCK(inp);
84685d94372SRobert Watson 	if (headlocked)
847603724d3SBjoern A. Zeeb 		INP_INFO_WUNLOCK(&V_tcbinfo);
8488b615593SMarko Zec 	CURVNET_RESTORE();
84985d94372SRobert Watson }
85085d94372SRobert Watson 
85185d94372SRobert Watson void
85285d94372SRobert Watson tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
85385d94372SRobert Watson {
85485d94372SRobert Watson 	struct callout *t_callout;
85585d94372SRobert Watson 	void *f_callout;
85687aedea4SKip Macy 	struct inpcb *inp = tp->t_inpcb;
857883831c6SAdrian Chadd 	int cpu = inp_to_cpuid(inp);
85885d94372SRobert Watson 
85909fe6320SNavdeep Parhar #ifdef TCP_OFFLOAD
86009fe6320SNavdeep Parhar 	if (tp->t_flags & TF_TOE)
86109fe6320SNavdeep Parhar 		return;
86209fe6320SNavdeep Parhar #endif
86309fe6320SNavdeep Parhar 
86485d94372SRobert Watson 	switch (timer_type) {
86585d94372SRobert Watson 		case TT_DELACK:
866e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_delack;
86785d94372SRobert Watson 			f_callout = tcp_timer_delack;
86885d94372SRobert Watson 			break;
86985d94372SRobert Watson 		case TT_REXMT:
870e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_rexmt;
87185d94372SRobert Watson 			f_callout = tcp_timer_rexmt;
87285d94372SRobert Watson 			break;
87385d94372SRobert Watson 		case TT_PERSIST:
874e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_persist;
87585d94372SRobert Watson 			f_callout = tcp_timer_persist;
87685d94372SRobert Watson 			break;
87785d94372SRobert Watson 		case TT_KEEP:
878e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_keep;
87985d94372SRobert Watson 			f_callout = tcp_timer_keep;
88085d94372SRobert Watson 			break;
88185d94372SRobert Watson 		case TT_2MSL:
882e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_2msl;
88385d94372SRobert Watson 			f_callout = tcp_timer_2msl;
88485d94372SRobert Watson 			break;
88585d94372SRobert Watson 		default:
88685d94372SRobert Watson 			panic("bad timer_type");
88785d94372SRobert Watson 		}
88885d94372SRobert Watson 	if (delta == 0) {
88985d94372SRobert Watson 		callout_stop(t_callout);
89085d94372SRobert Watson 	} else {
89187aedea4SKip Macy 		callout_reset_on(t_callout, delta, f_callout, tp, cpu);
89285d94372SRobert Watson 	}
89385d94372SRobert Watson }
89485d94372SRobert Watson 
89585d94372SRobert Watson int
89685d94372SRobert Watson tcp_timer_active(struct tcpcb *tp, int timer_type)
89785d94372SRobert Watson {
89885d94372SRobert Watson 	struct callout *t_callout;
89985d94372SRobert Watson 
90085d94372SRobert Watson 	switch (timer_type) {
90185d94372SRobert Watson 		case TT_DELACK:
902e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_delack;
90385d94372SRobert Watson 			break;
90485d94372SRobert Watson 		case TT_REXMT:
905e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_rexmt;
90685d94372SRobert Watson 			break;
90785d94372SRobert Watson 		case TT_PERSIST:
908e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_persist;
90985d94372SRobert Watson 			break;
91085d94372SRobert Watson 		case TT_KEEP:
911e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_keep;
91285d94372SRobert Watson 			break;
91385d94372SRobert Watson 		case TT_2MSL:
914e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_2msl;
91585d94372SRobert Watson 			break;
91685d94372SRobert Watson 		default:
91785d94372SRobert Watson 			panic("bad timer_type");
91885d94372SRobert Watson 		}
91985d94372SRobert Watson 	return callout_active(t_callout);
920df8bae1dSRodney W. Grimes }
921b8614722SMike Silbersack 
/*
 * Convert a tick count to milliseconds using the global hz.  The product
 * is formed in 64 bits: at int/u_int width 1000 * t wraps once t exceeds
 * UINT_MAX / 1000 ticks, which yields a bogus (too small) result for
 * long intervals.
 */
#define	ticks_to_msecs(t)	((int64_t)(t) * 1000 / hz)
923b8614722SMike Silbersack 
924b8614722SMike Silbersack void
9255b999a6bSDavide Italiano tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
9265b999a6bSDavide Italiano     struct xtcp_timer *xtimer)
927b8614722SMike Silbersack {
9285b999a6bSDavide Italiano 	sbintime_t now;
9295b999a6bSDavide Italiano 
9305b999a6bSDavide Italiano 	bzero(xtimer, sizeof(*xtimer));
931b8614722SMike Silbersack 	if (timer == NULL)
932b8614722SMike Silbersack 		return;
9335b999a6bSDavide Italiano 	now = getsbinuptime();
934b8614722SMike Silbersack 	if (callout_active(&timer->tt_delack))
9355b999a6bSDavide Italiano 		xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
936b8614722SMike Silbersack 	if (callout_active(&timer->tt_rexmt))
9375b999a6bSDavide Italiano 		xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
938b8614722SMike Silbersack 	if (callout_active(&timer->tt_persist))
9395b999a6bSDavide Italiano 		xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
940b8614722SMike Silbersack 	if (callout_active(&timer->tt_keep))
9415b999a6bSDavide Italiano 		xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
942b8614722SMike Silbersack 	if (callout_active(&timer->tt_2msl))
9435b999a6bSDavide Italiano 		xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
944b8614722SMike Silbersack 	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
945b8614722SMike Silbersack }
946