xref: /freebsd/sys/netinet/tcp_timer.c (revision ff94500855c16d0d9cc18aa8b0ba73ea94020c56)
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 */
33df8bae1dSRodney W. Grimes 
344b421e2dSMike Silbersack #include <sys/cdefs.h>
354b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
364b421e2dSMike Silbersack 
37825fd1e4SNavdeep Parhar #include "opt_inet.h"
38fb59c426SYoshinobu Inoue #include "opt_inet6.h"
390cc12cc5SJoerg Wunsch #include "opt_tcpdebug.h"
40883831c6SAdrian Chadd #include "opt_rss.h"
410cc12cc5SJoerg Wunsch 
42df8bae1dSRodney W. Grimes #include <sys/param.h>
4398163b98SPoul-Henning Kamp #include <sys/kernel.h>
44c74af4faSBruce Evans #include <sys/lock.h>
4508517d53SMike Silbersack #include <sys/mbuf.h>
46c74af4faSBruce Evans #include <sys/mutex.h>
47c74af4faSBruce Evans #include <sys/protosw.h>
4887aedea4SKip Macy #include <sys/smp.h>
49df8bae1dSRodney W. Grimes #include <sys/socket.h>
50df8bae1dSRodney W. Grimes #include <sys/socketvar.h>
51c74af4faSBruce Evans #include <sys/sysctl.h>
52c74af4faSBruce Evans #include <sys/systm.h>
53e79adb8eSGarrett Wollman 
544b79449eSBjoern A. Zeeb #include <net/if.h>
55df8bae1dSRodney W. Grimes #include <net/route.h>
56b2bdc62aSAdrian Chadd #include <net/rss_config.h>
57530c0060SRobert Watson #include <net/vnet.h>
58883831c6SAdrian Chadd #include <net/netisr.h>
59df8bae1dSRodney W. Grimes 
60df8bae1dSRodney W. Grimes #include <netinet/in.h>
615d06879aSGeorge V. Neville-Neil #include <netinet/in_kdtrace.h>
62df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
63883831c6SAdrian Chadd #include <netinet/in_rss.h>
64c74af4faSBruce Evans #include <netinet/in_systm.h>
65fb59c426SYoshinobu Inoue #ifdef INET6
66fb59c426SYoshinobu Inoue #include <netinet6/in6_pcb.h>
67fb59c426SYoshinobu Inoue #endif
68df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
692de3e790SGleb Smirnoff #include <netinet/tcp.h>
70df8bae1dSRodney W. Grimes #include <netinet/tcp_fsm.h>
712529f56eSJonathan T. Looney #include <netinet/tcp_log_buf.h>
72df8bae1dSRodney W. Grimes #include <netinet/tcp_timer.h>
73df8bae1dSRodney W. Grimes #include <netinet/tcp_var.h>
7489e560f4SRandall Stewart #include <netinet/tcp_seq.h>
754644fda3SGleb Smirnoff #include <netinet/cc/cc.h>
76f6f6703fSSean Bruno #ifdef INET6
77f6f6703fSSean Bruno #include <netinet6/tcp6_var.h>
78f6f6703fSSean Bruno #endif
79df8bae1dSRodney W. Grimes #include <netinet/tcpip.h>
80af7a2999SDavid Greenman #ifdef TCPDEBUG
81af7a2999SDavid Greenman #include <netinet/tcp_debug.h>
82af7a2999SDavid Greenman #endif
83df8bae1dSRodney W. Grimes 
840645c604SHiren Panchasara int    tcp_persmin;
857029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin,
867029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
877029da5cSPawel Biernacki     &tcp_persmin, 0, sysctl_msec_to_ticks, "I",
887029da5cSPawel Biernacki     "minimum persistence interval");
890645c604SHiren Panchasara 
900645c604SHiren Panchasara int    tcp_persmax;
917029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax,
927029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
937029da5cSPawel Biernacki     &tcp_persmax, 0, sysctl_msec_to_ticks, "I",
947029da5cSPawel Biernacki     "maximum persistence interval");
950645c604SHiren Panchasara 
969b8b58e0SJonathan Lemon int	tcp_keepinit;
977029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit,
987029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
997029da5cSPawel Biernacki     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I",
1007029da5cSPawel Biernacki     "time to establish connection");
1017b40aa32SPaul Traina 
1029b8b58e0SJonathan Lemon int	tcp_keepidle;
1037029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle,
1047029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1057029da5cSPawel Biernacki     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I",
1067029da5cSPawel Biernacki     "time before keepalive probes begin");
10798163b98SPoul-Henning Kamp 
1089b8b58e0SJonathan Lemon int	tcp_keepintvl;
1097029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl,
1107029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1117029da5cSPawel Biernacki     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I",
1127029da5cSPawel Biernacki     "time between keepalive probes");
11398163b98SPoul-Henning Kamp 
1149b8b58e0SJonathan Lemon int	tcp_delacktime;
1157029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime,
1167029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1176489fe65SAndre Oppermann     &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
118ccb4d0c6SJonathan Lemon     "Time before a delayed ACK is sent");
1199b8b58e0SJonathan Lemon 
1209b8b58e0SJonathan Lemon int	tcp_msl;
1217029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl,
1227029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1237029da5cSPawel Biernacki     &tcp_msl, 0, sysctl_msec_to_ticks, "I",
1247029da5cSPawel Biernacki     "Maximum segment lifetime");
1259b8b58e0SJonathan Lemon 
1260999766dSMichael Tuexen int	tcp_rexmit_initial;
1277029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_initial,
1287029da5cSPawel Biernacki    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1290999766dSMichael Tuexen     &tcp_rexmit_initial, 0, sysctl_msec_to_ticks, "I",
1300999766dSMichael Tuexen     "Initial Retransmission Timeout");
1310999766dSMichael Tuexen 
132701bec5aSMatthew Dillon int	tcp_rexmit_min;
1337029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min,
1347029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1356489fe65SAndre Oppermann     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
1366489fe65SAndre Oppermann     "Minimum Retransmission Timeout");
137701bec5aSMatthew Dillon 
138701bec5aSMatthew Dillon int	tcp_rexmit_slop;
1397029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop,
1407029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1416489fe65SAndre Oppermann     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
1426489fe65SAndre Oppermann     "Retransmission Timer Slop");
143701bec5aSMatthew Dillon 
144334fc582SBjoern A. Zeeb VNET_DEFINE(int, tcp_always_keepalive) = 1;
145334fc582SBjoern A. Zeeb SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_VNET|CTLFLAG_RW,
146334fc582SBjoern A. Zeeb     &VNET_NAME(tcp_always_keepalive) , 0,
147334fc582SBjoern A. Zeeb     "Assume SO_KEEPALIVE on all TCP connections");
14834be9bf3SPoul-Henning Kamp 
1497c72af87SMohan Srinivasan int    tcp_fast_finwait2_recycle = 0;
1507c72af87SMohan Srinivasan SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
1516489fe65SAndre Oppermann     &tcp_fast_finwait2_recycle, 0,
1526489fe65SAndre Oppermann     "Recycle closed FIN_WAIT_2 connections faster");
1537c72af87SMohan Srinivasan 
1547c72af87SMohan Srinivasan int    tcp_finwait2_timeout;
1557029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout,
1567029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1577029da5cSPawel Biernacki     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I",
1587029da5cSPawel Biernacki     "FIN-WAIT2 timeout");
1597c72af87SMohan Srinivasan 
1609077f387SGleb Smirnoff int	tcp_keepcnt = TCPTV_KEEPCNT;
1619077f387SGleb Smirnoff SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
1629077f387SGleb Smirnoff     "Number of keepalive probes to send");
1637c72af87SMohan Srinivasan 
1640312fbe9SPoul-Henning Kamp 	/* max idle probes */
1659b8b58e0SJonathan Lemon int	tcp_maxpersistidle;
166e79adb8eSGarrett Wollman 
16789e560f4SRandall Stewart int	tcp_rexmit_drop_options = 0;
1686c0ef895SJohn Baldwin SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
1696c0ef895SJohn Baldwin     &tcp_rexmit_drop_options, 0,
1706c0ef895SJohn Baldwin     "Drop TCP options from 3rd and later retransmitted SYN");
1716c0ef895SJohn Baldwin 
172e29c55e4SGleb Smirnoff VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
173f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
174f0188618SHans Petter Selasky     CTLFLAG_RW|CTLFLAG_VNET,
175f6f6703fSSean Bruno     &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
176f6f6703fSSean Bruno     "Path MTU Discovery Black Hole Detection Enabled");
177f6f6703fSSean Bruno 
178f6f6703fSSean Bruno #ifdef INET
179e29c55e4SGleb Smirnoff VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
180f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
181f0188618SHans Petter Selasky     CTLFLAG_RW|CTLFLAG_VNET,
182f6f6703fSSean Bruno     &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
183f6f6703fSSean Bruno     "Path MTU Discovery Black Hole Detection lowered MSS");
184f6f6703fSSean Bruno #endif
185f6f6703fSSean Bruno 
186f6f6703fSSean Bruno #ifdef INET6
187e29c55e4SGleb Smirnoff VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
188f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
189f0188618SHans Petter Selasky     CTLFLAG_RW|CTLFLAG_VNET,
190f6f6703fSSean Bruno     &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
191f6f6703fSSean Bruno     "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
192f6f6703fSSean Bruno #endif
193f6f6703fSSean Bruno 
1948f7e75cbSAdrian Chadd #ifdef	RSS
1958f7e75cbSAdrian Chadd static int	per_cpu_timers = 1;
1968f7e75cbSAdrian Chadd #else
19787aedea4SKip Macy static int	per_cpu_timers = 0;
1988f7e75cbSAdrian Chadd #endif
19987aedea4SKip Macy SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
20087aedea4SKip Macy     &per_cpu_timers , 0, "run tcp timers on all cpus");
20187aedea4SKip Macy 
202883831c6SAdrian Chadd /*
203883831c6SAdrian Chadd  * Map the given inp to a CPU id.
204883831c6SAdrian Chadd  *
205883831c6SAdrian Chadd  * This queries RSS if it's compiled in, else it defaults to the current
206883831c6SAdrian Chadd  * CPU ID.
207883831c6SAdrian Chadd  */
20889e560f4SRandall Stewart inline int
209883831c6SAdrian Chadd inp_to_cpuid(struct inpcb *inp)
210883831c6SAdrian Chadd {
211883831c6SAdrian Chadd 	u_int cpuid;
212883831c6SAdrian Chadd 
213883831c6SAdrian Chadd #ifdef	RSS
214883831c6SAdrian Chadd 	if (per_cpu_timers) {
215883831c6SAdrian Chadd 		cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
216883831c6SAdrian Chadd 		if (cpuid == NETISR_CPUID_NONE)
217883831c6SAdrian Chadd 			return (curcpu);	/* XXX */
218883831c6SAdrian Chadd 		else
219883831c6SAdrian Chadd 			return (cpuid);
220883831c6SAdrian Chadd 	}
221883831c6SAdrian Chadd #else
222883831c6SAdrian Chadd 	/* Legacy, pre-RSS behaviour */
223883831c6SAdrian Chadd 	if (per_cpu_timers) {
224883831c6SAdrian Chadd 		/*
225883831c6SAdrian Chadd 		 * We don't have a flowid -> cpuid mapping, so cheat and
226883831c6SAdrian Chadd 		 * just map unknown cpuids to curcpu.  Not the best, but
227883831c6SAdrian Chadd 		 * apparently better than defaulting to swi 0.
228883831c6SAdrian Chadd 		 */
229883831c6SAdrian Chadd 		cpuid = inp->inp_flowid % (mp_maxid + 1);
230883831c6SAdrian Chadd 		if (! CPU_ABSENT(cpuid))
231883831c6SAdrian Chadd 			return (cpuid);
232883831c6SAdrian Chadd 		return (curcpu);
233883831c6SAdrian Chadd 	}
234883831c6SAdrian Chadd #endif
235883831c6SAdrian Chadd 	/* Default for RSS and non-RSS - cpuid 0 */
236883831c6SAdrian Chadd 	else {
237883831c6SAdrian Chadd 		return (0);
238883831c6SAdrian Chadd 	}
239883831c6SAdrian Chadd }
24087aedea4SKip Macy 
241df8bae1dSRodney W. Grimes /*
242df8bae1dSRodney W. Grimes  * Tcp protocol timeout routine called every 500 ms.
2439b8b58e0SJonathan Lemon  * Updates timestamps used for TCP
244df8bae1dSRodney W. Grimes  * causes finite state machine actions if timers expire.
245df8bae1dSRodney W. Grimes  */
246df8bae1dSRodney W. Grimes void
247e2f2059fSMike Silbersack tcp_slowtimo(void)
248df8bae1dSRodney W. Grimes {
2498b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
25015bd2b43SDavid Greenman 
2515ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
2528b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
2538b615593SMarko Zec 		CURVNET_SET(vnet_iter);
254cea40c48SJulien Charbon 		(void) tcp_tw_2msl_scan(0);
2558b615593SMarko Zec 		CURVNET_RESTORE();
2568b615593SMarko Zec 	}
2575ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
258df8bae1dSRodney W. Grimes }
259df8bae1dSRodney W. Grimes 
260df8bae1dSRodney W. Grimes int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
261f058535dSJeffrey Hsu     { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
262df8bae1dSRodney W. Grimes 
26389e560f4SRandall Stewart int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
264e79adb8eSGarrett Wollman 
/*
 * TCP timer processing: one callout handler per timer
 * (delayed ACK, 2MSL, keepalive, persist, retransmit).
 */
26885d94372SRobert Watson 
26985d94372SRobert Watson void
27085d94372SRobert Watson tcp_timer_delack(void *xtp)
271df8bae1dSRodney W. Grimes {
272109eb549SGleb Smirnoff 	struct epoch_tracker et;
27385d94372SRobert Watson 	struct tcpcb *tp = xtp;
27485d94372SRobert Watson 	struct inpcb *inp;
2758b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
27685d94372SRobert Watson 
27785d94372SRobert Watson 	inp = tp->t_inpcb;
2785571f9cfSJulien Charbon 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
2798501a69cSRobert Watson 	INP_WLOCK(inp);
280655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_delack) ||
281655f934bSMikolaj Golub 	    !callout_active(&tp->t_timers->tt_delack)) {
2828501a69cSRobert Watson 		INP_WUNLOCK(inp);
2838b615593SMarko Zec 		CURVNET_RESTORE();
28485d94372SRobert Watson 		return;
28585d94372SRobert Watson 	}
286e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_delack);
287655f934bSMikolaj Golub 	if ((inp->inp_flags & INP_DROPPED) != 0) {
288655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
289655f934bSMikolaj Golub 		CURVNET_RESTORE();
290655f934bSMikolaj Golub 		return;
291655f934bSMikolaj Golub 	}
2929b8b58e0SJonathan Lemon 	tp->t_flags |= TF_ACKNOW;
29378b50714SRobert Watson 	TCPSTAT_INC(tcps_delack);
294109eb549SGleb Smirnoff 	NET_EPOCH_ENTER(et);
29555bceb1eSRandall Stewart 	(void) tp->t_fb->tfb_tcp_output(tp);
2968501a69cSRobert Watson 	INP_WUNLOCK(inp);
297109eb549SGleb Smirnoff 	NET_EPOCH_EXIT(et);
2988b615593SMarko Zec 	CURVNET_RESTORE();
2999b8b58e0SJonathan Lemon }
3009b8b58e0SJonathan Lemon 
301b07fef50SRandall Stewart void
302b07fef50SRandall Stewart tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp)
303b07fef50SRandall Stewart {
3046573d758SMatt Macy 	if (inp && tp != NULL)
305b07fef50SRandall Stewart 		INP_WUNLOCK(inp);
306b07fef50SRandall Stewart }
307b07fef50SRandall Stewart 
30885d94372SRobert Watson void
30985d94372SRobert Watson tcp_timer_2msl(void *xtp)
3109b8b58e0SJonathan Lemon {
31185d94372SRobert Watson 	struct tcpcb *tp = xtp;
31285d94372SRobert Watson 	struct inpcb *inp;
3136573d758SMatt Macy 	struct epoch_tracker et;
3148b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
3159b8b58e0SJonathan Lemon #ifdef TCPDEBUG
3169b8b58e0SJonathan Lemon 	int ostate;
3179b8b58e0SJonathan Lemon 
3189b8b58e0SJonathan Lemon 	ostate = tp->t_state;
3199b8b58e0SJonathan Lemon #endif
32085d94372SRobert Watson 	inp = tp->t_inpcb;
3215571f9cfSJulien Charbon 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
3228501a69cSRobert Watson 	INP_WLOCK(inp);
32385d94372SRobert Watson 	tcp_free_sackholes(tp);
324655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_2msl) ||
325e2f2059fSMike Silbersack 	    !callout_active(&tp->t_timers->tt_2msl)) {
3268501a69cSRobert Watson 		INP_WUNLOCK(tp->t_inpcb);
3278b615593SMarko Zec 		CURVNET_RESTORE();
32885d94372SRobert Watson 		return;
32985d94372SRobert Watson 	}
330e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_2msl);
3319a06a824SGleb Smirnoff 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
332655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
333655f934bSMikolaj Golub 		CURVNET_RESTORE();
334655f934bSMikolaj Golub 		return;
335655f934bSMikolaj Golub 	}
3365571f9cfSJulien Charbon 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
3375571f9cfSJulien Charbon 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
33885d94372SRobert Watson 	/*
339df8bae1dSRodney W. Grimes 	 * 2 MSL timeout in shutdown went off.  If we're closed but
340df8bae1dSRodney W. Grimes 	 * still waiting for peer to close and connection has been idle
34131a7749dSJulien Charbon 	 * too long delete connection control block.  Otherwise, check
34231a7749dSJulien Charbon 	 * again in a bit.
34331a7749dSJulien Charbon 	 *
3447c72af87SMohan Srinivasan 	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
3457c72af87SMohan Srinivasan 	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
3467c72af87SMohan Srinivasan 	 * Ignore fact that there were recent incoming segments.
347df8bae1dSRodney W. Grimes 	 */
3487c72af87SMohan Srinivasan 	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
34985d94372SRobert Watson 	    tp->t_inpcb && tp->t_inpcb->inp_socket &&
3507c72af87SMohan Srinivasan 	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
35178b50714SRobert Watson 		TCPSTAT_INC(tcps_finwait2_drops);
35258d94bd0SGleb Smirnoff 		NET_EPOCH_ENTER(et);
35385d94372SRobert Watson 		tp = tcp_close(tp);
35458d94bd0SGleb Smirnoff 		NET_EPOCH_EXIT(et);
355b07fef50SRandall Stewart 		tcp_inpinfo_lock_del(inp, tp);
356b07fef50SRandall Stewart 		goto out;
3577c72af87SMohan Srinivasan 	} else {
358d6de19acSJulien Charbon 		if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
359b07fef50SRandall Stewart 			callout_reset(&tp->t_timers->tt_2msl,
360b07fef50SRandall Stewart 				      TP_KEEPINTVL(tp), tcp_timer_2msl, tp);
361b07fef50SRandall Stewart 		} else {
36258d94bd0SGleb Smirnoff 			NET_EPOCH_ENTER(et);
36385d94372SRobert Watson 			tp = tcp_close(tp);
36458d94bd0SGleb Smirnoff 			NET_EPOCH_EXIT(et);
365b07fef50SRandall Stewart 			tcp_inpinfo_lock_del(inp, tp);
366b07fef50SRandall Stewart 			goto out;
367b07fef50SRandall Stewart 		}
3687c72af87SMohan Srinivasan 	}
369df8bae1dSRodney W. Grimes 
3709b8b58e0SJonathan Lemon #ifdef TCPDEBUG
371586b4a0eSKonstantin Belousov 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
372fb59c426SYoshinobu Inoue 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
3739b8b58e0SJonathan Lemon 			  PRU_SLOWTIMO);
3749b8b58e0SJonathan Lemon #endif
3755d06879aSGeorge V. Neville-Neil 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
3765d06879aSGeorge V. Neville-Neil 
37785d94372SRobert Watson 	if (tp != NULL)
3788501a69cSRobert Watson 		INP_WUNLOCK(inp);
379b07fef50SRandall Stewart out:
3808b615593SMarko Zec 	CURVNET_RESTORE();
3819b8b58e0SJonathan Lemon }
3829b8b58e0SJonathan Lemon 
38385d94372SRobert Watson void
38485d94372SRobert Watson tcp_timer_keep(void *xtp)
3859b8b58e0SJonathan Lemon {
38685d94372SRobert Watson 	struct tcpcb *tp = xtp;
38708517d53SMike Silbersack 	struct tcptemp *t_template;
38885d94372SRobert Watson 	struct inpcb *inp;
3896573d758SMatt Macy 	struct epoch_tracker et;
3908b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
3919b8b58e0SJonathan Lemon #ifdef TCPDEBUG
3929b8b58e0SJonathan Lemon 	int ostate;
3939b8b58e0SJonathan Lemon 
3949b8b58e0SJonathan Lemon 	ostate = tp->t_state;
3959b8b58e0SJonathan Lemon #endif
39685d94372SRobert Watson 	inp = tp->t_inpcb;
3975571f9cfSJulien Charbon 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
3988501a69cSRobert Watson 	INP_WLOCK(inp);
399655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_keep) ||
400655f934bSMikolaj Golub 	    !callout_active(&tp->t_timers->tt_keep)) {
4018501a69cSRobert Watson 		INP_WUNLOCK(inp);
4028b615593SMarko Zec 		CURVNET_RESTORE();
40385d94372SRobert Watson 		return;
40485d94372SRobert Watson 	}
405e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_keep);
4069a06a824SGleb Smirnoff 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
407655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
408655f934bSMikolaj Golub 		CURVNET_RESTORE();
409655f934bSMikolaj Golub 		return;
410655f934bSMikolaj Golub 	}
4115571f9cfSJulien Charbon 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
4125571f9cfSJulien Charbon 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
4136d172f58SJonathan T. Looney 
4146d172f58SJonathan T. Looney 	/*
4156d172f58SJonathan T. Looney 	 * Because we don't regularly reset the keepalive callout in
4166d172f58SJonathan T. Looney 	 * the ESTABLISHED state, it may be that we don't actually need
4176d172f58SJonathan T. Looney 	 * to send a keepalive yet. If that occurs, schedule another
4186d172f58SJonathan T. Looney 	 * call for the next time the keepalive timer might expire.
4196d172f58SJonathan T. Looney 	 */
4206d172f58SJonathan T. Looney 	if (TCPS_HAVEESTABLISHED(tp->t_state)) {
4216d172f58SJonathan T. Looney 		u_int idletime;
4226d172f58SJonathan T. Looney 
4236d172f58SJonathan T. Looney 		idletime = ticks - tp->t_rcvtime;
4246d172f58SJonathan T. Looney 		if (idletime < TP_KEEPIDLE(tp)) {
4256d172f58SJonathan T. Looney 			callout_reset(&tp->t_timers->tt_keep,
4266d172f58SJonathan T. Looney 			    TP_KEEPIDLE(tp) - idletime, tcp_timer_keep, tp);
4276d172f58SJonathan T. Looney 			INP_WUNLOCK(inp);
4286d172f58SJonathan T. Looney 			CURVNET_RESTORE();
4296d172f58SJonathan T. Looney 			return;
4306d172f58SJonathan T. Looney 		}
4316d172f58SJonathan T. Looney 	}
4326d172f58SJonathan T. Looney 
4339b8b58e0SJonathan Lemon 	/*
4349b8b58e0SJonathan Lemon 	 * Keep-alive timer went off; send something
4359b8b58e0SJonathan Lemon 	 * or drop connection if idle for too long.
4369b8b58e0SJonathan Lemon 	 */
43778b50714SRobert Watson 	TCPSTAT_INC(tcps_keeptimeo);
4389b8b58e0SJonathan Lemon 	if (tp->t_state < TCPS_ESTABLISHED)
4399b8b58e0SJonathan Lemon 		goto dropit;
440334fc582SBjoern A. Zeeb 	if ((V_tcp_always_keepalive ||
441f1798531SJohn Baldwin 	    inp->inp_socket->so_options & SO_KEEPALIVE) &&
4429b8b58e0SJonathan Lemon 	    tp->t_state <= TCPS_CLOSING) {
4439077f387SGleb Smirnoff 		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
4449b8b58e0SJonathan Lemon 			goto dropit;
4459b8b58e0SJonathan Lemon 		/*
4469b8b58e0SJonathan Lemon 		 * Send a packet designed to force a response
4479b8b58e0SJonathan Lemon 		 * if the peer is up and reachable:
4489b8b58e0SJonathan Lemon 		 * either an ACK if the connection is still alive,
4499b8b58e0SJonathan Lemon 		 * or an RST if the peer has closed the connection
4509b8b58e0SJonathan Lemon 		 * due to timeout or reboot.
4519b8b58e0SJonathan Lemon 		 * Using sequence number tp->snd_una-1
4529b8b58e0SJonathan Lemon 		 * causes the transmitted zero-length segment
4539b8b58e0SJonathan Lemon 		 * to lie outside the receive window;
4549b8b58e0SJonathan Lemon 		 * by the protocol spec, this requires the
4559b8b58e0SJonathan Lemon 		 * correspondent TCP to respond.
4569b8b58e0SJonathan Lemon 		 */
45778b50714SRobert Watson 		TCPSTAT_INC(tcps_keepprobe);
45879909384SJonathan Lemon 		t_template = tcpip_maketemplate(inp);
45908517d53SMike Silbersack 		if (t_template) {
460b9555453SGleb Smirnoff 			NET_EPOCH_ENTER(et);
46108517d53SMike Silbersack 			tcp_respond(tp, t_template->tt_ipgen,
46208517d53SMike Silbersack 				    &t_template->tt_t, (struct mbuf *)NULL,
4639b8b58e0SJonathan Lemon 				    tp->rcv_nxt, tp->snd_una - 1, 0);
464b9555453SGleb Smirnoff 			NET_EPOCH_EXIT(et);
46553640b0eSRobert Watson 			free(t_template, M_TEMP);
46608517d53SMike Silbersack 		}
467b07fef50SRandall Stewart 		callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
468b07fef50SRandall Stewart 			      tcp_timer_keep, tp);
469b07fef50SRandall Stewart 	} else
470b07fef50SRandall Stewart 		callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
471b07fef50SRandall Stewart 			      tcp_timer_keep, tp);
4729b8b58e0SJonathan Lemon 
4739b8b58e0SJonathan Lemon #ifdef TCPDEBUG
4742a074620SSam Leffler 	if (inp->inp_socket->so_options & SO_DEBUG)
475fb59c426SYoshinobu Inoue 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
4769b8b58e0SJonathan Lemon 			  PRU_SLOWTIMO);
4779b8b58e0SJonathan Lemon #endif
4785d06879aSGeorge V. Neville-Neil 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
4798501a69cSRobert Watson 	INP_WUNLOCK(inp);
4808b615593SMarko Zec 	CURVNET_RESTORE();
48185d94372SRobert Watson 	return;
4829b8b58e0SJonathan Lemon 
4839b8b58e0SJonathan Lemon dropit:
48478b50714SRobert Watson 	TCPSTAT_INC(tcps_keepdrops);
48558d94bd0SGleb Smirnoff 	NET_EPOCH_ENTER(et);
48685d94372SRobert Watson 	tp = tcp_drop(tp, ETIMEDOUT);
48785d94372SRobert Watson 
48885d94372SRobert Watson #ifdef TCPDEBUG
48985d94372SRobert Watson 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
49085d94372SRobert Watson 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
49185d94372SRobert Watson 			  PRU_SLOWTIMO);
49285d94372SRobert Watson #endif
4935d06879aSGeorge V. Neville-Neil 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
49458d94bd0SGleb Smirnoff 	NET_EPOCH_EXIT(et);
495b07fef50SRandall Stewart 	tcp_inpinfo_lock_del(inp, tp);
4968b615593SMarko Zec 	CURVNET_RESTORE();
4979b8b58e0SJonathan Lemon }
4989b8b58e0SJonathan Lemon 
49985d94372SRobert Watson void
50085d94372SRobert Watson tcp_timer_persist(void *xtp)
5019b8b58e0SJonathan Lemon {
50285d94372SRobert Watson 	struct tcpcb *tp = xtp;
50385d94372SRobert Watson 	struct inpcb *inp;
5046573d758SMatt Macy 	struct epoch_tracker et;
5058b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
5069b8b58e0SJonathan Lemon #ifdef TCPDEBUG
5079b8b58e0SJonathan Lemon 	int ostate;
5089b8b58e0SJonathan Lemon 
5099b8b58e0SJonathan Lemon 	ostate = tp->t_state;
5109b8b58e0SJonathan Lemon #endif
51185d94372SRobert Watson 	inp = tp->t_inpcb;
5125571f9cfSJulien Charbon 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
5138501a69cSRobert Watson 	INP_WLOCK(inp);
514655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_persist) ||
515655f934bSMikolaj Golub 	    !callout_active(&tp->t_timers->tt_persist)) {
5168501a69cSRobert Watson 		INP_WUNLOCK(inp);
5178b615593SMarko Zec 		CURVNET_RESTORE();
51885d94372SRobert Watson 		return;
51985d94372SRobert Watson 	}
520e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_persist);
5219a06a824SGleb Smirnoff 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
522655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
523655f934bSMikolaj Golub 		CURVNET_RESTORE();
524655f934bSMikolaj Golub 		return;
525655f934bSMikolaj Golub 	}
5265571f9cfSJulien Charbon 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
5275571f9cfSJulien Charbon 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
5289b8b58e0SJonathan Lemon 	/*
529a4641f4eSPedro F. Giffuni 	 * Persistence timer into zero window.
5309b8b58e0SJonathan Lemon 	 * Force a byte to be output, if possible.
5319b8b58e0SJonathan Lemon 	 */
53278b50714SRobert Watson 	TCPSTAT_INC(tcps_persisttimeo);
5339b8b58e0SJonathan Lemon 	/*
5349b8b58e0SJonathan Lemon 	 * Hack: if the peer is dead/unreachable, we do not
5359b8b58e0SJonathan Lemon 	 * time out if the window is closed.  After a full
5369b8b58e0SJonathan Lemon 	 * backoff, drop the connection if the idle time
5379b8b58e0SJonathan Lemon 	 * (no responses to probes) reaches the maximum
5389b8b58e0SJonathan Lemon 	 * backoff that we would use if retransmitting.
5399b8b58e0SJonathan Lemon 	 */
5409b8b58e0SJonathan Lemon 	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
5416b0c5521SJohn Baldwin 	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
5426b0c5521SJohn Baldwin 	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
54378b50714SRobert Watson 		TCPSTAT_INC(tcps_persistdrop);
54458d94bd0SGleb Smirnoff 		NET_EPOCH_ENTER(et);
54585d94372SRobert Watson 		tp = tcp_drop(tp, ETIMEDOUT);
54658d94bd0SGleb Smirnoff 		NET_EPOCH_EXIT(et);
547b07fef50SRandall Stewart 		tcp_inpinfo_lock_del(inp, tp);
54885d94372SRobert Watson 		goto out;
5499b8b58e0SJonathan Lemon 	}
550322181c9SAndre Oppermann 	/*
551322181c9SAndre Oppermann 	 * If the user has closed the socket then drop a persisting
552322181c9SAndre Oppermann 	 * connection after a much reduced timeout.
553322181c9SAndre Oppermann 	 */
554322181c9SAndre Oppermann 	if (tp->t_state > TCPS_CLOSE_WAIT &&
555322181c9SAndre Oppermann 	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
556322181c9SAndre Oppermann 		TCPSTAT_INC(tcps_persistdrop);
55758d94bd0SGleb Smirnoff 		NET_EPOCH_ENTER(et);
558322181c9SAndre Oppermann 		tp = tcp_drop(tp, ETIMEDOUT);
55958d94bd0SGleb Smirnoff 		NET_EPOCH_EXIT(et);
560b07fef50SRandall Stewart 		tcp_inpinfo_lock_del(inp, tp);
561322181c9SAndre Oppermann 		goto out;
562322181c9SAndre Oppermann 	}
5639b8b58e0SJonathan Lemon 	tcp_setpersist(tp);
5642cdbfa66SPaul Saab 	tp->t_flags |= TF_FORCEDATA;
565109eb549SGleb Smirnoff 	NET_EPOCH_ENTER(et);
56655bceb1eSRandall Stewart 	(void) tp->t_fb->tfb_tcp_output(tp);
567109eb549SGleb Smirnoff 	NET_EPOCH_EXIT(et);
5682cdbfa66SPaul Saab 	tp->t_flags &= ~TF_FORCEDATA;
5699b8b58e0SJonathan Lemon 
5709b8b58e0SJonathan Lemon #ifdef TCPDEBUG
571ffb761f6SGleb Smirnoff 	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
572ffb761f6SGleb Smirnoff 		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
5739b8b58e0SJonathan Lemon #endif
5745d06879aSGeorge V. Neville-Neil 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
5758501a69cSRobert Watson 	INP_WUNLOCK(inp);
576b07fef50SRandall Stewart out:
5778b615593SMarko Zec 	CURVNET_RESTORE();
5789b8b58e0SJonathan Lemon }
5799b8b58e0SJonathan Lemon 
58085d94372SRobert Watson void
58185d94372SRobert Watson tcp_timer_rexmt(void * xtp)
5829b8b58e0SJonathan Lemon {
58385d94372SRobert Watson 	struct tcpcb *tp = xtp;
5848b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
5859b8b58e0SJonathan Lemon 	int rexmt;
58685d94372SRobert Watson 	struct inpcb *inp;
5876573d758SMatt Macy 	struct epoch_tracker et;
588413c3db1SMichael Tuexen 	bool isipv6;
5899b8b58e0SJonathan Lemon #ifdef TCPDEBUG
5909b8b58e0SJonathan Lemon 	int ostate;
5919b8b58e0SJonathan Lemon 
5929b8b58e0SJonathan Lemon 	ostate = tp->t_state;
5939b8b58e0SJonathan Lemon #endif
59485d94372SRobert Watson 	inp = tp->t_inpcb;
5955571f9cfSJulien Charbon 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
5968501a69cSRobert Watson 	INP_WLOCK(inp);
597655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_rexmt) ||
598655f934bSMikolaj Golub 	    !callout_active(&tp->t_timers->tt_rexmt)) {
5998501a69cSRobert Watson 		INP_WUNLOCK(inp);
6008b615593SMarko Zec 		CURVNET_RESTORE();
60185d94372SRobert Watson 		return;
60285d94372SRobert Watson 	}
603e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_rexmt);
6049a06a824SGleb Smirnoff 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
605655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
606655f934bSMikolaj Golub 		CURVNET_RESTORE();
607655f934bSMikolaj Golub 		return;
608655f934bSMikolaj Golub 	}
6095571f9cfSJulien Charbon 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
6105571f9cfSJulien Charbon 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
6116d90faf3SPaul Saab 	tcp_free_sackholes(tp);
6122529f56eSJonathan T. Looney 	TCP_LOG_EVENT(tp, NULL, NULL, NULL, TCP_LOG_RTO, 0, 0, NULL, false);
6135105a92cSRandall Stewart 	if (tp->t_fb->tfb_tcp_rexmit_tmr) {
6145105a92cSRandall Stewart 		/* The stack has a timer action too. */
6155105a92cSRandall Stewart 		(*tp->t_fb->tfb_tcp_rexmit_tmr)(tp);
6165105a92cSRandall Stewart 	}
617df8bae1dSRodney W. Grimes 	/*
618df8bae1dSRodney W. Grimes 	 * Retransmission timer went off.  Message has not
619df8bae1dSRodney W. Grimes 	 * been acked within retransmit interval.  Back off
620df8bae1dSRodney W. Grimes 	 * to a longer retransmit interval and retransmit one segment.
621df8bae1dSRodney W. Grimes 	 */
622df8bae1dSRodney W. Grimes 	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
623df8bae1dSRodney W. Grimes 		tp->t_rxtshift = TCP_MAXRXTSHIFT;
62478b50714SRobert Watson 		TCPSTAT_INC(tcps_timeoutdrop);
62558d94bd0SGleb Smirnoff 		NET_EPOCH_ENTER(et);
6264c6a1090SMichael Tuexen 		tp = tcp_drop(tp, ETIMEDOUT);
62758d94bd0SGleb Smirnoff 		NET_EPOCH_EXIT(et);
628b07fef50SRandall Stewart 		tcp_inpinfo_lock_del(inp, tp);
629b07fef50SRandall Stewart 		goto out;
630b07fef50SRandall Stewart 	}
631cf8f04f4SAndre Oppermann 	if (tp->t_state == TCPS_SYN_SENT) {
632cf8f04f4SAndre Oppermann 		/*
633cf8f04f4SAndre Oppermann 		 * If the SYN was retransmitted, indicate CWND to be
634cf8f04f4SAndre Oppermann 		 * limited to 1 segment in cc_conn_init().
635cf8f04f4SAndre Oppermann 		 */
636cf8f04f4SAndre Oppermann 		tp->snd_cwnd = 1;
637cf8f04f4SAndre Oppermann 	} else if (tp->t_rxtshift == 1) {
6389b8b58e0SJonathan Lemon 		/*
6399b8b58e0SJonathan Lemon 		 * first retransmit; record ssthresh and cwnd so they can
6409b8b58e0SJonathan Lemon 		 * be recovered if this turns out to be a "bad" retransmit.
6419b8b58e0SJonathan Lemon 		 * A retransmit is considered "bad" if an ACK for this
6429b8b58e0SJonathan Lemon 		 * segment is received within RTT/2 interval; the assumption
6439b8b58e0SJonathan Lemon 		 * here is that the ACK was already in flight.  See
6449b8b58e0SJonathan Lemon 		 * "On Estimating End-to-End Network Path Properties" by
6459b8b58e0SJonathan Lemon 		 * Allman and Paxson for more details.
6469b8b58e0SJonathan Lemon 		 */
6479b8b58e0SJonathan Lemon 		tp->snd_cwnd_prev = tp->snd_cwnd;
6489b8b58e0SJonathan Lemon 		tp->snd_ssthresh_prev = tp->snd_ssthresh;
6499d11646dSJeffrey Hsu 		tp->snd_recover_prev = tp->snd_recover;
650dbc42409SLawrence Stewart 		if (IN_FASTRECOVERY(tp->t_flags))
6519d11646dSJeffrey Hsu 			tp->t_flags |= TF_WASFRECOVERY;
6529d11646dSJeffrey Hsu 		else
6539d11646dSJeffrey Hsu 			tp->t_flags &= ~TF_WASFRECOVERY;
654dbc42409SLawrence Stewart 		if (IN_CONGRECOVERY(tp->t_flags))
655dbc42409SLawrence Stewart 			tp->t_flags |= TF_WASCRECOVERY;
656dbc42409SLawrence Stewart 		else
657dbc42409SLawrence Stewart 			tp->t_flags &= ~TF_WASCRECOVERY;
65810d20c84SMatt Macy 		if ((tp->t_flags & TF_RCVD_TSTMP) == 0)
6599b8b58e0SJonathan Lemon 			tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
66010d20c84SMatt Macy 		/* In the event that we've negotiated timestamps
66110d20c84SMatt Macy 		 * badrxtwin will be set to the value that we set
66210d20c84SMatt Macy 		 * the retransmitted packet's to_tsval to by tcp_output
66310d20c84SMatt Macy 		 */
664672dc4aeSJohn Baldwin 		tp->t_flags |= TF_PREVVALID;
665672dc4aeSJohn Baldwin 	} else
666672dc4aeSJohn Baldwin 		tp->t_flags &= ~TF_PREVVALID;
66778b50714SRobert Watson 	TCPSTAT_INC(tcps_rexmttimeo);
668281a0fd4SPatrick Kelsey 	if ((tp->t_state == TCPS_SYN_SENT) ||
669281a0fd4SPatrick Kelsey 	    (tp->t_state == TCPS_SYN_RECEIVED))
6700999766dSMichael Tuexen 		rexmt = tcp_rexmit_initial * tcp_backoff[tp->t_rxtshift];
6717d42e30cSJonathan Lemon 	else
672df8bae1dSRodney W. Grimes 		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
673df8bae1dSRodney W. Grimes 	TCPT_RANGESET(tp->t_rxtcur, rexmt,
674df8bae1dSRodney W. Grimes 		      tp->t_rttmin, TCPTV_REXMTMAX);
675f6f6703fSSean Bruno 
676882ac53eSSean Bruno 	/*
677882ac53eSSean Bruno 	 * We enter the path for PLMTUD if connection is established or, if
678882ac53eSSean Bruno 	 * connection is FIN_WAIT_1 status, reason for the last is that if
679882ac53eSSean Bruno 	 * amount of data we send is very small, we could send it in couple of
680882ac53eSSean Bruno 	 * packets and process straight to FIN. In that case we won't catch
681882ac53eSSean Bruno 	 * ESTABLISHED state.
682882ac53eSSean Bruno 	 */
683f6f6703fSSean Bruno #ifdef INET6
684413c3db1SMichael Tuexen 	isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? true : false;
685413c3db1SMichael Tuexen #else
686413c3db1SMichael Tuexen 	isipv6 = false;
687f6f6703fSSean Bruno #endif
688413c3db1SMichael Tuexen 	if (((V_tcp_pmtud_blackhole_detect == 1) ||
689413c3db1SMichael Tuexen 	    (V_tcp_pmtud_blackhole_detect == 2 && !isipv6) ||
690413c3db1SMichael Tuexen 	    (V_tcp_pmtud_blackhole_detect == 3 && isipv6)) &&
691413c3db1SMichael Tuexen 	    ((tp->t_state == TCPS_ESTABLISHED) ||
692413c3db1SMichael Tuexen 	    (tp->t_state == TCPS_FIN_WAIT_1))) {
693b89af8e1SMichael Tuexen 		if (tp->t_rxtshift == 1) {
694adf43a92SHiren Panchasara 			/*
695b89af8e1SMichael Tuexen 			 * We enter blackhole detection after the first
696b89af8e1SMichael Tuexen 			 * unsuccessful timer based retransmission.
697b89af8e1SMichael Tuexen 			 * Then we reduce up to two times the MSS, each
698b89af8e1SMichael Tuexen 			 * candidate giving two tries of retransmissions.
699b89af8e1SMichael Tuexen 			 * But we give a candidate only two tries, if it
700b89af8e1SMichael Tuexen 			 * actually reduces the MSS.
701adf43a92SHiren Panchasara 			 */
702b89af8e1SMichael Tuexen 			tp->t_blackhole_enter = 2;
703b89af8e1SMichael Tuexen 			tp->t_blackhole_exit = tp->t_blackhole_enter;
704b89af8e1SMichael Tuexen 			if (isipv6) {
705b89af8e1SMichael Tuexen #ifdef INET6
706b89af8e1SMichael Tuexen 				if (tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss)
707b89af8e1SMichael Tuexen 					tp->t_blackhole_exit += 2;
708b89af8e1SMichael Tuexen 				if (tp->t_maxseg > V_tcp_v6mssdflt &&
709b89af8e1SMichael Tuexen 				    V_tcp_v6pmtud_blackhole_mss > V_tcp_v6mssdflt)
710b89af8e1SMichael Tuexen 					tp->t_blackhole_exit += 2;
711b89af8e1SMichael Tuexen #endif
712b89af8e1SMichael Tuexen 			} else {
713b89af8e1SMichael Tuexen #ifdef INET
714b89af8e1SMichael Tuexen 				if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss)
715b89af8e1SMichael Tuexen 					tp->t_blackhole_exit += 2;
716b89af8e1SMichael Tuexen 				if (tp->t_maxseg > V_tcp_mssdflt &&
717b89af8e1SMichael Tuexen 				    V_tcp_pmtud_blackhole_mss > V_tcp_mssdflt)
718b89af8e1SMichael Tuexen 					tp->t_blackhole_exit += 2;
719b89af8e1SMichael Tuexen #endif
720b89af8e1SMichael Tuexen 			}
721b89af8e1SMichael Tuexen 		}
722f6f6703fSSean Bruno 		if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
723f6f6703fSSean Bruno 		    (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
724b89af8e1SMichael Tuexen 		    (tp->t_rxtshift >= tp->t_blackhole_enter &&
725b89af8e1SMichael Tuexen 		    tp->t_rxtshift < tp->t_blackhole_exit &&
726b89af8e1SMichael Tuexen 		    (tp->t_rxtshift - tp->t_blackhole_enter) % 2 == 0)) {
727f6f6703fSSean Bruno 			/*
728f6f6703fSSean Bruno 			 * Enter Path MTU Black-hole Detection mechanism:
729f6f6703fSSean Bruno 			 * - Disable Path MTU Discovery (IP "DF" bit).
730f6f6703fSSean Bruno 			 * - Reduce MTU to lower value than what we
731f6f6703fSSean Bruno 			 *   negotiated with peer.
732f6f6703fSSean Bruno 			 */
7333d5af7a1SMichael Tuexen 			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) == 0) {
734f6f6703fSSean Bruno 				/* Record that we may have found a black hole. */
735f6f6703fSSean Bruno 				tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
736f6f6703fSSean Bruno 				/* Keep track of previous MSS. */
7370c39d38dSGleb Smirnoff 				tp->t_pmtud_saved_maxseg = tp->t_maxseg;
7383d5af7a1SMichael Tuexen 			}
739f6f6703fSSean Bruno 
740f6f6703fSSean Bruno 			/*
741f6f6703fSSean Bruno 			 * Reduce the MSS to blackhole value or to the default
742f6f6703fSSean Bruno 			 * in an attempt to retransmit.
743f6f6703fSSean Bruno 			 */
744f6f6703fSSean Bruno #ifdef INET6
745f6f6703fSSean Bruno 			if (isipv6 &&
746b89af8e1SMichael Tuexen 			    tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss &&
747b89af8e1SMichael Tuexen 			    V_tcp_v6pmtud_blackhole_mss > V_tcp_v6mssdflt) {
748f6f6703fSSean Bruno 				/* Use the sysctl tuneable blackhole MSS. */
7490c39d38dSGleb Smirnoff 				tp->t_maxseg = V_tcp_v6pmtud_blackhole_mss;
75032a04bb8SSean Bruno 				TCPSTAT_INC(tcps_pmtud_blackhole_activated);
751f6f6703fSSean Bruno 			} else if (isipv6) {
752f6f6703fSSean Bruno 				/* Use the default MSS. */
7530c39d38dSGleb Smirnoff 				tp->t_maxseg = V_tcp_v6mssdflt;
754f6f6703fSSean Bruno 				/*
755f6f6703fSSean Bruno 				 * Disable Path MTU Discovery when we switch to
756f6f6703fSSean Bruno 				 * minmss.
757f6f6703fSSean Bruno 				 */
758f6f6703fSSean Bruno 				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
75932a04bb8SSean Bruno 				TCPSTAT_INC(tcps_pmtud_blackhole_activated_min_mss);
760f6f6703fSSean Bruno 			}
761f6f6703fSSean Bruno #endif
762f6f6703fSSean Bruno #if defined(INET6) && defined(INET)
763f6f6703fSSean Bruno 			else
764f6f6703fSSean Bruno #endif
765f6f6703fSSean Bruno #ifdef INET
766b89af8e1SMichael Tuexen 			if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss &&
767b89af8e1SMichael Tuexen 			    V_tcp_pmtud_blackhole_mss > V_tcp_mssdflt) {
768f6f6703fSSean Bruno 				/* Use the sysctl tuneable blackhole MSS. */
7690c39d38dSGleb Smirnoff 				tp->t_maxseg = V_tcp_pmtud_blackhole_mss;
77032a04bb8SSean Bruno 				TCPSTAT_INC(tcps_pmtud_blackhole_activated);
771f6f6703fSSean Bruno 			} else {
772f6f6703fSSean Bruno 				/* Use the default MSS. */
7730c39d38dSGleb Smirnoff 				tp->t_maxseg = V_tcp_mssdflt;
774f6f6703fSSean Bruno 				/*
775f6f6703fSSean Bruno 				 * Disable Path MTU Discovery when we switch to
776f6f6703fSSean Bruno 				 * minmss.
777f6f6703fSSean Bruno 				 */
778f6f6703fSSean Bruno 				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
77932a04bb8SSean Bruno 				TCPSTAT_INC(tcps_pmtud_blackhole_activated_min_mss);
780f6f6703fSSean Bruno 			}
781f6f6703fSSean Bruno #endif
782f6f6703fSSean Bruno 			/*
783f6f6703fSSean Bruno 			 * Reset the slow-start flight size
784f6f6703fSSean Bruno 			 * as it may depend on the new MSS.
785f6f6703fSSean Bruno 			 */
786f6f6703fSSean Bruno 			if (CC_ALGO(tp)->conn_init != NULL)
787f6f6703fSSean Bruno 				CC_ALGO(tp)->conn_init(tp->ccv);
788f6f6703fSSean Bruno 		} else {
789f6f6703fSSean Bruno 			/*
790f6f6703fSSean Bruno 			 * If further retransmissions are still unsuccessful
791f6f6703fSSean Bruno 			 * with a lowered MTU, maybe this isn't a blackhole and
792f6f6703fSSean Bruno 			 * we restore the previous MSS and blackhole detection
793f6f6703fSSean Bruno 			 * flags.
794f6f6703fSSean Bruno 			 */
795f6f6703fSSean Bruno 			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
796b89af8e1SMichael Tuexen 			    (tp->t_rxtshift >= tp->t_blackhole_exit)) {
797f6f6703fSSean Bruno 				tp->t_flags2 |= TF2_PLPMTU_PMTUD;
798f6f6703fSSean Bruno 				tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
7990c39d38dSGleb Smirnoff 				tp->t_maxseg = tp->t_pmtud_saved_maxseg;
80032a04bb8SSean Bruno 				TCPSTAT_INC(tcps_pmtud_blackhole_failed);
801f6f6703fSSean Bruno 				/*
802f6f6703fSSean Bruno 				 * Reset the slow-start flight size as it
803f6f6703fSSean Bruno 				 * may depend on the new MSS.
804f6f6703fSSean Bruno 				 */
805f6f6703fSSean Bruno 				if (CC_ALGO(tp)->conn_init != NULL)
806f6f6703fSSean Bruno 					CC_ALGO(tp)->conn_init(tp->ccv);
807f6f6703fSSean Bruno 			}
808f6f6703fSSean Bruno 		}
809f6f6703fSSean Bruno 	}
810f6f6703fSSean Bruno 
811df8bae1dSRodney W. Grimes 	/*
81277339e1cSAndre Oppermann 	 * Disable RFC1323 and SACK if we haven't got any response to
8137ceb7783SJesper Skriver 	 * our third SYN to work-around some broken terminal servers
8147ceb7783SJesper Skriver 	 * (most of which have hopefully been retired) that have bad VJ
8157ceb7783SJesper Skriver 	 * header compression code which trashes TCP segments containing
8167ceb7783SJesper Skriver 	 * unknown-to-them TCP options.
8177ceb7783SJesper Skriver 	 */
8186c0ef895SJohn Baldwin 	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
8196c0ef895SJohn Baldwin 	    (tp->t_rxtshift == 3))
820c4ab59c1SAndre Oppermann 		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
8217ceb7783SJesper Skriver 	/*
8225ede40dcSRyan Stone 	 * If we backed off this far, notify the L3 protocol that we're having
8235ede40dcSRyan Stone 	 * connection problems.
824df8bae1dSRodney W. Grimes 	 */
8255ede40dcSRyan Stone 	if (tp->t_rxtshift > TCP_RTT_INVALIDATE) {
826fb59c426SYoshinobu Inoue #ifdef INET6
827fb59c426SYoshinobu Inoue 		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
828fb59c426SYoshinobu Inoue 			in6_losing(tp->t_inpcb);
82984cc0778SGeorge V. Neville-Neil 		else
830fb59c426SYoshinobu Inoue #endif
83184cc0778SGeorge V. Neville-Neil 			in_losing(tp->t_inpcb);
832df8bae1dSRodney W. Grimes 	}
833df8bae1dSRodney W. Grimes 	tp->snd_nxt = tp->snd_una;
8349d11646dSJeffrey Hsu 	tp->snd_recover = tp->snd_max;
83546f58482SJonathan Lemon 	/*
83674b48c1dSAndras Olah 	 * Force a segment to be sent.
83774b48c1dSAndras Olah 	 */
83874b48c1dSAndras Olah 	tp->t_flags |= TF_ACKNOW;
83974b48c1dSAndras Olah 	/*
840df8bae1dSRodney W. Grimes 	 * If timing a segment in this window, stop the timer.
841df8bae1dSRodney W. Grimes 	 */
8429b8b58e0SJonathan Lemon 	tp->t_rtttime = 0;
843dbc42409SLawrence Stewart 
844b5af1b88SLawrence Stewart 	cc_cong_signal(tp, NULL, CC_RTO);
845109eb549SGleb Smirnoff 	NET_EPOCH_ENTER(et);
84655bceb1eSRandall Stewart 	(void) tp->t_fb->tfb_tcp_output(tp);
847109eb549SGleb Smirnoff 	NET_EPOCH_EXIT(et);
8489b8b58e0SJonathan Lemon #ifdef TCPDEBUG
8491c53f806SRobert Watson 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
850fb59c426SYoshinobu Inoue 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
8519b8b58e0SJonathan Lemon 			  PRU_SLOWTIMO);
852df8bae1dSRodney W. Grimes #endif
8535d06879aSGeorge V. Neville-Neil 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
8548501a69cSRobert Watson 	INP_WUNLOCK(inp);
855b07fef50SRandall Stewart out:
8568b615593SMarko Zec 	CURVNET_RESTORE();
85785d94372SRobert Watson }
85885d94372SRobert Watson 
85985d94372SRobert Watson void
8605571f9cfSJulien Charbon tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
86185d94372SRobert Watson {
86285d94372SRobert Watson 	struct callout *t_callout;
8635773ac11SJohn Baldwin 	callout_func_t *f_callout;
86487aedea4SKip Macy 	struct inpcb *inp = tp->t_inpcb;
865883831c6SAdrian Chadd 	int cpu = inp_to_cpuid(inp);
86685d94372SRobert Watson 
86709fe6320SNavdeep Parhar #ifdef TCP_OFFLOAD
86809fe6320SNavdeep Parhar 	if (tp->t_flags & TF_TOE)
86909fe6320SNavdeep Parhar 		return;
87009fe6320SNavdeep Parhar #endif
87109fe6320SNavdeep Parhar 
8725571f9cfSJulien Charbon 	if (tp->t_timers->tt_flags & TT_STOPPED)
8735571f9cfSJulien Charbon 		return;
8745571f9cfSJulien Charbon 
87585d94372SRobert Watson 	switch (timer_type) {
87685d94372SRobert Watson 		case TT_DELACK:
877e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_delack;
87885d94372SRobert Watson 			f_callout = tcp_timer_delack;
87985d94372SRobert Watson 			break;
88085d94372SRobert Watson 		case TT_REXMT:
881e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_rexmt;
88285d94372SRobert Watson 			f_callout = tcp_timer_rexmt;
88385d94372SRobert Watson 			break;
88485d94372SRobert Watson 		case TT_PERSIST:
885e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_persist;
88685d94372SRobert Watson 			f_callout = tcp_timer_persist;
88785d94372SRobert Watson 			break;
88885d94372SRobert Watson 		case TT_KEEP:
889e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_keep;
89085d94372SRobert Watson 			f_callout = tcp_timer_keep;
89185d94372SRobert Watson 			break;
89285d94372SRobert Watson 		case TT_2MSL:
893e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_2msl;
89485d94372SRobert Watson 			f_callout = tcp_timer_2msl;
89585d94372SRobert Watson 			break;
89685d94372SRobert Watson 		default:
89755bceb1eSRandall Stewart 			if (tp->t_fb->tfb_tcp_timer_activate) {
89855bceb1eSRandall Stewart 				tp->t_fb->tfb_tcp_timer_activate(tp, timer_type, delta);
89955bceb1eSRandall Stewart 				return;
90055bceb1eSRandall Stewart 			}
90103374917SJulien Charbon 			panic("tp %p bad timer_type %#x", tp, timer_type);
90285d94372SRobert Watson 		}
90385d94372SRobert Watson 	if (delta == 0) {
904b07fef50SRandall Stewart 		callout_stop(t_callout);
90585d94372SRobert Watson 	} else {
90687aedea4SKip Macy 		callout_reset_on(t_callout, delta, f_callout, tp, cpu);
90785d94372SRobert Watson 	}
90885d94372SRobert Watson }
90985d94372SRobert Watson 
91085d94372SRobert Watson int
9115571f9cfSJulien Charbon tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
91285d94372SRobert Watson {
91385d94372SRobert Watson 	struct callout *t_callout;
91485d94372SRobert Watson 
91585d94372SRobert Watson 	switch (timer_type) {
91685d94372SRobert Watson 		case TT_DELACK:
917e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_delack;
91885d94372SRobert Watson 			break;
91985d94372SRobert Watson 		case TT_REXMT:
920e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_rexmt;
92185d94372SRobert Watson 			break;
92285d94372SRobert Watson 		case TT_PERSIST:
923e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_persist;
92485d94372SRobert Watson 			break;
92585d94372SRobert Watson 		case TT_KEEP:
926e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_keep;
92785d94372SRobert Watson 			break;
92885d94372SRobert Watson 		case TT_2MSL:
929e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_2msl;
93085d94372SRobert Watson 			break;
93185d94372SRobert Watson 		default:
93255bceb1eSRandall Stewart 			if (tp->t_fb->tfb_tcp_timer_active) {
93355bceb1eSRandall Stewart 				return(tp->t_fb->tfb_tcp_timer_active(tp, timer_type));
93455bceb1eSRandall Stewart 			}
93503374917SJulien Charbon 			panic("tp %p bad timer_type %#x", tp, timer_type);
93685d94372SRobert Watson 		}
93785d94372SRobert Watson 	return callout_active(t_callout);
938df8bae1dSRodney W. Grimes }
939b8614722SMike Silbersack 
94089e560f4SRandall Stewart /*
94189e560f4SRandall Stewart  * Stop the timer from running, and apply a flag
94289e560f4SRandall Stewart  * against the timer_flags that will force the
94389e560f4SRandall Stewart  * timer never to run. The flag is needed to assure
94489e560f4SRandall Stewart  * a race does not leave it running and cause
94589e560f4SRandall Stewart  * the timer to possibly restart itself (keep and persist
94689e560f4SRandall Stewart  * especially do this).
94789e560f4SRandall Stewart  */
94889e560f4SRandall Stewart int
94989e560f4SRandall Stewart tcp_timer_suspend(struct tcpcb *tp, uint32_t timer_type)
95089e560f4SRandall Stewart {
95189e560f4SRandall Stewart 	struct callout *t_callout;
95289e560f4SRandall Stewart 	uint32_t t_flags;
95389e560f4SRandall Stewart 
95489e560f4SRandall Stewart 	switch (timer_type) {
95589e560f4SRandall Stewart 		case TT_DELACK:
95689e560f4SRandall Stewart 			t_flags = TT_DELACK_SUS;
95789e560f4SRandall Stewart 			t_callout = &tp->t_timers->tt_delack;
95889e560f4SRandall Stewart 			break;
95989e560f4SRandall Stewart 		case TT_REXMT:
96089e560f4SRandall Stewart 			t_flags = TT_REXMT_SUS;
96189e560f4SRandall Stewart 			t_callout = &tp->t_timers->tt_rexmt;
96289e560f4SRandall Stewart 			break;
96389e560f4SRandall Stewart 		case TT_PERSIST:
96489e560f4SRandall Stewart 			t_flags = TT_PERSIST_SUS;
96589e560f4SRandall Stewart 			t_callout = &tp->t_timers->tt_persist;
96689e560f4SRandall Stewart 			break;
96789e560f4SRandall Stewart 		case TT_KEEP:
96889e560f4SRandall Stewart 			t_flags = TT_KEEP_SUS;
96989e560f4SRandall Stewart 			t_callout = &tp->t_timers->tt_keep;
97089e560f4SRandall Stewart 			break;
97189e560f4SRandall Stewart 		case TT_2MSL:
97289e560f4SRandall Stewart 			t_flags = TT_2MSL_SUS;
97389e560f4SRandall Stewart 			t_callout = &tp->t_timers->tt_2msl;
97489e560f4SRandall Stewart 			break;
97589e560f4SRandall Stewart 		default:
97689e560f4SRandall Stewart 			panic("tp:%p bad timer_type 0x%x", tp, timer_type);
97789e560f4SRandall Stewart 	}
97889e560f4SRandall Stewart 	tp->t_timers->tt_flags |= t_flags;
97989e560f4SRandall Stewart 	return (callout_stop(t_callout));
98089e560f4SRandall Stewart }
98189e560f4SRandall Stewart 
98289e560f4SRandall Stewart void
98389e560f4SRandall Stewart tcp_timers_unsuspend(struct tcpcb *tp, uint32_t timer_type)
98489e560f4SRandall Stewart {
98589e560f4SRandall Stewart 	switch (timer_type) {
98689e560f4SRandall Stewart 		case TT_DELACK:
98789e560f4SRandall Stewart 			if (tp->t_timers->tt_flags & TT_DELACK_SUS) {
98889e560f4SRandall Stewart 				tp->t_timers->tt_flags &= ~TT_DELACK_SUS;
98989e560f4SRandall Stewart 				if (tp->t_flags & TF_DELACK) {
99089e560f4SRandall Stewart 					/* Delayed ack timer should be up activate a timer */
99189e560f4SRandall Stewart 					tp->t_flags &= ~TF_DELACK;
99289e560f4SRandall Stewart 					tcp_timer_activate(tp, TT_DELACK,
99389e560f4SRandall Stewart 					    tcp_delacktime);
99489e560f4SRandall Stewart 				}
99589e560f4SRandall Stewart 			}
99689e560f4SRandall Stewart 			break;
99789e560f4SRandall Stewart 		case TT_REXMT:
99889e560f4SRandall Stewart 			if (tp->t_timers->tt_flags & TT_REXMT_SUS) {
99989e560f4SRandall Stewart 				tp->t_timers->tt_flags &= ~TT_REXMT_SUS;
100089e560f4SRandall Stewart 				if (SEQ_GT(tp->snd_max, tp->snd_una) &&
100189e560f4SRandall Stewart 				    (tcp_timer_active((tp), TT_PERSIST) == 0) &&
100289e560f4SRandall Stewart 				    tp->snd_wnd) {
100389e560f4SRandall Stewart 					/* We have outstanding data activate a timer */
100489e560f4SRandall Stewart 					tcp_timer_activate(tp, TT_REXMT,
100589e560f4SRandall Stewart                                             tp->t_rxtcur);
100689e560f4SRandall Stewart 				}
100789e560f4SRandall Stewart 			}
100889e560f4SRandall Stewart 			break;
100989e560f4SRandall Stewart 		case TT_PERSIST:
101089e560f4SRandall Stewart 			if (tp->t_timers->tt_flags & TT_PERSIST_SUS) {
101189e560f4SRandall Stewart 				tp->t_timers->tt_flags &= ~TT_PERSIST_SUS;
101289e560f4SRandall Stewart 				if (tp->snd_wnd == 0) {
101389e560f4SRandall Stewart 					/* Activate the persists timer */
101489e560f4SRandall Stewart 					tp->t_rxtshift = 0;
101589e560f4SRandall Stewart 					tcp_setpersist(tp);
101689e560f4SRandall Stewart 				}
101789e560f4SRandall Stewart 			}
101889e560f4SRandall Stewart 			break;
101989e560f4SRandall Stewart 		case TT_KEEP:
102089e560f4SRandall Stewart 			if (tp->t_timers->tt_flags & TT_KEEP_SUS) {
102189e560f4SRandall Stewart 				tp->t_timers->tt_flags &= ~TT_KEEP_SUS;
102289e560f4SRandall Stewart 				tcp_timer_activate(tp, TT_KEEP,
102389e560f4SRandall Stewart 					    TCPS_HAVEESTABLISHED(tp->t_state) ?
102489e560f4SRandall Stewart 					    TP_KEEPIDLE(tp) : TP_KEEPINIT(tp));
102589e560f4SRandall Stewart 			}
102689e560f4SRandall Stewart 			break;
102789e560f4SRandall Stewart 		case TT_2MSL:
102889e560f4SRandall Stewart 			if (tp->t_timers->tt_flags &= TT_2MSL_SUS) {
102989e560f4SRandall Stewart 				tp->t_timers->tt_flags &= ~TT_2MSL_SUS;
103089e560f4SRandall Stewart 				if ((tp->t_state == TCPS_FIN_WAIT_2) &&
103189e560f4SRandall Stewart 				    ((tp->t_inpcb->inp_socket == NULL) ||
103289e560f4SRandall Stewart 				     (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE))) {
103389e560f4SRandall Stewart 					/* Star the 2MSL timer */
103489e560f4SRandall Stewart 					tcp_timer_activate(tp, TT_2MSL,
103589e560f4SRandall Stewart 					    (tcp_fast_finwait2_recycle) ?
103689e560f4SRandall Stewart 					    tcp_finwait2_timeout : TP_MAXIDLE(tp));
103789e560f4SRandall Stewart 				}
103889e560f4SRandall Stewart 			}
103989e560f4SRandall Stewart 			break;
104089e560f4SRandall Stewart 		default:
104189e560f4SRandall Stewart 			panic("tp:%p bad timer_type 0x%x", tp, timer_type);
104289e560f4SRandall Stewart 	}
104389e560f4SRandall Stewart }
104489e560f4SRandall Stewart 
1045*ff945008SGleb Smirnoff static void
1046*ff945008SGleb Smirnoff tcp_timer_discard(void *ptp)
1047*ff945008SGleb Smirnoff {
1048*ff945008SGleb Smirnoff 	struct inpcb *inp;
1049*ff945008SGleb Smirnoff 	struct tcpcb *tp;
1050*ff945008SGleb Smirnoff 	struct epoch_tracker et;
1051*ff945008SGleb Smirnoff 
1052*ff945008SGleb Smirnoff 	tp = (struct tcpcb *)ptp;
1053*ff945008SGleb Smirnoff 	CURVNET_SET(tp->t_vnet);
1054*ff945008SGleb Smirnoff 	NET_EPOCH_ENTER(et);
1055*ff945008SGleb Smirnoff 	inp = tp->t_inpcb;
1056*ff945008SGleb Smirnoff 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
1057*ff945008SGleb Smirnoff 		__func__, tp));
1058*ff945008SGleb Smirnoff 	INP_WLOCK(inp);
1059*ff945008SGleb Smirnoff 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0,
1060*ff945008SGleb Smirnoff 		("%s: tcpcb has to be stopped here", __func__));
1061*ff945008SGleb Smirnoff 	if (--tp->t_timers->tt_draincnt > 0 ||
1062*ff945008SGleb Smirnoff 	    tcp_freecb(tp) == false)
1063*ff945008SGleb Smirnoff 		INP_WUNLOCK(inp);
1064*ff945008SGleb Smirnoff 	NET_EPOCH_EXIT(et);
1065*ff945008SGleb Smirnoff 	CURVNET_RESTORE();
1066*ff945008SGleb Smirnoff }
1067*ff945008SGleb Smirnoff 
10685571f9cfSJulien Charbon void
10695571f9cfSJulien Charbon tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
10705571f9cfSJulien Charbon {
10715571f9cfSJulien Charbon 	struct callout *t_callout;
10725571f9cfSJulien Charbon 
10735571f9cfSJulien Charbon 	tp->t_timers->tt_flags |= TT_STOPPED;
10745571f9cfSJulien Charbon 	switch (timer_type) {
10755571f9cfSJulien Charbon 		case TT_DELACK:
10765571f9cfSJulien Charbon 			t_callout = &tp->t_timers->tt_delack;
10775571f9cfSJulien Charbon 			break;
10785571f9cfSJulien Charbon 		case TT_REXMT:
10795571f9cfSJulien Charbon 			t_callout = &tp->t_timers->tt_rexmt;
10805571f9cfSJulien Charbon 			break;
10815571f9cfSJulien Charbon 		case TT_PERSIST:
10825571f9cfSJulien Charbon 			t_callout = &tp->t_timers->tt_persist;
10835571f9cfSJulien Charbon 			break;
10845571f9cfSJulien Charbon 		case TT_KEEP:
10855571f9cfSJulien Charbon 			t_callout = &tp->t_timers->tt_keep;
10865571f9cfSJulien Charbon 			break;
10875571f9cfSJulien Charbon 		case TT_2MSL:
10885571f9cfSJulien Charbon 			t_callout = &tp->t_timers->tt_2msl;
10895571f9cfSJulien Charbon 			break;
10905571f9cfSJulien Charbon 		default:
109155bceb1eSRandall Stewart 			if (tp->t_fb->tfb_tcp_timer_stop) {
109255bceb1eSRandall Stewart 				/*
109355bceb1eSRandall Stewart 				 * XXXrrs we need to look at this with the
109455bceb1eSRandall Stewart 				 * stop case below (flags).
109555bceb1eSRandall Stewart 				 */
109655bceb1eSRandall Stewart 				tp->t_fb->tfb_tcp_timer_stop(tp, timer_type);
109755bceb1eSRandall Stewart 				return;
109855bceb1eSRandall Stewart 			}
10995571f9cfSJulien Charbon 			panic("tp %p bad timer_type %#x", tp, timer_type);
11005571f9cfSJulien Charbon 		}
11015571f9cfSJulien Charbon 
1102e5ad6456SRandall Stewart 	if (callout_async_drain(t_callout, tcp_timer_discard) == 0) {
11035571f9cfSJulien Charbon 		/*
11045571f9cfSJulien Charbon 		 * Can't stop the callout, defer tcpcb actual deletion
1105e5ad6456SRandall Stewart 		 * to the last one. We do this using the async drain
1106e5ad6456SRandall Stewart 		 * function and incrementing the count in
11075571f9cfSJulien Charbon 		 */
1108e5ad6456SRandall Stewart 		tp->t_timers->tt_draincnt++;
11095571f9cfSJulien Charbon 	}
11105571f9cfSJulien Charbon }
1111