xref: /freebsd/sys/netinet/tcp_timer.c (revision 77198a945ae5dc04ac46f379cb624208a1453c65)
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 */
33df8bae1dSRodney W. Grimes 
344b421e2dSMike Silbersack #include <sys/cdefs.h>
354b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
364b421e2dSMike Silbersack 
37825fd1e4SNavdeep Parhar #include "opt_inet.h"
38fb59c426SYoshinobu Inoue #include "opt_inet6.h"
390cc12cc5SJoerg Wunsch #include "opt_tcpdebug.h"
40883831c6SAdrian Chadd #include "opt_rss.h"
410cc12cc5SJoerg Wunsch 
42df8bae1dSRodney W. Grimes #include <sys/param.h>
4398163b98SPoul-Henning Kamp #include <sys/kernel.h>
44c74af4faSBruce Evans #include <sys/lock.h>
4508517d53SMike Silbersack #include <sys/mbuf.h>
46c74af4faSBruce Evans #include <sys/mutex.h>
47c74af4faSBruce Evans #include <sys/protosw.h>
4887aedea4SKip Macy #include <sys/smp.h>
49df8bae1dSRodney W. Grimes #include <sys/socket.h>
50df8bae1dSRodney W. Grimes #include <sys/socketvar.h>
51c74af4faSBruce Evans #include <sys/sysctl.h>
52c74af4faSBruce Evans #include <sys/systm.h>
53e79adb8eSGarrett Wollman 
544b79449eSBjoern A. Zeeb #include <net/if.h>
55df8bae1dSRodney W. Grimes #include <net/route.h>
56b2bdc62aSAdrian Chadd #include <net/rss_config.h>
57530c0060SRobert Watson #include <net/vnet.h>
58883831c6SAdrian Chadd #include <net/netisr.h>
59df8bae1dSRodney W. Grimes 
60df8bae1dSRodney W. Grimes #include <netinet/in.h>
615d06879aSGeorge V. Neville-Neil #include <netinet/in_kdtrace.h>
62df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
63883831c6SAdrian Chadd #include <netinet/in_rss.h>
64c74af4faSBruce Evans #include <netinet/in_systm.h>
65fb59c426SYoshinobu Inoue #ifdef INET6
66fb59c426SYoshinobu Inoue #include <netinet6/in6_pcb.h>
67fb59c426SYoshinobu Inoue #endif
68df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
692de3e790SGleb Smirnoff #include <netinet/tcp.h>
70df8bae1dSRodney W. Grimes #include <netinet/tcp_fsm.h>
712529f56eSJonathan T. Looney #include <netinet/tcp_log_buf.h>
72df8bae1dSRodney W. Grimes #include <netinet/tcp_timer.h>
73df8bae1dSRodney W. Grimes #include <netinet/tcp_var.h>
7489e560f4SRandall Stewart #include <netinet/tcp_seq.h>
754644fda3SGleb Smirnoff #include <netinet/cc/cc.h>
76f6f6703fSSean Bruno #ifdef INET6
77f6f6703fSSean Bruno #include <netinet6/tcp6_var.h>
78f6f6703fSSean Bruno #endif
79df8bae1dSRodney W. Grimes #include <netinet/tcpip.h>
80af7a2999SDavid Greenman #include <netinet/tcp_debug.h>
81df8bae1dSRodney W. Grimes 
820645c604SHiren Panchasara int    tcp_persmin;
837029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin,
847029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
857029da5cSPawel Biernacki     &tcp_persmin, 0, sysctl_msec_to_ticks, "I",
867029da5cSPawel Biernacki     "minimum persistence interval");
870645c604SHiren Panchasara 
880645c604SHiren Panchasara int    tcp_persmax;
897029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax,
907029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
917029da5cSPawel Biernacki     &tcp_persmax, 0, sysctl_msec_to_ticks, "I",
927029da5cSPawel Biernacki     "maximum persistence interval");
930645c604SHiren Panchasara 
949b8b58e0SJonathan Lemon int	tcp_keepinit;
957029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit,
967029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
977029da5cSPawel Biernacki     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I",
987029da5cSPawel Biernacki     "time to establish connection");
997b40aa32SPaul Traina 
1009b8b58e0SJonathan Lemon int	tcp_keepidle;
1017029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle,
1027029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1037029da5cSPawel Biernacki     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I",
1047029da5cSPawel Biernacki     "time before keepalive probes begin");
10598163b98SPoul-Henning Kamp 
1069b8b58e0SJonathan Lemon int	tcp_keepintvl;
1077029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl,
1087029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1097029da5cSPawel Biernacki     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I",
1107029da5cSPawel Biernacki     "time between keepalive probes");
11198163b98SPoul-Henning Kamp 
1129b8b58e0SJonathan Lemon int	tcp_delacktime;
1137029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime,
1147029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1156489fe65SAndre Oppermann     &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
116ccb4d0c6SJonathan Lemon     "Time before a delayed ACK is sent");
1179b8b58e0SJonathan Lemon 
118c2c8e360SAlexander V. Chernikov VNET_DEFINE(int, tcp_msl);
1197029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl,
120c2c8e360SAlexander V. Chernikov     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET,
121c2c8e360SAlexander V. Chernikov     &VNET_NAME(tcp_msl), 0, sysctl_msec_to_ticks, "I",
1227029da5cSPawel Biernacki     "Maximum segment lifetime");
1239b8b58e0SJonathan Lemon 
1240999766dSMichael Tuexen int	tcp_rexmit_initial;
1257029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_initial,
1267029da5cSPawel Biernacki    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1270999766dSMichael Tuexen     &tcp_rexmit_initial, 0, sysctl_msec_to_ticks, "I",
1280999766dSMichael Tuexen     "Initial Retransmission Timeout");
1290999766dSMichael Tuexen 
130701bec5aSMatthew Dillon int	tcp_rexmit_min;
1317029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min,
1327029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1336489fe65SAndre Oppermann     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
1346489fe65SAndre Oppermann     "Minimum Retransmission Timeout");
135701bec5aSMatthew Dillon 
136701bec5aSMatthew Dillon int	tcp_rexmit_slop;
1377029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop,
1387029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1396489fe65SAndre Oppermann     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
1406489fe65SAndre Oppermann     "Retransmission Timer Slop");
141701bec5aSMatthew Dillon 
142334fc582SBjoern A. Zeeb VNET_DEFINE(int, tcp_always_keepalive) = 1;
143334fc582SBjoern A. Zeeb SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_VNET|CTLFLAG_RW,
144334fc582SBjoern A. Zeeb     &VNET_NAME(tcp_always_keepalive) , 0,
145334fc582SBjoern A. Zeeb     "Assume SO_KEEPALIVE on all TCP connections");
14634be9bf3SPoul-Henning Kamp 
1477c72af87SMohan Srinivasan int    tcp_fast_finwait2_recycle = 0;
1487c72af87SMohan Srinivasan SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
1496489fe65SAndre Oppermann     &tcp_fast_finwait2_recycle, 0,
1506489fe65SAndre Oppermann     "Recycle closed FIN_WAIT_2 connections faster");
1517c72af87SMohan Srinivasan 
1527c72af87SMohan Srinivasan int    tcp_finwait2_timeout;
1537029da5cSPawel Biernacki SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout,
1547029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
1557029da5cSPawel Biernacki     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I",
1567029da5cSPawel Biernacki     "FIN-WAIT2 timeout");
1577c72af87SMohan Srinivasan 
1589077f387SGleb Smirnoff int	tcp_keepcnt = TCPTV_KEEPCNT;
1599077f387SGleb Smirnoff SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
1609077f387SGleb Smirnoff     "Number of keepalive probes to send");
1617c72af87SMohan Srinivasan 
1620312fbe9SPoul-Henning Kamp 	/* max idle probes */
1639b8b58e0SJonathan Lemon int	tcp_maxpersistidle;
164e79adb8eSGarrett Wollman 
16589e560f4SRandall Stewart int	tcp_rexmit_drop_options = 0;
1666c0ef895SJohn Baldwin SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
1676c0ef895SJohn Baldwin     &tcp_rexmit_drop_options, 0,
1686c0ef895SJohn Baldwin     "Drop TCP options from 3rd and later retransmitted SYN");
1696c0ef895SJohn Baldwin 
17008af8aacSRandall Stewart int	tcp_maxunacktime = TCPTV_MAXUNACKTIME;
17108af8aacSRandall Stewart SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxunacktime,
17208af8aacSRandall Stewart     CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_NEEDGIANT,
17308af8aacSRandall Stewart     &tcp_maxunacktime, 0, sysctl_msec_to_ticks, "I",
17408af8aacSRandall Stewart     "Maximum time (in ms) that a session can linger without making progress");
17508af8aacSRandall Stewart 
176e29c55e4SGleb Smirnoff VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
177f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
178f0188618SHans Petter Selasky     CTLFLAG_RW|CTLFLAG_VNET,
179f6f6703fSSean Bruno     &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
180f6f6703fSSean Bruno     "Path MTU Discovery Black Hole Detection Enabled");
181f6f6703fSSean Bruno 
182f6f6703fSSean Bruno #ifdef INET
183e29c55e4SGleb Smirnoff VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
184f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
185f0188618SHans Petter Selasky     CTLFLAG_RW|CTLFLAG_VNET,
186f6f6703fSSean Bruno     &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
187f6f6703fSSean Bruno     "Path MTU Discovery Black Hole Detection lowered MSS");
188f6f6703fSSean Bruno #endif
189f6f6703fSSean Bruno 
190f6f6703fSSean Bruno #ifdef INET6
191e29c55e4SGleb Smirnoff VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
192f6f6703fSSean Bruno SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
193f0188618SHans Petter Selasky     CTLFLAG_RW|CTLFLAG_VNET,
194f6f6703fSSean Bruno     &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
195f6f6703fSSean Bruno     "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
196f6f6703fSSean Bruno #endif
197f6f6703fSSean Bruno 
1988f7e75cbSAdrian Chadd #ifdef	RSS
1998f7e75cbSAdrian Chadd static int	per_cpu_timers = 1;
2008f7e75cbSAdrian Chadd #else
20187aedea4SKip Macy static int	per_cpu_timers = 0;
2028f7e75cbSAdrian Chadd #endif
20387aedea4SKip Macy SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
20487aedea4SKip Macy     &per_cpu_timers , 0, "run tcp timers on all cpus");
20587aedea4SKip Macy 
206883831c6SAdrian Chadd /*
207883831c6SAdrian Chadd  * Map the given inp to a CPU id.
208883831c6SAdrian Chadd  *
209883831c6SAdrian Chadd  * This queries RSS if it's compiled in, else it defaults to the current
210883831c6SAdrian Chadd  * CPU ID.
211883831c6SAdrian Chadd  */
21289e560f4SRandall Stewart inline int
213883831c6SAdrian Chadd inp_to_cpuid(struct inpcb *inp)
214883831c6SAdrian Chadd {
215883831c6SAdrian Chadd 	u_int cpuid;
216883831c6SAdrian Chadd 
217883831c6SAdrian Chadd 	if (per_cpu_timers) {
21847ded797SFranco Fichtner #ifdef	RSS
219883831c6SAdrian Chadd 		cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
220883831c6SAdrian Chadd 		if (cpuid == NETISR_CPUID_NONE)
221883831c6SAdrian Chadd 			return (curcpu);	/* XXX */
222883831c6SAdrian Chadd 		else
223883831c6SAdrian Chadd 			return (cpuid);
22447ded797SFranco Fichtner #endif
225883831c6SAdrian Chadd 		/*
226883831c6SAdrian Chadd 		 * We don't have a flowid -> cpuid mapping, so cheat and
227883831c6SAdrian Chadd 		 * just map unknown cpuids to curcpu.  Not the best, but
228883831c6SAdrian Chadd 		 * apparently better than defaulting to swi 0.
229883831c6SAdrian Chadd 		 */
230883831c6SAdrian Chadd 		cpuid = inp->inp_flowid % (mp_maxid + 1);
231883831c6SAdrian Chadd 		if (! CPU_ABSENT(cpuid))
232883831c6SAdrian Chadd 			return (cpuid);
233883831c6SAdrian Chadd 		return (curcpu);
23447ded797SFranco Fichtner 	} else {
235883831c6SAdrian Chadd 		return (0);
236883831c6SAdrian Chadd 	}
237883831c6SAdrian Chadd }
23887aedea4SKip Macy 
239df8bae1dSRodney W. Grimes /*
2406c452841SGleb Smirnoff  * Legacy TCP global callout routine called every 500 ms.
2416c452841SGleb Smirnoff  * Used to cleanup timewait states, which lack their own callouts.
242df8bae1dSRodney W. Grimes  */
2436c452841SGleb Smirnoff static struct callout tcpslow_callout;
2446c452841SGleb Smirnoff static void
2456c452841SGleb Smirnoff tcp_slowtimo(void *arg __unused)
246df8bae1dSRodney W. Grimes {
2476c452841SGleb Smirnoff 	struct epoch_tracker et;
2488b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
24915bd2b43SDavid Greenman 
2506c452841SGleb Smirnoff 	NET_EPOCH_ENTER(et);
2515ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
2528b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
2538b615593SMarko Zec 		CURVNET_SET(vnet_iter);
254cea40c48SJulien Charbon 		(void) tcp_tw_2msl_scan(0);
2558b615593SMarko Zec 		CURVNET_RESTORE();
2568b615593SMarko Zec 	}
2575ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
2586c452841SGleb Smirnoff 	NET_EPOCH_EXIT(et);
2596c452841SGleb Smirnoff 
2606c452841SGleb Smirnoff 	callout_reset_sbt(&tcpslow_callout, SBT_1MS * 500, SBT_1MS * 10,
2616c452841SGleb Smirnoff 	    tcp_slowtimo, NULL, 0);
262df8bae1dSRodney W. Grimes }
263df8bae1dSRodney W. Grimes 
2646c452841SGleb Smirnoff static void
2656c452841SGleb Smirnoff tcp_slowtimo_init(void *arg __unused)
2666c452841SGleb Smirnoff {
2676c452841SGleb Smirnoff 
2686c452841SGleb Smirnoff         callout_init(&tcpslow_callout, 1);
2696c452841SGleb Smirnoff 	callout_reset_sbt(&tcpslow_callout, SBT_1MS * 500, SBT_1MS * 10,
2706c452841SGleb Smirnoff 	    tcp_slowtimo, NULL, 0);
2716c452841SGleb Smirnoff }
2726c452841SGleb Smirnoff SYSINIT(tcp_timer, SI_SUB_VNET_DONE, SI_ORDER_ANY, tcp_slowtimo_init, NULL);
2736c452841SGleb Smirnoff 
274df8bae1dSRodney W. Grimes int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
275f058535dSJeffrey Hsu     { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
276df8bae1dSRodney W. Grimes 
27789e560f4SRandall Stewart int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
278e79adb8eSGarrett Wollman 
279df8bae1dSRodney W. Grimes /*
280df8bae1dSRodney W. Grimes  * TCP timer processing.
281df8bae1dSRodney W. Grimes  */
28285d94372SRobert Watson 
28385d94372SRobert Watson void
28485d94372SRobert Watson tcp_timer_delack(void *xtp)
285df8bae1dSRodney W. Grimes {
286109eb549SGleb Smirnoff 	struct epoch_tracker et;
28785d94372SRobert Watson 	struct tcpcb *tp = xtp;
28885d94372SRobert Watson 	struct inpcb *inp;
2898b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
29085d94372SRobert Watson 
29185d94372SRobert Watson 	inp = tp->t_inpcb;
2925571f9cfSJulien Charbon 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
2938501a69cSRobert Watson 	INP_WLOCK(inp);
294655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_delack) ||
295655f934bSMikolaj Golub 	    !callout_active(&tp->t_timers->tt_delack)) {
2968501a69cSRobert Watson 		INP_WUNLOCK(inp);
2978b615593SMarko Zec 		CURVNET_RESTORE();
29885d94372SRobert Watson 		return;
29985d94372SRobert Watson 	}
300e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_delack);
301655f934bSMikolaj Golub 	if ((inp->inp_flags & INP_DROPPED) != 0) {
302655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
303655f934bSMikolaj Golub 		CURVNET_RESTORE();
304655f934bSMikolaj Golub 		return;
305655f934bSMikolaj Golub 	}
3069b8b58e0SJonathan Lemon 	tp->t_flags |= TF_ACKNOW;
30778b50714SRobert Watson 	TCPSTAT_INC(tcps_delack);
308109eb549SGleb Smirnoff 	NET_EPOCH_ENTER(et);
309f64dc2abSGleb Smirnoff 	(void) tcp_output_unlock(tp);
310109eb549SGleb Smirnoff 	NET_EPOCH_EXIT(et);
3118b615593SMarko Zec 	CURVNET_RESTORE();
3129b8b58e0SJonathan Lemon }
3139b8b58e0SJonathan Lemon 
314*77198a94SGleb Smirnoff /*
315*77198a94SGleb Smirnoff  * Call tcp_close() from a callout context.
316*77198a94SGleb Smirnoff  */
317*77198a94SGleb Smirnoff static void
318*77198a94SGleb Smirnoff tcp_timer_close(struct tcpcb *tp)
319b07fef50SRandall Stewart {
320*77198a94SGleb Smirnoff 	struct epoch_tracker et;
321*77198a94SGleb Smirnoff 	struct inpcb *inp = tp->t_inpcb;
322*77198a94SGleb Smirnoff 
323*77198a94SGleb Smirnoff 	INP_WLOCK_ASSERT(inp);
324*77198a94SGleb Smirnoff 
325*77198a94SGleb Smirnoff 	NET_EPOCH_ENTER(et);
326*77198a94SGleb Smirnoff 	tp = tcp_close(tp);
327*77198a94SGleb Smirnoff 	NET_EPOCH_EXIT(et);
328*77198a94SGleb Smirnoff 	if (tp != NULL)
329*77198a94SGleb Smirnoff 		INP_WUNLOCK(inp);
330*77198a94SGleb Smirnoff }
331*77198a94SGleb Smirnoff 
332*77198a94SGleb Smirnoff /*
333*77198a94SGleb Smirnoff  * Call tcp_drop() from a callout context.
334*77198a94SGleb Smirnoff  */
335*77198a94SGleb Smirnoff static void
336*77198a94SGleb Smirnoff tcp_timer_drop(struct tcpcb *tp)
337*77198a94SGleb Smirnoff {
338*77198a94SGleb Smirnoff 	struct epoch_tracker et;
339*77198a94SGleb Smirnoff 	struct inpcb *inp = tp->t_inpcb;
340*77198a94SGleb Smirnoff 
341*77198a94SGleb Smirnoff 	INP_WLOCK_ASSERT(inp);
342*77198a94SGleb Smirnoff 
343*77198a94SGleb Smirnoff 	NET_EPOCH_ENTER(et);
344*77198a94SGleb Smirnoff 	tp = tcp_drop(tp, ETIMEDOUT);
345*77198a94SGleb Smirnoff 	NET_EPOCH_EXIT(et);
346*77198a94SGleb Smirnoff 	if (tp != NULL)
347b07fef50SRandall Stewart 		INP_WUNLOCK(inp);
348b07fef50SRandall Stewart }
349b07fef50SRandall Stewart 
35085d94372SRobert Watson void
35185d94372SRobert Watson tcp_timer_2msl(void *xtp)
3529b8b58e0SJonathan Lemon {
35385d94372SRobert Watson 	struct tcpcb *tp = xtp;
35485d94372SRobert Watson 	struct inpcb *inp;
3558b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
3569b8b58e0SJonathan Lemon #ifdef TCPDEBUG
3579b8b58e0SJonathan Lemon 	int ostate;
3589b8b58e0SJonathan Lemon 
3599b8b58e0SJonathan Lemon 	ostate = tp->t_state;
3609b8b58e0SJonathan Lemon #endif
36185d94372SRobert Watson 	inp = tp->t_inpcb;
3625571f9cfSJulien Charbon 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
3638501a69cSRobert Watson 	INP_WLOCK(inp);
364d1b07f36SRandall Stewart 	tcp_log_end_status(tp, TCP_EI_STATUS_2MSL);
36585d94372SRobert Watson 	tcp_free_sackholes(tp);
366655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_2msl) ||
367e2f2059fSMike Silbersack 	    !callout_active(&tp->t_timers->tt_2msl)) {
3688501a69cSRobert Watson 		INP_WUNLOCK(tp->t_inpcb);
3698b615593SMarko Zec 		CURVNET_RESTORE();
37085d94372SRobert Watson 		return;
37185d94372SRobert Watson 	}
372e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_2msl);
3739a06a824SGleb Smirnoff 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
374655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
375655f934bSMikolaj Golub 		CURVNET_RESTORE();
376655f934bSMikolaj Golub 		return;
377655f934bSMikolaj Golub 	}
3785571f9cfSJulien Charbon 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
3795571f9cfSJulien Charbon 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
38085d94372SRobert Watson 	/*
381df8bae1dSRodney W. Grimes 	 * 2 MSL timeout in shutdown went off.  If we're closed but
382df8bae1dSRodney W. Grimes 	 * still waiting for peer to close and connection has been idle
38331a7749dSJulien Charbon 	 * too long delete connection control block.  Otherwise, check
38431a7749dSJulien Charbon 	 * again in a bit.
38531a7749dSJulien Charbon 	 *
3867c72af87SMohan Srinivasan 	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
3877c72af87SMohan Srinivasan 	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
3887c72af87SMohan Srinivasan 	 * Ignore fact that there were recent incoming segments.
389df8bae1dSRodney W. Grimes 	 */
3907c72af87SMohan Srinivasan 	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
39185d94372SRobert Watson 	    tp->t_inpcb && tp->t_inpcb->inp_socket &&
3927c72af87SMohan Srinivasan 	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
39378b50714SRobert Watson 		TCPSTAT_INC(tcps_finwait2_drops);
394*77198a94SGleb Smirnoff 		tcp_timer_close(tp);
395*77198a94SGleb Smirnoff 		CURVNET_RESTORE();
396*77198a94SGleb Smirnoff 		return;
3977c72af87SMohan Srinivasan 	} else {
398d6de19acSJulien Charbon 		if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
399b07fef50SRandall Stewart 			callout_reset(&tp->t_timers->tt_2msl,
400b07fef50SRandall Stewart 				      TP_KEEPINTVL(tp), tcp_timer_2msl, tp);
401b07fef50SRandall Stewart 		} else {
402*77198a94SGleb Smirnoff 			tcp_timer_close(tp);
403*77198a94SGleb Smirnoff 			CURVNET_RESTORE();
404*77198a94SGleb Smirnoff 			return;
405b07fef50SRandall Stewart 		}
4067c72af87SMohan Srinivasan 	}
407df8bae1dSRodney W. Grimes 
4089b8b58e0SJonathan Lemon #ifdef TCPDEBUG
409*77198a94SGleb Smirnoff 	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
410fb59c426SYoshinobu Inoue 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
4119b8b58e0SJonathan Lemon 			  PRU_SLOWTIMO);
4129b8b58e0SJonathan Lemon #endif
4135d06879aSGeorge V. Neville-Neil 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
4145d06879aSGeorge V. Neville-Neil 
4158501a69cSRobert Watson 	INP_WUNLOCK(inp);
4168b615593SMarko Zec 	CURVNET_RESTORE();
4179b8b58e0SJonathan Lemon }
4189b8b58e0SJonathan Lemon 
41985d94372SRobert Watson void
42085d94372SRobert Watson tcp_timer_keep(void *xtp)
4219b8b58e0SJonathan Lemon {
42285d94372SRobert Watson 	struct tcpcb *tp = xtp;
42308517d53SMike Silbersack 	struct tcptemp *t_template;
42485d94372SRobert Watson 	struct inpcb *inp;
4256573d758SMatt Macy 	struct epoch_tracker et;
4268b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
4279b8b58e0SJonathan Lemon #ifdef TCPDEBUG
4289b8b58e0SJonathan Lemon 	int ostate;
4299b8b58e0SJonathan Lemon 
4309b8b58e0SJonathan Lemon 	ostate = tp->t_state;
4319b8b58e0SJonathan Lemon #endif
43285d94372SRobert Watson 	inp = tp->t_inpcb;
4335571f9cfSJulien Charbon 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
4348501a69cSRobert Watson 	INP_WLOCK(inp);
435655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_keep) ||
436655f934bSMikolaj Golub 	    !callout_active(&tp->t_timers->tt_keep)) {
4378501a69cSRobert Watson 		INP_WUNLOCK(inp);
4388b615593SMarko Zec 		CURVNET_RESTORE();
43985d94372SRobert Watson 		return;
44085d94372SRobert Watson 	}
441e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_keep);
4429a06a824SGleb Smirnoff 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
443655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
444655f934bSMikolaj Golub 		CURVNET_RESTORE();
445655f934bSMikolaj Golub 		return;
446655f934bSMikolaj Golub 	}
4475571f9cfSJulien Charbon 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
4485571f9cfSJulien Charbon 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
4496d172f58SJonathan T. Looney 
4506d172f58SJonathan T. Looney 	/*
4516d172f58SJonathan T. Looney 	 * Because we don't regularly reset the keepalive callout in
4526d172f58SJonathan T. Looney 	 * the ESTABLISHED state, it may be that we don't actually need
4536d172f58SJonathan T. Looney 	 * to send a keepalive yet. If that occurs, schedule another
4546d172f58SJonathan T. Looney 	 * call for the next time the keepalive timer might expire.
4556d172f58SJonathan T. Looney 	 */
4566d172f58SJonathan T. Looney 	if (TCPS_HAVEESTABLISHED(tp->t_state)) {
4576d172f58SJonathan T. Looney 		u_int idletime;
4586d172f58SJonathan T. Looney 
4596d172f58SJonathan T. Looney 		idletime = ticks - tp->t_rcvtime;
4606d172f58SJonathan T. Looney 		if (idletime < TP_KEEPIDLE(tp)) {
4616d172f58SJonathan T. Looney 			callout_reset(&tp->t_timers->tt_keep,
4626d172f58SJonathan T. Looney 			    TP_KEEPIDLE(tp) - idletime, tcp_timer_keep, tp);
4636d172f58SJonathan T. Looney 			INP_WUNLOCK(inp);
4646d172f58SJonathan T. Looney 			CURVNET_RESTORE();
4656d172f58SJonathan T. Looney 			return;
4666d172f58SJonathan T. Looney 		}
4676d172f58SJonathan T. Looney 	}
4686d172f58SJonathan T. Looney 
4699b8b58e0SJonathan Lemon 	/*
4709b8b58e0SJonathan Lemon 	 * Keep-alive timer went off; send something
4719b8b58e0SJonathan Lemon 	 * or drop connection if idle for too long.
4729b8b58e0SJonathan Lemon 	 */
47378b50714SRobert Watson 	TCPSTAT_INC(tcps_keeptimeo);
4749b8b58e0SJonathan Lemon 	if (tp->t_state < TCPS_ESTABLISHED)
4759b8b58e0SJonathan Lemon 		goto dropit;
476334fc582SBjoern A. Zeeb 	if ((V_tcp_always_keepalive ||
477f1798531SJohn Baldwin 	    inp->inp_socket->so_options & SO_KEEPALIVE) &&
4789b8b58e0SJonathan Lemon 	    tp->t_state <= TCPS_CLOSING) {
4799077f387SGleb Smirnoff 		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
4809b8b58e0SJonathan Lemon 			goto dropit;
4819b8b58e0SJonathan Lemon 		/*
4829b8b58e0SJonathan Lemon 		 * Send a packet designed to force a response
4839b8b58e0SJonathan Lemon 		 * if the peer is up and reachable:
4849b8b58e0SJonathan Lemon 		 * either an ACK if the connection is still alive,
4859b8b58e0SJonathan Lemon 		 * or an RST if the peer has closed the connection
4869b8b58e0SJonathan Lemon 		 * due to timeout or reboot.
4879b8b58e0SJonathan Lemon 		 * Using sequence number tp->snd_una-1
4889b8b58e0SJonathan Lemon 		 * causes the transmitted zero-length segment
4899b8b58e0SJonathan Lemon 		 * to lie outside the receive window;
4909b8b58e0SJonathan Lemon 		 * by the protocol spec, this requires the
4919b8b58e0SJonathan Lemon 		 * correspondent TCP to respond.
4929b8b58e0SJonathan Lemon 		 */
49378b50714SRobert Watson 		TCPSTAT_INC(tcps_keepprobe);
49479909384SJonathan Lemon 		t_template = tcpip_maketemplate(inp);
49508517d53SMike Silbersack 		if (t_template) {
496b9555453SGleb Smirnoff 			NET_EPOCH_ENTER(et);
49708517d53SMike Silbersack 			tcp_respond(tp, t_template->tt_ipgen,
49808517d53SMike Silbersack 				    &t_template->tt_t, (struct mbuf *)NULL,
4999b8b58e0SJonathan Lemon 				    tp->rcv_nxt, tp->snd_una - 1, 0);
500b9555453SGleb Smirnoff 			NET_EPOCH_EXIT(et);
50153640b0eSRobert Watson 			free(t_template, M_TEMP);
50208517d53SMike Silbersack 		}
503b07fef50SRandall Stewart 		callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
504b07fef50SRandall Stewart 			      tcp_timer_keep, tp);
505b07fef50SRandall Stewart 	} else
506b07fef50SRandall Stewart 		callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
507b07fef50SRandall Stewart 			      tcp_timer_keep, tp);
5089b8b58e0SJonathan Lemon 
5099b8b58e0SJonathan Lemon #ifdef TCPDEBUG
5102a074620SSam Leffler 	if (inp->inp_socket->so_options & SO_DEBUG)
511fb59c426SYoshinobu Inoue 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
5129b8b58e0SJonathan Lemon 			  PRU_SLOWTIMO);
5139b8b58e0SJonathan Lemon #endif
5145d06879aSGeorge V. Neville-Neil 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
5158501a69cSRobert Watson 	INP_WUNLOCK(inp);
5168b615593SMarko Zec 	CURVNET_RESTORE();
51785d94372SRobert Watson 	return;
5189b8b58e0SJonathan Lemon 
5199b8b58e0SJonathan Lemon dropit:
52078b50714SRobert Watson 	TCPSTAT_INC(tcps_keepdrops);
52158d94bd0SGleb Smirnoff 	NET_EPOCH_ENTER(et);
522d1b07f36SRandall Stewart 	tcp_log_end_status(tp, TCP_EI_STATUS_KEEP_MAX);
52385d94372SRobert Watson 	tp = tcp_drop(tp, ETIMEDOUT);
52485d94372SRobert Watson 
52585d94372SRobert Watson #ifdef TCPDEBUG
52685d94372SRobert Watson 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
52785d94372SRobert Watson 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
52885d94372SRobert Watson 			  PRU_SLOWTIMO);
52985d94372SRobert Watson #endif
5305d06879aSGeorge V. Neville-Neil 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
53158d94bd0SGleb Smirnoff 	NET_EPOCH_EXIT(et);
532*77198a94SGleb Smirnoff 	if (tp != NULL)
533*77198a94SGleb Smirnoff 		INP_WUNLOCK(inp);
5348b615593SMarko Zec 	CURVNET_RESTORE();
5359b8b58e0SJonathan Lemon }
5369b8b58e0SJonathan Lemon 
53708af8aacSRandall Stewart /*
53808af8aacSRandall Stewart  * Has this session exceeded the maximum time without seeing a substantive
53908af8aacSRandall Stewart  * acknowledgement? If so, return true; otherwise false.
54008af8aacSRandall Stewart  */
54108af8aacSRandall Stewart static bool
54208af8aacSRandall Stewart tcp_maxunacktime_check(struct tcpcb *tp)
54308af8aacSRandall Stewart {
54408af8aacSRandall Stewart 
54508af8aacSRandall Stewart 	/* Are we tracking this timer for this session? */
54608af8aacSRandall Stewart 	if (TP_MAXUNACKTIME(tp) == 0)
54708af8aacSRandall Stewart 		return false;
54808af8aacSRandall Stewart 
54908af8aacSRandall Stewart 	/* Do we have a current measurement. */
55008af8aacSRandall Stewart 	if (tp->t_acktime == 0)
55108af8aacSRandall Stewart 		return false;
55208af8aacSRandall Stewart 
55308af8aacSRandall Stewart 	/* Are we within the acceptable range? */
55408af8aacSRandall Stewart 	if (TSTMP_GT(TP_MAXUNACKTIME(tp) + tp->t_acktime, (u_int)ticks))
55508af8aacSRandall Stewart 		return false;
55608af8aacSRandall Stewart 
55708af8aacSRandall Stewart 	/* We exceeded the timer. */
55808af8aacSRandall Stewart 	TCPSTAT_INC(tcps_progdrops);
55908af8aacSRandall Stewart 	return true;
56008af8aacSRandall Stewart }
56108af8aacSRandall Stewart 
56285d94372SRobert Watson void
56385d94372SRobert Watson tcp_timer_persist(void *xtp)
5649b8b58e0SJonathan Lemon {
56585d94372SRobert Watson 	struct tcpcb *tp = xtp;
56685d94372SRobert Watson 	struct inpcb *inp;
5676573d758SMatt Macy 	struct epoch_tracker et;
56808af8aacSRandall Stewart 	bool progdrop;
569f64dc2abSGleb Smirnoff 	int outrv;
5708b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
5719b8b58e0SJonathan Lemon #ifdef TCPDEBUG
5729b8b58e0SJonathan Lemon 	int ostate;
5739b8b58e0SJonathan Lemon 
5749b8b58e0SJonathan Lemon 	ostate = tp->t_state;
5759b8b58e0SJonathan Lemon #endif
57685d94372SRobert Watson 	inp = tp->t_inpcb;
5775571f9cfSJulien Charbon 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
5788501a69cSRobert Watson 	INP_WLOCK(inp);
579655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_persist) ||
580655f934bSMikolaj Golub 	    !callout_active(&tp->t_timers->tt_persist)) {
5818501a69cSRobert Watson 		INP_WUNLOCK(inp);
5828b615593SMarko Zec 		CURVNET_RESTORE();
58385d94372SRobert Watson 		return;
58485d94372SRobert Watson 	}
585e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_persist);
5869a06a824SGleb Smirnoff 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
587655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
588655f934bSMikolaj Golub 		CURVNET_RESTORE();
589655f934bSMikolaj Golub 		return;
590655f934bSMikolaj Golub 	}
5915571f9cfSJulien Charbon 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
5925571f9cfSJulien Charbon 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
5939b8b58e0SJonathan Lemon 	/*
594a4641f4eSPedro F. Giffuni 	 * Persistence timer into zero window.
5959b8b58e0SJonathan Lemon 	 * Force a byte to be output, if possible.
5969b8b58e0SJonathan Lemon 	 */
59778b50714SRobert Watson 	TCPSTAT_INC(tcps_persisttimeo);
5989b8b58e0SJonathan Lemon 	/*
5999b8b58e0SJonathan Lemon 	 * Hack: if the peer is dead/unreachable, we do not
6009b8b58e0SJonathan Lemon 	 * time out if the window is closed.  After a full
6019b8b58e0SJonathan Lemon 	 * backoff, drop the connection if the idle time
6029b8b58e0SJonathan Lemon 	 * (no responses to probes) reaches the maximum
6039b8b58e0SJonathan Lemon 	 * backoff that we would use if retransmitting.
60408af8aacSRandall Stewart 	 * Also, drop the connection if we haven't been making
60508af8aacSRandall Stewart 	 * progress.
6069b8b58e0SJonathan Lemon 	 */
60708af8aacSRandall Stewart 	progdrop = tcp_maxunacktime_check(tp);
60808af8aacSRandall Stewart 	if (progdrop || (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
6096b0c5521SJohn Baldwin 	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
61008af8aacSRandall Stewart 	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) {
61108af8aacSRandall Stewart 		if (!progdrop)
61278b50714SRobert Watson 			TCPSTAT_INC(tcps_persistdrop);
613d1b07f36SRandall Stewart 		tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
614*77198a94SGleb Smirnoff 		tcp_timer_drop(tp);
615*77198a94SGleb Smirnoff 		CURVNET_RESTORE();
616*77198a94SGleb Smirnoff 		return;
6179b8b58e0SJonathan Lemon 	}
618322181c9SAndre Oppermann 	/*
619322181c9SAndre Oppermann 	 * If the user has closed the socket then drop a persisting
620322181c9SAndre Oppermann 	 * connection after a much reduced timeout.
621322181c9SAndre Oppermann 	 */
622322181c9SAndre Oppermann 	if (tp->t_state > TCPS_CLOSE_WAIT &&
623322181c9SAndre Oppermann 	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
624322181c9SAndre Oppermann 		TCPSTAT_INC(tcps_persistdrop);
625d1b07f36SRandall Stewart 		tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
626*77198a94SGleb Smirnoff 		tcp_timer_drop(tp);
627*77198a94SGleb Smirnoff 		CURVNET_RESTORE();
628*77198a94SGleb Smirnoff 		return;
629322181c9SAndre Oppermann 	}
6309b8b58e0SJonathan Lemon 	tcp_setpersist(tp);
6312cdbfa66SPaul Saab 	tp->t_flags |= TF_FORCEDATA;
632109eb549SGleb Smirnoff 	NET_EPOCH_ENTER(et);
633f64dc2abSGleb Smirnoff 	outrv = tcp_output_nodrop(tp);
6342cdbfa66SPaul Saab 	tp->t_flags &= ~TF_FORCEDATA;
6359b8b58e0SJonathan Lemon 
6369b8b58e0SJonathan Lemon #ifdef TCPDEBUG
637ffb761f6SGleb Smirnoff 	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
638ffb761f6SGleb Smirnoff 		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
6399b8b58e0SJonathan Lemon #endif
6405d06879aSGeorge V. Neville-Neil 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
641f64dc2abSGleb Smirnoff 	(void) tcp_unlock_or_drop(tp, outrv);
642f64dc2abSGleb Smirnoff 	NET_EPOCH_EXIT(et);
6438b615593SMarko Zec 	CURVNET_RESTORE();
6449b8b58e0SJonathan Lemon }
6459b8b58e0SJonathan Lemon 
64685d94372SRobert Watson void
64785d94372SRobert Watson tcp_timer_rexmt(void * xtp)
6489b8b58e0SJonathan Lemon {
64985d94372SRobert Watson 	struct tcpcb *tp = xtp;
6508b615593SMarko Zec 	CURVNET_SET(tp->t_vnet);
651f64dc2abSGleb Smirnoff 	int rexmt, outrv;
65285d94372SRobert Watson 	struct inpcb *inp;
6536573d758SMatt Macy 	struct epoch_tracker et;
654413c3db1SMichael Tuexen 	bool isipv6;
6559b8b58e0SJonathan Lemon #ifdef TCPDEBUG
6569b8b58e0SJonathan Lemon 	int ostate;
6579b8b58e0SJonathan Lemon 
6589b8b58e0SJonathan Lemon 	ostate = tp->t_state;
6599b8b58e0SJonathan Lemon #endif
66085d94372SRobert Watson 	inp = tp->t_inpcb;
6615571f9cfSJulien Charbon 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
6628501a69cSRobert Watson 	INP_WLOCK(inp);
663655f934bSMikolaj Golub 	if (callout_pending(&tp->t_timers->tt_rexmt) ||
664655f934bSMikolaj Golub 	    !callout_active(&tp->t_timers->tt_rexmt)) {
6658501a69cSRobert Watson 		INP_WUNLOCK(inp);
6668b615593SMarko Zec 		CURVNET_RESTORE();
66785d94372SRobert Watson 		return;
66885d94372SRobert Watson 	}
669e2f2059fSMike Silbersack 	callout_deactivate(&tp->t_timers->tt_rexmt);
6709a06a824SGleb Smirnoff 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
671655f934bSMikolaj Golub 		INP_WUNLOCK(inp);
672655f934bSMikolaj Golub 		CURVNET_RESTORE();
673655f934bSMikolaj Golub 		return;
674655f934bSMikolaj Golub 	}
6755571f9cfSJulien Charbon 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
6765571f9cfSJulien Charbon 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
6776d90faf3SPaul Saab 	tcp_free_sackholes(tp);
6782529f56eSJonathan T. Looney 	TCP_LOG_EVENT(tp, NULL, NULL, NULL, TCP_LOG_RTO, 0, 0, NULL, false);
6795105a92cSRandall Stewart 	if (tp->t_fb->tfb_tcp_rexmit_tmr) {
6805105a92cSRandall Stewart 		/* The stack has a timer action too. */
6815105a92cSRandall Stewart 		(*tp->t_fb->tfb_tcp_rexmit_tmr)(tp);
6825105a92cSRandall Stewart 	}
683df8bae1dSRodney W. Grimes 	/*
684df8bae1dSRodney W. Grimes 	 * Retransmission timer went off.  Message has not
685df8bae1dSRodney W. Grimes 	 * been acked within retransmit interval.  Back off
686df8bae1dSRodney W. Grimes 	 * to a longer retransmit interval and retransmit one segment.
68708af8aacSRandall Stewart 	 *
68808af8aacSRandall Stewart 	 * If we've either exceeded the maximum number of retransmissions,
68908af8aacSRandall Stewart 	 * or we've gone long enough without making progress, then drop
69008af8aacSRandall Stewart 	 * the session.
691df8bae1dSRodney W. Grimes 	 */
69208af8aacSRandall Stewart 	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT || tcp_maxunacktime_check(tp)) {
69308af8aacSRandall Stewart 		if (tp->t_rxtshift > TCP_MAXRXTSHIFT)
69478b50714SRobert Watson 			TCPSTAT_INC(tcps_timeoutdrop);
69508af8aacSRandall Stewart 		tp->t_rxtshift = TCP_MAXRXTSHIFT;
696d1b07f36SRandall Stewart 		tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
697*77198a94SGleb Smirnoff 		tcp_timer_drop(tp);
698*77198a94SGleb Smirnoff 		CURVNET_RESTORE();
699*77198a94SGleb Smirnoff 		return;
700b07fef50SRandall Stewart 	}
701cf8f04f4SAndre Oppermann 	if (tp->t_state == TCPS_SYN_SENT) {
702cf8f04f4SAndre Oppermann 		/*
703cf8f04f4SAndre Oppermann 		 * If the SYN was retransmitted, indicate CWND to be
704cf8f04f4SAndre Oppermann 		 * limited to 1 segment in cc_conn_init().
705cf8f04f4SAndre Oppermann 		 */
706cf8f04f4SAndre Oppermann 		tp->snd_cwnd = 1;
707cf8f04f4SAndre Oppermann 	} else if (tp->t_rxtshift == 1) {
7089b8b58e0SJonathan Lemon 		/*
7099b8b58e0SJonathan Lemon 		 * first retransmit; record ssthresh and cwnd so they can
7109b8b58e0SJonathan Lemon 		 * be recovered if this turns out to be a "bad" retransmit.
7119b8b58e0SJonathan Lemon 		 * A retransmit is considered "bad" if an ACK for this
7129b8b58e0SJonathan Lemon 		 * segment is received within RTT/2 interval; the assumption
7139b8b58e0SJonathan Lemon 		 * here is that the ACK was already in flight.  See
7149b8b58e0SJonathan Lemon 		 * "On Estimating End-to-End Network Path Properties" by
7159b8b58e0SJonathan Lemon 		 * Allman and Paxson for more details.
7169b8b58e0SJonathan Lemon 		 */
7179b8b58e0SJonathan Lemon 		tp->snd_cwnd_prev = tp->snd_cwnd;
7189b8b58e0SJonathan Lemon 		tp->snd_ssthresh_prev = tp->snd_ssthresh;
7199d11646dSJeffrey Hsu 		tp->snd_recover_prev = tp->snd_recover;
720dbc42409SLawrence Stewart 		if (IN_FASTRECOVERY(tp->t_flags))
7219d11646dSJeffrey Hsu 			tp->t_flags |= TF_WASFRECOVERY;
7229d11646dSJeffrey Hsu 		else
7239d11646dSJeffrey Hsu 			tp->t_flags &= ~TF_WASFRECOVERY;
724dbc42409SLawrence Stewart 		if (IN_CONGRECOVERY(tp->t_flags))
725dbc42409SLawrence Stewart 			tp->t_flags |= TF_WASCRECOVERY;
726dbc42409SLawrence Stewart 		else
727dbc42409SLawrence Stewart 			tp->t_flags &= ~TF_WASCRECOVERY;
72810d20c84SMatt Macy 		if ((tp->t_flags & TF_RCVD_TSTMP) == 0)
7299b8b58e0SJonathan Lemon 			tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
73010d20c84SMatt Macy 		/* In the event that we've negotiated timestamps
73110d20c84SMatt Macy 		 * badrxtwin will be set to the value that we set
73210d20c84SMatt Macy 		 * the retransmitted packet's to_tsval to by tcp_output
73310d20c84SMatt Macy 		 */
734672dc4aeSJohn Baldwin 		tp->t_flags |= TF_PREVVALID;
735672dc4aeSJohn Baldwin 	} else
736672dc4aeSJohn Baldwin 		tp->t_flags &= ~TF_PREVVALID;
73778b50714SRobert Watson 	TCPSTAT_INC(tcps_rexmttimeo);
738281a0fd4SPatrick Kelsey 	if ((tp->t_state == TCPS_SYN_SENT) ||
739281a0fd4SPatrick Kelsey 	    (tp->t_state == TCPS_SYN_RECEIVED))
7400999766dSMichael Tuexen 		rexmt = tcp_rexmit_initial * tcp_backoff[tp->t_rxtshift];
7417d42e30cSJonathan Lemon 	else
742df8bae1dSRodney W. Grimes 		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
743df8bae1dSRodney W. Grimes 	TCPT_RANGESET(tp->t_rxtcur, rexmt,
744df8bae1dSRodney W. Grimes 		      tp->t_rttmin, TCPTV_REXMTMAX);
745f6f6703fSSean Bruno 
746882ac53eSSean Bruno 	/*
747882ac53eSSean Bruno 	 * We enter the path for PLMTUD if connection is established or, if
748882ac53eSSean Bruno 	 * connection is FIN_WAIT_1 status, reason for the last is that if
749882ac53eSSean Bruno 	 * amount of data we send is very small, we could send it in couple of
750882ac53eSSean Bruno 	 * packets and process straight to FIN. In that case we won't catch
751882ac53eSSean Bruno 	 * ESTABLISHED state.
752882ac53eSSean Bruno 	 */
753f6f6703fSSean Bruno #ifdef INET6
754413c3db1SMichael Tuexen 	isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? true : false;
755413c3db1SMichael Tuexen #else
756413c3db1SMichael Tuexen 	isipv6 = false;
757f6f6703fSSean Bruno #endif
758413c3db1SMichael Tuexen 	if (((V_tcp_pmtud_blackhole_detect == 1) ||
759413c3db1SMichael Tuexen 	    (V_tcp_pmtud_blackhole_detect == 2 && !isipv6) ||
760413c3db1SMichael Tuexen 	    (V_tcp_pmtud_blackhole_detect == 3 && isipv6)) &&
761413c3db1SMichael Tuexen 	    ((tp->t_state == TCPS_ESTABLISHED) ||
762413c3db1SMichael Tuexen 	    (tp->t_state == TCPS_FIN_WAIT_1))) {
763b89af8e1SMichael Tuexen 		if (tp->t_rxtshift == 1) {
764adf43a92SHiren Panchasara 			/*
765b89af8e1SMichael Tuexen 			 * We enter blackhole detection after the first
766b89af8e1SMichael Tuexen 			 * unsuccessful timer based retransmission.
767b89af8e1SMichael Tuexen 			 * Then we reduce up to two times the MSS, each
768b89af8e1SMichael Tuexen 			 * candidate giving two tries of retransmissions.
769b89af8e1SMichael Tuexen 			 * But we give a candidate only two tries, if it
770b89af8e1SMichael Tuexen 			 * actually reduces the MSS.
771adf43a92SHiren Panchasara 			 */
772b89af8e1SMichael Tuexen 			tp->t_blackhole_enter = 2;
773b89af8e1SMichael Tuexen 			tp->t_blackhole_exit = tp->t_blackhole_enter;
774b89af8e1SMichael Tuexen 			if (isipv6) {
775b89af8e1SMichael Tuexen #ifdef INET6
776b89af8e1SMichael Tuexen 				if (tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss)
777b89af8e1SMichael Tuexen 					tp->t_blackhole_exit += 2;
778b89af8e1SMichael Tuexen 				if (tp->t_maxseg > V_tcp_v6mssdflt &&
779b89af8e1SMichael Tuexen 				    V_tcp_v6pmtud_blackhole_mss > V_tcp_v6mssdflt)
780b89af8e1SMichael Tuexen 					tp->t_blackhole_exit += 2;
781b89af8e1SMichael Tuexen #endif
782b89af8e1SMichael Tuexen 			} else {
783b89af8e1SMichael Tuexen #ifdef INET
784b89af8e1SMichael Tuexen 				if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss)
785b89af8e1SMichael Tuexen 					tp->t_blackhole_exit += 2;
786b89af8e1SMichael Tuexen 				if (tp->t_maxseg > V_tcp_mssdflt &&
787b89af8e1SMichael Tuexen 				    V_tcp_pmtud_blackhole_mss > V_tcp_mssdflt)
788b89af8e1SMichael Tuexen 					tp->t_blackhole_exit += 2;
789b89af8e1SMichael Tuexen #endif
790b89af8e1SMichael Tuexen 			}
791b89af8e1SMichael Tuexen 		}
792f6f6703fSSean Bruno 		if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
793f6f6703fSSean Bruno 		    (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
794b89af8e1SMichael Tuexen 		    (tp->t_rxtshift >= tp->t_blackhole_enter &&
795b89af8e1SMichael Tuexen 		    tp->t_rxtshift < tp->t_blackhole_exit &&
796b89af8e1SMichael Tuexen 		    (tp->t_rxtshift - tp->t_blackhole_enter) % 2 == 0)) {
797f6f6703fSSean Bruno 			/*
798f6f6703fSSean Bruno 			 * Enter Path MTU Black-hole Detection mechanism:
799f6f6703fSSean Bruno 			 * - Disable Path MTU Discovery (IP "DF" bit).
800f6f6703fSSean Bruno 			 * - Reduce MTU to lower value than what we
801f6f6703fSSean Bruno 			 *   negotiated with peer.
802f6f6703fSSean Bruno 			 */
8033d5af7a1SMichael Tuexen 			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) == 0) {
804f6f6703fSSean Bruno 				/* Record that we may have found a black hole. */
805f6f6703fSSean Bruno 				tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
806f6f6703fSSean Bruno 				/* Keep track of previous MSS. */
8070c39d38dSGleb Smirnoff 				tp->t_pmtud_saved_maxseg = tp->t_maxseg;
8083d5af7a1SMichael Tuexen 			}
809f6f6703fSSean Bruno 
810f6f6703fSSean Bruno 			/*
811f6f6703fSSean Bruno 			 * Reduce the MSS to blackhole value or to the default
812f6f6703fSSean Bruno 			 * in an attempt to retransmit.
813f6f6703fSSean Bruno 			 */
814f6f6703fSSean Bruno #ifdef INET6
815f6f6703fSSean Bruno 			if (isipv6 &&
816b89af8e1SMichael Tuexen 			    tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss &&
817b89af8e1SMichael Tuexen 			    V_tcp_v6pmtud_blackhole_mss > V_tcp_v6mssdflt) {
818f6f6703fSSean Bruno 				/* Use the sysctl tuneable blackhole MSS. */
8190c39d38dSGleb Smirnoff 				tp->t_maxseg = V_tcp_v6pmtud_blackhole_mss;
82032a04bb8SSean Bruno 				TCPSTAT_INC(tcps_pmtud_blackhole_activated);
821f6f6703fSSean Bruno 			} else if (isipv6) {
822f6f6703fSSean Bruno 				/* Use the default MSS. */
8230c39d38dSGleb Smirnoff 				tp->t_maxseg = V_tcp_v6mssdflt;
824f6f6703fSSean Bruno 				/*
825f6f6703fSSean Bruno 				 * Disable Path MTU Discovery when we switch to
826f6f6703fSSean Bruno 				 * minmss.
827f6f6703fSSean Bruno 				 */
828f6f6703fSSean Bruno 				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
82932a04bb8SSean Bruno 				TCPSTAT_INC(tcps_pmtud_blackhole_activated_min_mss);
830f6f6703fSSean Bruno 			}
831f6f6703fSSean Bruno #endif
832f6f6703fSSean Bruno #if defined(INET6) && defined(INET)
833f6f6703fSSean Bruno 			else
834f6f6703fSSean Bruno #endif
835f6f6703fSSean Bruno #ifdef INET
836b89af8e1SMichael Tuexen 			if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss &&
837b89af8e1SMichael Tuexen 			    V_tcp_pmtud_blackhole_mss > V_tcp_mssdflt) {
838f6f6703fSSean Bruno 				/* Use the sysctl tuneable blackhole MSS. */
8390c39d38dSGleb Smirnoff 				tp->t_maxseg = V_tcp_pmtud_blackhole_mss;
84032a04bb8SSean Bruno 				TCPSTAT_INC(tcps_pmtud_blackhole_activated);
841f6f6703fSSean Bruno 			} else {
842f6f6703fSSean Bruno 				/* Use the default MSS. */
8430c39d38dSGleb Smirnoff 				tp->t_maxseg = V_tcp_mssdflt;
844f6f6703fSSean Bruno 				/*
845f6f6703fSSean Bruno 				 * Disable Path MTU Discovery when we switch to
846f6f6703fSSean Bruno 				 * minmss.
847f6f6703fSSean Bruno 				 */
848f6f6703fSSean Bruno 				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
84932a04bb8SSean Bruno 				TCPSTAT_INC(tcps_pmtud_blackhole_activated_min_mss);
850f6f6703fSSean Bruno 			}
851f6f6703fSSean Bruno #endif
852f6f6703fSSean Bruno 			/*
853f6f6703fSSean Bruno 			 * Reset the slow-start flight size
854f6f6703fSSean Bruno 			 * as it may depend on the new MSS.
855f6f6703fSSean Bruno 			 */
856f6f6703fSSean Bruno 			if (CC_ALGO(tp)->conn_init != NULL)
857f6f6703fSSean Bruno 				CC_ALGO(tp)->conn_init(tp->ccv);
858f6f6703fSSean Bruno 		} else {
859f6f6703fSSean Bruno 			/*
860f6f6703fSSean Bruno 			 * If further retransmissions are still unsuccessful
861f6f6703fSSean Bruno 			 * with a lowered MTU, maybe this isn't a blackhole and
862f6f6703fSSean Bruno 			 * we restore the previous MSS and blackhole detection
863f6f6703fSSean Bruno 			 * flags.
864f6f6703fSSean Bruno 			 */
865f6f6703fSSean Bruno 			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
866b89af8e1SMichael Tuexen 			    (tp->t_rxtshift >= tp->t_blackhole_exit)) {
867f6f6703fSSean Bruno 				tp->t_flags2 |= TF2_PLPMTU_PMTUD;
868f6f6703fSSean Bruno 				tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
8690c39d38dSGleb Smirnoff 				tp->t_maxseg = tp->t_pmtud_saved_maxseg;
87032a04bb8SSean Bruno 				TCPSTAT_INC(tcps_pmtud_blackhole_failed);
871f6f6703fSSean Bruno 				/*
872f6f6703fSSean Bruno 				 * Reset the slow-start flight size as it
873f6f6703fSSean Bruno 				 * may depend on the new MSS.
874f6f6703fSSean Bruno 				 */
875f6f6703fSSean Bruno 				if (CC_ALGO(tp)->conn_init != NULL)
876f6f6703fSSean Bruno 					CC_ALGO(tp)->conn_init(tp->ccv);
877f6f6703fSSean Bruno 			}
878f6f6703fSSean Bruno 		}
879f6f6703fSSean Bruno 	}
880f6f6703fSSean Bruno 
881df8bae1dSRodney W. Grimes 	/*
88277339e1cSAndre Oppermann 	 * Disable RFC1323 and SACK if we haven't got any response to
8837ceb7783SJesper Skriver 	 * our third SYN to work-around some broken terminal servers
8847ceb7783SJesper Skriver 	 * (most of which have hopefully been retired) that have bad VJ
8857ceb7783SJesper Skriver 	 * header compression code which trashes TCP segments containing
8867ceb7783SJesper Skriver 	 * unknown-to-them TCP options.
8877ceb7783SJesper Skriver 	 */
8886c0ef895SJohn Baldwin 	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
8896c0ef895SJohn Baldwin 	    (tp->t_rxtshift == 3))
890c4ab59c1SAndre Oppermann 		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
8917ceb7783SJesper Skriver 	/*
8925ede40dcSRyan Stone 	 * If we backed off this far, notify the L3 protocol that we're having
8935ede40dcSRyan Stone 	 * connection problems.
894df8bae1dSRodney W. Grimes 	 */
8955ede40dcSRyan Stone 	if (tp->t_rxtshift > TCP_RTT_INVALIDATE) {
896fb59c426SYoshinobu Inoue #ifdef INET6
897fb59c426SYoshinobu Inoue 		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
898fb59c426SYoshinobu Inoue 			in6_losing(tp->t_inpcb);
89984cc0778SGeorge V. Neville-Neil 		else
900fb59c426SYoshinobu Inoue #endif
90184cc0778SGeorge V. Neville-Neil 			in_losing(tp->t_inpcb);
902df8bae1dSRodney W. Grimes 	}
903df8bae1dSRodney W. Grimes 	tp->snd_nxt = tp->snd_una;
9049d11646dSJeffrey Hsu 	tp->snd_recover = tp->snd_max;
90546f58482SJonathan Lemon 	/*
90674b48c1dSAndras Olah 	 * Force a segment to be sent.
90774b48c1dSAndras Olah 	 */
90874b48c1dSAndras Olah 	tp->t_flags |= TF_ACKNOW;
90974b48c1dSAndras Olah 	/*
910df8bae1dSRodney W. Grimes 	 * If timing a segment in this window, stop the timer.
911df8bae1dSRodney W. Grimes 	 */
9129b8b58e0SJonathan Lemon 	tp->t_rtttime = 0;
913dbc42409SLawrence Stewart 
914b5af1b88SLawrence Stewart 	cc_cong_signal(tp, NULL, CC_RTO);
915109eb549SGleb Smirnoff 	NET_EPOCH_ENTER(et);
916f64dc2abSGleb Smirnoff 	outrv = tcp_output_nodrop(tp);
9179b8b58e0SJonathan Lemon #ifdef TCPDEBUG
9181c53f806SRobert Watson 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
919fb59c426SYoshinobu Inoue 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
9209b8b58e0SJonathan Lemon 			  PRU_SLOWTIMO);
921df8bae1dSRodney W. Grimes #endif
9225d06879aSGeorge V. Neville-Neil 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
923f64dc2abSGleb Smirnoff 	(void) tcp_unlock_or_drop(tp, outrv);
924f64dc2abSGleb Smirnoff 	NET_EPOCH_EXIT(et);
9258b615593SMarko Zec 	CURVNET_RESTORE();
92685d94372SRobert Watson }
92785d94372SRobert Watson 
92885d94372SRobert Watson void
9295571f9cfSJulien Charbon tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
93085d94372SRobert Watson {
93185d94372SRobert Watson 	struct callout *t_callout;
9325773ac11SJohn Baldwin 	callout_func_t *f_callout;
93387aedea4SKip Macy 	struct inpcb *inp = tp->t_inpcb;
934883831c6SAdrian Chadd 	int cpu = inp_to_cpuid(inp);
93585d94372SRobert Watson 
93609fe6320SNavdeep Parhar #ifdef TCP_OFFLOAD
93709fe6320SNavdeep Parhar 	if (tp->t_flags & TF_TOE)
93809fe6320SNavdeep Parhar 		return;
93909fe6320SNavdeep Parhar #endif
94009fe6320SNavdeep Parhar 
9415571f9cfSJulien Charbon 	if (tp->t_timers->tt_flags & TT_STOPPED)
9425571f9cfSJulien Charbon 		return;
9435571f9cfSJulien Charbon 
94485d94372SRobert Watson 	switch (timer_type) {
94585d94372SRobert Watson 		case TT_DELACK:
946e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_delack;
94785d94372SRobert Watson 			f_callout = tcp_timer_delack;
94885d94372SRobert Watson 			break;
94985d94372SRobert Watson 		case TT_REXMT:
950e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_rexmt;
95185d94372SRobert Watson 			f_callout = tcp_timer_rexmt;
95285d94372SRobert Watson 			break;
95385d94372SRobert Watson 		case TT_PERSIST:
954e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_persist;
95585d94372SRobert Watson 			f_callout = tcp_timer_persist;
95685d94372SRobert Watson 			break;
95785d94372SRobert Watson 		case TT_KEEP:
958e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_keep;
95985d94372SRobert Watson 			f_callout = tcp_timer_keep;
96085d94372SRobert Watson 			break;
96185d94372SRobert Watson 		case TT_2MSL:
962e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_2msl;
96385d94372SRobert Watson 			f_callout = tcp_timer_2msl;
96485d94372SRobert Watson 			break;
96585d94372SRobert Watson 		default:
96655bceb1eSRandall Stewart 			if (tp->t_fb->tfb_tcp_timer_activate) {
96755bceb1eSRandall Stewart 				tp->t_fb->tfb_tcp_timer_activate(tp, timer_type, delta);
96855bceb1eSRandall Stewart 				return;
96955bceb1eSRandall Stewart 			}
97003374917SJulien Charbon 			panic("tp %p bad timer_type %#x", tp, timer_type);
97185d94372SRobert Watson 		}
97285d94372SRobert Watson 	if (delta == 0) {
973b07fef50SRandall Stewart 		callout_stop(t_callout);
97485d94372SRobert Watson 	} else {
97587aedea4SKip Macy 		callout_reset_on(t_callout, delta, f_callout, tp, cpu);
97685d94372SRobert Watson 	}
97785d94372SRobert Watson }
97885d94372SRobert Watson 
97985d94372SRobert Watson int
9805571f9cfSJulien Charbon tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
98185d94372SRobert Watson {
98285d94372SRobert Watson 	struct callout *t_callout;
98385d94372SRobert Watson 
98485d94372SRobert Watson 	switch (timer_type) {
98585d94372SRobert Watson 		case TT_DELACK:
986e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_delack;
98785d94372SRobert Watson 			break;
98885d94372SRobert Watson 		case TT_REXMT:
989e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_rexmt;
99085d94372SRobert Watson 			break;
99185d94372SRobert Watson 		case TT_PERSIST:
992e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_persist;
99385d94372SRobert Watson 			break;
99485d94372SRobert Watson 		case TT_KEEP:
995e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_keep;
99685d94372SRobert Watson 			break;
99785d94372SRobert Watson 		case TT_2MSL:
998e2f2059fSMike Silbersack 			t_callout = &tp->t_timers->tt_2msl;
99985d94372SRobert Watson 			break;
100085d94372SRobert Watson 		default:
100155bceb1eSRandall Stewart 			if (tp->t_fb->tfb_tcp_timer_active) {
100255bceb1eSRandall Stewart 				return(tp->t_fb->tfb_tcp_timer_active(tp, timer_type));
100355bceb1eSRandall Stewart 			}
100403374917SJulien Charbon 			panic("tp %p bad timer_type %#x", tp, timer_type);
100585d94372SRobert Watson 		}
100685d94372SRobert Watson 	return callout_active(t_callout);
1007df8bae1dSRodney W. Grimes }
1008b8614722SMike Silbersack 
100989e560f4SRandall Stewart /*
101089e560f4SRandall Stewart  * Stop the timer from running, and apply a flag
101189e560f4SRandall Stewart  * against the timer_flags that will force the
101289e560f4SRandall Stewart  * timer never to run. The flag is needed to assure
101389e560f4SRandall Stewart  * a race does not leave it running and cause
101489e560f4SRandall Stewart  * the timer to possibly restart itself (keep and persist
101589e560f4SRandall Stewart  * especially do this).
101689e560f4SRandall Stewart  */
101789e560f4SRandall Stewart int
101889e560f4SRandall Stewart tcp_timer_suspend(struct tcpcb *tp, uint32_t timer_type)
101989e560f4SRandall Stewart {
102089e560f4SRandall Stewart 	struct callout *t_callout;
102189e560f4SRandall Stewart 	uint32_t t_flags;
102289e560f4SRandall Stewart 
102389e560f4SRandall Stewart 	switch (timer_type) {
102489e560f4SRandall Stewart 		case TT_DELACK:
102589e560f4SRandall Stewart 			t_flags = TT_DELACK_SUS;
102689e560f4SRandall Stewart 			t_callout = &tp->t_timers->tt_delack;
102789e560f4SRandall Stewart 			break;
102889e560f4SRandall Stewart 		case TT_REXMT:
102989e560f4SRandall Stewart 			t_flags = TT_REXMT_SUS;
103089e560f4SRandall Stewart 			t_callout = &tp->t_timers->tt_rexmt;
103189e560f4SRandall Stewart 			break;
103289e560f4SRandall Stewart 		case TT_PERSIST:
103389e560f4SRandall Stewart 			t_flags = TT_PERSIST_SUS;
103489e560f4SRandall Stewart 			t_callout = &tp->t_timers->tt_persist;
103589e560f4SRandall Stewart 			break;
103689e560f4SRandall Stewart 		case TT_KEEP:
103789e560f4SRandall Stewart 			t_flags = TT_KEEP_SUS;
103889e560f4SRandall Stewart 			t_callout = &tp->t_timers->tt_keep;
103989e560f4SRandall Stewart 			break;
104089e560f4SRandall Stewart 		case TT_2MSL:
104189e560f4SRandall Stewart 			t_flags = TT_2MSL_SUS;
104289e560f4SRandall Stewart 			t_callout = &tp->t_timers->tt_2msl;
104389e560f4SRandall Stewart 			break;
104489e560f4SRandall Stewart 		default:
104589e560f4SRandall Stewart 			panic("tp:%p bad timer_type 0x%x", tp, timer_type);
104689e560f4SRandall Stewart 	}
104789e560f4SRandall Stewart 	tp->t_timers->tt_flags |= t_flags;
104889e560f4SRandall Stewart 	return (callout_stop(t_callout));
104989e560f4SRandall Stewart }
105089e560f4SRandall Stewart 
105189e560f4SRandall Stewart void
105289e560f4SRandall Stewart tcp_timers_unsuspend(struct tcpcb *tp, uint32_t timer_type)
105389e560f4SRandall Stewart {
105489e560f4SRandall Stewart 	switch (timer_type) {
105589e560f4SRandall Stewart 		case TT_DELACK:
105689e560f4SRandall Stewart 			if (tp->t_timers->tt_flags & TT_DELACK_SUS) {
105789e560f4SRandall Stewart 				tp->t_timers->tt_flags &= ~TT_DELACK_SUS;
105889e560f4SRandall Stewart 				if (tp->t_flags & TF_DELACK) {
105989e560f4SRandall Stewart 					/* Delayed ack timer should be up activate a timer */
106089e560f4SRandall Stewart 					tp->t_flags &= ~TF_DELACK;
106189e560f4SRandall Stewart 					tcp_timer_activate(tp, TT_DELACK,
106289e560f4SRandall Stewart 					    tcp_delacktime);
106389e560f4SRandall Stewart 				}
106489e560f4SRandall Stewart 			}
106589e560f4SRandall Stewart 			break;
106689e560f4SRandall Stewart 		case TT_REXMT:
106789e560f4SRandall Stewart 			if (tp->t_timers->tt_flags & TT_REXMT_SUS) {
106889e560f4SRandall Stewart 				tp->t_timers->tt_flags &= ~TT_REXMT_SUS;
106989e560f4SRandall Stewart 				if (SEQ_GT(tp->snd_max, tp->snd_una) &&
107089e560f4SRandall Stewart 				    (tcp_timer_active((tp), TT_PERSIST) == 0) &&
107189e560f4SRandall Stewart 				    tp->snd_wnd) {
107289e560f4SRandall Stewart 					/* We have outstanding data activate a timer */
107389e560f4SRandall Stewart 					tcp_timer_activate(tp, TT_REXMT,
107489e560f4SRandall Stewart                                             tp->t_rxtcur);
107589e560f4SRandall Stewart 				}
107689e560f4SRandall Stewart 			}
107789e560f4SRandall Stewart 			break;
107889e560f4SRandall Stewart 		case TT_PERSIST:
107989e560f4SRandall Stewart 			if (tp->t_timers->tt_flags & TT_PERSIST_SUS) {
108089e560f4SRandall Stewart 				tp->t_timers->tt_flags &= ~TT_PERSIST_SUS;
108189e560f4SRandall Stewart 				if (tp->snd_wnd == 0) {
108289e560f4SRandall Stewart 					/* Activate the persists timer */
108389e560f4SRandall Stewart 					tp->t_rxtshift = 0;
108489e560f4SRandall Stewart 					tcp_setpersist(tp);
108589e560f4SRandall Stewart 				}
108689e560f4SRandall Stewart 			}
108789e560f4SRandall Stewart 			break;
108889e560f4SRandall Stewart 		case TT_KEEP:
108989e560f4SRandall Stewart 			if (tp->t_timers->tt_flags & TT_KEEP_SUS) {
109089e560f4SRandall Stewart 				tp->t_timers->tt_flags &= ~TT_KEEP_SUS;
109189e560f4SRandall Stewart 				tcp_timer_activate(tp, TT_KEEP,
109289e560f4SRandall Stewart 					    TCPS_HAVEESTABLISHED(tp->t_state) ?
109389e560f4SRandall Stewart 					    TP_KEEPIDLE(tp) : TP_KEEPINIT(tp));
109489e560f4SRandall Stewart 			}
109589e560f4SRandall Stewart 			break;
109689e560f4SRandall Stewart 		case TT_2MSL:
109789e560f4SRandall Stewart 			if (tp->t_timers->tt_flags &= TT_2MSL_SUS) {
109889e560f4SRandall Stewart 				tp->t_timers->tt_flags &= ~TT_2MSL_SUS;
109989e560f4SRandall Stewart 				if ((tp->t_state == TCPS_FIN_WAIT_2) &&
110089e560f4SRandall Stewart 				    ((tp->t_inpcb->inp_socket == NULL) ||
110189e560f4SRandall Stewart 				     (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE))) {
110289e560f4SRandall Stewart 					/* Star the 2MSL timer */
110389e560f4SRandall Stewart 					tcp_timer_activate(tp, TT_2MSL,
110489e560f4SRandall Stewart 					    (tcp_fast_finwait2_recycle) ?
110589e560f4SRandall Stewart 					    tcp_finwait2_timeout : TP_MAXIDLE(tp));
110689e560f4SRandall Stewart 				}
110789e560f4SRandall Stewart 			}
110889e560f4SRandall Stewart 			break;
110989e560f4SRandall Stewart 		default:
111089e560f4SRandall Stewart 			panic("tp:%p bad timer_type 0x%x", tp, timer_type);
111189e560f4SRandall Stewart 	}
111289e560f4SRandall Stewart }
111389e560f4SRandall Stewart 
1114ff945008SGleb Smirnoff static void
1115ff945008SGleb Smirnoff tcp_timer_discard(void *ptp)
1116ff945008SGleb Smirnoff {
1117ff945008SGleb Smirnoff 	struct inpcb *inp;
1118ff945008SGleb Smirnoff 	struct tcpcb *tp;
1119ff945008SGleb Smirnoff 	struct epoch_tracker et;
1120ff945008SGleb Smirnoff 
1121ff945008SGleb Smirnoff 	tp = (struct tcpcb *)ptp;
1122ff945008SGleb Smirnoff 	CURVNET_SET(tp->t_vnet);
1123ff945008SGleb Smirnoff 	NET_EPOCH_ENTER(et);
1124ff945008SGleb Smirnoff 	inp = tp->t_inpcb;
1125ff945008SGleb Smirnoff 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
1126ff945008SGleb Smirnoff 		__func__, tp));
1127ff945008SGleb Smirnoff 	INP_WLOCK(inp);
1128ff945008SGleb Smirnoff 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0,
1129ff945008SGleb Smirnoff 		("%s: tcpcb has to be stopped here", __func__));
1130ff945008SGleb Smirnoff 	if (--tp->t_timers->tt_draincnt > 0 ||
1131ff945008SGleb Smirnoff 	    tcp_freecb(tp) == false)
1132ff945008SGleb Smirnoff 		INP_WUNLOCK(inp);
1133ff945008SGleb Smirnoff 	NET_EPOCH_EXIT(et);
1134ff945008SGleb Smirnoff 	CURVNET_RESTORE();
1135ff945008SGleb Smirnoff }
1136ff945008SGleb Smirnoff 
11375571f9cfSJulien Charbon void
11385571f9cfSJulien Charbon tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
11395571f9cfSJulien Charbon {
11405571f9cfSJulien Charbon 	struct callout *t_callout;
11415571f9cfSJulien Charbon 
11425571f9cfSJulien Charbon 	tp->t_timers->tt_flags |= TT_STOPPED;
11435571f9cfSJulien Charbon 	switch (timer_type) {
11445571f9cfSJulien Charbon 		case TT_DELACK:
11455571f9cfSJulien Charbon 			t_callout = &tp->t_timers->tt_delack;
11465571f9cfSJulien Charbon 			break;
11475571f9cfSJulien Charbon 		case TT_REXMT:
11485571f9cfSJulien Charbon 			t_callout = &tp->t_timers->tt_rexmt;
11495571f9cfSJulien Charbon 			break;
11505571f9cfSJulien Charbon 		case TT_PERSIST:
11515571f9cfSJulien Charbon 			t_callout = &tp->t_timers->tt_persist;
11525571f9cfSJulien Charbon 			break;
11535571f9cfSJulien Charbon 		case TT_KEEP:
11545571f9cfSJulien Charbon 			t_callout = &tp->t_timers->tt_keep;
11555571f9cfSJulien Charbon 			break;
11565571f9cfSJulien Charbon 		case TT_2MSL:
11575571f9cfSJulien Charbon 			t_callout = &tp->t_timers->tt_2msl;
11585571f9cfSJulien Charbon 			break;
11595571f9cfSJulien Charbon 		default:
116055bceb1eSRandall Stewart 			if (tp->t_fb->tfb_tcp_timer_stop) {
116155bceb1eSRandall Stewart 				/*
116255bceb1eSRandall Stewart 				 * XXXrrs we need to look at this with the
116355bceb1eSRandall Stewart 				 * stop case below (flags).
116455bceb1eSRandall Stewart 				 */
116555bceb1eSRandall Stewart 				tp->t_fb->tfb_tcp_timer_stop(tp, timer_type);
116655bceb1eSRandall Stewart 				return;
116755bceb1eSRandall Stewart 			}
11685571f9cfSJulien Charbon 			panic("tp %p bad timer_type %#x", tp, timer_type);
11695571f9cfSJulien Charbon 		}
11705571f9cfSJulien Charbon 
1171e5ad6456SRandall Stewart 	if (callout_async_drain(t_callout, tcp_timer_discard) == 0) {
11725571f9cfSJulien Charbon 		/*
11735571f9cfSJulien Charbon 		 * Can't stop the callout, defer tcpcb actual deletion
1174e5ad6456SRandall Stewart 		 * to the last one. We do this using the async drain
1175e5ad6456SRandall Stewart 		 * function and incrementing the count in
11765571f9cfSJulien Charbon 		 */
1177e5ad6456SRandall Stewart 		tp->t_timers->tt_draincnt++;
11785571f9cfSJulien Charbon 	}
11795571f9cfSJulien Charbon }
1180