xref: /freebsd/sys/netinet/tcp_timer.c (revision 1b647f44bcb39b5249e197f1c8d028a7304b44cd)
1 /*-
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
30  * $FreeBSD$
31  */
32 
33 #include "opt_inet6.h"
34 #include "opt_tcpdebug.h"
35 
36 #include <sys/param.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/mbuf.h>
40 #include <sys/mutex.h>
41 #include <sys/protosw.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/sysctl.h>
45 #include <sys/systm.h>
46 
47 #include <net/route.h>
48 
49 #include <netinet/in.h>
50 #include <netinet/in_pcb.h>
51 #include <netinet/in_systm.h>
52 #ifdef INET6
53 #include <netinet6/in6_pcb.h>
54 #endif
55 #include <netinet/ip_var.h>
56 #include <netinet/tcp.h>
57 #include <netinet/tcp_fsm.h>
58 #include <netinet/tcp_timer.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcpip.h>
61 #ifdef TCPDEBUG
62 #include <netinet/tcp_debug.h>
63 #endif
64 
65 int	tcp_keepinit;
66 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
67     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "");
68 
69 int	tcp_keepidle;
70 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
71     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "");
72 
73 int	tcp_keepintvl;
74 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
75     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "");
76 
77 int	tcp_delacktime;
78 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
79     &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
80     "Time before a delayed ACK is sent");
81 
82 int	tcp_msl;
83 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
84     &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
85 
86 int	tcp_rexmit_min;
87 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
88     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
89     "Minimum Retransmission Timeout");
90 
91 int	tcp_rexmit_slop;
92 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
93     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
94     "Retransmission Timer Slop");
95 
96 static int	always_keepalive = 1;
97 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
98     &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
99 
100 int    tcp_fast_finwait2_recycle = 0;
101 SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
102     &tcp_fast_finwait2_recycle, 0,
103     "Recycle closed FIN_WAIT_2 connections faster");
104 
105 int    tcp_finwait2_timeout;
106 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
107     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
108 
109 
110 static int	tcp_keepcnt = TCPTV_KEEPCNT;
111 	/* max idle probes */
112 int	tcp_maxpersistidle;
113 	/* max idle time in persist */
114 int	tcp_maxidle;
115 
116 /*
117  * Tcp protocol timeout routine called every 500 ms.
118  * Updates timestamps used for TCP
119  * causes finite state machine actions if timers expire.
120  */
121 void
122 tcp_slowtimo(void)
123 {
124 
125 	tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
126 	INP_INFO_WLOCK(&tcbinfo);
127 	(void) tcp_tw_2msl_scan(0);
128 	INP_INFO_WUNLOCK(&tcbinfo);
129 }
130 
131 int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
132     { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
133 
134 int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
135     { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
136 
137 static int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
138 
139 static int tcp_timer_race;
140 SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
141     0, "Count of t_inpcb races on tcp_discardcb");
142 
143 /*
144  * TCP timer processing.
145  */
146 
147 void
148 tcp_timer_delack(void *xtp)
149 {
150 	struct tcpcb *tp = xtp;
151 	struct inpcb *inp;
152 
153 	INP_INFO_RLOCK(&tcbinfo);
154 	inp = tp->t_inpcb;
155 	/*
156 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
157 	 * tear-down mean we need it as a work-around for races between
158 	 * timers and tcp_discardcb().
159 	 *
160 	 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
161 	 */
162 	if (inp == NULL) {
163 		tcp_timer_race++;
164 		INP_INFO_RUNLOCK(&tcbinfo);
165 		return;
166 	}
167 	INP_LOCK(inp);
168 	INP_INFO_RUNLOCK(&tcbinfo);
169 	if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_delack)
170 	    || !callout_active(&tp->t_timers->tt_delack)) {
171 		INP_UNLOCK(inp);
172 		return;
173 	}
174 	callout_deactivate(&tp->t_timers->tt_delack);
175 
176 	tp->t_flags |= TF_ACKNOW;
177 	tcpstat.tcps_delack++;
178 	(void) tcp_output(tp);
179 	INP_UNLOCK(inp);
180 }
181 
182 void
183 tcp_timer_2msl(void *xtp)
184 {
185 	struct tcpcb *tp = xtp;
186 	struct inpcb *inp;
187 #ifdef TCPDEBUG
188 	int ostate;
189 
190 	ostate = tp->t_state;
191 #endif
192 	/*
193 	 * XXXRW: Does this actually happen?
194 	 */
195 	INP_INFO_WLOCK(&tcbinfo);
196 	inp = tp->t_inpcb;
197 	/*
198 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
199 	 * tear-down mean we need it as a work-around for races between
200 	 * timers and tcp_discardcb().
201 	 *
202 	 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
203 	 */
204 	if (inp == NULL) {
205 		tcp_timer_race++;
206 		INP_INFO_WUNLOCK(&tcbinfo);
207 		return;
208 	}
209 	INP_LOCK(inp);
210 	tcp_free_sackholes(tp);
211 	if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_2msl) ||
212 	    !callout_active(&tp->t_timers->tt_2msl)) {
213 		INP_UNLOCK(tp->t_inpcb);
214 		INP_INFO_WUNLOCK(&tcbinfo);
215 		return;
216 	}
217 	callout_deactivate(&tp->t_timers->tt_2msl);
218 	/*
219 	 * 2 MSL timeout in shutdown went off.  If we're closed but
220 	 * still waiting for peer to close and connection has been idle
221 	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
222 	 * control block.  Otherwise, check again in a bit.
223 	 *
224 	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
225 	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
226 	 * Ignore fact that there were recent incoming segments.
227 	 */
228 	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
229 	    tp->t_inpcb && tp->t_inpcb->inp_socket &&
230 	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
231 		tcpstat.tcps_finwait2_drops++;
232 		tp = tcp_close(tp);
233 	} else {
234 		if (tp->t_state != TCPS_TIME_WAIT &&
235 		   (ticks - tp->t_rcvtime) <= tcp_maxidle)
236 		       callout_reset(&tp->t_timers->tt_2msl, tcp_keepintvl,
237 				     tcp_timer_2msl, tp);
238 	       else
239 		       tp = tcp_close(tp);
240        }
241 
242 #ifdef TCPDEBUG
243 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
244 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
245 			  PRU_SLOWTIMO);
246 #endif
247 	if (tp != NULL)
248 		INP_UNLOCK(inp);
249 	INP_INFO_WUNLOCK(&tcbinfo);
250 }
251 
252 void
253 tcp_timer_keep(void *xtp)
254 {
255 	struct tcpcb *tp = xtp;
256 	struct tcptemp *t_template;
257 	struct inpcb *inp;
258 #ifdef TCPDEBUG
259 	int ostate;
260 
261 	ostate = tp->t_state;
262 #endif
263 	INP_INFO_WLOCK(&tcbinfo);
264 	inp = tp->t_inpcb;
265 	/*
266 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
267 	 * tear-down mean we need it as a work-around for races between
268 	 * timers and tcp_discardcb().
269 	 *
270 	 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
271 	 */
272 	if (inp == NULL) {
273 		tcp_timer_race++;
274 		INP_INFO_WUNLOCK(&tcbinfo);
275 		return;
276 	}
277 	INP_LOCK(inp);
278 	if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_keep)
279 	    || !callout_active(&tp->t_timers->tt_keep)) {
280 		INP_UNLOCK(inp);
281 		INP_INFO_WUNLOCK(&tcbinfo);
282 		return;
283 	}
284 	callout_deactivate(&tp->t_timers->tt_keep);
285 	/*
286 	 * Keep-alive timer went off; send something
287 	 * or drop connection if idle for too long.
288 	 */
289 	tcpstat.tcps_keeptimeo++;
290 	if (tp->t_state < TCPS_ESTABLISHED)
291 		goto dropit;
292 	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
293 	    tp->t_state <= TCPS_CLOSING) {
294 		if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle)
295 			goto dropit;
296 		/*
297 		 * Send a packet designed to force a response
298 		 * if the peer is up and reachable:
299 		 * either an ACK if the connection is still alive,
300 		 * or an RST if the peer has closed the connection
301 		 * due to timeout or reboot.
302 		 * Using sequence number tp->snd_una-1
303 		 * causes the transmitted zero-length segment
304 		 * to lie outside the receive window;
305 		 * by the protocol spec, this requires the
306 		 * correspondent TCP to respond.
307 		 */
308 		tcpstat.tcps_keepprobe++;
309 		t_template = tcpip_maketemplate(inp);
310 		if (t_template) {
311 			tcp_respond(tp, t_template->tt_ipgen,
312 				    &t_template->tt_t, (struct mbuf *)NULL,
313 				    tp->rcv_nxt, tp->snd_una - 1, 0);
314 			(void) m_free(dtom(t_template));
315 		}
316 		callout_reset(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
317 	} else
318 		callout_reset(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
319 
320 #ifdef TCPDEBUG
321 	if (inp->inp_socket->so_options & SO_DEBUG)
322 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
323 			  PRU_SLOWTIMO);
324 #endif
325 	INP_UNLOCK(inp);
326 	INP_INFO_WUNLOCK(&tcbinfo);
327 	return;
328 
329 dropit:
330 	tcpstat.tcps_keepdrops++;
331 	tp = tcp_drop(tp, ETIMEDOUT);
332 
333 #ifdef TCPDEBUG
334 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
335 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
336 			  PRU_SLOWTIMO);
337 #endif
338 	if (tp != NULL)
339 		INP_UNLOCK(tp->t_inpcb);
340 	INP_INFO_WUNLOCK(&tcbinfo);
341 }
342 
343 void
344 tcp_timer_persist(void *xtp)
345 {
346 	struct tcpcb *tp = xtp;
347 	struct inpcb *inp;
348 #ifdef TCPDEBUG
349 	int ostate;
350 
351 	ostate = tp->t_state;
352 #endif
353 	INP_INFO_WLOCK(&tcbinfo);
354 	inp = tp->t_inpcb;
355 	/*
356 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
357 	 * tear-down mean we need it as a work-around for races between
358 	 * timers and tcp_discardcb().
359 	 *
360 	 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
361 	 */
362 	if (inp == NULL) {
363 		tcp_timer_race++;
364 		INP_INFO_WUNLOCK(&tcbinfo);
365 		return;
366 	}
367 	INP_LOCK(inp);
368 	if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_persist)
369 	    || !callout_active(&tp->t_timers->tt_persist)) {
370 		INP_UNLOCK(inp);
371 		INP_INFO_WUNLOCK(&tcbinfo);
372 		return;
373 	}
374 	callout_deactivate(&tp->t_timers->tt_persist);
375 	/*
376 	 * Persistance timer into zero window.
377 	 * Force a byte to be output, if possible.
378 	 */
379 	tcpstat.tcps_persisttimeo++;
380 	/*
381 	 * Hack: if the peer is dead/unreachable, we do not
382 	 * time out if the window is closed.  After a full
383 	 * backoff, drop the connection if the idle time
384 	 * (no responses to probes) reaches the maximum
385 	 * backoff that we would use if retransmitting.
386 	 */
387 	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
388 	    ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle ||
389 	     (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
390 		tcpstat.tcps_persistdrop++;
391 		tp = tcp_drop(tp, ETIMEDOUT);
392 		goto out;
393 	}
394 	tcp_setpersist(tp);
395 	tp->t_flags |= TF_FORCEDATA;
396 	(void) tcp_output(tp);
397 	tp->t_flags &= ~TF_FORCEDATA;
398 
399 out:
400 #ifdef TCPDEBUG
401 	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
402 		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
403 #endif
404 	if (tp != NULL)
405 		INP_UNLOCK(inp);
406 	INP_INFO_WUNLOCK(&tcbinfo);
407 }
408 
409 void
410 tcp_timer_rexmt(void * xtp)
411 {
412 	struct tcpcb *tp = xtp;
413 	int rexmt;
414 	int headlocked;
415 	struct inpcb *inp;
416 #ifdef TCPDEBUG
417 	int ostate;
418 
419 	ostate = tp->t_state;
420 #endif
421 	INP_INFO_WLOCK(&tcbinfo);
422 	headlocked = 1;
423 	inp = tp->t_inpcb;
424 	/*
425 	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
426 	 * tear-down mean we need it as a work-around for races between
427 	 * timers and tcp_discardcb().
428 	 *
429 	 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
430 	 */
431 	if (inp == NULL) {
432 		tcp_timer_race++;
433 		INP_INFO_WUNLOCK(&tcbinfo);
434 		return;
435 	}
436 	INP_LOCK(inp);
437 	if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_rexmt)
438 	    || !callout_active(&tp->t_timers->tt_rexmt)) {
439 		INP_UNLOCK(inp);
440 		INP_INFO_WUNLOCK(&tcbinfo);
441 		return;
442 	}
443 	callout_deactivate(&tp->t_timers->tt_rexmt);
444 	tcp_free_sackholes(tp);
445 	/*
446 	 * Retransmission timer went off.  Message has not
447 	 * been acked within retransmit interval.  Back off
448 	 * to a longer retransmit interval and retransmit one segment.
449 	 */
450 	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
451 		tp->t_rxtshift = TCP_MAXRXTSHIFT;
452 		tcpstat.tcps_timeoutdrop++;
453 		tp = tcp_drop(tp, tp->t_softerror ?
454 			      tp->t_softerror : ETIMEDOUT);
455 		goto out;
456 	}
457 	INP_INFO_WUNLOCK(&tcbinfo);
458 	headlocked = 0;
459 	if (tp->t_rxtshift == 1) {
460 		/*
461 		 * first retransmit; record ssthresh and cwnd so they can
462 		 * be recovered if this turns out to be a "bad" retransmit.
463 		 * A retransmit is considered "bad" if an ACK for this
464 		 * segment is received within RTT/2 interval; the assumption
465 		 * here is that the ACK was already in flight.  See
466 		 * "On Estimating End-to-End Network Path Properties" by
467 		 * Allman and Paxson for more details.
468 		 */
469 		tp->snd_cwnd_prev = tp->snd_cwnd;
470 		tp->snd_ssthresh_prev = tp->snd_ssthresh;
471 		tp->snd_recover_prev = tp->snd_recover;
472 		if (IN_FASTRECOVERY(tp))
473 		  tp->t_flags |= TF_WASFRECOVERY;
474 		else
475 		  tp->t_flags &= ~TF_WASFRECOVERY;
476 		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
477 	}
478 	tcpstat.tcps_rexmttimeo++;
479 	if (tp->t_state == TCPS_SYN_SENT)
480 		rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
481 	else
482 		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
483 	TCPT_RANGESET(tp->t_rxtcur, rexmt,
484 		      tp->t_rttmin, TCPTV_REXMTMAX);
485 	/*
486 	 * Disable rfc1323 if we havn't got any response to
487 	 * our third SYN to work-around some broken terminal servers
488 	 * (most of which have hopefully been retired) that have bad VJ
489 	 * header compression code which trashes TCP segments containing
490 	 * unknown-to-them TCP options.
491 	 */
492 	if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
493 		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
494 	/*
495 	 * If we backed off this far, our srtt estimate is probably bogus.
496 	 * Clobber it so we'll take the next rtt measurement as our srtt;
497 	 * move the current srtt into rttvar to keep the current
498 	 * retransmit times until then.
499 	 */
500 	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
501 #ifdef INET6
502 		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
503 			in6_losing(tp->t_inpcb);
504 		else
505 #endif
506 		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
507 		tp->t_srtt = 0;
508 	}
509 	tp->snd_nxt = tp->snd_una;
510 	tp->snd_recover = tp->snd_max;
511 	/*
512 	 * Force a segment to be sent.
513 	 */
514 	tp->t_flags |= TF_ACKNOW;
515 	/*
516 	 * If timing a segment in this window, stop the timer.
517 	 */
518 	tp->t_rtttime = 0;
519 	/*
520 	 * Close the congestion window down to one segment
521 	 * (we'll open it by one segment for each ack we get).
522 	 * Since we probably have a window's worth of unacked
523 	 * data accumulated, this "slow start" keeps us from
524 	 * dumping all that data as back-to-back packets (which
525 	 * might overwhelm an intermediate gateway).
526 	 *
527 	 * There are two phases to the opening: Initially we
528 	 * open by one mss on each ack.  This makes the window
529 	 * size increase exponentially with time.  If the
530 	 * window is larger than the path can handle, this
531 	 * exponential growth results in dropped packet(s)
532 	 * almost immediately.  To get more time between
533 	 * drops but still "push" the network to take advantage
534 	 * of improving conditions, we switch from exponential
535 	 * to linear window opening at some threshhold size.
536 	 * For a threshhold, we use half the current window
537 	 * size, truncated to a multiple of the mss.
538 	 *
539 	 * (the minimum cwnd that will give us exponential
540 	 * growth is 2 mss.  We don't allow the threshhold
541 	 * to go below this.)
542 	 */
543 	{
544 		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
545 		if (win < 2)
546 			win = 2;
547 		tp->snd_cwnd = tp->t_maxseg;
548 		tp->snd_ssthresh = win * tp->t_maxseg;
549 		tp->t_dupacks = 0;
550 	}
551 	EXIT_FASTRECOVERY(tp);
552 	(void) tcp_output(tp);
553 
554 out:
555 #ifdef TCPDEBUG
556 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
557 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
558 			  PRU_SLOWTIMO);
559 #endif
560 	if (tp != NULL)
561 		INP_UNLOCK(inp);
562 	if (headlocked)
563 		INP_INFO_WUNLOCK(&tcbinfo);
564 }
565 
566 void
567 tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
568 {
569 	struct callout *t_callout;
570 	void *f_callout;
571 
572 	switch (timer_type) {
573 		case TT_DELACK:
574 			t_callout = &tp->t_timers->tt_delack;
575 			f_callout = tcp_timer_delack;
576 			break;
577 		case TT_REXMT:
578 			t_callout = &tp->t_timers->tt_rexmt;
579 			f_callout = tcp_timer_rexmt;
580 			break;
581 		case TT_PERSIST:
582 			t_callout = &tp->t_timers->tt_persist;
583 			f_callout = tcp_timer_persist;
584 			break;
585 		case TT_KEEP:
586 			t_callout = &tp->t_timers->tt_keep;
587 			f_callout = tcp_timer_keep;
588 			break;
589 		case TT_2MSL:
590 			t_callout = &tp->t_timers->tt_2msl;
591 			f_callout = tcp_timer_2msl;
592 			break;
593 		default:
594 			panic("bad timer_type");
595 		}
596 	if (delta == 0) {
597 		callout_stop(t_callout);
598 	} else {
599 		callout_reset(t_callout, delta, f_callout, tp);
600 	}
601 }
602 
603 int
604 tcp_timer_active(struct tcpcb *tp, int timer_type)
605 {
606 	struct callout *t_callout;
607 
608 	switch (timer_type) {
609 		case TT_DELACK:
610 			t_callout = &tp->t_timers->tt_delack;
611 			break;
612 		case TT_REXMT:
613 			t_callout = &tp->t_timers->tt_rexmt;
614 			break;
615 		case TT_PERSIST:
616 			t_callout = &tp->t_timers->tt_persist;
617 			break;
618 		case TT_KEEP:
619 			t_callout = &tp->t_timers->tt_keep;
620 			break;
621 		case TT_2MSL:
622 			t_callout = &tp->t_timers->tt_2msl;
623 			break;
624 		default:
625 			panic("bad timer_type");
626 		}
627 	return callout_active(t_callout);
628 }
629