1 /*- 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 30 * $FreeBSD$ 31 */ 32 33 #include "opt_inet6.h" 34 #include "opt_tcpdebug.h" 35 #include "opt_tcp_sack.h" 36 37 #include <sys/param.h> 38 #include <sys/kernel.h> 39 #include <sys/lock.h> 40 #include <sys/mbuf.h> 41 #include <sys/mutex.h> 42 #include <sys/protosw.h> 43 #include <sys/socket.h> 44 #include <sys/socketvar.h> 45 #include <sys/sysctl.h> 46 #include <sys/systm.h> 47 48 #include <net/route.h> 49 50 #include <netinet/in.h> 51 #include <netinet/in_pcb.h> 52 #include <netinet/in_systm.h> 53 #ifdef INET6 54 #include <netinet6/in6_pcb.h> 55 #endif 56 #include <netinet/ip_var.h> 57 #include <netinet/tcp.h> 58 #include <netinet/tcp_fsm.h> 59 #include <netinet/tcp_timer.h> 60 #include <netinet/tcp_var.h> 61 #include <netinet/tcpip.h> 62 #ifdef TCPDEBUG 63 #include <netinet/tcp_debug.h> 64 #endif 65 66 int tcp_keepinit; 67 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 68 &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", ""); 69 70 int tcp_keepidle; 71 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 72 &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", ""); 73 74 int tcp_keepintvl; 75 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 76 &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", ""); 77 78 int tcp_delacktime; 79 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, 80 &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 81 "Time before a delayed ACK is sent"); 82 83 int tcp_msl; 84 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 85 &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 86 87 int tcp_rexmit_min; 88 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 89 &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 90 "Minimum Retransmission Timeout"); 91 92 int tcp_rexmit_slop; 93 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 94 &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 95 "Retransmission Timer Slop"); 96 97 static int always_keepalive = 1; 98 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 99 &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 100 101 int tcp_fast_finwait2_recycle = 0; 102 SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 103 &tcp_fast_finwait2_recycle, 0, 104 "Recycle closed FIN_WAIT_2 connections faster"); 105 106 int tcp_finwait2_timeout; 107 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, 108 &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); 109 110 111 static int tcp_keepcnt = TCPTV_KEEPCNT; 112 /* max idle probes */ 113 int tcp_maxpersistidle; 114 /* max idle time in persist */ 115 int tcp_maxidle; 116 117 /* 118 * Tcp protocol timeout routine called every 500 ms. 119 * Updates timestamps used for TCP 120 * causes finite state machine actions if timers expire. 121 */ 122 void 123 tcp_slowtimo() 124 { 125 126 tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 127 INP_INFO_WLOCK(&tcbinfo); 128 (void) tcp_timer_2msl_tw(0); 129 INP_INFO_WUNLOCK(&tcbinfo); 130 } 131 132 int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 133 { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 134 135 int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 136 { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 137 138 static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 139 140 static int tcp_timer_race; 141 SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race, 142 0, "Count of t_inpcb races on tcp_discardcb"); 143 144 /* 145 * TCP timer processing. 146 */ 147 148 void 149 tcp_timer_delack(void *xtp) 150 { 151 struct tcpcb *tp = xtp; 152 struct inpcb *inp; 153 154 INP_INFO_RLOCK(&tcbinfo); 155 inp = tp->t_inpcb; 156 /* 157 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 158 * tear-down mean we need it as a work-around for races between 159 * timers and tcp_discardcb(). 160 * 161 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL")); 162 */ 163 if (inp == NULL) { 164 tcp_timer_race++; 165 INP_INFO_RUNLOCK(&tcbinfo); 166 return; 167 } 168 INP_LOCK(inp); 169 INP_INFO_RUNLOCK(&tcbinfo); 170 if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_delack) 171 || !callout_active(tp->tt_delack)) { 172 INP_UNLOCK(inp); 173 return; 174 } 175 callout_deactivate(tp->tt_delack); 176 177 tp->t_flags |= TF_ACKNOW; 178 tcpstat.tcps_delack++; 179 (void) tcp_output(tp); 180 INP_UNLOCK(inp); 181 } 182 183 void 184 tcp_timer_2msl(void *xtp) 185 { 186 struct tcpcb *tp = xtp; 187 struct inpcb *inp; 188 #ifdef TCPDEBUG 189 int ostate; 190 191 ostate = tp->t_state; 192 #endif 193 /* 194 * XXXRW: Does this actually happen? 195 */ 196 INP_INFO_WLOCK(&tcbinfo); 197 inp = tp->t_inpcb; 198 /* 199 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 200 * tear-down mean we need it as a work-around for races between 201 * timers and tcp_discardcb(). 202 * 203 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL")); 204 */ 205 if (inp == NULL) { 206 tcp_timer_race++; 207 INP_INFO_WUNLOCK(&tcbinfo); 208 return; 209 } 210 INP_LOCK(inp); 211 tcp_free_sackholes(tp); 212 if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_2msl) || 213 !callout_active(tp->tt_2msl)) { 214 INP_UNLOCK(tp->t_inpcb); 215 INP_INFO_WUNLOCK(&tcbinfo); 216 return; 217 } 218 callout_deactivate(tp->tt_2msl); 219 /* 220 * 2 MSL timeout in shutdown went off. If we're closed but 221 * still waiting for peer to close and connection has been idle 222 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 223 * control block. Otherwise, check again in a bit. 224 * 225 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 226 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 227 * Ignore fact that there were recent incoming segments. 228 */ 229 if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 230 tp->t_inpcb && tp->t_inpcb->inp_socket && 231 (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 232 tcpstat.tcps_finwait2_drops++; 233 tp = tcp_close(tp); 234 } else { 235 if (tp->t_state != TCPS_TIME_WAIT && 236 (ticks - tp->t_rcvtime) <= tcp_maxidle) 237 callout_reset(tp->tt_2msl, tcp_keepintvl, 238 tcp_timer_2msl, tp); 239 else 240 tp = tcp_close(tp); 241 } 242 243 #ifdef TCPDEBUG 244 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 245 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 246 PRU_SLOWTIMO); 247 #endif 248 if (tp != NULL) 249 INP_UNLOCK(inp); 250 INP_INFO_WUNLOCK(&tcbinfo); 251 } 252 253 /* 254 * The timed wait queue contains references to each of the TCP sessions 255 * currently in the TIME_WAIT state. The queue pointers, including the 256 * queue pointers in each tcptw structure, are protected using the global 257 * tcbinfo lock, which must be held over queue iteration and modification. 258 */ 259 static TAILQ_HEAD(, tcptw) twq_2msl; 260 261 void 262 tcp_timer_init(void) 263 { 264 265 TAILQ_INIT(&twq_2msl); 266 } 267 268 void 269 tcp_timer_2msl_reset(struct tcptw *tw, int rearm) 270 { 271 272 INP_INFO_WLOCK_ASSERT(&tcbinfo); 273 INP_LOCK_ASSERT(tw->tw_inpcb); 274 if (rearm) 275 TAILQ_REMOVE(&twq_2msl, tw, tw_2msl); 276 tw->tw_time = ticks + 2 * tcp_msl; 277 TAILQ_INSERT_TAIL(&twq_2msl, tw, tw_2msl); 278 } 279 280 void 281 tcp_timer_2msl_stop(struct tcptw *tw) 282 { 283 284 INP_INFO_WLOCK_ASSERT(&tcbinfo); 285 TAILQ_REMOVE(&twq_2msl, tw, tw_2msl); 286 } 287 288 struct tcptw * 289 tcp_timer_2msl_tw(int reuse) 290 { 291 struct tcptw *tw; 292 293 INP_INFO_WLOCK_ASSERT(&tcbinfo); 294 for (;;) { 295 tw = TAILQ_FIRST(&twq_2msl); 296 if (tw == NULL || (!reuse && tw->tw_time > ticks)) 297 break; 298 INP_LOCK(tw->tw_inpcb); 299 tcp_twclose(tw, reuse); 300 if (reuse) 301 return (tw); 302 } 303 return (NULL); 304 } 305 306 void 307 tcp_timer_keep(void *xtp) 308 { 309 struct tcpcb *tp = xtp; 310 struct tcptemp *t_template; 311 struct inpcb *inp; 312 #ifdef TCPDEBUG 313 int ostate; 314 315 ostate = tp->t_state; 316 #endif 317 INP_INFO_WLOCK(&tcbinfo); 318 inp = tp->t_inpcb; 319 /* 320 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 321 * tear-down mean we need it as a work-around for races between 322 * timers and tcp_discardcb(). 323 * 324 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL")); 325 */ 326 if (inp == NULL) { 327 tcp_timer_race++; 328 INP_INFO_WUNLOCK(&tcbinfo); 329 return; 330 } 331 INP_LOCK(inp); 332 if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_keep) 333 || !callout_active(tp->tt_keep)) { 334 INP_UNLOCK(inp); 335 INP_INFO_WUNLOCK(&tcbinfo); 336 return; 337 } 338 callout_deactivate(tp->tt_keep); 339 /* 340 * Keep-alive timer went off; send something 341 * or drop connection if idle for too long. 342 */ 343 tcpstat.tcps_keeptimeo++; 344 if (tp->t_state < TCPS_ESTABLISHED) 345 goto dropit; 346 if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && 347 tp->t_state <= TCPS_CLOSING) { 348 if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle) 349 goto dropit; 350 /* 351 * Send a packet designed to force a response 352 * if the peer is up and reachable: 353 * either an ACK if the connection is still alive, 354 * or an RST if the peer has closed the connection 355 * due to timeout or reboot. 356 * Using sequence number tp->snd_una-1 357 * causes the transmitted zero-length segment 358 * to lie outside the receive window; 359 * by the protocol spec, this requires the 360 * correspondent TCP to respond. 361 */ 362 tcpstat.tcps_keepprobe++; 363 t_template = tcpip_maketemplate(inp); 364 if (t_template) { 365 tcp_respond(tp, t_template->tt_ipgen, 366 &t_template->tt_t, (struct mbuf *)NULL, 367 tp->rcv_nxt, tp->snd_una - 1, 0); 368 (void) m_free(dtom(t_template)); 369 } 370 callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp); 371 } else 372 callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp); 373 374 #ifdef TCPDEBUG 375 if (inp->inp_socket->so_options & SO_DEBUG) 376 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 377 PRU_SLOWTIMO); 378 #endif 379 INP_UNLOCK(inp); 380 INP_INFO_WUNLOCK(&tcbinfo); 381 return; 382 383 dropit: 384 tcpstat.tcps_keepdrops++; 385 tp = tcp_drop(tp, ETIMEDOUT); 386 387 #ifdef TCPDEBUG 388 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 389 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 390 PRU_SLOWTIMO); 391 #endif 392 if (tp != NULL) 393 INP_UNLOCK(tp->t_inpcb); 394 INP_INFO_WUNLOCK(&tcbinfo); 395 } 396 397 void 398 tcp_timer_persist(void *xtp) 399 { 400 struct tcpcb *tp = xtp; 401 struct inpcb *inp; 402 #ifdef TCPDEBUG 403 int ostate; 404 405 ostate = tp->t_state; 406 #endif 407 INP_INFO_WLOCK(&tcbinfo); 408 inp = tp->t_inpcb; 409 /* 410 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 411 * tear-down mean we need it as a work-around for races between 412 * timers and tcp_discardcb(). 413 * 414 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL")); 415 */ 416 if (inp == NULL) { 417 tcp_timer_race++; 418 INP_INFO_WUNLOCK(&tcbinfo); 419 return; 420 } 421 INP_LOCK(inp); 422 if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_persist) 423 || !callout_active(tp->tt_persist)) { 424 INP_UNLOCK(inp); 425 INP_INFO_WUNLOCK(&tcbinfo); 426 return; 427 } 428 callout_deactivate(tp->tt_persist); 429 /* 430 * Persistance timer into zero window. 431 * Force a byte to be output, if possible. 432 */ 433 tcpstat.tcps_persisttimeo++; 434 /* 435 * Hack: if the peer is dead/unreachable, we do not 436 * time out if the window is closed. After a full 437 * backoff, drop the connection if the idle time 438 * (no responses to probes) reaches the maximum 439 * backoff that we would use if retransmitting. 440 */ 441 if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 442 ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle || 443 (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 444 tcpstat.tcps_persistdrop++; 445 tp = tcp_drop(tp, ETIMEDOUT); 446 goto out; 447 } 448 tcp_setpersist(tp); 449 tp->t_flags |= TF_FORCEDATA; 450 (void) tcp_output(tp); 451 tp->t_flags &= ~TF_FORCEDATA; 452 453 out: 454 #ifdef TCPDEBUG 455 if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 456 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 457 #endif 458 if (tp != NULL) 459 INP_UNLOCK(inp); 460 INP_INFO_WUNLOCK(&tcbinfo); 461 } 462 463 void 464 tcp_timer_rexmt(void * xtp) 465 { 466 struct tcpcb *tp = xtp; 467 int rexmt; 468 int headlocked; 469 struct inpcb *inp; 470 #ifdef TCPDEBUG 471 int ostate; 472 473 ostate = tp->t_state; 474 #endif 475 INP_INFO_WLOCK(&tcbinfo); 476 headlocked = 1; 477 inp = tp->t_inpcb; 478 /* 479 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 480 * tear-down mean we need it as a work-around for races between 481 * timers and tcp_discardcb(). 482 * 483 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL")); 484 */ 485 if (inp == NULL) { 486 tcp_timer_race++; 487 INP_INFO_WUNLOCK(&tcbinfo); 488 return; 489 } 490 INP_LOCK(inp); 491 if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_rexmt) 492 || !callout_active(tp->tt_rexmt)) { 493 INP_UNLOCK(inp); 494 INP_INFO_WUNLOCK(&tcbinfo); 495 return; 496 } 497 callout_deactivate(tp->tt_rexmt); 498 tcp_free_sackholes(tp); 499 /* 500 * Retransmission timer went off. Message has not 501 * been acked within retransmit interval. Back off 502 * to a longer retransmit interval and retransmit one segment. 503 */ 504 if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 505 tp->t_rxtshift = TCP_MAXRXTSHIFT; 506 tcpstat.tcps_timeoutdrop++; 507 tp = tcp_drop(tp, tp->t_softerror ? 508 tp->t_softerror : ETIMEDOUT); 509 goto out; 510 } 511 INP_INFO_WUNLOCK(&tcbinfo); 512 headlocked = 0; 513 if (tp->t_rxtshift == 1) { 514 /* 515 * first retransmit; record ssthresh and cwnd so they can 516 * be recovered if this turns out to be a "bad" retransmit. 517 * A retransmit is considered "bad" if an ACK for this 518 * segment is received within RTT/2 interval; the assumption 519 * here is that the ACK was already in flight. See 520 * "On Estimating End-to-End Network Path Properties" by 521 * Allman and Paxson for more details. 522 */ 523 tp->snd_cwnd_prev = tp->snd_cwnd; 524 tp->snd_ssthresh_prev = tp->snd_ssthresh; 525 tp->snd_recover_prev = tp->snd_recover; 526 if (IN_FASTRECOVERY(tp)) 527 tp->t_flags |= TF_WASFRECOVERY; 528 else 529 tp->t_flags &= ~TF_WASFRECOVERY; 530 tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 531 } 532 tcpstat.tcps_rexmttimeo++; 533 if (tp->t_state == TCPS_SYN_SENT) 534 rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; 535 else 536 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 537 TCPT_RANGESET(tp->t_rxtcur, rexmt, 538 tp->t_rttmin, TCPTV_REXMTMAX); 539 /* 540 * Disable rfc1323 if we havn't got any response to 541 * our third SYN to work-around some broken terminal servers 542 * (most of which have hopefully been retired) that have bad VJ 543 * header compression code which trashes TCP segments containing 544 * unknown-to-them TCP options. 545 */ 546 if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) 547 tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP); 548 /* 549 * If we backed off this far, our srtt estimate is probably bogus. 550 * Clobber it so we'll take the next rtt measurement as our srtt; 551 * move the current srtt into rttvar to keep the current 552 * retransmit times until then. 553 */ 554 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 555 #ifdef INET6 556 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) 557 in6_losing(tp->t_inpcb); 558 else 559 #endif 560 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 561 tp->t_srtt = 0; 562 } 563 tp->snd_nxt = tp->snd_una; 564 tp->snd_recover = tp->snd_max; 565 /* 566 * Force a segment to be sent. 567 */ 568 tp->t_flags |= TF_ACKNOW; 569 /* 570 * If timing a segment in this window, stop the timer. 571 */ 572 tp->t_rtttime = 0; 573 /* 574 * Close the congestion window down to one segment 575 * (we'll open it by one segment for each ack we get). 576 * Since we probably have a window's worth of unacked 577 * data accumulated, this "slow start" keeps us from 578 * dumping all that data as back-to-back packets (which 579 * might overwhelm an intermediate gateway). 580 * 581 * There are two phases to the opening: Initially we 582 * open by one mss on each ack. This makes the window 583 * size increase exponentially with time. If the 584 * window is larger than the path can handle, this 585 * exponential growth results in dropped packet(s) 586 * almost immediately. To get more time between 587 * drops but still "push" the network to take advantage 588 * of improving conditions, we switch from exponential 589 * to linear window opening at some threshhold size. 590 * For a threshhold, we use half the current window 591 * size, truncated to a multiple of the mss. 592 * 593 * (the minimum cwnd that will give us exponential 594 * growth is 2 mss. We don't allow the threshhold 595 * to go below this.) 596 */ 597 { 598 u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; 599 if (win < 2) 600 win = 2; 601 tp->snd_cwnd = tp->t_maxseg; 602 tp->snd_ssthresh = win * tp->t_maxseg; 603 tp->t_dupacks = 0; 604 } 605 EXIT_FASTRECOVERY(tp); 606 (void) tcp_output(tp); 607 608 out: 609 #ifdef TCPDEBUG 610 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 611 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 612 PRU_SLOWTIMO); 613 #endif 614 if (tp != NULL) 615 INP_UNLOCK(inp); 616 if (headlocked) 617 INP_INFO_WUNLOCK(&tcbinfo); 618 } 619