/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 * $FreeBSD$
 */

#include "opt_inet6.h"
#include "opt_tcpdebug.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/limits.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif

int	tcp_keepinit;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I",
    "Time to establish a connection before it is dropped");

int	tcp_keepidle;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I",
    "Idle time before the first keepalive probe is sent");

int	tcp_keepintvl;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I",
    "Interval between keepalive probes");

int	tcp_delacktime;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime,
    CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
    "Time before a delayed ACK is sent");

int	tcp_msl;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");

int	tcp_rexmit_min;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
    "Minimum Retransmission Timeout");
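
/*
 * Illustrative note (an addition, not from the original source): the
 * SYSCTL_PROC handlers above and below accept values in milliseconds
 * and store them in ticks via sysctl_msec_to_ticks().  For example,
 * with hz = 1000 (one tick per millisecond), setting
 * net.inet.tcp.keepidle to 7200000 stores 7200000 ticks, i.e. the
 * traditional two-hour keepalive idle time; with hz = 100 the same
 * setting stores 720000 ticks.
 */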

int	tcp_rexmit_slop;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
    "Retransmission Timer Slop");

static int	always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
    &always_keepalive, 0, "Assume SO_KEEPALIVE on all TCP connections");

int	tcp_fast_finwait2_recycle = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
    &tcp_fast_finwait2_recycle, 0,
    "Recycle closed FIN_WAIT_2 connections faster");

int	tcp_finwait2_timeout;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");

static int	tcp_keepcnt = TCPTV_KEEPCNT;	/* max idle probes */
int	tcp_maxpersistidle;			/* max idle time in persist */
int	tcp_maxidle;

static void	tcp_timer(void *);
static int	tcp_timer_delack(struct tcpcb *, struct inpcb *);
static int	tcp_timer_2msl(struct tcpcb *, struct inpcb *);
static int	tcp_timer_keep(struct tcpcb *, struct inpcb *);
static int	tcp_timer_persist(struct tcpcb *, struct inpcb *);
static int	tcp_timer_rexmt(struct tcpcb *, struct inpcb *);

/*
 * TCP protocol timeout routine called every 500 ms.
 * Updates timestamps used for TCP and causes finite
 * state machine actions if timers expire.
 */
void
tcp_slowtimo(void)
{

	tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
	INP_INFO_WLOCK(&tcbinfo);
	(void) tcp_tw_2msl_scan(0);
	INP_INFO_WUNLOCK(&tcbinfo);
}

int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };

int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };

static int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */

static int tcp_timer_race;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
    0, "Count of t_inpcb races on tcp_discardcb");

void
tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
{
	struct inpcb *inp = tp->t_inpcb;
	struct tcp_timer *tt = tp->t_timers;
	int tick = ticks;			/* Stable time base. */
	int next = delta ? tick + delta : 0;

	INP_LOCK_ASSERT(inp);

	CTR6(KTR_NET, "%p %s inp %p active %x delta %i nextc %i",
	    tp, __func__, inp, tt->tt_active, delta, tt->tt_nextc);

	/* Set new value for timer. */
	switch (timer_type) {
	case TT_DELACK:
		CTR4(KTR_NET, "%p %s TT_DELACK old %i new %i",
		    tp, __func__, tt->tt_delack, next);
		tt->tt_delack = next;
		break;
	case TT_REXMT:
		CTR4(KTR_NET, "%p %s TT_REXMT old %i new %i",
		    tp, __func__, tt->tt_rexmt, next);
		tt->tt_rexmt = next;
		break;
	case TT_PERSIST:
		CTR4(KTR_NET, "%p %s TT_PERSIST old %i new %i",
		    tp, __func__, tt->tt_persist, next);
		tt->tt_persist = next;
		break;
	case TT_KEEP:
		CTR4(KTR_NET, "%p %s TT_KEEP old %i new %i",
		    tp, __func__, tt->tt_keep, next);
		tt->tt_keep = next;
		break;
	case TT_2MSL:
		CTR4(KTR_NET, "%p %s TT_2MSL old %i new %i",
		    tp, __func__, tt->tt_2msl, next);
		tt->tt_2msl = next;
		break;
	case 0:					/* Dummy for timer rescan. */
		CTR3(KTR_NET, "%p %s timer rescan new %i", tp, __func__, next);
		break;
	}
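
	/*
	 * Illustrative note (an addition, not from the original source):
	 * the five logical timers share the single callout tt_timer;
	 * tt_nextc holds the absolute tick of the earliest pending timer
	 * and tt_active records which one it is.  For example, if
	 * TT_REXMT is pending at tick + 100 and a delayed ACK is now
	 * scheduled for tick + 40, the rescan below rearms the callout
	 * for the earlier deadline; arming TT_KEEP for a much later
	 * tick merely records it, to be picked up by a later rescan.
	 */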

	/*
	 * If some other timer is active and is scheduled sooner,
	 * just return.
	 */
	if (tt->tt_active != timer_type && tt->tt_nextc < next &&
	    callout_active(&tt->tt_timer))
		return;

	/* Select next timer to schedule. */
	tt->tt_nextc = INT_MAX;
	tt->tt_active = 0;
	if (tt->tt_delack && tt->tt_delack < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_delack;
		tt->tt_active = TT_DELACK;
	}
	if (tt->tt_rexmt && tt->tt_rexmt < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_rexmt;
		tt->tt_active = TT_REXMT;
	}
	if (tt->tt_persist && tt->tt_persist < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_persist;
		tt->tt_active = TT_PERSIST;
	}
	if (tt->tt_keep && tt->tt_keep < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_keep;
		tt->tt_active = TT_KEEP;
	}
	if (tt->tt_2msl && tt->tt_2msl < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_2msl;
		tt->tt_active = TT_2MSL;
	}

	/* Rearm callout with new timer if we found one. */
	if (tt->tt_active) {
		CTR4(KTR_NET, "%p %s callout_reset active %x nextc in %i",
		    tp, __func__, tt->tt_active, tt->tt_nextc - tick);
		callout_reset(&tt->tt_timer,
		    tt->tt_nextc - tick, tcp_timer, (void *)inp);
	} else {
		CTR2(KTR_NET, "%p %s callout_stop", tp, __func__);
		callout_stop(&tt->tt_timer);
		tt->tt_nextc = 0;
	}

	return;
}

int
tcp_timer_active(struct tcpcb *tp, int timer_type)
{

	switch (timer_type) {
	case TT_DELACK:
		CTR3(KTR_NET, "%p %s TT_DELACK %i",
		    tp, __func__, tp->t_timers->tt_delack);
		return (tp->t_timers->tt_delack ? 1 : 0);
		break;
	case TT_REXMT:
		CTR3(KTR_NET, "%p %s TT_REXMT %i",
		    tp, __func__, tp->t_timers->tt_rexmt);
		return (tp->t_timers->tt_rexmt ? 1 : 0);
		break;
	case TT_PERSIST:
		CTR3(KTR_NET, "%p %s TT_PERSIST %i",
		    tp, __func__, tp->t_timers->tt_persist);
		return (tp->t_timers->tt_persist ? 1 : 0);
		break;
	case TT_KEEP:
		CTR3(KTR_NET, "%p %s TT_KEEP %i",
		    tp, __func__, tp->t_timers->tt_keep);
		return (tp->t_timers->tt_keep ? 1 : 0);
		break;
	case TT_2MSL:
		CTR3(KTR_NET, "%p %s TT_2MSL %i",
		    tp, __func__, tp->t_timers->tt_2msl);
		return (tp->t_timers->tt_2msl ? 1 : 0);
		break;
	}
	return (0);
}

static void
tcp_timer(void *xinp)
{
	struct inpcb *inp = (struct inpcb *)xinp;
	struct tcpcb *tp = intotcpcb(inp);
	struct tcp_timer *tt;
	int tick = ticks;
	int down, timer;

	/* INP lock was obtained by callout. */
	INP_LOCK_ASSERT(inp);

	/*
	 * We've got a couple of race conditions here:
	 * - The tcpcb was converted into a compressed TW pcb.  All our
	 *   timers have been stopped while this callout already tried
	 *   to obtain the inpcb lock.  TW pcbs have their own timers
	 *   and we just return.
	 */
	if (inp->inp_vflag & INP_TIMEWAIT)
		return;
	/*
	 * - The tcpcb was discarded.  All our timers have been stopped
	 *   while this callout already tried to obtain the inpcb lock
	 *   and we just return.
	 */
	if (tp == NULL)
		return;

	tt = tp->t_timers;			/* Initialize. */
	CTR6(KTR_NET, "%p %s inp %p active %x tick %i nextc %i",
	    tp, __func__, inp, tt->tt_active, tick, tt->tt_nextc);

	/*
	 * - We may have been waiting on the lock while the tcpcb has
	 *   been scheduled for destruction.  In this case no active
	 *   timers remain and we just return.
	 */
	if (tt->tt_active == 0)
		goto done;

	/*
	 * - The timer was rescheduled while this callout was already
	 *   waiting on the lock.  This may happen when a packet just
	 *   came in.  Rescan and reschedule the timer in case we just
	 *   turned it off.
	 */
	if (tick < tt->tt_nextc)
		goto rescan;

	/*
	 * Mark as done.  The active bit in struct callout is not
	 * automatically cleared.  See callout(9) for more info.
	 * In tcp_discardcb() we depend on the correctly cleared
	 * active bit for faster processing.
	 */
	callout_deactivate(&tt->tt_timer);

	/* Check which timer has fired and remove this timer activation. */
	timer = tt->tt_active;
	tt->tt_active = 0;
	tt->tt_nextc = 0;
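
	/*
	 * Illustrative note (an addition, not from the original source):
	 * each handler dispatched below returns 0 if the connection
	 * survives (any timers it rearmed are picked up by the rescan),
	 * 1 if the connection should be closed via tcp_close(), or 2 if
	 * it should be dropped via tcp_drop().  The latter two take the
	 * shutdown path, which must drop and reacquire locks in the
	 * proper order and may therefore lose a race against
	 * tcp_discardcb(); tcp_timer_race counts those events.
	 */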

	switch (timer) {
	case TT_DELACK:
		CTR2(KTR_NET, "%p %s running TT_DELACK", tp, __func__);
		tt->tt_delack = 0;
		down = tcp_timer_delack(tp, inp);	/* down == 0 */
		break;
	case TT_REXMT:
		CTR2(KTR_NET, "%p %s running TT_REXMT", tp, __func__);
		tt->tt_rexmt = 0;
		down = tcp_timer_rexmt(tp, inp);
		break;
	case TT_PERSIST:
		CTR2(KTR_NET, "%p %s running TT_PERSIST", tp, __func__);
		tt->tt_persist = 0;
		down = tcp_timer_persist(tp, inp);
		break;
	case TT_KEEP:
		CTR2(KTR_NET, "%p %s running TT_KEEP", tp, __func__);
		tt->tt_keep = 0;
		down = tcp_timer_keep(tp, inp);
		break;
	case TT_2MSL:
		CTR2(KTR_NET, "%p %s running TT_2MSL", tp, __func__);
		tt->tt_2msl = 0;
		down = tcp_timer_2msl(tp, inp);
		break;
	default:
		CTR2(KTR_NET, "%p %s running nothing", tp, __func__);
		down = 0;
	}

	CTR4(KTR_NET, "%p %s down %i active %x",
	    tp, __func__, down, tt->tt_active);
	/* Do we still exist? */
	if (down)
		goto shutdown;

rescan:
	/* Rescan if no timer was reactivated above. */
	if (tt->tt_active == 0)
		tcp_timer_activate(tp, 0, 0);

done:
	INP_UNLOCK(inp);			/* CALLOUT_RETURNUNLOCKED */
	return;

shutdown:
	INP_UNLOCK(inp);		/* Prevent LOR at expense of race. */
	INP_INFO_WLOCK(&tcbinfo);
	INP_LOCK(inp);

	/*
	 * XXX: If our tcpcb went into TIMEWAIT, is gone, or is no
	 * longer the one we used to work with, we've lost the race.
	 * This race is inherent in the current socket/inpcb life
	 * cycle system.
	 */
	if ((inp->inp_vflag & INP_TIMEWAIT) || inp->inp_ppcb == NULL ||
	    inp->inp_ppcb != tp) {
		CTR3(KTR_NET, "%p %s inp %p lost shutdown race",
		    tp, __func__, inp);
		tcp_timer_race++;
		INP_UNLOCK(inp);		/* CALLOUT_RETURNUNLOCKED */
		INP_INFO_WUNLOCK(&tcbinfo);
		return;
	}
	KASSERT(tp == inp->inp_ppcb, ("%s: tp changed", __func__));

	/* Shutdown the connection. */
	switch (down) {
	case 1:
		tp = tcp_close(tp);
		break;
	case 2:
		tp = tcp_drop(tp,
		    tp->t_softerror ? tp->t_softerror : ETIMEDOUT);
		break;
	}
	CTR3(KTR_NET, "%p %s inp %p after shutdown", tp, __func__, inp);

	if (tp)
		INP_UNLOCK(inp);		/* CALLOUT_RETURNUNLOCKED */

	INP_INFO_WUNLOCK(&tcbinfo);
	return;
}

/*
 * TCP timer processing.
 */
static int
tcp_timer_delack(struct tcpcb *tp, struct inpcb *inp)
{

	tp->t_flags |= TF_ACKNOW;
	tcpstat.tcps_delack++;
	(void) tcp_output(tp);
	return (0);
}
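
/*
 * Illustrative note (an addition, not from the original source):
 * tcp_maxidle is recomputed in tcp_slowtimo() as tcp_keepcnt *
 * tcp_keepintvl.  Assuming the stock defaults of TCPTV_KEEPCNT = 8
 * probes and a 75-second keepalive interval, a socket lingering in
 * FIN_WAIT_2 is rechecked every 75 seconds by the handler below and
 * closed once it has been idle longer than 8 * 75 = 600 seconds.
 */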

static int
tcp_timer_2msl(struct tcpcb *tp, struct inpcb *inp)
{
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * 2 MSL timeout in shutdown went off.  If we're closed but
	 * still waiting for peer to close and connection has been idle
	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
	 * control block.  Otherwise, check again in a bit.
	 *
	 * If fast recycling of FIN_WAIT_2 connections is enabled, we are
	 * in FIN_WAIT_2, and the receiver has closed, there's no point in
	 * hanging onto this socket; just close it, ignoring the fact that
	 * there were recent incoming segments.
	 */
	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
	    tp->t_inpcb->inp_socket &&
	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
		tcpstat.tcps_finwait2_drops++;
		return (1);			/* tcp_close() */
	} else {
		if (tp->t_state != TCPS_TIME_WAIT &&
		    (ticks - tp->t_rcvtime) <= tcp_maxidle)
			tcp_timer_activate(tp, TT_2MSL, tcp_keepintvl);
		else
			return (1);		/* tcp_close() */
	}

#ifdef TCPDEBUG
	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
		    PRU_SLOWTIMO);
#endif
	return (0);
}

static int
tcp_timer_keep(struct tcpcb *tp, struct inpcb *inp)
{
	struct tcptemp *t_template;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * Keep-alive timer went off; send something
	 * or drop connection if idle for too long.
	 */
	tcpstat.tcps_keeptimeo++;
	if (tp->t_state < TCPS_ESTABLISHED)
		goto dropit;
	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
	    tp->t_state <= TCPS_CLOSING) {
		if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle)
			goto dropit;
		/*
		 * Send a packet designed to force a response
		 * if the peer is up and reachable:
		 * either an ACK if the connection is still alive,
		 * or an RST if the peer has closed the connection
		 * due to timeout or reboot.
		 * Using sequence number tp->snd_una-1
		 * causes the transmitted zero-length segment
		 * to lie outside the receive window;
		 * by the protocol spec, this requires the
		 * correspondent TCP to respond.
		 */
		tcpstat.tcps_keepprobe++;
		t_template = tcpip_maketemplate(inp);
		if (t_template) {
			tcp_respond(tp, t_template->tt_ipgen,
			    &t_template->tt_t, (struct mbuf *)NULL,
			    tp->rcv_nxt, tp->snd_una - 1, 0);
			(void) m_free(dtom(t_template));
		}
		tcp_timer_activate(tp, TT_KEEP, tcp_keepintvl);
	} else
		tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);

#ifdef TCPDEBUG
	if (inp->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
		    PRU_SLOWTIMO);
#endif
	return (0);

dropit:
	tcpstat.tcps_keepdrops++;
	return (2);				/* tcp_drop() */
}
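
/*
 * Illustrative note (an addition, not from the original source): with
 * the historical defaults (two-hour tcp_keepidle, 75-second
 * tcp_keepintvl, tcp_keepcnt of 8), the keep-alive handler above sends
 * its first probe after two hours of silence, then one probe every 75
 * seconds; the connection is dropped once idle time reaches
 * tcp_keepidle + tcp_maxidle, i.e. after roughly ten further minutes
 * without any response from the peer.
 */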

static int
tcp_timer_persist(struct tcpcb *tp, struct inpcb *inp)
{
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * Persistence timer into zero window.
	 * Force a byte to be output, if possible.
	 */
	tcpstat.tcps_persisttimeo++;
	/*
	 * Hack: if the peer is dead/unreachable, we do not
	 * time out if the window is closed.  After a full
	 * backoff, drop the connection if the idle time
	 * (no responses to probes) reaches the maximum
	 * backoff that we would use if retransmitting.
	 */
	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
	    ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle ||
	    (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
		tcpstat.tcps_persistdrop++;
		return (2);			/* tcp_drop() */
	}
	tcp_setpersist(tp);
	tp->t_flags |= TF_FORCEDATA;
	(void) tcp_output(tp);
	tp->t_flags &= ~TF_FORCEDATA;

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return (0);
}

static int
tcp_timer_rexmt(struct tcpcb *tp, struct inpcb *inp)
{
	int rexmt;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	tcp_free_sackholes(tp);
	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */
	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		tcpstat.tcps_timeoutdrop++;
		return (2);			/* tcp_drop() */
	}
	if (tp->t_rxtshift == 1) {
		/*
		 * First retransmit; record ssthresh and cwnd so they can
		 * be recovered if this turns out to be a "bad" retransmit.
		 * A retransmit is considered "bad" if an ACK for this
		 * segment is received within RTT/2 interval; the assumption
		 * here is that the ACK was already in flight.  See
		 * "On Estimating End-to-End Network Path Properties" by
		 * Allman and Paxson for more details.
		 */
		tp->snd_cwnd_prev = tp->snd_cwnd;
		tp->snd_ssthresh_prev = tp->snd_ssthresh;
		tp->snd_recover_prev = tp->snd_recover;
		if (IN_FASTRECOVERY(tp))
			tp->t_flags |= TF_WASFRECOVERY;
		else
			tp->t_flags &= ~TF_WASFRECOVERY;
		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
	}
	tcpstat.tcps_rexmttimeo++;
	if (tp->t_state == TCPS_SYN_SENT)
		rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
	else
		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
	TCPT_RANGESET(tp->t_rxtcur, rexmt,
	    tp->t_rttmin, TCPTV_REXMTMAX);
	/*
	 * Disable RFC 1323 options if we haven't got any response to
	 * our third SYN, to work around some broken terminal servers
	 * (most of which have hopefully been retired) that have bad VJ
	 * header compression code which trashes TCP segments containing
	 * unknown-to-them TCP options.
	 */
	if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
	/*
	 * If we backed off this far, our srtt estimate is probably bogus.
	 * Clobber it so we'll take the next rtt measurement as our srtt;
	 * move the current srtt into rttvar to keep the current
	 * retransmit times until then.
	 */
	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
#ifdef INET6
		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
			in6_losing(tp->t_inpcb);
		else
#endif
		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
		tp->t_srtt = 0;
	}
	tp->snd_nxt = tp->snd_una;
	tp->snd_recover = tp->snd_max;
	/*
	 * Force a segment to be sent.
	 */
	tp->t_flags |= TF_ACKNOW;
	/*
	 * If timing a segment in this window, stop the timer.
	 */
	tp->t_rtttime = 0;
	/*
	 * Close the congestion window down to one segment
	 * (we'll open it by one segment for each ack we get).
	 * Since we probably have a window's worth of unacked
	 * data accumulated, this "slow start" keeps us from
	 * dumping all that data as back-to-back packets (which
	 * might overwhelm an intermediate gateway).
	 *
	 * There are two phases to the opening: Initially we
	 * open by one mss on each ack.  This makes the window
	 * size increase exponentially with time.  If the
	 * window is larger than the path can handle, this
	 * exponential growth results in dropped packet(s)
	 * almost immediately.  To get more time between
	 * drops but still "push" the network to take advantage
	 * of improving conditions, we switch from exponential
	 * to linear window opening at some threshold size.
	 * For a threshold, we use half the current window
	 * size, truncated to a multiple of the mss.
	 *
	 * (The minimum cwnd that will give us exponential
	 * growth is 2 mss.  We don't allow the threshold
	 * to go below this.)
	 */
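	/*
	 * Illustrative example (an addition, not from the original
	 * comment): with t_maxseg = 1460 and min(snd_wnd, snd_cwnd) =
	 * 65535, the block below computes win = 65535 / 2 / 1460 = 22,
	 * so snd_ssthresh becomes 22 * 1460 = 32120 bytes while
	 * snd_cwnd restarts at a single 1460-byte segment.
	 */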
	{
		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
		if (win < 2)
			win = 2;
		tp->snd_cwnd = tp->t_maxseg;
		tp->snd_ssthresh = win * tp->t_maxseg;
		tp->t_dupacks = 0;
	}
	EXIT_FASTRECOVERY(tp);
	(void) tcp_output(tp);

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
		    PRU_SLOWTIMO);
#endif
	return (0);
}