/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 * $FreeBSD$
 */

#include "opt_inet6.h"
#include "opt_tcpdebug.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/limits.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
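/*
 * The interval sysctls below are read and written in milliseconds but
 * stored internally in clock ticks; sysctl_msec_to_ticks performs the
 * conversion, which is roughly ticks = msec * hz / 1000.  E.g. with
 * hz = 1000 an msl of 30000 ms is stored as 30000 ticks (30 seconds).
 */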
int	tcp_keepinit;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "");

int	tcp_keepidle;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "");

int	tcp_keepintvl;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "");

int	tcp_delacktime;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime,
    CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
    "Time before a delayed ACK is sent");

int	tcp_msl;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");

int	tcp_rexmit_min;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
    "Minimum Retransmission Timeout");

int	tcp_rexmit_slop;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
    "Retransmission Timer Slop");

static int	always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
    &always_keepalive, 0, "Assume SO_KEEPALIVE on all TCP connections");

int	tcp_fast_finwait2_recycle = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
    &tcp_fast_finwait2_recycle, 0,
    "Recycle closed FIN_WAIT_2 connections faster");

int	tcp_finwait2_timeout;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");


static int	tcp_keepcnt = TCPTV_KEEPCNT;
	/* max idle probes */
int	tcp_maxpersistidle;
	/* max idle time in persist */
int	tcp_maxidle;

static void	tcp_timer(void *);
static int	tcp_timer_delack(struct tcpcb *, struct inpcb *);
static int	tcp_timer_2msl(struct tcpcb *, struct inpcb *);
static int	tcp_timer_keep(struct tcpcb *, struct inpcb *);
static int	tcp_timer_persist(struct tcpcb *, struct inpcb *);
static int	tcp_timer_rexmt(struct tcpcb *, struct inpcb *);

/*
 * TCP protocol timeout routine called every 500 ms.
 * Updates timestamps used for TCP and causes finite state machine
 * actions if timers expire.
 */
void
tcp_slowtimo()
{

	tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
	INP_INFO_WLOCK(&tcbinfo);
	(void) tcp_timer_2msl_tw(0);
	INP_INFO_WUNLOCK(&tcbinfo);
}
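/*
 * Per-timeout multipliers for the retransmit timer, indexed by
 * t_rxtshift.  Connections in SYN_SENT back off more gently via
 * tcp_syn_backoff; established connections use tcp_backoff.  The sum
 * of tcp_backoff[], tcp_totbackoff, is what tcp_timer_persist()
 * compares the idle time against before giving up on a persist-mode
 * peer.
 */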
int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };

int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };

static int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */

static int	tcp_timer_race;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
    0, "Count of t_inpcb races on tcp_discardcb");


void
tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
{
	struct inpcb *inp = tp->t_inpcb;
	struct tcp_timer *tt = tp->t_timers;
	int tick = ticks;			/* Stable time base. */
	int next = delta ? tick + delta : 0;

	INP_LOCK_ASSERT(inp);

	CTR6(KTR_NET, "%p %s inp %p active %x delta %i nextc %i",
	    tp, __func__, inp, tt->tt_active, delta, tt->tt_nextc);

	/* Set new value for timer. */
	switch (timer_type) {
	case TT_DELACK:
		CTR4(KTR_NET, "%p %s TT_DELACK old %i new %i",
		    tp, __func__, tt->tt_delack, next);
		tt->tt_delack = next;
		break;
	case TT_REXMT:
		CTR4(KTR_NET, "%p %s TT_REXMT old %i new %i",
		    tp, __func__, tt->tt_rexmt, next);
		tt->tt_rexmt = next;
		break;
	case TT_PERSIST:
		CTR4(KTR_NET, "%p %s TT_PERSIST old %i new %i",
		    tp, __func__, tt->tt_persist, next);
		tt->tt_persist = next;
		break;
	case TT_KEEP:
		CTR4(KTR_NET, "%p %s TT_KEEP old %i new %i",
		    tp, __func__, tt->tt_keep, next);
		tt->tt_keep = next;
		break;
	case TT_2MSL:
		CTR4(KTR_NET, "%p %s TT_2MSL old %i new %i",
		    tp, __func__, tt->tt_2msl, next);
		tt->tt_2msl = next;
		break;
	case 0:					/* Dummy for timer rescan. */
		CTR3(KTR_NET, "%p %s timer rescan new %i", tp, __func__, next);
		break;
	}

	/* If some other timer is active and scheduled sooner, just return. */
	if (tt->tt_active != timer_type && tt->tt_nextc < next &&
	    callout_active(&tt->tt_timer))
		return;

	/* Select next timer to schedule. */
	tt->tt_nextc = INT_MAX;
	tt->tt_active = 0;
	if (tt->tt_delack && tt->tt_delack < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_delack;
		tt->tt_active = TT_DELACK;
	}
	if (tt->tt_rexmt && tt->tt_rexmt < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_rexmt;
		tt->tt_active = TT_REXMT;
	}
	if (tt->tt_persist && tt->tt_persist < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_persist;
		tt->tt_active = TT_PERSIST;
	}
	if (tt->tt_keep && tt->tt_keep < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_keep;
		tt->tt_active = TT_KEEP;
	}
	if (tt->tt_2msl && tt->tt_2msl < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_2msl;
		tt->tt_active = TT_2MSL;
	}

	/* Rearm callout with new timer if we found one. */
	if (tt->tt_active) {
		CTR4(KTR_NET, "%p %s callout_reset active %x nextc in %i",
		    tp, __func__, tt->tt_active, tt->tt_nextc - tick);
		callout_reset(&tt->tt_timer,
		    tt->tt_nextc - tick, tcp_timer, (void *)inp);
	} else {
		CTR2(KTR_NET, "%p %s callout_stop", tp, __func__);
		callout_stop(&tt->tt_timer);
		tt->tt_nextc = 0;
	}

	return;
}
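/*
 * Typical usage, sketched here for reference (the actual call sites
 * live in tcp_input.c, tcp_output.c and related files):
 *
 *	tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);	arm or restart
 *	tcp_timer_activate(tp, TT_REXMT, 0);		stop
 *	if (tcp_timer_active(tp, TT_PERSIST))		query
 *		...
 *
 * A delta of 0 always means "stop this timer"; a nonzero delta is an
 * offset in ticks from now.
 */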
int
tcp_timer_active(struct tcpcb *tp, int timer_type)
{

	switch (timer_type) {
	case TT_DELACK:
		CTR3(KTR_NET, "%p %s TT_DELACK %i",
		    tp, __func__, tp->t_timers->tt_delack);
		return (tp->t_timers->tt_delack ? 1 : 0);
	case TT_REXMT:
		CTR3(KTR_NET, "%p %s TT_REXMT %i",
		    tp, __func__, tp->t_timers->tt_rexmt);
		return (tp->t_timers->tt_rexmt ? 1 : 0);
	case TT_PERSIST:
		CTR3(KTR_NET, "%p %s TT_PERSIST %i",
		    tp, __func__, tp->t_timers->tt_persist);
		return (tp->t_timers->tt_persist ? 1 : 0);
	case TT_KEEP:
		CTR3(KTR_NET, "%p %s TT_KEEP %i",
		    tp, __func__, tp->t_timers->tt_keep);
		return (tp->t_timers->tt_keep ? 1 : 0);
	case TT_2MSL:
		CTR3(KTR_NET, "%p %s TT_2MSL %i",
		    tp, __func__, tp->t_timers->tt_2msl);
		return (tp->t_timers->tt_2msl ? 1 : 0);
	}
	return (0);
}

static void
tcp_timer(void *xinp)
{
	struct inpcb *inp = (struct inpcb *)xinp;
	struct tcpcb *tp = intotcpcb(inp);
	struct tcp_timer *tt;
	int tick = ticks;
	int down, timer;

	/* INP lock was obtained by callout. */
	INP_LOCK_ASSERT(inp);

	/*
	 * We've got a couple of race conditions here:
	 * - The tcpcb was converted into a compressed TW pcb.  All our
	 *   timers have been stopped while this callout already tried
	 *   to obtain the inpcb lock.  TW pcbs have their own timers
	 *   and we just return.
	 */
	if (inp->inp_vflag & INP_TIMEWAIT)
		return;
	/*
	 * - The tcpcb was discarded.  All our timers have been stopped
	 *   while this callout already tried to obtain the inpcb lock
	 *   and we just return.
	 */
	if (tp == NULL)
		return;

	tt = tp->t_timers;			/* Initialize. */
	CTR6(KTR_NET, "%p %s inp %p active %x tick %i nextc %i",
	    tp, __func__, inp, tt->tt_active, tick, tt->tt_nextc);

	/*
	 * - We may have been waiting on the lock while the tcpcb has
	 *   been scheduled for destruction.  In this case no active
	 *   timers remain and we just return.
	 */
	if (tt->tt_active == 0)
		goto done;

	/*
	 * - The timer was rescheduled while this callout was already
	 *   waiting on the lock.  This may happen when a packet just
	 *   came in.  Rescan and reschedule the timer in case we
	 *   just turned it off.
	 */
	if (tick < tt->tt_nextc)
		goto rescan;

	/*
	 * Mark as done.  The active bit in struct callout is not
	 * automatically cleared.  See callout(9) for more info.
	 * In tcp_discardcb() we depend on the correctly cleared
	 * active bit for faster processing.
	 */
	callout_deactivate(&tt->tt_timer);

	/* Check which timer has fired and remove this timer activation. */
	timer = tt->tt_active;
	tt->tt_active = 0;
	tt->tt_nextc = 0;

	switch (timer) {
	case TT_DELACK:
		CTR2(KTR_NET, "%p %s running TT_DELACK", tp, __func__);
		tt->tt_delack = 0;
		down = tcp_timer_delack(tp, inp);	/* down == 0 */
		break;
	case TT_REXMT:
		CTR2(KTR_NET, "%p %s running TT_REXMT", tp, __func__);
		tt->tt_rexmt = 0;
		down = tcp_timer_rexmt(tp, inp);
		break;
	case TT_PERSIST:
		CTR2(KTR_NET, "%p %s running TT_PERSIST", tp, __func__);
		tt->tt_persist = 0;
		down = tcp_timer_persist(tp, inp);
		break;
	case TT_KEEP:
		CTR2(KTR_NET, "%p %s running TT_KEEP", tp, __func__);
		tt->tt_keep = 0;
		down = tcp_timer_keep(tp, inp);
		break;
	case TT_2MSL:
		CTR2(KTR_NET, "%p %s running TT_2MSL", tp, __func__);
		tt->tt_2msl = 0;
		down = tcp_timer_2msl(tp, inp);
		break;
	default:
		CTR2(KTR_NET, "%p %s running nothing", tp, __func__);
		down = 0;
	}

	CTR4(KTR_NET, "%p %s down %i active %x",
	    tp, __func__, down, tt->tt_active);
	/* Do we still exist? */
	if (down)
		goto shutdown;

rescan:
	/* Rescan if no timer was reactivated above. */
	if (tt->tt_active == 0)
		tcp_timer_activate(tp, 0, 0);

done:
	INP_UNLOCK(inp);			/* CALLOUT_RETURNUNLOCKED */
	return;

shutdown:
	INP_UNLOCK(inp);		/* Prevent LOR at expense of race. */
	INP_INFO_WLOCK(&tcbinfo);
	INP_LOCK(inp);

	/* When tp is gone we've lost the race. */
	if (inp->inp_ppcb == NULL) {
		CTR3(KTR_NET, "%p %s inp %p lost shutdown race",
		    tp, __func__, inp);
		tcp_timer_race++;
		INP_UNLOCK(inp);		/* CALLOUT_RETURNUNLOCKED */
		INP_INFO_WUNLOCK(&tcbinfo);
		return;
	}
	KASSERT(tp == inp->inp_ppcb, ("%s: tp changed", __func__));

	/* Shutdown the connection. */
	switch (down) {
	case 1:
		tp = tcp_close(tp);
		break;
	case 2:
		tp = tcp_drop(tp,
		    tp->t_softerror ? tp->t_softerror : ETIMEDOUT);
		break;
	}
	CTR3(KTR_NET, "%p %s inp %p after shutdown", tp, __func__, inp);

	if (tp)
		INP_UNLOCK(inp);		/* CALLOUT_RETURNUNLOCKED */

	INP_INFO_WUNLOCK(&tcbinfo);
	return;
}
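/*
 * Each handler below returns a "down" code telling tcp_timer() what to
 * do with the connection once the handler is finished:
 *
 *	0	connection stays up, nothing further to do
 *	1	connection should be closed via tcp_close()
 *	2	connection should be dropped via tcp_drop()
 *
 * The actual close/drop happens back in tcp_timer() after upgrading to
 * the tcbinfo write lock; see the shutdown label above.
 */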
/*
 * TCP timer processing.
 */
static int
tcp_timer_delack(struct tcpcb *tp, struct inpcb *inp)
{

	tp->t_flags |= TF_ACKNOW;
	tcpstat.tcps_delack++;
	(void) tcp_output(tp);
	return (0);
}

static int
tcp_timer_2msl(struct tcpcb *tp, struct inpcb *inp)
{
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * 2 MSL timeout in shutdown went off.  If we're closed but
	 * still waiting for peer to close and connection has been idle
	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
	 * control block.  Otherwise, check again in a bit.
	 *
	 * If fast recycling of FIN_WAIT_2 connections is enabled, we are
	 * in FIN_WAIT_2 and the receiver has closed, there's no point in
	 * hanging onto a FIN_WAIT_2 socket.  Just close it.  Ignore the
	 * fact that there were recent incoming segments.
	 */
	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
	    tp->t_inpcb->inp_socket &&
	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
		tcpstat.tcps_finwait2_drops++;
		return (1);			/* tcp_close() */
	} else {
		if (tp->t_state != TCPS_TIME_WAIT &&
		    (ticks - tp->t_rcvtime) <= tcp_maxidle)
			tcp_timer_activate(tp, TT_2MSL, tcp_keepintvl);
		else
			return (1);		/* tcp_close() */
	}

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
		    PRU_SLOWTIMO);
#endif
	return (0);
}

/*
 * The timed wait queue contains references to each of the TCP sessions
 * currently in the TIME_WAIT state.  The queue pointers, including the
 * queue pointers in each tcptw structure, are protected using the global
 * tcbinfo lock, which must be held over queue iteration and modification.
 */
static TAILQ_HEAD(, tcptw)	twq_2msl;

void
tcp_timer_init(void)
{

	TAILQ_INIT(&twq_2msl);
}

void
tcp_timer_2msl_reset(struct tcptw *tw, int rearm)
{

	INP_INFO_WLOCK_ASSERT(&tcbinfo);
	INP_LOCK_ASSERT(tw->tw_inpcb);
	if (rearm)
		TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
	tw->tw_time = ticks + 2 * tcp_msl;
	TAILQ_INSERT_TAIL(&twq_2msl, tw, tw_2msl);
}

void
tcp_timer_2msl_stop(struct tcptw *tw)
{

	INP_INFO_WLOCK_ASSERT(&tcbinfo);
	TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
}
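/*
 * Because every entry is stamped with ticks + 2 * tcp_msl at insert
 * time and appended at the tail, twq_2msl stays sorted by expiry and
 * it suffices to look at the head.  With reuse == 0, expire every
 * tcptw whose time has come; with reuse != 0, close the oldest
 * TIME_WAIT session unconditionally and hand its tcptw back to the
 * caller for recycling.
 */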
struct tcptw *
tcp_timer_2msl_tw(int reuse)
{
	struct tcptw *tw;

	INP_INFO_WLOCK_ASSERT(&tcbinfo);
	for (;;) {
		tw = TAILQ_FIRST(&twq_2msl);
		if (tw == NULL || (!reuse && tw->tw_time > ticks))
			break;
		INP_LOCK(tw->tw_inpcb);
		tcp_twclose(tw, reuse);
		if (reuse)
			return (tw);
	}
	return (NULL);
}

static int
tcp_timer_keep(struct tcpcb *tp, struct inpcb *inp)
{
	struct tcptemp *t_template;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * Keep-alive timer went off; send something
	 * or drop connection if idle for too long.
	 */
	tcpstat.tcps_keeptimeo++;
	if (tp->t_state < TCPS_ESTABLISHED)
		goto dropit;
	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
	    tp->t_state <= TCPS_CLOSING) {
		if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle)
			goto dropit;
		/*
		 * Send a packet designed to force a response
		 * if the peer is up and reachable:
		 * either an ACK if the connection is still alive,
		 * or an RST if the peer has closed the connection
		 * due to timeout or reboot.
		 * Using sequence number tp->snd_una-1
		 * causes the transmitted zero-length segment
		 * to lie outside the receive window;
		 * by the protocol spec, this requires the
		 * correspondent TCP to respond.
		 */
		tcpstat.tcps_keepprobe++;
		t_template = tcpip_maketemplate(inp);
		if (t_template) {
			tcp_respond(tp, t_template->tt_ipgen,
			    &t_template->tt_t, (struct mbuf *)NULL,
			    tp->rcv_nxt, tp->snd_una - 1, 0);
			(void) m_free(dtom(t_template));
		}
		tcp_timer_activate(tp, TT_KEEP, tcp_keepintvl);
	} else
		tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);

#ifdef TCPDEBUG
	if (inp->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
		    PRU_SLOWTIMO);
#endif
	return (0);

dropit:
	tcpstat.tcps_keepdrops++;
	return (2);				/* tcp_drop() */
}

static int
tcp_timer_persist(struct tcpcb *tp, struct inpcb *inp)
{
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * Persistence timer into zero window.
	 * Force a byte to be output, if possible.
	 */
	tcpstat.tcps_persisttimeo++;
	/*
	 * Hack: if the peer is dead/unreachable, we do not
	 * time out if the window is closed.  After a full
	 * backoff, drop the connection if the idle time
	 * (no responses to probes) reaches the maximum
	 * backoff that we would use if retransmitting.
	 */
	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
	    ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle ||
	    (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
		tcpstat.tcps_persistdrop++;
		return (2);			/* tcp_drop() */
	}
	tcp_setpersist(tp);
	tp->t_flags |= TF_FORCEDATA;
	(void) tcp_output(tp);
	tp->t_flags &= ~TF_FORCEDATA;

#ifdef TCPDEBUG
	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return (0);
}
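/*
 * A worked example of the backoff, assuming a computed base RTO of
 * one second: the first timeout fires after 1 s, and each successive
 * timeout scales the base by tcp_backoff[t_rxtshift], yielding
 * 2, 4, 8, ... seconds, clamped between t_rttmin and TCPTV_REXMTMAX
 * by TCPT_RANGESET below.  After TCP_MAXRXTSHIFT timeouts in a row
 * the connection is dropped with ETIMEDOUT (or the pending soft
 * error), via the down == 2 return.
 */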
static int
tcp_timer_rexmt(struct tcpcb *tp, struct inpcb *inp)
{
	int rexmt;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	tcp_free_sackholes(tp);
	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */
	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		tcpstat.tcps_timeoutdrop++;
		return (2);			/* tcp_drop() */
	}
	if (tp->t_rxtshift == 1) {
		/*
		 * First retransmit; record ssthresh and cwnd so they can
		 * be recovered if this turns out to be a "bad" retransmit.
		 * A retransmit is considered "bad" if an ACK for this
		 * segment is received within RTT/2 interval; the assumption
		 * here is that the ACK was already in flight.  See
		 * "On Estimating End-to-End Network Path Properties" by
		 * Allman and Paxson for more details.
		 */
		tp->snd_cwnd_prev = tp->snd_cwnd;
		tp->snd_ssthresh_prev = tp->snd_ssthresh;
		tp->snd_recover_prev = tp->snd_recover;
		if (IN_FASTRECOVERY(tp))
			tp->t_flags |= TF_WASFRECOVERY;
		else
			tp->t_flags &= ~TF_WASFRECOVERY;
		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
	}
	tcpstat.tcps_rexmttimeo++;
	if (tp->t_state == TCPS_SYN_SENT)
		rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
	else
		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
	TCPT_RANGESET(tp->t_rxtcur, rexmt,
	    tp->t_rttmin, TCPTV_REXMTMAX);
	/*
	 * Disable rfc1323 if we haven't got any response to
	 * our third SYN to work around some broken terminal servers
	 * (most of which have hopefully been retired) that have bad VJ
	 * header compression code which trashes TCP segments containing
	 * unknown-to-them TCP options.
	 */
	if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
	/*
	 * If we backed off this far, our srtt estimate is probably bogus.
	 * Clobber it so we'll take the next rtt measurement as our srtt;
	 * move the current srtt into rttvar to keep the current
	 * retransmit times until then.
	 */
	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
#ifdef INET6
		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
			in6_losing(tp->t_inpcb);
		else
#endif
		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
		tp->t_srtt = 0;
	}
	tp->snd_nxt = tp->snd_una;
	tp->snd_recover = tp->snd_max;
	/*
	 * Force a segment to be sent.
	 */
	tp->t_flags |= TF_ACKNOW;
	/*
	 * If timing a segment in this window, stop the timer.
	 */
	tp->t_rtttime = 0;
	/*
	 * Close the congestion window down to one segment
	 * (we'll open it by one segment for each ack we get).
	 * Since we probably have a window's worth of unacked
	 * data accumulated, this "slow start" keeps us from
	 * dumping all that data as back-to-back packets (which
	 * might overwhelm an intermediate gateway).
	 *
	 * There are two phases to the opening: Initially we
	 * open by one mss on each ack.  This makes the window
	 * size increase exponentially with time.  If the
	 * window is larger than the path can handle, this
	 * exponential growth results in dropped packet(s)
	 * almost immediately.  To get more time between
	 * drops but still "push" the network to take advantage
	 * of improving conditions, we switch from exponential
	 * to linear window opening at some threshold size.
	 * For a threshold, we use half the current window
	 * size, truncated to a multiple of the mss.
	 *
	 * (The minimum cwnd that will give us exponential
	 * growth is 2 mss.  We don't allow the threshold
	 * to go below this.)
	 */
	{
		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
		if (win < 2)
			win = 2;
		tp->snd_cwnd = tp->t_maxseg;
		tp->snd_ssthresh = win * tp->t_maxseg;
		tp->t_dupacks = 0;
	}
	EXIT_FASTRECOVERY(tp);
	(void) tcp_output(tp);

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
		    PRU_SLOWTIMO);
#endif
	return (0);
}