1 /*- 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
 *
 *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/if.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/cc.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/ip_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif

/*
 * Timer tunables.  Each is exported as a sysctl; the SYSCTL_PROC entries
 * route through sysctl_msec_to_ticks so userland reads/writes milliseconds
 * while the kernel stores ticks.
 */
int	tcp_keepinit;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");

int	tcp_keepidle;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");

int	tcp_keepintvl;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");

int	tcp_delacktime;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
    "Time before a delayed ACK is sent");

int	tcp_msl;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");

int	tcp_rexmit_min;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
    "Minimum Retransmission Timeout");

int	tcp_rexmit_slop;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
    "Retransmission Timer Slop");

static int	always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");

int	tcp_fast_finwait2_recycle = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
    &tcp_fast_finwait2_recycle, 0,
    "Recycle closed FIN_WAIT_2 connections faster");

int	tcp_finwait2_timeout;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");

int	tcp_keepcnt = TCPTV_KEEPCNT;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
    "Number of keepalive probes to send");

	/* max idle probes */
int	tcp_maxpersistidle;

static int	tcp_rexmit_drop_options = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
    &tcp_rexmit_drop_options, 0,
    "Drop TCP options from 3rd and later retransmitted SYN");

static int	per_cpu_timers = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
    &per_cpu_timers , 0, "run tcp timers on all cpus");

/*
 * Pick the CPU a connection's timers run on.  When per_cpu_timers is
 * enabled, hash the flow id onto a CPU (falling back to curcpu if that
 * CPU is absent); otherwise everything runs on CPU 0.
 */
#define	INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)

/*
 * Tcp protocol timeout routine called every 500 ms.
 * Updates timestamps used for TCP
 * causes finite state machine actions if timers expire.
 */
void
tcp_slowtimo(void)
{
	VNET_ITERATOR_DECL(vnet_iter);

	/* Walk every vnet and reap expired TIME_WAIT endpoints. */
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		tcp_tw_2msl_scan();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}

/* Per-retransmit-shift backoff multipliers, indexed by t_rxtshift. */
int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };

int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };

static int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */

/* Counts timer invocations that found tp->t_inpcb already cleared. */
static int tcp_timer_race;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
    0, "Count of t_inpcb races on tcp_discardcb");

/*
 * TCP timer processing.
 *
 * Each handler below follows the same preamble: look up the inpcb, lock
 * it (taking the pcbinfo lock first where the handler may tear down the
 * connection), then bail out if the callout was rescheduled or stopped
 * while this invocation was in flight (callout_pending/!callout_active),
 * or if the connection has already been dropped (INP_DROPPED).
 */

/*
 * Delayed-ACK timer: the ACK we deferred is now due, so force one out
 * immediately.  Only the inpcb lock is needed; this handler never
 * destroys the connection.
 */
void
tcp_timer_delack(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);

	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
	 *
	 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
	 */
	if (inp == NULL) {
		tcp_timer_race++;
		CURVNET_RESTORE();
		return;
	}
	INP_WLOCK(inp);
	if (callout_pending(&tp->t_timers->tt_delack) ||
	    !callout_active(&tp->t_timers->tt_delack)) {
		/* Callout was rescheduled or stopped after it fired. */
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_delack);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}

	tp->t_flags |= TF_ACKNOW;
	TCPSTAT_INC(tcps_delack);
	(void) tcp_output(tp);
	INP_WUNLOCK(inp);
	CURVNET_RESTORE();
}

/*
 * 2MSL/FIN_WAIT_2 timer.  May call tcp_close(), so the pcbinfo write
 * lock is held for the duration.
 */
void
tcp_timer_2msl(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * XXXRW: Does this actually happen?
	 */
	INP_INFO_WLOCK(&V_tcbinfo);
	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
	 *
	 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
	 */
	if (inp == NULL) {
		tcp_timer_race++;
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	INP_WLOCK(inp);
	tcp_free_sackholes(tp);
	if (callout_pending(&tp->t_timers->tt_2msl) ||
	    !callout_active(&tp->t_timers->tt_2msl)) {
		/* Callout was rescheduled or stopped after it fired. */
		INP_WUNLOCK(tp->t_inpcb);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_2msl);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	/*
	 * 2 MSL timeout in shutdown went off.  If we're closed but
	 * still waiting for peer to close and connection has been idle
	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
	 * control block.  Otherwise, check again in a bit.
	 *
	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
	 * there's no point in hanging onto FIN_WAIT_2 socket.  Just close it.
	 * Ignore fact that there were recent incoming segments.
	 */
	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
	    tp->t_inpcb && tp->t_inpcb->inp_socket &&
	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
		TCPSTAT_INC(tcps_finwait2_drops);
		tp = tcp_close(tp);
	} else {
		if (tp->t_state != TCPS_TIME_WAIT &&
		    ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
			callout_reset_on(&tp->t_timers->tt_2msl,
			    TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp));
		else
			tp = tcp_close(tp);
	}

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
		    PRU_SLOWTIMO);
#endif
	/* tcp_close() may have freed the tcpcb; only unlock if it survives. */
	if (tp != NULL)
		INP_WUNLOCK(inp);
	INP_INFO_WUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
}

/*
 * Keepalive timer: probe an idle connection, or drop it once the probes
 * have gone unanswered for TP_MAXIDLE.  May call tcp_drop(), so the
 * pcbinfo write lock is held.
 */
void
tcp_timer_keep(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct tcptemp *t_template;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	INP_INFO_WLOCK(&V_tcbinfo);
	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
	 *
	 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
	 */
	if (inp == NULL) {
		tcp_timer_race++;
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	INP_WLOCK(inp);
	if (callout_pending(&tp->t_timers->tt_keep) ||
	    !callout_active(&tp->t_timers->tt_keep)) {
		/* Callout was rescheduled or stopped after it fired. */
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_keep);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	/*
	 * Keep-alive timer went off; send something
	 * or drop connection if idle for too long.
	 */
	TCPSTAT_INC(tcps_keeptimeo);
	if (tp->t_state < TCPS_ESTABLISHED)
		goto dropit;
	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
	    tp->t_state <= TCPS_CLOSING) {
		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
			goto dropit;
		/*
		 * Send a packet designed to force a response
		 * if the peer is up and reachable:
		 * either an ACK if the connection is still alive,
		 * or an RST if the peer has closed the connection
		 * due to timeout or reboot.
		 * Using sequence number tp->snd_una-1
		 * causes the transmitted zero-length segment
		 * to lie outside the receive window;
		 * by the protocol spec, this requires the
		 * correspondent TCP to respond.
		 */
		TCPSTAT_INC(tcps_keepprobe);
		t_template = tcpip_maketemplate(inp);
		if (t_template) {
			tcp_respond(tp, t_template->tt_ipgen,
			    &t_template->tt_t, (struct mbuf *)NULL,
			    tp->rcv_nxt, tp->snd_una - 1, 0);
			free(t_template, M_TEMP);
		}
		/* Probe again after the (shorter) keepalive interval. */
		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
		    tcp_timer_keep, tp, INP_CPU(inp));
	} else
		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
		    tcp_timer_keep, tp, INP_CPU(inp));

#ifdef TCPDEBUG
	if (inp->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
		    PRU_SLOWTIMO);
#endif
	INP_WUNLOCK(inp);
	INP_INFO_WUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
	return;

dropit:
	TCPSTAT_INC(tcps_keepdrops);
	tp = tcp_drop(tp, ETIMEDOUT);

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
		    PRU_SLOWTIMO);
#endif
	/* tcp_drop() may have freed the tcpcb; only unlock if it survives. */
	if (tp != NULL)
		INP_WUNLOCK(tp->t_inpcb);
	INP_INFO_WUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
}

/*
 * Persist timer: the peer advertised a zero window, so periodically
 * force a byte out to solicit a window update.  May call tcp_drop(),
 * so the pcbinfo write lock is held.
 */
void
tcp_timer_persist(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	INP_INFO_WLOCK(&V_tcbinfo);
	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
	 *
	 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
	 */
	if (inp == NULL) {
		tcp_timer_race++;
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	INP_WLOCK(inp);
	if (callout_pending(&tp->t_timers->tt_persist) ||
	    !callout_active(&tp->t_timers->tt_persist)) {
		/* Callout was rescheduled or stopped after it fired. */
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_persist);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	/*
	 * Persistence timer into zero window.
	 * Force a byte to be output, if possible.
	 */
	TCPSTAT_INC(tcps_persisttimeo);
	/*
	 * Hack: if the peer is dead/unreachable, we do not
	 * time out if the window is closed.  After a full
	 * backoff, drop the connection if the idle time
	 * (no responses to probes) reaches the maximum
	 * backoff that we would use if retransmitting.
	 */
	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
		TCPSTAT_INC(tcps_persistdrop);
		tp = tcp_drop(tp, ETIMEDOUT);
		goto out;
	}
	/*
	 * If the user has closed the socket then drop a persisting
	 * connection after a much reduced timeout.
	 */
	if (tp->t_state > TCPS_CLOSE_WAIT &&
	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
		TCPSTAT_INC(tcps_persistdrop);
		tp = tcp_drop(tp, ETIMEDOUT);
		goto out;
	}
	tcp_setpersist(tp);
	tp->t_flags |= TF_FORCEDATA;
	(void) tcp_output(tp);
	tp->t_flags &= ~TF_FORCEDATA;

out:
#ifdef TCPDEBUG
	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	/* tcp_drop() may have freed the tcpcb; only unlock if it survives. */
	if (tp != NULL)
		INP_WUNLOCK(inp);
	INP_INFO_WUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
}

/*
 * Retransmission timer: the oldest unacknowledged segment timed out.
 * Back off the retransmit interval and resend one segment, or drop the
 * connection after TCP_MAXRXTSHIFT attempts.  Runs under the pcbinfo
 * read lock; the drop path upgrades to the write lock (see below).
 */
void
tcp_timer_rexmt(void * xtp)
{
	struct tcpcb *tp = xtp;
	CURVNET_SET(tp->t_vnet);
	int rexmt;
	int headlocked;		/* nonzero if pcbinfo is write-locked at "out" */
	struct inpcb *inp;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	INP_INFO_RLOCK(&V_tcbinfo);
	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
	 *
	 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
	 */
	if (inp == NULL) {
		tcp_timer_race++;
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	INP_WLOCK(inp);
	if (callout_pending(&tp->t_timers->tt_rexmt) ||
	    !callout_active(&tp->t_timers->tt_rexmt)) {
		/* Callout was rescheduled or stopped after it fired. */
		INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_rexmt);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	tcp_free_sackholes(tp);
	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */
	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		TCPSTAT_INC(tcps_timeoutdrop);
		/*
		 * Dropping the connection needs the pcbinfo write lock,
		 * which must be acquired before the inpcb lock.  Take a
		 * reference so the inpcb cannot be freed while both locks
		 * are dropped, then reacquire them in the correct order.
		 */
		in_pcbref(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		INP_WUNLOCK(inp);
		INP_INFO_WLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		if (in_pcbrele_wlocked(inp)) {
			/* We held the last reference; inpcb is gone. */
			INP_INFO_WUNLOCK(&V_tcbinfo);
			CURVNET_RESTORE();
			return;
		}
		if (inp->inp_flags & INP_DROPPED) {
			/* Connection was dropped while we were unlocked. */
			INP_WUNLOCK(inp);
			INP_INFO_WUNLOCK(&V_tcbinfo);
			CURVNET_RESTORE();
			return;
		}

		tp = tcp_drop(tp, tp->t_softerror ?
		    tp->t_softerror : ETIMEDOUT);
		headlocked = 1;
		goto out;
	}
	INP_INFO_RUNLOCK(&V_tcbinfo);
	headlocked = 0;
	if (tp->t_state == TCPS_SYN_SENT) {
		/*
		 * If the SYN was retransmitted, indicate CWND to be
		 * limited to 1 segment in cc_conn_init().
		 */
		tp->snd_cwnd = 1;
	} else if (tp->t_rxtshift == 1) {
		/*
		 * first retransmit; record ssthresh and cwnd so they can
		 * be recovered if this turns out to be a "bad" retransmit.
		 * A retransmit is considered "bad" if an ACK for this
		 * segment is received within RTT/2 interval; the assumption
		 * here is that the ACK was already in flight.  See
		 * "On Estimating End-to-End Network Path Properties" by
		 * Allman and Paxson for more details.
		 */
		tp->snd_cwnd_prev = tp->snd_cwnd;
		tp->snd_ssthresh_prev = tp->snd_ssthresh;
		tp->snd_recover_prev = tp->snd_recover;
		if (IN_FASTRECOVERY(tp->t_flags))
			tp->t_flags |= TF_WASFRECOVERY;
		else
			tp->t_flags &= ~TF_WASFRECOVERY;
		if (IN_CONGRECOVERY(tp->t_flags))
			tp->t_flags |= TF_WASCRECOVERY;
		else
			tp->t_flags &= ~TF_WASCRECOVERY;
		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
		tp->t_flags |= TF_PREVVALID;
	} else
		tp->t_flags &= ~TF_PREVVALID;
	TCPSTAT_INC(tcps_rexmttimeo);
	if (tp->t_state == TCPS_SYN_SENT)
		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
	else
		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
	TCPT_RANGESET(tp->t_rxtcur, rexmt,
	    tp->t_rttmin, TCPTV_REXMTMAX);
	/*
	 * Disable RFC1323 and SACK if we haven't got any response to
	 * our third SYN to work-around some broken terminal servers
	 * (most of which have hopefully been retired) that have bad VJ
	 * header compression code which trashes TCP segments containing
	 * unknown-to-them TCP options.
	 */
	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
	    (tp->t_rxtshift == 3))
		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
	/*
	 * If we backed off this far, our srtt estimate is probably bogus.
	 * Clobber it so we'll take the next rtt measurement as our srtt;
	 * move the current srtt into rttvar to keep the current
	 * retransmit times until then.
	 */
	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
#ifdef INET6
		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
			in6_losing(tp->t_inpcb);
#endif
		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
		tp->t_srtt = 0;
	}
	tp->snd_nxt = tp->snd_una;
	tp->snd_recover = tp->snd_max;
	/*
	 * Force a segment to be sent.
	 */
	tp->t_flags |= TF_ACKNOW;
	/*
	 * If timing a segment in this window, stop the timer.
	 */
	tp->t_rtttime = 0;

	cc_cong_signal(tp, NULL, CC_RTO);

	(void) tcp_output(tp);

out:
#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
		    PRU_SLOWTIMO);
#endif
	/* tcp_drop() may have freed the tcpcb; only unlock if it survives. */
	if (tp != NULL)
		INP_WUNLOCK(inp);
	if (headlocked)
		INP_INFO_WUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
}

/*
 * (Re)arm or stop one of a connection's timers.  A delta of 0 stops the
 * timer; a nonzero delta schedules the matching handler after delta
 * ticks on the CPU chosen by INP_CPU().  No-op for TOE connections,
 * whose timers are managed by the offload hardware.
 */
void
tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
{
	struct callout *t_callout;
	void *f_callout;
	struct inpcb *inp = tp->t_inpcb;
	int cpu = INP_CPU(inp);

#ifdef TCP_OFFLOAD
	if (tp->t_flags & TF_TOE)
		return;
#endif

	switch (timer_type) {
	case TT_DELACK:
		t_callout = &tp->t_timers->tt_delack;
		f_callout = tcp_timer_delack;
		break;
	case TT_REXMT:
		t_callout = &tp->t_timers->tt_rexmt;
		f_callout = tcp_timer_rexmt;
		break;
	case TT_PERSIST:
		t_callout = &tp->t_timers->tt_persist;
		f_callout = tcp_timer_persist;
		break;
	case TT_KEEP:
		t_callout = &tp->t_timers->tt_keep;
		f_callout = tcp_timer_keep;
		break;
	case TT_2MSL:
		t_callout = &tp->t_timers->tt_2msl;
		f_callout = tcp_timer_2msl;
		break;
	default:
		panic("bad timer_type");
	}
	if (delta == 0) {
		callout_stop(t_callout);
	} else {
		callout_reset_on(t_callout, delta, f_callout, tp, cpu);
	}
}

/*
 * Return nonzero if the given timer is currently active (armed).
 */
int
tcp_timer_active(struct tcpcb *tp, int timer_type)
{
	struct callout *t_callout;

	switch (timer_type) {
	case TT_DELACK:
		t_callout = &tp->t_timers->tt_delack;
		break;
	case TT_REXMT:
		t_callout = &tp->t_timers->tt_rexmt;
		break;
	case TT_PERSIST:
		t_callout = &tp->t_timers->tt_persist;
		break;
	case TT_KEEP:
		t_callout = &tp->t_timers->tt_keep;
		break;
	case TT_2MSL:
		t_callout = &tp->t_timers->tt_2msl;
		break;
	default:
		panic("bad timer_type");
	}

	return callout_active(t_callout);
}

#define	ticks_to_msecs(t)	(1000*(t) / hz)

/*
 * Export a connection's timer state to userland (e.g. for netstat):
 * fill *xtimer with the milliseconds remaining on each active timer
 * and the time since the last received segment.
 */
void
tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
    struct xtcp_timer *xtimer)
{
	sbintime_t now;

	bzero(xtimer, sizeof(*xtimer));
	if (timer == NULL)
		return;
	now = getsbinuptime();
	if (callout_active(&timer->tt_delack))
		xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
	if (callout_active(&timer->tt_rexmt))
		xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
	if (callout_active(&timer->tt_persist))
		xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
	if (callout_active(&timer->tt_keep))
		xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
	if (callout_active(&timer->tt_2msl))
		xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
}