1 /*- 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_inet.h" 36 #include "opt_inet6.h" 37 #include "opt_tcpdebug.h" 38 39 #include <sys/param.h> 40 #include <sys/kernel.h> 41 #include <sys/lock.h> 42 #include <sys/mbuf.h> 43 #include <sys/mutex.h> 44 #include <sys/protosw.h> 45 #include <sys/smp.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/sysctl.h> 49 #include <sys/systm.h> 50 51 #include <net/if.h> 52 #include <net/route.h> 53 #include <net/vnet.h> 54 55 #include <netinet/cc.h> 56 #include <netinet/in.h> 57 #include <netinet/in_pcb.h> 58 #include <netinet/in_systm.h> 59 #ifdef INET6 60 #include <netinet6/in6_pcb.h> 61 #endif 62 #include <netinet/ip_var.h> 63 #include <netinet/tcp_fsm.h> 64 #include <netinet/tcp_timer.h> 65 #include <netinet/tcp_var.h> 66 #include <netinet/tcpip.h> 67 #ifdef TCPDEBUG 68 #include <netinet/tcp_debug.h> 69 #endif 70 71 int tcp_keepinit; 72 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 73 &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection"); 74 75 int tcp_keepidle; 76 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 77 &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin"); 78 79 int tcp_keepintvl; 80 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 81 &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes"); 82 83 int tcp_delacktime; 84 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, 85 &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 86 "Time before a delayed ACK is sent"); 87 88 int tcp_msl; 89 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 90 &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 91 92 int tcp_rexmit_min; 93 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 94 &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 95 "Minimum Retransmission Timeout"); 96 97 int tcp_rexmit_slop; 98 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 99 &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 100 "Retransmission Timer Slop"); 101 102 static int always_keepalive = 1; 103 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 104 &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 105 106 int tcp_fast_finwait2_recycle = 0; 107 SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 108 &tcp_fast_finwait2_recycle, 0, 109 "Recycle closed FIN_WAIT_2 connections faster"); 110 111 int tcp_finwait2_timeout; 112 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, 113 &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); 114 115 int tcp_keepcnt = TCPTV_KEEPCNT; 116 SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0, 117 "Number of keepalive probes to send"); 118 119 /* max idle probes */ 120 int tcp_maxpersistidle; 121 122 static int tcp_rexmit_drop_options = 0; 123 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW, 124 &tcp_rexmit_drop_options, 0, 125 "Drop TCP options from 3rd and later retransmitted SYN"); 126 127 static int per_cpu_timers = 0; 128 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW, 129 &per_cpu_timers , 0, "run tcp timers on all cpus"); 130 131 #define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \ 132 ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0) 133 134 /* 135 * Tcp protocol timeout routine called every 500 ms. 136 * Updates timestamps used for TCP 137 * causes finite state machine actions if timers expire. 138 */ 139 void 140 tcp_slowtimo(void) 141 { 142 VNET_ITERATOR_DECL(vnet_iter); 143 144 VNET_LIST_RLOCK_NOSLEEP(); 145 VNET_FOREACH(vnet_iter) { 146 CURVNET_SET(vnet_iter); 147 INP_INFO_WLOCK(&V_tcbinfo); 148 (void) tcp_tw_2msl_scan(0); 149 INP_INFO_WUNLOCK(&V_tcbinfo); 150 CURVNET_RESTORE(); 151 } 152 VNET_LIST_RUNLOCK_NOSLEEP(); 153 } 154 155 int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 156 { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 157 158 int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 159 { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 160 161 static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 162 163 static int tcp_timer_race; 164 SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race, 165 0, "Count of t_inpcb races on tcp_discardcb"); 166 167 /* 168 * TCP timer processing. 169 */ 170 171 void 172 tcp_timer_delack(void *xtp) 173 { 174 struct tcpcb *tp = xtp; 175 struct inpcb *inp; 176 CURVNET_SET(tp->t_vnet); 177 178 inp = tp->t_inpcb; 179 /* 180 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 181 * tear-down mean we need it as a work-around for races between 182 * timers and tcp_discardcb(). 183 * 184 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL")); 185 */ 186 if (inp == NULL) { 187 tcp_timer_race++; 188 CURVNET_RESTORE(); 189 return; 190 } 191 INP_WLOCK(inp); 192 if (callout_pending(&tp->t_timers->tt_delack) || 193 !callout_active(&tp->t_timers->tt_delack)) { 194 INP_WUNLOCK(inp); 195 CURVNET_RESTORE(); 196 return; 197 } 198 callout_deactivate(&tp->t_timers->tt_delack); 199 if ((inp->inp_flags & INP_DROPPED) != 0) { 200 INP_WUNLOCK(inp); 201 CURVNET_RESTORE(); 202 return; 203 } 204 205 tp->t_flags |= TF_ACKNOW; 206 TCPSTAT_INC(tcps_delack); 207 (void) tcp_output(tp); 208 INP_WUNLOCK(inp); 209 CURVNET_RESTORE(); 210 } 211 212 void 213 tcp_timer_2msl(void *xtp) 214 { 215 struct tcpcb *tp = xtp; 216 struct inpcb *inp; 217 CURVNET_SET(tp->t_vnet); 218 #ifdef TCPDEBUG 219 int ostate; 220 221 ostate = tp->t_state; 222 #endif 223 /* 224 * XXXRW: Does this actually happen? 225 */ 226 INP_INFO_WLOCK(&V_tcbinfo); 227 inp = tp->t_inpcb; 228 /* 229 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 230 * tear-down mean we need it as a work-around for races between 231 * timers and tcp_discardcb(). 232 * 233 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL")); 234 */ 235 if (inp == NULL) { 236 tcp_timer_race++; 237 INP_INFO_WUNLOCK(&V_tcbinfo); 238 CURVNET_RESTORE(); 239 return; 240 } 241 INP_WLOCK(inp); 242 tcp_free_sackholes(tp); 243 if (callout_pending(&tp->t_timers->tt_2msl) || 244 !callout_active(&tp->t_timers->tt_2msl)) { 245 INP_WUNLOCK(tp->t_inpcb); 246 INP_INFO_WUNLOCK(&V_tcbinfo); 247 CURVNET_RESTORE(); 248 return; 249 } 250 callout_deactivate(&tp->t_timers->tt_2msl); 251 if ((inp->inp_flags & INP_DROPPED) != 0) { 252 INP_WUNLOCK(inp); 253 INP_INFO_WUNLOCK(&V_tcbinfo); 254 CURVNET_RESTORE(); 255 return; 256 } 257 /* 258 * 2 MSL timeout in shutdown went off. If we're closed but 259 * still waiting for peer to close and connection has been idle 260 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 261 * control block. Otherwise, check again in a bit. 262 * 263 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 264 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 265 * Ignore fact that there were recent incoming segments. 266 */ 267 if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 268 tp->t_inpcb && tp->t_inpcb->inp_socket && 269 (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 270 TCPSTAT_INC(tcps_finwait2_drops); 271 tp = tcp_close(tp); 272 } else { 273 if (tp->t_state != TCPS_TIME_WAIT && 274 ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) 275 callout_reset_on(&tp->t_timers->tt_2msl, 276 TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp)); 277 else 278 tp = tcp_close(tp); 279 } 280 281 #ifdef TCPDEBUG 282 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 283 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 284 PRU_SLOWTIMO); 285 #endif 286 if (tp != NULL) 287 INP_WUNLOCK(inp); 288 INP_INFO_WUNLOCK(&V_tcbinfo); 289 CURVNET_RESTORE(); 290 } 291 292 void 293 tcp_timer_keep(void *xtp) 294 { 295 struct tcpcb *tp = xtp; 296 struct tcptemp *t_template; 297 struct inpcb *inp; 298 CURVNET_SET(tp->t_vnet); 299 #ifdef TCPDEBUG 300 int ostate; 301 302 ostate = tp->t_state; 303 #endif 304 INP_INFO_WLOCK(&V_tcbinfo); 305 inp = tp->t_inpcb; 306 /* 307 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 308 * tear-down mean we need it as a work-around for races between 309 * timers and tcp_discardcb(). 310 * 311 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL")); 312 */ 313 if (inp == NULL) { 314 tcp_timer_race++; 315 INP_INFO_WUNLOCK(&V_tcbinfo); 316 CURVNET_RESTORE(); 317 return; 318 } 319 INP_WLOCK(inp); 320 if (callout_pending(&tp->t_timers->tt_keep) || 321 !callout_active(&tp->t_timers->tt_keep)) { 322 INP_WUNLOCK(inp); 323 INP_INFO_WUNLOCK(&V_tcbinfo); 324 CURVNET_RESTORE(); 325 return; 326 } 327 callout_deactivate(&tp->t_timers->tt_keep); 328 if ((inp->inp_flags & INP_DROPPED) != 0) { 329 INP_WUNLOCK(inp); 330 INP_INFO_WUNLOCK(&V_tcbinfo); 331 CURVNET_RESTORE(); 332 return; 333 } 334 /* 335 * Keep-alive timer went off; send something 336 * or drop connection if idle for too long. 337 */ 338 TCPSTAT_INC(tcps_keeptimeo); 339 if (tp->t_state < TCPS_ESTABLISHED) 340 goto dropit; 341 if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && 342 tp->t_state <= TCPS_CLOSING) { 343 if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) 344 goto dropit; 345 /* 346 * Send a packet designed to force a response 347 * if the peer is up and reachable: 348 * either an ACK if the connection is still alive, 349 * or an RST if the peer has closed the connection 350 * due to timeout or reboot. 351 * Using sequence number tp->snd_una-1 352 * causes the transmitted zero-length segment 353 * to lie outside the receive window; 354 * by the protocol spec, this requires the 355 * correspondent TCP to respond. 356 */ 357 TCPSTAT_INC(tcps_keepprobe); 358 t_template = tcpip_maketemplate(inp); 359 if (t_template) { 360 tcp_respond(tp, t_template->tt_ipgen, 361 &t_template->tt_t, (struct mbuf *)NULL, 362 tp->rcv_nxt, tp->snd_una - 1, 0); 363 free(t_template, M_TEMP); 364 } 365 callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), 366 tcp_timer_keep, tp, INP_CPU(inp)); 367 } else 368 callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), 369 tcp_timer_keep, tp, INP_CPU(inp)); 370 371 #ifdef TCPDEBUG 372 if (inp->inp_socket->so_options & SO_DEBUG) 373 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 374 PRU_SLOWTIMO); 375 #endif 376 INP_WUNLOCK(inp); 377 INP_INFO_WUNLOCK(&V_tcbinfo); 378 CURVNET_RESTORE(); 379 return; 380 381 dropit: 382 TCPSTAT_INC(tcps_keepdrops); 383 tp = tcp_drop(tp, ETIMEDOUT); 384 385 #ifdef TCPDEBUG 386 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 387 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 388 PRU_SLOWTIMO); 389 #endif 390 if (tp != NULL) 391 INP_WUNLOCK(tp->t_inpcb); 392 INP_INFO_WUNLOCK(&V_tcbinfo); 393 CURVNET_RESTORE(); 394 } 395 396 void 397 tcp_timer_persist(void *xtp) 398 { 399 struct tcpcb *tp = xtp; 400 struct inpcb *inp; 401 CURVNET_SET(tp->t_vnet); 402 #ifdef TCPDEBUG 403 int ostate; 404 405 ostate = tp->t_state; 406 #endif 407 INP_INFO_WLOCK(&V_tcbinfo); 408 inp = tp->t_inpcb; 409 /* 410 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 411 * tear-down mean we need it as a work-around for races between 412 * timers and tcp_discardcb(). 413 * 414 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL")); 415 */ 416 if (inp == NULL) { 417 tcp_timer_race++; 418 INP_INFO_WUNLOCK(&V_tcbinfo); 419 CURVNET_RESTORE(); 420 return; 421 } 422 INP_WLOCK(inp); 423 if (callout_pending(&tp->t_timers->tt_persist) || 424 !callout_active(&tp->t_timers->tt_persist)) { 425 INP_WUNLOCK(inp); 426 INP_INFO_WUNLOCK(&V_tcbinfo); 427 CURVNET_RESTORE(); 428 return; 429 } 430 callout_deactivate(&tp->t_timers->tt_persist); 431 if ((inp->inp_flags & INP_DROPPED) != 0) { 432 INP_WUNLOCK(inp); 433 INP_INFO_WUNLOCK(&V_tcbinfo); 434 CURVNET_RESTORE(); 435 return; 436 } 437 /* 438 * Persistance timer into zero window. 439 * Force a byte to be output, if possible. 440 */ 441 TCPSTAT_INC(tcps_persisttimeo); 442 /* 443 * Hack: if the peer is dead/unreachable, we do not 444 * time out if the window is closed. After a full 445 * backoff, drop the connection if the idle time 446 * (no responses to probes) reaches the maximum 447 * backoff that we would use if retransmitting. 448 */ 449 if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 450 (ticks - tp->t_rcvtime >= tcp_maxpersistidle || 451 ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 452 TCPSTAT_INC(tcps_persistdrop); 453 tp = tcp_drop(tp, ETIMEDOUT); 454 goto out; 455 } 456 /* 457 * If the user has closed the socket then drop a persisting 458 * connection after a much reduced timeout. 459 */ 460 if (tp->t_state > TCPS_CLOSE_WAIT && 461 (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { 462 TCPSTAT_INC(tcps_persistdrop); 463 tp = tcp_drop(tp, ETIMEDOUT); 464 goto out; 465 } 466 tcp_setpersist(tp); 467 tp->t_flags |= TF_FORCEDATA; 468 (void) tcp_output(tp); 469 tp->t_flags &= ~TF_FORCEDATA; 470 471 out: 472 #ifdef TCPDEBUG 473 if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 474 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 475 #endif 476 if (tp != NULL) 477 INP_WUNLOCK(inp); 478 INP_INFO_WUNLOCK(&V_tcbinfo); 479 CURVNET_RESTORE(); 480 } 481 482 void 483 tcp_timer_rexmt(void * xtp) 484 { 485 struct tcpcb *tp = xtp; 486 CURVNET_SET(tp->t_vnet); 487 int rexmt; 488 int headlocked; 489 struct inpcb *inp; 490 #ifdef TCPDEBUG 491 int ostate; 492 493 ostate = tp->t_state; 494 #endif 495 INP_INFO_RLOCK(&V_tcbinfo); 496 inp = tp->t_inpcb; 497 /* 498 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 499 * tear-down mean we need it as a work-around for races between 500 * timers and tcp_discardcb(). 501 * 502 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL")); 503 */ 504 if (inp == NULL) { 505 tcp_timer_race++; 506 INP_INFO_RUNLOCK(&V_tcbinfo); 507 CURVNET_RESTORE(); 508 return; 509 } 510 INP_WLOCK(inp); 511 if (callout_pending(&tp->t_timers->tt_rexmt) || 512 !callout_active(&tp->t_timers->tt_rexmt)) { 513 INP_WUNLOCK(inp); 514 INP_INFO_RUNLOCK(&V_tcbinfo); 515 CURVNET_RESTORE(); 516 return; 517 } 518 callout_deactivate(&tp->t_timers->tt_rexmt); 519 if ((inp->inp_flags & INP_DROPPED) != 0) { 520 INP_WUNLOCK(inp); 521 INP_INFO_RUNLOCK(&V_tcbinfo); 522 CURVNET_RESTORE(); 523 return; 524 } 525 tcp_free_sackholes(tp); 526 /* 527 * Retransmission timer went off. Message has not 528 * been acked within retransmit interval. Back off 529 * to a longer retransmit interval and retransmit one segment. 530 */ 531 if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 532 tp->t_rxtshift = TCP_MAXRXTSHIFT; 533 TCPSTAT_INC(tcps_timeoutdrop); 534 in_pcbref(inp); 535 INP_INFO_RUNLOCK(&V_tcbinfo); 536 INP_WUNLOCK(inp); 537 INP_INFO_WLOCK(&V_tcbinfo); 538 INP_WLOCK(inp); 539 if (in_pcbrele_wlocked(inp)) { 540 INP_INFO_WUNLOCK(&V_tcbinfo); 541 CURVNET_RESTORE(); 542 return; 543 } 544 if (inp->inp_flags & INP_DROPPED) { 545 INP_WUNLOCK(inp); 546 INP_INFO_WUNLOCK(&V_tcbinfo); 547 CURVNET_RESTORE(); 548 return; 549 } 550 551 tp = tcp_drop(tp, tp->t_softerror ? 552 tp->t_softerror : ETIMEDOUT); 553 headlocked = 1; 554 goto out; 555 } 556 INP_INFO_RUNLOCK(&V_tcbinfo); 557 headlocked = 0; 558 if (tp->t_state == TCPS_SYN_SENT) { 559 /* 560 * If the SYN was retransmitted, indicate CWND to be 561 * limited to 1 segment in cc_conn_init(). 562 */ 563 tp->snd_cwnd = 1; 564 } else if (tp->t_rxtshift == 1) { 565 /* 566 * first retransmit; record ssthresh and cwnd so they can 567 * be recovered if this turns out to be a "bad" retransmit. 568 * A retransmit is considered "bad" if an ACK for this 569 * segment is received within RTT/2 interval; the assumption 570 * here is that the ACK was already in flight. See 571 * "On Estimating End-to-End Network Path Properties" by 572 * Allman and Paxson for more details. 573 */ 574 tp->snd_cwnd_prev = tp->snd_cwnd; 575 tp->snd_ssthresh_prev = tp->snd_ssthresh; 576 tp->snd_recover_prev = tp->snd_recover; 577 if (IN_FASTRECOVERY(tp->t_flags)) 578 tp->t_flags |= TF_WASFRECOVERY; 579 else 580 tp->t_flags &= ~TF_WASFRECOVERY; 581 if (IN_CONGRECOVERY(tp->t_flags)) 582 tp->t_flags |= TF_WASCRECOVERY; 583 else 584 tp->t_flags &= ~TF_WASCRECOVERY; 585 tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 586 tp->t_flags |= TF_PREVVALID; 587 } else 588 tp->t_flags &= ~TF_PREVVALID; 589 TCPSTAT_INC(tcps_rexmttimeo); 590 if (tp->t_state == TCPS_SYN_SENT) 591 rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift]; 592 else 593 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 594 TCPT_RANGESET(tp->t_rxtcur, rexmt, 595 tp->t_rttmin, TCPTV_REXMTMAX); 596 /* 597 * Disable RFC1323 and SACK if we haven't got any response to 598 * our third SYN to work-around some broken terminal servers 599 * (most of which have hopefully been retired) that have bad VJ 600 * header compression code which trashes TCP segments containing 601 * unknown-to-them TCP options. 602 */ 603 if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) && 604 (tp->t_rxtshift == 3)) 605 tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT); 606 /* 607 * If we backed off this far, our srtt estimate is probably bogus. 608 * Clobber it so we'll take the next rtt measurement as our srtt; 609 * move the current srtt into rttvar to keep the current 610 * retransmit times until then. 611 */ 612 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 613 #ifdef INET6 614 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) 615 in6_losing(tp->t_inpcb); 616 #endif 617 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 618 tp->t_srtt = 0; 619 } 620 tp->snd_nxt = tp->snd_una; 621 tp->snd_recover = tp->snd_max; 622 /* 623 * Force a segment to be sent. 624 */ 625 tp->t_flags |= TF_ACKNOW; 626 /* 627 * If timing a segment in this window, stop the timer. 628 */ 629 tp->t_rtttime = 0; 630 631 cc_cong_signal(tp, NULL, CC_RTO); 632 633 (void) tcp_output(tp); 634 635 out: 636 #ifdef TCPDEBUG 637 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 638 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 639 PRU_SLOWTIMO); 640 #endif 641 if (tp != NULL) 642 INP_WUNLOCK(inp); 643 if (headlocked) 644 INP_INFO_WUNLOCK(&V_tcbinfo); 645 CURVNET_RESTORE(); 646 } 647 648 void 649 tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta) 650 { 651 struct callout *t_callout; 652 void *f_callout; 653 struct inpcb *inp = tp->t_inpcb; 654 int cpu = INP_CPU(inp); 655 656 #ifdef TCP_OFFLOAD 657 if (tp->t_flags & TF_TOE) 658 return; 659 #endif 660 661 switch (timer_type) { 662 case TT_DELACK: 663 t_callout = &tp->t_timers->tt_delack; 664 f_callout = tcp_timer_delack; 665 break; 666 case TT_REXMT: 667 t_callout = &tp->t_timers->tt_rexmt; 668 f_callout = tcp_timer_rexmt; 669 break; 670 case TT_PERSIST: 671 t_callout = &tp->t_timers->tt_persist; 672 f_callout = tcp_timer_persist; 673 break; 674 case TT_KEEP: 675 t_callout = &tp->t_timers->tt_keep; 676 f_callout = tcp_timer_keep; 677 break; 678 case TT_2MSL: 679 t_callout = &tp->t_timers->tt_2msl; 680 f_callout = tcp_timer_2msl; 681 break; 682 default: 683 panic("bad timer_type"); 684 } 685 if (delta == 0) { 686 callout_stop(t_callout); 687 } else { 688 callout_reset_on(t_callout, delta, f_callout, tp, cpu); 689 } 690 } 691 692 int 693 tcp_timer_active(struct tcpcb *tp, int timer_type) 694 { 695 struct callout *t_callout; 696 697 switch (timer_type) { 698 case TT_DELACK: 699 t_callout = &tp->t_timers->tt_delack; 700 break; 701 case TT_REXMT: 702 t_callout = &tp->t_timers->tt_rexmt; 703 break; 704 case TT_PERSIST: 705 t_callout = &tp->t_timers->tt_persist; 706 break; 707 case TT_KEEP: 708 t_callout = &tp->t_timers->tt_keep; 709 break; 710 case TT_2MSL: 711 t_callout = &tp->t_timers->tt_2msl; 712 break; 713 default: 714 panic("bad timer_type"); 715 } 716 return callout_active(t_callout); 717 } 718 719 #define ticks_to_msecs(t) (1000*(t) / hz) 720 721 void 722 tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, 723 struct xtcp_timer *xtimer) 724 { 725 sbintime_t now; 726 727 bzero(xtimer, sizeof(*xtimer)); 728 if (timer == NULL) 729 return; 730 now = getsbinuptime(); 731 if (callout_active(&timer->tt_delack)) 732 xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS; 733 if (callout_active(&timer->tt_rexmt)) 734 xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS; 735 if (callout_active(&timer->tt_persist)) 736 xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS; 737 if (callout_active(&timer->tt_keep)) 738 xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS; 739 if (callout_active(&timer->tt_2msl)) 740 xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS; 741 xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime); 742 } 743