1 /*- 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_inet.h" 36 #include "opt_inet6.h" 37 #include "opt_tcpdebug.h" 38 39 #include <sys/param.h> 40 #include <sys/kernel.h> 41 #include <sys/lock.h> 42 #include <sys/mbuf.h> 43 #include <sys/mutex.h> 44 #include <sys/protosw.h> 45 #include <sys/smp.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/sysctl.h> 49 #include <sys/systm.h> 50 51 #include <net/if.h> 52 #include <net/route.h> 53 #include <net/vnet.h> 54 55 #include <netinet/cc.h> 56 #include <netinet/in.h> 57 #include <netinet/in_pcb.h> 58 #include <netinet/in_systm.h> 59 #ifdef INET6 60 #include <netinet6/in6_pcb.h> 61 #endif 62 #include <netinet/ip_var.h> 63 #include <netinet/tcp_fsm.h> 64 #include <netinet/tcp_timer.h> 65 #include <netinet/tcp_var.h> 66 #include <netinet/tcpip.h> 67 #ifdef TCPDEBUG 68 #include <netinet/tcp_debug.h> 69 #endif 70 71 int tcp_keepinit; 72 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 73 &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection"); 74 75 int tcp_keepidle; 76 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 77 &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin"); 78 79 int tcp_keepintvl; 80 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 81 &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes"); 82 83 int tcp_delacktime; 84 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, 85 &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 86 "Time before a delayed ACK is sent"); 87 88 int tcp_msl; 89 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 90 &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 91 92 int tcp_rexmit_min; 93 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 94 &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 95 "Minimum Retransmission Timeout"); 96 97 int tcp_rexmit_slop; 98 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 99 &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 100 "Retransmission Timer Slop"); 101 102 static int always_keepalive = 1; 103 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 104 &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 105 106 int tcp_fast_finwait2_recycle = 0; 107 SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 108 &tcp_fast_finwait2_recycle, 0, 109 "Recycle closed FIN_WAIT_2 connections faster"); 110 111 int tcp_finwait2_timeout; 112 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, 113 &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); 114 115 int tcp_keepcnt = TCPTV_KEEPCNT; 116 SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0, 117 "Number of keepalive probes to send"); 118 119 /* max idle probes */ 120 int tcp_maxpersistidle; 121 122 static int per_cpu_timers = 0; 123 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW, 124 &per_cpu_timers , 0, "run tcp timers on all cpus"); 125 126 #define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \ 127 ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0) 128 129 /* 130 * Tcp protocol timeout routine called every 500 ms. 131 * Updates timestamps used for TCP 132 * causes finite state machine actions if timers expire. 133 */ 134 void 135 tcp_slowtimo(void) 136 { 137 VNET_ITERATOR_DECL(vnet_iter); 138 139 VNET_LIST_RLOCK_NOSLEEP(); 140 VNET_FOREACH(vnet_iter) { 141 CURVNET_SET(vnet_iter); 142 INP_INFO_WLOCK(&V_tcbinfo); 143 (void) tcp_tw_2msl_scan(0); 144 INP_INFO_WUNLOCK(&V_tcbinfo); 145 CURVNET_RESTORE(); 146 } 147 VNET_LIST_RUNLOCK_NOSLEEP(); 148 } 149 150 int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 151 { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 152 153 int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 154 { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 155 156 static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 157 158 static int tcp_timer_race; 159 SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race, 160 0, "Count of t_inpcb races on tcp_discardcb"); 161 162 /* 163 * TCP timer processing. 164 */ 165 166 void 167 tcp_timer_delack(void *xtp) 168 { 169 struct tcpcb *tp = xtp; 170 struct inpcb *inp; 171 CURVNET_SET(tp->t_vnet); 172 173 inp = tp->t_inpcb; 174 /* 175 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 176 * tear-down mean we need it as a work-around for races between 177 * timers and tcp_discardcb(). 178 * 179 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL")); 180 */ 181 if (inp == NULL) { 182 tcp_timer_race++; 183 CURVNET_RESTORE(); 184 return; 185 } 186 INP_WLOCK(inp); 187 if (callout_pending(&tp->t_timers->tt_delack) || 188 !callout_active(&tp->t_timers->tt_delack)) { 189 INP_WUNLOCK(inp); 190 CURVNET_RESTORE(); 191 return; 192 } 193 callout_deactivate(&tp->t_timers->tt_delack); 194 if ((inp->inp_flags & INP_DROPPED) != 0) { 195 INP_WUNLOCK(inp); 196 CURVNET_RESTORE(); 197 return; 198 } 199 200 tp->t_flags |= TF_ACKNOW; 201 TCPSTAT_INC(tcps_delack); 202 (void) tcp_output(tp); 203 INP_WUNLOCK(inp); 204 CURVNET_RESTORE(); 205 } 206 207 void 208 tcp_timer_2msl(void *xtp) 209 { 210 struct tcpcb *tp = xtp; 211 struct inpcb *inp; 212 CURVNET_SET(tp->t_vnet); 213 #ifdef TCPDEBUG 214 int ostate; 215 216 ostate = tp->t_state; 217 #endif 218 /* 219 * XXXRW: Does this actually happen? 220 */ 221 INP_INFO_WLOCK(&V_tcbinfo); 222 inp = tp->t_inpcb; 223 /* 224 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 225 * tear-down mean we need it as a work-around for races between 226 * timers and tcp_discardcb(). 227 * 228 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL")); 229 */ 230 if (inp == NULL) { 231 tcp_timer_race++; 232 INP_INFO_WUNLOCK(&V_tcbinfo); 233 CURVNET_RESTORE(); 234 return; 235 } 236 INP_WLOCK(inp); 237 tcp_free_sackholes(tp); 238 if (callout_pending(&tp->t_timers->tt_2msl) || 239 !callout_active(&tp->t_timers->tt_2msl)) { 240 INP_WUNLOCK(tp->t_inpcb); 241 INP_INFO_WUNLOCK(&V_tcbinfo); 242 CURVNET_RESTORE(); 243 return; 244 } 245 callout_deactivate(&tp->t_timers->tt_2msl); 246 if ((inp->inp_flags & INP_DROPPED) != 0) { 247 INP_WUNLOCK(inp); 248 INP_INFO_WUNLOCK(&V_tcbinfo); 249 CURVNET_RESTORE(); 250 return; 251 } 252 /* 253 * 2 MSL timeout in shutdown went off. If we're closed but 254 * still waiting for peer to close and connection has been idle 255 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 256 * control block. Otherwise, check again in a bit. 257 * 258 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 259 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 260 * Ignore fact that there were recent incoming segments. 261 */ 262 if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 263 tp->t_inpcb && tp->t_inpcb->inp_socket && 264 (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 265 TCPSTAT_INC(tcps_finwait2_drops); 266 tp = tcp_close(tp); 267 } else { 268 if (tp->t_state != TCPS_TIME_WAIT && 269 ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) 270 callout_reset_on(&tp->t_timers->tt_2msl, 271 TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp)); 272 else 273 tp = tcp_close(tp); 274 } 275 276 #ifdef TCPDEBUG 277 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 278 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 279 PRU_SLOWTIMO); 280 #endif 281 if (tp != NULL) 282 INP_WUNLOCK(inp); 283 INP_INFO_WUNLOCK(&V_tcbinfo); 284 CURVNET_RESTORE(); 285 } 286 287 void 288 tcp_timer_keep(void *xtp) 289 { 290 struct tcpcb *tp = xtp; 291 struct tcptemp *t_template; 292 struct inpcb *inp; 293 CURVNET_SET(tp->t_vnet); 294 #ifdef TCPDEBUG 295 int ostate; 296 297 ostate = tp->t_state; 298 #endif 299 INP_INFO_WLOCK(&V_tcbinfo); 300 inp = tp->t_inpcb; 301 /* 302 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 303 * tear-down mean we need it as a work-around for races between 304 * timers and tcp_discardcb(). 305 * 306 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL")); 307 */ 308 if (inp == NULL) { 309 tcp_timer_race++; 310 INP_INFO_WUNLOCK(&V_tcbinfo); 311 CURVNET_RESTORE(); 312 return; 313 } 314 INP_WLOCK(inp); 315 if (callout_pending(&tp->t_timers->tt_keep) || 316 !callout_active(&tp->t_timers->tt_keep)) { 317 INP_WUNLOCK(inp); 318 INP_INFO_WUNLOCK(&V_tcbinfo); 319 CURVNET_RESTORE(); 320 return; 321 } 322 callout_deactivate(&tp->t_timers->tt_keep); 323 if ((inp->inp_flags & INP_DROPPED) != 0) { 324 INP_WUNLOCK(inp); 325 INP_INFO_WUNLOCK(&V_tcbinfo); 326 CURVNET_RESTORE(); 327 return; 328 } 329 /* 330 * Keep-alive timer went off; send something 331 * or drop connection if idle for too long. 332 */ 333 TCPSTAT_INC(tcps_keeptimeo); 334 if (tp->t_state < TCPS_ESTABLISHED) 335 goto dropit; 336 if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && 337 tp->t_state <= TCPS_CLOSING) { 338 if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) 339 goto dropit; 340 /* 341 * Send a packet designed to force a response 342 * if the peer is up and reachable: 343 * either an ACK if the connection is still alive, 344 * or an RST if the peer has closed the connection 345 * due to timeout or reboot. 346 * Using sequence number tp->snd_una-1 347 * causes the transmitted zero-length segment 348 * to lie outside the receive window; 349 * by the protocol spec, this requires the 350 * correspondent TCP to respond. 351 */ 352 TCPSTAT_INC(tcps_keepprobe); 353 t_template = tcpip_maketemplate(inp); 354 if (t_template) { 355 tcp_respond(tp, t_template->tt_ipgen, 356 &t_template->tt_t, (struct mbuf *)NULL, 357 tp->rcv_nxt, tp->snd_una - 1, 0); 358 free(t_template, M_TEMP); 359 } 360 callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), 361 tcp_timer_keep, tp, INP_CPU(inp)); 362 } else 363 callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), 364 tcp_timer_keep, tp, INP_CPU(inp)); 365 366 #ifdef TCPDEBUG 367 if (inp->inp_socket->so_options & SO_DEBUG) 368 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 369 PRU_SLOWTIMO); 370 #endif 371 INP_WUNLOCK(inp); 372 INP_INFO_WUNLOCK(&V_tcbinfo); 373 CURVNET_RESTORE(); 374 return; 375 376 dropit: 377 TCPSTAT_INC(tcps_keepdrops); 378 tp = tcp_drop(tp, ETIMEDOUT); 379 380 #ifdef TCPDEBUG 381 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 382 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 383 PRU_SLOWTIMO); 384 #endif 385 if (tp != NULL) 386 INP_WUNLOCK(tp->t_inpcb); 387 INP_INFO_WUNLOCK(&V_tcbinfo); 388 CURVNET_RESTORE(); 389 } 390 391 void 392 tcp_timer_persist(void *xtp) 393 { 394 struct tcpcb *tp = xtp; 395 struct inpcb *inp; 396 CURVNET_SET(tp->t_vnet); 397 #ifdef TCPDEBUG 398 int ostate; 399 400 ostate = tp->t_state; 401 #endif 402 INP_INFO_WLOCK(&V_tcbinfo); 403 inp = tp->t_inpcb; 404 /* 405 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 406 * tear-down mean we need it as a work-around for races between 407 * timers and tcp_discardcb(). 408 * 409 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL")); 410 */ 411 if (inp == NULL) { 412 tcp_timer_race++; 413 INP_INFO_WUNLOCK(&V_tcbinfo); 414 CURVNET_RESTORE(); 415 return; 416 } 417 INP_WLOCK(inp); 418 if (callout_pending(&tp->t_timers->tt_persist) || 419 !callout_active(&tp->t_timers->tt_persist)) { 420 INP_WUNLOCK(inp); 421 INP_INFO_WUNLOCK(&V_tcbinfo); 422 CURVNET_RESTORE(); 423 return; 424 } 425 callout_deactivate(&tp->t_timers->tt_persist); 426 if ((inp->inp_flags & INP_DROPPED) != 0) { 427 INP_WUNLOCK(inp); 428 INP_INFO_WUNLOCK(&V_tcbinfo); 429 CURVNET_RESTORE(); 430 return; 431 } 432 /* 433 * Persistance timer into zero window. 434 * Force a byte to be output, if possible. 435 */ 436 TCPSTAT_INC(tcps_persisttimeo); 437 /* 438 * Hack: if the peer is dead/unreachable, we do not 439 * time out if the window is closed. After a full 440 * backoff, drop the connection if the idle time 441 * (no responses to probes) reaches the maximum 442 * backoff that we would use if retransmitting. 443 */ 444 if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 445 (ticks - tp->t_rcvtime >= tcp_maxpersistidle || 446 ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 447 TCPSTAT_INC(tcps_persistdrop); 448 tp = tcp_drop(tp, ETIMEDOUT); 449 goto out; 450 } 451 /* 452 * If the user has closed the socket then drop a persisting 453 * connection after a much reduced timeout. 454 */ 455 if (tp->t_state > TCPS_CLOSE_WAIT && 456 (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { 457 TCPSTAT_INC(tcps_persistdrop); 458 tp = tcp_drop(tp, ETIMEDOUT); 459 goto out; 460 } 461 tcp_setpersist(tp); 462 tp->t_flags |= TF_FORCEDATA; 463 (void) tcp_output(tp); 464 tp->t_flags &= ~TF_FORCEDATA; 465 466 out: 467 #ifdef TCPDEBUG 468 if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 469 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 470 #endif 471 if (tp != NULL) 472 INP_WUNLOCK(inp); 473 INP_INFO_WUNLOCK(&V_tcbinfo); 474 CURVNET_RESTORE(); 475 } 476 477 void 478 tcp_timer_rexmt(void * xtp) 479 { 480 struct tcpcb *tp = xtp; 481 CURVNET_SET(tp->t_vnet); 482 int rexmt; 483 int headlocked; 484 struct inpcb *inp; 485 #ifdef TCPDEBUG 486 int ostate; 487 488 ostate = tp->t_state; 489 #endif 490 INP_INFO_RLOCK(&V_tcbinfo); 491 inp = tp->t_inpcb; 492 /* 493 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 494 * tear-down mean we need it as a work-around for races between 495 * timers and tcp_discardcb(). 496 * 497 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL")); 498 */ 499 if (inp == NULL) { 500 tcp_timer_race++; 501 INP_INFO_RUNLOCK(&V_tcbinfo); 502 CURVNET_RESTORE(); 503 return; 504 } 505 INP_WLOCK(inp); 506 if (callout_pending(&tp->t_timers->tt_rexmt) || 507 !callout_active(&tp->t_timers->tt_rexmt)) { 508 INP_WUNLOCK(inp); 509 INP_INFO_RUNLOCK(&V_tcbinfo); 510 CURVNET_RESTORE(); 511 return; 512 } 513 callout_deactivate(&tp->t_timers->tt_rexmt); 514 if ((inp->inp_flags & INP_DROPPED) != 0) { 515 INP_WUNLOCK(inp); 516 INP_INFO_RUNLOCK(&V_tcbinfo); 517 CURVNET_RESTORE(); 518 return; 519 } 520 tcp_free_sackholes(tp); 521 /* 522 * Retransmission timer went off. Message has not 523 * been acked within retransmit interval. Back off 524 * to a longer retransmit interval and retransmit one segment. 525 */ 526 if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 527 tp->t_rxtshift = TCP_MAXRXTSHIFT; 528 TCPSTAT_INC(tcps_timeoutdrop); 529 in_pcbref(inp); 530 INP_INFO_RUNLOCK(&V_tcbinfo); 531 INP_WUNLOCK(inp); 532 INP_INFO_WLOCK(&V_tcbinfo); 533 INP_WLOCK(inp); 534 if (in_pcbrele_wlocked(inp)) { 535 INP_INFO_WUNLOCK(&V_tcbinfo); 536 CURVNET_RESTORE(); 537 return; 538 } 539 if (inp->inp_flags & INP_DROPPED) { 540 INP_WUNLOCK(inp); 541 INP_INFO_WUNLOCK(&V_tcbinfo); 542 CURVNET_RESTORE(); 543 return; 544 } 545 546 tp = tcp_drop(tp, tp->t_softerror ? 547 tp->t_softerror : ETIMEDOUT); 548 headlocked = 1; 549 goto out; 550 } 551 INP_INFO_RUNLOCK(&V_tcbinfo); 552 headlocked = 0; 553 if (tp->t_state == TCPS_SYN_SENT) { 554 /* 555 * If the SYN was retransmitted, indicate CWND to be 556 * limited to 1 segment in cc_conn_init(). 557 */ 558 tp->snd_cwnd = 1; 559 } else if (tp->t_rxtshift == 1) { 560 /* 561 * first retransmit; record ssthresh and cwnd so they can 562 * be recovered if this turns out to be a "bad" retransmit. 563 * A retransmit is considered "bad" if an ACK for this 564 * segment is received within RTT/2 interval; the assumption 565 * here is that the ACK was already in flight. See 566 * "On Estimating End-to-End Network Path Properties" by 567 * Allman and Paxson for more details. 568 */ 569 tp->snd_cwnd_prev = tp->snd_cwnd; 570 tp->snd_ssthresh_prev = tp->snd_ssthresh; 571 tp->snd_recover_prev = tp->snd_recover; 572 if (IN_FASTRECOVERY(tp->t_flags)) 573 tp->t_flags |= TF_WASFRECOVERY; 574 else 575 tp->t_flags &= ~TF_WASFRECOVERY; 576 if (IN_CONGRECOVERY(tp->t_flags)) 577 tp->t_flags |= TF_WASCRECOVERY; 578 else 579 tp->t_flags &= ~TF_WASCRECOVERY; 580 tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 581 tp->t_flags |= TF_PREVVALID; 582 } else 583 tp->t_flags &= ~TF_PREVVALID; 584 TCPSTAT_INC(tcps_rexmttimeo); 585 if (tp->t_state == TCPS_SYN_SENT) 586 rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift]; 587 else 588 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 589 TCPT_RANGESET(tp->t_rxtcur, rexmt, 590 tp->t_rttmin, TCPTV_REXMTMAX); 591 /* 592 * Disable RFC1323 and SACK if we haven't got any response to 593 * our third SYN to work-around some broken terminal servers 594 * (most of which have hopefully been retired) that have bad VJ 595 * header compression code which trashes TCP segments containing 596 * unknown-to-them TCP options. 597 */ 598 if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) 599 tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT); 600 /* 601 * If we backed off this far, our srtt estimate is probably bogus. 602 * Clobber it so we'll take the next rtt measurement as our srtt; 603 * move the current srtt into rttvar to keep the current 604 * retransmit times until then. 605 */ 606 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 607 #ifdef INET6 608 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) 609 in6_losing(tp->t_inpcb); 610 #endif 611 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 612 tp->t_srtt = 0; 613 } 614 tp->snd_nxt = tp->snd_una; 615 tp->snd_recover = tp->snd_max; 616 /* 617 * Force a segment to be sent. 618 */ 619 tp->t_flags |= TF_ACKNOW; 620 /* 621 * If timing a segment in this window, stop the timer. 622 */ 623 tp->t_rtttime = 0; 624 625 cc_cong_signal(tp, NULL, CC_RTO); 626 627 (void) tcp_output(tp); 628 629 out: 630 #ifdef TCPDEBUG 631 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 632 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 633 PRU_SLOWTIMO); 634 #endif 635 if (tp != NULL) 636 INP_WUNLOCK(inp); 637 if (headlocked) 638 INP_INFO_WUNLOCK(&V_tcbinfo); 639 CURVNET_RESTORE(); 640 } 641 642 void 643 tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta) 644 { 645 struct callout *t_callout; 646 void *f_callout; 647 struct inpcb *inp = tp->t_inpcb; 648 int cpu = INP_CPU(inp); 649 650 #ifdef TCP_OFFLOAD 651 if (tp->t_flags & TF_TOE) 652 return; 653 #endif 654 655 switch (timer_type) { 656 case TT_DELACK: 657 t_callout = &tp->t_timers->tt_delack; 658 f_callout = tcp_timer_delack; 659 break; 660 case TT_REXMT: 661 t_callout = &tp->t_timers->tt_rexmt; 662 f_callout = tcp_timer_rexmt; 663 break; 664 case TT_PERSIST: 665 t_callout = &tp->t_timers->tt_persist; 666 f_callout = tcp_timer_persist; 667 break; 668 case TT_KEEP: 669 t_callout = &tp->t_timers->tt_keep; 670 f_callout = tcp_timer_keep; 671 break; 672 case TT_2MSL: 673 t_callout = &tp->t_timers->tt_2msl; 674 f_callout = tcp_timer_2msl; 675 break; 676 default: 677 panic("bad timer_type"); 678 } 679 if (delta == 0) { 680 callout_stop(t_callout); 681 } else { 682 callout_reset_on(t_callout, delta, f_callout, tp, cpu); 683 } 684 } 685 686 int 687 tcp_timer_active(struct tcpcb *tp, int timer_type) 688 { 689 struct callout *t_callout; 690 691 switch (timer_type) { 692 case TT_DELACK: 693 t_callout = &tp->t_timers->tt_delack; 694 break; 695 case TT_REXMT: 696 t_callout = &tp->t_timers->tt_rexmt; 697 break; 698 case TT_PERSIST: 699 t_callout = &tp->t_timers->tt_persist; 700 break; 701 case TT_KEEP: 702 t_callout = &tp->t_timers->tt_keep; 703 break; 704 case TT_2MSL: 705 t_callout = &tp->t_timers->tt_2msl; 706 break; 707 default: 708 panic("bad timer_type"); 709 } 710 return callout_active(t_callout); 711 } 712 713 #define ticks_to_msecs(t) (1000*(t) / hz) 714 715 void 716 tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct xtcp_timer *xtimer) 717 { 718 bzero(xtimer, sizeof(struct xtcp_timer)); 719 if (timer == NULL) 720 return; 721 if (callout_active(&timer->tt_delack)) 722 xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - ticks); 723 if (callout_active(&timer->tt_rexmt)) 724 xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time - ticks); 725 if (callout_active(&timer->tt_persist)) 726 xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - ticks); 727 if (callout_active(&timer->tt_keep)) 728 xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks); 729 if (callout_active(&timer->tt_2msl)) 730 xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks); 731 xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime); 732 } 733