/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"
#include "opt_rss.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/if.h>
#include <net/route.h>
#include <net/vnet.h>
#include <net/netisr.h>

#include <netinet/cc.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_rss.h>
#include <netinet/in_systm.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/ip_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif

/*
 * Timer tunables.  All of the interval values below are stored in ticks;
 * the sysctl handler sysctl_msec_to_ticks converts to/from milliseconds
 * for userland.
 */

/* Time allowed to establish a connection before giving up (SYN timeout). */
int	tcp_keepinit;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");

/* Idle time before the first keepalive probe is sent. */
int	tcp_keepidle;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");

/* Interval between successive keepalive probes. */
int	tcp_keepintvl;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");

int	tcp_delacktime;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
    "Time before a delayed ACK is sent");

int	tcp_msl;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");

int	tcp_rexmit_min;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
    "Minimum Retransmission Timeout");

int	tcp_rexmit_slop;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
    "Retransmission Timer Slop");

/* When non-zero, run keepalives on every connection, ignoring SO_KEEPALIVE. */
static int	always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");

int    tcp_fast_finwait2_recycle = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
    &tcp_fast_finwait2_recycle, 0,
    "Recycle closed FIN_WAIT_2 connections faster");

int    tcp_finwait2_timeout;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");

/* Number of unanswered keepalive probes before the connection is dropped. */
int	tcp_keepcnt = TCPTV_KEEPCNT;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
    "Number of keepalive probes to send");

	/* max idle probes */
int	tcp_maxpersistidle;

static int	tcp_rexmit_drop_options = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
    &tcp_rexmit_drop_options, 0,
    "Drop TCP options from 3rd and later retransmitted SYN");

/* When non-zero, spread TCP timer callouts across CPUs instead of CPU 0. */
static int	per_cpu_timers = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
    &per_cpu_timers , 0, "run tcp timers on all cpus");

/* Retired macro predecessor of inp_to_cpuid(); kept for reference only. */
#if 0
#define	INP_CPU(inp)	(per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
#endif

/*
 * Map the given inp to a CPU id.
 *
 * This queries RSS if it's compiled in, else it defaults to the current
 * CPU ID.
 */
static inline int
inp_to_cpuid(struct inpcb *inp)
{
	u_int cpuid;

	/*
	 * NOTE(review): the trailing "else" below pairs with whichever
	 * "if (per_cpu_timers)" survives preprocessing; an else dangling
	 * across #else/#endif is fragile style — confirm both RSS and
	 * non-RSS configurations still build before touching this.
	 */
#ifdef RSS
	if (per_cpu_timers) {
		cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
		if (cpuid == NETISR_CPUID_NONE)
			return (curcpu);	/* XXX */
		else
			return (cpuid);
	}
#else
	/* Legacy, pre-RSS behaviour */
	if (per_cpu_timers) {
		/*
		 * We don't have a flowid -> cpuid mapping, so cheat and
		 * just map unknown cpuids to curcpu.  Not the best, but
		 * apparently better than defaulting to swi 0.
		 */
		cpuid = inp->inp_flowid % (mp_maxid + 1);
		if (! CPU_ABSENT(cpuid))
			return (cpuid);
		return (curcpu);
	}
#endif
	/* Default for RSS and non-RSS - cpuid 0 */
	else {
		return (0);
	}
}

/*
 * Tcp protocol timeout routine called every 500 ms.
 * Updates timestamps used for TCP
 * causes finite state machine actions if timers expire.
 */
void
tcp_slowtimo(void)
{
	VNET_ITERATOR_DECL(vnet_iter);

	/* Scan the TIME_WAIT list of every vnet under the list read lock. */
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		tcp_tw_2msl_scan();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}

/* RTO backoff multipliers, indexed by t_rxtshift (0..TCP_MAXRXTSHIFT). */
int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };

int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };

static int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */

/* Counts timer callouts that found t_inpcb already cleared (see XXXRW notes). */
static int tcp_timer_race;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
    0, "Count of t_inpcb races on tcp_discardcb");

/*
 * TCP timer processing.
 */

/*
 * Delayed-ACK timer expiry: force an immediate ACK (TF_ACKNOW) and call
 * tcp_output().  Runs as a callout; tp is the tcpcb armed in
 * tcp_timer_activate().
 */
void
tcp_timer_delack(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);

	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
	 *
	 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
	 */
	if (inp == NULL) {
		tcp_timer_race++;
		CURVNET_RESTORE();
		return;
	}
	INP_WLOCK(inp);
	/*
	 * A pending callout means we were rescheduled while this invocation
	 * was in flight; an inactive one means we were stopped.  Either way
	 * this expiry is stale — bail out.
	 */
	if (callout_pending(&tp->t_timers->tt_delack) ||
	    !callout_active(&tp->t_timers->tt_delack)) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_delack);
	/* Connection already dropped while we waited for the lock. */
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		CURVNET_RESTORE();
		return;
	}

	tp->t_flags |= TF_ACKNOW;
	TCPSTAT_INC(tcps_delack);
	(void) tcp_output(tp);
	INP_WUNLOCK(inp);
	CURVNET_RESTORE();
}

/*
 * 2MSL timer expiry: reap TIME_WAIT and over-idle FIN_WAIT_2 connections,
 * or re-arm the timer if the connection is still within its idle budget.
 */
void
tcp_timer_2msl(void *xtp)
{
	struct tcpcb *tp = xtp;
	struct inpcb *inp;
	CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * XXXRW: Does this actually happen?
	 */
	INP_INFO_WLOCK(&V_tcbinfo);
	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
274 * 275 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL")); 276 */ 277 if (inp == NULL) { 278 tcp_timer_race++; 279 INP_INFO_WUNLOCK(&V_tcbinfo); 280 CURVNET_RESTORE(); 281 return; 282 } 283 INP_WLOCK(inp); 284 tcp_free_sackholes(tp); 285 if (callout_pending(&tp->t_timers->tt_2msl) || 286 !callout_active(&tp->t_timers->tt_2msl)) { 287 INP_WUNLOCK(tp->t_inpcb); 288 INP_INFO_WUNLOCK(&V_tcbinfo); 289 CURVNET_RESTORE(); 290 return; 291 } 292 callout_deactivate(&tp->t_timers->tt_2msl); 293 if ((inp->inp_flags & INP_DROPPED) != 0) { 294 INP_WUNLOCK(inp); 295 INP_INFO_WUNLOCK(&V_tcbinfo); 296 CURVNET_RESTORE(); 297 return; 298 } 299 /* 300 * 2 MSL timeout in shutdown went off. If we're closed but 301 * still waiting for peer to close and connection has been idle 302 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 303 * control block. Otherwise, check again in a bit. 304 * 305 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 306 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 307 * Ignore fact that there were recent incoming segments. 
308 */ 309 if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 310 tp->t_inpcb && tp->t_inpcb->inp_socket && 311 (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 312 TCPSTAT_INC(tcps_finwait2_drops); 313 tp = tcp_close(tp); 314 } else { 315 if (tp->t_state != TCPS_TIME_WAIT && 316 ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) 317 callout_reset_on(&tp->t_timers->tt_2msl, 318 TP_KEEPINTVL(tp), tcp_timer_2msl, tp, 319 inp_to_cpuid(inp)); 320 else 321 tp = tcp_close(tp); 322 } 323 324 #ifdef TCPDEBUG 325 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 326 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 327 PRU_SLOWTIMO); 328 #endif 329 if (tp != NULL) 330 INP_WUNLOCK(inp); 331 INP_INFO_WUNLOCK(&V_tcbinfo); 332 CURVNET_RESTORE(); 333 } 334 335 void 336 tcp_timer_keep(void *xtp) 337 { 338 struct tcpcb *tp = xtp; 339 struct tcptemp *t_template; 340 struct inpcb *inp; 341 CURVNET_SET(tp->t_vnet); 342 #ifdef TCPDEBUG 343 int ostate; 344 345 ostate = tp->t_state; 346 #endif 347 INP_INFO_WLOCK(&V_tcbinfo); 348 inp = tp->t_inpcb; 349 /* 350 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 351 * tear-down mean we need it as a work-around for races between 352 * timers and tcp_discardcb(). 353 * 354 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL")); 355 */ 356 if (inp == NULL) { 357 tcp_timer_race++; 358 INP_INFO_WUNLOCK(&V_tcbinfo); 359 CURVNET_RESTORE(); 360 return; 361 } 362 INP_WLOCK(inp); 363 if (callout_pending(&tp->t_timers->tt_keep) || 364 !callout_active(&tp->t_timers->tt_keep)) { 365 INP_WUNLOCK(inp); 366 INP_INFO_WUNLOCK(&V_tcbinfo); 367 CURVNET_RESTORE(); 368 return; 369 } 370 callout_deactivate(&tp->t_timers->tt_keep); 371 if ((inp->inp_flags & INP_DROPPED) != 0) { 372 INP_WUNLOCK(inp); 373 INP_INFO_WUNLOCK(&V_tcbinfo); 374 CURVNET_RESTORE(); 375 return; 376 } 377 /* 378 * Keep-alive timer went off; send something 379 * or drop connection if idle for too long. 
380 */ 381 TCPSTAT_INC(tcps_keeptimeo); 382 if (tp->t_state < TCPS_ESTABLISHED) 383 goto dropit; 384 if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && 385 tp->t_state <= TCPS_CLOSING) { 386 if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) 387 goto dropit; 388 /* 389 * Send a packet designed to force a response 390 * if the peer is up and reachable: 391 * either an ACK if the connection is still alive, 392 * or an RST if the peer has closed the connection 393 * due to timeout or reboot. 394 * Using sequence number tp->snd_una-1 395 * causes the transmitted zero-length segment 396 * to lie outside the receive window; 397 * by the protocol spec, this requires the 398 * correspondent TCP to respond. 399 */ 400 TCPSTAT_INC(tcps_keepprobe); 401 t_template = tcpip_maketemplate(inp); 402 if (t_template) { 403 tcp_respond(tp, t_template->tt_ipgen, 404 &t_template->tt_t, (struct mbuf *)NULL, 405 tp->rcv_nxt, tp->snd_una - 1, 0); 406 free(t_template, M_TEMP); 407 } 408 callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), 409 tcp_timer_keep, tp, inp_to_cpuid(inp)); 410 } else 411 callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), 412 tcp_timer_keep, tp, inp_to_cpuid(inp)); 413 414 #ifdef TCPDEBUG 415 if (inp->inp_socket->so_options & SO_DEBUG) 416 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 417 PRU_SLOWTIMO); 418 #endif 419 INP_WUNLOCK(inp); 420 INP_INFO_WUNLOCK(&V_tcbinfo); 421 CURVNET_RESTORE(); 422 return; 423 424 dropit: 425 TCPSTAT_INC(tcps_keepdrops); 426 tp = tcp_drop(tp, ETIMEDOUT); 427 428 #ifdef TCPDEBUG 429 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 430 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 431 PRU_SLOWTIMO); 432 #endif 433 if (tp != NULL) 434 INP_WUNLOCK(tp->t_inpcb); 435 INP_INFO_WUNLOCK(&V_tcbinfo); 436 CURVNET_RESTORE(); 437 } 438 439 void 440 tcp_timer_persist(void *xtp) 441 { 442 struct tcpcb *tp = xtp; 443 struct inpcb *inp; 444 
	CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	INP_INFO_WLOCK(&V_tcbinfo);
	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
	 *
	 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
	 */
	if (inp == NULL) {
		tcp_timer_race++;
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	INP_WLOCK(inp);
	/* Stale expiry: rescheduled or stopped while this call was in flight. */
	if (callout_pending(&tp->t_timers->tt_persist) ||
	    !callout_active(&tp->t_timers->tt_persist)) {
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_persist);
	/* Connection already dropped while we waited for the lock. */
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		INP_INFO_WUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	/*
	 * Persistance timer into zero window.
	 * Force a byte to be output, if possible.
	 */
	TCPSTAT_INC(tcps_persisttimeo);
	/*
	 * Hack: if the peer is dead/unreachable, we do not
	 * time out if the window is closed.  After a full
	 * backoff, drop the connection if the idle time
	 * (no responses to probes) reaches the maximum
	 * backoff that we would use if retransmitting.
	 */
	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
		TCPSTAT_INC(tcps_persistdrop);
		tp = tcp_drop(tp, ETIMEDOUT);
		goto out;
	}
	/*
	 * If the user has closed the socket then drop a persisting
	 * connection after a much reduced timeout.
	 */
	if (tp->t_state > TCPS_CLOSE_WAIT &&
	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
		TCPSTAT_INC(tcps_persistdrop);
		tp = tcp_drop(tp, ETIMEDOUT);
		goto out;
	}
	/* Re-arm the persist timer, then force out one segment of data. */
	tcp_setpersist(tp);
	tp->t_flags |= TF_FORCEDATA;
	(void) tcp_output(tp);
	tp->t_flags &= ~TF_FORCEDATA;

out:
#ifdef TCPDEBUG
	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	/* tcp_drop() may have freed the tcpcb and returned NULL. */
	if (tp != NULL)
		INP_WUNLOCK(inp);
	INP_INFO_WUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
}

/*
 * Retransmission timer expiry: back off the RTO and retransmit one segment,
 * or drop the connection once TCP_MAXRXTSHIFT retransmissions have failed.
 */
void
tcp_timer_rexmt(void * xtp)
{
	struct tcpcb *tp = xtp;
	CURVNET_SET(tp->t_vnet);
	int rexmt;
	int headlocked;	/* whether the out: path must release the info WLOCK */
	struct inpcb *inp;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/* Common case only needs the info read lock; the drop path upgrades. */
	INP_INFO_RLOCK(&V_tcbinfo);
	inp = tp->t_inpcb;
	/*
	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
	 * tear-down mean we need it as a work-around for races between
	 * timers and tcp_discardcb().
	 *
	 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
	 */
	if (inp == NULL) {
		tcp_timer_race++;
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	INP_WLOCK(inp);
	/* Stale expiry: rescheduled or stopped while this call was in flight. */
	if (callout_pending(&tp->t_timers->tt_rexmt) ||
	    !callout_active(&tp->t_timers->tt_rexmt)) {
		INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	callout_deactivate(&tp->t_timers->tt_rexmt);
	/* Connection already dropped while we waited for the lock. */
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		CURVNET_RESTORE();
		return;
	}
	tcp_free_sackholes(tp);
	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */
	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		TCPSTAT_INC(tcps_timeoutdrop);
		/*
		 * Dropping the connection requires the info write lock, but
		 * lock order forbids upgrading while holding the inp lock.
		 * Take a reference on the inp so it can't go away, drop both
		 * locks, reacquire in write/inp order, then re-validate.
		 */
		in_pcbref(inp);
		INP_INFO_RUNLOCK(&V_tcbinfo);
		INP_WUNLOCK(inp);
		INP_INFO_WLOCK(&V_tcbinfo);
		INP_WLOCK(inp);
		/* Ours was the last reference: the inp is gone, just leave. */
		if (in_pcbrele_wlocked(inp)) {
			INP_INFO_WUNLOCK(&V_tcbinfo);
			CURVNET_RESTORE();
			return;
		}
		/* Someone dropped the connection while the locks were down. */
		if (inp->inp_flags & INP_DROPPED) {
			INP_WUNLOCK(inp);
			INP_INFO_WUNLOCK(&V_tcbinfo);
			CURVNET_RESTORE();
			return;
		}

		tp = tcp_drop(tp, tp->t_softerror ?
			      tp->t_softerror : ETIMEDOUT);
		headlocked = 1;
		goto out;
	}
	INP_INFO_RUNLOCK(&V_tcbinfo);
	headlocked = 0;
	if (tp->t_state == TCPS_SYN_SENT) {
		/*
		 * If the SYN was retransmitted, indicate CWND to be
		 * limited to 1 segment in cc_conn_init().
		 */
		tp->snd_cwnd = 1;
	} else if (tp->t_rxtshift == 1) {
		/*
		 * first retransmit; record ssthresh and cwnd so they can
		 * be recovered if this turns out to be a "bad" retransmit.
		 * A retransmit is considered "bad" if an ACK for this
		 * segment is received within RTT/2 interval; the assumption
		 * here is that the ACK was already in flight.  See
		 * "On Estimating End-to-End Network Path Properties" by
		 * Allman and Paxson for more details.
		 */
		tp->snd_cwnd_prev = tp->snd_cwnd;
		tp->snd_ssthresh_prev = tp->snd_ssthresh;
		tp->snd_recover_prev = tp->snd_recover;
		if (IN_FASTRECOVERY(tp->t_flags))
			tp->t_flags |= TF_WASFRECOVERY;
		else
			tp->t_flags &= ~TF_WASFRECOVERY;
		if (IN_CONGRECOVERY(tp->t_flags))
			tp->t_flags |= TF_WASCRECOVERY;
		else
			tp->t_flags &= ~TF_WASCRECOVERY;
		/* Window of RTT/2 (srtt is stored left-shifted) for "bad". */
		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
		tp->t_flags |= TF_PREVVALID;
	} else
		tp->t_flags &= ~TF_PREVVALID;
	TCPSTAT_INC(tcps_rexmttimeo);
	/* Exponential backoff, clamped to [t_rttmin, TCPTV_REXMTMAX]. */
	if (tp->t_state == TCPS_SYN_SENT)
		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
	else
		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
	TCPT_RANGESET(tp->t_rxtcur, rexmt,
		      tp->t_rttmin, TCPTV_REXMTMAX);
	/*
	 * Disable RFC1323 and SACK if we haven't got any response to
	 * our third SYN to work-around some broken terminal servers
	 * (most of which have hopefully been retired) that have bad VJ
	 * header compression code which trashes TCP segments containing
	 * unknown-to-them TCP options.
	 */
	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
	    (tp->t_rxtshift == 3))
		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
	/*
	 * If we backed off this far, our srtt estimate is probably bogus.
	 * Clobber it so we'll take the next rtt measurement as our srtt;
	 * move the current srtt into rttvar to keep the current
	 * retransmit times until then.
	 */
	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
#ifdef INET6
		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
			in6_losing(tp->t_inpcb);
#endif
		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
		tp->t_srtt = 0;
	}
	/* Go back to the oldest unacked data; treat it all as outstanding. */
	tp->snd_nxt = tp->snd_una;
	tp->snd_recover = tp->snd_max;
	/*
	 * Force a segment to be sent.
	 */
	tp->t_flags |= TF_ACKNOW;
	/*
	 * If timing a segment in this window, stop the timer.
	 */
	tp->t_rtttime = 0;

	/* Notify the congestion control module of the RTO. */
	cc_cong_signal(tp, NULL, CC_RTO);

	(void) tcp_output(tp);

out:
#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	/* tcp_drop() may have freed the tcpcb and returned NULL. */
	if (tp != NULL)
		INP_WUNLOCK(inp);
	/* Only the max-rxtshift drop path arrives here holding the WLOCK. */
	if (headlocked)
		INP_INFO_WUNLOCK(&V_tcbinfo);
	CURVNET_RESTORE();
}

/*
 * Arm (delta != 0) or stop (delta == 0) one of tp's timers, selected by
 * timer_type (TT_DELACK, TT_REXMT, TT_PERSIST, TT_KEEP or TT_2MSL).
 * delta is in ticks.  The callout runs on the CPU chosen by inp_to_cpuid().
 * No-op for TOE-offloaded connections.
 */
void
tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
{
	struct callout *t_callout;
	void *f_callout;
	struct inpcb *inp = tp->t_inpcb;
	int cpu = inp_to_cpuid(inp);

#ifdef TCP_OFFLOAD
	if (tp->t_flags & TF_TOE)
		return;
#endif

	switch (timer_type) {
	case TT_DELACK:
		t_callout = &tp->t_timers->tt_delack;
		f_callout = tcp_timer_delack;
		break;
	case TT_REXMT:
		t_callout = &tp->t_timers->tt_rexmt;
		f_callout = tcp_timer_rexmt;
		break;
	case TT_PERSIST:
		t_callout = &tp->t_timers->tt_persist;
		f_callout = tcp_timer_persist;
		break;
	case TT_KEEP:
		t_callout = &tp->t_timers->tt_keep;
		f_callout = tcp_timer_keep;
		break;
	case TT_2MSL:
		t_callout = &tp->t_timers->tt_2msl;
		f_callout = tcp_timer_2msl;
		break;
	default:
		panic("bad timer_type");
	}
	if (delta == 0) {
		callout_stop(t_callout);
	} else {
		callout_reset_on(t_callout, delta, f_callout, tp, cpu);
	}
}

/*
 * Return non-zero if the given timer (TT_* constant) is currently active
 * on tp.  Panics on an unknown timer_type.
 */
int
tcp_timer_active(struct tcpcb *tp, int timer_type)
{
	struct callout *t_callout;

	switch (timer_type) {
	case TT_DELACK:
		t_callout = &tp->t_timers->tt_delack;
		break;
	case TT_REXMT:
		t_callout = &tp->t_timers->tt_rexmt;
		break;
	case TT_PERSIST:
		t_callout = &tp->t_timers->tt_persist;
		break;
	case TT_KEEP:
		t_callout = &tp->t_timers->tt_keep;
		break;
	case TT_2MSL:
		t_callout = &tp->t_timers->tt_2msl;
		break;
	default:
		panic("bad timer_type");
	}
	return callout_active(t_callout);
}

#define	ticks_to_msecs(t)	(1000*(t) / hz)

/*
 * Export tp's timer state into the userland-visible xtimer structure
 * (used by the sysctl pcblist export).  Each tt_* field becomes the
 * remaining time in milliseconds for timers that are active; inactive
 * timers are left zero by the bzero().  t_rcvtime is reported as the
 * elapsed idle time in milliseconds.
 */
void
tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
    struct xtcp_timer *xtimer)
{
	sbintime_t now;

	bzero(xtimer, sizeof(*xtimer));
	if (timer == NULL)
		return;
	now = getsbinuptime();
	/* c_time is the callout's absolute expiry in sbintime_t units. */
	if (callout_active(&timer->tt_delack))
		xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
	if (callout_active(&timer->tt_rexmt))
		xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
	if (callout_active(&timer->tt_persist))
		xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
	if (callout_active(&timer->tt_keep))
		xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
	if (callout_active(&timer->tt_2msl))
		xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
}