1 /*- 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_inet.h" 36 #include "opt_inet6.h" 37 #include "opt_tcpdebug.h" 38 #include "opt_rss.h" 39 40 #include <sys/param.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/mbuf.h> 44 #include <sys/mutex.h> 45 #include <sys/protosw.h> 46 #include <sys/smp.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/sysctl.h> 50 #include <sys/systm.h> 51 52 #include <net/if.h> 53 #include <net/route.h> 54 #include <net/vnet.h> 55 #include <net/netisr.h> 56 57 #include <netinet/cc.h> 58 #include <netinet/in.h> 59 #include <netinet/in_pcb.h> 60 #include <netinet/in_rss.h> 61 #include <netinet/in_systm.h> 62 #ifdef INET6 63 #include <netinet6/in6_pcb.h> 64 #endif 65 #include <netinet/ip_var.h> 66 #include <netinet/tcp_fsm.h> 67 #include <netinet/tcp_timer.h> 68 #include <netinet/tcp_var.h> 69 #include <netinet/tcpip.h> 70 #ifdef TCPDEBUG 71 #include <netinet/tcp_debug.h> 72 #endif 73 74 int tcp_keepinit; 75 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 76 &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection"); 77 78 int tcp_keepidle; 79 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 80 &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin"); 81 82 int tcp_keepintvl; 83 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 84 &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes"); 85 86 int tcp_delacktime; 87 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, 88 &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 89 "Time before a delayed ACK is sent"); 90 91 int tcp_msl; 92 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 93 &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 94 95 int tcp_rexmit_min; 96 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 97 &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 98 "Minimum Retransmission Timeout"); 99 100 int tcp_rexmit_slop; 101 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 102 &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 103 "Retransmission Timer Slop"); 104 105 static int always_keepalive = 1; 106 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 107 &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 108 109 int tcp_fast_finwait2_recycle = 0; 110 SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 111 &tcp_fast_finwait2_recycle, 0, 112 "Recycle closed FIN_WAIT_2 connections faster"); 113 114 int tcp_finwait2_timeout; 115 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, 116 &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); 117 118 int tcp_keepcnt = TCPTV_KEEPCNT; 119 SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0, 120 "Number of keepalive probes to send"); 121 122 /* max idle probes */ 123 int tcp_maxpersistidle; 124 125 static int tcp_rexmit_drop_options = 0; 126 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW, 127 &tcp_rexmit_drop_options, 0, 128 "Drop TCP options from 3rd and later retransmitted SYN"); 129 130 #ifdef RSS 131 static int per_cpu_timers = 1; 132 #else 133 static int per_cpu_timers = 0; 134 #endif 135 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW, 136 &per_cpu_timers , 0, "run tcp timers on all cpus"); 137 138 #if 0 139 #define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \ 140 ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0) 141 #endif 142 143 /* 144 * Map the given inp to a CPU id. 145 * 146 * This queries RSS if it's compiled in, else it defaults to the current 147 * CPU ID. 148 */ 149 static inline int 150 inp_to_cpuid(struct inpcb *inp) 151 { 152 u_int cpuid; 153 154 #ifdef RSS 155 if (per_cpu_timers) { 156 cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); 157 if (cpuid == NETISR_CPUID_NONE) 158 return (curcpu); /* XXX */ 159 else 160 return (cpuid); 161 } 162 #else 163 /* Legacy, pre-RSS behaviour */ 164 if (per_cpu_timers) { 165 /* 166 * We don't have a flowid -> cpuid mapping, so cheat and 167 * just map unknown cpuids to curcpu. Not the best, but 168 * apparently better than defaulting to swi 0. 169 */ 170 cpuid = inp->inp_flowid % (mp_maxid + 1); 171 if (! CPU_ABSENT(cpuid)) 172 return (cpuid); 173 return (curcpu); 174 } 175 #endif 176 /* Default for RSS and non-RSS - cpuid 0 */ 177 else { 178 return (0); 179 } 180 } 181 182 /* 183 * Tcp protocol timeout routine called every 500 ms. 184 * Updates timestamps used for TCP 185 * causes finite state machine actions if timers expire. 186 */ 187 void 188 tcp_slowtimo(void) 189 { 190 VNET_ITERATOR_DECL(vnet_iter); 191 192 VNET_LIST_RLOCK_NOSLEEP(); 193 VNET_FOREACH(vnet_iter) { 194 CURVNET_SET(vnet_iter); 195 tcp_tw_2msl_scan(); 196 CURVNET_RESTORE(); 197 } 198 VNET_LIST_RUNLOCK_NOSLEEP(); 199 } 200 201 int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 202 { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 203 204 int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 205 { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 206 207 static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 208 209 static int tcp_timer_race; 210 SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race, 211 0, "Count of t_inpcb races on tcp_discardcb"); 212 213 /* 214 * TCP timer processing. 215 */ 216 217 void 218 tcp_timer_delack(void *xtp) 219 { 220 struct tcpcb *tp = xtp; 221 struct inpcb *inp; 222 CURVNET_SET(tp->t_vnet); 223 224 inp = tp->t_inpcb; 225 /* 226 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 227 * tear-down mean we need it as a work-around for races between 228 * timers and tcp_discardcb(). 229 * 230 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL")); 231 */ 232 if (inp == NULL) { 233 tcp_timer_race++; 234 CURVNET_RESTORE(); 235 return; 236 } 237 INP_WLOCK(inp); 238 if (callout_pending(&tp->t_timers->tt_delack) || 239 !callout_active(&tp->t_timers->tt_delack)) { 240 INP_WUNLOCK(inp); 241 CURVNET_RESTORE(); 242 return; 243 } 244 callout_deactivate(&tp->t_timers->tt_delack); 245 if ((inp->inp_flags & INP_DROPPED) != 0) { 246 INP_WUNLOCK(inp); 247 CURVNET_RESTORE(); 248 return; 249 } 250 251 tp->t_flags |= TF_ACKNOW; 252 TCPSTAT_INC(tcps_delack); 253 (void) tcp_output(tp); 254 INP_WUNLOCK(inp); 255 CURVNET_RESTORE(); 256 } 257 258 void 259 tcp_timer_2msl(void *xtp) 260 { 261 struct tcpcb *tp = xtp; 262 struct inpcb *inp; 263 CURVNET_SET(tp->t_vnet); 264 #ifdef TCPDEBUG 265 int ostate; 266 267 ostate = tp->t_state; 268 #endif 269 /* 270 * XXXRW: Does this actually happen? 271 */ 272 INP_INFO_WLOCK(&V_tcbinfo); 273 inp = tp->t_inpcb; 274 /* 275 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 276 * tear-down mean we need it as a work-around for races between 277 * timers and tcp_discardcb(). 278 * 279 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL")); 280 */ 281 if (inp == NULL) { 282 tcp_timer_race++; 283 INP_INFO_WUNLOCK(&V_tcbinfo); 284 CURVNET_RESTORE(); 285 return; 286 } 287 INP_WLOCK(inp); 288 tcp_free_sackholes(tp); 289 if (callout_pending(&tp->t_timers->tt_2msl) || 290 !callout_active(&tp->t_timers->tt_2msl)) { 291 INP_WUNLOCK(tp->t_inpcb); 292 INP_INFO_WUNLOCK(&V_tcbinfo); 293 CURVNET_RESTORE(); 294 return; 295 } 296 callout_deactivate(&tp->t_timers->tt_2msl); 297 if ((inp->inp_flags & INP_DROPPED) != 0) { 298 INP_WUNLOCK(inp); 299 INP_INFO_WUNLOCK(&V_tcbinfo); 300 CURVNET_RESTORE(); 301 return; 302 } 303 /* 304 * 2 MSL timeout in shutdown went off. If we're closed but 305 * still waiting for peer to close and connection has been idle 306 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 307 * control block. Otherwise, check again in a bit. 308 * 309 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 310 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 311 * Ignore fact that there were recent incoming segments. 312 */ 313 if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 314 tp->t_inpcb && tp->t_inpcb->inp_socket && 315 (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 316 TCPSTAT_INC(tcps_finwait2_drops); 317 tp = tcp_close(tp); 318 } else { 319 if (tp->t_state != TCPS_TIME_WAIT && 320 ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) 321 callout_reset_on(&tp->t_timers->tt_2msl, 322 TP_KEEPINTVL(tp), tcp_timer_2msl, tp, 323 inp_to_cpuid(inp)); 324 else 325 tp = tcp_close(tp); 326 } 327 328 #ifdef TCPDEBUG 329 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 330 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 331 PRU_SLOWTIMO); 332 #endif 333 if (tp != NULL) 334 INP_WUNLOCK(inp); 335 INP_INFO_WUNLOCK(&V_tcbinfo); 336 CURVNET_RESTORE(); 337 } 338 339 void 340 tcp_timer_keep(void *xtp) 341 { 342 struct tcpcb *tp = xtp; 343 struct tcptemp *t_template; 344 struct inpcb *inp; 345 CURVNET_SET(tp->t_vnet); 346 #ifdef TCPDEBUG 347 int ostate; 348 349 ostate = tp->t_state; 350 #endif 351 INP_INFO_WLOCK(&V_tcbinfo); 352 inp = tp->t_inpcb; 353 /* 354 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 355 * tear-down mean we need it as a work-around for races between 356 * timers and tcp_discardcb(). 357 * 358 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL")); 359 */ 360 if (inp == NULL) { 361 tcp_timer_race++; 362 INP_INFO_WUNLOCK(&V_tcbinfo); 363 CURVNET_RESTORE(); 364 return; 365 } 366 INP_WLOCK(inp); 367 if (callout_pending(&tp->t_timers->tt_keep) || 368 !callout_active(&tp->t_timers->tt_keep)) { 369 INP_WUNLOCK(inp); 370 INP_INFO_WUNLOCK(&V_tcbinfo); 371 CURVNET_RESTORE(); 372 return; 373 } 374 callout_deactivate(&tp->t_timers->tt_keep); 375 if ((inp->inp_flags & INP_DROPPED) != 0) { 376 INP_WUNLOCK(inp); 377 INP_INFO_WUNLOCK(&V_tcbinfo); 378 CURVNET_RESTORE(); 379 return; 380 } 381 /* 382 * Keep-alive timer went off; send something 383 * or drop connection if idle for too long. 384 */ 385 TCPSTAT_INC(tcps_keeptimeo); 386 if (tp->t_state < TCPS_ESTABLISHED) 387 goto dropit; 388 if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && 389 tp->t_state <= TCPS_CLOSING) { 390 if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) 391 goto dropit; 392 /* 393 * Send a packet designed to force a response 394 * if the peer is up and reachable: 395 * either an ACK if the connection is still alive, 396 * or an RST if the peer has closed the connection 397 * due to timeout or reboot. 398 * Using sequence number tp->snd_una-1 399 * causes the transmitted zero-length segment 400 * to lie outside the receive window; 401 * by the protocol spec, this requires the 402 * correspondent TCP to respond. 403 */ 404 TCPSTAT_INC(tcps_keepprobe); 405 t_template = tcpip_maketemplate(inp); 406 if (t_template) { 407 tcp_respond(tp, t_template->tt_ipgen, 408 &t_template->tt_t, (struct mbuf *)NULL, 409 tp->rcv_nxt, tp->snd_una - 1, 0); 410 free(t_template, M_TEMP); 411 } 412 callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), 413 tcp_timer_keep, tp, inp_to_cpuid(inp)); 414 } else 415 callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), 416 tcp_timer_keep, tp, inp_to_cpuid(inp)); 417 418 #ifdef TCPDEBUG 419 if (inp->inp_socket->so_options & SO_DEBUG) 420 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 421 PRU_SLOWTIMO); 422 #endif 423 INP_WUNLOCK(inp); 424 INP_INFO_WUNLOCK(&V_tcbinfo); 425 CURVNET_RESTORE(); 426 return; 427 428 dropit: 429 TCPSTAT_INC(tcps_keepdrops); 430 tp = tcp_drop(tp, ETIMEDOUT); 431 432 #ifdef TCPDEBUG 433 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 434 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 435 PRU_SLOWTIMO); 436 #endif 437 if (tp != NULL) 438 INP_WUNLOCK(tp->t_inpcb); 439 INP_INFO_WUNLOCK(&V_tcbinfo); 440 CURVNET_RESTORE(); 441 } 442 443 void 444 tcp_timer_persist(void *xtp) 445 { 446 struct tcpcb *tp = xtp; 447 struct inpcb *inp; 448 CURVNET_SET(tp->t_vnet); 449 #ifdef TCPDEBUG 450 int ostate; 451 452 ostate = tp->t_state; 453 #endif 454 INP_INFO_WLOCK(&V_tcbinfo); 455 inp = tp->t_inpcb; 456 /* 457 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 458 * tear-down mean we need it as a work-around for races between 459 * timers and tcp_discardcb(). 460 * 461 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL")); 462 */ 463 if (inp == NULL) { 464 tcp_timer_race++; 465 INP_INFO_WUNLOCK(&V_tcbinfo); 466 CURVNET_RESTORE(); 467 return; 468 } 469 INP_WLOCK(inp); 470 if (callout_pending(&tp->t_timers->tt_persist) || 471 !callout_active(&tp->t_timers->tt_persist)) { 472 INP_WUNLOCK(inp); 473 INP_INFO_WUNLOCK(&V_tcbinfo); 474 CURVNET_RESTORE(); 475 return; 476 } 477 callout_deactivate(&tp->t_timers->tt_persist); 478 if ((inp->inp_flags & INP_DROPPED) != 0) { 479 INP_WUNLOCK(inp); 480 INP_INFO_WUNLOCK(&V_tcbinfo); 481 CURVNET_RESTORE(); 482 return; 483 } 484 /* 485 * Persistance timer into zero window. 486 * Force a byte to be output, if possible. 487 */ 488 TCPSTAT_INC(tcps_persisttimeo); 489 /* 490 * Hack: if the peer is dead/unreachable, we do not 491 * time out if the window is closed. After a full 492 * backoff, drop the connection if the idle time 493 * (no responses to probes) reaches the maximum 494 * backoff that we would use if retransmitting. 495 */ 496 if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 497 (ticks - tp->t_rcvtime >= tcp_maxpersistidle || 498 ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 499 TCPSTAT_INC(tcps_persistdrop); 500 tp = tcp_drop(tp, ETIMEDOUT); 501 goto out; 502 } 503 /* 504 * If the user has closed the socket then drop a persisting 505 * connection after a much reduced timeout. 506 */ 507 if (tp->t_state > TCPS_CLOSE_WAIT && 508 (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { 509 TCPSTAT_INC(tcps_persistdrop); 510 tp = tcp_drop(tp, ETIMEDOUT); 511 goto out; 512 } 513 tcp_setpersist(tp); 514 tp->t_flags |= TF_FORCEDATA; 515 (void) tcp_output(tp); 516 tp->t_flags &= ~TF_FORCEDATA; 517 518 out: 519 #ifdef TCPDEBUG 520 if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 521 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 522 #endif 523 if (tp != NULL) 524 INP_WUNLOCK(inp); 525 INP_INFO_WUNLOCK(&V_tcbinfo); 526 CURVNET_RESTORE(); 527 } 528 529 void 530 tcp_timer_rexmt(void * xtp) 531 { 532 struct tcpcb *tp = xtp; 533 CURVNET_SET(tp->t_vnet); 534 int rexmt; 535 int headlocked; 536 struct inpcb *inp; 537 #ifdef TCPDEBUG 538 int ostate; 539 540 ostate = tp->t_state; 541 #endif 542 INP_INFO_RLOCK(&V_tcbinfo); 543 inp = tp->t_inpcb; 544 /* 545 * XXXRW: While this assert is in fact correct, bugs in the tcpcb 546 * tear-down mean we need it as a work-around for races between 547 * timers and tcp_discardcb(). 548 * 549 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL")); 550 */ 551 if (inp == NULL) { 552 tcp_timer_race++; 553 INP_INFO_RUNLOCK(&V_tcbinfo); 554 CURVNET_RESTORE(); 555 return; 556 } 557 INP_WLOCK(inp); 558 if (callout_pending(&tp->t_timers->tt_rexmt) || 559 !callout_active(&tp->t_timers->tt_rexmt)) { 560 INP_WUNLOCK(inp); 561 INP_INFO_RUNLOCK(&V_tcbinfo); 562 CURVNET_RESTORE(); 563 return; 564 } 565 callout_deactivate(&tp->t_timers->tt_rexmt); 566 if ((inp->inp_flags & INP_DROPPED) != 0) { 567 INP_WUNLOCK(inp); 568 INP_INFO_RUNLOCK(&V_tcbinfo); 569 CURVNET_RESTORE(); 570 return; 571 } 572 tcp_free_sackholes(tp); 573 /* 574 * Retransmission timer went off. Message has not 575 * been acked within retransmit interval. Back off 576 * to a longer retransmit interval and retransmit one segment. 577 */ 578 if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 579 tp->t_rxtshift = TCP_MAXRXTSHIFT; 580 TCPSTAT_INC(tcps_timeoutdrop); 581 in_pcbref(inp); 582 INP_INFO_RUNLOCK(&V_tcbinfo); 583 INP_WUNLOCK(inp); 584 INP_INFO_WLOCK(&V_tcbinfo); 585 INP_WLOCK(inp); 586 if (in_pcbrele_wlocked(inp)) { 587 INP_INFO_WUNLOCK(&V_tcbinfo); 588 CURVNET_RESTORE(); 589 return; 590 } 591 if (inp->inp_flags & INP_DROPPED) { 592 INP_WUNLOCK(inp); 593 INP_INFO_WUNLOCK(&V_tcbinfo); 594 CURVNET_RESTORE(); 595 return; 596 } 597 598 tp = tcp_drop(tp, tp->t_softerror ? 599 tp->t_softerror : ETIMEDOUT); 600 headlocked = 1; 601 goto out; 602 } 603 INP_INFO_RUNLOCK(&V_tcbinfo); 604 headlocked = 0; 605 if (tp->t_state == TCPS_SYN_SENT) { 606 /* 607 * If the SYN was retransmitted, indicate CWND to be 608 * limited to 1 segment in cc_conn_init(). 609 */ 610 tp->snd_cwnd = 1; 611 } else if (tp->t_rxtshift == 1) { 612 /* 613 * first retransmit; record ssthresh and cwnd so they can 614 * be recovered if this turns out to be a "bad" retransmit. 615 * A retransmit is considered "bad" if an ACK for this 616 * segment is received within RTT/2 interval; the assumption 617 * here is that the ACK was already in flight. See 618 * "On Estimating End-to-End Network Path Properties" by 619 * Allman and Paxson for more details. 620 */ 621 tp->snd_cwnd_prev = tp->snd_cwnd; 622 tp->snd_ssthresh_prev = tp->snd_ssthresh; 623 tp->snd_recover_prev = tp->snd_recover; 624 if (IN_FASTRECOVERY(tp->t_flags)) 625 tp->t_flags |= TF_WASFRECOVERY; 626 else 627 tp->t_flags &= ~TF_WASFRECOVERY; 628 if (IN_CONGRECOVERY(tp->t_flags)) 629 tp->t_flags |= TF_WASCRECOVERY; 630 else 631 tp->t_flags &= ~TF_WASCRECOVERY; 632 tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 633 tp->t_flags |= TF_PREVVALID; 634 } else 635 tp->t_flags &= ~TF_PREVVALID; 636 TCPSTAT_INC(tcps_rexmttimeo); 637 if (tp->t_state == TCPS_SYN_SENT) 638 rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift]; 639 else 640 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 641 TCPT_RANGESET(tp->t_rxtcur, rexmt, 642 tp->t_rttmin, TCPTV_REXMTMAX); 643 /* 644 * Disable RFC1323 and SACK if we haven't got any response to 645 * our third SYN to work-around some broken terminal servers 646 * (most of which have hopefully been retired) that have bad VJ 647 * header compression code which trashes TCP segments containing 648 * unknown-to-them TCP options. 649 */ 650 if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) && 651 (tp->t_rxtshift == 3)) 652 tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT); 653 /* 654 * If we backed off this far, our srtt estimate is probably bogus. 655 * Clobber it so we'll take the next rtt measurement as our srtt; 656 * move the current srtt into rttvar to keep the current 657 * retransmit times until then. 658 */ 659 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 660 #ifdef INET6 661 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) 662 in6_losing(tp->t_inpcb); 663 #endif 664 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 665 tp->t_srtt = 0; 666 } 667 tp->snd_nxt = tp->snd_una; 668 tp->snd_recover = tp->snd_max; 669 /* 670 * Force a segment to be sent. 671 */ 672 tp->t_flags |= TF_ACKNOW; 673 /* 674 * If timing a segment in this window, stop the timer. 675 */ 676 tp->t_rtttime = 0; 677 678 cc_cong_signal(tp, NULL, CC_RTO); 679 680 (void) tcp_output(tp); 681 682 out: 683 #ifdef TCPDEBUG 684 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 685 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 686 PRU_SLOWTIMO); 687 #endif 688 if (tp != NULL) 689 INP_WUNLOCK(inp); 690 if (headlocked) 691 INP_INFO_WUNLOCK(&V_tcbinfo); 692 CURVNET_RESTORE(); 693 } 694 695 void 696 tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta) 697 { 698 struct callout *t_callout; 699 void *f_callout; 700 struct inpcb *inp = tp->t_inpcb; 701 int cpu = inp_to_cpuid(inp); 702 703 #ifdef TCP_OFFLOAD 704 if (tp->t_flags & TF_TOE) 705 return; 706 #endif 707 708 switch (timer_type) { 709 case TT_DELACK: 710 t_callout = &tp->t_timers->tt_delack; 711 f_callout = tcp_timer_delack; 712 break; 713 case TT_REXMT: 714 t_callout = &tp->t_timers->tt_rexmt; 715 f_callout = tcp_timer_rexmt; 716 break; 717 case TT_PERSIST: 718 t_callout = &tp->t_timers->tt_persist; 719 f_callout = tcp_timer_persist; 720 break; 721 case TT_KEEP: 722 t_callout = &tp->t_timers->tt_keep; 723 f_callout = tcp_timer_keep; 724 break; 725 case TT_2MSL: 726 t_callout = &tp->t_timers->tt_2msl; 727 f_callout = tcp_timer_2msl; 728 break; 729 default: 730 panic("bad timer_type"); 731 } 732 if (delta == 0) { 733 callout_stop(t_callout); 734 } else { 735 callout_reset_on(t_callout, delta, f_callout, tp, cpu); 736 } 737 } 738 739 int 740 tcp_timer_active(struct tcpcb *tp, int timer_type) 741 { 742 struct callout *t_callout; 743 744 switch (timer_type) { 745 case TT_DELACK: 746 t_callout = &tp->t_timers->tt_delack; 747 break; 748 case TT_REXMT: 749 t_callout = &tp->t_timers->tt_rexmt; 750 break; 751 case TT_PERSIST: 752 t_callout = &tp->t_timers->tt_persist; 753 break; 754 case TT_KEEP: 755 t_callout = &tp->t_timers->tt_keep; 756 break; 757 case TT_2MSL: 758 t_callout = &tp->t_timers->tt_2msl; 759 break; 760 default: 761 panic("bad timer_type"); 762 } 763 return callout_active(t_callout); 764 } 765 766 #define ticks_to_msecs(t) (1000*(t) / hz) 767 768 void 769 tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, 770 struct xtcp_timer *xtimer) 771 { 772 sbintime_t now; 773 774 bzero(xtimer, sizeof(*xtimer)); 775 if (timer == NULL) 776 return; 777 now = getsbinuptime(); 778 if (callout_active(&timer->tt_delack)) 779 xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS; 780 if (callout_active(&timer->tt_rexmt)) 781 xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS; 782 if (callout_active(&timer->tt_persist)) 783 xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS; 784 if (callout_active(&timer->tt_keep)) 785 xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS; 786 if (callout_active(&timer->tt_2msl)) 787 xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS; 788 xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime); 789 } 790