/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * This file contains functions related to TCP time wait processing.  Also
 * refer to the time wait handling comments in tcp_impl.h.
 */

#include <sys/types.h>
#include <sys/strsun.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>
#include <sys/callo.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/tcp_cluster.h>

static void tcp_timewait_close(void *, mblk_t *, void *, ip_recv_attr_t *);

/*
 * TCP_TIME_WAIT_DELAY governs how often the time_wait_collector runs.
 * Running it every 5 seconds seems to give the best results.
 */
#define	TCP_TIME_WAIT_DELAY	((hrtime_t)5 * NANOSEC)

/*
 * Remove a connection from the list of detached TIME_WAIT connections.
 * It returns B_FALSE if it can't remove the connection from the list
 * as the connection has already been removed from the list due to an
 * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
 */
boolean_t
tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait)
{
	boolean_t locked = B_FALSE;

	if (tcp_time_wait == NULL) {
		tcp_time_wait = *((tcp_squeue_priv_t **)
		    squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP));
		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
		locked = B_TRUE;
	} else {
		ASSERT(MUTEX_HELD(&tcp_time_wait->tcp_time_wait_lock));
	}
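	/*
	 * A tcp_t can be taken off the list from more than one place (the
	 * collector as well as e.g. tcp_clean_death()), so a caller may find
	 * that an earlier call has already removed it; tcp_time_wait_expire
	 * doubles as the "still on the list" indicator for that case.
	 */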
	/* 0 means that the tcp_t has not been added to the time wait list. */
	if (tcp->tcp_time_wait_expire == 0) {
		ASSERT(tcp->tcp_time_wait_next == NULL);
		ASSERT(tcp->tcp_time_wait_prev == NULL);
		if (locked)
			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
		return (B_FALSE);
	}
	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);

	if (tcp == tcp_time_wait->tcp_time_wait_head) {
		ASSERT(tcp->tcp_time_wait_prev == NULL);
		tcp_time_wait->tcp_time_wait_head = tcp->tcp_time_wait_next;
		if (tcp_time_wait->tcp_time_wait_head != NULL) {
			tcp_time_wait->tcp_time_wait_head->tcp_time_wait_prev =
			    NULL;
		} else {
			tcp_time_wait->tcp_time_wait_tail = NULL;
		}
	} else if (tcp == tcp_time_wait->tcp_time_wait_tail) {
		ASSERT(tcp->tcp_time_wait_next == NULL);
		tcp_time_wait->tcp_time_wait_tail = tcp->tcp_time_wait_prev;
		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = NULL;
	} else {
		ASSERT(tcp->tcp_time_wait_prev->tcp_time_wait_next == tcp);
		ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == tcp);
		tcp->tcp_time_wait_prev->tcp_time_wait_next =
		    tcp->tcp_time_wait_next;
		tcp->tcp_time_wait_next->tcp_time_wait_prev =
		    tcp->tcp_time_wait_prev;
	}
	tcp->tcp_time_wait_next = NULL;
	tcp->tcp_time_wait_prev = NULL;
	tcp->tcp_time_wait_expire = 0;

	if (locked)
		mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
	return (B_TRUE);
}

/*
 * Add a connection to the list of detached TIME_WAIT connections
 * and set its time to expire.
 */
void
tcp_time_wait_append(tcp_t *tcp)
{
	tcp_stack_t *tcps = tcp->tcp_tcps;
	squeue_t *sqp = tcp->tcp_connp->conn_sqp;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	tcp_timers_stop(tcp);

	/* Freed above */
	ASSERT(tcp->tcp_timer_tid == 0);
	ASSERT(tcp->tcp_ack_tid == 0);

	/* must have happened at the time of detaching the tcp */
	ASSERT(tcp->tcp_ptpahn == NULL);
	ASSERT(tcp->tcp_flow_stopped == 0);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	ASSERT(tcp->tcp_time_wait_expire == 0);
	ASSERT(tcp->tcp_listener == NULL);

	tcp->tcp_time_wait_expire = ddi_get_lbolt64();
	/*
	 * Since tcp_time_wait_expire is lbolt64, it should not wrap around
	 * in practice.  Hence it cannot be 0.  Note that zero means that the
	 * tcp_t is not in the TIME_WAIT list.
	 */
	tcp->tcp_time_wait_expire += MSEC_TO_TICK(
	    tcps->tcps_time_wait_interval);

	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	TCP_DBGSTAT(tcps, tcp_time_wait);

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	if (tcp_time_wait->tcp_time_wait_head == NULL) {
		ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL);
		tcp_time_wait->tcp_time_wait_head = tcp;

		/*
		 * Even if the list was empty before, there may be a timer
		 * running since a tcp_t can be removed from the list
		 * in other places, such as tcp_clean_death().  So check if
		 * a timer is needed.
		 */
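		/*
		 * The first fire is scheduled a full tcps_time_wait_interval
		 * (milliseconds, converted to nanoseconds by the MICROSEC
		 * multiply) plus a millisecond of slack from now, so the
		 * entry just appended should have expired by the time the
		 * collector runs.
		 */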
		if (tcp_time_wait->tcp_time_wait_tid == 0) {
			tcp_time_wait->tcp_time_wait_tid =
			    timeout_generic(CALLOUT_NORMAL,
			    tcp_time_wait_collector, sqp,
			    (hrtime_t)(tcps->tcps_time_wait_interval + 1) *
			    MICROSEC, CALLOUT_TCP_RESOLUTION,
			    CALLOUT_FLAG_ROUNDUP);
		}
	} else {
		/*
		 * The list is not empty, so a timer must be running.  If not,
		 * tcp_time_wait_collector() must be running on this
		 * tcp_time_wait list at the same time.
		 */
		ASSERT(tcp_time_wait->tcp_time_wait_tid != 0 ||
		    tcp_time_wait->tcp_time_wait_running);
		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
		ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state ==
		    TCPS_TIME_WAIT);
		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = tcp;
		tcp->tcp_time_wait_prev = tcp_time_wait->tcp_time_wait_tail;
	}
	tcp_time_wait->tcp_time_wait_tail = tcp;
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}

/*
 * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
 * tcp_t.  Used in tcp_time_wait_collector().
 */
/* ARGSUSED */
static void
tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
{
	conn_t *connp = (conn_t *)arg;
	tcp_t *tcp = connp->conn_tcp;

	ASSERT(tcp != NULL);
	if (tcp->tcp_state == TCPS_CLOSED) {
		return;
	}

	ASSERT((connp->conn_family == AF_INET &&
	    connp->conn_ipversion == IPV4_VERSION) ||
	    (connp->conn_family == AF_INET6 &&
	    (connp->conn_ipversion == IPV4_VERSION ||
	    connp->conn_ipversion == IPV6_VERSION)));
	ASSERT(!tcp->tcp_listener);

	ASSERT(TCP_IS_DETACHED(tcp));

	/*
	 * Because they have no upstream client to rebind or tcp_close()
	 * them later, we axe the connection here and now.
	 */
	tcp_close_detached(tcp);
}

/*
 * Blows away all tcps whose TIME_WAIT has expired.  List traversal
 * is done forwards from the head.  This walks all stack instances
 * since tcp_time_wait remains global across all stacks.
 */
/* ARGSUSED */
void
tcp_time_wait_collector(void *arg)
{
	tcp_t *tcp;
	int64_t now;
	mblk_t *mp;
	conn_t *connp;
	kmutex_t *lock;
	boolean_t removed;
	extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
	    uint8_t *, in_port_t, uint8_t *, in_port_t, void *);

	squeue_t *sqp = (squeue_t *)arg;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	tcp_time_wait->tcp_time_wait_tid = 0;
#ifdef DEBUG
	tcp_time_wait->tcp_time_wait_running = B_TRUE;
#endif

	if (tcp_time_wait->tcp_free_list != NULL &&
	    tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
		TCP_G_STAT(tcp_freelist_cleanup);
		while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
			tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
			tcp->tcp_time_wait_next = NULL;
			tcp_time_wait->tcp_free_list_cnt--;
			ASSERT(tcp->tcp_tcps == NULL);
			CONN_DEC_REF(tcp->tcp_connp);
		}
		ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
	}

	/*
	 * In order to reap time waits reliably, we should use a
	 * source of time that is not adjustable by the user -- hence
	 * the call to ddi_get_lbolt64().
	 */
	now = ddi_get_lbolt64();
	while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
		/*
		 * lbolt64 should not wrap around in practice...  So we can
		 * do a direct comparison.
		 */
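		/*
		 * Entries are appended at the tail, so as long as
		 * tcps_time_wait_interval is not tuned down in between,
		 * expiry times are non-decreasing from head to tail and we
		 * can stop at the first entry that has not yet expired.
		 */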
		if (now < tcp->tcp_time_wait_expire)
			break;

		removed = tcp_time_wait_remove(tcp, tcp_time_wait);
		ASSERT(removed);

		connp = tcp->tcp_connp;
		ASSERT(connp->conn_fanout != NULL);
		lock = &connp->conn_fanout->connf_lock;
		/*
		 * This is essentially a TW reclaim fast path optimization for
		 * performance where the timewait collector checks under the
		 * fanout lock (so that no one else can get access to the
		 * conn_t) that the refcnt is 2, i.e. one for TCP and one for
		 * the classifier hash list.  If the refcnt is indeed 2, we
		 * can just remove the conn under the fanout lock and avoid
		 * cleaning up the conn under the squeue, provided that
		 * clustering callbacks are not enabled.  If clustering is
		 * enabled, we need to make the clustering callback before
		 * setting the CONDEMNED flag and after dropping all locks,
		 * so we forego this optimization and fall back to the slow
		 * path.  Also please see the comments in tcp_closei_local
		 * regarding the refcnt logic.
		 *
		 * Since we are holding the tcp_time_wait_lock, it's better
		 * not to block on the fanout lock: while we hold it, other
		 * connections can't add themselves to the time wait list.
		 * So we do a tryenter instead of a mutex_enter.
		 */
		if (mutex_tryenter(lock)) {
			mutex_enter(&connp->conn_lock);
			if ((connp->conn_ref == 2) &&
			    (cl_inet_disconnect == NULL)) {
				ipcl_hash_remove_locked(connp,
				    connp->conn_fanout);
				/*
				 * Set the CONDEMNED flag now itself so that
				 * the refcnt cannot increase due to any
				 * walker.
				 */
				connp->conn_state_flags |= CONN_CONDEMNED;
				mutex_exit(lock);
				mutex_exit(&connp->conn_lock);
				if (tcp_time_wait->tcp_free_list_cnt <
				    tcp_free_list_max_cnt) {
					/* Add to head of tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_cleanup(tcp);
					ASSERT(connp->conn_latch == NULL);
					ASSERT(connp->conn_policy == NULL);
					ASSERT(tcp->tcp_tcps == NULL);
					ASSERT(connp->conn_netstack == NULL);

					mutex_enter(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp->tcp_time_wait_next =
					    tcp_time_wait->tcp_free_list;
					tcp_time_wait->tcp_free_list = tcp;
					tcp_time_wait->tcp_free_list_cnt++;
					continue;
				} else {
					/* Do not add to tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_bind_hash_remove(tcp);
					ixa_cleanup(tcp->tcp_connp->conn_ixa);
					tcp_ipsec_cleanup(tcp);
					CONN_DEC_REF(tcp->tcp_connp);
				}
			} else {
				CONN_INC_REF_LOCKED(connp);
				mutex_exit(lock);
				mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
				mutex_exit(&connp->conn_lock);
				/*
				 * We can reuse the closemp here since conn has
				 * detached (otherwise we wouldn't even be in
				 * time_wait list).  tcp_closemp_used can safely
				 * be changed without taking a lock as no other
				 * thread can concurrently access it at this
				 * point in the connection lifecycle.
				 */
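				/*
				 * A non-NULL b_prev here would mean that the
				 * closemp has not come back from a previous
				 * dispatch; rather than risk corrupting the
				 * squeue, we panic.
				 */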
359 */ 360 361 if (tcp->tcp_closemp.b_prev == NULL) 362 tcp->tcp_closemp_used = B_TRUE; 363 else 364 cmn_err(CE_PANIC, 365 "tcp_timewait_collector: " 366 "concurrent use of tcp_closemp: " 367 "connp %p tcp %p\n", (void *)connp, 368 (void *)tcp); 369 370 TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15); 371 mp = &tcp->tcp_closemp; 372 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 373 tcp_timewait_close, connp, NULL, 374 SQ_FILL, SQTAG_TCP_TIMEWAIT); 375 } 376 } else { 377 mutex_enter(&connp->conn_lock); 378 CONN_INC_REF_LOCKED(connp); 379 mutex_exit(&tcp_time_wait->tcp_time_wait_lock); 380 mutex_exit(&connp->conn_lock); 381 /* 382 * We can reuse the closemp here since conn has 383 * detached (otherwise we wouldn't even be in 384 * time_wait list). tcp_closemp_used can safely 385 * be changed without taking a lock as no other 386 * thread can concurrently access it at this 387 * point in the connection lifecycle. 388 */ 389 390 if (tcp->tcp_closemp.b_prev == NULL) 391 tcp->tcp_closemp_used = B_TRUE; 392 else 393 cmn_err(CE_PANIC, "tcp_timewait_collector: " 394 "concurrent use of tcp_closemp: " 395 "connp %p tcp %p\n", (void *)connp, 396 (void *)tcp); 397 398 TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15); 399 mp = &tcp->tcp_closemp; 400 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 401 tcp_timewait_close, connp, NULL, 402 SQ_FILL, SQTAG_TCP_TIMEWAIT); 403 } 404 mutex_enter(&tcp_time_wait->tcp_time_wait_lock); 405 } 406 407 if (tcp_time_wait->tcp_free_list != NULL) 408 tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE; 409 410 /* 411 * If the time wait list is not empty and there is no timer running, 412 * restart it. 413 */ 414 if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL && 415 tcp_time_wait->tcp_time_wait_tid == 0) { 416 hrtime_t firetime; 417 418 firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now); 419 /* This ensures that we won't wake up too often. */ 420 firetime = MAX(TCP_TIME_WAIT_DELAY, firetime); 421 tcp_time_wait->tcp_time_wait_tid = 422 timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector, 423 sqp, firetime, CALLOUT_TCP_RESOLUTION, 424 CALLOUT_FLAG_ROUNDUP); 425 } 426 #ifdef DEBUG 427 tcp_time_wait->tcp_time_wait_running = B_FALSE; 428 #endif 429 mutex_exit(&tcp_time_wait->tcp_time_wait_lock); 430 } 431 432 /* 433 * tcp_time_wait_processing() handles processing of incoming packets when 434 * the tcp_t is in the TIME_WAIT state. 435 * 436 * A TIME_WAIT tcp_t that has an associated open TCP end point (not in 437 * detached state) is never put on the time wait list. 438 */ 439 void 440 tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, 441 uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira) 442 { 443 int32_t bytes_acked; 444 int32_t gap; 445 int32_t rgap; 446 tcp_opt_t tcpopt; 447 uint_t flags; 448 uint32_t new_swnd = 0; 449 conn_t *nconnp; 450 conn_t *connp = tcp->tcp_connp; 451 tcp_stack_t *tcps = tcp->tcp_tcps; 452 453 BUMP_LOCAL(tcp->tcp_ibsegs); 454 DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp); 455 456 flags = (unsigned int)tcpha->tha_flags & 0xFF; 457 new_swnd = ntohs(tcpha->tha_win) << 458 ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws); 459 if (tcp->tcp_snd_ts_ok) { 460 if (!tcp_paws_check(tcp, tcpha, &tcpopt)) { 461 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, 462 tcp->tcp_rnxt, TH_ACK); 463 goto done; 464 } 465 } 466 gap = seg_seq - tcp->tcp_rnxt; 467 rgap = tcp->tcp_rwnd - (gap + seg_len); 468 if (gap < 0) { 469 TCPS_BUMP_MIB(tcps, tcpInDataDupSegs); 470 TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes, 471 (seg_len > -gap ? 
		seg_len += gap;
		if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
			if (flags & TH_RST) {
				goto done;
			}
			if ((flags & TH_FIN) && seg_len == -1) {
				/*
				 * When TCP receives a duplicate FIN in
				 * TIME_WAIT state, restart the 2 MSL timer.
				 * See page 73 in RFC 793.  Make sure this TCP
				 * is already on the TIME_WAIT list.  If not,
				 * just restart the timer.
				 */
				if (TCP_IS_DETACHED(tcp)) {
					if (tcp_time_wait_remove(tcp, NULL) ==
					    B_TRUE) {
						tcp_time_wait_append(tcp);
						TCP_DBGSTAT(tcps,
						    tcp_rput_time_wait);
					}
				} else {
					ASSERT(tcp != NULL);
					TCP_TIMER_RESTART(tcp,
					    tcps->tcps_time_wait_interval);
				}
				tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
				    tcp->tcp_rnxt, TH_ACK);
				goto done;
			}
			flags |= TH_ACK_NEEDED;
			seg_len = 0;
			goto process_ack;
		}

		/* Fix seg_seq, and chew the gap off the front. */
		seg_seq = tcp->tcp_rnxt;
	}

	if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
		/*
		 * Make sure that when we accept the connection, we pick
		 * an ISS greater than (tcp_snxt + ISS_INCR/2) for the
		 * old connection.
		 *
		 * The next ISS generated is equal to tcp_iss_incr_extra
		 * + ISS_INCR/2 + other components depending on the
		 * value of tcp_strong_iss.  We pre-calculate the new
		 * ISS here and compare with tcp_snxt to determine if
		 * we need to make an adjustment to tcp_iss_incr_extra.
		 *
		 * The above calculation is ugly and is a
		 * waste of CPU cycles...
		 */
		uint32_t new_iss = tcps->tcps_iss_incr_extra;
		int32_t adj;
		ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;

		switch (tcps->tcps_strong_iss) {
		case 2: {
			/* Add time and MD5 components. */
			uint32_t answer[4];
			struct {
				uint32_t ports;
				in6_addr_t src;
				in6_addr_t dst;
			} arg;
			MD5_CTX context;

			mutex_enter(&tcps->tcps_iss_key_lock);
			context = tcps->tcps_iss_key;
			mutex_exit(&tcps->tcps_iss_key_lock);
			arg.ports = connp->conn_ports;
			/* We use MAPPED addresses in tcp_iss_init */
			arg.src = connp->conn_laddr_v6;
			arg.dst = connp->conn_faddr_v6;
			MD5Update(&context, (uchar_t *)&arg,
			    sizeof (arg));
			MD5Final((uchar_t *)answer, &context);
			answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
			break;
		}
		case 1:
			/* Add time component and min random (i.e. 1). */
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
			break;
		default:
			/* Add only time component. */
			new_iss += (uint32_t)gethrestime_sec() * ISS_INCR;
			break;
		}
		if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
			/*
			 * New ISS not guaranteed to be ISS_INCR/2
			 * ahead of the current tcp_snxt, so add the
			 * difference to tcp_iss_incr_extra.
			 */
			tcps->tcps_iss_incr_extra += adj;
		}
		/*
		 * If tcp_clean_death() can not perform the task now,
		 * drop the SYN packet and let the other side re-xmit.
		 * Otherwise pass the SYN packet back in, since the
		 * old tcp state has been cleaned up or freed.
		 */
		if (tcp_clean_death(tcp, 0) == -1)
			goto done;
		nconnp = ipcl_classify(mp, ira, ipst);
		if (nconnp != NULL) {
			TCP_STAT(tcps, tcp_time_wait_syn_success);
			/* Drops ref on nconnp */
			tcp_reinput(nconnp, mp, ira, ipst);
			return;
		}
		goto done;
	}

	/*
	 * rgap is the amount of receive window left after taking this
	 * segment; a negative value means -rgap bytes of the segment fall
	 * outside the window.
	 */
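	/*
	 * For example (illustrative values): with tcp_rwnd == 100, gap == 0
	 * and seg_len == 102, rgap is -2; the last 2 bytes lie beyond the
	 * window edge and are trimmed off below.
	 */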
592 */ 593 if (rgap < 0) { 594 TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs); 595 TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap); 596 /* Fix seg_len and make sure there is something left. */ 597 seg_len += rgap; 598 if (seg_len <= 0) { 599 if (flags & TH_RST) { 600 goto done; 601 } 602 flags |= TH_ACK_NEEDED; 603 seg_len = 0; 604 goto process_ack; 605 } 606 } 607 /* 608 * Check whether we can update tcp_ts_recent. This test is 609 * NOT the one in RFC 1323 3.4. It is from Braden, 1993, "TCP 610 * Extensions for High Performance: An Update", Internet Draft. 611 */ 612 if (tcp->tcp_snd_ts_ok && 613 TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) && 614 SEQ_LEQ(seg_seq, tcp->tcp_rack)) { 615 tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val; 616 tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64(); 617 } 618 619 if (seg_seq != tcp->tcp_rnxt && seg_len > 0) { 620 /* Always ack out of order packets */ 621 flags |= TH_ACK_NEEDED; 622 seg_len = 0; 623 } else if (seg_len > 0) { 624 TCPS_BUMP_MIB(tcps, tcpInClosed); 625 TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs); 626 TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len); 627 } 628 if (flags & TH_RST) { 629 (void) tcp_clean_death(tcp, 0); 630 goto done; 631 } 632 if (flags & TH_SYN) { 633 tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1, 634 TH_RST|TH_ACK); 635 /* 636 * Do not delete the TCP structure if it is in 637 * TIME_WAIT state. Refer to RFC 1122, 4.2.2.13. 638 */ 639 goto done; 640 } 641 process_ack: 642 if (flags & TH_ACK) { 643 bytes_acked = (int)(seg_ack - tcp->tcp_suna); 644 if (bytes_acked <= 0) { 645 if (bytes_acked == 0 && seg_len == 0 && 646 new_swnd == tcp->tcp_swnd) 647 TCPS_BUMP_MIB(tcps, tcpInDupAck); 648 } else { 649 /* Acks something not sent */ 650 flags |= TH_ACK_NEEDED; 651 } 652 } 653 if (flags & TH_ACK_NEEDED) { 654 /* 655 * Time to send an ack for some reason. 656 */ 657 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, 658 tcp->tcp_rnxt, TH_ACK); 659 } 660 done: 661 freemsg(mp); 662 } 663