1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2011, Joyent Inc. All rights reserved. 25 */ 26 27 /* 28 * This file contains functions related to TCP time wait processing. Also 29 * refer to the time wait handling comments in tcp_impl.h. 30 */ 31 32 #include <sys/types.h> 33 #include <sys/strsun.h> 34 #include <sys/squeue_impl.h> 35 #include <sys/squeue.h> 36 #include <sys/callo.h> 37 38 #include <inet/common.h> 39 #include <inet/ip.h> 40 #include <inet/tcp.h> 41 #include <inet/tcp_impl.h> 42 #include <inet/tcp_cluster.h> 43 44 static void tcp_timewait_close(void *, mblk_t *, void *, ip_recv_attr_t *); 45 46 /* 47 * TCP_TIME_WAIT_DELAY governs how often the time_wait_collector runs. 48 * Running it every 5 seconds seems to give the best results. 49 */ 50 #define TCP_TIME_WAIT_DELAY ((hrtime_t)5 * NANOSEC) 51 52 /* 53 * Remove a connection from the list of detached TIME_WAIT connections. 54 * It returns B_FALSE if it can't remove the connection from the list 55 * as the connection has already been removed from the list due to an 56 * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE. 57 */ 58 boolean_t 59 tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait) 60 { 61 boolean_t locked = B_FALSE; 62 63 if (tcp_time_wait == NULL) { 64 tcp_time_wait = *((tcp_squeue_priv_t **) 65 squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP)); 66 mutex_enter(&tcp_time_wait->tcp_time_wait_lock); 67 locked = B_TRUE; 68 } else { 69 ASSERT(MUTEX_HELD(&tcp_time_wait->tcp_time_wait_lock)); 70 } 71 72 /* 0 means that the tcp_t has not been added to the time wait list. */ 73 if (tcp->tcp_time_wait_expire == 0) { 74 ASSERT(tcp->tcp_time_wait_next == NULL); 75 ASSERT(tcp->tcp_time_wait_prev == NULL); 76 if (locked) 77 mutex_exit(&tcp_time_wait->tcp_time_wait_lock); 78 return (B_FALSE); 79 } 80 ASSERT(TCP_IS_DETACHED(tcp)); 81 ASSERT(tcp->tcp_state == TCPS_TIME_WAIT); 82 83 if (tcp == tcp_time_wait->tcp_time_wait_head) { 84 ASSERT(tcp->tcp_time_wait_prev == NULL); 85 tcp_time_wait->tcp_time_wait_head = tcp->tcp_time_wait_next; 86 if (tcp_time_wait->tcp_time_wait_head != NULL) { 87 tcp_time_wait->tcp_time_wait_head->tcp_time_wait_prev = 88 NULL; 89 } else { 90 tcp_time_wait->tcp_time_wait_tail = NULL; 91 } 92 } else if (tcp == tcp_time_wait->tcp_time_wait_tail) { 93 ASSERT(tcp->tcp_time_wait_next == NULL); 94 tcp_time_wait->tcp_time_wait_tail = tcp->tcp_time_wait_prev; 95 ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL); 96 tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = NULL; 97 } else { 98 ASSERT(tcp->tcp_time_wait_prev->tcp_time_wait_next == tcp); 99 ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == tcp); 100 tcp->tcp_time_wait_prev->tcp_time_wait_next = 101 tcp->tcp_time_wait_next; 102 tcp->tcp_time_wait_next->tcp_time_wait_prev = 103 tcp->tcp_time_wait_prev; 104 } 105 tcp->tcp_time_wait_next = NULL; 106 tcp->tcp_time_wait_prev = NULL; 107 tcp->tcp_time_wait_expire = 0; 108 109 if (locked) 110 mutex_exit(&tcp_time_wait->tcp_time_wait_lock); 111 return (B_TRUE); 112 } 113 114 /* 115 * Add a connection to the list of detached TIME_WAIT connections 116 * and set its time to expire. 117 */ 118 void 119 tcp_time_wait_append(tcp_t *tcp) 120 { 121 tcp_stack_t *tcps = tcp->tcp_tcps; 122 squeue_t *sqp = tcp->tcp_connp->conn_sqp; 123 tcp_squeue_priv_t *tcp_time_wait = 124 *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP)); 125 126 tcp_timers_stop(tcp); 127 128 /* Freed above */ 129 ASSERT(tcp->tcp_timer_tid == 0); 130 ASSERT(tcp->tcp_ack_tid == 0); 131 132 /* must have happened at the time of detaching the tcp */ 133 ASSERT(tcp->tcp_ptpahn == NULL); 134 ASSERT(tcp->tcp_flow_stopped == 0); 135 ASSERT(tcp->tcp_time_wait_next == NULL); 136 ASSERT(tcp->tcp_time_wait_prev == NULL); 137 ASSERT(tcp->tcp_time_wait_expire == 0); 138 ASSERT(tcp->tcp_listener == NULL); 139 140 tcp->tcp_time_wait_expire = ddi_get_lbolt64(); 141 /* 142 * Since tcp_time_wait_expire is lbolt64, it should not wrap around 143 * in practice. Hence it cannot be 0. Note that zero means that the 144 * tcp_t is not in the TIME_WAIT list. 145 */ 146 tcp->tcp_time_wait_expire += MSEC_TO_TICK( 147 tcps->tcps_time_wait_interval); 148 149 ASSERT(TCP_IS_DETACHED(tcp)); 150 ASSERT(tcp->tcp_state == TCPS_TIME_WAIT); 151 ASSERT(tcp->tcp_time_wait_next == NULL); 152 ASSERT(tcp->tcp_time_wait_prev == NULL); 153 TCP_DBGSTAT(tcps, tcp_time_wait); 154 155 mutex_enter(&tcp_time_wait->tcp_time_wait_lock); 156 if (tcp_time_wait->tcp_time_wait_head == NULL) { 157 ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL); 158 tcp_time_wait->tcp_time_wait_head = tcp; 159 160 /* 161 * Even if the list was empty before, there may be a timer 162 * running since a tcp_t can be removed from the list 163 * in other places, such as tcp_clean_death(). So check if 164 * a timer is needed. 165 */ 166 if (tcp_time_wait->tcp_time_wait_tid == 0) { 167 tcp_time_wait->tcp_time_wait_tid = 168 timeout_generic(CALLOUT_NORMAL, 169 tcp_time_wait_collector, sqp, 170 (hrtime_t)(tcps->tcps_time_wait_interval + 1) * 171 MICROSEC, CALLOUT_TCP_RESOLUTION, 172 CALLOUT_FLAG_ROUNDUP); 173 } 174 } else { 175 /* 176 * The list is not empty, so a timer must be running. If not, 177 * tcp_time_wait_collector() must be running on this 178 * tcp_time_wait list at the same time. 179 */ 180 ASSERT(tcp_time_wait->tcp_time_wait_tid != 0 || 181 tcp_time_wait->tcp_time_wait_running); 182 ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL); 183 ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state == 184 TCPS_TIME_WAIT); 185 tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = tcp; 186 tcp->tcp_time_wait_prev = tcp_time_wait->tcp_time_wait_tail; 187 188 } 189 tcp_time_wait->tcp_time_wait_tail = tcp; 190 mutex_exit(&tcp_time_wait->tcp_time_wait_lock); 191 } 192 193 /* 194 * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT 195 * tcp_t. Used in tcp_time_wait_collector(). 196 */ 197 /* ARGSUSED */ 198 static void 199 tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) 200 { 201 conn_t *connp = (conn_t *)arg; 202 tcp_t *tcp = connp->conn_tcp; 203 204 ASSERT(tcp != NULL); 205 if (tcp->tcp_state == TCPS_CLOSED) { 206 return; 207 } 208 209 ASSERT((connp->conn_family == AF_INET && 210 connp->conn_ipversion == IPV4_VERSION) || 211 (connp->conn_family == AF_INET6 && 212 (connp->conn_ipversion == IPV4_VERSION || 213 connp->conn_ipversion == IPV6_VERSION))); 214 ASSERT(!tcp->tcp_listener); 215 216 ASSERT(TCP_IS_DETACHED(tcp)); 217 218 /* 219 * Because they have no upstream client to rebind or tcp_close() 220 * them later, we axe the connection here and now. 221 */ 222 tcp_close_detached(tcp); 223 } 224 225 /* 226 * Blows away all tcps whose TIME_WAIT has expired. List traversal 227 * is done forwards from the head. 228 * This walks all stack instances since 229 * tcp_time_wait remains global across all stacks. 230 */ 231 /* ARGSUSED */ 232 void 233 tcp_time_wait_collector(void *arg) 234 { 235 tcp_t *tcp; 236 int64_t now; 237 mblk_t *mp; 238 conn_t *connp; 239 kmutex_t *lock; 240 boolean_t removed; 241 extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t, 242 uint8_t *, in_port_t, uint8_t *, in_port_t, void *); 243 244 squeue_t *sqp = (squeue_t *)arg; 245 tcp_squeue_priv_t *tcp_time_wait = 246 *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP)); 247 248 mutex_enter(&tcp_time_wait->tcp_time_wait_lock); 249 tcp_time_wait->tcp_time_wait_tid = 0; 250 #ifdef DEBUG 251 tcp_time_wait->tcp_time_wait_running = B_TRUE; 252 #endif 253 254 if (tcp_time_wait->tcp_free_list != NULL && 255 tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) { 256 TCP_G_STAT(tcp_freelist_cleanup); 257 while ((tcp = tcp_time_wait->tcp_free_list) != NULL) { 258 tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next; 259 tcp->tcp_time_wait_next = NULL; 260 tcp_time_wait->tcp_free_list_cnt--; 261 ASSERT(tcp->tcp_tcps == NULL); 262 CONN_DEC_REF(tcp->tcp_connp); 263 } 264 ASSERT(tcp_time_wait->tcp_free_list_cnt == 0); 265 } 266 267 /* 268 * In order to reap time waits reliably, we should use a 269 * source of time that is not adjustable by the user -- hence 270 * the call to ddi_get_lbolt64(). 271 */ 272 now = ddi_get_lbolt64(); 273 while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) { 274 /* 275 * lbolt64 should not wrap around in practice... So we can 276 * do a direct comparison. 277 */ 278 if (now < tcp->tcp_time_wait_expire) 279 break; 280 281 removed = tcp_time_wait_remove(tcp, tcp_time_wait); 282 ASSERT(removed); 283 284 connp = tcp->tcp_connp; 285 ASSERT(connp->conn_fanout != NULL); 286 lock = &connp->conn_fanout->connf_lock; 287 /* 288 * This is essentially a TW reclaim fast path optimization for 289 * performance where the timewait collector checks under the 290 * fanout lock (so that no one else can get access to the 291 * conn_t) that the refcnt is 2 i.e. one for TCP and one for 292 * the classifier hash list. If ref count is indeed 2, we can 293 * just remove the conn under the fanout lock and avoid 294 * cleaning up the conn under the squeue, provided that 295 * clustering callbacks are not enabled. If clustering is 296 * enabled, we need to make the clustering callback before 297 * setting the CONDEMNED flag and after dropping all locks and 298 * so we forego this optimization and fall back to the slow 299 * path. Also please see the comments in tcp_closei_local 300 * regarding the refcnt logic. 301 * 302 * Since we are holding the tcp_time_wait_lock, its better 303 * not to block on the fanout_lock because other connections 304 * can't add themselves to time_wait list. So we do a 305 * tryenter instead of mutex_enter. 306 */ 307 if (mutex_tryenter(lock)) { 308 mutex_enter(&connp->conn_lock); 309 if ((connp->conn_ref == 2) && 310 (cl_inet_disconnect == NULL)) { 311 ipcl_hash_remove_locked(connp, 312 connp->conn_fanout); 313 /* 314 * Set the CONDEMNED flag now itself so that 315 * the refcnt cannot increase due to any 316 * walker. 317 */ 318 connp->conn_state_flags |= CONN_CONDEMNED; 319 mutex_exit(lock); 320 mutex_exit(&connp->conn_lock); 321 if (tcp_time_wait->tcp_free_list_cnt < 322 tcp_free_list_max_cnt) { 323 /* Add to head of tcp_free_list */ 324 mutex_exit( 325 &tcp_time_wait->tcp_time_wait_lock); 326 tcp_cleanup(tcp); 327 ASSERT(connp->conn_latch == NULL); 328 ASSERT(connp->conn_policy == NULL); 329 ASSERT(tcp->tcp_tcps == NULL); 330 ASSERT(connp->conn_netstack == NULL); 331 332 mutex_enter( 333 &tcp_time_wait->tcp_time_wait_lock); 334 tcp->tcp_time_wait_next = 335 tcp_time_wait->tcp_free_list; 336 tcp_time_wait->tcp_free_list = tcp; 337 tcp_time_wait->tcp_free_list_cnt++; 338 continue; 339 } else { 340 /* Do not add to tcp_free_list */ 341 mutex_exit( 342 &tcp_time_wait->tcp_time_wait_lock); 343 tcp_bind_hash_remove(tcp); 344 ixa_cleanup(tcp->tcp_connp->conn_ixa); 345 tcp_ipsec_cleanup(tcp); 346 CONN_DEC_REF(tcp->tcp_connp); 347 } 348 } else { 349 CONN_INC_REF_LOCKED(connp); 350 mutex_exit(lock); 351 mutex_exit(&tcp_time_wait->tcp_time_wait_lock); 352 mutex_exit(&connp->conn_lock); 353 /* 354 * We can reuse the closemp here since conn has 355 * detached (otherwise we wouldn't even be in 356 * time_wait list). tcp_closemp_used can safely 357 * be changed without taking a lock as no other 358 * thread can concurrently access it at this 359 * point in the connection lifecycle. 360 */ 361 362 if (tcp->tcp_closemp.b_prev == NULL) 363 tcp->tcp_closemp_used = B_TRUE; 364 else 365 cmn_err(CE_PANIC, 366 "tcp_timewait_collector: " 367 "concurrent use of tcp_closemp: " 368 "connp %p tcp %p\n", (void *)connp, 369 (void *)tcp); 370 371 TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15); 372 mp = &tcp->tcp_closemp; 373 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 374 tcp_timewait_close, connp, NULL, 375 SQ_FILL, SQTAG_TCP_TIMEWAIT); 376 } 377 } else { 378 mutex_enter(&connp->conn_lock); 379 CONN_INC_REF_LOCKED(connp); 380 mutex_exit(&tcp_time_wait->tcp_time_wait_lock); 381 mutex_exit(&connp->conn_lock); 382 /* 383 * We can reuse the closemp here since conn has 384 * detached (otherwise we wouldn't even be in 385 * time_wait list). tcp_closemp_used can safely 386 * be changed without taking a lock as no other 387 * thread can concurrently access it at this 388 * point in the connection lifecycle. 389 */ 390 391 if (tcp->tcp_closemp.b_prev == NULL) 392 tcp->tcp_closemp_used = B_TRUE; 393 else 394 cmn_err(CE_PANIC, "tcp_timewait_collector: " 395 "concurrent use of tcp_closemp: " 396 "connp %p tcp %p\n", (void *)connp, 397 (void *)tcp); 398 399 TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15); 400 mp = &tcp->tcp_closemp; 401 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 402 tcp_timewait_close, connp, NULL, 403 SQ_FILL, SQTAG_TCP_TIMEWAIT); 404 } 405 mutex_enter(&tcp_time_wait->tcp_time_wait_lock); 406 } 407 408 if (tcp_time_wait->tcp_free_list != NULL) 409 tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE; 410 411 /* 412 * If the time wait list is not empty and there is no timer running, 413 * restart it. 414 */ 415 if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL && 416 tcp_time_wait->tcp_time_wait_tid == 0) { 417 hrtime_t firetime; 418 419 firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now); 420 /* This ensures that we won't wake up too often. */ 421 firetime = MAX(TCP_TIME_WAIT_DELAY, firetime); 422 tcp_time_wait->tcp_time_wait_tid = 423 timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector, 424 sqp, firetime, CALLOUT_TCP_RESOLUTION, 425 CALLOUT_FLAG_ROUNDUP); 426 } 427 #ifdef DEBUG 428 tcp_time_wait->tcp_time_wait_running = B_FALSE; 429 #endif 430 mutex_exit(&tcp_time_wait->tcp_time_wait_lock); 431 } 432 433 /* 434 * tcp_time_wait_processing() handles processing of incoming packets when 435 * the tcp_t is in the TIME_WAIT state. 436 * 437 * A TIME_WAIT tcp_t that has an associated open TCP end point (not in 438 * detached state) is never put on the time wait list. 439 */ 440 void 441 tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, 442 uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira) 443 { 444 int32_t bytes_acked; 445 int32_t gap; 446 int32_t rgap; 447 tcp_opt_t tcpopt; 448 uint_t flags; 449 uint32_t new_swnd = 0; 450 conn_t *nconnp; 451 conn_t *connp = tcp->tcp_connp; 452 tcp_stack_t *tcps = tcp->tcp_tcps; 453 454 BUMP_LOCAL(tcp->tcp_ibsegs); 455 DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp); 456 457 flags = (unsigned int)tcpha->tha_flags & 0xFF; 458 new_swnd = ntohs(tcpha->tha_win) << 459 ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws); 460 if (tcp->tcp_snd_ts_ok) { 461 if (!tcp_paws_check(tcp, tcpha, &tcpopt)) { 462 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, 463 tcp->tcp_rnxt, TH_ACK); 464 goto done; 465 } 466 } 467 gap = seg_seq - tcp->tcp_rnxt; 468 rgap = tcp->tcp_rwnd - (gap + seg_len); 469 if (gap < 0) { 470 TCPS_BUMP_MIB(tcps, tcpInDataDupSegs); 471 TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes, 472 (seg_len > -gap ? -gap : seg_len)); 473 seg_len += gap; 474 if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) { 475 if (flags & TH_RST) { 476 goto done; 477 } 478 if ((flags & TH_FIN) && seg_len == -1) { 479 /* 480 * When TCP receives a duplicate FIN in 481 * TIME_WAIT state, restart the 2 MSL timer. 482 * See page 73 in RFC 793. Make sure this TCP 483 * is already on the TIME_WAIT list. If not, 484 * just restart the timer. 485 */ 486 if (TCP_IS_DETACHED(tcp)) { 487 if (tcp_time_wait_remove(tcp, NULL) == 488 B_TRUE) { 489 tcp_time_wait_append(tcp); 490 TCP_DBGSTAT(tcps, 491 tcp_rput_time_wait); 492 } 493 } else { 494 ASSERT(tcp != NULL); 495 TCP_TIMER_RESTART(tcp, 496 tcps->tcps_time_wait_interval); 497 } 498 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, 499 tcp->tcp_rnxt, TH_ACK); 500 goto done; 501 } 502 flags |= TH_ACK_NEEDED; 503 seg_len = 0; 504 goto process_ack; 505 } 506 507 /* Fix seg_seq, and chew the gap off the front. */ 508 seg_seq = tcp->tcp_rnxt; 509 } 510 511 if ((flags & TH_SYN) && gap > 0 && rgap < 0) { 512 /* 513 * Make sure that when we accept the connection, pick 514 * an ISS greater than (tcp_snxt + tcp_iss_incr/2) for the 515 * old connection. 516 * 517 * The next ISS generated is equal to tcp_iss_incr_extra 518 * + tcp_iss_incr/2 + other components depending on the 519 * value of tcp_strong_iss. We pre-calculate the new 520 * ISS here and compare with tcp_snxt to determine if 521 * we need to make adjustment to tcp_iss_incr_extra. 522 * 523 * The above calculation is ugly and is a 524 * waste of CPU cycles... 525 */ 526 uint32_t new_iss = tcps->tcps_iss_incr_extra; 527 int32_t adj; 528 ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; 529 530 switch (tcps->tcps_strong_iss) { 531 case 2: { 532 /* Add time and MD5 components. */ 533 uint32_t answer[4]; 534 struct { 535 uint32_t ports; 536 in6_addr_t src; 537 in6_addr_t dst; 538 } arg; 539 MD5_CTX context; 540 541 mutex_enter(&tcps->tcps_iss_key_lock); 542 context = tcps->tcps_iss_key; 543 mutex_exit(&tcps->tcps_iss_key_lock); 544 arg.ports = connp->conn_ports; 545 /* We use MAPPED addresses in tcp_iss_init */ 546 arg.src = connp->conn_laddr_v6; 547 arg.dst = connp->conn_faddr_v6; 548 MD5Update(&context, (uchar_t *)&arg, 549 sizeof (arg)); 550 MD5Final((uchar_t *)answer, &context); 551 answer[0] ^= answer[1] ^ answer[2] ^ answer[3]; 552 new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0]; 553 break; 554 } 555 case 1: 556 /* Add time component and min random (i.e. 1). */ 557 new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1; 558 break; 559 default: 560 /* Add only time component. */ 561 new_iss += (uint32_t)gethrestime_sec() * 562 tcps->tcps_iss_incr; 563 break; 564 } 565 if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) { 566 /* 567 * New ISS not guaranteed to be tcp_iss_incr/2 568 * ahead of the current tcp_snxt, so add the 569 * difference to tcp_iss_incr_extra. 570 */ 571 tcps->tcps_iss_incr_extra += adj; 572 } 573 /* 574 * If tcp_clean_death() can not perform the task now, 575 * drop the SYN packet and let the other side re-xmit. 576 * Otherwise pass the SYN packet back in, since the 577 * old tcp state has been cleaned up or freed. 578 */ 579 if (tcp_clean_death(tcp, 0) == -1) 580 goto done; 581 nconnp = ipcl_classify(mp, ira, ipst); 582 if (nconnp != NULL) { 583 TCP_STAT(tcps, tcp_time_wait_syn_success); 584 /* Drops ref on nconnp */ 585 tcp_reinput(nconnp, mp, ira, ipst); 586 return; 587 } 588 goto done; 589 } 590 591 /* 592 * rgap is the amount of stuff received out of window. A negative 593 * value is the amount out of window. 594 */ 595 if (rgap < 0) { 596 TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs); 597 TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap); 598 /* Fix seg_len and make sure there is something left. */ 599 seg_len += rgap; 600 if (seg_len <= 0) { 601 if (flags & TH_RST) { 602 goto done; 603 } 604 flags |= TH_ACK_NEEDED; 605 seg_len = 0; 606 goto process_ack; 607 } 608 } 609 /* 610 * Check whether we can update tcp_ts_recent. This test is 611 * NOT the one in RFC 1323 3.4. It is from Braden, 1993, "TCP 612 * Extensions for High Performance: An Update", Internet Draft. 613 */ 614 if (tcp->tcp_snd_ts_ok && 615 TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) && 616 SEQ_LEQ(seg_seq, tcp->tcp_rack)) { 617 tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val; 618 tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64(); 619 } 620 621 if (seg_seq != tcp->tcp_rnxt && seg_len > 0) { 622 /* Always ack out of order packets */ 623 flags |= TH_ACK_NEEDED; 624 seg_len = 0; 625 } else if (seg_len > 0) { 626 TCPS_BUMP_MIB(tcps, tcpInClosed); 627 TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs); 628 TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len); 629 } 630 if (flags & TH_RST) { 631 (void) tcp_clean_death(tcp, 0); 632 goto done; 633 } 634 if (flags & TH_SYN) { 635 tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1, 636 TH_RST|TH_ACK); 637 /* 638 * Do not delete the TCP structure if it is in 639 * TIME_WAIT state. Refer to RFC 1122, 4.2.2.13. 640 */ 641 goto done; 642 } 643 process_ack: 644 if (flags & TH_ACK) { 645 bytes_acked = (int)(seg_ack - tcp->tcp_suna); 646 if (bytes_acked <= 0) { 647 if (bytes_acked == 0 && seg_len == 0 && 648 new_swnd == tcp->tcp_swnd) 649 TCPS_BUMP_MIB(tcps, tcpInDupAck); 650 } else { 651 /* Acks something not sent */ 652 flags |= TH_ACK_NEEDED; 653 } 654 } 655 if (flags & TH_ACK_NEEDED) { 656 /* 657 * Time to send an ack for some reason. 658 */ 659 tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, 660 tcp->tcp_rnxt, TH_ACK); 661 } 662 done: 663 freemsg(mp); 664 } 665