1721fffe3SKacheong Poon /* 2721fffe3SKacheong Poon * CDDL HEADER START 3721fffe3SKacheong Poon * 4721fffe3SKacheong Poon * The contents of this file are subject to the terms of the 5721fffe3SKacheong Poon * Common Development and Distribution License (the "License"). 6721fffe3SKacheong Poon * You may not use this file except in compliance with the License. 7721fffe3SKacheong Poon * 8721fffe3SKacheong Poon * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9721fffe3SKacheong Poon * or http://www.opensolaris.org/os/licensing. 10721fffe3SKacheong Poon * See the License for the specific language governing permissions 11721fffe3SKacheong Poon * and limitations under the License. 12721fffe3SKacheong Poon * 13721fffe3SKacheong Poon * When distributing Covered Code, include this CDDL HEADER in each 14721fffe3SKacheong Poon * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15721fffe3SKacheong Poon * If applicable, add the following below this CDDL HEADER, with the 16721fffe3SKacheong Poon * fields enclosed by brackets "[]" replaced with your own identifying 17721fffe3SKacheong Poon * information: Portions Copyright [yyyy] [name of copyright owner] 18721fffe3SKacheong Poon * 19721fffe3SKacheong Poon * CDDL HEADER END 20721fffe3SKacheong Poon */ 21721fffe3SKacheong Poon 22721fffe3SKacheong Poon /* 2366cd0f60SKacheong Poon * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 242404c9e6SPatrick Mooney * Copyright 2016 Joyent, Inc. 25721fffe3SKacheong Poon */ 26721fffe3SKacheong Poon 27721fffe3SKacheong Poon /* 28721fffe3SKacheong Poon * This file contains functions related to TCP time wait processing. Also 29721fffe3SKacheong Poon * refer to the time wait handling comments in tcp_impl.h. 30721fffe3SKacheong Poon */ 31721fffe3SKacheong Poon 32721fffe3SKacheong Poon #include <sys/types.h> 33721fffe3SKacheong Poon #include <sys/strsun.h> 34721fffe3SKacheong Poon #include <sys/squeue_impl.h> 35721fffe3SKacheong Poon #include <sys/squeue.h> 36721fffe3SKacheong Poon #include <sys/callo.h> 37721fffe3SKacheong Poon 38721fffe3SKacheong Poon #include <inet/common.h> 39721fffe3SKacheong Poon #include <inet/ip.h> 40721fffe3SKacheong Poon #include <inet/tcp.h> 41721fffe3SKacheong Poon #include <inet/tcp_impl.h> 42721fffe3SKacheong Poon #include <inet/tcp_cluster.h> 43721fffe3SKacheong Poon 442404c9e6SPatrick Mooney static void tcp_time_wait_purge(tcp_t *, tcp_squeue_priv_t *); 45721fffe3SKacheong Poon 462404c9e6SPatrick Mooney #define TW_BUCKET(t) \ 472404c9e6SPatrick Mooney (((t) / MSEC_TO_TICK(TCP_TIME_WAIT_DELAY)) % TCP_TIME_WAIT_BUCKETS) 482404c9e6SPatrick Mooney 492404c9e6SPatrick Mooney #define TW_BUCKET_NEXT(b) (((b) + 1) % TCP_TIME_WAIT_BUCKETS) 502404c9e6SPatrick Mooney 51721fffe3SKacheong Poon 52721fffe3SKacheong Poon /* 53721fffe3SKacheong Poon * Remove a connection from the list of detached TIME_WAIT connections. 54721fffe3SKacheong Poon * It returns B_FALSE if it can't remove the connection from the list 55721fffe3SKacheong Poon * as the connection has already been removed from the list due to an 56721fffe3SKacheong Poon * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE. 57721fffe3SKacheong Poon */ 58721fffe3SKacheong Poon boolean_t 592404c9e6SPatrick Mooney tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tsp) 60721fffe3SKacheong Poon { 61721fffe3SKacheong Poon boolean_t locked = B_FALSE; 62721fffe3SKacheong Poon 632404c9e6SPatrick Mooney if (tsp == NULL) { 642404c9e6SPatrick Mooney tsp = *((tcp_squeue_priv_t **) 65721fffe3SKacheong Poon squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP)); 662404c9e6SPatrick Mooney mutex_enter(&tsp->tcp_time_wait_lock); 67721fffe3SKacheong Poon locked = B_TRUE; 68721fffe3SKacheong Poon } else { 692404c9e6SPatrick Mooney ASSERT(MUTEX_HELD(&tsp->tcp_time_wait_lock)); 70721fffe3SKacheong Poon } 71721fffe3SKacheong Poon 72721fffe3SKacheong Poon /* 0 means that the tcp_t has not been added to the time wait list. */ 73721fffe3SKacheong Poon if (tcp->tcp_time_wait_expire == 0) { 74721fffe3SKacheong Poon ASSERT(tcp->tcp_time_wait_next == NULL); 75721fffe3SKacheong Poon ASSERT(tcp->tcp_time_wait_prev == NULL); 76721fffe3SKacheong Poon if (locked) 772404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock); 78721fffe3SKacheong Poon return (B_FALSE); 79721fffe3SKacheong Poon } 80721fffe3SKacheong Poon ASSERT(TCP_IS_DETACHED(tcp)); 81721fffe3SKacheong Poon ASSERT(tcp->tcp_state == TCPS_TIME_WAIT); 822404c9e6SPatrick Mooney ASSERT(tsp->tcp_time_wait_cnt > 0); 83721fffe3SKacheong Poon 842404c9e6SPatrick Mooney if (tcp->tcp_time_wait_next != NULL) { 85721fffe3SKacheong Poon tcp->tcp_time_wait_next->tcp_time_wait_prev = 86721fffe3SKacheong Poon tcp->tcp_time_wait_prev; 87721fffe3SKacheong Poon } 882404c9e6SPatrick Mooney if (tcp->tcp_time_wait_prev != NULL) { 892404c9e6SPatrick Mooney tcp->tcp_time_wait_prev->tcp_time_wait_next = 902404c9e6SPatrick Mooney tcp->tcp_time_wait_next; 912404c9e6SPatrick Mooney } else { 922404c9e6SPatrick Mooney unsigned int bucket; 932404c9e6SPatrick Mooney 942404c9e6SPatrick Mooney bucket = TW_BUCKET(tcp->tcp_time_wait_expire); 952404c9e6SPatrick Mooney ASSERT(tsp->tcp_time_wait_bucket[bucket] == tcp); 962404c9e6SPatrick Mooney tsp->tcp_time_wait_bucket[bucket] = tcp->tcp_time_wait_next; 972404c9e6SPatrick Mooney } 98721fffe3SKacheong Poon tcp->tcp_time_wait_next = NULL; 99721fffe3SKacheong Poon tcp->tcp_time_wait_prev = NULL; 100721fffe3SKacheong Poon tcp->tcp_time_wait_expire = 0; 1012404c9e6SPatrick Mooney tsp->tcp_time_wait_cnt--; 102721fffe3SKacheong Poon 103721fffe3SKacheong Poon if (locked) 1042404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock); 105721fffe3SKacheong Poon return (B_TRUE); 106721fffe3SKacheong Poon } 107721fffe3SKacheong Poon 10881b60dacSJerry Jelinek /* Constants used for fast checking of a localhost address */ 10981b60dacSJerry Jelinek #if defined(_BIG_ENDIAN) 11081b60dacSJerry Jelinek #define IPv4_LOCALHOST 0x7f000000U 11181b60dacSJerry Jelinek #define IPv4_LH_MASK 0xffffff00U 11281b60dacSJerry Jelinek #else 11381b60dacSJerry Jelinek #define IPv4_LOCALHOST 0x0000007fU 11481b60dacSJerry Jelinek #define IPv4_LH_MASK 0x00ffffffU 11581b60dacSJerry Jelinek #endif 11681b60dacSJerry Jelinek 11781b60dacSJerry Jelinek #define IS_LOCAL_HOST(x) ( \ 11881b60dacSJerry Jelinek ((x)->tcp_connp->conn_ipversion == IPV4_VERSION && \ 11981b60dacSJerry Jelinek ((x)->tcp_connp->conn_laddr_v4 & IPv4_LH_MASK) == IPv4_LOCALHOST) || \ 12081b60dacSJerry Jelinek ((x)->tcp_connp->conn_ipversion == IPV6_VERSION && \ 12181b60dacSJerry Jelinek IN6_IS_ADDR_LOOPBACK(&(x)->tcp_connp->conn_laddr_v6))) 12281b60dacSJerry Jelinek 1232404c9e6SPatrick Mooney 124721fffe3SKacheong Poon /* 125721fffe3SKacheong Poon * Add a connection to the list of detached TIME_WAIT connections 126721fffe3SKacheong Poon * and set its time to expire. 127721fffe3SKacheong Poon */ 128721fffe3SKacheong Poon void 129721fffe3SKacheong Poon tcp_time_wait_append(tcp_t *tcp) 130721fffe3SKacheong Poon { 131721fffe3SKacheong Poon tcp_stack_t *tcps = tcp->tcp_tcps; 13266cd0f60SKacheong Poon squeue_t *sqp = tcp->tcp_connp->conn_sqp; 1332404c9e6SPatrick Mooney tcp_squeue_priv_t *tsp = 13466cd0f60SKacheong Poon *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP)); 1352404c9e6SPatrick Mooney int64_t now, schedule; 1362404c9e6SPatrick Mooney unsigned int bucket; 137721fffe3SKacheong Poon 138721fffe3SKacheong Poon tcp_timers_stop(tcp); 139721fffe3SKacheong Poon 140721fffe3SKacheong Poon /* Freed above */ 141721fffe3SKacheong Poon ASSERT(tcp->tcp_timer_tid == 0); 142721fffe3SKacheong Poon ASSERT(tcp->tcp_ack_tid == 0); 143721fffe3SKacheong Poon 144721fffe3SKacheong Poon /* must have happened at the time of detaching the tcp */ 1452404c9e6SPatrick Mooney ASSERT(TCP_IS_DETACHED(tcp)); 1462404c9e6SPatrick Mooney ASSERT(tcp->tcp_state == TCPS_TIME_WAIT); 147721fffe3SKacheong Poon ASSERT(tcp->tcp_ptpahn == NULL); 148721fffe3SKacheong Poon ASSERT(tcp->tcp_flow_stopped == 0); 149721fffe3SKacheong Poon ASSERT(tcp->tcp_time_wait_next == NULL); 150721fffe3SKacheong Poon ASSERT(tcp->tcp_time_wait_prev == NULL); 15166cd0f60SKacheong Poon ASSERT(tcp->tcp_time_wait_expire == 0); 152721fffe3SKacheong Poon ASSERT(tcp->tcp_listener == NULL); 153721fffe3SKacheong Poon 1542404c9e6SPatrick Mooney TCP_DBGSTAT(tcps, tcp_time_wait); 1552404c9e6SPatrick Mooney mutex_enter(&tsp->tcp_time_wait_lock); 1562404c9e6SPatrick Mooney 157721fffe3SKacheong Poon /* 1582404c9e6SPatrick Mooney * Immediately expire loopback connections. Since there is no worry 1592404c9e6SPatrick Mooney * about packets on the local host showing up after a long network 1602404c9e6SPatrick Mooney * delay, this is safe and allows much higher rates of connection churn 1612404c9e6SPatrick Mooney * for applications operating locally. 16281b60dacSJerry Jelinek * 1632404c9e6SPatrick Mooney * This typically bypasses the tcp_free_list fast path due to squeue 1642404c9e6SPatrick Mooney * re-entry for the loopback close operation. 16581b60dacSJerry Jelinek */ 1662404c9e6SPatrick Mooney if (tcp->tcp_loopback) { 1672404c9e6SPatrick Mooney tcp_time_wait_purge(tcp, tsp); 1682404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock); 1692404c9e6SPatrick Mooney return; 17081b60dacSJerry Jelinek } 171721fffe3SKacheong Poon 1722404c9e6SPatrick Mooney /* 1732404c9e6SPatrick Mooney * In order to reap TIME_WAITs reliably, we should use a source of time 1742404c9e6SPatrick Mooney * that is not adjustable by the user. While it would be more accurate 1752404c9e6SPatrick Mooney * to grab this timestamp before (potentially) sleeping on the 1762404c9e6SPatrick Mooney * tcp_time_wait_lock, doing so complicates bucket addressing later. 1772404c9e6SPatrick Mooney */ 1782404c9e6SPatrick Mooney now = ddi_get_lbolt64(); 17966cd0f60SKacheong Poon 18066cd0f60SKacheong Poon /* 1812404c9e6SPatrick Mooney * Each squeue uses an arbitrary time offset when scheduling 1822404c9e6SPatrick Mooney * expiration timers. This prevents the bucketing from forcing 1832404c9e6SPatrick Mooney * tcp_time_wait_collector to run in locksetup across squeues. 1842404c9e6SPatrick Mooney * 1852404c9e6SPatrick Mooney * This offset is (re)initialized when a new TIME_WAIT connection is 1862404c9e6SPatrick Mooney * added to an squeue which has no connections waiting to expire. 18766cd0f60SKacheong Poon */ 1882404c9e6SPatrick Mooney if (tsp->tcp_time_wait_tid == 0) { 1892404c9e6SPatrick Mooney ASSERT(tsp->tcp_time_wait_cnt == 0); 1902404c9e6SPatrick Mooney tsp->tcp_time_wait_offset = 1912404c9e6SPatrick Mooney now % MSEC_TO_TICK(TCP_TIME_WAIT_DELAY); 1922404c9e6SPatrick Mooney } 1932404c9e6SPatrick Mooney now -= tsp->tcp_time_wait_offset; 19481b60dacSJerry Jelinek 1952404c9e6SPatrick Mooney /* 1962404c9e6SPatrick Mooney * Use the netstack-defined timeout, rounded up to the minimum 1972404c9e6SPatrick Mooney * time_wait_collector interval. 1982404c9e6SPatrick Mooney */ 1992404c9e6SPatrick Mooney schedule = now + MSEC_TO_TICK(tcps->tcps_time_wait_interval); 2002404c9e6SPatrick Mooney tcp->tcp_time_wait_expire = schedule; 2012404c9e6SPatrick Mooney 2022404c9e6SPatrick Mooney /* 2032404c9e6SPatrick Mooney * Append the connection into the appropriate bucket. 2042404c9e6SPatrick Mooney */ 2052404c9e6SPatrick Mooney bucket = TW_BUCKET(tcp->tcp_time_wait_expire); 2062404c9e6SPatrick Mooney tcp->tcp_time_wait_next = tsp->tcp_time_wait_bucket[bucket]; 2072404c9e6SPatrick Mooney tsp->tcp_time_wait_bucket[bucket] = tcp; 2082404c9e6SPatrick Mooney if (tcp->tcp_time_wait_next != NULL) { 2092404c9e6SPatrick Mooney ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == NULL); 2102404c9e6SPatrick Mooney tcp->tcp_time_wait_next->tcp_time_wait_prev = tcp; 2112404c9e6SPatrick Mooney } 2122404c9e6SPatrick Mooney tsp->tcp_time_wait_cnt++; 2132404c9e6SPatrick Mooney 2142404c9e6SPatrick Mooney /* 2152404c9e6SPatrick Mooney * Round delay up to the nearest bucket boundary. 2162404c9e6SPatrick Mooney */ 2172404c9e6SPatrick Mooney schedule += MSEC_TO_TICK(TCP_TIME_WAIT_DELAY); 2182404c9e6SPatrick Mooney schedule -= schedule % MSEC_TO_TICK(TCP_TIME_WAIT_DELAY); 2192404c9e6SPatrick Mooney 2202404c9e6SPatrick Mooney /* 2212404c9e6SPatrick Mooney * The newly inserted entry may require a tighter schedule for the 2222404c9e6SPatrick Mooney * expiration timer. 2232404c9e6SPatrick Mooney */ 2242404c9e6SPatrick Mooney if (schedule < tsp->tcp_time_wait_schedule) { 2252404c9e6SPatrick Mooney callout_id_t old_tid = tsp->tcp_time_wait_tid; 2262404c9e6SPatrick Mooney 2272404c9e6SPatrick Mooney tsp->tcp_time_wait_schedule = schedule; 2282404c9e6SPatrick Mooney tsp->tcp_time_wait_tid = 22966cd0f60SKacheong Poon timeout_generic(CALLOUT_NORMAL, 2302404c9e6SPatrick Mooney tcp_time_wait_collector, sqp, 2312404c9e6SPatrick Mooney TICK_TO_NSEC(schedule - now), 2322404c9e6SPatrick Mooney CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP); 2332404c9e6SPatrick Mooney 2342404c9e6SPatrick Mooney /* 2352404c9e6SPatrick Mooney * It is possible for the timer to fire before the untimeout 2362404c9e6SPatrick Mooney * action is able to complete. In that case, the exclusion 2372404c9e6SPatrick Mooney * offered by the tcp_time_wait_collector_active flag will 2382404c9e6SPatrick Mooney * prevent multiple collector threads from processing records 2392404c9e6SPatrick Mooney * simultaneously from the same squeue. 2402404c9e6SPatrick Mooney */ 2412404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock); 2422404c9e6SPatrick Mooney (void) untimeout_default(old_tid, 0); 2432404c9e6SPatrick Mooney return; 2442404c9e6SPatrick Mooney } 2452404c9e6SPatrick Mooney 2462404c9e6SPatrick Mooney /* 2472404c9e6SPatrick Mooney * Start a fresh timer if none exists. 2482404c9e6SPatrick Mooney */ 2492404c9e6SPatrick Mooney if (tsp->tcp_time_wait_schedule == 0) { 2502404c9e6SPatrick Mooney ASSERT(tsp->tcp_time_wait_tid == 0); 2512404c9e6SPatrick Mooney 2522404c9e6SPatrick Mooney tsp->tcp_time_wait_schedule = schedule; 2532404c9e6SPatrick Mooney tsp->tcp_time_wait_tid = 2542404c9e6SPatrick Mooney timeout_generic(CALLOUT_NORMAL, 2552404c9e6SPatrick Mooney tcp_time_wait_collector, sqp, 2562404c9e6SPatrick Mooney TICK_TO_NSEC(schedule - now), 25781b60dacSJerry Jelinek CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP); 25866cd0f60SKacheong Poon } 2592404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock); 260721fffe3SKacheong Poon } 261721fffe3SKacheong Poon 262721fffe3SKacheong Poon /* 263721fffe3SKacheong Poon * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT 264721fffe3SKacheong Poon * tcp_t. Used in tcp_time_wait_collector(). 265721fffe3SKacheong Poon */ 266721fffe3SKacheong Poon /* ARGSUSED */ 267721fffe3SKacheong Poon static void 268721fffe3SKacheong Poon tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy) 269721fffe3SKacheong Poon { 270721fffe3SKacheong Poon conn_t *connp = (conn_t *)arg; 271721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp; 272721fffe3SKacheong Poon 273721fffe3SKacheong Poon ASSERT(tcp != NULL); 274721fffe3SKacheong Poon if (tcp->tcp_state == TCPS_CLOSED) { 275721fffe3SKacheong Poon return; 276721fffe3SKacheong Poon } 277721fffe3SKacheong Poon 278721fffe3SKacheong Poon ASSERT((connp->conn_family == AF_INET && 279721fffe3SKacheong Poon connp->conn_ipversion == IPV4_VERSION) || 280721fffe3SKacheong Poon (connp->conn_family == AF_INET6 && 281721fffe3SKacheong Poon (connp->conn_ipversion == IPV4_VERSION || 282721fffe3SKacheong Poon connp->conn_ipversion == IPV6_VERSION))); 283721fffe3SKacheong Poon ASSERT(!tcp->tcp_listener); 284721fffe3SKacheong Poon 285721fffe3SKacheong Poon ASSERT(TCP_IS_DETACHED(tcp)); 286721fffe3SKacheong Poon 287721fffe3SKacheong Poon /* 288721fffe3SKacheong Poon * Because they have no upstream client to rebind or tcp_close() 289721fffe3SKacheong Poon * them later, we axe the connection here and now. 290721fffe3SKacheong Poon */ 291721fffe3SKacheong Poon tcp_close_detached(tcp); 292721fffe3SKacheong Poon } 293721fffe3SKacheong Poon 2942404c9e6SPatrick Mooney 2952404c9e6SPatrick Mooney static void 2962404c9e6SPatrick Mooney tcp_time_wait_purge(tcp_t *tcp, tcp_squeue_priv_t *tsp) 297721fffe3SKacheong Poon { 298721fffe3SKacheong Poon mblk_t *mp; 2992404c9e6SPatrick Mooney conn_t *connp = tcp->tcp_connp; 300721fffe3SKacheong Poon kmutex_t *lock; 301721fffe3SKacheong Poon 3022404c9e6SPatrick Mooney ASSERT(MUTEX_HELD(&tsp->tcp_time_wait_lock)); 303721fffe3SKacheong Poon ASSERT(connp->conn_fanout != NULL); 3042404c9e6SPatrick Mooney 305721fffe3SKacheong Poon lock = &connp->conn_fanout->connf_lock; 3062404c9e6SPatrick Mooney 307721fffe3SKacheong Poon /* 3082404c9e6SPatrick Mooney * This is essentially a TIME_WAIT reclaim fast path optimization for 3092404c9e6SPatrick Mooney * performance where the connection is checked under the fanout lock 3102404c9e6SPatrick Mooney * (so that no one else can get access to the conn_t) that the refcnt 3112404c9e6SPatrick Mooney * is 2 (one each for TCP and the classifier hash list). That is the 3122404c9e6SPatrick Mooney * case and clustering callbacks are not enabled, the conn can be 3132404c9e6SPatrick Mooney * removed under the fanout lock and avoid clean-up under the squeue. 314721fffe3SKacheong Poon * 3152404c9e6SPatrick Mooney * This optimization is forgone when clustering is enabled since the 3162404c9e6SPatrick Mooney * clustering callback must be made before setting the CONDEMNED flag 3172404c9e6SPatrick Mooney * and after dropping all locks 3182404c9e6SPatrick Mooney * 3192404c9e6SPatrick Mooney * See the comments in tcp_closei_local for additional information 3202404c9e6SPatrick Mooney * regarding the refcnt logic. 321721fffe3SKacheong Poon */ 322721fffe3SKacheong Poon if (mutex_tryenter(lock)) { 323721fffe3SKacheong Poon mutex_enter(&connp->conn_lock); 3242404c9e6SPatrick Mooney if (connp->conn_ref == 2 && cl_inet_disconnect == NULL) { 3252404c9e6SPatrick Mooney ipcl_hash_remove_locked(connp, connp->conn_fanout); 326721fffe3SKacheong Poon /* 3272404c9e6SPatrick Mooney * Set the CONDEMNED flag now itself so that the refcnt 3282404c9e6SPatrick Mooney * cannot increase due to any walker. 329721fffe3SKacheong Poon */ 330721fffe3SKacheong Poon connp->conn_state_flags |= CONN_CONDEMNED; 331721fffe3SKacheong Poon mutex_exit(&connp->conn_lock); 3322404c9e6SPatrick Mooney mutex_exit(lock); 3332404c9e6SPatrick Mooney if (tsp->tcp_free_list_cnt < tcp_free_list_max_cnt) { 3342404c9e6SPatrick Mooney /* 3352404c9e6SPatrick Mooney * Add to head of tcp_free_list 3362404c9e6SPatrick Mooney */ 337721fffe3SKacheong Poon tcp_cleanup(tcp); 338721fffe3SKacheong Poon ASSERT(connp->conn_latch == NULL); 339721fffe3SKacheong Poon ASSERT(connp->conn_policy == NULL); 340721fffe3SKacheong Poon ASSERT(tcp->tcp_tcps == NULL); 341721fffe3SKacheong Poon ASSERT(connp->conn_netstack == NULL); 342721fffe3SKacheong Poon 3432404c9e6SPatrick Mooney tcp->tcp_time_wait_next = tsp->tcp_free_list; 3442404c9e6SPatrick Mooney tcp->tcp_in_free_list = B_TRUE; 3452404c9e6SPatrick Mooney tsp->tcp_free_list = tcp; 3462404c9e6SPatrick Mooney tsp->tcp_free_list_cnt++; 347721fffe3SKacheong Poon } else { 3482404c9e6SPatrick Mooney /* 3492404c9e6SPatrick Mooney * Do not add to tcp_free_list 3502404c9e6SPatrick Mooney */ 351721fffe3SKacheong Poon tcp_bind_hash_remove(tcp); 352721fffe3SKacheong Poon ixa_cleanup(tcp->tcp_connp->conn_ixa); 353721fffe3SKacheong Poon tcp_ipsec_cleanup(tcp); 354721fffe3SKacheong Poon CONN_DEC_REF(tcp->tcp_connp); 355721fffe3SKacheong Poon } 3562404c9e6SPatrick Mooney 3572404c9e6SPatrick Mooney /* 3582404c9e6SPatrick Mooney * With the fast-path complete, we can bail. 3592404c9e6SPatrick Mooney */ 3602404c9e6SPatrick Mooney return; 361721fffe3SKacheong Poon } else { 3622404c9e6SPatrick Mooney /* 3632404c9e6SPatrick Mooney * Fall back to slow path. 3642404c9e6SPatrick Mooney */ 365721fffe3SKacheong Poon CONN_INC_REF_LOCKED(connp); 3662404c9e6SPatrick Mooney mutex_exit(&connp->conn_lock); 367721fffe3SKacheong Poon mutex_exit(lock); 368721fffe3SKacheong Poon } 369721fffe3SKacheong Poon } else { 3702404c9e6SPatrick Mooney CONN_INC_REF(connp); 3712404c9e6SPatrick Mooney } 372721fffe3SKacheong Poon 3732404c9e6SPatrick Mooney /* 3742404c9e6SPatrick Mooney * We can reuse the closemp here since conn has detached (otherwise we 3752404c9e6SPatrick Mooney * wouldn't even be in time_wait list). It is safe to change 3762404c9e6SPatrick Mooney * tcp_closemp_used without taking a lock as no other thread can 3772404c9e6SPatrick Mooney * concurrently access it at this point in the connection lifecycle. 3782404c9e6SPatrick Mooney */ 3792404c9e6SPatrick Mooney if (tcp->tcp_closemp.b_prev == NULL) { 380721fffe3SKacheong Poon tcp->tcp_closemp_used = B_TRUE; 3812404c9e6SPatrick Mooney } else { 3822404c9e6SPatrick Mooney cmn_err(CE_PANIC, 3832404c9e6SPatrick Mooney "tcp_timewait_collector: concurrent use of tcp_closemp: " 3842404c9e6SPatrick Mooney "connp %p tcp %p\n", (void *)connp, (void *)tcp); 3852404c9e6SPatrick Mooney } 386721fffe3SKacheong Poon 387721fffe3SKacheong Poon TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15); 388721fffe3SKacheong Poon mp = &tcp->tcp_closemp; 3892404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock); 3902404c9e6SPatrick Mooney SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_timewait_close, connp, NULL, 391721fffe3SKacheong Poon SQ_FILL, SQTAG_TCP_TIMEWAIT); 3922404c9e6SPatrick Mooney mutex_enter(&tsp->tcp_time_wait_lock); 393721fffe3SKacheong Poon } 394721fffe3SKacheong Poon 39566cd0f60SKacheong Poon /* 3962404c9e6SPatrick Mooney * Purge any tcp_t instances associated with this squeue which have expired 3972404c9e6SPatrick Mooney * from the TIME_WAIT state. 39866cd0f60SKacheong Poon */ 3992404c9e6SPatrick Mooney void 4002404c9e6SPatrick Mooney tcp_time_wait_collector(void *arg) 4012404c9e6SPatrick Mooney { 4022404c9e6SPatrick Mooney tcp_t *tcp; 403*c79a72d7SPatrick Mooney int64_t now, sched_active, sched_cur, sched_new; 4042404c9e6SPatrick Mooney unsigned int idx; 40566cd0f60SKacheong Poon 4062404c9e6SPatrick Mooney squeue_t *sqp = (squeue_t *)arg; 4072404c9e6SPatrick Mooney tcp_squeue_priv_t *tsp = 4082404c9e6SPatrick Mooney *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP)); 40981b60dacSJerry Jelinek 4102404c9e6SPatrick Mooney mutex_enter(&tsp->tcp_time_wait_lock); 4112404c9e6SPatrick Mooney 4122404c9e6SPatrick Mooney /* 4132404c9e6SPatrick Mooney * Because of timer scheduling complexity and the fact that the 4142404c9e6SPatrick Mooney * tcp_time_wait_lock is dropped during tcp_time_wait_purge, it is 4152404c9e6SPatrick Mooney * possible for multiple tcp_time_wait_collector threads to run against 4162404c9e6SPatrick Mooney * the same squeue. This flag is used to exclude other collectors from 4172404c9e6SPatrick Mooney * the squeue during execution. 4182404c9e6SPatrick Mooney */ 4192404c9e6SPatrick Mooney if (tsp->tcp_time_wait_collector_active) { 4202404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock); 4212404c9e6SPatrick Mooney return; 42266cd0f60SKacheong Poon } 4232404c9e6SPatrick Mooney tsp->tcp_time_wait_collector_active = B_TRUE; 4242404c9e6SPatrick Mooney 4252404c9e6SPatrick Mooney /* 426*c79a72d7SPatrick Mooney * After its assignment here, the value of sched_active must not be 427*c79a72d7SPatrick Mooney * altered as it is used to validate the state of the 428*c79a72d7SPatrick Mooney * tcp_time_wait_collector callout schedule for this squeue. 429*c79a72d7SPatrick Mooney * 430*c79a72d7SPatrick Mooney * The same does not hold true of sched_cur, which holds the timestamp 431*c79a72d7SPatrick Mooney * of the bucket undergoing processing. While it is initially equal to 432*c79a72d7SPatrick Mooney * sched_active, certain conditions below can walk it forward, 433*c79a72d7SPatrick Mooney * triggering the retry loop. 434*c79a72d7SPatrick Mooney */ 435*c79a72d7SPatrick Mooney sched_cur = sched_active = tsp->tcp_time_wait_schedule; 436*c79a72d7SPatrick Mooney 437*c79a72d7SPatrick Mooney /* 4382404c9e6SPatrick Mooney * Purge the free list if necessary 4392404c9e6SPatrick Mooney */ 4402404c9e6SPatrick Mooney if (tsp->tcp_free_list != NULL) { 4412404c9e6SPatrick Mooney TCP_G_STAT(tcp_freelist_cleanup); 4422404c9e6SPatrick Mooney while ((tcp = tsp->tcp_free_list) != NULL) { 4432404c9e6SPatrick Mooney tsp->tcp_free_list = tcp->tcp_time_wait_next; 4442404c9e6SPatrick Mooney tcp->tcp_time_wait_next = NULL; 4452404c9e6SPatrick Mooney tsp->tcp_free_list_cnt--; 4462404c9e6SPatrick Mooney ASSERT(tcp->tcp_tcps == NULL); 4472404c9e6SPatrick Mooney CONN_DEC_REF(tcp->tcp_connp); 4482404c9e6SPatrick Mooney } 4492404c9e6SPatrick Mooney ASSERT(tsp->tcp_free_list_cnt == 0); 4502404c9e6SPatrick Mooney } 4512404c9e6SPatrick Mooney 4522404c9e6SPatrick Mooney /* 4532404c9e6SPatrick Mooney * If there are no connections pending, clear timer-related state to be 4542404c9e6SPatrick Mooney * reinitialized by the next caller. 4552404c9e6SPatrick Mooney */ 4562404c9e6SPatrick Mooney if (tsp->tcp_time_wait_cnt == 0) { 4572404c9e6SPatrick Mooney tsp->tcp_time_wait_offset = 0; 4582404c9e6SPatrick Mooney tsp->tcp_time_wait_schedule = 0; 4592404c9e6SPatrick Mooney tsp->tcp_time_wait_tid = 0; 4602404c9e6SPatrick Mooney tsp->tcp_time_wait_collector_active = B_FALSE; 4612404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock); 4622404c9e6SPatrick Mooney return; 4632404c9e6SPatrick Mooney } 4642404c9e6SPatrick Mooney 465*c79a72d7SPatrick Mooney retry: 4662404c9e6SPatrick Mooney /* 4672404c9e6SPatrick Mooney * Grab the bucket which we were scheduled to cleanse. 4682404c9e6SPatrick Mooney */ 469*c79a72d7SPatrick Mooney idx = TW_BUCKET(sched_cur - 1); 4702404c9e6SPatrick Mooney now = ddi_get_lbolt64() - tsp->tcp_time_wait_offset; 4712404c9e6SPatrick Mooney tcp = tsp->tcp_time_wait_bucket[idx]; 4722404c9e6SPatrick Mooney 4732404c9e6SPatrick Mooney while (tcp != NULL) { 4742404c9e6SPatrick Mooney /* 4752404c9e6SPatrick Mooney * Since the bucket count is sized to prevent wrap-around 4762404c9e6SPatrick Mooney * during typical operation and timers are schedule to process 4772404c9e6SPatrick Mooney * buckets with only expired connections, there is only one 4782404c9e6SPatrick Mooney * reason to encounter a connection expiring in the future: 4792404c9e6SPatrick Mooney * The tcp_time_wait_collector thread has been so delayed in 4802404c9e6SPatrick Mooney * its processing that connections have wrapped around the 4812404c9e6SPatrick Mooney * timing wheel into this bucket. 4822404c9e6SPatrick Mooney * 4832404c9e6SPatrick Mooney * In that case, the remaining entires in the bucket can be 4842404c9e6SPatrick Mooney * ignored since, being appended sequentially, they should all 4852404c9e6SPatrick Mooney * expire in the future. 4862404c9e6SPatrick Mooney */ 4872404c9e6SPatrick Mooney if (now < tcp->tcp_time_wait_expire) { 4882404c9e6SPatrick Mooney break; 4892404c9e6SPatrick Mooney } 4902404c9e6SPatrick Mooney 4912404c9e6SPatrick Mooney /* 4922404c9e6SPatrick Mooney * Pull the connection out of the bucket. 4932404c9e6SPatrick Mooney */ 4942404c9e6SPatrick Mooney VERIFY(tcp_time_wait_remove(tcp, tsp)); 4952404c9e6SPatrick Mooney 4962404c9e6SPatrick Mooney /* 4972404c9e6SPatrick Mooney * Purge the connection. 4982404c9e6SPatrick Mooney * 4992404c9e6SPatrick Mooney * While tcp_time_wait_lock will be temporarily dropped as part 5002404c9e6SPatrick Mooney * of the process, there is no risk of the timer being 5012404c9e6SPatrick Mooney * (re)scheduled while the collector is running since a value 5022404c9e6SPatrick Mooney * corresponding to the past is left in tcp_time_wait_schedule. 5032404c9e6SPatrick Mooney */ 5042404c9e6SPatrick Mooney tcp_time_wait_purge(tcp, tsp); 5052404c9e6SPatrick Mooney 5062404c9e6SPatrick Mooney /* 5072404c9e6SPatrick Mooney * Because tcp_time_wait_remove clears the tcp_time_wait_next 5082404c9e6SPatrick Mooney * field, the next item must be grabbed directly from the 5092404c9e6SPatrick Mooney * bucket itself. 5102404c9e6SPatrick Mooney */ 5112404c9e6SPatrick Mooney tcp = tsp->tcp_time_wait_bucket[idx]; 5122404c9e6SPatrick Mooney } 5132404c9e6SPatrick Mooney 5142404c9e6SPatrick Mooney if (tsp->tcp_time_wait_cnt == 0) { 5152404c9e6SPatrick Mooney /* 5162404c9e6SPatrick Mooney * There is not a need for the collector to schedule a new 5172404c9e6SPatrick Mooney * timer if no pending items remain. The timer state can be 5182404c9e6SPatrick Mooney * cleared only if it was untouched while the collector dropped 5192404c9e6SPatrick Mooney * its locks during tcp_time_wait_purge. 5202404c9e6SPatrick Mooney */ 521*c79a72d7SPatrick Mooney if (tsp->tcp_time_wait_schedule == sched_active) { 5222404c9e6SPatrick Mooney tsp->tcp_time_wait_offset = 0; 5232404c9e6SPatrick Mooney tsp->tcp_time_wait_schedule = 0; 5242404c9e6SPatrick Mooney tsp->tcp_time_wait_tid = 0; 5252404c9e6SPatrick Mooney } 5262404c9e6SPatrick Mooney tsp->tcp_time_wait_collector_active = B_FALSE; 5272404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock); 5282404c9e6SPatrick Mooney return; 5292404c9e6SPatrick Mooney } else { 5302404c9e6SPatrick Mooney unsigned int nidx; 5312404c9e6SPatrick Mooney 5322404c9e6SPatrick Mooney /* 5332404c9e6SPatrick Mooney * Locate the next bucket containing entries. 5342404c9e6SPatrick Mooney */ 535*c79a72d7SPatrick Mooney sched_new = sched_cur + MSEC_TO_TICK(TCP_TIME_WAIT_DELAY); 5362404c9e6SPatrick Mooney nidx = TW_BUCKET_NEXT(idx); 5372404c9e6SPatrick Mooney while (tsp->tcp_time_wait_bucket[nidx] == NULL) { 5382404c9e6SPatrick Mooney if (nidx == idx) { 5392404c9e6SPatrick Mooney break; 5402404c9e6SPatrick Mooney } 5412404c9e6SPatrick Mooney nidx = TW_BUCKET_NEXT(nidx); 542*c79a72d7SPatrick Mooney sched_new += MSEC_TO_TICK(TCP_TIME_WAIT_DELAY); 5432404c9e6SPatrick Mooney } 5442404c9e6SPatrick Mooney ASSERT(tsp->tcp_time_wait_bucket[nidx] != NULL); 5452404c9e6SPatrick Mooney } 5462404c9e6SPatrick Mooney 5472404c9e6SPatrick Mooney /* 5482404c9e6SPatrick Mooney * It is possible that the system is under such dire load that between 5492404c9e6SPatrick Mooney * the timer scheduling and TIME_WAIT processing delay, execution 5502404c9e6SPatrick Mooney * overran the interval allocated to this bucket. 5512404c9e6SPatrick Mooney */ 5522404c9e6SPatrick Mooney now = ddi_get_lbolt64() - tsp->tcp_time_wait_offset; 553*c79a72d7SPatrick Mooney if (sched_new <= now) { 5542404c9e6SPatrick Mooney /* 5552404c9e6SPatrick Mooney * Attempt to right the situation by immediately performing a 5562404c9e6SPatrick Mooney * purge on the next bucket. This loop will continue as needed 5572404c9e6SPatrick Mooney * until the schedule can be pushed out ahead of the clock. 5582404c9e6SPatrick Mooney */ 559*c79a72d7SPatrick Mooney sched_cur = sched_new; 560*c79a72d7SPatrick Mooney DTRACE_PROBE3(tcp__time__wait__overrun, 561*c79a72d7SPatrick Mooney tcp_squeue_priv_t *, tsp, int64_t, sched_new, int64_t, now); 5622404c9e6SPatrick Mooney goto retry; 5632404c9e6SPatrick Mooney } 5642404c9e6SPatrick Mooney 5652404c9e6SPatrick Mooney /* 5662404c9e6SPatrick Mooney * Another thread may have snuck in to reschedule the timer while locks 5672404c9e6SPatrick Mooney * were dropped during tcp_time_wait_purge. Defer to the running timer 5682404c9e6SPatrick Mooney * if that is the case. 5692404c9e6SPatrick Mooney */ 570*c79a72d7SPatrick Mooney if (tsp->tcp_time_wait_schedule != sched_active) { 5712404c9e6SPatrick Mooney tsp->tcp_time_wait_collector_active = B_FALSE; 5722404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock); 5732404c9e6SPatrick Mooney return; 5742404c9e6SPatrick Mooney } 5752404c9e6SPatrick Mooney 5762404c9e6SPatrick Mooney /* 5772404c9e6SPatrick Mooney * Schedule the next timer. 5782404c9e6SPatrick Mooney */ 579*c79a72d7SPatrick Mooney tsp->tcp_time_wait_schedule = sched_new; 5802404c9e6SPatrick Mooney tsp->tcp_time_wait_tid = 5812404c9e6SPatrick Mooney timeout_generic(CALLOUT_NORMAL, 5822404c9e6SPatrick Mooney tcp_time_wait_collector, sqp, 583*c79a72d7SPatrick Mooney TICK_TO_NSEC(sched_new - now), 5842404c9e6SPatrick Mooney CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP); 5852404c9e6SPatrick Mooney tsp->tcp_time_wait_collector_active = B_FALSE; 5862404c9e6SPatrick Mooney mutex_exit(&tsp->tcp_time_wait_lock); 587721fffe3SKacheong Poon } 588721fffe3SKacheong Poon 589721fffe3SKacheong Poon /* 590721fffe3SKacheong Poon * tcp_time_wait_processing() handles processing of incoming packets when 591721fffe3SKacheong Poon * the tcp_t is in the TIME_WAIT state. 592721fffe3SKacheong Poon * 593721fffe3SKacheong Poon * A TIME_WAIT tcp_t that has an associated open TCP end point (not in 594721fffe3SKacheong Poon * detached state) is never put on the time wait list. 595721fffe3SKacheong Poon */ 596721fffe3SKacheong Poon void 597721fffe3SKacheong Poon tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq, 598721fffe3SKacheong Poon uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira) 599721fffe3SKacheong Poon { 600721fffe3SKacheong Poon int32_t bytes_acked; 601721fffe3SKacheong Poon int32_t gap; 602721fffe3SKacheong Poon int32_t rgap; 603721fffe3SKacheong Poon tcp_opt_t tcpopt; 604721fffe3SKacheong Poon uint_t flags; 605721fffe3SKacheong Poon uint32_t new_swnd = 0; 606721fffe3SKacheong Poon conn_t *nconnp; 607721fffe3SKacheong Poon conn_t *connp = tcp->tcp_connp; 608721fffe3SKacheong Poon tcp_stack_t *tcps = tcp->tcp_tcps; 609721fffe3SKacheong Poon 610721fffe3SKacheong Poon BUMP_LOCAL(tcp->tcp_ibsegs); 611721fffe3SKacheong Poon DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp); 612721fffe3SKacheong Poon 613721fffe3SKacheong Poon flags = (unsigned int)tcpha->tha_flags & 0xFF; 614721fffe3SKacheong Poon new_swnd = ntohs(tcpha->tha_win) << 615721fffe3SKacheong Poon ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws); 6161f183ba0SLauri Tirkkonen 6171f183ba0SLauri Tirkkonen if (tcp->tcp_snd_ts_ok && !(tcpha->tha_flags & TH_RST)) { 6181f183ba0SLauri Tirkkonen int options; 6191f183ba0SLauri Tirkkonen if (tcp->tcp_snd_sack_ok) 6201f183ba0SLauri Tirkkonen tcpopt.tcp = tcp; 6211f183ba0SLauri Tirkkonen else 6221f183ba0SLauri Tirkkonen tcpopt.tcp = NULL; 6231f183ba0SLauri Tirkkonen options = tcp_parse_options(tcpha, &tcpopt); 6241f183ba0SLauri Tirkkonen if (!(options & TCP_OPT_TSTAMP_PRESENT)) { 6251f183ba0SLauri Tirkkonen DTRACE_TCP1(droppedtimestamp, tcp_t *, tcp); 6261f183ba0SLauri Tirkkonen goto done; 6271f183ba0SLauri Tirkkonen } else if (!tcp_paws_check(tcp, &tcpopt)) { 6281f183ba0SLauri Tirkkonen tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt, 6291f183ba0SLauri Tirkkonen TH_ACK); 630721fffe3SKacheong Poon goto done; 631721fffe3SKacheong Poon } 632721fffe3SKacheong Poon } 633721fffe3SKacheong Poon gap = seg_seq - tcp->tcp_rnxt; 634721fffe3SKacheong Poon rgap = tcp->tcp_rwnd - (gap + seg_len); 635721fffe3SKacheong Poon if (gap < 0) { 636721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpInDataDupSegs); 637721fffe3SKacheong Poon TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes, 638721fffe3SKacheong Poon (seg_len > -gap ? -gap : seg_len)); 639721fffe3SKacheong Poon seg_len += gap; 640721fffe3SKacheong Poon if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) { 641721fffe3SKacheong Poon if (flags & TH_RST) { 642721fffe3SKacheong Poon goto done; 643721fffe3SKacheong Poon } 644721fffe3SKacheong Poon if ((flags & TH_FIN) && seg_len == -1) { 645721fffe3SKacheong Poon /* 646721fffe3SKacheong Poon * When TCP receives a duplicate FIN in 647721fffe3SKacheong Poon * TIME_WAIT state, restart the 2 MSL timer. 648721fffe3SKacheong Poon * See page 73 in RFC 793. Make sure this TCP 649721fffe3SKacheong Poon * is already on the TIME_WAIT list. If not, 650721fffe3SKacheong Poon * just restart the timer. 651721fffe3SKacheong Poon */ 652721fffe3SKacheong Poon if (TCP_IS_DETACHED(tcp)) { 653721fffe3SKacheong Poon if (tcp_time_wait_remove(tcp, NULL) == 654721fffe3SKacheong Poon B_TRUE) { 655721fffe3SKacheong Poon tcp_time_wait_append(tcp); 656721fffe3SKacheong Poon TCP_DBGSTAT(tcps, 657721fffe3SKacheong Poon tcp_rput_time_wait); 658721fffe3SKacheong Poon } 659721fffe3SKacheong Poon } else { 660721fffe3SKacheong Poon ASSERT(tcp != NULL); 661721fffe3SKacheong Poon TCP_TIMER_RESTART(tcp, 662721fffe3SKacheong Poon tcps->tcps_time_wait_interval); 663721fffe3SKacheong Poon } 664721fffe3SKacheong Poon tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, 665721fffe3SKacheong Poon tcp->tcp_rnxt, TH_ACK); 666721fffe3SKacheong Poon goto done; 667721fffe3SKacheong Poon } 668721fffe3SKacheong Poon flags |= TH_ACK_NEEDED; 669721fffe3SKacheong Poon seg_len = 0; 670721fffe3SKacheong Poon goto process_ack; 671721fffe3SKacheong Poon } 672721fffe3SKacheong Poon 673721fffe3SKacheong Poon /* Fix seg_seq, and chew the gap off the front. */ 674721fffe3SKacheong Poon seg_seq = tcp->tcp_rnxt; 675721fffe3SKacheong Poon } 676721fffe3SKacheong Poon 677721fffe3SKacheong Poon if ((flags & TH_SYN) && gap > 0 && rgap < 0) { 678721fffe3SKacheong Poon /* 679721fffe3SKacheong Poon * Make sure that when we accept the connection, pick 680c0e6663fSJerry Jelinek * an ISS greater than (tcp_snxt + tcp_iss_incr/2) for the 681721fffe3SKacheong Poon * old connection. 682721fffe3SKacheong Poon * 683721fffe3SKacheong Poon * The next ISS generated is equal to tcp_iss_incr_extra 684c0e6663fSJerry Jelinek * + tcp_iss_incr/2 + other components depending on the 685721fffe3SKacheong Poon * value of tcp_strong_iss. We pre-calculate the new 686721fffe3SKacheong Poon * ISS here and compare with tcp_snxt to determine if 687721fffe3SKacheong Poon * we need to make adjustment to tcp_iss_incr_extra. 688721fffe3SKacheong Poon * 689721fffe3SKacheong Poon * The above calculation is ugly and is a 690721fffe3SKacheong Poon * waste of CPU cycles... 691721fffe3SKacheong Poon */ 692721fffe3SKacheong Poon uint32_t new_iss = tcps->tcps_iss_incr_extra; 693721fffe3SKacheong Poon int32_t adj; 694721fffe3SKacheong Poon ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip; 695721fffe3SKacheong Poon 696721fffe3SKacheong Poon switch (tcps->tcps_strong_iss) { 697721fffe3SKacheong Poon case 2: { 698721fffe3SKacheong Poon /* Add time and MD5 components. */ 699721fffe3SKacheong Poon uint32_t answer[4]; 700721fffe3SKacheong Poon struct { 701721fffe3SKacheong Poon uint32_t ports; 702721fffe3SKacheong Poon in6_addr_t src; 703721fffe3SKacheong Poon in6_addr_t dst; 704721fffe3SKacheong Poon } arg; 705721fffe3SKacheong Poon MD5_CTX context; 706721fffe3SKacheong Poon 707721fffe3SKacheong Poon mutex_enter(&tcps->tcps_iss_key_lock); 708721fffe3SKacheong Poon context = tcps->tcps_iss_key; 709721fffe3SKacheong Poon mutex_exit(&tcps->tcps_iss_key_lock); 710721fffe3SKacheong Poon arg.ports = connp->conn_ports; 711721fffe3SKacheong Poon /* We use MAPPED addresses in tcp_iss_init */ 712721fffe3SKacheong Poon arg.src = connp->conn_laddr_v6; 713721fffe3SKacheong Poon arg.dst = connp->conn_faddr_v6; 714721fffe3SKacheong Poon MD5Update(&context, (uchar_t *)&arg, 715721fffe3SKacheong Poon sizeof (arg)); 716721fffe3SKacheong Poon MD5Final((uchar_t *)answer, &context); 717721fffe3SKacheong Poon answer[0] ^= answer[1] ^ answer[2] ^ answer[3]; 718721fffe3SKacheong Poon new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0]; 719721fffe3SKacheong Poon break; 720721fffe3SKacheong Poon } 721721fffe3SKacheong Poon case 1: 722721fffe3SKacheong Poon /* Add time component and min random (i.e. 1). */ 723721fffe3SKacheong Poon new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1; 724721fffe3SKacheong Poon break; 725721fffe3SKacheong Poon default: 726721fffe3SKacheong Poon /* Add only time component. */ 727c0e6663fSJerry Jelinek new_iss += (uint32_t)gethrestime_sec() * 728c0e6663fSJerry Jelinek tcps->tcps_iss_incr; 729721fffe3SKacheong Poon break; 730721fffe3SKacheong Poon } 731721fffe3SKacheong Poon if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) { 732721fffe3SKacheong Poon /* 733c0e6663fSJerry Jelinek * New ISS not guaranteed to be tcp_iss_incr/2 734721fffe3SKacheong Poon * ahead of the current tcp_snxt, so add the 735721fffe3SKacheong Poon * difference to tcp_iss_incr_extra. 736721fffe3SKacheong Poon */ 737721fffe3SKacheong Poon tcps->tcps_iss_incr_extra += adj; 738721fffe3SKacheong Poon } 739721fffe3SKacheong Poon /* 740721fffe3SKacheong Poon * If tcp_clean_death() can not perform the task now, 741721fffe3SKacheong Poon * drop the SYN packet and let the other side re-xmit. 742721fffe3SKacheong Poon * Otherwise pass the SYN packet back in, since the 743721fffe3SKacheong Poon * old tcp state has been cleaned up or freed. 744721fffe3SKacheong Poon */ 745721fffe3SKacheong Poon if (tcp_clean_death(tcp, 0) == -1) 746721fffe3SKacheong Poon goto done; 747721fffe3SKacheong Poon nconnp = ipcl_classify(mp, ira, ipst); 748721fffe3SKacheong Poon if (nconnp != NULL) { 749721fffe3SKacheong Poon TCP_STAT(tcps, tcp_time_wait_syn_success); 750721fffe3SKacheong Poon /* Drops ref on nconnp */ 751721fffe3SKacheong Poon tcp_reinput(nconnp, mp, ira, ipst); 752721fffe3SKacheong Poon return; 753721fffe3SKacheong Poon } 754721fffe3SKacheong Poon goto done; 755721fffe3SKacheong Poon } 756721fffe3SKacheong Poon 757721fffe3SKacheong Poon /* 758721fffe3SKacheong Poon * rgap is the amount of stuff received out of window. A negative 759721fffe3SKacheong Poon * value is the amount out of window. 760721fffe3SKacheong Poon */ 761721fffe3SKacheong Poon if (rgap < 0) { 762721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs); 763721fffe3SKacheong Poon TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap); 764721fffe3SKacheong Poon /* Fix seg_len and make sure there is something left. */ 765721fffe3SKacheong Poon seg_len += rgap; 766721fffe3SKacheong Poon if (seg_len <= 0) { 767721fffe3SKacheong Poon if (flags & TH_RST) { 768721fffe3SKacheong Poon goto done; 769721fffe3SKacheong Poon } 770721fffe3SKacheong Poon flags |= TH_ACK_NEEDED; 771721fffe3SKacheong Poon seg_len = 0; 772721fffe3SKacheong Poon goto process_ack; 773721fffe3SKacheong Poon } 774721fffe3SKacheong Poon } 775721fffe3SKacheong Poon /* 7761f183ba0SLauri Tirkkonen * Check whether we can update tcp_ts_recent. This test is from RFC 7771f183ba0SLauri Tirkkonen * 7323, section 5.3. 778721fffe3SKacheong Poon */ 7791f183ba0SLauri Tirkkonen if (tcp->tcp_snd_ts_ok && !(flags & TH_RST) && 780721fffe3SKacheong Poon TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) && 781721fffe3SKacheong Poon SEQ_LEQ(seg_seq, tcp->tcp_rack)) { 782721fffe3SKacheong Poon tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val; 783721fffe3SKacheong Poon tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64(); 784721fffe3SKacheong Poon } 785721fffe3SKacheong Poon 786721fffe3SKacheong Poon if (seg_seq != tcp->tcp_rnxt && seg_len > 0) { 787721fffe3SKacheong Poon /* Always ack out of order packets */ 788721fffe3SKacheong Poon flags |= TH_ACK_NEEDED; 789721fffe3SKacheong Poon seg_len = 0; 790721fffe3SKacheong Poon } else if (seg_len > 0) { 791721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpInClosed); 792721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs); 793721fffe3SKacheong Poon TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len); 794721fffe3SKacheong Poon } 795721fffe3SKacheong Poon if (flags & TH_RST) { 796721fffe3SKacheong Poon (void) tcp_clean_death(tcp, 0); 797721fffe3SKacheong Poon goto done; 798721fffe3SKacheong Poon } 799721fffe3SKacheong Poon if (flags & TH_SYN) { 800721fffe3SKacheong Poon tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1, 801721fffe3SKacheong Poon TH_RST|TH_ACK); 802721fffe3SKacheong Poon /* 803721fffe3SKacheong Poon * Do not delete the TCP structure if it is in 804721fffe3SKacheong Poon * TIME_WAIT state. Refer to RFC 1122, 4.2.2.13. 805721fffe3SKacheong Poon */ 806721fffe3SKacheong Poon goto done; 807721fffe3SKacheong Poon } 808721fffe3SKacheong Poon process_ack: 809721fffe3SKacheong Poon if (flags & TH_ACK) { 810721fffe3SKacheong Poon bytes_acked = (int)(seg_ack - tcp->tcp_suna); 811721fffe3SKacheong Poon if (bytes_acked <= 0) { 812721fffe3SKacheong Poon if (bytes_acked == 0 && seg_len == 0 && 813721fffe3SKacheong Poon new_swnd == tcp->tcp_swnd) 814721fffe3SKacheong Poon TCPS_BUMP_MIB(tcps, tcpInDupAck); 815721fffe3SKacheong Poon } else { 816721fffe3SKacheong Poon /* Acks something not sent */ 817721fffe3SKacheong Poon flags |= TH_ACK_NEEDED; 818721fffe3SKacheong Poon } 819721fffe3SKacheong Poon } 820721fffe3SKacheong Poon if (flags & TH_ACK_NEEDED) { 821721fffe3SKacheong Poon /* 822721fffe3SKacheong Poon * Time to send an ack for some reason. 823721fffe3SKacheong Poon */ 824721fffe3SKacheong Poon tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, 825721fffe3SKacheong Poon tcp->tcp_rnxt, TH_ACK); 826721fffe3SKacheong Poon } 827721fffe3SKacheong Poon done: 828721fffe3SKacheong Poon freemsg(mp); 829721fffe3SKacheong Poon } 830