xref: /illumos-gate/usr/src/uts/common/inet/tcp/tcp_time_wait.c (revision c79a72d7e59c9ff493d8dde8b462cca0445f51c8)
1721fffe3SKacheong Poon /*
2721fffe3SKacheong Poon  * CDDL HEADER START
3721fffe3SKacheong Poon  *
4721fffe3SKacheong Poon  * The contents of this file are subject to the terms of the
5721fffe3SKacheong Poon  * Common Development and Distribution License (the "License").
6721fffe3SKacheong Poon  * You may not use this file except in compliance with the License.
7721fffe3SKacheong Poon  *
8721fffe3SKacheong Poon  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9721fffe3SKacheong Poon  * or http://www.opensolaris.org/os/licensing.
10721fffe3SKacheong Poon  * See the License for the specific language governing permissions
11721fffe3SKacheong Poon  * and limitations under the License.
12721fffe3SKacheong Poon  *
13721fffe3SKacheong Poon  * When distributing Covered Code, include this CDDL HEADER in each
14721fffe3SKacheong Poon  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15721fffe3SKacheong Poon  * If applicable, add the following below this CDDL HEADER, with the
16721fffe3SKacheong Poon  * fields enclosed by brackets "[]" replaced with your own identifying
17721fffe3SKacheong Poon  * information: Portions Copyright [yyyy] [name of copyright owner]
18721fffe3SKacheong Poon  *
19721fffe3SKacheong Poon  * CDDL HEADER END
20721fffe3SKacheong Poon  */
21721fffe3SKacheong Poon 
22721fffe3SKacheong Poon /*
2366cd0f60SKacheong Poon  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
242404c9e6SPatrick Mooney  * Copyright 2016 Joyent, Inc.
25721fffe3SKacheong Poon  */
26721fffe3SKacheong Poon 
27721fffe3SKacheong Poon /*
28721fffe3SKacheong Poon  * This file contains functions related to TCP time wait processing.  Also
29721fffe3SKacheong Poon  * refer to the time wait handling comments in tcp_impl.h.
30721fffe3SKacheong Poon  */
31721fffe3SKacheong Poon 
32721fffe3SKacheong Poon #include <sys/types.h>
33721fffe3SKacheong Poon #include <sys/strsun.h>
34721fffe3SKacheong Poon #include <sys/squeue_impl.h>
35721fffe3SKacheong Poon #include <sys/squeue.h>
36721fffe3SKacheong Poon #include <sys/callo.h>
37721fffe3SKacheong Poon 
38721fffe3SKacheong Poon #include <inet/common.h>
39721fffe3SKacheong Poon #include <inet/ip.h>
40721fffe3SKacheong Poon #include <inet/tcp.h>
41721fffe3SKacheong Poon #include <inet/tcp_impl.h>
42721fffe3SKacheong Poon #include <inet/tcp_cluster.h>
43721fffe3SKacheong Poon 
442404c9e6SPatrick Mooney static void tcp_time_wait_purge(tcp_t *, tcp_squeue_priv_t *);
45721fffe3SKacheong Poon 
462404c9e6SPatrick Mooney #define	TW_BUCKET(t)					\
472404c9e6SPatrick Mooney 	(((t) / MSEC_TO_TICK(TCP_TIME_WAIT_DELAY)) % TCP_TIME_WAIT_BUCKETS)
482404c9e6SPatrick Mooney 
492404c9e6SPatrick Mooney #define	TW_BUCKET_NEXT(b)	(((b) + 1) % TCP_TIME_WAIT_BUCKETS)
502404c9e6SPatrick Mooney 
51721fffe3SKacheong Poon 
52721fffe3SKacheong Poon /*
53721fffe3SKacheong Poon  * Remove a connection from the list of detached TIME_WAIT connections.
54721fffe3SKacheong Poon  * It returns B_FALSE if it can't remove the connection from the list
55721fffe3SKacheong Poon  * as the connection has already been removed from the list due to an
56721fffe3SKacheong Poon  * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
57721fffe3SKacheong Poon  */
58721fffe3SKacheong Poon boolean_t
592404c9e6SPatrick Mooney tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tsp)
60721fffe3SKacheong Poon {
61721fffe3SKacheong Poon 	boolean_t	locked = B_FALSE;
62721fffe3SKacheong Poon 
632404c9e6SPatrick Mooney 	if (tsp == NULL) {
642404c9e6SPatrick Mooney 		tsp = *((tcp_squeue_priv_t **)
65721fffe3SKacheong Poon 		    squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP));
662404c9e6SPatrick Mooney 		mutex_enter(&tsp->tcp_time_wait_lock);
67721fffe3SKacheong Poon 		locked = B_TRUE;
68721fffe3SKacheong Poon 	} else {
692404c9e6SPatrick Mooney 		ASSERT(MUTEX_HELD(&tsp->tcp_time_wait_lock));
70721fffe3SKacheong Poon 	}
71721fffe3SKacheong Poon 
72721fffe3SKacheong Poon 	/* 0 means that the tcp_t has not been added to the time wait list. */
73721fffe3SKacheong Poon 	if (tcp->tcp_time_wait_expire == 0) {
74721fffe3SKacheong Poon 		ASSERT(tcp->tcp_time_wait_next == NULL);
75721fffe3SKacheong Poon 		ASSERT(tcp->tcp_time_wait_prev == NULL);
76721fffe3SKacheong Poon 		if (locked)
772404c9e6SPatrick Mooney 			mutex_exit(&tsp->tcp_time_wait_lock);
78721fffe3SKacheong Poon 		return (B_FALSE);
79721fffe3SKacheong Poon 	}
80721fffe3SKacheong Poon 	ASSERT(TCP_IS_DETACHED(tcp));
81721fffe3SKacheong Poon 	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
822404c9e6SPatrick Mooney 	ASSERT(tsp->tcp_time_wait_cnt > 0);
83721fffe3SKacheong Poon 
842404c9e6SPatrick Mooney 	if (tcp->tcp_time_wait_next != NULL) {
85721fffe3SKacheong Poon 		tcp->tcp_time_wait_next->tcp_time_wait_prev =
86721fffe3SKacheong Poon 		    tcp->tcp_time_wait_prev;
87721fffe3SKacheong Poon 	}
882404c9e6SPatrick Mooney 	if (tcp->tcp_time_wait_prev != NULL) {
892404c9e6SPatrick Mooney 		tcp->tcp_time_wait_prev->tcp_time_wait_next =
902404c9e6SPatrick Mooney 		    tcp->tcp_time_wait_next;
912404c9e6SPatrick Mooney 	} else {
922404c9e6SPatrick Mooney 		unsigned int bucket;
932404c9e6SPatrick Mooney 
942404c9e6SPatrick Mooney 		bucket = TW_BUCKET(tcp->tcp_time_wait_expire);
952404c9e6SPatrick Mooney 		ASSERT(tsp->tcp_time_wait_bucket[bucket] == tcp);
962404c9e6SPatrick Mooney 		tsp->tcp_time_wait_bucket[bucket] = tcp->tcp_time_wait_next;
972404c9e6SPatrick Mooney 	}
98721fffe3SKacheong Poon 	tcp->tcp_time_wait_next = NULL;
99721fffe3SKacheong Poon 	tcp->tcp_time_wait_prev = NULL;
100721fffe3SKacheong Poon 	tcp->tcp_time_wait_expire = 0;
1012404c9e6SPatrick Mooney 	tsp->tcp_time_wait_cnt--;
102721fffe3SKacheong Poon 
103721fffe3SKacheong Poon 	if (locked)
1042404c9e6SPatrick Mooney 		mutex_exit(&tsp->tcp_time_wait_lock);
105721fffe3SKacheong Poon 	return (B_TRUE);
106721fffe3SKacheong Poon }
107721fffe3SKacheong Poon 
/*
 * Constants for a cheap test of whether a connection's local address is a
 * localhost address.  The IPv4 value and mask are spelled differently per
 * host endianness; presumably this is because conn_laddr_v4 is kept in
 * network byte order (verify against the conn_t definition).  With that
 * assumption, the mask selects the first three octets and the comparison
 * matches addresses of the form 127.0.0.x.
 */
#if defined(_BIG_ENDIAN)
#define	IPv4_LOCALHOST	0x7f000000U
#define	IPv4_LH_MASK	0xffffff00U
#else
#define	IPv4_LOCALHOST	0x0000007fU
#define	IPv4_LH_MASK	0x00ffffffU
#endif

/*
 * True when tcp_t x has an IPv4 local address matching the masked localhost
 * constant above, or an IPv6 local address that is the loopback address.
 */
#define	IS_LOCAL_HOST(x)						\
	(((x)->tcp_connp->conn_ipversion == IPV4_VERSION &&		\
	((x)->tcp_connp->conn_laddr_v4 & IPv4_LH_MASK) == IPv4_LOCALHOST) || \
	((x)->tcp_connp->conn_ipversion == IPV6_VERSION &&		\
	IN6_IS_ADDR_LOOPBACK(&(x)->tcp_connp->conn_laddr_v6)))
1232404c9e6SPatrick Mooney 
124721fffe3SKacheong Poon /*
125721fffe3SKacheong Poon  * Add a connection to the list of detached TIME_WAIT connections
126721fffe3SKacheong Poon  * and set its time to expire.
127721fffe3SKacheong Poon  */
128721fffe3SKacheong Poon void
129721fffe3SKacheong Poon tcp_time_wait_append(tcp_t *tcp)
130721fffe3SKacheong Poon {
131721fffe3SKacheong Poon 	tcp_stack_t	*tcps = tcp->tcp_tcps;
13266cd0f60SKacheong Poon 	squeue_t	*sqp = tcp->tcp_connp->conn_sqp;
1332404c9e6SPatrick Mooney 	tcp_squeue_priv_t *tsp =
13466cd0f60SKacheong Poon 	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
1352404c9e6SPatrick Mooney 	int64_t		now, schedule;
1362404c9e6SPatrick Mooney 	unsigned int	bucket;
137721fffe3SKacheong Poon 
138721fffe3SKacheong Poon 	tcp_timers_stop(tcp);
139721fffe3SKacheong Poon 
140721fffe3SKacheong Poon 	/* Freed above */
141721fffe3SKacheong Poon 	ASSERT(tcp->tcp_timer_tid == 0);
142721fffe3SKacheong Poon 	ASSERT(tcp->tcp_ack_tid == 0);
143721fffe3SKacheong Poon 
144721fffe3SKacheong Poon 	/* must have happened at the time of detaching the tcp */
1452404c9e6SPatrick Mooney 	ASSERT(TCP_IS_DETACHED(tcp));
1462404c9e6SPatrick Mooney 	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
147721fffe3SKacheong Poon 	ASSERT(tcp->tcp_ptpahn == NULL);
148721fffe3SKacheong Poon 	ASSERT(tcp->tcp_flow_stopped == 0);
149721fffe3SKacheong Poon 	ASSERT(tcp->tcp_time_wait_next == NULL);
150721fffe3SKacheong Poon 	ASSERT(tcp->tcp_time_wait_prev == NULL);
15166cd0f60SKacheong Poon 	ASSERT(tcp->tcp_time_wait_expire == 0);
152721fffe3SKacheong Poon 	ASSERT(tcp->tcp_listener == NULL);
153721fffe3SKacheong Poon 
1542404c9e6SPatrick Mooney 	TCP_DBGSTAT(tcps, tcp_time_wait);
1552404c9e6SPatrick Mooney 	mutex_enter(&tsp->tcp_time_wait_lock);
1562404c9e6SPatrick Mooney 
157721fffe3SKacheong Poon 	/*
1582404c9e6SPatrick Mooney 	 * Immediately expire loopback connections.  Since there is no worry
1592404c9e6SPatrick Mooney 	 * about packets on the local host showing up after a long network
1602404c9e6SPatrick Mooney 	 * delay, this is safe and allows much higher rates of connection churn
1612404c9e6SPatrick Mooney 	 * for applications operating locally.
16281b60dacSJerry Jelinek 	 *
1632404c9e6SPatrick Mooney 	 * This typically bypasses the tcp_free_list fast path due to squeue
1642404c9e6SPatrick Mooney 	 * re-entry for the loopback close operation.
16581b60dacSJerry Jelinek 	 */
1662404c9e6SPatrick Mooney 	if (tcp->tcp_loopback) {
1672404c9e6SPatrick Mooney 		tcp_time_wait_purge(tcp, tsp);
1682404c9e6SPatrick Mooney 		mutex_exit(&tsp->tcp_time_wait_lock);
1692404c9e6SPatrick Mooney 		return;
17081b60dacSJerry Jelinek 	}
171721fffe3SKacheong Poon 
1722404c9e6SPatrick Mooney 	/*
1732404c9e6SPatrick Mooney 	 * In order to reap TIME_WAITs reliably, we should use a source of time
1742404c9e6SPatrick Mooney 	 * that is not adjustable by the user.  While it would be more accurate
1752404c9e6SPatrick Mooney 	 * to grab this timestamp before (potentially) sleeping on the
1762404c9e6SPatrick Mooney 	 * tcp_time_wait_lock, doing so complicates bucket addressing later.
1772404c9e6SPatrick Mooney 	 */
1782404c9e6SPatrick Mooney 	now = ddi_get_lbolt64();
17966cd0f60SKacheong Poon 
18066cd0f60SKacheong Poon 	/*
1812404c9e6SPatrick Mooney 	 * Each squeue uses an arbitrary time offset when scheduling
1822404c9e6SPatrick Mooney 	 * expiration timers.  This prevents the bucketing from forcing
1832404c9e6SPatrick Mooney 	 * tcp_time_wait_collector to run in locksetup across squeues.
1842404c9e6SPatrick Mooney 	 *
1852404c9e6SPatrick Mooney 	 * This offset is (re)initialized when a new TIME_WAIT connection is
1862404c9e6SPatrick Mooney 	 * added to an squeue which has no connections waiting to expire.
18766cd0f60SKacheong Poon 	 */
1882404c9e6SPatrick Mooney 	if (tsp->tcp_time_wait_tid == 0) {
1892404c9e6SPatrick Mooney 		ASSERT(tsp->tcp_time_wait_cnt == 0);
1902404c9e6SPatrick Mooney 		tsp->tcp_time_wait_offset =
1912404c9e6SPatrick Mooney 		    now % MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
1922404c9e6SPatrick Mooney 	}
1932404c9e6SPatrick Mooney 	now -= tsp->tcp_time_wait_offset;
19481b60dacSJerry Jelinek 
1952404c9e6SPatrick Mooney 	/*
1962404c9e6SPatrick Mooney 	 * Use the netstack-defined timeout, rounded up to the minimum
1972404c9e6SPatrick Mooney 	 * time_wait_collector interval.
1982404c9e6SPatrick Mooney 	 */
1992404c9e6SPatrick Mooney 	schedule = now + MSEC_TO_TICK(tcps->tcps_time_wait_interval);
2002404c9e6SPatrick Mooney 	tcp->tcp_time_wait_expire = schedule;
2012404c9e6SPatrick Mooney 
2022404c9e6SPatrick Mooney 	/*
2032404c9e6SPatrick Mooney 	 * Append the connection into the appropriate bucket.
2042404c9e6SPatrick Mooney 	 */
2052404c9e6SPatrick Mooney 	bucket = TW_BUCKET(tcp->tcp_time_wait_expire);
2062404c9e6SPatrick Mooney 	tcp->tcp_time_wait_next = tsp->tcp_time_wait_bucket[bucket];
2072404c9e6SPatrick Mooney 	tsp->tcp_time_wait_bucket[bucket] = tcp;
2082404c9e6SPatrick Mooney 	if (tcp->tcp_time_wait_next != NULL) {
2092404c9e6SPatrick Mooney 		ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == NULL);
2102404c9e6SPatrick Mooney 		tcp->tcp_time_wait_next->tcp_time_wait_prev = tcp;
2112404c9e6SPatrick Mooney 	}
2122404c9e6SPatrick Mooney 	tsp->tcp_time_wait_cnt++;
2132404c9e6SPatrick Mooney 
2142404c9e6SPatrick Mooney 	/*
2152404c9e6SPatrick Mooney 	 * Round delay up to the nearest bucket boundary.
2162404c9e6SPatrick Mooney 	 */
2172404c9e6SPatrick Mooney 	schedule += MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
2182404c9e6SPatrick Mooney 	schedule -= schedule % MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
2192404c9e6SPatrick Mooney 
2202404c9e6SPatrick Mooney 	/*
2212404c9e6SPatrick Mooney 	 * The newly inserted entry may require a tighter schedule for the
2222404c9e6SPatrick Mooney 	 * expiration timer.
2232404c9e6SPatrick Mooney 	 */
2242404c9e6SPatrick Mooney 	if (schedule < tsp->tcp_time_wait_schedule) {
2252404c9e6SPatrick Mooney 		callout_id_t old_tid = tsp->tcp_time_wait_tid;
2262404c9e6SPatrick Mooney 
2272404c9e6SPatrick Mooney 		tsp->tcp_time_wait_schedule = schedule;
2282404c9e6SPatrick Mooney 		tsp->tcp_time_wait_tid =
22966cd0f60SKacheong Poon 		    timeout_generic(CALLOUT_NORMAL,
2302404c9e6SPatrick Mooney 		    tcp_time_wait_collector, sqp,
2312404c9e6SPatrick Mooney 		    TICK_TO_NSEC(schedule - now),
2322404c9e6SPatrick Mooney 		    CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
2332404c9e6SPatrick Mooney 
2342404c9e6SPatrick Mooney 		/*
2352404c9e6SPatrick Mooney 		 * It is possible for the timer to fire before the untimeout
2362404c9e6SPatrick Mooney 		 * action is able to complete.  In that case, the exclusion
2372404c9e6SPatrick Mooney 		 * offered by the tcp_time_wait_collector_active flag will
2382404c9e6SPatrick Mooney 		 * prevent multiple collector threads from processing records
2392404c9e6SPatrick Mooney 		 * simultaneously from the same squeue.
2402404c9e6SPatrick Mooney 		 */
2412404c9e6SPatrick Mooney 		mutex_exit(&tsp->tcp_time_wait_lock);
2422404c9e6SPatrick Mooney 		(void) untimeout_default(old_tid, 0);
2432404c9e6SPatrick Mooney 		return;
2442404c9e6SPatrick Mooney 	}
2452404c9e6SPatrick Mooney 
2462404c9e6SPatrick Mooney 	/*
2472404c9e6SPatrick Mooney 	 * Start a fresh timer if none exists.
2482404c9e6SPatrick Mooney 	 */
2492404c9e6SPatrick Mooney 	if (tsp->tcp_time_wait_schedule == 0) {
2502404c9e6SPatrick Mooney 		ASSERT(tsp->tcp_time_wait_tid == 0);
2512404c9e6SPatrick Mooney 
2522404c9e6SPatrick Mooney 		tsp->tcp_time_wait_schedule = schedule;
2532404c9e6SPatrick Mooney 		tsp->tcp_time_wait_tid =
2542404c9e6SPatrick Mooney 		    timeout_generic(CALLOUT_NORMAL,
2552404c9e6SPatrick Mooney 		    tcp_time_wait_collector, sqp,
2562404c9e6SPatrick Mooney 		    TICK_TO_NSEC(schedule - now),
25781b60dacSJerry Jelinek 		    CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
25866cd0f60SKacheong Poon 	}
2592404c9e6SPatrick Mooney 	mutex_exit(&tsp->tcp_time_wait_lock);
260721fffe3SKacheong Poon }
261721fffe3SKacheong Poon 
262721fffe3SKacheong Poon /*
263721fffe3SKacheong Poon  * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
264721fffe3SKacheong Poon  * tcp_t.  Used in tcp_time_wait_collector().
265721fffe3SKacheong Poon  */
266721fffe3SKacheong Poon /* ARGSUSED */
267721fffe3SKacheong Poon static void
268721fffe3SKacheong Poon tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
269721fffe3SKacheong Poon {
270721fffe3SKacheong Poon 	conn_t	*connp = (conn_t *)arg;
271721fffe3SKacheong Poon 	tcp_t	*tcp = connp->conn_tcp;
272721fffe3SKacheong Poon 
273721fffe3SKacheong Poon 	ASSERT(tcp != NULL);
274721fffe3SKacheong Poon 	if (tcp->tcp_state == TCPS_CLOSED) {
275721fffe3SKacheong Poon 		return;
276721fffe3SKacheong Poon 	}
277721fffe3SKacheong Poon 
278721fffe3SKacheong Poon 	ASSERT((connp->conn_family == AF_INET &&
279721fffe3SKacheong Poon 	    connp->conn_ipversion == IPV4_VERSION) ||
280721fffe3SKacheong Poon 	    (connp->conn_family == AF_INET6 &&
281721fffe3SKacheong Poon 	    (connp->conn_ipversion == IPV4_VERSION ||
282721fffe3SKacheong Poon 	    connp->conn_ipversion == IPV6_VERSION)));
283721fffe3SKacheong Poon 	ASSERT(!tcp->tcp_listener);
284721fffe3SKacheong Poon 
285721fffe3SKacheong Poon 	ASSERT(TCP_IS_DETACHED(tcp));
286721fffe3SKacheong Poon 
287721fffe3SKacheong Poon 	/*
288721fffe3SKacheong Poon 	 * Because they have no upstream client to rebind or tcp_close()
289721fffe3SKacheong Poon 	 * them later, we axe the connection here and now.
290721fffe3SKacheong Poon 	 */
291721fffe3SKacheong Poon 	tcp_close_detached(tcp);
292721fffe3SKacheong Poon }
293721fffe3SKacheong Poon 
2942404c9e6SPatrick Mooney 
2952404c9e6SPatrick Mooney static void
2962404c9e6SPatrick Mooney tcp_time_wait_purge(tcp_t *tcp, tcp_squeue_priv_t *tsp)
297721fffe3SKacheong Poon {
298721fffe3SKacheong Poon 	mblk_t *mp;
2992404c9e6SPatrick Mooney 	conn_t *connp = tcp->tcp_connp;
300721fffe3SKacheong Poon 	kmutex_t *lock;
301721fffe3SKacheong Poon 
3022404c9e6SPatrick Mooney 	ASSERT(MUTEX_HELD(&tsp->tcp_time_wait_lock));
303721fffe3SKacheong Poon 	ASSERT(connp->conn_fanout != NULL);
3042404c9e6SPatrick Mooney 
305721fffe3SKacheong Poon 	lock = &connp->conn_fanout->connf_lock;
3062404c9e6SPatrick Mooney 
307721fffe3SKacheong Poon 	/*
3082404c9e6SPatrick Mooney 	 * This is essentially a TIME_WAIT reclaim fast path optimization for
3092404c9e6SPatrick Mooney 	 * performance where the connection is checked under the fanout lock
3102404c9e6SPatrick Mooney 	 * (so that no one else can get access to the conn_t) that the refcnt
3112404c9e6SPatrick Mooney 	 * is 2 (one each for TCP and the classifier hash list).  That is the
3122404c9e6SPatrick Mooney 	 * case and clustering callbacks are not enabled, the conn can be
3132404c9e6SPatrick Mooney 	 * removed under the fanout lock and avoid clean-up under the squeue.
314721fffe3SKacheong Poon 	 *
3152404c9e6SPatrick Mooney 	 * This optimization is forgone when clustering is enabled since the
3162404c9e6SPatrick Mooney 	 * clustering callback must be made before setting the CONDEMNED flag
3172404c9e6SPatrick Mooney 	 * and after dropping all locks
3182404c9e6SPatrick Mooney 	 *
3192404c9e6SPatrick Mooney 	 * See the comments in tcp_closei_local for additional information
3202404c9e6SPatrick Mooney 	 * regarding the refcnt logic.
321721fffe3SKacheong Poon 	 */
322721fffe3SKacheong Poon 	if (mutex_tryenter(lock)) {
323721fffe3SKacheong Poon 		mutex_enter(&connp->conn_lock);
3242404c9e6SPatrick Mooney 		if (connp->conn_ref == 2 && cl_inet_disconnect == NULL) {
3252404c9e6SPatrick Mooney 			ipcl_hash_remove_locked(connp, connp->conn_fanout);
326721fffe3SKacheong Poon 			/*
3272404c9e6SPatrick Mooney 			 * Set the CONDEMNED flag now itself so that the refcnt
3282404c9e6SPatrick Mooney 			 * cannot increase due to any walker.
329721fffe3SKacheong Poon 			 */
330721fffe3SKacheong Poon 			connp->conn_state_flags |= CONN_CONDEMNED;
331721fffe3SKacheong Poon 			mutex_exit(&connp->conn_lock);
3322404c9e6SPatrick Mooney 			mutex_exit(lock);
3332404c9e6SPatrick Mooney 			if (tsp->tcp_free_list_cnt < tcp_free_list_max_cnt) {
3342404c9e6SPatrick Mooney 				/*
3352404c9e6SPatrick Mooney 				 * Add to head of tcp_free_list
3362404c9e6SPatrick Mooney 				 */
337721fffe3SKacheong Poon 				tcp_cleanup(tcp);
338721fffe3SKacheong Poon 				ASSERT(connp->conn_latch == NULL);
339721fffe3SKacheong Poon 				ASSERT(connp->conn_policy == NULL);
340721fffe3SKacheong Poon 				ASSERT(tcp->tcp_tcps == NULL);
341721fffe3SKacheong Poon 				ASSERT(connp->conn_netstack == NULL);
342721fffe3SKacheong Poon 
3432404c9e6SPatrick Mooney 				tcp->tcp_time_wait_next = tsp->tcp_free_list;
3442404c9e6SPatrick Mooney 				tcp->tcp_in_free_list = B_TRUE;
3452404c9e6SPatrick Mooney 				tsp->tcp_free_list = tcp;
3462404c9e6SPatrick Mooney 				tsp->tcp_free_list_cnt++;
347721fffe3SKacheong Poon 			} else {
3482404c9e6SPatrick Mooney 				/*
3492404c9e6SPatrick Mooney 				 * Do not add to tcp_free_list
3502404c9e6SPatrick Mooney 				 */
351721fffe3SKacheong Poon 				tcp_bind_hash_remove(tcp);
352721fffe3SKacheong Poon 				ixa_cleanup(tcp->tcp_connp->conn_ixa);
353721fffe3SKacheong Poon 				tcp_ipsec_cleanup(tcp);
354721fffe3SKacheong Poon 				CONN_DEC_REF(tcp->tcp_connp);
355721fffe3SKacheong Poon 			}
3562404c9e6SPatrick Mooney 
3572404c9e6SPatrick Mooney 			/*
3582404c9e6SPatrick Mooney 			 * With the fast-path complete, we can bail.
3592404c9e6SPatrick Mooney 			 */
3602404c9e6SPatrick Mooney 			return;
361721fffe3SKacheong Poon 		} else {
3622404c9e6SPatrick Mooney 			/*
3632404c9e6SPatrick Mooney 			 * Fall back to slow path.
3642404c9e6SPatrick Mooney 			 */
365721fffe3SKacheong Poon 			CONN_INC_REF_LOCKED(connp);
3662404c9e6SPatrick Mooney 			mutex_exit(&connp->conn_lock);
367721fffe3SKacheong Poon 			mutex_exit(lock);
368721fffe3SKacheong Poon 		}
369721fffe3SKacheong Poon 	} else {
3702404c9e6SPatrick Mooney 		CONN_INC_REF(connp);
3712404c9e6SPatrick Mooney 	}
372721fffe3SKacheong Poon 
3732404c9e6SPatrick Mooney 	/*
3742404c9e6SPatrick Mooney 	 * We can reuse the closemp here since conn has detached (otherwise we
3752404c9e6SPatrick Mooney 	 * wouldn't even be in time_wait list). It is safe to change
3762404c9e6SPatrick Mooney 	 * tcp_closemp_used without taking a lock as no other thread can
3772404c9e6SPatrick Mooney 	 * concurrently access it at this point in the connection lifecycle.
3782404c9e6SPatrick Mooney 	 */
3792404c9e6SPatrick Mooney 	if (tcp->tcp_closemp.b_prev == NULL) {
380721fffe3SKacheong Poon 		tcp->tcp_closemp_used = B_TRUE;
3812404c9e6SPatrick Mooney 	} else {
3822404c9e6SPatrick Mooney 		cmn_err(CE_PANIC,
3832404c9e6SPatrick Mooney 		    "tcp_timewait_collector: concurrent use of tcp_closemp: "
3842404c9e6SPatrick Mooney 		    "connp %p tcp %p\n", (void *)connp, (void *)tcp);
3852404c9e6SPatrick Mooney 	}
386721fffe3SKacheong Poon 
387721fffe3SKacheong Poon 	TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
388721fffe3SKacheong Poon 	mp = &tcp->tcp_closemp;
3892404c9e6SPatrick Mooney 	mutex_exit(&tsp->tcp_time_wait_lock);
3902404c9e6SPatrick Mooney 	SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_timewait_close, connp, NULL,
391721fffe3SKacheong Poon 	    SQ_FILL, SQTAG_TCP_TIMEWAIT);
3922404c9e6SPatrick Mooney 	mutex_enter(&tsp->tcp_time_wait_lock);
393721fffe3SKacheong Poon }
394721fffe3SKacheong Poon 
39566cd0f60SKacheong Poon /*
3962404c9e6SPatrick Mooney  * Purge any tcp_t instances associated with this squeue which have expired
3972404c9e6SPatrick Mooney  * from the TIME_WAIT state.
39866cd0f60SKacheong Poon  */
3992404c9e6SPatrick Mooney void
4002404c9e6SPatrick Mooney tcp_time_wait_collector(void *arg)
4012404c9e6SPatrick Mooney {
4022404c9e6SPatrick Mooney 	tcp_t *tcp;
403*c79a72d7SPatrick Mooney 	int64_t now, sched_active, sched_cur, sched_new;
4042404c9e6SPatrick Mooney 	unsigned int idx;
40566cd0f60SKacheong Poon 
4062404c9e6SPatrick Mooney 	squeue_t *sqp = (squeue_t *)arg;
4072404c9e6SPatrick Mooney 	tcp_squeue_priv_t *tsp =
4082404c9e6SPatrick Mooney 	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
40981b60dacSJerry Jelinek 
4102404c9e6SPatrick Mooney 	mutex_enter(&tsp->tcp_time_wait_lock);
4112404c9e6SPatrick Mooney 
4122404c9e6SPatrick Mooney 	/*
4132404c9e6SPatrick Mooney 	 * Because of timer scheduling complexity and the fact that the
4142404c9e6SPatrick Mooney 	 * tcp_time_wait_lock is dropped during tcp_time_wait_purge, it is
4152404c9e6SPatrick Mooney 	 * possible for multiple tcp_time_wait_collector threads to run against
4162404c9e6SPatrick Mooney 	 * the same squeue.  This flag is used to exclude other collectors from
4172404c9e6SPatrick Mooney 	 * the squeue during execution.
4182404c9e6SPatrick Mooney 	 */
4192404c9e6SPatrick Mooney 	if (tsp->tcp_time_wait_collector_active) {
4202404c9e6SPatrick Mooney 		mutex_exit(&tsp->tcp_time_wait_lock);
4212404c9e6SPatrick Mooney 		return;
42266cd0f60SKacheong Poon 	}
4232404c9e6SPatrick Mooney 	tsp->tcp_time_wait_collector_active = B_TRUE;
4242404c9e6SPatrick Mooney 
4252404c9e6SPatrick Mooney 	/*
426*c79a72d7SPatrick Mooney 	 * After its assignment here, the value of sched_active must not be
427*c79a72d7SPatrick Mooney 	 * altered as it is used to validate the state of the
428*c79a72d7SPatrick Mooney 	 * tcp_time_wait_collector callout schedule for this squeue.
429*c79a72d7SPatrick Mooney 	 *
430*c79a72d7SPatrick Mooney 	 * The same does not hold true of sched_cur, which holds the timestamp
431*c79a72d7SPatrick Mooney 	 * of the bucket undergoing processing.  While it is initially equal to
432*c79a72d7SPatrick Mooney 	 * sched_active, certain conditions below can walk it forward,
433*c79a72d7SPatrick Mooney 	 * triggering the retry loop.
434*c79a72d7SPatrick Mooney 	 */
435*c79a72d7SPatrick Mooney 	sched_cur = sched_active = tsp->tcp_time_wait_schedule;
436*c79a72d7SPatrick Mooney 
437*c79a72d7SPatrick Mooney 	/*
4382404c9e6SPatrick Mooney 	 * Purge the free list if necessary
4392404c9e6SPatrick Mooney 	 */
4402404c9e6SPatrick Mooney 	if (tsp->tcp_free_list != NULL) {
4412404c9e6SPatrick Mooney 		TCP_G_STAT(tcp_freelist_cleanup);
4422404c9e6SPatrick Mooney 		while ((tcp = tsp->tcp_free_list) != NULL) {
4432404c9e6SPatrick Mooney 			tsp->tcp_free_list = tcp->tcp_time_wait_next;
4442404c9e6SPatrick Mooney 			tcp->tcp_time_wait_next = NULL;
4452404c9e6SPatrick Mooney 			tsp->tcp_free_list_cnt--;
4462404c9e6SPatrick Mooney 			ASSERT(tcp->tcp_tcps == NULL);
4472404c9e6SPatrick Mooney 			CONN_DEC_REF(tcp->tcp_connp);
4482404c9e6SPatrick Mooney 		}
4492404c9e6SPatrick Mooney 		ASSERT(tsp->tcp_free_list_cnt == 0);
4502404c9e6SPatrick Mooney 	}
4512404c9e6SPatrick Mooney 
4522404c9e6SPatrick Mooney 	/*
4532404c9e6SPatrick Mooney 	 * If there are no connections pending, clear timer-related state to be
4542404c9e6SPatrick Mooney 	 * reinitialized by the next caller.
4552404c9e6SPatrick Mooney 	 */
4562404c9e6SPatrick Mooney 	if (tsp->tcp_time_wait_cnt == 0) {
4572404c9e6SPatrick Mooney 		tsp->tcp_time_wait_offset = 0;
4582404c9e6SPatrick Mooney 		tsp->tcp_time_wait_schedule = 0;
4592404c9e6SPatrick Mooney 		tsp->tcp_time_wait_tid = 0;
4602404c9e6SPatrick Mooney 		tsp->tcp_time_wait_collector_active = B_FALSE;
4612404c9e6SPatrick Mooney 		mutex_exit(&tsp->tcp_time_wait_lock);
4622404c9e6SPatrick Mooney 		return;
4632404c9e6SPatrick Mooney 	}
4642404c9e6SPatrick Mooney 
465*c79a72d7SPatrick Mooney retry:
4662404c9e6SPatrick Mooney 	/*
4672404c9e6SPatrick Mooney 	 * Grab the bucket which we were scheduled to cleanse.
4682404c9e6SPatrick Mooney 	 */
469*c79a72d7SPatrick Mooney 	idx = TW_BUCKET(sched_cur - 1);
4702404c9e6SPatrick Mooney 	now = ddi_get_lbolt64() - tsp->tcp_time_wait_offset;
4712404c9e6SPatrick Mooney 	tcp = tsp->tcp_time_wait_bucket[idx];
4722404c9e6SPatrick Mooney 
4732404c9e6SPatrick Mooney 	while (tcp != NULL) {
4742404c9e6SPatrick Mooney 		/*
4752404c9e6SPatrick Mooney 		 * Since the bucket count is sized to prevent wrap-around
4762404c9e6SPatrick Mooney 		 * during typical operation and timers are schedule to process
4772404c9e6SPatrick Mooney 		 * buckets with only expired connections, there is only one
4782404c9e6SPatrick Mooney 		 * reason to encounter a connection expiring in the future:
4792404c9e6SPatrick Mooney 		 * The tcp_time_wait_collector thread has been so delayed in
4802404c9e6SPatrick Mooney 		 * its processing that connections have wrapped around the
4812404c9e6SPatrick Mooney 		 * timing wheel into this bucket.
4822404c9e6SPatrick Mooney 		 *
4832404c9e6SPatrick Mooney 		 * In that case, the remaining entires in the bucket can be
4842404c9e6SPatrick Mooney 		 * ignored since, being appended sequentially, they should all
4852404c9e6SPatrick Mooney 		 * expire in the future.
4862404c9e6SPatrick Mooney 		 */
4872404c9e6SPatrick Mooney 		if (now < tcp->tcp_time_wait_expire) {
4882404c9e6SPatrick Mooney 			break;
4892404c9e6SPatrick Mooney 		}
4902404c9e6SPatrick Mooney 
4912404c9e6SPatrick Mooney 		/*
4922404c9e6SPatrick Mooney 		 * Pull the connection out of the bucket.
4932404c9e6SPatrick Mooney 		 */
4942404c9e6SPatrick Mooney 		VERIFY(tcp_time_wait_remove(tcp, tsp));
4952404c9e6SPatrick Mooney 
4962404c9e6SPatrick Mooney 		/*
4972404c9e6SPatrick Mooney 		 * Purge the connection.
4982404c9e6SPatrick Mooney 		 *
4992404c9e6SPatrick Mooney 		 * While tcp_time_wait_lock will be temporarily dropped as part
5002404c9e6SPatrick Mooney 		 * of the process, there is no risk of the timer being
5012404c9e6SPatrick Mooney 		 * (re)scheduled while the collector is running since a value
5022404c9e6SPatrick Mooney 		 * corresponding to the past is left in tcp_time_wait_schedule.
5032404c9e6SPatrick Mooney 		 */
5042404c9e6SPatrick Mooney 		tcp_time_wait_purge(tcp, tsp);
5052404c9e6SPatrick Mooney 
5062404c9e6SPatrick Mooney 		/*
5072404c9e6SPatrick Mooney 		 * Because tcp_time_wait_remove clears the tcp_time_wait_next
5082404c9e6SPatrick Mooney 		 * field, the next item must be grabbed directly from the
5092404c9e6SPatrick Mooney 		 * bucket itself.
5102404c9e6SPatrick Mooney 		 */
5112404c9e6SPatrick Mooney 		tcp = tsp->tcp_time_wait_bucket[idx];
5122404c9e6SPatrick Mooney 	}
5132404c9e6SPatrick Mooney 
5142404c9e6SPatrick Mooney 	if (tsp->tcp_time_wait_cnt == 0) {
5152404c9e6SPatrick Mooney 		/*
5162404c9e6SPatrick Mooney 		 * There is not a need for the collector to schedule a new
5172404c9e6SPatrick Mooney 		 * timer if no pending items remain.  The timer state can be
5182404c9e6SPatrick Mooney 		 * cleared only if it was untouched while the collector dropped
5192404c9e6SPatrick Mooney 		 * its locks during tcp_time_wait_purge.
5202404c9e6SPatrick Mooney 		 */
521*c79a72d7SPatrick Mooney 		if (tsp->tcp_time_wait_schedule == sched_active) {
5222404c9e6SPatrick Mooney 			tsp->tcp_time_wait_offset = 0;
5232404c9e6SPatrick Mooney 			tsp->tcp_time_wait_schedule = 0;
5242404c9e6SPatrick Mooney 			tsp->tcp_time_wait_tid = 0;
5252404c9e6SPatrick Mooney 		}
5262404c9e6SPatrick Mooney 		tsp->tcp_time_wait_collector_active = B_FALSE;
5272404c9e6SPatrick Mooney 		mutex_exit(&tsp->tcp_time_wait_lock);
5282404c9e6SPatrick Mooney 		return;
5292404c9e6SPatrick Mooney 	} else {
5302404c9e6SPatrick Mooney 		unsigned int nidx;
5312404c9e6SPatrick Mooney 
5322404c9e6SPatrick Mooney 		/*
5332404c9e6SPatrick Mooney 		 * Locate the next bucket containing entries.
5342404c9e6SPatrick Mooney 		 */
535*c79a72d7SPatrick Mooney 		sched_new = sched_cur + MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
5362404c9e6SPatrick Mooney 		nidx = TW_BUCKET_NEXT(idx);
5372404c9e6SPatrick Mooney 		while (tsp->tcp_time_wait_bucket[nidx] == NULL) {
5382404c9e6SPatrick Mooney 			if (nidx == idx) {
5392404c9e6SPatrick Mooney 				break;
5402404c9e6SPatrick Mooney 			}
5412404c9e6SPatrick Mooney 			nidx = TW_BUCKET_NEXT(nidx);
542*c79a72d7SPatrick Mooney 			sched_new += MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
5432404c9e6SPatrick Mooney 		}
5442404c9e6SPatrick Mooney 		ASSERT(tsp->tcp_time_wait_bucket[nidx] != NULL);
5452404c9e6SPatrick Mooney 	}
5462404c9e6SPatrick Mooney 
5472404c9e6SPatrick Mooney 	/*
5482404c9e6SPatrick Mooney 	 * It is possible that the system is under such dire load that between
5492404c9e6SPatrick Mooney 	 * the timer scheduling and TIME_WAIT processing delay, execution
5502404c9e6SPatrick Mooney 	 * overran the interval allocated to this bucket.
5512404c9e6SPatrick Mooney 	 */
5522404c9e6SPatrick Mooney 	now = ddi_get_lbolt64() - tsp->tcp_time_wait_offset;
553*c79a72d7SPatrick Mooney 	if (sched_new <= now) {
5542404c9e6SPatrick Mooney 		/*
5552404c9e6SPatrick Mooney 		 * Attempt to right the situation by immediately performing a
5562404c9e6SPatrick Mooney 		 * purge on the next bucket.  This loop will continue as needed
5572404c9e6SPatrick Mooney 		 * until the schedule can be pushed out ahead of the clock.
5582404c9e6SPatrick Mooney 		 */
559*c79a72d7SPatrick Mooney 		sched_cur = sched_new;
560*c79a72d7SPatrick Mooney 		DTRACE_PROBE3(tcp__time__wait__overrun,
561*c79a72d7SPatrick Mooney 		    tcp_squeue_priv_t *, tsp, int64_t, sched_new, int64_t, now);
5622404c9e6SPatrick Mooney 		goto retry;
5632404c9e6SPatrick Mooney 	}
5642404c9e6SPatrick Mooney 
5652404c9e6SPatrick Mooney 	/*
5662404c9e6SPatrick Mooney 	 * Another thread may have snuck in to reschedule the timer while locks
5672404c9e6SPatrick Mooney 	 * were dropped during tcp_time_wait_purge.  Defer to the running timer
5682404c9e6SPatrick Mooney 	 * if that is the case.
5692404c9e6SPatrick Mooney 	 */
570*c79a72d7SPatrick Mooney 	if (tsp->tcp_time_wait_schedule != sched_active) {
5712404c9e6SPatrick Mooney 		tsp->tcp_time_wait_collector_active = B_FALSE;
5722404c9e6SPatrick Mooney 		mutex_exit(&tsp->tcp_time_wait_lock);
5732404c9e6SPatrick Mooney 		return;
5742404c9e6SPatrick Mooney 	}
5752404c9e6SPatrick Mooney 
5762404c9e6SPatrick Mooney 	/*
5772404c9e6SPatrick Mooney 	 * Schedule the next timer.
5782404c9e6SPatrick Mooney 	 */
579*c79a72d7SPatrick Mooney 	tsp->tcp_time_wait_schedule = sched_new;
5802404c9e6SPatrick Mooney 	tsp->tcp_time_wait_tid =
5812404c9e6SPatrick Mooney 	    timeout_generic(CALLOUT_NORMAL,
5822404c9e6SPatrick Mooney 	    tcp_time_wait_collector, sqp,
583*c79a72d7SPatrick Mooney 	    TICK_TO_NSEC(sched_new - now),
5842404c9e6SPatrick Mooney 	    CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
5852404c9e6SPatrick Mooney 	tsp->tcp_time_wait_collector_active = B_FALSE;
5862404c9e6SPatrick Mooney 	mutex_exit(&tsp->tcp_time_wait_lock);
587721fffe3SKacheong Poon }
588721fffe3SKacheong Poon 
589721fffe3SKacheong Poon /*
590721fffe3SKacheong Poon  * tcp_time_wait_processing() handles processing of incoming packets when
591721fffe3SKacheong Poon  * the tcp_t is in the TIME_WAIT state.
592721fffe3SKacheong Poon  *
593721fffe3SKacheong Poon  * A TIME_WAIT tcp_t that has an associated open TCP end point (not in
594721fffe3SKacheong Poon  * detached state) is never put on the time wait list.
 *
 * Arguments:
 *	tcp	- the connection the segment was matched to.
 *	mp	- the incoming message.  It is always consumed here: it is
 *		  either handed to tcp_reinput() (SYN accepted for a new
 *		  connection) or released with freemsg() at the "done" label.
 *	seg_seq	- sequence number of the segment.
 *	seg_ack	- acknowledgment number of the segment.
 *	seg_len	- length of the segment; trimmed locally as data outside
 *		  the acceptable range is discounted.
 *	tcpha	- TCP header of the segment (source of flags and window).
 *	ira	- receive attributes, passed on to ipcl_classify() and
 *		  tcp_reinput() when the SYN is re-injected.
 *
 * The function returns nothing; its effects are the MIB/stat updates, any
 * control segment sent through tcp_xmit_ctl(), a possible restart of the
 * TIME_WAIT timing, and a possible tcp_clean_death() of the connection.
595721fffe3SKacheong Poon  */
596721fffe3SKacheong Poon void
597721fffe3SKacheong Poon tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
598721fffe3SKacheong Poon     uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira)
599721fffe3SKacheong Poon {
600721fffe3SKacheong Poon 	int32_t		bytes_acked;
601721fffe3SKacheong Poon 	int32_t		gap;
602721fffe3SKacheong Poon 	int32_t		rgap;
603721fffe3SKacheong Poon 	tcp_opt_t	tcpopt;
604721fffe3SKacheong Poon 	uint_t		flags;
605721fffe3SKacheong Poon 	uint32_t	new_swnd = 0;
606721fffe3SKacheong Poon 	conn_t		*nconnp;
607721fffe3SKacheong Poon 	conn_t		*connp = tcp->tcp_connp;
608721fffe3SKacheong Poon 	tcp_stack_t	*tcps = tcp->tcp_tcps;
609721fffe3SKacheong Poon 
610721fffe3SKacheong Poon 	BUMP_LOCAL(tcp->tcp_ibsegs);
611721fffe3SKacheong Poon 	DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);
612721fffe3SKacheong Poon 
	/*
	 * Keep only the low eight bits of the header flags.  TH_ACK_NEEDED is
	 * OR'ed into this local copy further down to request an ACK reply.
	 */
613721fffe3SKacheong Poon 	flags = (unsigned int)tcpha->tha_flags & 0xFF;
	/*
	 * Peer's advertised window, left-shifted by tcp_snd_ws unless the
	 * segment carries SYN.  It is consulted only for the tcpInDupAck
	 * accounting under process_ack.
	 */
614721fffe3SKacheong Poon 	new_swnd = ntohs(tcpha->tha_win) <<
615721fffe3SKacheong Poon 	    ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);
6161f183ba0SLauri Tirkkonen 
	/*
	 * When tcp_snd_ts_ok is set, every non-RST segment must carry a
	 * timestamp option and pass tcp_paws_check(); otherwise it is
	 * dropped (with an ACK sent first in the PAWS failure case).
	 */
6171f183ba0SLauri Tirkkonen 	if (tcp->tcp_snd_ts_ok && !(tcpha->tha_flags & TH_RST)) {
6181f183ba0SLauri Tirkkonen 		int options;
		/*
		 * NOTE(review): tcpopt.tcp presumably tells
		 * tcp_parse_options() whether SACK information should be
		 * processed for this connection -- confirm against that
		 * function.
		 */
6191f183ba0SLauri Tirkkonen 		if (tcp->tcp_snd_sack_ok)
6201f183ba0SLauri Tirkkonen 			tcpopt.tcp = tcp;
6211f183ba0SLauri Tirkkonen 		else
6221f183ba0SLauri Tirkkonen 			tcpopt.tcp = NULL;
6231f183ba0SLauri Tirkkonen 		options = tcp_parse_options(tcpha, &tcpopt);
6241f183ba0SLauri Tirkkonen 		if (!(options & TCP_OPT_TSTAMP_PRESENT)) {
6251f183ba0SLauri Tirkkonen 			DTRACE_TCP1(droppedtimestamp, tcp_t *, tcp);
6261f183ba0SLauri Tirkkonen 			goto done;
6271f183ba0SLauri Tirkkonen 		} else if (!tcp_paws_check(tcp, &tcpopt)) {
6281f183ba0SLauri Tirkkonen 			tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt,
6291f183ba0SLauri Tirkkonen 			    TH_ACK);
630721fffe3SKacheong Poon 			goto done;
631721fffe3SKacheong Poon 		}
632721fffe3SKacheong Poon 	}
	/*
	 * gap is the offset of the segment start from tcp_rnxt; a negative
	 * value means the segment begins before tcp_rnxt.  rgap is what is
	 * left of tcp_rwnd once gap and seg_len have been subtracted.
	 */
633721fffe3SKacheong Poon 	gap = seg_seq - tcp->tcp_rnxt;
634721fffe3SKacheong Poon 	rgap = tcp->tcp_rwnd - (gap + seg_len);
635721fffe3SKacheong Poon 	if (gap < 0) {
		/* Count the smaller of seg_len and -gap as duplicate bytes. */
636721fffe3SKacheong Poon 		TCPS_BUMP_MIB(tcps, tcpInDataDupSegs);
637721fffe3SKacheong Poon 		TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes,
638721fffe3SKacheong Poon 		    (seg_len > -gap ? -gap : seg_len));
		/* Discount the leading -gap bytes that precede tcp_rnxt. */
639721fffe3SKacheong Poon 		seg_len += gap;
		/* Nothing usable is left after trimming. */
640721fffe3SKacheong Poon 		if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
641721fffe3SKacheong Poon 			if (flags & TH_RST) {
642721fffe3SKacheong Poon 				goto done;
643721fffe3SKacheong Poon 			}
			/*
			 * NOTE(review): seg_len == -1 together with FIN is
			 * treated as the duplicate-FIN case; presumably the
			 * -1 accounts for the sequence number occupied by the
			 * FIN itself -- confirm.
			 */
644721fffe3SKacheong Poon 			if ((flags & TH_FIN) && seg_len == -1) {
645721fffe3SKacheong Poon 				/*
646721fffe3SKacheong Poon 				 * When TCP receives a duplicate FIN in
647721fffe3SKacheong Poon 				 * TIME_WAIT state, restart the 2 MSL timer.
648721fffe3SKacheong Poon 				 * See page 73 in RFC 793. Make sure this TCP
649721fffe3SKacheong Poon 				 * is already on the TIME_WAIT list. If not,
650721fffe3SKacheong Poon 				 * just restart the timer.
651721fffe3SKacheong Poon 				 */
652721fffe3SKacheong Poon 				if (TCP_IS_DETACHED(tcp)) {
					/*
					 * Re-append only if the tcp_t was
					 * actually removed from the list.
					 */
653721fffe3SKacheong Poon 					if (tcp_time_wait_remove(tcp, NULL) ==
654721fffe3SKacheong Poon 					    B_TRUE) {
655721fffe3SKacheong Poon 						tcp_time_wait_append(tcp);
656721fffe3SKacheong Poon 						TCP_DBGSTAT(tcps,
657721fffe3SKacheong Poon 						    tcp_rput_time_wait);
658721fffe3SKacheong Poon 					}
659721fffe3SKacheong Poon 				} else {
					/*
					 * NOTE(review): tcp has already been
					 * dereferenced many times above, so
					 * this ASSERT cannot catch anything
					 * new.
					 */
660721fffe3SKacheong Poon 					ASSERT(tcp != NULL);
661721fffe3SKacheong Poon 					TCP_TIMER_RESTART(tcp,
662721fffe3SKacheong Poon 					    tcps->tcps_time_wait_interval);
663721fffe3SKacheong Poon 				}
664721fffe3SKacheong Poon 				tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
665721fffe3SKacheong Poon 				    tcp->tcp_rnxt, TH_ACK);
666721fffe3SKacheong Poon 				goto done;
667721fffe3SKacheong Poon 			}
			/* Otherwise just answer the stale segment with an ACK. */
668721fffe3SKacheong Poon 			flags |=  TH_ACK_NEEDED;
669721fffe3SKacheong Poon 			seg_len = 0;
670721fffe3SKacheong Poon 			goto process_ack;
671721fffe3SKacheong Poon 		}
672721fffe3SKacheong Poon 
673721fffe3SKacheong Poon 		/* Fix seg_seq, and chew the gap off the front. */
674721fffe3SKacheong Poon 		seg_seq = tcp->tcp_rnxt;
675721fffe3SKacheong Poon 	}
676721fffe3SKacheong Poon 
	/*
	 * A SYN that starts beyond tcp_rnxt and does not fit in the receive
	 * window: tear down this tcp_t and feed the SYN back in so that it
	 * can be classified again.
	 */
677721fffe3SKacheong Poon 	if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
678721fffe3SKacheong Poon 		/*
679721fffe3SKacheong Poon 		 * Make sure that when we accept the connection, pick
680c0e6663fSJerry Jelinek 		 * an ISS greater than (tcp_snxt + tcp_iss_incr/2) for the
681721fffe3SKacheong Poon 		 * old connection.
682721fffe3SKacheong Poon 		 *
683721fffe3SKacheong Poon 		 * The next ISS generated is equal to tcp_iss_incr_extra
684c0e6663fSJerry Jelinek 		 * + tcp_iss_incr/2 + other components depending on the
685721fffe3SKacheong Poon 		 * value of tcp_strong_iss.  We pre-calculate the new
686721fffe3SKacheong Poon 		 * ISS here and compare with tcp_snxt to determine if
687721fffe3SKacheong Poon 		 * we need to make adjustment to tcp_iss_incr_extra.
688721fffe3SKacheong Poon 		 *
689721fffe3SKacheong Poon 		 * The above calculation is ugly and is a
690721fffe3SKacheong Poon 		 * waste of CPU cycles...
691721fffe3SKacheong Poon 		 */
692721fffe3SKacheong Poon 		uint32_t new_iss = tcps->tcps_iss_incr_extra;
693721fffe3SKacheong Poon 		int32_t adj;
		/*
		 * ipst is captured here; it is needed for ipcl_classify()
		 * and tcp_reinput() after tcp_clean_death() has run.
		 */
694721fffe3SKacheong Poon 		ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;
695721fffe3SKacheong Poon 
696721fffe3SKacheong Poon 		switch (tcps->tcps_strong_iss) {
697721fffe3SKacheong Poon 		case 2: {
698721fffe3SKacheong Poon 			/* Add time and MD5 components. */
699721fffe3SKacheong Poon 			uint32_t answer[4];
700721fffe3SKacheong Poon 			struct {
701721fffe3SKacheong Poon 				uint32_t ports;
702721fffe3SKacheong Poon 				in6_addr_t src;
703721fffe3SKacheong Poon 				in6_addr_t dst;
704721fffe3SKacheong Poon 			} arg;
705721fffe3SKacheong Poon 			MD5_CTX context;
706721fffe3SKacheong Poon 
			/*
			 * Snapshot the key context under its lock and hash
			 * with the private copy.
			 */
707721fffe3SKacheong Poon 			mutex_enter(&tcps->tcps_iss_key_lock);
708721fffe3SKacheong Poon 			context = tcps->tcps_iss_key;
709721fffe3SKacheong Poon 			mutex_exit(&tcps->tcps_iss_key_lock);
710721fffe3SKacheong Poon 			arg.ports = connp->conn_ports;
711721fffe3SKacheong Poon 			/* We use MAPPED addresses in tcp_iss_init */
712721fffe3SKacheong Poon 			arg.src = connp->conn_laddr_v6;
713721fffe3SKacheong Poon 			arg.dst = connp->conn_faddr_v6;
714721fffe3SKacheong Poon 			MD5Update(&context, (uchar_t *)&arg,
715721fffe3SKacheong Poon 			    sizeof (arg));
716721fffe3SKacheong Poon 			MD5Final((uchar_t *)answer, &context);
			/* Fold the four digest words into answer[0]. */
717721fffe3SKacheong Poon 			answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
718721fffe3SKacheong Poon 			new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
719721fffe3SKacheong Poon 			break;
720721fffe3SKacheong Poon 		}
721721fffe3SKacheong Poon 		case 1:
722721fffe3SKacheong Poon 			/* Add time component and min random (i.e. 1). */
723721fffe3SKacheong Poon 			new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
724721fffe3SKacheong Poon 			break;
725721fffe3SKacheong Poon 		default:
726721fffe3SKacheong Poon 			/* Add only time component. */
727c0e6663fSJerry Jelinek 			new_iss += (uint32_t)gethrestime_sec() *
728c0e6663fSJerry Jelinek 			    tcps->tcps_iss_incr;
729721fffe3SKacheong Poon 			break;
730721fffe3SKacheong Poon 		}
		/*
		 * adj is the signed 32-bit difference between tcp_snxt and
		 * the projected ISS; only a positive value needs correcting.
		 */
731721fffe3SKacheong Poon 		if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
732721fffe3SKacheong Poon 			/*
733c0e6663fSJerry Jelinek 			 * New ISS not guaranteed to be tcp_iss_incr/2
734721fffe3SKacheong Poon 			 * ahead of the current tcp_snxt, so add the
735721fffe3SKacheong Poon 			 * difference to tcp_iss_incr_extra.
736721fffe3SKacheong Poon 			 */
737721fffe3SKacheong Poon 			tcps->tcps_iss_incr_extra += adj;
738721fffe3SKacheong Poon 		}
739721fffe3SKacheong Poon 		/*
740721fffe3SKacheong Poon 		 * If tcp_clean_death() can not perform the task now,
741721fffe3SKacheong Poon 		 * drop the SYN packet and let the other side re-xmit.
742721fffe3SKacheong Poon 		 * Otherwise pass the SYN packet back in, since the
743721fffe3SKacheong Poon 		 * old tcp state has been cleaned up or freed.
744721fffe3SKacheong Poon 		 */
745721fffe3SKacheong Poon 		if (tcp_clean_death(tcp, 0) == -1)
746721fffe3SKacheong Poon 			goto done;
747721fffe3SKacheong Poon 		nconnp = ipcl_classify(mp, ira, ipst);
748721fffe3SKacheong Poon 		if (nconnp != NULL) {
749721fffe3SKacheong Poon 			TCP_STAT(tcps, tcp_time_wait_syn_success);
750721fffe3SKacheong Poon 			/* Drops ref on nconnp */
751721fffe3SKacheong Poon 			tcp_reinput(nconnp, mp, ira, ipst);
			/* mp was handed to tcp_reinput(); skip freemsg(). */
752721fffe3SKacheong Poon 			return;
753721fffe3SKacheong Poon 		}
		/* No connection matched the SYN; drop it. */
754721fffe3SKacheong Poon 		goto done;
755721fffe3SKacheong Poon 	}
756721fffe3SKacheong Poon 
757721fffe3SKacheong Poon 	/*
758721fffe3SKacheong Poon 	 * rgap is the amount of stuff received out of window.  A negative
759721fffe3SKacheong Poon 	 * value is the amount out of window.
760721fffe3SKacheong Poon 	 */
761721fffe3SKacheong Poon 	if (rgap < 0) {
762721fffe3SKacheong Poon 		TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
763721fffe3SKacheong Poon 		TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
764721fffe3SKacheong Poon 		/* Fix seg_len and make sure there is something left. */
765721fffe3SKacheong Poon 		seg_len += rgap;
766721fffe3SKacheong Poon 		if (seg_len <= 0) {
			/* A RST with nothing in window is silently dropped. */
767721fffe3SKacheong Poon 			if (flags & TH_RST) {
768721fffe3SKacheong Poon 				goto done;
769721fffe3SKacheong Poon 			}
770721fffe3SKacheong Poon 			flags |=  TH_ACK_NEEDED;
771721fffe3SKacheong Poon 			seg_len = 0;
772721fffe3SKacheong Poon 			goto process_ack;
773721fffe3SKacheong Poon 		}
774721fffe3SKacheong Poon 	}
775721fffe3SKacheong Poon 	/*
7761f183ba0SLauri Tirkkonen 	 * Check whether we can update tcp_ts_recent. This test is from RFC
7771f183ba0SLauri Tirkkonen 	 * 7323, section 5.3.
 *
	 * tcpopt is only populated by the timestamp block near the top of
	 * this function, which is guarded by a matching tcp_snd_ts_ok /
	 * no-RST condition.
778721fffe3SKacheong Poon 	 */
7791f183ba0SLauri Tirkkonen 	if (tcp->tcp_snd_ts_ok && !(flags & TH_RST) &&
780721fffe3SKacheong Poon 	    TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
781721fffe3SKacheong Poon 	    SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
782721fffe3SKacheong Poon 		tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
783721fffe3SKacheong Poon 		tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64();
784721fffe3SKacheong Poon 	}
785721fffe3SKacheong Poon 
786721fffe3SKacheong Poon 	if (seg_seq != tcp->tcp_rnxt && seg_len > 0) {
787721fffe3SKacheong Poon 		/* Always ack out of order packets */
788721fffe3SKacheong Poon 		flags |= TH_ACK_NEEDED;
789721fffe3SKacheong Poon 		seg_len = 0;
790721fffe3SKacheong Poon 	} else if (seg_len > 0) {
		/* In-order data: it is only counted in the MIB here. */
791721fffe3SKacheong Poon 		TCPS_BUMP_MIB(tcps, tcpInClosed);
792721fffe3SKacheong Poon 		TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
793721fffe3SKacheong Poon 		TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
794721fffe3SKacheong Poon 	}
	/*
	 * A RST that got this far ends the connection; the result of
	 * tcp_clean_death() is deliberately ignored.
	 */
795721fffe3SKacheong Poon 	if (flags & TH_RST) {
796721fffe3SKacheong Poon 		(void) tcp_clean_death(tcp, 0);
797721fffe3SKacheong Poon 		goto done;
798721fffe3SKacheong Poon 	}
	/* Any other SYN reaching this point is answered with RST|ACK. */
799721fffe3SKacheong Poon 	if (flags & TH_SYN) {
800721fffe3SKacheong Poon 		tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1,
801721fffe3SKacheong Poon 		    TH_RST|TH_ACK);
802721fffe3SKacheong Poon 		/*
803721fffe3SKacheong Poon 		 * Do not delete the TCP structure if it is in
804721fffe3SKacheong Poon 		 * TIME_WAIT state.  Refer to RFC 1122, 4.2.2.13.
805721fffe3SKacheong Poon 		 */
806721fffe3SKacheong Poon 		goto done;
807721fffe3SKacheong Poon 	}
808721fffe3SKacheong Poon process_ack:
809721fffe3SKacheong Poon 	if (flags & TH_ACK) {
		/* Signed distance of the acknowledgment from tcp_suna. */
810721fffe3SKacheong Poon 		bytes_acked = (int)(seg_ack - tcp->tcp_suna);
811721fffe3SKacheong Poon 		if (bytes_acked <= 0) {
			/*
			 * Counted as a duplicate ACK only when it carries no
			 * data and leaves the send window unchanged.
			 */
812721fffe3SKacheong Poon 			if (bytes_acked == 0 && seg_len == 0 &&
813721fffe3SKacheong Poon 			    new_swnd == tcp->tcp_swnd)
814721fffe3SKacheong Poon 				TCPS_BUMP_MIB(tcps, tcpInDupAck);
815721fffe3SKacheong Poon 		} else {
816721fffe3SKacheong Poon 			/* Acks something not sent */
817721fffe3SKacheong Poon 			flags |= TH_ACK_NEEDED;
818721fffe3SKacheong Poon 		}
819721fffe3SKacheong Poon 	}
820721fffe3SKacheong Poon 	if (flags & TH_ACK_NEEDED) {
821721fffe3SKacheong Poon 		/*
822721fffe3SKacheong Poon 		 * Time to send an ack for some reason.
823721fffe3SKacheong Poon 		 */
824721fffe3SKacheong Poon 		tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
825721fffe3SKacheong Poon 		    tcp->tcp_rnxt, TH_ACK);
826721fffe3SKacheong Poon 	}
827721fffe3SKacheong Poon done:
	/* Common exit: release the incoming message. */
828721fffe3SKacheong Poon 	freemsg(mp);
829721fffe3SKacheong Poon }
830