xref: /illumos-gate/usr/src/uts/common/inet/tcp/tcp_time_wait.c (revision 2360e12de6667a0a73d68895549343137c26c892)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * This file contains functions related to TCP time wait processing.  Also
28  * refer to the time wait handling comments in tcp_impl.h.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/strsun.h>
33 #include <sys/squeue_impl.h>
34 #include <sys/squeue.h>
35 #include <sys/callo.h>
36 
37 #include <inet/common.h>
38 #include <inet/ip.h>
39 #include <inet/tcp.h>
40 #include <inet/tcp_impl.h>
41 #include <inet/tcp_cluster.h>
42 
43 static void	tcp_timewait_close(void *, mblk_t *, void *, ip_recv_attr_t *);
44 
45 /*
46  * TCP_TIME_WAIT_DELAY governs how often the time_wait_collector runs.
47  * Running it every 5 seconds seems to give the best results.
48  */
49 #define	TCP_TIME_WAIT_DELAY ((hrtime_t)5 * NANOSEC)
50 
51 /*
52  * Remove a connection from the list of detached TIME_WAIT connections.
53  * It returns B_FALSE if it can't remove the connection from the list
54  * as the connection has already been removed from the list due to an
55  * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
56  */
57 boolean_t
58 tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait)
59 {
60 	boolean_t	locked = B_FALSE;
61 
62 	if (tcp_time_wait == NULL) {
63 		tcp_time_wait = *((tcp_squeue_priv_t **)
64 		    squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP));
65 		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
66 		locked = B_TRUE;
67 	} else {
68 		ASSERT(MUTEX_HELD(&tcp_time_wait->tcp_time_wait_lock));
69 	}
70 
71 	/* 0 means that the tcp_t has not been added to the time wait list. */
72 	if (tcp->tcp_time_wait_expire == 0) {
73 		ASSERT(tcp->tcp_time_wait_next == NULL);
74 		ASSERT(tcp->tcp_time_wait_prev == NULL);
75 		if (locked)
76 			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
77 		return (B_FALSE);
78 	}
79 	ASSERT(TCP_IS_DETACHED(tcp));
80 	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
81 
82 	if (tcp == tcp_time_wait->tcp_time_wait_head) {
83 		ASSERT(tcp->tcp_time_wait_prev == NULL);
84 		tcp_time_wait->tcp_time_wait_head = tcp->tcp_time_wait_next;
85 		if (tcp_time_wait->tcp_time_wait_head != NULL) {
86 			tcp_time_wait->tcp_time_wait_head->tcp_time_wait_prev =
87 			    NULL;
88 		} else {
89 			tcp_time_wait->tcp_time_wait_tail = NULL;
90 		}
91 	} else if (tcp == tcp_time_wait->tcp_time_wait_tail) {
92 		ASSERT(tcp->tcp_time_wait_next == NULL);
93 		tcp_time_wait->tcp_time_wait_tail = tcp->tcp_time_wait_prev;
94 		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
95 		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = NULL;
96 	} else {
97 		ASSERT(tcp->tcp_time_wait_prev->tcp_time_wait_next == tcp);
98 		ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == tcp);
99 		tcp->tcp_time_wait_prev->tcp_time_wait_next =
100 		    tcp->tcp_time_wait_next;
101 		tcp->tcp_time_wait_next->tcp_time_wait_prev =
102 		    tcp->tcp_time_wait_prev;
103 	}
104 	tcp->tcp_time_wait_next = NULL;
105 	tcp->tcp_time_wait_prev = NULL;
106 	tcp->tcp_time_wait_expire = 0;
107 
108 	if (locked)
109 		mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
110 	return (B_TRUE);
111 }
112 
113 /*
114  * Add a connection to the list of detached TIME_WAIT connections
115  * and set its time to expire.
116  */
117 void
118 tcp_time_wait_append(tcp_t *tcp)
119 {
120 	tcp_stack_t	*tcps = tcp->tcp_tcps;
121 	squeue_t	*sqp = tcp->tcp_connp->conn_sqp;
122 	tcp_squeue_priv_t *tcp_time_wait =
123 	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
124 
125 	tcp_timers_stop(tcp);
126 
127 	/* Freed above */
128 	ASSERT(tcp->tcp_timer_tid == 0);
129 	ASSERT(tcp->tcp_ack_tid == 0);
130 
131 	/* must have happened at the time of detaching the tcp */
132 	ASSERT(tcp->tcp_ptpahn == NULL);
133 	ASSERT(tcp->tcp_flow_stopped == 0);
134 	ASSERT(tcp->tcp_time_wait_next == NULL);
135 	ASSERT(tcp->tcp_time_wait_prev == NULL);
136 	ASSERT(tcp->tcp_time_wait_expire == 0);
137 	ASSERT(tcp->tcp_listener == NULL);
138 
139 	tcp->tcp_time_wait_expire = ddi_get_lbolt64();
140 	/*
141 	 * Since tcp_time_wait_expire is lbolt64, it should not wrap around
142 	 * in practice.  Hence it cannot be 0.  Note that zero means that the
143 	 * tcp_t is not in the TIME_WAIT list.
144 	 */
145 	tcp->tcp_time_wait_expire += MSEC_TO_TICK(
146 	    tcps->tcps_time_wait_interval);
147 
148 	ASSERT(TCP_IS_DETACHED(tcp));
149 	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
150 	ASSERT(tcp->tcp_time_wait_next == NULL);
151 	ASSERT(tcp->tcp_time_wait_prev == NULL);
152 	TCP_DBGSTAT(tcps, tcp_time_wait);
153 
154 	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
155 	if (tcp_time_wait->tcp_time_wait_head == NULL) {
156 		ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL);
157 		tcp_time_wait->tcp_time_wait_head = tcp;
158 
159 		/*
160 		 * Even if the list was empty before, there may be a timer
161 		 * running since a tcp_t can be removed from the list
162 		 * in other places, such as tcp_clean_death().  So check if
163 		 * a timer is needed.
164 		 */
165 		if (tcp_time_wait->tcp_time_wait_tid == 0) {
166 			tcp_time_wait->tcp_time_wait_tid =
167 			    timeout_generic(CALLOUT_NORMAL,
168 			    tcp_time_wait_collector, sqp,
169 			    (hrtime_t)(tcps->tcps_time_wait_interval + 1) *
170 			    MICROSEC, CALLOUT_TCP_RESOLUTION,
171 			    CALLOUT_FLAG_ROUNDUP);
172 		}
173 	} else {
174 		/*
175 		 * The list is not empty, so a timer must be running.  If not,
176 		 * tcp_time_wait_collector() must be running on this
177 		 * tcp_time_wait list at the same time.
178 		 */
179 		ASSERT(tcp_time_wait->tcp_time_wait_tid != 0 ||
180 		    tcp_time_wait->tcp_time_wait_running);
181 		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
182 		ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state ==
183 		    TCPS_TIME_WAIT);
184 		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = tcp;
185 		tcp->tcp_time_wait_prev = tcp_time_wait->tcp_time_wait_tail;
186 
187 	}
188 	tcp_time_wait->tcp_time_wait_tail = tcp;
189 	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
190 }
191 
192 /*
193  * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
194  * tcp_t.  Used in tcp_time_wait_collector().
195  */
196 /* ARGSUSED */
197 static void
198 tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
199 {
200 	conn_t	*connp = (conn_t *)arg;
201 	tcp_t	*tcp = connp->conn_tcp;
202 
203 	ASSERT(tcp != NULL);
204 	if (tcp->tcp_state == TCPS_CLOSED) {
205 		return;
206 	}
207 
208 	ASSERT((connp->conn_family == AF_INET &&
209 	    connp->conn_ipversion == IPV4_VERSION) ||
210 	    (connp->conn_family == AF_INET6 &&
211 	    (connp->conn_ipversion == IPV4_VERSION ||
212 	    connp->conn_ipversion == IPV6_VERSION)));
213 	ASSERT(!tcp->tcp_listener);
214 
215 	ASSERT(TCP_IS_DETACHED(tcp));
216 
217 	/*
218 	 * Because they have no upstream client to rebind or tcp_close()
219 	 * them later, we axe the connection here and now.
220 	 */
221 	tcp_close_detached(tcp);
222 }
223 
224 /*
225  * Blows away all tcps whose TIME_WAIT has expired. List traversal
226  * is done forwards from the head.
227  * This walks all stack instances since
228  * tcp_time_wait remains global across all stacks.
229  */
230 /* ARGSUSED */
231 void
232 tcp_time_wait_collector(void *arg)
233 {
234 	tcp_t *tcp;
235 	int64_t now;
236 	mblk_t *mp;
237 	conn_t *connp;
238 	kmutex_t *lock;
239 	boolean_t removed;
240 	extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
241 	    uint8_t *, in_port_t, uint8_t *, in_port_t, void *);
242 
243 	squeue_t *sqp = (squeue_t *)arg;
244 	tcp_squeue_priv_t *tcp_time_wait =
245 	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
246 
247 	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
248 	tcp_time_wait->tcp_time_wait_tid = 0;
249 #ifdef DEBUG
250 	tcp_time_wait->tcp_time_wait_running = B_TRUE;
251 #endif
252 
253 	if (tcp_time_wait->tcp_free_list != NULL &&
254 	    tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
255 		TCP_G_STAT(tcp_freelist_cleanup);
256 		while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
257 			tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
258 			tcp->tcp_time_wait_next = NULL;
259 			tcp_time_wait->tcp_free_list_cnt--;
260 			ASSERT(tcp->tcp_tcps == NULL);
261 			CONN_DEC_REF(tcp->tcp_connp);
262 		}
263 		ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
264 	}
265 
266 	/*
267 	 * In order to reap time waits reliably, we should use a
268 	 * source of time that is not adjustable by the user -- hence
269 	 * the call to ddi_get_lbolt64().
270 	 */
271 	now = ddi_get_lbolt64();
272 	while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
273 		/*
274 		 * lbolt64 should not wrap around in practice...  So we can
275 		 * do a direct comparison.
276 		 */
277 		if (now < tcp->tcp_time_wait_expire)
278 			break;
279 
280 		removed = tcp_time_wait_remove(tcp, tcp_time_wait);
281 		ASSERT(removed);
282 
283 		connp = tcp->tcp_connp;
284 		ASSERT(connp->conn_fanout != NULL);
285 		lock = &connp->conn_fanout->connf_lock;
286 		/*
287 		 * This is essentially a TW reclaim fast path optimization for
288 		 * performance where the timewait collector checks under the
289 		 * fanout lock (so that no one else can get access to the
290 		 * conn_t) that the refcnt is 2 i.e. one for TCP and one for
291 		 * the classifier hash list. If ref count is indeed 2, we can
292 		 * just remove the conn under the fanout lock and avoid
293 		 * cleaning up the conn under the squeue, provided that
294 		 * clustering callbacks are not enabled. If clustering is
295 		 * enabled, we need to make the clustering callback before
296 		 * setting the CONDEMNED flag and after dropping all locks and
297 		 * so we forego this optimization and fall back to the slow
298 		 * path. Also please see the comments in tcp_closei_local
299 		 * regarding the refcnt logic.
300 		 *
301 		 * Since we are holding the tcp_time_wait_lock, its better
302 		 * not to block on the fanout_lock because other connections
303 		 * can't add themselves to time_wait list. So we do a
304 		 * tryenter instead of mutex_enter.
305 		 */
306 		if (mutex_tryenter(lock)) {
307 			mutex_enter(&connp->conn_lock);
308 			if ((connp->conn_ref == 2) &&
309 			    (cl_inet_disconnect == NULL)) {
310 				ipcl_hash_remove_locked(connp,
311 				    connp->conn_fanout);
312 				/*
313 				 * Set the CONDEMNED flag now itself so that
314 				 * the refcnt cannot increase due to any
315 				 * walker.
316 				 */
317 				connp->conn_state_flags |= CONN_CONDEMNED;
318 				mutex_exit(lock);
319 				mutex_exit(&connp->conn_lock);
320 				if (tcp_time_wait->tcp_free_list_cnt <
321 				    tcp_free_list_max_cnt) {
322 					/* Add to head of tcp_free_list */
323 					mutex_exit(
324 					    &tcp_time_wait->tcp_time_wait_lock);
325 					tcp_cleanup(tcp);
326 					ASSERT(connp->conn_latch == NULL);
327 					ASSERT(connp->conn_policy == NULL);
328 					ASSERT(tcp->tcp_tcps == NULL);
329 					ASSERT(connp->conn_netstack == NULL);
330 
331 					mutex_enter(
332 					    &tcp_time_wait->tcp_time_wait_lock);
333 					tcp->tcp_time_wait_next =
334 					    tcp_time_wait->tcp_free_list;
335 					tcp_time_wait->tcp_free_list = tcp;
336 					tcp_time_wait->tcp_free_list_cnt++;
337 					continue;
338 				} else {
339 					/* Do not add to tcp_free_list */
340 					mutex_exit(
341 					    &tcp_time_wait->tcp_time_wait_lock);
342 					tcp_bind_hash_remove(tcp);
343 					ixa_cleanup(tcp->tcp_connp->conn_ixa);
344 					tcp_ipsec_cleanup(tcp);
345 					CONN_DEC_REF(tcp->tcp_connp);
346 				}
347 			} else {
348 				CONN_INC_REF_LOCKED(connp);
349 				mutex_exit(lock);
350 				mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
351 				mutex_exit(&connp->conn_lock);
352 				/*
353 				 * We can reuse the closemp here since conn has
354 				 * detached (otherwise we wouldn't even be in
355 				 * time_wait list). tcp_closemp_used can safely
356 				 * be changed without taking a lock as no other
357 				 * thread can concurrently access it at this
358 				 * point in the connection lifecycle.
359 				 */
360 
361 				if (tcp->tcp_closemp.b_prev == NULL)
362 					tcp->tcp_closemp_used = B_TRUE;
363 				else
364 					cmn_err(CE_PANIC,
365 					    "tcp_timewait_collector: "
366 					    "concurrent use of tcp_closemp: "
367 					    "connp %p tcp %p\n", (void *)connp,
368 					    (void *)tcp);
369 
370 				TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
371 				mp = &tcp->tcp_closemp;
372 				SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
373 				    tcp_timewait_close, connp, NULL,
374 				    SQ_FILL, SQTAG_TCP_TIMEWAIT);
375 			}
376 		} else {
377 			mutex_enter(&connp->conn_lock);
378 			CONN_INC_REF_LOCKED(connp);
379 			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
380 			mutex_exit(&connp->conn_lock);
381 			/*
382 			 * We can reuse the closemp here since conn has
383 			 * detached (otherwise we wouldn't even be in
384 			 * time_wait list). tcp_closemp_used can safely
385 			 * be changed without taking a lock as no other
386 			 * thread can concurrently access it at this
387 			 * point in the connection lifecycle.
388 			 */
389 
390 			if (tcp->tcp_closemp.b_prev == NULL)
391 				tcp->tcp_closemp_used = B_TRUE;
392 			else
393 				cmn_err(CE_PANIC, "tcp_timewait_collector: "
394 				    "concurrent use of tcp_closemp: "
395 				    "connp %p tcp %p\n", (void *)connp,
396 				    (void *)tcp);
397 
398 			TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
399 			mp = &tcp->tcp_closemp;
400 			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
401 			    tcp_timewait_close, connp, NULL,
402 			    SQ_FILL, SQTAG_TCP_TIMEWAIT);
403 		}
404 		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
405 	}
406 
407 	if (tcp_time_wait->tcp_free_list != NULL)
408 		tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;
409 
410 	/*
411 	 * If the time wait list is not empty and there is no timer running,
412 	 * restart it.
413 	 */
414 	if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL &&
415 	    tcp_time_wait->tcp_time_wait_tid == 0) {
416 		hrtime_t firetime;
417 
418 		firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now);
419 		/* This ensures that we won't wake up too often. */
420 		firetime = MAX(TCP_TIME_WAIT_DELAY, firetime);
421 		tcp_time_wait->tcp_time_wait_tid =
422 		    timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector,
423 		    sqp, firetime, CALLOUT_TCP_RESOLUTION,
424 		    CALLOUT_FLAG_ROUNDUP);
425 	}
426 #ifdef DEBUG
427 	tcp_time_wait->tcp_time_wait_running = B_FALSE;
428 #endif
429 	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
430 }
431 
432 /*
433  * tcp_time_wait_processing() handles processing of incoming packets when
434  * the tcp_t is in the TIME_WAIT state.
435  *
436  * A TIME_WAIT tcp_t that has an associated open TCP end point (not in
437  * detached state) is never put on the time wait list.
438  */
439 void
440 tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
441     uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira)
442 {
443 	int32_t		bytes_acked;
444 	int32_t		gap;
445 	int32_t		rgap;
446 	tcp_opt_t	tcpopt;
447 	uint_t		flags;
448 	uint32_t	new_swnd = 0;
449 	conn_t		*nconnp;
450 	conn_t		*connp = tcp->tcp_connp;
451 	tcp_stack_t	*tcps = tcp->tcp_tcps;
452 
453 	BUMP_LOCAL(tcp->tcp_ibsegs);
454 	DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);
455 
456 	flags = (unsigned int)tcpha->tha_flags & 0xFF;
457 	new_swnd = ntohs(tcpha->tha_win) <<
458 	    ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);
459 	if (tcp->tcp_snd_ts_ok) {
460 		if (!tcp_paws_check(tcp, tcpha, &tcpopt)) {
461 			tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
462 			    tcp->tcp_rnxt, TH_ACK);
463 			goto done;
464 		}
465 	}
466 	gap = seg_seq - tcp->tcp_rnxt;
467 	rgap = tcp->tcp_rwnd - (gap + seg_len);
468 	if (gap < 0) {
469 		TCPS_BUMP_MIB(tcps, tcpInDataDupSegs);
470 		TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes,
471 		    (seg_len > -gap ? -gap : seg_len));
472 		seg_len += gap;
473 		if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
474 			if (flags & TH_RST) {
475 				goto done;
476 			}
477 			if ((flags & TH_FIN) && seg_len == -1) {
478 				/*
479 				 * When TCP receives a duplicate FIN in
480 				 * TIME_WAIT state, restart the 2 MSL timer.
481 				 * See page 73 in RFC 793. Make sure this TCP
482 				 * is already on the TIME_WAIT list. If not,
483 				 * just restart the timer.
484 				 */
485 				if (TCP_IS_DETACHED(tcp)) {
486 					if (tcp_time_wait_remove(tcp, NULL) ==
487 					    B_TRUE) {
488 						tcp_time_wait_append(tcp);
489 						TCP_DBGSTAT(tcps,
490 						    tcp_rput_time_wait);
491 					}
492 				} else {
493 					ASSERT(tcp != NULL);
494 					TCP_TIMER_RESTART(tcp,
495 					    tcps->tcps_time_wait_interval);
496 				}
497 				tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
498 				    tcp->tcp_rnxt, TH_ACK);
499 				goto done;
500 			}
501 			flags |=  TH_ACK_NEEDED;
502 			seg_len = 0;
503 			goto process_ack;
504 		}
505 
506 		/* Fix seg_seq, and chew the gap off the front. */
507 		seg_seq = tcp->tcp_rnxt;
508 	}
509 
510 	if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
511 		/*
512 		 * Make sure that when we accept the connection, pick
513 		 * an ISS greater than (tcp_snxt + ISS_INCR/2) for the
514 		 * old connection.
515 		 *
516 		 * The next ISS generated is equal to tcp_iss_incr_extra
517 		 * + ISS_INCR/2 + other components depending on the
518 		 * value of tcp_strong_iss.  We pre-calculate the new
519 		 * ISS here and compare with tcp_snxt to determine if
520 		 * we need to make adjustment to tcp_iss_incr_extra.
521 		 *
522 		 * The above calculation is ugly and is a
523 		 * waste of CPU cycles...
524 		 */
525 		uint32_t new_iss = tcps->tcps_iss_incr_extra;
526 		int32_t adj;
527 		ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;
528 
529 		switch (tcps->tcps_strong_iss) {
530 		case 2: {
531 			/* Add time and MD5 components. */
532 			uint32_t answer[4];
533 			struct {
534 				uint32_t ports;
535 				in6_addr_t src;
536 				in6_addr_t dst;
537 			} arg;
538 			MD5_CTX context;
539 
540 			mutex_enter(&tcps->tcps_iss_key_lock);
541 			context = tcps->tcps_iss_key;
542 			mutex_exit(&tcps->tcps_iss_key_lock);
543 			arg.ports = connp->conn_ports;
544 			/* We use MAPPED addresses in tcp_iss_init */
545 			arg.src = connp->conn_laddr_v6;
546 			arg.dst = connp->conn_faddr_v6;
547 			MD5Update(&context, (uchar_t *)&arg,
548 			    sizeof (arg));
549 			MD5Final((uchar_t *)answer, &context);
550 			answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
551 			new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
552 			break;
553 		}
554 		case 1:
555 			/* Add time component and min random (i.e. 1). */
556 			new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
557 			break;
558 		default:
559 			/* Add only time component. */
560 			new_iss += (uint32_t)gethrestime_sec() * ISS_INCR;
561 			break;
562 		}
563 		if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
564 			/*
565 			 * New ISS not guaranteed to be ISS_INCR/2
566 			 * ahead of the current tcp_snxt, so add the
567 			 * difference to tcp_iss_incr_extra.
568 			 */
569 			tcps->tcps_iss_incr_extra += adj;
570 		}
571 		/*
572 		 * If tcp_clean_death() can not perform the task now,
573 		 * drop the SYN packet and let the other side re-xmit.
574 		 * Otherwise pass the SYN packet back in, since the
575 		 * old tcp state has been cleaned up or freed.
576 		 */
577 		if (tcp_clean_death(tcp, 0) == -1)
578 			goto done;
579 		nconnp = ipcl_classify(mp, ira, ipst);
580 		if (nconnp != NULL) {
581 			TCP_STAT(tcps, tcp_time_wait_syn_success);
582 			/* Drops ref on nconnp */
583 			tcp_reinput(nconnp, mp, ira, ipst);
584 			return;
585 		}
586 		goto done;
587 	}
588 
589 	/*
590 	 * rgap is the amount of stuff received out of window.  A negative
591 	 * value is the amount out of window.
592 	 */
593 	if (rgap < 0) {
594 		TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
595 		TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
596 		/* Fix seg_len and make sure there is something left. */
597 		seg_len += rgap;
598 		if (seg_len <= 0) {
599 			if (flags & TH_RST) {
600 				goto done;
601 			}
602 			flags |=  TH_ACK_NEEDED;
603 			seg_len = 0;
604 			goto process_ack;
605 		}
606 	}
607 	/*
608 	 * Check whether we can update tcp_ts_recent.  This test is
609 	 * NOT the one in RFC 1323 3.4.  It is from Braden, 1993, "TCP
610 	 * Extensions for High Performance: An Update", Internet Draft.
611 	 */
612 	if (tcp->tcp_snd_ts_ok &&
613 	    TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
614 	    SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
615 		tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
616 		tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64();
617 	}
618 
619 	if (seg_seq != tcp->tcp_rnxt && seg_len > 0) {
620 		/* Always ack out of order packets */
621 		flags |= TH_ACK_NEEDED;
622 		seg_len = 0;
623 	} else if (seg_len > 0) {
624 		TCPS_BUMP_MIB(tcps, tcpInClosed);
625 		TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
626 		TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
627 	}
628 	if (flags & TH_RST) {
629 		(void) tcp_clean_death(tcp, 0);
630 		goto done;
631 	}
632 	if (flags & TH_SYN) {
633 		tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1,
634 		    TH_RST|TH_ACK);
635 		/*
636 		 * Do not delete the TCP structure if it is in
637 		 * TIME_WAIT state.  Refer to RFC 1122, 4.2.2.13.
638 		 */
639 		goto done;
640 	}
641 process_ack:
642 	if (flags & TH_ACK) {
643 		bytes_acked = (int)(seg_ack - tcp->tcp_suna);
644 		if (bytes_acked <= 0) {
645 			if (bytes_acked == 0 && seg_len == 0 &&
646 			    new_swnd == tcp->tcp_swnd)
647 				TCPS_BUMP_MIB(tcps, tcpInDupAck);
648 		} else {
649 			/* Acks something not sent */
650 			flags |= TH_ACK_NEEDED;
651 		}
652 	}
653 	if (flags & TH_ACK_NEEDED) {
654 		/*
655 		 * Time to send an ack for some reason.
656 		 */
657 		tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
658 		    tcp->tcp_rnxt, TH_ACK);
659 	}
660 done:
661 	freemsg(mp);
662 }
663