xref: /illumos-gate/usr/src/uts/common/inet/tcp/tcp_time_wait.c (revision 904e51f67bfac9f3ec88d9254757474c448808eb)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * This file contains functions related to TCP time wait processing.  Also
 * refer to the time wait handling comments in tcp_impl.h.
 */

#include <sys/types.h>
#include <sys/strsun.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>
#include <sys/callo.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/tcp_cluster.h>

static void	tcp_timewait_close(void *, mblk_t *, void *, ip_recv_attr_t *);

/*
 * TCP_TIME_WAIT_DELAY governs how often the time_wait_collector runs.
 * Running it every 5 seconds seems to give the best results.
 */
#define	TCP_TIME_WAIT_DELAY ((hrtime_t)5 * NANOSEC)

/*
 * Remove a connection from the list of detached TIME_WAIT connections.
 * Returns B_FALSE if the connection is no longer on the list, i.e. an
 * earlier call to tcp_time_wait_remove() already removed it; otherwise
 * it returns B_TRUE.
 */
boolean_t
tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait)
{
	boolean_t	locked = B_FALSE;

	if (tcp_time_wait == NULL) {
		tcp_time_wait = *((tcp_squeue_priv_t **)
		    squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP));
		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
		locked = B_TRUE;
	} else {
		ASSERT(MUTEX_HELD(&tcp_time_wait->tcp_time_wait_lock));
	}

	/* 0 means that the tcp_t has not been added to the time wait list. */
	if (tcp->tcp_time_wait_expire == 0) {
		ASSERT(tcp->tcp_time_wait_next == NULL);
		ASSERT(tcp->tcp_time_wait_prev == NULL);
		if (locked)
			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
		return (B_FALSE);
	}
	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);

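	/*
	 * Unlink tcp from the doubly-linked TIME_WAIT list.  There are
	 * three cases: tcp is the head, the tail, or an interior node;
	 * the list's head and tail pointers must be patched up
	 * accordingly.
	 */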
	if (tcp == tcp_time_wait->tcp_time_wait_head) {
		ASSERT(tcp->tcp_time_wait_prev == NULL);
		tcp_time_wait->tcp_time_wait_head = tcp->tcp_time_wait_next;
		if (tcp_time_wait->tcp_time_wait_head != NULL) {
			tcp_time_wait->tcp_time_wait_head->tcp_time_wait_prev =
			    NULL;
		} else {
			tcp_time_wait->tcp_time_wait_tail = NULL;
		}
	} else if (tcp == tcp_time_wait->tcp_time_wait_tail) {
		ASSERT(tcp->tcp_time_wait_next == NULL);
		tcp_time_wait->tcp_time_wait_tail = tcp->tcp_time_wait_prev;
		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = NULL;
	} else {
		ASSERT(tcp->tcp_time_wait_prev->tcp_time_wait_next == tcp);
		ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == tcp);
		tcp->tcp_time_wait_prev->tcp_time_wait_next =
		    tcp->tcp_time_wait_next;
		tcp->tcp_time_wait_next->tcp_time_wait_prev =
		    tcp->tcp_time_wait_prev;
	}
	tcp->tcp_time_wait_next = NULL;
	tcp->tcp_time_wait_prev = NULL;
	tcp->tcp_time_wait_expire = 0;

	if (locked)
		mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
	return (B_TRUE);
}

/*
 * Add a connection to the list of detached TIME_WAIT connections
 * and set its time to expire.
 */
void
tcp_time_wait_append(tcp_t *tcp)
{
	tcp_stack_t	*tcps = tcp->tcp_tcps;
	squeue_t	*sqp = tcp->tcp_connp->conn_sqp;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	tcp_timers_stop(tcp);

	/* Freed above */
	ASSERT(tcp->tcp_timer_tid == 0);
	ASSERT(tcp->tcp_ack_tid == 0);

	/* must have happened at the time of detaching the tcp */
	ASSERT(tcp->tcp_ptpahn == NULL);
	ASSERT(tcp->tcp_flow_stopped == 0);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	ASSERT(tcp->tcp_time_wait_expire == 0);
	ASSERT(tcp->tcp_listener == NULL);

	tcp->tcp_time_wait_expire = ddi_get_lbolt64();
	/*
	 * Since tcp_time_wait_expire is lbolt64, it should not wrap around
	 * in practice.  Hence it cannot be 0.  Note that zero means that the
	 * tcp_t is not in the TIME_WAIT list.
	 */
	tcp->tcp_time_wait_expire += MSEC_TO_TICK(
	    tcps->tcps_time_wait_interval);

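	/*
	 * For example (assuming the default tcps_time_wait_interval of
	 * one minute), the expiry computed above works out to:
	 *
	 *	expire = lbolt64_now + MSEC_TO_TICK(60000)
	 *
	 * i.e. the connection becomes eligible for reaping roughly one
	 * minute from now, measured in lbolt ticks.
	 */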
	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	TCP_DBGSTAT(tcps, tcp_time_wait);

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	if (tcp_time_wait->tcp_time_wait_head == NULL) {
		ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL);
		tcp_time_wait->tcp_time_wait_head = tcp;

		/*
		 * Even if the list was empty before, there may be a timer
		 * running since a tcp_t can be removed from the list
		 * in other places, such as tcp_clean_death().  So check if
		 * a timer is needed.
		 */
		if (tcp_time_wait->tcp_time_wait_tid == 0) {
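			/*
			 * tcps_time_wait_interval is in milliseconds;
			 * multiplying by MICROSEC (10^6) converts it to
			 * the nanosecond expiration that timeout_generic()
			 * expects.  The extra millisecond biases the wakeup
			 * to just after the first entry's expiry.
			 */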
			tcp_time_wait->tcp_time_wait_tid =
			    timeout_generic(CALLOUT_NORMAL,
			    tcp_time_wait_collector, sqp,
			    (hrtime_t)(tcps->tcps_time_wait_interval + 1) *
			    MICROSEC, CALLOUT_TCP_RESOLUTION,
			    CALLOUT_FLAG_ROUNDUP);
		}
	} else {
		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
		ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state ==
		    TCPS_TIME_WAIT);
		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = tcp;
		tcp->tcp_time_wait_prev = tcp_time_wait->tcp_time_wait_tail;

		/* The list is not empty, so a timer must be running. */
		ASSERT(tcp_time_wait->tcp_time_wait_tid != 0);
	}
	tcp_time_wait->tcp_time_wait_tail = tcp;
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}

/*
 * Wrapper to call tcp_close_detached() via squeue to clean up a TIME_WAIT
 * tcp_t.  Used in tcp_time_wait_collector().
 */
/* ARGSUSED */
static void
tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
{
	conn_t	*connp = (conn_t *)arg;
	tcp_t	*tcp = connp->conn_tcp;

	ASSERT(tcp != NULL);
	if (tcp->tcp_state == TCPS_CLOSED) {
		return;
	}

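	/*
	 * An AF_INET6 socket can carry IPv4 traffic via IPv4-mapped
	 * addresses, which is why conn_ipversion may legitimately be
	 * IPV4_VERSION for an AF_INET6 connection below.
	 */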
	ASSERT((connp->conn_family == AF_INET &&
	    connp->conn_ipversion == IPV4_VERSION) ||
	    (connp->conn_family == AF_INET6 &&
	    (connp->conn_ipversion == IPV4_VERSION ||
	    connp->conn_ipversion == IPV6_VERSION)));
	ASSERT(!tcp->tcp_listener);

	ASSERT(TCP_IS_DETACHED(tcp));

	/*
	 * Because a detached tcp_t has no upstream client to rebind or
	 * tcp_close() it later, we axe the connection here and now.
	 */
	tcp_close_detached(tcp);
}

/*
 * Blows away all tcps whose TIME_WAIT has expired.  List traversal is done
 * forwards from the head.  This walks all stack instances since
 * tcp_time_wait remains global across all stacks.
 */
/* ARGSUSED */
void
tcp_time_wait_collector(void *arg)
{
	tcp_t *tcp;
	int64_t now;
	mblk_t *mp;
	conn_t *connp;
	kmutex_t *lock;
	boolean_t removed;
	extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
	    uint8_t *, in_port_t, uint8_t *, in_port_t, void *);

	squeue_t *sqp = (squeue_t *)arg;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	tcp_time_wait->tcp_time_wait_tid = 0;

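	/*
	 * The free list holds tcp_t structures that were cleaned up on a
	 * previous pass and cached for reuse.  If the head is marked
	 * tcp_in_free_list, a full collection interval has gone by without
	 * any of the entries being reused, so release them now.
	 */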
	if (tcp_time_wait->tcp_free_list != NULL &&
	    tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
		TCP_G_STAT(tcp_freelist_cleanup);
		while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
			tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
			tcp->tcp_time_wait_next = NULL;
			tcp_time_wait->tcp_free_list_cnt--;
			ASSERT(tcp->tcp_tcps == NULL);
			CONN_DEC_REF(tcp->tcp_connp);
		}
		ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
	}

	/*
	 * In order to reap time waits reliably, we should use a
	 * source of time that is not adjustable by the user -- hence
	 * the call to ddi_get_lbolt64().
	 */
	now = ddi_get_lbolt64();
	while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
		/*
		 * lbolt64 should not wrap around in practice...  So we can
		 * do a direct comparison.
		 */
		if (now < tcp->tcp_time_wait_expire)
			break;

		removed = tcp_time_wait_remove(tcp, tcp_time_wait);
		ASSERT(removed);

		connp = tcp->tcp_connp;
		ASSERT(connp->conn_fanout != NULL);
		lock = &connp->conn_fanout->connf_lock;
		/*
		 * This is essentially a TW reclaim fast path optimization for
		 * performance where the timewait collector checks under the
		 * fanout lock (so that no one else can get access to the
		 * conn_t) that the refcnt is 2, i.e. one for TCP and one for
		 * the classifier hash list.  If the ref count is indeed 2, we
		 * can just remove the conn under the fanout lock and avoid
		 * cleaning up the conn under the squeue, provided that
		 * clustering callbacks are not enabled.  If clustering is
		 * enabled, we need to make the clustering callback before
		 * setting the CONDEMNED flag and after dropping all locks,
		 * so we forego this optimization and fall back to the slow
		 * path.  Also please see the comments in tcp_closei_local
		 * regarding the refcnt logic.
		 *
		 * Since we are holding the tcp_time_wait_lock, it's better
		 * not to block on the fanout lock, because other connections
		 * can't add themselves to the time_wait list while we hold
		 * it.  So we do a tryenter instead of mutex_enter.
		 */
		if (mutex_tryenter(lock)) {
			mutex_enter(&connp->conn_lock);
			if ((connp->conn_ref == 2) &&
			    (cl_inet_disconnect == NULL)) {
				ipcl_hash_remove_locked(connp,
				    connp->conn_fanout);
				/*
				 * Set the CONDEMNED flag now itself so that
				 * the refcnt cannot increase due to any
				 * walker.
				 */
				connp->conn_state_flags |= CONN_CONDEMNED;
				mutex_exit(lock);
				mutex_exit(&connp->conn_lock);
				if (tcp_time_wait->tcp_free_list_cnt <
				    tcp_free_list_max_cnt) {
					/* Add to head of tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_cleanup(tcp);
					ASSERT(connp->conn_latch == NULL);
					ASSERT(connp->conn_policy == NULL);
					ASSERT(tcp->tcp_tcps == NULL);
					ASSERT(connp->conn_netstack == NULL);

					mutex_enter(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp->tcp_time_wait_next =
					    tcp_time_wait->tcp_free_list;
					tcp_time_wait->tcp_free_list = tcp;
					tcp_time_wait->tcp_free_list_cnt++;
					continue;
				} else {
					/* Do not add to tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_bind_hash_remove(tcp);
					ixa_cleanup(tcp->tcp_connp->conn_ixa);
					tcp_ipsec_cleanup(tcp);
					CONN_DEC_REF(tcp->tcp_connp);
				}
			} else {
				CONN_INC_REF_LOCKED(connp);
				mutex_exit(lock);
				mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
				mutex_exit(&connp->conn_lock);
				/*
				 * We can reuse the closemp here since the
				 * conn has been detached (otherwise we
				 * wouldn't even be on the time_wait list).
				 * tcp_closemp_used can safely be changed
				 * without taking a lock as no other thread
				 * can concurrently access it at this point
				 * in the connection lifecycle.
				 */

				if (tcp->tcp_closemp.b_prev == NULL)
					tcp->tcp_closemp_used = B_TRUE;
				else
					cmn_err(CE_PANIC,
					    "tcp_timewait_collector: "
					    "concurrent use of tcp_closemp: "
					    "connp %p tcp %p\n", (void *)connp,
					    (void *)tcp);

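				/*
				 * Hand the connection to its squeue for the
				 * actual close; SQ_FILL only enqueues the
				 * request, so tcp_timewait_close() runs
				 * later in squeue context rather than here.
				 */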
				TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
				mp = &tcp->tcp_closemp;
				SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
				    tcp_timewait_close, connp, NULL,
				    SQ_FILL, SQTAG_TCP_TIMEWAIT);
			}
		} else {
			mutex_enter(&connp->conn_lock);
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
			mutex_exit(&connp->conn_lock);
			/*
			 * We can reuse the closemp here since the conn has
			 * been detached (otherwise we wouldn't even be on
			 * the time_wait list).  tcp_closemp_used can safely
			 * be changed without taking a lock as no other
			 * thread can concurrently access it at this point
			 * in the connection lifecycle.
			 */

			if (tcp->tcp_closemp.b_prev == NULL)
				tcp->tcp_closemp_used = B_TRUE;
			else
				cmn_err(CE_PANIC, "tcp_timewait_collector: "
				    "concurrent use of tcp_closemp: "
				    "connp %p tcp %p\n", (void *)connp,
				    (void *)tcp);

			TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
			mp = &tcp->tcp_closemp;
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
			    tcp_timewait_close, connp, NULL,
			    SQ_FILL, SQTAG_TCP_TIMEWAIT);
		}
		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	}

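	/*
	 * Mark the head of the free list so that, if no entry has been
	 * reused by the time the collector runs again, the next pass
	 * purges the whole list (see the cleanup at the top of this
	 * function).
	 */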
	if (tcp_time_wait->tcp_free_list != NULL)
		tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;

	/*
	 * If the time wait list is not empty and there is no timer running,
	 * restart it.
	 */
	if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL &&
	    tcp_time_wait->tcp_time_wait_tid == 0) {
		hrtime_t firetime;

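		/*
		 * tcp_time_wait_expire and now are both in lbolt ticks;
		 * TICK_TO_NSEC() converts their difference to the
		 * nanosecond expiration that timeout_generic() takes.
		 */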
		firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now);
		/* This ensures that we won't wake up too often. */
		firetime = MAX(TCP_TIME_WAIT_DELAY, firetime);
		tcp_time_wait->tcp_time_wait_tid =
		    timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector,
		    sqp, firetime, CALLOUT_TCP_RESOLUTION,
		    CALLOUT_FLAG_ROUNDUP);
	}
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}

/*
 * tcp_time_wait_processing() handles processing of incoming packets when
 * the tcp_t is in the TIME_WAIT state.
 *
 * A TIME_WAIT tcp_t that has an associated open TCP end point (not in
 * detached state) is never put on the time wait list.
 */
void
tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
    uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira)
{
	int32_t		bytes_acked;
	int32_t		gap;
	int32_t		rgap;
	tcp_opt_t	tcpopt;
	uint_t		flags;
	uint32_t	new_swnd = 0;
	conn_t		*nconnp;
	conn_t		*connp = tcp->tcp_connp;
	tcp_stack_t	*tcps = tcp->tcp_tcps;

	BUMP_LOCAL(tcp->tcp_ibsegs);
	DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);

	flags = (unsigned int)tcpha->tha_flags & 0xFF;
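	/*
	 * Compute the peer's advertised send window.  Per RFC 1323, the
	 * window field of a SYN segment is never scaled, so the shift is
	 * applied only to non-SYN segments.
	 */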
	new_swnd = ntohs(tcpha->tha_win) <<
	    ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);
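	/*
	 * If timestamps are in use, run the PAWS (Protection Against
	 * Wrapped Sequence numbers, RFC 1323) test; a segment carrying an
	 * old timestamp is dropped after re-ACKing our current state.
	 */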
	if (tcp->tcp_snd_ts_ok) {
		if (!tcp_paws_check(tcp, tcpha, &tcpopt)) {
			tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
			    tcp->tcp_rnxt, TH_ACK);
			goto done;
		}
	}
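	/*
	 * gap is the distance from the next expected sequence (tcp_rnxt)
	 * to the start of this segment; a negative gap means the segment
	 * begins with data we have already received.  rgap is what remains
	 * of the receive window beyond the segment's end; a negative rgap
	 * means the segment extends past the window.
	 */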
	gap = seg_seq - tcp->tcp_rnxt;
	rgap = tcp->tcp_rwnd - (gap + seg_len);
	if (gap < 0) {
		TCPS_BUMP_MIB(tcps, tcpInDataDupSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes,
		    (seg_len > -gap ? -gap : seg_len));
		seg_len += gap;
		if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
			if (flags & TH_RST) {
				goto done;
			}
			if ((flags & TH_FIN) && seg_len == -1) {
				/*
				 * When TCP receives a duplicate FIN in
				 * TIME_WAIT state, restart the 2 MSL timer.
				 * See page 73 in RFC 793.  Make sure this TCP
				 * is already on the TIME_WAIT list.  If not,
				 * just restart the timer.
				 */
				if (TCP_IS_DETACHED(tcp)) {
					if (tcp_time_wait_remove(tcp, NULL) ==
					    B_TRUE) {
						tcp_time_wait_append(tcp);
						TCP_DBGSTAT(tcps,
						    tcp_rput_time_wait);
					}
				} else {
					ASSERT(tcp != NULL);
					TCP_TIMER_RESTART(tcp,
					    tcps->tcps_time_wait_interval);
				}
				tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
				    tcp->tcp_rnxt, TH_ACK);
				goto done;
			}
			flags |= TH_ACK_NEEDED;
			seg_len = 0;
			goto process_ack;
		}

		/* Fix seg_seq, and chew the gap off the front. */
		seg_seq = tcp->tcp_rnxt;
	}

	if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
		/*
		 * Make sure that when we accept the connection, we pick
		 * an ISS greater than (tcp_snxt + ISS_INCR/2) for the
		 * old connection.
		 *
		 * The next ISS generated is equal to tcp_iss_incr_extra
		 * + ISS_INCR/2 + other components depending on the
		 * value of tcp_strong_iss.  We pre-calculate the new
		 * ISS here and compare with tcp_snxt to determine if
		 * we need to make an adjustment to tcp_iss_incr_extra.
		 *
		 * The above calculation is ugly and is a
		 * waste of CPU cycles...
		 */
		uint32_t new_iss = tcps->tcps_iss_incr_extra;
		int32_t adj;
		ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;

		switch (tcps->tcps_strong_iss) {
		case 2: {
			/* Add time and MD5 components. */
			uint32_t answer[4];
			struct {
				uint32_t ports;
				in6_addr_t src;
				in6_addr_t dst;
			} arg;
			MD5_CTX context;

			mutex_enter(&tcps->tcps_iss_key_lock);
			context = tcps->tcps_iss_key;
			mutex_exit(&tcps->tcps_iss_key_lock);
			arg.ports = connp->conn_ports;
			/* We use MAPPED addresses in tcp_iss_init */
			arg.src = connp->conn_laddr_v6;
			arg.dst = connp->conn_faddr_v6;
			MD5Update(&context, (uchar_t *)&arg,
			    sizeof (arg));
			MD5Final((uchar_t *)answer, &context);
			answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
			break;
		}
		case 1:
			/* Add time component and min random (i.e. 1). */
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
			break;
		default:
			/* Add only time component. */
			new_iss += (uint32_t)gethrestime_sec() * ISS_INCR;
			break;
		}
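		/*
		 * To summarize the switch above: tcps_strong_iss == 2 mixes
		 * a keyed MD5 hash of the connection's addresses and ports
		 * with high-resolution time, 1 uses high-resolution time
		 * plus a minimal random component, and 0 uses coarse
		 * wall-clock time only.
		 */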
		if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
			/*
			 * New ISS not guaranteed to be ISS_INCR/2
			 * ahead of the current tcp_snxt, so add the
			 * difference to tcp_iss_incr_extra.
			 */
			tcps->tcps_iss_incr_extra += adj;
		}
		/*
		 * If tcp_clean_death() cannot perform the task now,
		 * drop the SYN packet and let the other side re-xmit.
		 * Otherwise pass the SYN packet back in, since the
		 * old tcp state has been cleaned up or freed.
		 */
		if (tcp_clean_death(tcp, 0) == -1)
			goto done;
		nconnp = ipcl_classify(mp, ira, ipst);
		if (nconnp != NULL) {
			TCP_STAT(tcps, tcp_time_wait_syn_success);
			/* Drops ref on nconnp */
			tcp_reinput(nconnp, mp, ira, ipst);
			return;
		}
		goto done;
	}

	/*
	 * A negative rgap means that some or all of the segment lies past
	 * the right edge of the receive window; -rgap is then the number
	 * of bytes received out of window.
	 */
	if (rgap < 0) {
		TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
		/* Fix seg_len and make sure there is something left. */
		seg_len += rgap;
		if (seg_len <= 0) {
			if (flags & TH_RST) {
				goto done;
			}
			flags |= TH_ACK_NEEDED;
			seg_len = 0;
			goto process_ack;
		}
	}
	/*
	 * Check whether we can update tcp_ts_recent.  This test is
	 * NOT the one in RFC 1323 3.4.  It is from Braden, 1993, "TCP
	 * Extensions for High Performance: An Update", Internet Draft.
	 */
	if (tcp->tcp_snd_ts_ok &&
	    TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
	    SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
		tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
		tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64();
	}

	if (seg_seq != tcp->tcp_rnxt && seg_len > 0) {
		/* Always ack out of order packets */
		flags |= TH_ACK_NEEDED;
		seg_len = 0;
	} else if (seg_len > 0) {
		TCPS_BUMP_MIB(tcps, tcpInClosed);
		TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
	}
	if (flags & TH_RST) {
		(void) tcp_clean_death(tcp, 0);
		goto done;
	}
	if (flags & TH_SYN) {
		tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1,
		    TH_RST|TH_ACK);
		/*
		 * Do not delete the TCP structure if it is in
		 * TIME_WAIT state.  Refer to RFC 1122, 4.2.2.13.
		 */
		goto done;
	}
process_ack:
	if (flags & TH_ACK) {
		bytes_acked = (int)(seg_ack - tcp->tcp_suna);
		if (bytes_acked <= 0) {
			if (bytes_acked == 0 && seg_len == 0 &&
			    new_swnd == tcp->tcp_swnd)
				TCPS_BUMP_MIB(tcps, tcpInDupAck);
		} else {
			/* Acks something not sent */
			flags |= TH_ACK_NEEDED;
		}
	}
	if (flags & TH_ACK_NEEDED) {
		/*
		 * Time to send an ack for some reason.
		 */
		tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
		    tcp->tcp_rnxt, TH_ACK);
	}
done:
	freemsg(mp);
}