xref: /illumos-gate/usr/src/uts/common/inet/tcp/tcp_time_wait.c (revision 8fd04b8338ed5093ec2d1e668fa620b7de44c177)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * This file contains functions related to TCP time wait processing.  Also
29  * refer to the time wait handling comments in tcp_impl.h.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/strsun.h>
34 #include <sys/squeue_impl.h>
35 #include <sys/squeue.h>
36 #include <sys/callo.h>
37 
38 #include <inet/common.h>
39 #include <inet/ip.h>
40 #include <inet/tcp.h>
41 #include <inet/tcp_impl.h>
42 #include <inet/tcp_cluster.h>
43 
44 static void	tcp_timewait_close(void *, mblk_t *, void *, ip_recv_attr_t *);
45 
46 /*
47  * TCP_TIME_WAIT_DELAY governs how often the time_wait_collector runs.
48  * Running it every 5 seconds seems to give the best results.
49  */
50 #define	TCP_TIME_WAIT_DELAY ((hrtime_t)5 * NANOSEC)
51 
/*
 * Remove a connection from the list of detached TIME_WAIT connections.
 * It returns B_FALSE if it can't remove the connection from the list
 * as the connection has already been removed from the list due to an
 * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
 *
 * If tcp_time_wait is NULL, the per-squeue state is looked up from the
 * connection's squeue and the time wait lock is acquired (and released)
 * here; otherwise the caller must already hold tcp_time_wait_lock.
 */
boolean_t
tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait)
{
	boolean_t	locked = B_FALSE;

	if (tcp_time_wait == NULL) {
		/* Caller did not supply (or lock) the per-squeue state. */
		tcp_time_wait = *((tcp_squeue_priv_t **)
		    squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP));
		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
		locked = B_TRUE;
	} else {
		ASSERT(MUTEX_HELD(&tcp_time_wait->tcp_time_wait_lock));
	}

	/* 0 means that the tcp_t has not been added to the time wait list. */
	if (tcp->tcp_time_wait_expire == 0) {
		ASSERT(tcp->tcp_time_wait_next == NULL);
		ASSERT(tcp->tcp_time_wait_prev == NULL);
		if (locked)
			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
		return (B_FALSE);
	}
	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);

	/*
	 * Unlink from the doubly-linked time wait list, handling the
	 * head, tail, and interior cases separately.
	 */
	if (tcp == tcp_time_wait->tcp_time_wait_head) {
		ASSERT(tcp->tcp_time_wait_prev == NULL);
		tcp_time_wait->tcp_time_wait_head = tcp->tcp_time_wait_next;
		if (tcp_time_wait->tcp_time_wait_head != NULL) {
			tcp_time_wait->tcp_time_wait_head->tcp_time_wait_prev =
			    NULL;
		} else {
			/* List is now empty; clear the tail as well. */
			tcp_time_wait->tcp_time_wait_tail = NULL;
		}
	} else if (tcp == tcp_time_wait->tcp_time_wait_tail) {
		ASSERT(tcp->tcp_time_wait_next == NULL);
		tcp_time_wait->tcp_time_wait_tail = tcp->tcp_time_wait_prev;
		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = NULL;
	} else {
		ASSERT(tcp->tcp_time_wait_prev->tcp_time_wait_next == tcp);
		ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == tcp);
		tcp->tcp_time_wait_prev->tcp_time_wait_next =
		    tcp->tcp_time_wait_next;
		tcp->tcp_time_wait_next->tcp_time_wait_prev =
		    tcp->tcp_time_wait_prev;
	}
	/* Clearing tcp_time_wait_expire marks the tcp as off the list. */
	tcp->tcp_time_wait_next = NULL;
	tcp->tcp_time_wait_prev = NULL;
	tcp->tcp_time_wait_expire = 0;

	if (locked)
		mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
	return (B_TRUE);
}
113 
114 /*
115  * Add a connection to the list of detached TIME_WAIT connections
116  * and set its time to expire.
117  */
118 void
119 tcp_time_wait_append(tcp_t *tcp)
120 {
121 	tcp_stack_t	*tcps = tcp->tcp_tcps;
122 	tcp_squeue_priv_t *tcp_time_wait =
123 	    *((tcp_squeue_priv_t **)squeue_getprivate(tcp->tcp_connp->conn_sqp,
124 	    SQPRIVATE_TCP));
125 
126 	tcp_timers_stop(tcp);
127 
128 	/* Freed above */
129 	ASSERT(tcp->tcp_timer_tid == 0);
130 	ASSERT(tcp->tcp_ack_tid == 0);
131 
132 	/* must have happened at the time of detaching the tcp */
133 	ASSERT(tcp->tcp_ptpahn == NULL);
134 	ASSERT(tcp->tcp_flow_stopped == 0);
135 	ASSERT(tcp->tcp_time_wait_next == NULL);
136 	ASSERT(tcp->tcp_time_wait_prev == NULL);
137 	ASSERT(tcp->tcp_time_wait_expire == NULL);
138 	ASSERT(tcp->tcp_listener == NULL);
139 
140 	tcp->tcp_time_wait_expire = ddi_get_lbolt();
141 	/*
142 	 * The value computed below in tcp->tcp_time_wait_expire may
143 	 * appear negative or wrap around. That is ok since our
144 	 * interest is only in the difference between the current lbolt
145 	 * value and tcp->tcp_time_wait_expire. But the value should not
146 	 * be zero, since it means the tcp is not in the TIME_WAIT list.
147 	 * The corresponding comparison in tcp_time_wait_collector() uses
148 	 * modular arithmetic.
149 	 */
150 	tcp->tcp_time_wait_expire += MSEC_TO_TICK(
151 	    tcps->tcps_time_wait_interval);
152 	if (tcp->tcp_time_wait_expire == 0)
153 		tcp->tcp_time_wait_expire = 1;
154 
155 	ASSERT(TCP_IS_DETACHED(tcp));
156 	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
157 	ASSERT(tcp->tcp_time_wait_next == NULL);
158 	ASSERT(tcp->tcp_time_wait_prev == NULL);
159 	TCP_DBGSTAT(tcps, tcp_time_wait);
160 
161 	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
162 	if (tcp_time_wait->tcp_time_wait_head == NULL) {
163 		ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL);
164 		tcp_time_wait->tcp_time_wait_head = tcp;
165 	} else {
166 		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
167 		ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state ==
168 		    TCPS_TIME_WAIT);
169 		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = tcp;
170 		tcp->tcp_time_wait_prev = tcp_time_wait->tcp_time_wait_tail;
171 	}
172 	tcp_time_wait->tcp_time_wait_tail = tcp;
173 	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
174 }
175 
176 /*
177  * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
178  * tcp_t.  Used in tcp_time_wait_collector().
179  */
180 /* ARGSUSED */
181 static void
182 tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
183 {
184 	conn_t	*connp = (conn_t *)arg;
185 	tcp_t	*tcp = connp->conn_tcp;
186 
187 	ASSERT(tcp != NULL);
188 	if (tcp->tcp_state == TCPS_CLOSED) {
189 		return;
190 	}
191 
192 	ASSERT((connp->conn_family == AF_INET &&
193 	    connp->conn_ipversion == IPV4_VERSION) ||
194 	    (connp->conn_family == AF_INET6 &&
195 	    (connp->conn_ipversion == IPV4_VERSION ||
196 	    connp->conn_ipversion == IPV6_VERSION)));
197 	ASSERT(!tcp->tcp_listener);
198 
199 	ASSERT(TCP_IS_DETACHED(tcp));
200 
201 	/*
202 	 * Because they have no upstream client to rebind or tcp_close()
203 	 * them later, we axe the connection here and now.
204 	 */
205 	tcp_close_detached(tcp);
206 }
207 
/*
 * Blows away all tcps whose TIME_WAIT has expired.  List traversal is done
 * forwards from the head; since tcp_time_wait_append() keeps the list in
 * expiry order, the scan stops at the first unexpired entry.  This walks
 * all stack instances since tcp_time_wait remains global across all stacks.
 * The routine reschedules itself via timeout_generic() before returning.
 */
/* ARGSUSED */
void
tcp_time_wait_collector(void *arg)
{
	tcp_t *tcp;
	clock_t now;
	mblk_t *mp;
	conn_t *connp;
	kmutex_t *lock;
	boolean_t removed;
	extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
	    uint8_t *, in_port_t, uint8_t *, in_port_t, void *);

	squeue_t *sqp = (squeue_t *)arg;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	tcp_time_wait->tcp_time_wait_tid = 0;

	/*
	 * Drain the free list of cached tcp_t's, but only if the head was
	 * marked tcp_in_free_list at the end of the previous pass -- i.e.
	 * the cached entries have gone unused for a full collection
	 * interval.
	 */
	if (tcp_time_wait->tcp_free_list != NULL &&
	    tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
		TCP_G_STAT(tcp_freelist_cleanup);
		while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
			tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
			tcp->tcp_time_wait_next = NULL;
			tcp_time_wait->tcp_free_list_cnt--;
			ASSERT(tcp->tcp_tcps == NULL);
			CONN_DEC_REF(tcp->tcp_connp);
		}
		ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
	}

	/*
	 * In order to reap time waits reliably, we should use a
	 * source of time that is not adjustable by the user -- hence
	 * the call to ddi_get_lbolt().
	 */
	now = ddi_get_lbolt();
	while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
		/*
		 * Compare times using modular arithmetic, since
		 * lbolt can wrapover.  The list is in expiry order, so
		 * the first unexpired entry ends the scan.
		 */
		if ((now - tcp->tcp_time_wait_expire) < 0) {
			break;
		}

		removed = tcp_time_wait_remove(tcp, tcp_time_wait);
		ASSERT(removed);

		connp = tcp->tcp_connp;
		ASSERT(connp->conn_fanout != NULL);
		lock = &connp->conn_fanout->connf_lock;
		/*
		 * This is essentially a TW reclaim fast path optimization for
		 * performance where the timewait collector checks under the
		 * fanout lock (so that no one else can get access to the
		 * conn_t) that the refcnt is 2 i.e. one for TCP and one for
		 * the classifier hash list. If ref count is indeed 2, we can
		 * just remove the conn under the fanout lock and avoid
		 * cleaning up the conn under the squeue, provided that
		 * clustering callbacks are not enabled. If clustering is
		 * enabled, we need to make the clustering callback before
		 * setting the CONDEMNED flag and after dropping all locks and
		 * so we forego this optimization and fall back to the slow
		 * path. Also please see the comments in tcp_closei_local
		 * regarding the refcnt logic.
		 *
		 * Since we are holding the tcp_time_wait_lock, its better
		 * not to block on the fanout_lock because other connections
		 * can't add themselves to time_wait list. So we do a
		 * tryenter instead of mutex_enter.
		 */
		if (mutex_tryenter(lock)) {
			mutex_enter(&connp->conn_lock);
			if ((connp->conn_ref == 2) &&
			    (cl_inet_disconnect == NULL)) {
				/* Fast path: reclaim under the fanout lock. */
				ipcl_hash_remove_locked(connp,
				    connp->conn_fanout);
				/*
				 * Set the CONDEMNED flag now itself so that
				 * the refcnt cannot increase due to any
				 * walker.
				 */
				connp->conn_state_flags |= CONN_CONDEMNED;
				mutex_exit(lock);
				mutex_exit(&connp->conn_lock);
				if (tcp_time_wait->tcp_free_list_cnt <
				    tcp_free_list_max_cnt) {
					/* Add to head of tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_cleanup(tcp);
					ASSERT(connp->conn_latch == NULL);
					ASSERT(connp->conn_policy == NULL);
					ASSERT(tcp->tcp_tcps == NULL);
					ASSERT(connp->conn_netstack == NULL);

					mutex_enter(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp->tcp_time_wait_next =
					    tcp_time_wait->tcp_free_list;
					tcp_time_wait->tcp_free_list = tcp;
					tcp_time_wait->tcp_free_list_cnt++;
					continue;
				} else {
					/* Do not add to tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_bind_hash_remove(tcp);
					ixa_cleanup(tcp->tcp_connp->conn_ixa);
					tcp_ipsec_cleanup(tcp);
					CONN_DEC_REF(tcp->tcp_connp);
				}
			} else {
				/*
				 * Slow path: extra references or clustering
				 * callbacks present; hand the close off to
				 * the connection's squeue.
				 */
				CONN_INC_REF_LOCKED(connp);
				mutex_exit(lock);
				mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
				mutex_exit(&connp->conn_lock);
				/*
				 * We can reuse the closemp here since conn has
				 * detached (otherwise we wouldn't even be in
				 * time_wait list). tcp_closemp_used can safely
				 * be changed without taking a lock as no other
				 * thread can concurrently access it at this
				 * point in the connection lifecycle.
				 */

				if (tcp->tcp_closemp.b_prev == NULL)
					tcp->tcp_closemp_used = B_TRUE;
				else
					cmn_err(CE_PANIC,
					    "tcp_timewait_collector: "
					    "concurrent use of tcp_closemp: "
					    "connp %p tcp %p\n", (void *)connp,
					    (void *)tcp);

				TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
				mp = &tcp->tcp_closemp;
				SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
				    tcp_timewait_close, connp, NULL,
				    SQ_FILL, SQTAG_TCP_TIMEWAIT);
			}
		} else {
			/*
			 * Could not get the fanout lock without blocking;
			 * fall back to the squeue-based slow path close.
			 */
			mutex_enter(&connp->conn_lock);
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
			mutex_exit(&connp->conn_lock);
			/*
			 * We can reuse the closemp here since conn has
			 * detached (otherwise we wouldn't even be in
			 * time_wait list). tcp_closemp_used can safely
			 * be changed without taking a lock as no other
			 * thread can concurrently access it at this
			 * point in the connection lifecycle.
			 */

			if (tcp->tcp_closemp.b_prev == NULL)
				tcp->tcp_closemp_used = B_TRUE;
			else
				cmn_err(CE_PANIC, "tcp_timewait_collector: "
				    "concurrent use of tcp_closemp: "
				    "connp %p tcp %p\n", (void *)connp,
				    (void *)tcp);

			TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
			mp = &tcp->tcp_closemp;
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
			    tcp_timewait_close, connp, NULL,
			    SQ_FILL, SQTAG_TCP_TIMEWAIT);
		}
		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	}

	/*
	 * Mark the free list head so that next pass can tell the cached
	 * entries have survived a full interval unused (see drain above).
	 */
	if (tcp_time_wait->tcp_free_list != NULL)
		tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;

	/* Reschedule ourselves to run again in TCP_TIME_WAIT_DELAY. */
	tcp_time_wait->tcp_time_wait_tid =
	    timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector, sqp,
	    TCP_TIME_WAIT_DELAY, CALLOUT_TCP_RESOLUTION,
	    CALLOUT_FLAG_ROUNDUP);
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}
398 
/*
 * tcp_time_wait_processing() handles processing of incoming packets when
 * the tcp_t is in the TIME_WAIT state.
 *
 * A TIME_WAIT tcp_t that has an associated open TCP end point (not in
 * detached state) is never put on the time wait list.
 *
 * Consumes mp on all paths.  Segment handling follows RFC 793's TIME_WAIT
 * rules: duplicate FINs restart the 2MSL wait, an acceptable new SYN may
 * reincarnate the connection (after picking a safe new ISS), RST kills the
 * tcp, and anything needing acknowledgment gets an ACK.
 */
void
tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
    uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira)
{
	int32_t		bytes_acked;
	int32_t		gap;
	int32_t		rgap;
	tcp_opt_t	tcpopt;
	uint_t		flags;
	uint32_t	new_swnd = 0;
	conn_t		*nconnp;
	conn_t		*connp = tcp->tcp_connp;
	tcp_stack_t	*tcps = tcp->tcp_tcps;

	BUMP_LOCAL(tcp->tcp_ibsegs);
	DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);

	flags = (unsigned int)tcpha->tha_flags & 0xFF;
	/* Window scaling never applies to a segment carrying SYN. */
	new_swnd = ntohs(tcpha->tha_win) <<
	    ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);
	if (tcp->tcp_snd_ts_ok) {
		/* PAWS check (RFC 1323): drop and ACK stale segments. */
		if (!tcp_paws_check(tcp, tcpha, &tcpopt)) {
			tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
			    tcp->tcp_rnxt, TH_ACK);
			goto done;
		}
	}
	/*
	 * gap is how far the segment starts before (negative) or after
	 * (positive) the expected sequence; rgap is how much of it fits
	 * in the receive window.
	 */
	gap = seg_seq - tcp->tcp_rnxt;
	rgap = tcp->tcp_rwnd - (gap + seg_len);
	if (gap < 0) {
		/* Segment begins before rnxt: some or all is duplicate. */
		TCPS_BUMP_MIB(tcps, tcpInDataDupSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes,
		    (seg_len > -gap ? -gap : seg_len));
		seg_len += gap;
		if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
			if (flags & TH_RST) {
				goto done;
			}
			if ((flags & TH_FIN) && seg_len == -1) {
				/*
				 * When TCP receives a duplicate FIN in
				 * TIME_WAIT state, restart the 2 MSL timer.
				 * See page 73 in RFC 793. Make sure this TCP
				 * is already on the TIME_WAIT list. If not,
				 * just restart the timer.
				 */
				if (TCP_IS_DETACHED(tcp)) {
					if (tcp_time_wait_remove(tcp, NULL) ==
					    B_TRUE) {
						tcp_time_wait_append(tcp);
						TCP_DBGSTAT(tcps,
						    tcp_rput_time_wait);
					}
				} else {
					ASSERT(tcp != NULL);
					TCP_TIMER_RESTART(tcp,
					    tcps->tcps_time_wait_interval);
				}
				tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
				    tcp->tcp_rnxt, TH_ACK);
				goto done;
			}
			flags |=  TH_ACK_NEEDED;
			seg_len = 0;
			goto process_ack;
		}

		/* Fix seg_seq, and chew the gap off the front. */
		seg_seq = tcp->tcp_rnxt;
	}

	/*
	 * A new in-window SYN in TIME_WAIT may reincarnate the connection
	 * (RFC 1122 4.2.2.13 style reuse), provided the new incarnation's
	 * ISS will not overlap the old send sequence space.
	 */
	if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
		/*
		 * Make sure that when we accept the connection, pick
		 * an ISS greater than (tcp_snxt + ISS_INCR/2) for the
		 * old connection.
		 *
		 * The next ISS generated is equal to tcp_iss_incr_extra
		 * + ISS_INCR/2 + other components depending on the
		 * value of tcp_strong_iss.  We pre-calculate the new
		 * ISS here and compare with tcp_snxt to determine if
		 * we need to make adjustment to tcp_iss_incr_extra.
		 *
		 * The above calculation is ugly and is a
		 * waste of CPU cycles...
		 */
		uint32_t new_iss = tcps->tcps_iss_incr_extra;
		int32_t adj;
		ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;

		switch (tcps->tcps_strong_iss) {
		case 2: {
			/* Add time and MD5 components. */
			uint32_t answer[4];
			struct {
				uint32_t ports;
				in6_addr_t src;
				in6_addr_t dst;
			} arg;
			MD5_CTX context;

			mutex_enter(&tcps->tcps_iss_key_lock);
			context = tcps->tcps_iss_key;
			mutex_exit(&tcps->tcps_iss_key_lock);
			arg.ports = connp->conn_ports;
			/* We use MAPPED addresses in tcp_iss_init */
			arg.src = connp->conn_laddr_v6;
			arg.dst = connp->conn_faddr_v6;
			MD5Update(&context, (uchar_t *)&arg,
			    sizeof (arg));
			MD5Final((uchar_t *)answer, &context);
			answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
			break;
		}
		case 1:
			/* Add time component and min random (i.e. 1). */
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
			break;
		default:
			/* Add only time component. */
			new_iss += (uint32_t)gethrestime_sec() * ISS_INCR;
			break;
		}
		if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
			/*
			 * New ISS not guaranteed to be ISS_INCR/2
			 * ahead of the current tcp_snxt, so add the
			 * difference to tcp_iss_incr_extra.
			 */
			tcps->tcps_iss_incr_extra += adj;
		}
		/*
		 * If tcp_clean_death() can not perform the task now,
		 * drop the SYN packet and let the other side re-xmit.
		 * Otherwise pass the SYN packet back in, since the
		 * old tcp state has been cleaned up or freed.
		 */
		if (tcp_clean_death(tcp, 0) == -1)
			goto done;
		nconnp = ipcl_classify(mp, ira, ipst);
		if (nconnp != NULL) {
			TCP_STAT(tcps, tcp_time_wait_syn_success);
			/* Drops ref on nconnp */
			tcp_reinput(nconnp, mp, ira, ipst);
			return;
		}
		goto done;
	}

	/*
	 * rgap is the amount of stuff received out of window.  A negative
	 * value is the amount out of window.
	 */
	if (rgap < 0) {
		TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
		/* Fix seg_len and make sure there is something left. */
		seg_len += rgap;
		if (seg_len <= 0) {
			if (flags & TH_RST) {
				goto done;
			}
			flags |=  TH_ACK_NEEDED;
			seg_len = 0;
			goto process_ack;
		}
	}
	/*
	 * Check whether we can update tcp_ts_recent.  This test is
	 * NOT the one in RFC 1323 3.4.  It is from Braden, 1993, "TCP
	 * Extensions for High Performance: An Update", Internet Draft.
	 */
	if (tcp->tcp_snd_ts_ok &&
	    TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
	    SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
		tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
		tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64();
	}

	if (seg_seq != tcp->tcp_rnxt && seg_len > 0) {
		/* Always ack out of order packets */
		flags |= TH_ACK_NEEDED;
		seg_len = 0;
	} else if (seg_len > 0) {
		TCPS_BUMP_MIB(tcps, tcpInClosed);
		TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
	}
	if (flags & TH_RST) {
		/* A valid RST in TIME_WAIT terminates the connection. */
		(void) tcp_clean_death(tcp, 0);
		goto done;
	}
	if (flags & TH_SYN) {
		tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1,
		    TH_RST|TH_ACK);
		/*
		 * Do not delete the TCP structure if it is in
		 * TIME_WAIT state.  Refer to RFC 1122, 4.2.2.13.
		 */
		goto done;
	}
process_ack:
	if (flags & TH_ACK) {
		bytes_acked = (int)(seg_ack - tcp->tcp_suna);
		if (bytes_acked <= 0) {
			if (bytes_acked == 0 && seg_len == 0 &&
			    new_swnd == tcp->tcp_swnd)
				TCPS_BUMP_MIB(tcps, tcpInDupAck);
		} else {
			/* Acks something not sent */
			flags |= TH_ACK_NEEDED;
		}
	}
	if (flags & TH_ACK_NEEDED) {
		/*
		 * Time to send an ack for some reason.
		 */
		tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
		    tcp->tcp_rnxt, TH_ACK);
	}
done:
	freemsg(mp);
}
630