/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2016 Joyent, Inc. */ /* * This file contains functions related to TCP time wait processing. Also * refer to the time wait handling comments in tcp_impl.h. */ #include #include #include #include #include #include #include #include #include #include static void tcp_time_wait_purge(tcp_t *, tcp_squeue_priv_t *); #define TW_BUCKET(t) \ (((t) / MSEC_TO_TICK(TCP_TIME_WAIT_DELAY)) % TCP_TIME_WAIT_BUCKETS) #define TW_BUCKET_NEXT(b) (((b) + 1) % TCP_TIME_WAIT_BUCKETS) /* * Remove a connection from the list of detached TIME_WAIT connections. * It returns B_FALSE if it can't remove the connection from the list * as the connection has already been removed from the list due to an * earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE. 
*/ boolean_t tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tsp) { boolean_t locked = B_FALSE; if (tsp == NULL) { tsp = *((tcp_squeue_priv_t **) squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP)); mutex_enter(&tsp->tcp_time_wait_lock); locked = B_TRUE; } else { ASSERT(MUTEX_HELD(&tsp->tcp_time_wait_lock)); } /* 0 means that the tcp_t has not been added to the time wait list. */ if (tcp->tcp_time_wait_expire == 0) { ASSERT(tcp->tcp_time_wait_next == NULL); ASSERT(tcp->tcp_time_wait_prev == NULL); if (locked) mutex_exit(&tsp->tcp_time_wait_lock); return (B_FALSE); } ASSERT(TCP_IS_DETACHED(tcp)); ASSERT(tcp->tcp_state == TCPS_TIME_WAIT); ASSERT(tsp->tcp_time_wait_cnt > 0); if (tcp->tcp_time_wait_next != NULL) { tcp->tcp_time_wait_next->tcp_time_wait_prev = tcp->tcp_time_wait_prev; } if (tcp->tcp_time_wait_prev != NULL) { tcp->tcp_time_wait_prev->tcp_time_wait_next = tcp->tcp_time_wait_next; } else { unsigned int bucket; bucket = TW_BUCKET(tcp->tcp_time_wait_expire); ASSERT(tsp->tcp_time_wait_bucket[bucket] == tcp); tsp->tcp_time_wait_bucket[bucket] = tcp->tcp_time_wait_next; } tcp->tcp_time_wait_next = NULL; tcp->tcp_time_wait_prev = NULL; tcp->tcp_time_wait_expire = 0; tsp->tcp_time_wait_cnt--; if (locked) mutex_exit(&tsp->tcp_time_wait_lock); return (B_TRUE); } /* Constants used for fast checking of a localhost address */ #if defined(_BIG_ENDIAN) #define IPv4_LOCALHOST 0x7f000000U #define IPv4_LH_MASK 0xffffff00U #else #define IPv4_LOCALHOST 0x0000007fU #define IPv4_LH_MASK 0x00ffffffU #endif #define IS_LOCAL_HOST(x) ( \ ((x)->tcp_connp->conn_ipversion == IPV4_VERSION && \ ((x)->tcp_connp->conn_laddr_v4 & IPv4_LH_MASK) == IPv4_LOCALHOST) || \ ((x)->tcp_connp->conn_ipversion == IPV6_VERSION && \ IN6_IS_ADDR_LOOPBACK(&(x)->tcp_connp->conn_laddr_v6))) /* * Add a connection to the list of detached TIME_WAIT connections * and set its time to expire. 
 */
/*
 * The tcp_t must be detached, in TCPS_TIME_WAIT and not currently on the
 * time wait list (all asserted below).  The per-squeue tcp_time_wait_lock is
 * taken and released inside this function.  Loopback connections are not
 * queued at all; they are handed straight to tcp_time_wait_purge().
 */
void
tcp_time_wait_append(tcp_t *tcp)
{
	tcp_stack_t	*tcps = tcp->tcp_tcps;
	squeue_t	*sqp = tcp->tcp_connp->conn_sqp;
	tcp_squeue_priv_t *tsp =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
	int64_t		now, schedule;
	unsigned int	bucket;

	tcp_timers_stop(tcp);

	/* Freed above */
	ASSERT(tcp->tcp_timer_tid == 0);
	ASSERT(tcp->tcp_ack_tid == 0);

	/* must have happened at the time of detaching the tcp */
	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
	ASSERT(tcp->tcp_ptpahn == NULL);
	ASSERT(tcp->tcp_flow_stopped == 0);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	ASSERT(tcp->tcp_time_wait_expire == 0);
	ASSERT(tcp->tcp_listener == NULL);

	TCP_DBGSTAT(tcps, tcp_time_wait);
	mutex_enter(&tsp->tcp_time_wait_lock);

	/*
	 * Immediately expire loopback connections.  Since there is no worry
	 * about packets on the local host showing up after a long network
	 * delay, this is safe and allows much higher rates of connection churn
	 * for applications operating locally.
	 *
	 * This typically bypasses the tcp_free_list fast path due to squeue
	 * re-entry for the loopback close operation.
	 */
	if (tcp->tcp_loopback) {
		tcp_time_wait_purge(tcp, tsp);
		mutex_exit(&tsp->tcp_time_wait_lock);
		return;
	}

	/*
	 * In order to reap TIME_WAITs reliably, we should use a source of time
	 * that is not adjustable by the user.  While it would be more accurate
	 * to grab this timestamp before (potentially) sleeping on the
	 * tcp_time_wait_lock, doing so complicates bucket addressing later.
	 */
	now = ddi_get_lbolt64();

	/*
	 * Each squeue uses an arbitrary time offset when scheduling
	 * expiration timers.  This prevents the bucketing from forcing
	 * tcp_time_wait_collector to run in lockstep across squeues.
	 *
	 * This offset is (re)initialized when a new TIME_WAIT connection is
	 * added to an squeue which has no connections waiting to expire.
	 */
	if (tsp->tcp_time_wait_tid == 0) {
		ASSERT(tsp->tcp_time_wait_cnt == 0);
		tsp->tcp_time_wait_offset =
		    now % MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
	}
	/* From here on, "now" is expressed in the squeue's offset time base. */
	now -= tsp->tcp_time_wait_offset;

	/*
	 * Use the netstack-defined timeout, rounded up to the minimum
	 * time_wait_collector interval.
	 */
	schedule = now + MSEC_TO_TICK(tcps->tcps_time_wait_interval);
	tcp->tcp_time_wait_expire = schedule;

	/*
	 * Insert the connection into the appropriate bucket.  The new entry
	 * becomes the head of the bucket's doubly linked list.
	 */
	bucket = TW_BUCKET(tcp->tcp_time_wait_expire);
	tcp->tcp_time_wait_next = tsp->tcp_time_wait_bucket[bucket];
	tsp->tcp_time_wait_bucket[bucket] = tcp;
	if (tcp->tcp_time_wait_next != NULL) {
		ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == NULL);
		tcp->tcp_time_wait_next->tcp_time_wait_prev = tcp;
	}
	tsp->tcp_time_wait_cnt++;

	/*
	 * Round delay up to the nearest bucket boundary.
	 */
	schedule += MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
	schedule -= schedule % MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);

	/*
	 * The newly inserted entry may require a tighter schedule for the
	 * expiration timer.
	 */
	if (schedule < tsp->tcp_time_wait_schedule) {
		callout_id_t old_tid = tsp->tcp_time_wait_tid;

		tsp->tcp_time_wait_schedule = schedule;
		tsp->tcp_time_wait_tid =
		    timeout_generic(CALLOUT_NORMAL,
		    tcp_time_wait_collector, sqp,
		    TICK_TO_NSEC(schedule - now),
		    CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);

		/*
		 * It is possible for the timer to fire before the untimeout
		 * action is able to complete.  In that case, the exclusion
		 * offered by the tcp_time_wait_collector_active flag will
		 * prevent multiple collector threads from processing records
		 * simultaneously from the same squeue.
		 *
		 * The old timer is cancelled only after the lock is dropped.
		 */
		mutex_exit(&tsp->tcp_time_wait_lock);
		(void) untimeout_default(old_tid, 0);
		return;
	}

	/*
	 * Start a fresh timer if none exists.  A schedule of 0 is the
	 * "no timer pending" state left behind by tcp_time_wait_collector.
	 */
	if (tsp->tcp_time_wait_schedule == 0) {
		ASSERT(tsp->tcp_time_wait_tid == 0);

		tsp->tcp_time_wait_schedule = schedule;
		tsp->tcp_time_wait_tid =
		    timeout_generic(CALLOUT_NORMAL,
		    tcp_time_wait_collector, sqp,
		    TICK_TO_NSEC(schedule - now),
		    CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
	}
	mutex_exit(&tsp->tcp_time_wait_lock);
}

/*
 * Wrapper to call tcp_close_detached() via squeue to clean up TIME-WAIT
 * tcp_t.  Used in tcp_time_wait_collector().
 *
 * arg is the conn_t of the connection to close.  mp, arg2 and dummy are not
 * referenced here.  A tcp_t that is already in TCPS_CLOSED is left alone.
 */
/* ARGSUSED */
static void
tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
{
	conn_t	*connp = (conn_t *)arg;
	tcp_t	*tcp = connp->conn_tcp;

	ASSERT(tcp != NULL);
	if (tcp->tcp_state == TCPS_CLOSED) {
		return;
	}

	ASSERT((connp->conn_family == AF_INET &&
	    connp->conn_ipversion == IPV4_VERSION) ||
	    (connp->conn_family == AF_INET6 &&
	    (connp->conn_ipversion == IPV4_VERSION ||
	    connp->conn_ipversion == IPV6_VERSION)));
	ASSERT(!tcp->tcp_listener);

	ASSERT(TCP_IS_DETACHED(tcp));

	/*
	 * Because they have no upstream client to rebind or tcp_close()
	 * them later, we axe the connection here and now.
	 */
	tcp_close_detached(tcp);
}

/*
 * Dispose of a TIME_WAIT tcp_t that is not (or is no longer) linked into a
 * time wait bucket.  The callers in this file are tcp_time_wait_append() for
 * loopback connections and tcp_time_wait_collector() after
 * tcp_time_wait_remove().
 *
 * The caller must hold tsp->tcp_time_wait_lock (asserted).  On the fast path
 * the lock is held throughout.  On the slow path the lock is dropped while
 * the close is handed to the connection's squeue and is re-acquired before
 * returning, so state protected by the lock may have changed on return.
 */
static void
tcp_time_wait_purge(tcp_t *tcp, tcp_squeue_priv_t *tsp)
{
	mblk_t *mp;
	conn_t *connp = tcp->tcp_connp;
	kmutex_t *lock;

	ASSERT(MUTEX_HELD(&tsp->tcp_time_wait_lock));
	ASSERT(connp->conn_fanout != NULL);

	lock = &connp->conn_fanout->connf_lock;

	/*
	 * This is essentially a TIME_WAIT reclaim fast path optimization for
	 * performance where the connection is checked under the fanout lock
	 * (so that no one else can get access to the conn_t) that the refcnt
	 * is 2 (one each for TCP and the classifier hash list).  If that is
	 * the case and clustering callbacks are not enabled, the conn can be
	 * removed under the fanout lock and avoid clean-up under the squeue.
	 *
	 * This optimization is forgone when clustering is enabled since the
	 * clustering callback must be made before setting the CONDEMNED flag
	 * and after dropping all locks.
	 *
	 * See the comments in tcp_closei_local for additional information
	 * regarding the refcnt logic.
	 */
	if (mutex_tryenter(lock)) {
		mutex_enter(&connp->conn_lock);
		if (connp->conn_ref == 2 && cl_inet_disconnect == NULL) {
			ipcl_hash_remove_locked(connp, connp->conn_fanout);
			/*
			 * Set the CONDEMNED flag now itself so that the refcnt
			 * cannot increase due to any walker.
			 */
			connp->conn_state_flags |= CONN_CONDEMNED;
			mutex_exit(&connp->conn_lock);
			mutex_exit(lock);
			if (tsp->tcp_free_list_cnt < tcp_free_list_max_cnt) {
				/*
				 * Add to head of tcp_free_list.  The free
				 * list is chained through tcp_time_wait_next.
				 */
				tcp_cleanup(tcp);
				ASSERT(connp->conn_latch == NULL);
				ASSERT(connp->conn_policy == NULL);
				ASSERT(tcp->tcp_tcps == NULL);
				ASSERT(connp->conn_netstack == NULL);

				tcp->tcp_time_wait_next = tsp->tcp_free_list;
				tcp->tcp_in_free_list = B_TRUE;
				tsp->tcp_free_list = tcp;
				tsp->tcp_free_list_cnt++;
			} else {
				/*
				 * Do not add to tcp_free_list
				 */
				tcp_bind_hash_remove(tcp);
				ixa_cleanup(tcp->tcp_connp->conn_ixa);
				tcp_ipsec_cleanup(tcp);
				CONN_DEC_REF(tcp->tcp_connp);
			}

			/*
			 * With the fast-path complete, we can bail.
			 */
			return;
		} else {
			/*
			 * Fall back to slow path.  Take a reference for the
			 * squeue hand-off below while conn_lock is held.
			 */
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&connp->conn_lock);
			mutex_exit(lock);
		}
	} else {
		/* Fanout lock is contended: go straight to the slow path. */
		CONN_INC_REF(connp);
	}

	/*
	 * We can reuse the closemp here since conn has detached (otherwise we
	 * wouldn't even be in time_wait list). It is safe to change
	 * tcp_closemp_used without taking a lock as no other thread can
	 * concurrently access it at this point in the connection lifecycle.
	 */
	if (tcp->tcp_closemp.b_prev == NULL) {
		tcp->tcp_closemp_used = B_TRUE;
	} else {
		cmn_err(CE_PANIC,
		    "tcp_timewait_collector: concurrent use of tcp_closemp: "
		    "connp %p tcp %p\n", (void *)connp, (void *)tcp);
	}

	TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
	mp = &tcp->tcp_closemp;
	/*
	 * tcp_time_wait_lock is released around the squeue entry and taken
	 * again afterwards, so the caller still holds it on return.
	 */
	mutex_exit(&tsp->tcp_time_wait_lock);
	SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_timewait_close, connp, NULL,
	    SQ_FILL, SQTAG_TCP_TIMEWAIT);
	mutex_enter(&tsp->tcp_time_wait_lock);
}

/*
 * Purge any tcp_t instances associated with this squeue which have expired
 * from the TIME_WAIT state.
 *
 * This is the callback registered with timeout_generic(); arg is the
 * squeue_t whose TIME_WAIT state is to be processed.  Besides reaping
 * expired connections it also drains the squeue's tcp_free_list and
 * reschedules itself while connections remain.
 */
void
tcp_time_wait_collector(void *arg)
{
	tcp_t *tcp;
	int64_t now, active_schedule, new_schedule;
	unsigned int idx;

	squeue_t *sqp = (squeue_t *)arg;
	tcp_squeue_priv_t *tsp =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	mutex_enter(&tsp->tcp_time_wait_lock);

	/*
	 * Because of timer scheduling complexity and the fact that the
	 * tcp_time_wait_lock is dropped during tcp_time_wait_purge, it is
	 * possible for multiple tcp_time_wait_collector threads to run against
	 * the same squeue.  This flag is used to exclude other collectors from
	 * the squeue during execution.
	 */
	if (tsp->tcp_time_wait_collector_active) {
		mutex_exit(&tsp->tcp_time_wait_lock);
		return;
	}
	tsp->tcp_time_wait_collector_active = B_TRUE;

	/*
	 * Purge the free list if necessary
	 */
	if (tsp->tcp_free_list != NULL) {
		TCP_G_STAT(tcp_freelist_cleanup);
		while ((tcp = tsp->tcp_free_list) != NULL) {
			tsp->tcp_free_list = tcp->tcp_time_wait_next;
			tcp->tcp_time_wait_next = NULL;
			tsp->tcp_free_list_cnt--;
			ASSERT(tcp->tcp_tcps == NULL);
			CONN_DEC_REF(tcp->tcp_connp);
		}
		ASSERT(tsp->tcp_free_list_cnt == 0);
	}

	/*
	 * If there are no connections pending, clear timer-related state to be
	 * reinitialized by the next caller.
	 */
	if (tsp->tcp_time_wait_cnt == 0) {
		tsp->tcp_time_wait_offset = 0;
		tsp->tcp_time_wait_schedule = 0;
		tsp->tcp_time_wait_tid = 0;
		tsp->tcp_time_wait_collector_active = B_FALSE;
		mutex_exit(&tsp->tcp_time_wait_lock);
		return;
	}

	/*
	 * Grab the bucket which we were scheduled to cleanse.  The schedule
	 * is a bucket boundary, so "schedule - 1" falls in the bucket that
	 * ends at that boundary.
	 */
	active_schedule = tsp->tcp_time_wait_schedule;
	idx = TW_BUCKET(active_schedule - 1);
	now = ddi_get_lbolt64() - tsp->tcp_time_wait_offset;
retry:
	tcp = tsp->tcp_time_wait_bucket[idx];

	while (tcp != NULL) {
		/*
		 * Since the bucket count is sized to prevent wrap-around
		 * during typical operation and timers are scheduled to process
		 * buckets with only expired connections, there is only one
		 * reason to encounter a connection expiring in the future:
		 * The tcp_time_wait_collector thread has been so delayed in
		 * its processing that connections have wrapped around the
		 * timing wheel into this bucket.
		 *
		 * In that case, the remaining entries in the bucket can be
		 * ignored since, being appended sequentially, they should all
		 * expire in the future.
		 *
		 * NOTE(review): tcp_time_wait_append() inserts at the head of
		 * the bucket, so the list is ordered newest-first; confirm
		 * that stopping at the first unexpired entry cannot leave
		 * older, already expired entries behind it.
		 */
		if (now < tcp->tcp_time_wait_expire) {
			break;
		}

		/*
		 * Pull the connection out of the bucket.
		 */
		VERIFY(tcp_time_wait_remove(tcp, tsp));

		/*
		 * Purge the connection.
		 *
		 * While tcp_time_wait_lock will be temporarily dropped as part
		 * of the process, there is no risk of the timer being
		 * (re)scheduled while the collector is running since a value
		 * corresponding to the past is left in tcp_time_wait_schedule.
		 */
		tcp_time_wait_purge(tcp, tsp);

		/*
		 * Because tcp_time_wait_remove clears the tcp_time_wait_next
		 * field, the next item must be grabbed directly from the
		 * bucket itself.
		 */
		tcp = tsp->tcp_time_wait_bucket[idx];
	}

	if (tsp->tcp_time_wait_cnt == 0) {
		/*
		 * There is not a need for the collector to schedule a new
		 * timer if no pending items remain.  The timer state can be
		 * cleared only if it was untouched while the collector dropped
		 * its locks during tcp_time_wait_purge.
		 */
		if (tsp->tcp_time_wait_schedule == active_schedule) {
			tsp->tcp_time_wait_offset = 0;
			tsp->tcp_time_wait_schedule = 0;
			tsp->tcp_time_wait_tid = 0;
		}
		tsp->tcp_time_wait_collector_active = B_FALSE;
		mutex_exit(&tsp->tcp_time_wait_lock);
		return;
	} else {
		unsigned int nidx;

		/*
		 * Locate the next bucket containing entries.  Each empty
		 * bucket skipped pushes the new schedule out by one interval.
		 *
		 * NOTE(review): active_schedule is assigned only once, before
		 * the retry label, so after a "goto retry" this base is still
		 * the originally scheduled time even though idx has moved;
		 * confirm that this is intended.
		 */
		new_schedule = active_schedule
		    + MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
		nidx = TW_BUCKET_NEXT(idx);
		while (tsp->tcp_time_wait_bucket[nidx] == NULL) {
			if (nidx == idx) {
				break;
			}
			nidx = TW_BUCKET_NEXT(nidx);
			new_schedule += MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
		}
		ASSERT(tsp->tcp_time_wait_bucket[nidx] != NULL);
	}

	/*
	 * It is possible that the system is under such dire load that between
	 * the timer scheduling and TIME_WAIT processing delay, execution
	 * overran the interval allocated to this bucket.
	 */
	now = ddi_get_lbolt64() - tsp->tcp_time_wait_offset;
	if (new_schedule <= now) {
		/*
		 * Attempt to right the situation by immediately performing a
		 * purge on the next bucket.  This loop will continue as needed
		 * until the schedule can be pushed out ahead of the clock.
		 */
		idx = TW_BUCKET(new_schedule - 1);
		goto retry;
	}

	/*
	 * Another thread may have snuck in to reschedule the timer while locks
	 * were dropped during tcp_time_wait_purge.  Defer to the running timer
	 * if that is the case.
	 */
	if (tsp->tcp_time_wait_schedule != active_schedule) {
		tsp->tcp_time_wait_collector_active = B_FALSE;
		mutex_exit(&tsp->tcp_time_wait_lock);
		return;
	}

	/*
	 * Schedule the next timer.
	 */
	tsp->tcp_time_wait_schedule = new_schedule;
	tsp->tcp_time_wait_tid =
	    timeout_generic(CALLOUT_NORMAL,
	    tcp_time_wait_collector, sqp,
	    TICK_TO_NSEC(new_schedule - now),
	    CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
	tsp->tcp_time_wait_collector_active = B_FALSE;
	mutex_exit(&tsp->tcp_time_wait_lock);
}

/*
 * tcp_time_wait_processing() handles processing of incoming packets when
 * the tcp_t is in the TIME_WAIT state.
 *
 * A TIME_WAIT tcp_t that has an associated open TCP end point (not in
 * detached state) is never put on the time wait list.
 */
/*
 * Arguments:
 *	tcp	the connection in TIME_WAIT that the segment matched
 *	mp	the received message; it is either passed on to tcp_reinput()
 *		(acceptable new SYN) or released with freemsg() at "done"
 *	seg_seq	sequence number of the segment
 *	seg_ack	acknowledgment number of the segment
 *	seg_len	length of the segment as supplied by the caller; it is
 *		trimmed locally to the part that falls inside the window
 *	tcpha	TCP header of the segment (flags, window, options)
 *	ira	receive attributes, used only when re-classifying a new SYN
 *
 * NOTE(review): seg_seq and seg_ack are compared directly with tcp_rnxt and
 * tcp_suna, so they are presumably already in host byte order; confirm
 * against the caller.
 */
void
tcp_time_wait_processing(tcp_t *tcp, mblk_t *mp, uint32_t seg_seq,
    uint32_t seg_ack, int seg_len, tcpha_t *tcpha, ip_recv_attr_t *ira)
{
	int32_t		bytes_acked;
	int32_t		gap;
	int32_t		rgap;
	tcp_opt_t	tcpopt;
	uint_t		flags;
	uint32_t	new_swnd = 0;
	conn_t		*nconnp;
	conn_t		*connp = tcp->tcp_connp;
	tcp_stack_t	*tcps = tcp->tcp_tcps;

	BUMP_LOCAL(tcp->tcp_ibsegs);
	DTRACE_PROBE2(tcp__trace__recv, mblk_t *, mp, tcp_t *, tcp);

	flags = (unsigned int)tcpha->tha_flags & 0xFF;
	/* The advertised window is not scaled on segments carrying SYN. */
	new_swnd = ntohs(tcpha->tha_win) <<
	    ((tcpha->tha_flags & TH_SYN) ? 0 : tcp->tcp_snd_ws);

	/*
	 * When timestamps were negotiated, every non-RST segment must carry
	 * one and must pass the PAWS check.  tcpopt is only filled in on this
	 * path.
	 */
	if (tcp->tcp_snd_ts_ok && !(tcpha->tha_flags & TH_RST)) {
		int options;
		if (tcp->tcp_snd_sack_ok)
			tcpopt.tcp = tcp;
		else
			tcpopt.tcp = NULL;
		options = tcp_parse_options(tcpha, &tcpopt);
		if (!(options & TCP_OPT_TSTAMP_PRESENT)) {
			DTRACE_TCP1(droppedtimestamp, tcp_t *, tcp);
			goto done;
		} else if (!tcp_paws_check(tcp, &tcpopt)) {
			tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt, tcp->tcp_rnxt,
			    TH_ACK);
			goto done;
		}
	}
	/*
	 * gap < 0: the segment starts before tcp_rnxt (duplicate data).
	 * rgap < 0: the segment extends past the receive window.
	 */
	gap = seg_seq - tcp->tcp_rnxt;
	rgap = tcp->tcp_rwnd - (gap + seg_len);
	if (gap < 0) {
		TCPS_BUMP_MIB(tcps, tcpInDataDupSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataDupBytes,
		    (seg_len > -gap ? -gap : seg_len));
		seg_len += gap;
		if (seg_len < 0 || (seg_len == 0 && !(flags & TH_FIN))) {
			if (flags & TH_RST) {
				goto done;
			}
			if ((flags & TH_FIN) && seg_len == -1) {
				/*
				 * When TCP receives a duplicate FIN in
				 * TIME_WAIT state, restart the 2 MSL timer.
				 * See page 73 in RFC 793. Make sure this TCP
				 * is already on the TIME_WAIT list. If not,
				 * just restart the timer.
				 */
				if (TCP_IS_DETACHED(tcp)) {
					if (tcp_time_wait_remove(tcp, NULL) ==
					    B_TRUE) {
						tcp_time_wait_append(tcp);
						TCP_DBGSTAT(tcps,
						    tcp_rput_time_wait);
					}
				} else {
					ASSERT(tcp != NULL);
					TCP_TIMER_RESTART(tcp,
					    tcps->tcps_time_wait_interval);
				}
				tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
				    tcp->tcp_rnxt, TH_ACK);
				goto done;
			}
			/* Entirely duplicate segment: just acknowledge it. */
			flags |=  TH_ACK_NEEDED;
			seg_len = 0;
			goto process_ack;
		}

		/* Fix seg_seq, and chew the gap off the front. */
		seg_seq = tcp->tcp_rnxt;
	}

	if ((flags & TH_SYN) && gap > 0 && rgap < 0) {
		/*
		 * Make sure that when we accept the connection, pick
		 * an ISS greater than (tcp_snxt + tcp_iss_incr/2) for the
		 * old connection.
		 *
		 * The next ISS generated is equal to tcp_iss_incr_extra
		 * + tcp_iss_incr/2 + other components depending on the
		 * value of tcp_strong_iss.  We pre-calculate the new
		 * ISS here and compare with tcp_snxt to determine if
		 * we need to make adjustment to tcp_iss_incr_extra.
		 *
		 * The above calculation is ugly and is a
		 * waste of CPU cycles...
		 */
		uint32_t new_iss = tcps->tcps_iss_incr_extra;
		int32_t adj;
		ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;

		switch (tcps->tcps_strong_iss) {
		case 2: {
			/* Add time and MD5 components. */
			uint32_t answer[4];
			struct {
				uint32_t ports;
				in6_addr_t src;
				in6_addr_t dst;
			} arg;
			MD5_CTX context;

			/* Work on a private copy of the keyed MD5 context. */
			mutex_enter(&tcps->tcps_iss_key_lock);
			context = tcps->tcps_iss_key;
			mutex_exit(&tcps->tcps_iss_key_lock);
			arg.ports = connp->conn_ports;
			/* We use MAPPED addresses in tcp_iss_init */
			arg.src = connp->conn_laddr_v6;
			arg.dst = connp->conn_faddr_v6;
			MD5Update(&context, (uchar_t *)&arg,
			    sizeof (arg));
			MD5Final((uchar_t *)answer, &context);
			/* Fold the four digest words into answer[0]. */
			answer[0] ^= answer[1] ^ answer[2] ^ answer[3];
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + answer[0];
			break;
		}
		case 1:
			/* Add time component and min random (i.e. 1). */
			new_iss += (gethrtime() >> ISS_NSEC_SHT) + 1;
			break;
		default:
			/* Add only time component. */
			new_iss += (uint32_t)gethrestime_sec() *
			    tcps->tcps_iss_incr;
			break;
		}
		if ((adj = (int32_t)(tcp->tcp_snxt - new_iss)) > 0) {
			/*
			 * New ISS not guaranteed to be tcp_iss_incr/2
			 * ahead of the current tcp_snxt, so add the
			 * difference to tcp_iss_incr_extra.
			 */
			tcps->tcps_iss_incr_extra += adj;
		}
		/*
		 * If tcp_clean_death() can not perform the task now,
		 * drop the SYN packet and let the other side re-xmit.
		 * Otherwise pass the SYN packet back in, since the
		 * old tcp state has been cleaned up or freed.
		 */
		if (tcp_clean_death(tcp, 0) == -1)
			goto done;
		nconnp = ipcl_classify(mp, ira, ipst);
		if (nconnp != NULL) {
			TCP_STAT(tcps, tcp_time_wait_syn_success);
			/* Drops ref on nconnp */
			tcp_reinput(nconnp, mp, ira, ipst);
			/* mp was handed to tcp_reinput(); do not free it. */
			return;
		}
		goto done;
	}

	/*
	 * rgap is the amount of stuff received out of window.  A negative
	 * value is the amount out of window.
	 */
	if (rgap < 0) {
		TCPS_BUMP_MIB(tcps, tcpInDataPastWinSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataPastWinBytes, -rgap);
		/* Fix seg_len and make sure there is something left. */
		seg_len += rgap;
		if (seg_len <= 0) {
			if (flags & TH_RST) {
				goto done;
			}
			flags |=  TH_ACK_NEEDED;
			seg_len = 0;
			goto process_ack;
		}
	}
	/*
	 * Check whether we can update tcp_ts_recent.  This test is from RFC
	 * 7323, section 5.3.
	 */
	if (tcp->tcp_snd_ts_ok && !(flags & TH_RST) &&
	    TSTMP_GEQ(tcpopt.tcp_opt_ts_val, tcp->tcp_ts_recent) &&
	    SEQ_LEQ(seg_seq, tcp->tcp_rack)) {
		tcp->tcp_ts_recent = tcpopt.tcp_opt_ts_val;
		tcp->tcp_last_rcv_lbolt = ddi_get_lbolt64();
	}

	if (seg_seq != tcp->tcp_rnxt && seg_len > 0) {
		/* Always ack out of order packets */
		flags |= TH_ACK_NEEDED;
		seg_len = 0;
	} else if (seg_len > 0) {
		/* In-order data on a closed-for-receive connection. */
		TCPS_BUMP_MIB(tcps, tcpInClosed);
		TCPS_BUMP_MIB(tcps, tcpInDataInorderSegs);
		TCPS_UPDATE_MIB(tcps, tcpInDataInorderBytes, seg_len);
	}
	if (flags & TH_RST) {
		(void) tcp_clean_death(tcp, 0);
		goto done;
	}
	if (flags & TH_SYN) {
		tcp_xmit_ctl("TH_SYN", tcp, seg_ack, seg_seq + 1,
		    TH_RST|TH_ACK);
		/*
		 * Do not delete the TCP structure if it is in
		 * TIME_WAIT state.  Refer to RFC 1122, 4.2.2.13.
		 */
		goto done;
	}
process_ack:
	if (flags & TH_ACK) {
		bytes_acked = (int)(seg_ack - tcp->tcp_suna);
		if (bytes_acked <= 0) {
			/*
			 * Count a duplicate ACK only when nothing new is
			 * acknowledged, no data remains in the segment and
			 * the window is unchanged.
			 */
			if (bytes_acked == 0 && seg_len == 0 &&
			    new_swnd == tcp->tcp_swnd)
				TCPS_BUMP_MIB(tcps, tcpInDupAck);
		} else {
			/* Acks something not sent */
			flags |= TH_ACK_NEEDED;
		}
	}
	if (flags & TH_ACK_NEEDED) {
		/*
		 * Time to send an ack for some reason.
		 */
		tcp_xmit_ctl(NULL, tcp, tcp->tcp_snxt,
		    tcp->tcp_rnxt, TH_ACK);
	}
done:
	/* Every path that does not hand mp to tcp_reinput() frees it here. */
	freemsg(mp);
}