xref: /titanic_52/usr/src/uts/common/inet/ilb/ilb_conn.c (revision d17b05b6ba5ce4569b13b250fe44164219de8c53)
1dbed73cbSSangeeta Misra /*
2dbed73cbSSangeeta Misra  * CDDL HEADER START
3dbed73cbSSangeeta Misra  *
4dbed73cbSSangeeta Misra  * The contents of this file are subject to the terms of the
5dbed73cbSSangeeta Misra  * Common Development and Distribution License (the "License").
6dbed73cbSSangeeta Misra  * You may not use this file except in compliance with the License.
7dbed73cbSSangeeta Misra  *
8dbed73cbSSangeeta Misra  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9dbed73cbSSangeeta Misra  * or http://www.opensolaris.org/os/licensing.
10dbed73cbSSangeeta Misra  * See the License for the specific language governing permissions
11dbed73cbSSangeeta Misra  * and limitations under the License.
12dbed73cbSSangeeta Misra  *
13dbed73cbSSangeeta Misra  * When distributing Covered Code, include this CDDL HEADER in each
14dbed73cbSSangeeta Misra  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15dbed73cbSSangeeta Misra  * If applicable, add the following below this CDDL HEADER, with the
16dbed73cbSSangeeta Misra  * fields enclosed by brackets "[]" replaced with your own identifying
17dbed73cbSSangeeta Misra  * information: Portions Copyright [yyyy] [name of copyright owner]
18dbed73cbSSangeeta Misra  *
19dbed73cbSSangeeta Misra  * CDDL HEADER END
20dbed73cbSSangeeta Misra  */
21dbed73cbSSangeeta Misra 
22dbed73cbSSangeeta Misra /*
23dbed73cbSSangeeta Misra  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24dbed73cbSSangeeta Misra  * Use is subject to license terms.
25*d17b05b6SJerry Jelinek  * Copyright 2014 Joyent, Inc.  All rights reserved.
26dbed73cbSSangeeta Misra  */
27dbed73cbSSangeeta Misra 
28de710d24SJosef 'Jeff' Sipek #include <sys/sysmacros.h>
29dbed73cbSSangeeta Misra #include <sys/types.h>
30dbed73cbSSangeeta Misra #include <sys/conf.h>
31dbed73cbSSangeeta Misra #include <sys/time.h>
32dbed73cbSSangeeta Misra #include <sys/taskq.h>
33dbed73cbSSangeeta Misra #include <sys/cmn_err.h>
34dbed73cbSSangeeta Misra #include <sys/sdt.h>
35dbed73cbSSangeeta Misra #include <sys/atomic.h>
36dbed73cbSSangeeta Misra #include <netinet/in.h>
37dbed73cbSSangeeta Misra #include <inet/ip.h>
38dbed73cbSSangeeta Misra #include <inet/ip6.h>
39dbed73cbSSangeeta Misra #include <inet/tcp.h>
40dbed73cbSSangeeta Misra #include <inet/udp_impl.h>
41dbed73cbSSangeeta Misra #include <inet/ilb.h>
42dbed73cbSSangeeta Misra 
43dbed73cbSSangeeta Misra #include "ilb_stack.h"
44dbed73cbSSangeeta Misra #include "ilb_impl.h"
45dbed73cbSSangeeta Misra #include "ilb_conn.h"
46dbed73cbSSangeeta Misra #include "ilb_nat.h"
47dbed73cbSSangeeta Misra 
48dbed73cbSSangeeta Misra /*
49dbed73cbSSangeeta Misra  * Timer struct for ilb_conn_t and ilb_sticky_t garbage collection
50dbed73cbSSangeeta Misra  *
51dbed73cbSSangeeta Misra  * start: starting index into the hash table to do gc
52dbed73cbSSangeeta Misra  * end: ending index into the hash table to do gc
53dbed73cbSSangeeta Misra  * ilbs: pointer to the ilb_stack_t of the IP stack
54dbed73cbSSangeeta Misra  * tid_lock: mutex to protect the timer id.
55dbed73cbSSangeeta Misra  * tid: timer id of the timer
56dbed73cbSSangeeta Misra  */
57dbed73cbSSangeeta Misra typedef struct ilb_timer_s {
58dbed73cbSSangeeta Misra 	uint32_t	start;
59dbed73cbSSangeeta Misra 	uint32_t	end;
60dbed73cbSSangeeta Misra 	ilb_stack_t	*ilbs;
61dbed73cbSSangeeta Misra 	kmutex_t	tid_lock;
62dbed73cbSSangeeta Misra 	timeout_id_t	tid;
63dbed73cbSSangeeta Misra } ilb_timer_t;
64dbed73cbSSangeeta Misra 
/*
 * Hash macro for finding the index to the conn hash table.  saddr and
 * daddr are each read as four consecutive elements.  The result is masked
 * with (hash_size - 1), so hash_size is expected to be a power of 2.
 */
#define	ILB_CONN_HASH(saddr, sport, daddr, dport, hash_size)	\
	((((saddr)[3] ^ (daddr)[3]) * 50653 +			\
	((saddr)[2] ^ (daddr)[2]) * 1369 +			\
	((saddr)[1] ^ (daddr)[1]) * 37 +			\
	((saddr)[0] ^ (daddr)[0]) + (sport) * 37 + (dport)) &	\
	((hash_size) - 1))
72dbed73cbSSangeeta Misra 
/* Kmem cache from which the conn hash entries are allocated. */
static struct kmem_cache *ilb_conn_cache;

/*
 * Number of timers doing conn cache garbage collection.  With the value
 * of 60, each gc thread is responsible for 1/60 of the conn hash table.
 */
static int ilb_conn_timer_size = 60;

/* Interval, in seconds, at which each of the above gc timers wakes up. */
static int ilb_conn_cache_timeout = 15;
84dbed73cbSSangeeta Misra 
/*
 * Hash macro for finding the index to the sticky hash table.  saddr is
 * read as four consecutive elements and mixed with the bytes of rule.
 * The result is masked with (hash_size - 1).
 */
#define	ILB_STICKY_HASH(saddr, rule, hash_size)			\
	((((saddr)[3] ^ ((rule) >> 24)) * 29791 +		\
	((saddr)[2] ^ ((rule) >> 16)) * 961 +			\
	((saddr)[1] ^ ((rule) >> 8)) * 31 +			\
	((saddr)[0] ^ (rule))) & ((hash_size) - 1))
90dbed73cbSSangeeta Misra 
/* Kmem cache for the sticky hash entries. */
static struct kmem_cache *ilb_sticky_cache;

/*
 * Number of timers doing sticky cache garbage collection.  With the value
 * of 60, each gc thread is responsible for 1/60 of the sticky hash table.
 */
static int ilb_sticky_timer_size = 60;

/* Interval, in seconds, at which each of the above gc timers wakes up. */
static int ilb_sticky_timeout = 15;
101dbed73cbSSangeeta Misra 
/*
 * Drop one reference on the sticky entry s and refresh its access time,
 * all while holding the sticky_lock of the hash bucket s belongs to.
 *
 * The argument is evaluated more than once, so it must not have side
 * effects.  Every use of s is parenthesized so that an arbitrary pointer
 * expression (not just a plain identifier) expands correctly.  The macro
 * is kept as a brace block, as before, so existing call sites keep
 * compiling; do not use it as the body of an if that has an else.
 */
#define	ILB_STICKY_REFRELE(s)			\
{						\
	mutex_enter(&(s)->hash->sticky_lock);	\
	(s)->refcnt--;				\
	(s)->atime = ddi_get_lbolt64();		\
	mutex_exit(&(s)->hash->sticky_lock);	\
}
109dbed73cbSSangeeta Misra 
110dbed73cbSSangeeta Misra 
111dbed73cbSSangeeta Misra static void
112dbed73cbSSangeeta Misra ilb_conn_cache_init(void)
113dbed73cbSSangeeta Misra {
114dbed73cbSSangeeta Misra 	ilb_conn_cache = kmem_cache_create("ilb_conn_cache",
115dbed73cbSSangeeta Misra 	    sizeof (ilb_conn_t), 0, NULL, NULL, NULL, NULL, NULL,
116dbed73cbSSangeeta Misra 	    ilb_kmem_flags);
117dbed73cbSSangeeta Misra }
118dbed73cbSSangeeta Misra 
119dbed73cbSSangeeta Misra void
120dbed73cbSSangeeta Misra ilb_conn_cache_fini(void)
121dbed73cbSSangeeta Misra {
122dbed73cbSSangeeta Misra 	if (ilb_conn_cache != NULL) {
123dbed73cbSSangeeta Misra 		kmem_cache_destroy(ilb_conn_cache);
124dbed73cbSSangeeta Misra 		ilb_conn_cache = NULL;
125dbed73cbSSangeeta Misra 	}
126dbed73cbSSangeeta Misra }
127dbed73cbSSangeeta Misra 
128dbed73cbSSangeeta Misra static void
129dbed73cbSSangeeta Misra ilb_conn_remove_common(ilb_conn_t *connp, boolean_t c2s)
130dbed73cbSSangeeta Misra {
131dbed73cbSSangeeta Misra 	ilb_conn_hash_t *hash;
132dbed73cbSSangeeta Misra 	ilb_conn_t **next, **prev;
133dbed73cbSSangeeta Misra 	ilb_conn_t **next_prev, **prev_next;
134dbed73cbSSangeeta Misra 
135dbed73cbSSangeeta Misra 	if (c2s) {
136dbed73cbSSangeeta Misra 		hash = connp->conn_c2s_hash;
137dbed73cbSSangeeta Misra 		ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock));
138dbed73cbSSangeeta Misra 		next = &connp->conn_c2s_next;
139dbed73cbSSangeeta Misra 		prev = &connp->conn_c2s_prev;
140dbed73cbSSangeeta Misra 		if (*next != NULL)
141dbed73cbSSangeeta Misra 			next_prev = &(*next)->conn_c2s_prev;
142dbed73cbSSangeeta Misra 		if (*prev != NULL)
143dbed73cbSSangeeta Misra 			prev_next = &(*prev)->conn_c2s_next;
144dbed73cbSSangeeta Misra 	} else {
145dbed73cbSSangeeta Misra 		hash = connp->conn_s2c_hash;
146dbed73cbSSangeeta Misra 		ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock));
147dbed73cbSSangeeta Misra 		next = &connp->conn_s2c_next;
148dbed73cbSSangeeta Misra 		prev = &connp->conn_s2c_prev;
149dbed73cbSSangeeta Misra 		if (*next != NULL)
150dbed73cbSSangeeta Misra 			next_prev = &(*next)->conn_s2c_prev;
151dbed73cbSSangeeta Misra 		if (*prev != NULL)
152dbed73cbSSangeeta Misra 			prev_next = &(*prev)->conn_s2c_next;
153dbed73cbSSangeeta Misra 	}
154dbed73cbSSangeeta Misra 
155dbed73cbSSangeeta Misra 	if (hash->ilb_connp == connp) {
156dbed73cbSSangeeta Misra 		hash->ilb_connp = *next;
157dbed73cbSSangeeta Misra 		if (*next != NULL)
158dbed73cbSSangeeta Misra 			*next_prev = NULL;
159dbed73cbSSangeeta Misra 	} else {
160dbed73cbSSangeeta Misra 		if (*prev != NULL)
161dbed73cbSSangeeta Misra 			*prev_next = *next;
162dbed73cbSSangeeta Misra 		if (*next != NULL)
163dbed73cbSSangeeta Misra 			*next_prev = *prev;
164dbed73cbSSangeeta Misra 	}
165dbed73cbSSangeeta Misra 	ASSERT(hash->ilb_conn_cnt > 0);
166dbed73cbSSangeeta Misra 	hash->ilb_conn_cnt--;
167dbed73cbSSangeeta Misra 
168dbed73cbSSangeeta Misra 	*next = NULL;
169dbed73cbSSangeeta Misra 	*prev = NULL;
170dbed73cbSSangeeta Misra }
171dbed73cbSSangeeta Misra 
172dbed73cbSSangeeta Misra static void
173dbed73cbSSangeeta Misra ilb_conn_remove(ilb_conn_t *connp)
174dbed73cbSSangeeta Misra {
175dbed73cbSSangeeta Misra 	ASSERT(MUTEX_HELD(&connp->conn_c2s_hash->ilb_conn_hash_lock));
176dbed73cbSSangeeta Misra 	ilb_conn_remove_common(connp, B_TRUE);
177dbed73cbSSangeeta Misra 	ASSERT(MUTEX_HELD(&connp->conn_s2c_hash->ilb_conn_hash_lock));
178dbed73cbSSangeeta Misra 	ilb_conn_remove_common(connp, B_FALSE);
179dbed73cbSSangeeta Misra 
180dbed73cbSSangeeta Misra 	if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) {
181dbed73cbSSangeeta Misra 		in_port_t port;
182dbed73cbSSangeeta Misra 
183dbed73cbSSangeeta Misra 		port = ntohs(connp->conn_rule_cache.info.nat_sport);
184dbed73cbSSangeeta Misra 		vmem_free(connp->conn_rule_cache.info.src_ent->nse_port_arena,
185dbed73cbSSangeeta Misra 		    (void *)(uintptr_t)port, 1);
186dbed73cbSSangeeta Misra 	}
187dbed73cbSSangeeta Misra 
188dbed73cbSSangeeta Misra 	if (connp->conn_sticky != NULL)
189dbed73cbSSangeeta Misra 		ILB_STICKY_REFRELE(connp->conn_sticky);
190dbed73cbSSangeeta Misra 	ILB_SERVER_REFRELE(connp->conn_server);
191dbed73cbSSangeeta Misra 	kmem_cache_free(ilb_conn_cache, connp);
192dbed73cbSSangeeta Misra }
193dbed73cbSSangeeta Misra 
194dbed73cbSSangeeta Misra /*
195dbed73cbSSangeeta Misra  * Routine to do periodic garbage collection of conn hash entries.  When
196dbed73cbSSangeeta Misra  * a conn hash timer fires, it dispatches a taskq to call this function
197dbed73cbSSangeeta Misra  * to do the gc.  Note that each taskq is responisble for a portion of
198dbed73cbSSangeeta Misra  * the table.  The portion is stored in timer->start, timer->end.
199dbed73cbSSangeeta Misra  */
200dbed73cbSSangeeta Misra static void
201dbed73cbSSangeeta Misra ilb_conn_cleanup(void *arg)
202dbed73cbSSangeeta Misra {
203dbed73cbSSangeeta Misra 	ilb_timer_t *timer = (ilb_timer_t *)arg;
204dbed73cbSSangeeta Misra 	uint32_t i;
205dbed73cbSSangeeta Misra 	ilb_stack_t *ilbs;
206dbed73cbSSangeeta Misra 	ilb_conn_hash_t *c2s_hash, *s2c_hash;
207dbed73cbSSangeeta Misra 	ilb_conn_t *connp, *nxt_connp;
208dbed73cbSSangeeta Misra 	int64_t now;
209dbed73cbSSangeeta Misra 	int64_t expiry;
210dbed73cbSSangeeta Misra 	boolean_t die_now;
211dbed73cbSSangeeta Misra 
212dbed73cbSSangeeta Misra 	ilbs = timer->ilbs;
213dbed73cbSSangeeta Misra 	c2s_hash = ilbs->ilbs_c2s_conn_hash;
214dbed73cbSSangeeta Misra 	ASSERT(c2s_hash != NULL);
215dbed73cbSSangeeta Misra 
216d3d50737SRafael Vanoni 	now = ddi_get_lbolt64();
217dbed73cbSSangeeta Misra 	for (i = timer->start; i < timer->end; i++) {
218dbed73cbSSangeeta Misra 		mutex_enter(&c2s_hash[i].ilb_conn_hash_lock);
219dbed73cbSSangeeta Misra 		if ((connp = c2s_hash[i].ilb_connp) == NULL) {
220dbed73cbSSangeeta Misra 			ASSERT(c2s_hash[i].ilb_conn_cnt == 0);
221dbed73cbSSangeeta Misra 			mutex_exit(&c2s_hash[i].ilb_conn_hash_lock);
222dbed73cbSSangeeta Misra 			continue;
223dbed73cbSSangeeta Misra 		}
224dbed73cbSSangeeta Misra 		do {
225dbed73cbSSangeeta Misra 			ASSERT(c2s_hash[i].ilb_conn_cnt > 0);
226dbed73cbSSangeeta Misra 			ASSERT(connp->conn_c2s_hash == &c2s_hash[i]);
227dbed73cbSSangeeta Misra 			nxt_connp = connp->conn_c2s_next;
228dbed73cbSSangeeta Misra 			expiry = now - SEC_TO_TICK(connp->conn_expiry);
229dbed73cbSSangeeta Misra 			if (connp->conn_server->iser_die_time != 0 &&
230dbed73cbSSangeeta Misra 			    connp->conn_server->iser_die_time < now)
231dbed73cbSSangeeta Misra 				die_now = B_TRUE;
232dbed73cbSSangeeta Misra 			else
233dbed73cbSSangeeta Misra 				die_now = B_FALSE;
234dbed73cbSSangeeta Misra 			s2c_hash = connp->conn_s2c_hash;
235dbed73cbSSangeeta Misra 			mutex_enter(&s2c_hash->ilb_conn_hash_lock);
236dbed73cbSSangeeta Misra 
237dbed73cbSSangeeta Misra 			if (connp->conn_gc || die_now ||
238dbed73cbSSangeeta Misra 			    (connp->conn_c2s_atime < expiry &&
239dbed73cbSSangeeta Misra 			    connp->conn_s2c_atime < expiry)) {
240dbed73cbSSangeeta Misra 				/* Need to update the nat list cur_connp */
241dbed73cbSSangeeta Misra 				if (connp == ilbs->ilbs_conn_list_connp) {
242dbed73cbSSangeeta Misra 					ilbs->ilbs_conn_list_connp =
243dbed73cbSSangeeta Misra 					    connp->conn_c2s_next;
244dbed73cbSSangeeta Misra 				}
245dbed73cbSSangeeta Misra 				ilb_conn_remove(connp);
246dbed73cbSSangeeta Misra 				goto nxt_connp;
247dbed73cbSSangeeta Misra 			}
248dbed73cbSSangeeta Misra 
249dbed73cbSSangeeta Misra 			if (connp->conn_l4 != IPPROTO_TCP)
250dbed73cbSSangeeta Misra 				goto nxt_connp;
251dbed73cbSSangeeta Misra 
252dbed73cbSSangeeta Misra 			/* Update and check TCP related conn info */
253dbed73cbSSangeeta Misra 			if (connp->conn_c2s_tcp_fin_sent &&
254dbed73cbSSangeeta Misra 			    SEQ_GT(connp->conn_s2c_tcp_ack,
255dbed73cbSSangeeta Misra 			    connp->conn_c2s_tcp_fss)) {
256dbed73cbSSangeeta Misra 				connp->conn_c2s_tcp_fin_acked = B_TRUE;
257dbed73cbSSangeeta Misra 			}
258dbed73cbSSangeeta Misra 			if (connp->conn_s2c_tcp_fin_sent &&
259dbed73cbSSangeeta Misra 			    SEQ_GT(connp->conn_c2s_tcp_ack,
260dbed73cbSSangeeta Misra 			    connp->conn_s2c_tcp_fss)) {
261dbed73cbSSangeeta Misra 				connp->conn_s2c_tcp_fin_acked = B_TRUE;
262dbed73cbSSangeeta Misra 			}
263dbed73cbSSangeeta Misra 			if (connp->conn_c2s_tcp_fin_acked &&
264dbed73cbSSangeeta Misra 			    connp->conn_s2c_tcp_fin_acked) {
265dbed73cbSSangeeta Misra 				ilb_conn_remove(connp);
266dbed73cbSSangeeta Misra 			}
267dbed73cbSSangeeta Misra nxt_connp:
268dbed73cbSSangeeta Misra 			mutex_exit(&s2c_hash->ilb_conn_hash_lock);
269dbed73cbSSangeeta Misra 			connp = nxt_connp;
270dbed73cbSSangeeta Misra 		} while (connp != NULL);
271dbed73cbSSangeeta Misra 		mutex_exit(&c2s_hash[i].ilb_conn_hash_lock);
272dbed73cbSSangeeta Misra 	}
273dbed73cbSSangeeta Misra }
274dbed73cbSSangeeta Misra 
275dbed73cbSSangeeta Misra /* Conn hash timer routine.  It dispatches a taskq and restart the timer */
276dbed73cbSSangeeta Misra static void
277dbed73cbSSangeeta Misra ilb_conn_timer(void *arg)
278dbed73cbSSangeeta Misra {
279dbed73cbSSangeeta Misra 	ilb_timer_t *timer = (ilb_timer_t *)arg;
280dbed73cbSSangeeta Misra 
281dbed73cbSSangeeta Misra 	(void) taskq_dispatch(timer->ilbs->ilbs_conn_taskq, ilb_conn_cleanup,
282dbed73cbSSangeeta Misra 	    arg, TQ_SLEEP);
283dbed73cbSSangeeta Misra 	mutex_enter(&timer->tid_lock);
284dbed73cbSSangeeta Misra 	if (timer->tid == 0) {
285dbed73cbSSangeeta Misra 		mutex_exit(&timer->tid_lock);
286dbed73cbSSangeeta Misra 	} else {
287dbed73cbSSangeeta Misra 		timer->tid = timeout(ilb_conn_timer, arg,
288dbed73cbSSangeeta Misra 		    SEC_TO_TICK(ilb_conn_cache_timeout));
289dbed73cbSSangeeta Misra 		mutex_exit(&timer->tid_lock);
290dbed73cbSSangeeta Misra 	}
291dbed73cbSSangeeta Misra }
292dbed73cbSSangeeta Misra 
293dbed73cbSSangeeta Misra void
294dbed73cbSSangeeta Misra ilb_conn_hash_init(ilb_stack_t *ilbs)
295dbed73cbSSangeeta Misra {
296dbed73cbSSangeeta Misra 	extern pri_t minclsyspri;
297dbed73cbSSangeeta Misra 	int i, part;
298dbed73cbSSangeeta Misra 	ilb_timer_t *tm;
299dbed73cbSSangeeta Misra 	char tq_name[TASKQ_NAMELEN];
300dbed73cbSSangeeta Misra 
301dbed73cbSSangeeta Misra 	/*
302dbed73cbSSangeeta Misra 	 * If ilbs->ilbs_conn_hash_size is not a power of 2, bump it up to
303dbed73cbSSangeeta Misra 	 * the next power of 2.
304dbed73cbSSangeeta Misra 	 */
305de710d24SJosef 'Jeff' Sipek 	if (!ISP2(ilbs->ilbs_conn_hash_size)) {
306dbed73cbSSangeeta Misra 		for (i = 0; i < 31; i++) {
307dbed73cbSSangeeta Misra 			if (ilbs->ilbs_conn_hash_size < (1 << i))
308dbed73cbSSangeeta Misra 				break;
309dbed73cbSSangeeta Misra 		}
310dbed73cbSSangeeta Misra 		ilbs->ilbs_conn_hash_size = 1 << i;
311dbed73cbSSangeeta Misra 	}
312dbed73cbSSangeeta Misra 
313dbed73cbSSangeeta Misra 	/*
314dbed73cbSSangeeta Misra 	 * Can sleep since this should be called when a rule is being added,
315dbed73cbSSangeeta Misra 	 * hence we are not in interrupt context.
316dbed73cbSSangeeta Misra 	 */
317dbed73cbSSangeeta Misra 	ilbs->ilbs_c2s_conn_hash = kmem_zalloc(sizeof (ilb_conn_hash_t) *
318dbed73cbSSangeeta Misra 	    ilbs->ilbs_conn_hash_size, KM_SLEEP);
319dbed73cbSSangeeta Misra 	ilbs->ilbs_s2c_conn_hash = kmem_zalloc(sizeof (ilb_conn_hash_t) *
320dbed73cbSSangeeta Misra 	    ilbs->ilbs_conn_hash_size, KM_SLEEP);
321dbed73cbSSangeeta Misra 
322dbed73cbSSangeeta Misra 	for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) {
323dbed73cbSSangeeta Misra 		mutex_init(&ilbs->ilbs_c2s_conn_hash[i].ilb_conn_hash_lock,
324dbed73cbSSangeeta Misra 		    NULL, MUTEX_DEFAULT, NULL);
325dbed73cbSSangeeta Misra 	}
326dbed73cbSSangeeta Misra 	for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) {
327dbed73cbSSangeeta Misra 		mutex_init(&ilbs->ilbs_s2c_conn_hash[i].ilb_conn_hash_lock,
328dbed73cbSSangeeta Misra 		    NULL, MUTEX_DEFAULT, NULL);
329dbed73cbSSangeeta Misra 	}
330dbed73cbSSangeeta Misra 
331dbed73cbSSangeeta Misra 	if (ilb_conn_cache == NULL)
332dbed73cbSSangeeta Misra 		ilb_conn_cache_init();
333dbed73cbSSangeeta Misra 
334dbed73cbSSangeeta Misra 	(void) snprintf(tq_name, sizeof (tq_name), "ilb_conn_taskq_%p",
3356e0672acSSangeeta Misra 	    (void *)ilbs->ilbs_netstack);
336dbed73cbSSangeeta Misra 	ASSERT(ilbs->ilbs_conn_taskq == NULL);
337dbed73cbSSangeeta Misra 	ilbs->ilbs_conn_taskq = taskq_create(tq_name,
338dbed73cbSSangeeta Misra 	    ilb_conn_timer_size * 2, minclsyspri, ilb_conn_timer_size,
339dbed73cbSSangeeta Misra 	    ilb_conn_timer_size * 2, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
340dbed73cbSSangeeta Misra 
341dbed73cbSSangeeta Misra 	ASSERT(ilbs->ilbs_conn_timer_list == NULL);
342dbed73cbSSangeeta Misra 	ilbs->ilbs_conn_timer_list = kmem_zalloc(sizeof (ilb_timer_t) *
343dbed73cbSSangeeta Misra 	    ilb_conn_timer_size, KM_SLEEP);
344dbed73cbSSangeeta Misra 
345dbed73cbSSangeeta Misra 	/*
346dbed73cbSSangeeta Misra 	 * The hash table is divided in equal partition for those timers
347dbed73cbSSangeeta Misra 	 * to do garbage collection.
348dbed73cbSSangeeta Misra 	 */
349dbed73cbSSangeeta Misra 	part = ilbs->ilbs_conn_hash_size / ilb_conn_timer_size + 1;
350dbed73cbSSangeeta Misra 	for (i = 0; i < ilb_conn_timer_size; i++) {
351dbed73cbSSangeeta Misra 		tm = ilbs->ilbs_conn_timer_list + i;
352dbed73cbSSangeeta Misra 		tm->start = i * part;
353dbed73cbSSangeeta Misra 		tm->end = i * part + part;
354dbed73cbSSangeeta Misra 		if (tm->end > ilbs->ilbs_conn_hash_size)
355dbed73cbSSangeeta Misra 			tm->end = ilbs->ilbs_conn_hash_size;
356dbed73cbSSangeeta Misra 		tm->ilbs = ilbs;
357dbed73cbSSangeeta Misra 		mutex_init(&tm->tid_lock, NULL, MUTEX_DEFAULT, NULL);
358dbed73cbSSangeeta Misra 		/* Spread out the starting execution time of all the timers. */
359dbed73cbSSangeeta Misra 		tm->tid = timeout(ilb_conn_timer, tm,
360dbed73cbSSangeeta Misra 		    SEC_TO_TICK(ilb_conn_cache_timeout + i));
361dbed73cbSSangeeta Misra 	}
362dbed73cbSSangeeta Misra }
363dbed73cbSSangeeta Misra 
364dbed73cbSSangeeta Misra void
365dbed73cbSSangeeta Misra ilb_conn_hash_fini(ilb_stack_t *ilbs)
366dbed73cbSSangeeta Misra {
367dbed73cbSSangeeta Misra 	uint32_t i;
368dbed73cbSSangeeta Misra 	ilb_conn_t *connp;
369*d17b05b6SJerry Jelinek 	ilb_conn_hash_t *hash;
370dbed73cbSSangeeta Misra 
371dbed73cbSSangeeta Misra 	if (ilbs->ilbs_c2s_conn_hash == NULL) {
372dbed73cbSSangeeta Misra 		ASSERT(ilbs->ilbs_s2c_conn_hash == NULL);
373dbed73cbSSangeeta Misra 		return;
374dbed73cbSSangeeta Misra 	}
375dbed73cbSSangeeta Misra 
376dbed73cbSSangeeta Misra 	/* Stop all the timers first. */
377dbed73cbSSangeeta Misra 	for (i = 0; i < ilb_conn_timer_size; i++) {
378dbed73cbSSangeeta Misra 		timeout_id_t tid;
379dbed73cbSSangeeta Misra 
380dbed73cbSSangeeta Misra 		/* Setting tid to 0 tells the timer handler not to restart. */
381dbed73cbSSangeeta Misra 		mutex_enter(&ilbs->ilbs_conn_timer_list[i].tid_lock);
382dbed73cbSSangeeta Misra 		tid = ilbs->ilbs_conn_timer_list[i].tid;
383dbed73cbSSangeeta Misra 		ilbs->ilbs_conn_timer_list[i].tid = 0;
384dbed73cbSSangeeta Misra 		mutex_exit(&ilbs->ilbs_conn_timer_list[i].tid_lock);
385dbed73cbSSangeeta Misra 		(void) untimeout(tid);
386dbed73cbSSangeeta Misra 	}
387dbed73cbSSangeeta Misra 	kmem_free(ilbs->ilbs_conn_timer_list, sizeof (ilb_timer_t) *
388dbed73cbSSangeeta Misra 	    ilb_conn_timer_size);
389dbed73cbSSangeeta Misra 	taskq_destroy(ilbs->ilbs_conn_taskq);
390dbed73cbSSangeeta Misra 	ilbs->ilbs_conn_taskq = NULL;
391dbed73cbSSangeeta Misra 
392dbed73cbSSangeeta Misra 	/* Then remove all the conns. */
393*d17b05b6SJerry Jelinek 	hash = ilbs->ilbs_s2c_conn_hash;
394dbed73cbSSangeeta Misra 	for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) {
395*d17b05b6SJerry Jelinek 		while ((connp = hash[i].ilb_connp) != NULL) {
396*d17b05b6SJerry Jelinek 			hash[i].ilb_connp = connp->conn_s2c_next;
397dbed73cbSSangeeta Misra 			ILB_SERVER_REFRELE(connp->conn_server);
398dbed73cbSSangeeta Misra 			if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) {
399dbed73cbSSangeeta Misra 				ilb_nat_src_entry_t *ent;
400dbed73cbSSangeeta Misra 				in_port_t port;
401dbed73cbSSangeeta Misra 
402dbed73cbSSangeeta Misra 				/*
403dbed73cbSSangeeta Misra 				 * src_ent will be freed in ilb_nat_src_fini().
404dbed73cbSSangeeta Misra 				 */
405dbed73cbSSangeeta Misra 				port = ntohs(
406dbed73cbSSangeeta Misra 				    connp->conn_rule_cache.info.nat_sport);
407dbed73cbSSangeeta Misra 				ent = connp->conn_rule_cache.info.src_ent;
408dbed73cbSSangeeta Misra 				vmem_free(ent->nse_port_arena,
409dbed73cbSSangeeta Misra 				    (void *)(uintptr_t)port, 1);
410dbed73cbSSangeeta Misra 			}
411dbed73cbSSangeeta Misra 			kmem_cache_free(ilb_conn_cache, connp);
412dbed73cbSSangeeta Misra 		}
413dbed73cbSSangeeta Misra 	}
414dbed73cbSSangeeta Misra 	kmem_free(ilbs->ilbs_c2s_conn_hash, sizeof (ilb_conn_hash_t) *
415dbed73cbSSangeeta Misra 	    ilbs->ilbs_conn_hash_size);
416dbed73cbSSangeeta Misra 	kmem_free(ilbs->ilbs_s2c_conn_hash, sizeof (ilb_conn_hash_t) *
417dbed73cbSSangeeta Misra 	    ilbs->ilbs_conn_hash_size);
418dbed73cbSSangeeta Misra }
419dbed73cbSSangeeta Misra 
420dbed73cbSSangeeta Misra /*
421dbed73cbSSangeeta Misra  * Internet checksum adjustment calculation routines.  We pre-calculate
422dbed73cbSSangeeta Misra  * checksum adjustment so that we don't need to compute the checksum on
423dbed73cbSSangeeta Misra  * the whole packet when we change address/port in the packet.
424dbed73cbSSangeeta Misra  */
425dbed73cbSSangeeta Misra 
/*
 * Half-NAT checksum adjustment for IPv4.  oaddr and naddr each point to
 * the two 16-bit words of the old and the new address.  The old words
 * and old_port are summed, folded to 16 bits and complemented; the new
 * words and new_port are then added on top.  The (unfolded) result is
 * stored in *adj_sum.
 */
static void
hnat_cksum_v4(uint16_t *oaddr, uint16_t *naddr, in_port_t old_port,
    in_port_t new_port, uint32_t *adj_sum)
{
	uint32_t old_part = old_port;
	uint32_t new_part = new_port;

	old_part += oaddr[0];
	old_part += oaddr[1];
	/* Fold the carries back into the low 16 bits. */
	while (old_part > 0xffff)
		old_part = (old_part >> 16) + (old_part & 0xffff);

	new_part += naddr[0];
	new_part += naddr[1];

	*adj_sum = (uint16_t)~old_part + new_part;
}
437dbed73cbSSangeeta Misra 
/*
 * Half-NAT checksum adjustment for IPv6.  Same computation as the IPv4
 * variant, except that oaddr and naddr each point to the eight 16-bit
 * words of an address.  The (unfolded) result is stored in *adj_sum.
 */
static void
hnat_cksum_v6(uint16_t *oaddr, uint16_t *naddr, in_port_t old_port,
    in_port_t new_port, uint32_t *adj_sum)
{
	uint32_t old_part = old_port;
	uint32_t new_part = new_port;
	int w;

	for (w = 0; w < 8; w++) {
		old_part += oaddr[w];
		new_part += naddr[w];
	}
	/* Fold the carries back into the low 16 bits. */
	while (old_part > 0xffff)
		old_part = (old_part >> 16) + (old_part & 0xffff);

	*adj_sum = (uint16_t)~old_part + new_part;
}
453dbed73cbSSangeeta Misra 
/*
 * Full-NAT checksum adjustment for IPv4, where two addresses and two
 * ports change.  Each address pointer refers to two 16-bit words.  All
 * the old words and ports are summed, folded to 16 bits and complemented;
 * all the new words and ports are then added on top.  The (unfolded)
 * result is stored in *adj_sum.
 */
static void
fnat_cksum_v4(uint16_t *oaddr1, uint16_t *oaddr2, uint16_t *naddr1,
    uint16_t *naddr2, in_port_t old_port1, in_port_t old_port2,
    in_port_t new_port1, in_port_t new_port2, uint32_t *adj_sum)
{
	uint32_t old_part, new_part;

	old_part = old_port1;
	old_part += old_port2;
	old_part += oaddr1[0];
	old_part += oaddr1[1];
	old_part += oaddr2[0];
	old_part += oaddr2[1];
	/* Fold the carries back into the low 16 bits. */
	while (old_part > 0xffff)
		old_part = (old_part >> 16) + (old_part & 0xffff);

	new_part = new_port1;
	new_part += new_port2;
	new_part += naddr1[0];
	new_part += naddr1[1];
	new_part += naddr2[0];
	new_part += naddr2[1];

	*adj_sum = (uint16_t)~old_part + new_part;
}
468dbed73cbSSangeeta Misra 
/*
 * Full-NAT checksum adjustment for IPv6.  Same computation as the IPv4
 * variant, except that each address pointer refers to the eight 16-bit
 * words of an address.  The (unfolded) result is stored in *adj_sum.
 */
static void
fnat_cksum_v6(uint16_t *oaddr1, uint16_t *oaddr2, uint16_t *naddr1,
    uint16_t *naddr2, in_port_t old_port1, in_port_t old_port2,
    in_port_t new_port1, in_port_t new_port2, uint32_t *adj_sum)
{
	uint32_t old_part, new_part;
	int w;

	old_part = old_port1;
	old_part += old_port2;
	new_part = new_port1;
	new_part += new_port2;
	for (w = 0; w < 8; w++) {
		old_part += oaddr1[w];
		old_part += oaddr2[w];
		new_part += naddr1[w];
		new_part += naddr2[w];
	}
	/* Fold the carries back into the low 16 bits. */
	while (old_part > 0xffff)
		old_part = (old_part >> 16) + (old_part & 0xffff);

	*adj_sum = (uint16_t)~old_part + new_part;
}
491dbed73cbSSangeeta Misra 
/*
 * Add a conn hash entry to the tables.  Note that a conn hash entry
 * (ilb_conn_t) contains info on both directions.  And there are two hash
 * tables, one for client to server and the other for server to client.
 * So the same entry is added to both tables and can be accessed by two
 * threads simultaneously.  But each thread will only access data on one
 * direction, so there is no conflict.
 */
500dbed73cbSSangeeta Misra int
501dbed73cbSSangeeta Misra ilb_conn_add(ilb_stack_t *ilbs, ilb_rule_t *rule, ilb_server_t *server,
502dbed73cbSSangeeta Misra     in6_addr_t *src, in_port_t sport, in6_addr_t *dst, in_port_t dport,
503dbed73cbSSangeeta Misra     ilb_nat_info_t *info, uint32_t *ip_sum, uint32_t *tp_sum, ilb_sticky_t *s)
504dbed73cbSSangeeta Misra {
505dbed73cbSSangeeta Misra 	ilb_conn_t *connp;
506dbed73cbSSangeeta Misra 	ilb_conn_hash_t *hash;
507dbed73cbSSangeeta Misra 	int i;
508dbed73cbSSangeeta Misra 
509dbed73cbSSangeeta Misra 	connp = kmem_cache_alloc(ilb_conn_cache, KM_NOSLEEP);
510dbed73cbSSangeeta Misra 	if (connp == NULL) {
511dbed73cbSSangeeta Misra 		if (s != NULL) {
512dbed73cbSSangeeta Misra 			if (rule->ir_topo == ILB_TOPO_IMPL_NAT) {
513dbed73cbSSangeeta Misra 				ilb_nat_src_entry_t **entry;
514dbed73cbSSangeeta Misra 
515dbed73cbSSangeeta Misra 				entry = s->server->iser_nat_src->src_list;
516dbed73cbSSangeeta Misra 				vmem_free(entry[s->nat_src_idx]->nse_port_arena,
517dbed73cbSSangeeta Misra 				    (void *)(uintptr_t)ntohs(info->nat_sport),
518dbed73cbSSangeeta Misra 				    1);
519dbed73cbSSangeeta Misra 			}
520dbed73cbSSangeeta Misra 			ILB_STICKY_REFRELE(s);
521dbed73cbSSangeeta Misra 		}
522dbed73cbSSangeeta Misra 		return (ENOMEM);
523dbed73cbSSangeeta Misra 	}
524dbed73cbSSangeeta Misra 
525dbed73cbSSangeeta Misra 	connp->conn_l4 = rule->ir_proto;
526dbed73cbSSangeeta Misra 
527dbed73cbSSangeeta Misra 	connp->conn_server = server;
528dbed73cbSSangeeta Misra 	ILB_SERVER_REFHOLD(server);
529dbed73cbSSangeeta Misra 	connp->conn_sticky = s;
530dbed73cbSSangeeta Misra 
531dbed73cbSSangeeta Misra 	connp->conn_rule_cache.topo = rule->ir_topo;
532dbed73cbSSangeeta Misra 	connp->conn_rule_cache.info = *info;
533dbed73cbSSangeeta Misra 
534dbed73cbSSangeeta Misra 	connp->conn_gc = B_FALSE;
535dbed73cbSSangeeta Misra 
536dbed73cbSSangeeta Misra 	connp->conn_expiry = rule->ir_nat_expiry;
537d3d50737SRafael Vanoni 	connp->conn_cr_time = ddi_get_lbolt64();
538dbed73cbSSangeeta Misra 
539dbed73cbSSangeeta Misra 	/* Client to server info. */
540dbed73cbSSangeeta Misra 	connp->conn_c2s_saddr = *src;
541dbed73cbSSangeeta Misra 	connp->conn_c2s_sport = sport;
542dbed73cbSSangeeta Misra 	connp->conn_c2s_daddr = *dst;
543dbed73cbSSangeeta Misra 	connp->conn_c2s_dport = dport;
544dbed73cbSSangeeta Misra 
545d3d50737SRafael Vanoni 	connp->conn_c2s_atime = ddi_get_lbolt64();
546dbed73cbSSangeeta Misra 	/* The packet ths triggers this creation should be counted */
547dbed73cbSSangeeta Misra 	connp->conn_c2s_pkt_cnt = 1;
548dbed73cbSSangeeta Misra 	connp->conn_c2s_tcp_fin_sent = B_FALSE;
549dbed73cbSSangeeta Misra 	connp->conn_c2s_tcp_fin_acked = B_FALSE;
550dbed73cbSSangeeta Misra 
551dbed73cbSSangeeta Misra 	/* Server to client info, before NAT */
552dbed73cbSSangeeta Misra 	switch (rule->ir_topo) {
553dbed73cbSSangeeta Misra 	case ILB_TOPO_IMPL_HALF_NAT:
554dbed73cbSSangeeta Misra 		connp->conn_s2c_saddr = info->nat_dst;
555dbed73cbSSangeeta Misra 		connp->conn_s2c_sport = info->nat_dport;
556dbed73cbSSangeeta Misra 		connp->conn_s2c_daddr = *src;
557dbed73cbSSangeeta Misra 		connp->conn_s2c_dport = sport;
558dbed73cbSSangeeta Misra 
559dbed73cbSSangeeta Misra 		/* Pre-calculate checksum changes for both directions */
560dbed73cbSSangeeta Misra 		if (rule->ir_ipver == IPPROTO_IP) {
561dbed73cbSSangeeta Misra 			hnat_cksum_v4((uint16_t *)&dst->s6_addr32[3],
562dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_dst.s6_addr32[3], 0, 0,
563dbed73cbSSangeeta Misra 			    &connp->conn_c2s_ip_sum);
564dbed73cbSSangeeta Misra 			hnat_cksum_v4((uint16_t *)&dst->s6_addr32[3],
565dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_dst.s6_addr32[3], dport,
566dbed73cbSSangeeta Misra 			    info->nat_dport, &connp->conn_c2s_tp_sum);
567dbed73cbSSangeeta Misra 			*ip_sum = connp->conn_c2s_ip_sum;
568dbed73cbSSangeeta Misra 			*tp_sum = connp->conn_c2s_tp_sum;
569dbed73cbSSangeeta Misra 
570dbed73cbSSangeeta Misra 			hnat_cksum_v4(
571dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_dst.s6_addr32[3],
572dbed73cbSSangeeta Misra 			    (uint16_t *)&dst->s6_addr32[3], 0, 0,
573dbed73cbSSangeeta Misra 			    &connp->conn_s2c_ip_sum);
574dbed73cbSSangeeta Misra 			hnat_cksum_v4(
575dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_dst.s6_addr32[3],
576dbed73cbSSangeeta Misra 			    (uint16_t *)&dst->s6_addr32[3],
577dbed73cbSSangeeta Misra 			    info->nat_dport, dport,
578dbed73cbSSangeeta Misra 			    &connp->conn_s2c_tp_sum);
579dbed73cbSSangeeta Misra 		} else {
580dbed73cbSSangeeta Misra 			connp->conn_c2s_ip_sum = 0;
581dbed73cbSSangeeta Misra 			hnat_cksum_v6((uint16_t *)dst,
582dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_dst, dport,
583dbed73cbSSangeeta Misra 			    info->nat_dport, &connp->conn_c2s_tp_sum);
584dbed73cbSSangeeta Misra 			*ip_sum = 0;
585dbed73cbSSangeeta Misra 			*tp_sum = connp->conn_c2s_tp_sum;
586dbed73cbSSangeeta Misra 
587dbed73cbSSangeeta Misra 			connp->conn_s2c_ip_sum = 0;
588dbed73cbSSangeeta Misra 			hnat_cksum_v6((uint16_t *)&info->nat_dst,
589dbed73cbSSangeeta Misra 			    (uint16_t *)dst, info->nat_dport, dport,
590dbed73cbSSangeeta Misra 			    &connp->conn_s2c_tp_sum);
591dbed73cbSSangeeta Misra 		}
592dbed73cbSSangeeta Misra 		break;
593dbed73cbSSangeeta Misra 	case ILB_TOPO_IMPL_NAT:
594dbed73cbSSangeeta Misra 		connp->conn_s2c_saddr = info->nat_dst;
595dbed73cbSSangeeta Misra 		connp->conn_s2c_sport = info->nat_dport;
596dbed73cbSSangeeta Misra 		connp->conn_s2c_daddr = info->nat_src;
597dbed73cbSSangeeta Misra 		connp->conn_s2c_dport = info->nat_sport;
598dbed73cbSSangeeta Misra 
599dbed73cbSSangeeta Misra 		if (rule->ir_ipver == IPPROTO_IP) {
600dbed73cbSSangeeta Misra 			fnat_cksum_v4((uint16_t *)&src->s6_addr32[3],
601dbed73cbSSangeeta Misra 			    (uint16_t *)&dst->s6_addr32[3],
602dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_src.s6_addr32[3],
603dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_dst.s6_addr32[3],
604dbed73cbSSangeeta Misra 			    0, 0, 0, 0, &connp->conn_c2s_ip_sum);
605dbed73cbSSangeeta Misra 			fnat_cksum_v4((uint16_t *)&src->s6_addr32[3],
606dbed73cbSSangeeta Misra 			    (uint16_t *)&dst->s6_addr32[3],
607dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_src.s6_addr32[3],
608dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_dst.s6_addr32[3],
609dbed73cbSSangeeta Misra 			    sport, dport, info->nat_sport,
610dbed73cbSSangeeta Misra 			    info->nat_dport, &connp->conn_c2s_tp_sum);
611dbed73cbSSangeeta Misra 			*ip_sum = connp->conn_c2s_ip_sum;
612dbed73cbSSangeeta Misra 			*tp_sum = connp->conn_c2s_tp_sum;
613dbed73cbSSangeeta Misra 
614dbed73cbSSangeeta Misra 			fnat_cksum_v4(
615dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_src.s6_addr32[3],
616dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_dst.s6_addr32[3],
617dbed73cbSSangeeta Misra 			    (uint16_t *)&src->s6_addr32[3],
618dbed73cbSSangeeta Misra 			    (uint16_t *)&dst->s6_addr32[3],
619dbed73cbSSangeeta Misra 			    0, 0, 0, 0, &connp->conn_s2c_ip_sum);
620dbed73cbSSangeeta Misra 			fnat_cksum_v4(
621dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_src.s6_addr32[3],
622dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_dst.s6_addr32[3],
623dbed73cbSSangeeta Misra 			    (uint16_t *)&src->s6_addr32[3],
624dbed73cbSSangeeta Misra 			    (uint16_t *)&dst->s6_addr32[3],
625dbed73cbSSangeeta Misra 			    info->nat_sport, info->nat_dport,
626dbed73cbSSangeeta Misra 			    sport, dport, &connp->conn_s2c_tp_sum);
627dbed73cbSSangeeta Misra 		} else {
628dbed73cbSSangeeta Misra 			fnat_cksum_v6((uint16_t *)src, (uint16_t *)dst,
629dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_src,
630dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_dst,
631dbed73cbSSangeeta Misra 			    sport, dport, info->nat_sport,
632dbed73cbSSangeeta Misra 			    info->nat_dport, &connp->conn_c2s_tp_sum);
633dbed73cbSSangeeta Misra 			connp->conn_c2s_ip_sum = 0;
634dbed73cbSSangeeta Misra 			*ip_sum = 0;
635dbed73cbSSangeeta Misra 			*tp_sum = connp->conn_c2s_tp_sum;
636dbed73cbSSangeeta Misra 
637dbed73cbSSangeeta Misra 			fnat_cksum_v6((uint16_t *)&info->nat_src,
638dbed73cbSSangeeta Misra 			    (uint16_t *)&info->nat_dst, (uint16_t *)src,
639dbed73cbSSangeeta Misra 			    (uint16_t *)dst, info->nat_sport,
640dbed73cbSSangeeta Misra 			    info->nat_dport, sport, dport,
641dbed73cbSSangeeta Misra 			    &connp->conn_s2c_tp_sum);
642dbed73cbSSangeeta Misra 			connp->conn_s2c_ip_sum = 0;
643dbed73cbSSangeeta Misra 		}
644dbed73cbSSangeeta Misra 		break;
645dbed73cbSSangeeta Misra 	}
646dbed73cbSSangeeta Misra 
647d3d50737SRafael Vanoni 	connp->conn_s2c_atime = ddi_get_lbolt64();
648dbed73cbSSangeeta Misra 	connp->conn_s2c_pkt_cnt = 1;
649dbed73cbSSangeeta Misra 	connp->conn_s2c_tcp_fin_sent = B_FALSE;
650dbed73cbSSangeeta Misra 	connp->conn_s2c_tcp_fin_acked = B_FALSE;
651dbed73cbSSangeeta Misra 
652dbed73cbSSangeeta Misra 	/* Add it to the s2c hash table. */
653dbed73cbSSangeeta Misra 	hash = ilbs->ilbs_s2c_conn_hash;
654dbed73cbSSangeeta Misra 	i = ILB_CONN_HASH((uint8_t *)&connp->conn_s2c_saddr.s6_addr32[3],
655dbed73cbSSangeeta Misra 	    ntohs(connp->conn_s2c_sport),
656dbed73cbSSangeeta Misra 	    (uint8_t *)&connp->conn_s2c_daddr.s6_addr32[3],
657dbed73cbSSangeeta Misra 	    ntohs(connp->conn_s2c_dport), ilbs->ilbs_conn_hash_size);
658dbed73cbSSangeeta Misra 	connp->conn_s2c_hash = &hash[i];
659dbed73cbSSangeeta Misra 	DTRACE_PROBE2(ilb__conn__hash__add__s2c, ilb_conn_t *, connp, int, i);
660dbed73cbSSangeeta Misra 
661dbed73cbSSangeeta Misra 	mutex_enter(&hash[i].ilb_conn_hash_lock);
662dbed73cbSSangeeta Misra 	hash[i].ilb_conn_cnt++;
663dbed73cbSSangeeta Misra 	connp->conn_s2c_next = hash[i].ilb_connp;
664dbed73cbSSangeeta Misra 	if (hash[i].ilb_connp != NULL)
665dbed73cbSSangeeta Misra 		hash[i].ilb_connp->conn_s2c_prev = connp;
666dbed73cbSSangeeta Misra 	connp->conn_s2c_prev = NULL;
667dbed73cbSSangeeta Misra 	hash[i].ilb_connp = connp;
668dbed73cbSSangeeta Misra 	mutex_exit(&hash[i].ilb_conn_hash_lock);
669dbed73cbSSangeeta Misra 
670dbed73cbSSangeeta Misra 	/* Add it to the c2s hash table. */
671dbed73cbSSangeeta Misra 	hash = ilbs->ilbs_c2s_conn_hash;
672dbed73cbSSangeeta Misra 	i = ILB_CONN_HASH((uint8_t *)&src->s6_addr32[3], ntohs(sport),
673dbed73cbSSangeeta Misra 	    (uint8_t *)&dst->s6_addr32[3], ntohs(dport),
674dbed73cbSSangeeta Misra 	    ilbs->ilbs_conn_hash_size);
675dbed73cbSSangeeta Misra 	connp->conn_c2s_hash = &hash[i];
676dbed73cbSSangeeta Misra 	DTRACE_PROBE2(ilb__conn__hash__add__c2s, ilb_conn_t *, connp, int, i);
677dbed73cbSSangeeta Misra 
678dbed73cbSSangeeta Misra 	mutex_enter(&hash[i].ilb_conn_hash_lock);
679dbed73cbSSangeeta Misra 	hash[i].ilb_conn_cnt++;
680dbed73cbSSangeeta Misra 	connp->conn_c2s_next = hash[i].ilb_connp;
681dbed73cbSSangeeta Misra 	if (hash[i].ilb_connp != NULL)
682dbed73cbSSangeeta Misra 		hash[i].ilb_connp->conn_c2s_prev = connp;
683dbed73cbSSangeeta Misra 	connp->conn_c2s_prev = NULL;
684dbed73cbSSangeeta Misra 	hash[i].ilb_connp = connp;
685dbed73cbSSangeeta Misra 	mutex_exit(&hash[i].ilb_conn_hash_lock);
686dbed73cbSSangeeta Misra 
687dbed73cbSSangeeta Misra 	return (0);
688dbed73cbSSangeeta Misra }
689dbed73cbSSangeeta Misra 
690dbed73cbSSangeeta Misra /*
691dbed73cbSSangeeta Misra  * If a connection is using TCP, we keep track of simple TCP state transition
692dbed73cbSSangeeta Misra  * so that we know when to clean up an entry.
693dbed73cbSSangeeta Misra  */
694dbed73cbSSangeeta Misra static boolean_t
695dbed73cbSSangeeta Misra update_conn_tcp(ilb_conn_t *connp, void *iph, tcpha_t *tcpha, int32_t pkt_len,
696dbed73cbSSangeeta Misra     boolean_t c2s)
697dbed73cbSSangeeta Misra {
698dbed73cbSSangeeta Misra 	uint32_t ack, seq;
699dbed73cbSSangeeta Misra 	int32_t seg_len;
700dbed73cbSSangeeta Misra 
701dbed73cbSSangeeta Misra 	if (tcpha->tha_flags & TH_RST)
702dbed73cbSSangeeta Misra 		return (B_FALSE);
703dbed73cbSSangeeta Misra 
704dbed73cbSSangeeta Misra 	seg_len = pkt_len - ((uint8_t *)tcpha - (uint8_t *)iph) -
705dbed73cbSSangeeta Misra 	    TCP_HDR_LENGTH((tcph_t *)tcpha);
706dbed73cbSSangeeta Misra 
707dbed73cbSSangeeta Misra 	if (tcpha->tha_flags & TH_ACK)
708dbed73cbSSangeeta Misra 		ack = ntohl(tcpha->tha_ack);
709dbed73cbSSangeeta Misra 	seq = ntohl(tcpha->tha_seq);
710dbed73cbSSangeeta Misra 	if (c2s) {
711dbed73cbSSangeeta Misra 		ASSERT(MUTEX_HELD(&connp->conn_c2s_hash->ilb_conn_hash_lock));
712dbed73cbSSangeeta Misra 		if (tcpha->tha_flags & TH_FIN) {
713dbed73cbSSangeeta Misra 			connp->conn_c2s_tcp_fss = seq + seg_len;
714dbed73cbSSangeeta Misra 			connp->conn_c2s_tcp_fin_sent = B_TRUE;
715dbed73cbSSangeeta Misra 		}
716dbed73cbSSangeeta Misra 		connp->conn_c2s_tcp_ack = ack;
717dbed73cbSSangeeta Misra 
718dbed73cbSSangeeta Misra 		/* Port reuse by the client, restart the conn. */
719dbed73cbSSangeeta Misra 		if (connp->conn_c2s_tcp_fin_sent &&
720dbed73cbSSangeeta Misra 		    SEQ_GT(seq, connp->conn_c2s_tcp_fss + 1)) {
721dbed73cbSSangeeta Misra 			connp->conn_c2s_tcp_fin_sent = B_FALSE;
722dbed73cbSSangeeta Misra 			connp->conn_c2s_tcp_fin_acked = B_FALSE;
723dbed73cbSSangeeta Misra 		}
724dbed73cbSSangeeta Misra 	} else {
725dbed73cbSSangeeta Misra 		ASSERT(MUTEX_HELD(&connp->conn_s2c_hash->ilb_conn_hash_lock));
726dbed73cbSSangeeta Misra 		if (tcpha->tha_flags & TH_FIN) {
727dbed73cbSSangeeta Misra 			connp->conn_s2c_tcp_fss = seq + seg_len;
728dbed73cbSSangeeta Misra 			connp->conn_s2c_tcp_fin_sent = B_TRUE;
729dbed73cbSSangeeta Misra 		}
730dbed73cbSSangeeta Misra 		connp->conn_s2c_tcp_ack = ack;
731dbed73cbSSangeeta Misra 
732dbed73cbSSangeeta Misra 		/* Port reuse by the client, restart the conn. */
733dbed73cbSSangeeta Misra 		if (connp->conn_s2c_tcp_fin_sent &&
734dbed73cbSSangeeta Misra 		    SEQ_GT(seq, connp->conn_s2c_tcp_fss + 1)) {
735dbed73cbSSangeeta Misra 			connp->conn_s2c_tcp_fin_sent = B_FALSE;
736dbed73cbSSangeeta Misra 			connp->conn_s2c_tcp_fin_acked = B_FALSE;
737dbed73cbSSangeeta Misra 		}
738dbed73cbSSangeeta Misra 	}
739dbed73cbSSangeeta Misra 
740dbed73cbSSangeeta Misra 	return (B_TRUE);
741dbed73cbSSangeeta Misra }
742dbed73cbSSangeeta Misra 
743dbed73cbSSangeeta Misra /*
744dbed73cbSSangeeta Misra  * Helper routint to find conn hash entry given some packet information and
745dbed73cbSSangeeta Misra  * the traffic direction (c2s, client to server?)
746dbed73cbSSangeeta Misra  */
747dbed73cbSSangeeta Misra static boolean_t
748dbed73cbSSangeeta Misra ilb_find_conn(ilb_stack_t *ilbs, void *iph, void *tph, int l4, in6_addr_t *src,
749dbed73cbSSangeeta Misra     in_port_t sport, in6_addr_t *dst, in_port_t dport,
750dbed73cbSSangeeta Misra     ilb_rule_info_t *rule_cache, uint32_t *ip_sum, uint32_t *tp_sum,
751dbed73cbSSangeeta Misra     int32_t pkt_len, boolean_t c2s)
752dbed73cbSSangeeta Misra {
753dbed73cbSSangeeta Misra 	ilb_conn_hash_t *hash;
754dbed73cbSSangeeta Misra 	uint_t i;
755dbed73cbSSangeeta Misra 	ilb_conn_t *connp;
756dbed73cbSSangeeta Misra 	boolean_t tcp_alive;
757dbed73cbSSangeeta Misra 	boolean_t ret = B_FALSE;
758dbed73cbSSangeeta Misra 
759dbed73cbSSangeeta Misra 	i = ILB_CONN_HASH((uint8_t *)&src->s6_addr32[3], ntohs(sport),
760dbed73cbSSangeeta Misra 	    (uint8_t *)&dst->s6_addr32[3], ntohs(dport),
761dbed73cbSSangeeta Misra 	    ilbs->ilbs_conn_hash_size);
762dbed73cbSSangeeta Misra 	if (c2s) {
763dbed73cbSSangeeta Misra 		hash = ilbs->ilbs_c2s_conn_hash;
764dbed73cbSSangeeta Misra 		mutex_enter(&hash[i].ilb_conn_hash_lock);
765dbed73cbSSangeeta Misra 		for (connp = hash[i].ilb_connp; connp != NULL;
766dbed73cbSSangeeta Misra 		    connp = connp->conn_c2s_next) {
767dbed73cbSSangeeta Misra 			if (connp->conn_l4 == l4 &&
768dbed73cbSSangeeta Misra 			    connp->conn_c2s_dport == dport &&
769dbed73cbSSangeeta Misra 			    connp->conn_c2s_sport == sport &&
770dbed73cbSSangeeta Misra 			    IN6_ARE_ADDR_EQUAL(src, &connp->conn_c2s_saddr) &&
771dbed73cbSSangeeta Misra 			    IN6_ARE_ADDR_EQUAL(dst, &connp->conn_c2s_daddr)) {
772d3d50737SRafael Vanoni 				connp->conn_c2s_atime = ddi_get_lbolt64();
773dbed73cbSSangeeta Misra 				connp->conn_c2s_pkt_cnt++;
774dbed73cbSSangeeta Misra 				*rule_cache = connp->conn_rule_cache;
775dbed73cbSSangeeta Misra 				*ip_sum = connp->conn_c2s_ip_sum;
776dbed73cbSSangeeta Misra 				*tp_sum = connp->conn_c2s_tp_sum;
777dbed73cbSSangeeta Misra 				ret = B_TRUE;
778dbed73cbSSangeeta Misra 				break;
779dbed73cbSSangeeta Misra 			}
780dbed73cbSSangeeta Misra 		}
781dbed73cbSSangeeta Misra 	} else {
782dbed73cbSSangeeta Misra 		hash = ilbs->ilbs_s2c_conn_hash;
783dbed73cbSSangeeta Misra 		mutex_enter(&hash[i].ilb_conn_hash_lock);
784dbed73cbSSangeeta Misra 		for (connp = hash[i].ilb_connp; connp != NULL;
785dbed73cbSSangeeta Misra 		    connp = connp->conn_s2c_next) {
786dbed73cbSSangeeta Misra 			if (connp->conn_l4 == l4 &&
787dbed73cbSSangeeta Misra 			    connp->conn_s2c_dport == dport &&
788dbed73cbSSangeeta Misra 			    connp->conn_s2c_sport == sport &&
789dbed73cbSSangeeta Misra 			    IN6_ARE_ADDR_EQUAL(src, &connp->conn_s2c_saddr) &&
790dbed73cbSSangeeta Misra 			    IN6_ARE_ADDR_EQUAL(dst, &connp->conn_s2c_daddr)) {
791d3d50737SRafael Vanoni 				connp->conn_s2c_atime = ddi_get_lbolt64();
792dbed73cbSSangeeta Misra 				connp->conn_s2c_pkt_cnt++;
793dbed73cbSSangeeta Misra 				*rule_cache = connp->conn_rule_cache;
794dbed73cbSSangeeta Misra 				*ip_sum = connp->conn_s2c_ip_sum;
795dbed73cbSSangeeta Misra 				*tp_sum = connp->conn_s2c_tp_sum;
796dbed73cbSSangeeta Misra 				ret = B_TRUE;
797dbed73cbSSangeeta Misra 				break;
798dbed73cbSSangeeta Misra 			}
799dbed73cbSSangeeta Misra 		}
800dbed73cbSSangeeta Misra 	}
801dbed73cbSSangeeta Misra 	if (ret) {
802dbed73cbSSangeeta Misra 		ILB_S_KSTAT(connp->conn_server, pkt_processed);
803dbed73cbSSangeeta Misra 		ILB_S_KSTAT_UPDATE(connp->conn_server, bytes_processed,
804dbed73cbSSangeeta Misra 		    pkt_len);
805dbed73cbSSangeeta Misra 
806dbed73cbSSangeeta Misra 		switch (l4) {
807dbed73cbSSangeeta Misra 		case (IPPROTO_TCP):
808dbed73cbSSangeeta Misra 			tcp_alive = update_conn_tcp(connp, iph, tph, pkt_len,
809dbed73cbSSangeeta Misra 			    c2s);
810dbed73cbSSangeeta Misra 			if (!tcp_alive) {
811dbed73cbSSangeeta Misra 				connp->conn_gc = B_TRUE;
812dbed73cbSSangeeta Misra 			}
813dbed73cbSSangeeta Misra 			break;
814dbed73cbSSangeeta Misra 		default:
815dbed73cbSSangeeta Misra 			break;
816dbed73cbSSangeeta Misra 		}
817dbed73cbSSangeeta Misra 	}
818dbed73cbSSangeeta Misra 	mutex_exit(&hash[i].ilb_conn_hash_lock);
819dbed73cbSSangeeta Misra 
820dbed73cbSSangeeta Misra 	return (ret);
821dbed73cbSSangeeta Misra }
822dbed73cbSSangeeta Misra 
823dbed73cbSSangeeta Misra /*
824dbed73cbSSangeeta Misra  * To check if a give packet matches an existing conn hash entry.  If it
825dbed73cbSSangeeta Misra  * does, return the information about this entry so that the caller can
826dbed73cbSSangeeta Misra  * do the proper NAT.
827dbed73cbSSangeeta Misra  */
828dbed73cbSSangeeta Misra boolean_t
829dbed73cbSSangeeta Misra ilb_check_conn(ilb_stack_t *ilbs, int l3, void *iph, int l4, void *tph,
830dbed73cbSSangeeta Misra     in6_addr_t *src, in6_addr_t *dst, in_port_t sport, in_port_t dport,
831dbed73cbSSangeeta Misra     uint32_t pkt_len, in6_addr_t *lb_dst)
832dbed73cbSSangeeta Misra {
833dbed73cbSSangeeta Misra 	ilb_rule_info_t rule_cache;
834dbed73cbSSangeeta Misra 	uint32_t adj_ip_sum, adj_tp_sum;
835dbed73cbSSangeeta Misra 	boolean_t ret;
836dbed73cbSSangeeta Misra 
837dbed73cbSSangeeta Misra 	/* Check the incoming hash table. */
838dbed73cbSSangeeta Misra 	if (ilb_find_conn(ilbs, iph, tph, l4, src, sport, dst, dport,
839dbed73cbSSangeeta Misra 	    &rule_cache, &adj_ip_sum, &adj_tp_sum, pkt_len, B_TRUE)) {
840dbed73cbSSangeeta Misra 		switch (rule_cache.topo) {
841dbed73cbSSangeeta Misra 		case ILB_TOPO_IMPL_NAT:
842dbed73cbSSangeeta Misra 			*lb_dst = rule_cache.info.nat_dst;
843dbed73cbSSangeeta Misra 			ilb_full_nat(l3, iph, l4, tph, &rule_cache.info,
844dbed73cbSSangeeta Misra 			    adj_ip_sum, adj_tp_sum, B_TRUE);
845dbed73cbSSangeeta Misra 			ret = B_TRUE;
846dbed73cbSSangeeta Misra 			break;
847dbed73cbSSangeeta Misra 		case ILB_TOPO_IMPL_HALF_NAT:
848dbed73cbSSangeeta Misra 			*lb_dst = rule_cache.info.nat_dst;
849dbed73cbSSangeeta Misra 			ilb_half_nat(l3, iph, l4, tph, &rule_cache.info,
850dbed73cbSSangeeta Misra 			    adj_ip_sum, adj_tp_sum, B_TRUE);
851dbed73cbSSangeeta Misra 			ret = B_TRUE;
852dbed73cbSSangeeta Misra 			break;
853dbed73cbSSangeeta Misra 		default:
854dbed73cbSSangeeta Misra 			ret = B_FALSE;
855dbed73cbSSangeeta Misra 			break;
856dbed73cbSSangeeta Misra 		}
857dbed73cbSSangeeta Misra 		return (ret);
858dbed73cbSSangeeta Misra 	}
859dbed73cbSSangeeta Misra 	if (ilb_find_conn(ilbs, iph, tph, l4, src, sport, dst, dport,
860dbed73cbSSangeeta Misra 	    &rule_cache, &adj_ip_sum, &adj_tp_sum, pkt_len, B_FALSE)) {
861dbed73cbSSangeeta Misra 		switch (rule_cache.topo) {
862dbed73cbSSangeeta Misra 		case ILB_TOPO_IMPL_NAT:
863dbed73cbSSangeeta Misra 			*lb_dst = rule_cache.info.src;
864dbed73cbSSangeeta Misra 			ilb_full_nat(l3, iph, l4, tph, &rule_cache.info,
865dbed73cbSSangeeta Misra 			    adj_ip_sum, adj_tp_sum, B_FALSE);
866dbed73cbSSangeeta Misra 			ret = B_TRUE;
867dbed73cbSSangeeta Misra 			break;
868dbed73cbSSangeeta Misra 		case ILB_TOPO_IMPL_HALF_NAT:
869dbed73cbSSangeeta Misra 			*lb_dst = *dst;
870dbed73cbSSangeeta Misra 			ilb_half_nat(l3, iph, l4, tph, &rule_cache.info,
871dbed73cbSSangeeta Misra 			    adj_ip_sum, adj_tp_sum, B_FALSE);
872dbed73cbSSangeeta Misra 			ret = B_TRUE;
873dbed73cbSSangeeta Misra 			break;
874dbed73cbSSangeeta Misra 		default:
875dbed73cbSSangeeta Misra 			ret = B_FALSE;
876dbed73cbSSangeeta Misra 			break;
877dbed73cbSSangeeta Misra 		}
878dbed73cbSSangeeta Misra 		return (ret);
879dbed73cbSSangeeta Misra 	}
880dbed73cbSSangeeta Misra 
881dbed73cbSSangeeta Misra 	return (B_FALSE);
882dbed73cbSSangeeta Misra }
883dbed73cbSSangeeta Misra 
884dbed73cbSSangeeta Misra /*
885dbed73cbSSangeeta Misra  * To check if an ICMP packet belongs to a connection in one of the conn
886dbed73cbSSangeeta Misra  * hash entries.
887dbed73cbSSangeeta Misra  */
888dbed73cbSSangeeta Misra boolean_t
889dbed73cbSSangeeta Misra ilb_check_icmp_conn(ilb_stack_t *ilbs, mblk_t *mp, int l3, void *out_iph,
890dbed73cbSSangeeta Misra     void *icmph, in6_addr_t *lb_dst)
891dbed73cbSSangeeta Misra {
892dbed73cbSSangeeta Misra 	ilb_conn_hash_t *hash;
893dbed73cbSSangeeta Misra 	ipha_t *in_iph4;
894dbed73cbSSangeeta Misra 	ip6_t *in_iph6;
895dbed73cbSSangeeta Misra 	icmph_t *icmph4;
896dbed73cbSSangeeta Misra 	icmp6_t *icmph6;
897dbed73cbSSangeeta Misra 	in6_addr_t *in_src_p, *in_dst_p;
898dbed73cbSSangeeta Misra 	in_port_t *sport, *dport;
899dbed73cbSSangeeta Misra 	int l4;
900dbed73cbSSangeeta Misra 	uint_t i;
901dbed73cbSSangeeta Misra 	ilb_conn_t *connp;
902dbed73cbSSangeeta Misra 	ilb_rule_info_t rule_cache;
903dbed73cbSSangeeta Misra 	uint32_t adj_ip_sum;
904dbed73cbSSangeeta Misra 	boolean_t full_nat;
905dbed73cbSSangeeta Misra 
906dbed73cbSSangeeta Misra 	if (l3 == IPPROTO_IP) {
907dbed73cbSSangeeta Misra 		in6_addr_t in_src, in_dst;
908dbed73cbSSangeeta Misra 
909dbed73cbSSangeeta Misra 		icmph4 = (icmph_t *)icmph;
910dbed73cbSSangeeta Misra 		in_iph4 = (ipha_t *)&icmph4[1];
911dbed73cbSSangeeta Misra 
912dbed73cbSSangeeta Misra 		if ((uint8_t *)in_iph4 + IPH_HDR_LENGTH(in_iph4) +
913dbed73cbSSangeeta Misra 		    ICMP_MIN_TP_HDR_LEN > mp->b_wptr) {
914dbed73cbSSangeeta Misra 			return (B_FALSE);
915dbed73cbSSangeeta Misra 		}
916dbed73cbSSangeeta Misra 
917dbed73cbSSangeeta Misra 		IN6_IPADDR_TO_V4MAPPED(in_iph4->ipha_src, &in_src);
918dbed73cbSSangeeta Misra 		in_src_p = &in_src;
919dbed73cbSSangeeta Misra 		IN6_IPADDR_TO_V4MAPPED(in_iph4->ipha_dst, &in_dst);
920dbed73cbSSangeeta Misra 		in_dst_p = &in_dst;
921dbed73cbSSangeeta Misra 
922dbed73cbSSangeeta Misra 		l4 = in_iph4->ipha_protocol;
923dbed73cbSSangeeta Misra 		if (l4 != IPPROTO_TCP && l4 != IPPROTO_UDP)
924dbed73cbSSangeeta Misra 			return (B_FALSE);
925dbed73cbSSangeeta Misra 
926dbed73cbSSangeeta Misra 		sport = (in_port_t *)((char *)in_iph4 +
927dbed73cbSSangeeta Misra 		    IPH_HDR_LENGTH(in_iph4));
928dbed73cbSSangeeta Misra 		dport = sport + 1;
929dbed73cbSSangeeta Misra 
930dbed73cbSSangeeta Misra 		DTRACE_PROBE4(ilb__chk__icmp__conn__v4, uint32_t,
931dbed73cbSSangeeta Misra 		    in_iph4->ipha_src, uint32_t, in_iph4->ipha_dst, uint16_t,
932dbed73cbSSangeeta Misra 		    ntohs(*sport), uint16_t, ntohs(*dport));
933dbed73cbSSangeeta Misra 	} else {
934dbed73cbSSangeeta Misra 		ASSERT(l3 == IPPROTO_IPV6);
935dbed73cbSSangeeta Misra 
936dbed73cbSSangeeta Misra 		icmph6 = (icmp6_t *)icmph;
937dbed73cbSSangeeta Misra 		in_iph6 = (ip6_t *)&icmph6[1];
938dbed73cbSSangeeta Misra 		in_src_p = &in_iph6->ip6_src;
939dbed73cbSSangeeta Misra 		in_dst_p = &in_iph6->ip6_dst;
940dbed73cbSSangeeta Misra 
941dbed73cbSSangeeta Misra 		if ((uint8_t *)in_iph6 + sizeof (ip6_t) +
942dbed73cbSSangeeta Misra 		    ICMP_MIN_TP_HDR_LEN > mp->b_wptr) {
943dbed73cbSSangeeta Misra 			return (B_FALSE);
944dbed73cbSSangeeta Misra 		}
945dbed73cbSSangeeta Misra 
946dbed73cbSSangeeta Misra 		l4 = in_iph6->ip6_nxt;
947dbed73cbSSangeeta Misra 		/* We don't go deep inside an IPv6 packet yet. */
948dbed73cbSSangeeta Misra 		if (l4 != IPPROTO_TCP && l4 != IPPROTO_UDP)
949dbed73cbSSangeeta Misra 			return (B_FALSE);
950dbed73cbSSangeeta Misra 
951dbed73cbSSangeeta Misra 		sport = (in_port_t *)&in_iph6[1];
952dbed73cbSSangeeta Misra 		dport = sport + 1;
953dbed73cbSSangeeta Misra 
954dbed73cbSSangeeta Misra 		DTRACE_PROBE4(ilb__chk__icmp__conn__v6, in6_addr_t *,
955dbed73cbSSangeeta Misra 		    &in_iph6->ip6_src, in6_addr_t *, &in_iph6->ip6_dst,
956dbed73cbSSangeeta Misra 		    uint16_t, ntohs(*sport), uint16_t, ntohs(*dport));
957dbed73cbSSangeeta Misra 	}
958dbed73cbSSangeeta Misra 
959dbed73cbSSangeeta Misra 	i = ILB_CONN_HASH((uint8_t *)&in_dst_p->s6_addr32[3], ntohs(*dport),
960dbed73cbSSangeeta Misra 	    (uint8_t *)&in_src_p->s6_addr32[3], ntohs(*sport),
961dbed73cbSSangeeta Misra 	    ilbs->ilbs_conn_hash_size);
962dbed73cbSSangeeta Misra 	hash = ilbs->ilbs_c2s_conn_hash;
963dbed73cbSSangeeta Misra 
964dbed73cbSSangeeta Misra 	mutex_enter(&hash[i].ilb_conn_hash_lock);
965dbed73cbSSangeeta Misra 	for (connp = hash[i].ilb_connp; connp != NULL;
966dbed73cbSSangeeta Misra 	    connp = connp->conn_c2s_next) {
967dbed73cbSSangeeta Misra 		if (connp->conn_l4 == l4 &&
968dbed73cbSSangeeta Misra 		    connp->conn_c2s_dport == *sport &&
969dbed73cbSSangeeta Misra 		    connp->conn_c2s_sport == *dport &&
970dbed73cbSSangeeta Misra 		    IN6_ARE_ADDR_EQUAL(in_dst_p, &connp->conn_c2s_saddr) &&
971dbed73cbSSangeeta Misra 		    IN6_ARE_ADDR_EQUAL(in_src_p, &connp->conn_c2s_daddr)) {
972d3d50737SRafael Vanoni 			connp->conn_c2s_atime = ddi_get_lbolt64();
973dbed73cbSSangeeta Misra 			connp->conn_c2s_pkt_cnt++;
974dbed73cbSSangeeta Misra 			rule_cache = connp->conn_rule_cache;
975dbed73cbSSangeeta Misra 			adj_ip_sum = connp->conn_c2s_ip_sum;
976dbed73cbSSangeeta Misra 			break;
977dbed73cbSSangeeta Misra 		}
978dbed73cbSSangeeta Misra 	}
979dbed73cbSSangeeta Misra 	mutex_exit(&hash[i].ilb_conn_hash_lock);
980dbed73cbSSangeeta Misra 
981dbed73cbSSangeeta Misra 	if (connp == NULL) {
982dbed73cbSSangeeta Misra 		DTRACE_PROBE(ilb__chk__icmp__conn__failed);
983dbed73cbSSangeeta Misra 		return (B_FALSE);
984dbed73cbSSangeeta Misra 	}
985dbed73cbSSangeeta Misra 
986dbed73cbSSangeeta Misra 	switch (rule_cache.topo) {
987dbed73cbSSangeeta Misra 	case ILB_TOPO_IMPL_NAT:
988dbed73cbSSangeeta Misra 		full_nat = B_TRUE;
989dbed73cbSSangeeta Misra 		break;
990dbed73cbSSangeeta Misra 	case ILB_TOPO_IMPL_HALF_NAT:
991dbed73cbSSangeeta Misra 		full_nat = B_FALSE;
992dbed73cbSSangeeta Misra 		break;
993dbed73cbSSangeeta Misra 	default:
994dbed73cbSSangeeta Misra 		return (B_FALSE);
995dbed73cbSSangeeta Misra 	}
996dbed73cbSSangeeta Misra 
997dbed73cbSSangeeta Misra 	*lb_dst = rule_cache.info.nat_dst;
998dbed73cbSSangeeta Misra 	if (l3 == IPPROTO_IP) {
999dbed73cbSSangeeta Misra 		ilb_nat_icmpv4(mp, out_iph, icmph4, in_iph4, sport, dport,
1000dbed73cbSSangeeta Misra 		    &rule_cache.info, adj_ip_sum, full_nat);
1001dbed73cbSSangeeta Misra 	} else {
1002dbed73cbSSangeeta Misra 		ilb_nat_icmpv6(mp, out_iph, icmph6, in_iph6, sport, dport,
1003dbed73cbSSangeeta Misra 		    &rule_cache.info, full_nat);
1004dbed73cbSSangeeta Misra 	}
1005dbed73cbSSangeeta Misra 	return (B_TRUE);
1006dbed73cbSSangeeta Misra }
1007dbed73cbSSangeeta Misra 
1008dbed73cbSSangeeta Misra /*
1009dbed73cbSSangeeta Misra  * This routine sends up the conn hash table to user land.  Note that the
1010dbed73cbSSangeeta Misra  * request is an ioctl, hence we cannot really differentiate requests
1011dbed73cbSSangeeta Misra  * from different clients.  There is no context shared between different
1012dbed73cbSSangeeta Misra  * ioctls.  Here we make the assumption that the user land ilbd will
1013dbed73cbSSangeeta Misra  * only allow one client to show the conn hash table at any time.
1014dbed73cbSSangeeta Misra  * Otherwise, the results will be "very" inconsistent.
1015dbed73cbSSangeeta Misra  *
1016dbed73cbSSangeeta Misra  * In each ioctl, a flag (ILB_LIST_BEGIN) indicates whether the client wants
1017dbed73cbSSangeeta Misra  * to read from the beginning of the able.  After a certain entries
1018dbed73cbSSangeeta Misra  * are reported, the kernel remembers the position of the last returned
1019dbed73cbSSangeeta Misra  * entry.  When the next ioctl comes in with the ILB_LIST_BEGIN flag,
1020dbed73cbSSangeeta Misra  * it will return entries starting from where it was left off.  When
1021dbed73cbSSangeeta Misra  * the end of table is reached, a flag (ILB_LIST_END) is set to tell
1022dbed73cbSSangeeta Misra  * the client that there is no more entry.
1023dbed73cbSSangeeta Misra  *
1024dbed73cbSSangeeta Misra  * It is assumed that the caller has checked the size of nat so that it
1025dbed73cbSSangeeta Misra  * can hold num entries.
1026dbed73cbSSangeeta Misra  */
1027dbed73cbSSangeeta Misra /* ARGSUSED */
1028dbed73cbSSangeeta Misra int
1029dbed73cbSSangeeta Misra ilb_list_nat(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_nat_entry_t *nat,
1030dbed73cbSSangeeta Misra     uint32_t *num, uint32_t *flags)
1031dbed73cbSSangeeta Misra {
1032dbed73cbSSangeeta Misra 	ilb_conn_hash_t *hash;
1033dbed73cbSSangeeta Misra 	ilb_conn_t *cur_connp;
1034dbed73cbSSangeeta Misra 	uint32_t i, j;
1035dbed73cbSSangeeta Misra 	int ret = 0;
1036dbed73cbSSangeeta Misra 
1037dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_conn_list_lock);
1038dbed73cbSSangeeta Misra 	while (ilbs->ilbs_conn_list_busy) {
1039dbed73cbSSangeeta Misra 		if (cv_wait_sig(&ilbs->ilbs_conn_list_cv,
1040dbed73cbSSangeeta Misra 		    &ilbs->ilbs_conn_list_lock) == 0) {
1041dbed73cbSSangeeta Misra 			mutex_exit(&ilbs->ilbs_conn_list_lock);
1042dbed73cbSSangeeta Misra 			return (EINTR);
1043dbed73cbSSangeeta Misra 		}
1044dbed73cbSSangeeta Misra 	}
1045dbed73cbSSangeeta Misra 	if ((hash = ilbs->ilbs_c2s_conn_hash) == NULL) {
1046dbed73cbSSangeeta Misra 		ASSERT(ilbs->ilbs_s2c_conn_hash == NULL);
1047dbed73cbSSangeeta Misra 		mutex_exit(&ilbs->ilbs_conn_list_lock);
1048dbed73cbSSangeeta Misra 		*num = 0;
1049dbed73cbSSangeeta Misra 		*flags |= ILB_LIST_END;
1050dbed73cbSSangeeta Misra 		return (0);
1051dbed73cbSSangeeta Misra 	}
1052dbed73cbSSangeeta Misra 	ilbs->ilbs_conn_list_busy = B_TRUE;
1053dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_conn_list_lock);
1054dbed73cbSSangeeta Misra 
1055dbed73cbSSangeeta Misra 	if (*flags & ILB_LIST_BEGIN) {
1056dbed73cbSSangeeta Misra 		i = 0;
1057dbed73cbSSangeeta Misra 		mutex_enter(&hash[0].ilb_conn_hash_lock);
1058dbed73cbSSangeeta Misra 		cur_connp = hash[0].ilb_connp;
1059dbed73cbSSangeeta Misra 	} else if (*flags & ILB_LIST_CONT) {
1060dbed73cbSSangeeta Misra 		if (ilbs->ilbs_conn_list_cur == ilbs->ilbs_conn_hash_size) {
1061dbed73cbSSangeeta Misra 			*num = 0;
1062dbed73cbSSangeeta Misra 			*flags |= ILB_LIST_END;
1063dbed73cbSSangeeta Misra 			goto done;
1064dbed73cbSSangeeta Misra 		}
1065dbed73cbSSangeeta Misra 		i = ilbs->ilbs_conn_list_cur;
1066dbed73cbSSangeeta Misra 		mutex_enter(&hash[i].ilb_conn_hash_lock);
1067dbed73cbSSangeeta Misra 		cur_connp = ilbs->ilbs_conn_list_connp;
1068dbed73cbSSangeeta Misra 	} else {
1069dbed73cbSSangeeta Misra 		ret = EINVAL;
1070dbed73cbSSangeeta Misra 		goto done;
1071dbed73cbSSangeeta Misra 	}
1072dbed73cbSSangeeta Misra 
1073dbed73cbSSangeeta Misra 	j = 0;
1074dbed73cbSSangeeta Misra 	while (j < *num) {
1075dbed73cbSSangeeta Misra 		if (cur_connp == NULL) {
1076dbed73cbSSangeeta Misra 			mutex_exit(&hash[i].ilb_conn_hash_lock);
1077dbed73cbSSangeeta Misra 			if (++i == ilbs->ilbs_conn_hash_size) {
1078dbed73cbSSangeeta Misra 				*flags |= ILB_LIST_END;
1079dbed73cbSSangeeta Misra 				break;
1080dbed73cbSSangeeta Misra 			}
1081dbed73cbSSangeeta Misra 			mutex_enter(&hash[i].ilb_conn_hash_lock);
1082dbed73cbSSangeeta Misra 			cur_connp = hash[i].ilb_connp;
1083dbed73cbSSangeeta Misra 			continue;
1084dbed73cbSSangeeta Misra 		}
1085dbed73cbSSangeeta Misra 		nat[j].proto = cur_connp->conn_l4;
1086dbed73cbSSangeeta Misra 
1087dbed73cbSSangeeta Misra 		nat[j].in_global = cur_connp->conn_c2s_daddr;
1088dbed73cbSSangeeta Misra 		nat[j].in_global_port = cur_connp->conn_c2s_dport;
1089dbed73cbSSangeeta Misra 		nat[j].out_global = cur_connp->conn_c2s_saddr;
1090dbed73cbSSangeeta Misra 		nat[j].out_global_port = cur_connp->conn_c2s_sport;
1091dbed73cbSSangeeta Misra 
1092dbed73cbSSangeeta Misra 		nat[j].in_local = cur_connp->conn_s2c_saddr;
1093dbed73cbSSangeeta Misra 		nat[j].in_local_port = cur_connp->conn_s2c_sport;
1094dbed73cbSSangeeta Misra 		nat[j].out_local = cur_connp->conn_s2c_daddr;
1095dbed73cbSSangeeta Misra 		nat[j].out_local_port = cur_connp->conn_s2c_dport;
1096dbed73cbSSangeeta Misra 
1097dbed73cbSSangeeta Misra 		nat[j].create_time = TICK_TO_MSEC(cur_connp->conn_cr_time);
1098dbed73cbSSangeeta Misra 		nat[j].last_access_time =
1099dbed73cbSSangeeta Misra 		    TICK_TO_MSEC(cur_connp->conn_c2s_atime);
1100dbed73cbSSangeeta Misra 
1101dbed73cbSSangeeta Misra 		/*
1102dbed73cbSSangeeta Misra 		 * The conn_s2c_pkt_cnt may not be accurate since we are not
1103dbed73cbSSangeeta Misra 		 * holding the s2c hash lock.
1104dbed73cbSSangeeta Misra 		 */
1105dbed73cbSSangeeta Misra 		nat[j].pkt_cnt = cur_connp->conn_c2s_pkt_cnt +
1106dbed73cbSSangeeta Misra 		    cur_connp->conn_s2c_pkt_cnt;
1107dbed73cbSSangeeta Misra 		j++;
1108dbed73cbSSangeeta Misra 
1109dbed73cbSSangeeta Misra 		cur_connp = cur_connp->conn_c2s_next;
1110dbed73cbSSangeeta Misra 	}
1111dbed73cbSSangeeta Misra 	ilbs->ilbs_conn_list_connp = cur_connp;
1112dbed73cbSSangeeta Misra 	if (j == *num)
1113dbed73cbSSangeeta Misra 		mutex_exit(&hash[i].ilb_conn_hash_lock);
1114dbed73cbSSangeeta Misra 
1115dbed73cbSSangeeta Misra 	ilbs->ilbs_conn_list_cur = i;
1116dbed73cbSSangeeta Misra 
1117dbed73cbSSangeeta Misra 	*num = j;
1118dbed73cbSSangeeta Misra done:
1119dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_conn_list_lock);
1120dbed73cbSSangeeta Misra 	ilbs->ilbs_conn_list_busy = B_FALSE;
1121dbed73cbSSangeeta Misra 	cv_signal(&ilbs->ilbs_conn_list_cv);
1122dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_conn_list_lock);
1123dbed73cbSSangeeta Misra 
1124dbed73cbSSangeeta Misra 	return (ret);
1125dbed73cbSSangeeta Misra }
1126dbed73cbSSangeeta Misra 
1127dbed73cbSSangeeta Misra 
1128dbed73cbSSangeeta Misra /*
1129dbed73cbSSangeeta Misra  * Stickiness (persistence) handling routines.
1130dbed73cbSSangeeta Misra  */
1131dbed73cbSSangeeta Misra 
1132dbed73cbSSangeeta Misra 
/*
 * Create the kmem cache named "ilb_sticky_cache", whose buffers are the size
 * of an ilb_sticky_t, and store it in the global ilb_sticky_cache.  Default
 * alignment is requested (0), and no constructor, destructor, reclaim
 * callback, private data or vmem source is supplied (the NULL arguments).
 * The cache creation flags come from ilb_kmem_flags.
 */
1133dbed73cbSSangeeta Misra static void
1134dbed73cbSSangeeta Misra ilb_sticky_cache_init(void)
1135dbed73cbSSangeeta Misra {
1136dbed73cbSSangeeta Misra 	ilb_sticky_cache = kmem_cache_create("ilb_sticky_cache",
1137dbed73cbSSangeeta Misra 	    sizeof (ilb_sticky_t), 0, NULL, NULL, NULL, NULL, NULL,
1138dbed73cbSSangeeta Misra 	    ilb_kmem_flags);
1139dbed73cbSSangeeta Misra }
1140dbed73cbSSangeeta Misra 
1141dbed73cbSSangeeta Misra void
1142dbed73cbSSangeeta Misra ilb_sticky_cache_fini(void)
1143dbed73cbSSangeeta Misra {
1144dbed73cbSSangeeta Misra 	if (ilb_sticky_cache != NULL) {
1145dbed73cbSSangeeta Misra 		kmem_cache_destroy(ilb_sticky_cache);
1146dbed73cbSSangeeta Misra 		ilb_sticky_cache = NULL;
1147dbed73cbSSangeeta Misra 	}
1148dbed73cbSSangeeta Misra }
1149dbed73cbSSangeeta Misra 
1150dbed73cbSSangeeta Misra void
1151dbed73cbSSangeeta Misra ilb_sticky_refrele(ilb_sticky_t *s)
1152dbed73cbSSangeeta Misra {
1153dbed73cbSSangeeta Misra 	ILB_STICKY_REFRELE(s);
1154dbed73cbSSangeeta Misra }
1155dbed73cbSSangeeta Misra 
1156dbed73cbSSangeeta Misra static ilb_sticky_t *
1157dbed73cbSSangeeta Misra ilb_sticky_lookup(ilb_sticky_hash_t *hash, ilb_rule_t *rule, in6_addr_t *src)
1158dbed73cbSSangeeta Misra {
1159dbed73cbSSangeeta Misra 	ilb_sticky_t *s;
1160dbed73cbSSangeeta Misra 
1161dbed73cbSSangeeta Misra 	ASSERT(mutex_owned(&hash->sticky_lock));
1162dbed73cbSSangeeta Misra 
1163dbed73cbSSangeeta Misra 	for (s = list_head(&hash->sticky_head); s != NULL;
1164dbed73cbSSangeeta Misra 	    s = list_next(&hash->sticky_head, s)) {
1165dbed73cbSSangeeta Misra 		if (s->rule_instance == rule->ir_ks_instance) {
1166dbed73cbSSangeeta Misra 			if (IN6_ARE_ADDR_EQUAL(src, &s->src))
1167dbed73cbSSangeeta Misra 				return (s);
1168dbed73cbSSangeeta Misra 		}
1169dbed73cbSSangeeta Misra 	}
1170dbed73cbSSangeeta Misra 	return (NULL);
1171dbed73cbSSangeeta Misra }
1172dbed73cbSSangeeta Misra 
1173dbed73cbSSangeeta Misra static ilb_sticky_t *
1174dbed73cbSSangeeta Misra ilb_sticky_add(ilb_sticky_hash_t *hash, ilb_rule_t *rule, ilb_server_t *server,
1175dbed73cbSSangeeta Misra     in6_addr_t *src)
1176dbed73cbSSangeeta Misra {
1177dbed73cbSSangeeta Misra 	ilb_sticky_t *s;
1178dbed73cbSSangeeta Misra 
1179dbed73cbSSangeeta Misra 	ASSERT(mutex_owned(&hash->sticky_lock));
1180dbed73cbSSangeeta Misra 
1181dbed73cbSSangeeta Misra 	if ((s = kmem_cache_alloc(ilb_sticky_cache, KM_NOSLEEP)) == NULL)
1182dbed73cbSSangeeta Misra 		return (NULL);
1183dbed73cbSSangeeta Misra 
1184dbed73cbSSangeeta Misra 	/*
1185dbed73cbSSangeeta Misra 	 * The rule instance is for handling the scenario when the same
1186dbed73cbSSangeeta Misra 	 * client talks to different rules at the same time.  Stickiness
1187dbed73cbSSangeeta Misra 	 * is per rule so we can use the rule instance to differentiate
1188dbed73cbSSangeeta Misra 	 * the client's request.
1189dbed73cbSSangeeta Misra 	 */
1190dbed73cbSSangeeta Misra 	s->rule_instance = rule->ir_ks_instance;
1191dbed73cbSSangeeta Misra 	/*
1192dbed73cbSSangeeta Misra 	 * Copy the rule name for listing all sticky cache entry.  ir_name
1193dbed73cbSSangeeta Misra 	 * is guaranteed to be NULL terminated.
1194dbed73cbSSangeeta Misra 	 */
1195dbed73cbSSangeeta Misra 	(void) strcpy(s->rule_name, rule->ir_name);
1196dbed73cbSSangeeta Misra 	s->server = server;
1197dbed73cbSSangeeta Misra 
1198dbed73cbSSangeeta Misra 	/*
1199dbed73cbSSangeeta Misra 	 * Grab a ref cnt on the server so that it won't go away while
1200dbed73cbSSangeeta Misra 	 * it is still in the sticky table.
1201dbed73cbSSangeeta Misra 	 */
1202dbed73cbSSangeeta Misra 	ILB_SERVER_REFHOLD(server);
1203dbed73cbSSangeeta Misra 	s->src = *src;
1204dbed73cbSSangeeta Misra 	s->expiry = rule->ir_sticky_expiry;
1205dbed73cbSSangeeta Misra 	s->refcnt = 1;
1206dbed73cbSSangeeta Misra 	s->hash = hash;
1207dbed73cbSSangeeta Misra 
1208dbed73cbSSangeeta Misra 	/*
1209dbed73cbSSangeeta Misra 	 * There is no need to set atime here since the refcnt is not
1210dbed73cbSSangeeta Misra 	 * zero.  A sticky entry is removed only when the refcnt is
1211dbed73cbSSangeeta Misra 	 * zero.  But just set it here for debugging purpose.  The
1212dbed73cbSSangeeta Misra 	 * atime is set when a refrele is done on a sticky entry.
1213dbed73cbSSangeeta Misra 	 */
1214d3d50737SRafael Vanoni 	s->atime = ddi_get_lbolt64();
1215dbed73cbSSangeeta Misra 
1216dbed73cbSSangeeta Misra 	list_insert_head(&hash->sticky_head, s);
1217dbed73cbSSangeeta Misra 	hash->sticky_cnt++;
1218dbed73cbSSangeeta Misra 	return (s);
1219dbed73cbSSangeeta Misra }
1220dbed73cbSSangeeta Misra 
1221dbed73cbSSangeeta Misra /*
1222dbed73cbSSangeeta Misra  * This routine checks if there is an existing sticky entry which matches
1223dbed73cbSSangeeta Misra  * a given packet.  If there is one, return it.  If there is not, create
1224dbed73cbSSangeeta Misra  * a sticky entry using the packet's info.
1225dbed73cbSSangeeta Misra  */
1226dbed73cbSSangeeta Misra ilb_server_t *
1227dbed73cbSSangeeta Misra ilb_sticky_find_add(ilb_stack_t *ilbs, ilb_rule_t *rule, in6_addr_t *src,
1228dbed73cbSSangeeta Misra     ilb_server_t *server, ilb_sticky_t **res, uint16_t *src_ent_idx)
1229dbed73cbSSangeeta Misra {
1230dbed73cbSSangeeta Misra 	int i;
1231dbed73cbSSangeeta Misra 	ilb_sticky_hash_t *hash;
1232dbed73cbSSangeeta Misra 	ilb_sticky_t *s;
1233dbed73cbSSangeeta Misra 
1234dbed73cbSSangeeta Misra 	ASSERT(server != NULL);
1235dbed73cbSSangeeta Misra 
1236dbed73cbSSangeeta Misra 	*res = NULL;
1237dbed73cbSSangeeta Misra 
1238dbed73cbSSangeeta Misra 	i = ILB_STICKY_HASH((uint8_t *)&src->s6_addr32[3],
1239dbed73cbSSangeeta Misra 	    (uint32_t)(uintptr_t)rule, ilbs->ilbs_sticky_hash_size);
1240dbed73cbSSangeeta Misra 	hash = &ilbs->ilbs_sticky_hash[i];
1241dbed73cbSSangeeta Misra 
1242dbed73cbSSangeeta Misra 	/* First check if there is already an entry. */
1243dbed73cbSSangeeta Misra 	mutex_enter(&hash->sticky_lock);
1244dbed73cbSSangeeta Misra 	s = ilb_sticky_lookup(hash, rule, src);
1245dbed73cbSSangeeta Misra 
1246dbed73cbSSangeeta Misra 	/* No sticky entry, add one. */
1247dbed73cbSSangeeta Misra 	if (s == NULL) {
1248dbed73cbSSangeeta Misra add_new_entry:
1249dbed73cbSSangeeta Misra 		s = ilb_sticky_add(hash, rule, server, src);
1250dbed73cbSSangeeta Misra 		if (s == NULL) {
1251dbed73cbSSangeeta Misra 			mutex_exit(&hash->sticky_lock);
1252dbed73cbSSangeeta Misra 			return (NULL);
1253dbed73cbSSangeeta Misra 		}
1254dbed73cbSSangeeta Misra 		/*
1255dbed73cbSSangeeta Misra 		 * Find a source for this server.  All subseqent requests from
1256dbed73cbSSangeeta Misra 		 * the same client matching this sticky entry will use this
1257dbed73cbSSangeeta Misra 		 * source address in doing NAT.  The current algorithm is
1258dbed73cbSSangeeta Misra 		 * simple, rotate the source address.  Note that the
1259dbed73cbSSangeeta Misra 		 * source address array does not change after it's created, so
1260dbed73cbSSangeeta Misra 		 * it is OK to just increment the cur index.
1261dbed73cbSSangeeta Misra 		 */
1262dbed73cbSSangeeta Misra 		if (server->iser_nat_src != NULL) {
1263dbed73cbSSangeeta Misra 			/* It is a hint, does not need to be atomic. */
1264dbed73cbSSangeeta Misra 			*src_ent_idx = (server->iser_nat_src->cur++ %
1265dbed73cbSSangeeta Misra 			    server->iser_nat_src->num_src);
1266dbed73cbSSangeeta Misra 			s->nat_src_idx = *src_ent_idx;
1267dbed73cbSSangeeta Misra 		}
1268dbed73cbSSangeeta Misra 		mutex_exit(&hash->sticky_lock);
1269dbed73cbSSangeeta Misra 		*res = s;
1270dbed73cbSSangeeta Misra 		return (server);
1271dbed73cbSSangeeta Misra 	}
1272dbed73cbSSangeeta Misra 
1273dbed73cbSSangeeta Misra 	/*
1274dbed73cbSSangeeta Misra 	 * We don't hold any lock accessing iser_enabled.  Refer to the
1275dbed73cbSSangeeta Misra 	 * comment in ilb_server_add() about iser_lock.
1276dbed73cbSSangeeta Misra 	 */
1277dbed73cbSSangeeta Misra 	if (!s->server->iser_enabled) {
1278dbed73cbSSangeeta Misra 		/*
1279dbed73cbSSangeeta Misra 		 * s->server == server can only happen if there is a race in
1280dbed73cbSSangeeta Misra 		 * toggling the iser_enabled flag (we don't hold a lock doing
1281dbed73cbSSangeeta Misra 		 * that) so that the load balance algorithm still returns a
1282dbed73cbSSangeeta Misra 		 * disabled server.  In this case, just drop the packet...
1283dbed73cbSSangeeta Misra 		 */
1284dbed73cbSSangeeta Misra 		if (s->server == server) {
1285dbed73cbSSangeeta Misra 			mutex_exit(&hash->sticky_lock);
1286dbed73cbSSangeeta Misra 			return (NULL);
1287dbed73cbSSangeeta Misra 		}
1288dbed73cbSSangeeta Misra 
1289dbed73cbSSangeeta Misra 		/*
1290dbed73cbSSangeeta Misra 		 * The old server is disabled and there is a new server, use
1291dbed73cbSSangeeta Misra 		 * the new one to create a sticky entry.  Since we will
1292dbed73cbSSangeeta Misra 		 * add the entry at the beginning, subsequent lookup will
1293dbed73cbSSangeeta Misra 		 * find this new entry instead of the old one.
1294dbed73cbSSangeeta Misra 		 */
1295dbed73cbSSangeeta Misra 		goto add_new_entry;
1296dbed73cbSSangeeta Misra 	}
1297dbed73cbSSangeeta Misra 
1298dbed73cbSSangeeta Misra 	s->refcnt++;
1299dbed73cbSSangeeta Misra 	*res = s;
1300dbed73cbSSangeeta Misra 	mutex_exit(&hash->sticky_lock);
1301dbed73cbSSangeeta Misra 	if (server->iser_nat_src != NULL)
1302dbed73cbSSangeeta Misra 		*src_ent_idx = s->nat_src_idx;
1303dbed73cbSSangeeta Misra 	return (s->server);
1304dbed73cbSSangeeta Misra }
1305dbed73cbSSangeeta Misra 
1306dbed73cbSSangeeta Misra static void
1307dbed73cbSSangeeta Misra ilb_sticky_cleanup(void *arg)
1308dbed73cbSSangeeta Misra {
1309dbed73cbSSangeeta Misra 	ilb_timer_t *timer = (ilb_timer_t *)arg;
1310dbed73cbSSangeeta Misra 	uint32_t i;
1311dbed73cbSSangeeta Misra 	ilb_stack_t *ilbs;
1312dbed73cbSSangeeta Misra 	ilb_sticky_hash_t *hash;
1313dbed73cbSSangeeta Misra 	ilb_sticky_t *s, *nxt_s;
1314dbed73cbSSangeeta Misra 	int64_t now, expiry;
1315dbed73cbSSangeeta Misra 
1316dbed73cbSSangeeta Misra 	ilbs = timer->ilbs;
1317dbed73cbSSangeeta Misra 	hash = ilbs->ilbs_sticky_hash;
1318dbed73cbSSangeeta Misra 	ASSERT(hash != NULL);
1319dbed73cbSSangeeta Misra 
1320d3d50737SRafael Vanoni 	now = ddi_get_lbolt64();
1321dbed73cbSSangeeta Misra 	for (i = timer->start; i < timer->end; i++) {
1322dbed73cbSSangeeta Misra 		mutex_enter(&hash[i].sticky_lock);
1323dbed73cbSSangeeta Misra 		for (s = list_head(&hash[i].sticky_head); s != NULL;
1324dbed73cbSSangeeta Misra 		    s = nxt_s) {
1325dbed73cbSSangeeta Misra 			nxt_s = list_next(&hash[i].sticky_head, s);
1326dbed73cbSSangeeta Misra 			if (s->refcnt != 0)
1327dbed73cbSSangeeta Misra 				continue;
1328dbed73cbSSangeeta Misra 			expiry = now - SEC_TO_TICK(s->expiry);
1329dbed73cbSSangeeta Misra 			if (s->atime < expiry) {
1330dbed73cbSSangeeta Misra 				ILB_SERVER_REFRELE(s->server);
1331dbed73cbSSangeeta Misra 				list_remove(&hash[i].sticky_head, s);
1332dbed73cbSSangeeta Misra 				kmem_cache_free(ilb_sticky_cache, s);
1333dbed73cbSSangeeta Misra 				hash[i].sticky_cnt--;
1334dbed73cbSSangeeta Misra 			}
1335dbed73cbSSangeeta Misra 		}
1336dbed73cbSSangeeta Misra 		mutex_exit(&hash[i].sticky_lock);
1337dbed73cbSSangeeta Misra 	}
1338dbed73cbSSangeeta Misra }
1339dbed73cbSSangeeta Misra 
1340dbed73cbSSangeeta Misra static void
1341dbed73cbSSangeeta Misra ilb_sticky_timer(void *arg)
1342dbed73cbSSangeeta Misra {
1343dbed73cbSSangeeta Misra 	ilb_timer_t *timer = (ilb_timer_t *)arg;
1344dbed73cbSSangeeta Misra 
1345dbed73cbSSangeeta Misra 	(void) taskq_dispatch(timer->ilbs->ilbs_sticky_taskq,
1346dbed73cbSSangeeta Misra 	    ilb_sticky_cleanup, arg, TQ_SLEEP);
1347dbed73cbSSangeeta Misra 	mutex_enter(&timer->tid_lock);
1348dbed73cbSSangeeta Misra 	if (timer->tid == 0) {
1349dbed73cbSSangeeta Misra 		mutex_exit(&timer->tid_lock);
1350dbed73cbSSangeeta Misra 	} else {
1351dbed73cbSSangeeta Misra 		timer->tid = timeout(ilb_sticky_timer, arg,
1352dbed73cbSSangeeta Misra 		    SEC_TO_TICK(ilb_sticky_timeout));
1353dbed73cbSSangeeta Misra 		mutex_exit(&timer->tid_lock);
1354dbed73cbSSangeeta Misra 	}
1355dbed73cbSSangeeta Misra }
1356dbed73cbSSangeeta Misra 
1357dbed73cbSSangeeta Misra void
1358dbed73cbSSangeeta Misra ilb_sticky_hash_init(ilb_stack_t *ilbs)
1359dbed73cbSSangeeta Misra {
1360dbed73cbSSangeeta Misra 	extern pri_t minclsyspri;
1361dbed73cbSSangeeta Misra 	int i, part;
1362dbed73cbSSangeeta Misra 	char tq_name[TASKQ_NAMELEN];
1363dbed73cbSSangeeta Misra 	ilb_timer_t *tm;
1364dbed73cbSSangeeta Misra 
1365de710d24SJosef 'Jeff' Sipek 	if (!ISP2(ilbs->ilbs_sticky_hash_size)) {
1366dbed73cbSSangeeta Misra 		for (i = 0; i < 31; i++) {
1367dbed73cbSSangeeta Misra 			if (ilbs->ilbs_sticky_hash_size < (1 << i))
1368dbed73cbSSangeeta Misra 				break;
1369dbed73cbSSangeeta Misra 		}
1370dbed73cbSSangeeta Misra 		ilbs->ilbs_sticky_hash_size = 1 << i;
1371dbed73cbSSangeeta Misra 	}
1372dbed73cbSSangeeta Misra 
1373dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_hash = kmem_zalloc(sizeof (ilb_sticky_hash_t) *
1374dbed73cbSSangeeta Misra 	    ilbs->ilbs_sticky_hash_size, KM_SLEEP);
1375dbed73cbSSangeeta Misra 	for (i = 0; i < ilbs->ilbs_sticky_hash_size; i++) {
1376dbed73cbSSangeeta Misra 		mutex_init(&ilbs->ilbs_sticky_hash[i].sticky_lock, NULL,
1377dbed73cbSSangeeta Misra 		    MUTEX_DEFAULT, NULL);
1378dbed73cbSSangeeta Misra 		list_create(&ilbs->ilbs_sticky_hash[i].sticky_head,
1379dbed73cbSSangeeta Misra 		    sizeof (ilb_sticky_t),
1380dbed73cbSSangeeta Misra 		    offsetof(ilb_sticky_t, list));
1381dbed73cbSSangeeta Misra 	}
1382dbed73cbSSangeeta Misra 
1383dbed73cbSSangeeta Misra 	if (ilb_sticky_cache == NULL)
1384dbed73cbSSangeeta Misra 		ilb_sticky_cache_init();
1385dbed73cbSSangeeta Misra 
1386dbed73cbSSangeeta Misra 	(void) snprintf(tq_name, sizeof (tq_name), "ilb_sticky_taskq_%p",
13876e0672acSSangeeta Misra 	    (void *)ilbs->ilbs_netstack);
1388dbed73cbSSangeeta Misra 	ASSERT(ilbs->ilbs_sticky_taskq == NULL);
1389dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_taskq = taskq_create(tq_name,
1390dbed73cbSSangeeta Misra 	    ilb_sticky_timer_size * 2, minclsyspri, ilb_sticky_timer_size,
1391dbed73cbSSangeeta Misra 	    ilb_sticky_timer_size * 2, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
1392dbed73cbSSangeeta Misra 
1393dbed73cbSSangeeta Misra 	ASSERT(ilbs->ilbs_sticky_timer_list == NULL);
1394dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_timer_list = kmem_zalloc(sizeof (ilb_timer_t) *
1395dbed73cbSSangeeta Misra 	    ilb_sticky_timer_size, KM_SLEEP);
1396dbed73cbSSangeeta Misra 	part = ilbs->ilbs_sticky_hash_size / ilb_sticky_timer_size + 1;
1397dbed73cbSSangeeta Misra 	for (i = 0; i < ilb_sticky_timer_size; i++) {
1398dbed73cbSSangeeta Misra 		tm = ilbs->ilbs_sticky_timer_list + i;
1399dbed73cbSSangeeta Misra 		tm->start = i * part;
1400dbed73cbSSangeeta Misra 		tm->end = i * part + part;
1401dbed73cbSSangeeta Misra 		if (tm->end > ilbs->ilbs_sticky_hash_size)
1402dbed73cbSSangeeta Misra 			tm->end = ilbs->ilbs_sticky_hash_size;
1403dbed73cbSSangeeta Misra 		tm->ilbs = ilbs;
1404dbed73cbSSangeeta Misra 		mutex_init(&tm->tid_lock, NULL, MUTEX_DEFAULT, NULL);
1405dbed73cbSSangeeta Misra 		/* Spread out the starting execution time of all the timers. */
1406dbed73cbSSangeeta Misra 		tm->tid = timeout(ilb_sticky_timer, tm,
1407dbed73cbSSangeeta Misra 		    SEC_TO_TICK(ilb_sticky_timeout + i));
1408dbed73cbSSangeeta Misra 	}
1409dbed73cbSSangeeta Misra }
1410dbed73cbSSangeeta Misra 
1411dbed73cbSSangeeta Misra void
1412dbed73cbSSangeeta Misra ilb_sticky_hash_fini(ilb_stack_t *ilbs)
1413dbed73cbSSangeeta Misra {
1414dbed73cbSSangeeta Misra 	int i;
1415dbed73cbSSangeeta Misra 	ilb_sticky_t *s;
1416dbed73cbSSangeeta Misra 
1417dbed73cbSSangeeta Misra 	if (ilbs->ilbs_sticky_hash == NULL)
1418dbed73cbSSangeeta Misra 		return;
1419dbed73cbSSangeeta Misra 
1420dbed73cbSSangeeta Misra 	/* Stop all the timers first. */
1421dbed73cbSSangeeta Misra 	for (i = 0; i < ilb_sticky_timer_size; i++) {
1422dbed73cbSSangeeta Misra 		timeout_id_t tid;
1423dbed73cbSSangeeta Misra 
1424dbed73cbSSangeeta Misra 		/* Setting tid to 0 tells the timer handler not to restart. */
1425dbed73cbSSangeeta Misra 		mutex_enter(&ilbs->ilbs_sticky_timer_list[i].tid_lock);
1426dbed73cbSSangeeta Misra 		tid = ilbs->ilbs_sticky_timer_list[i].tid;
1427dbed73cbSSangeeta Misra 		ilbs->ilbs_sticky_timer_list[i].tid = 0;
1428dbed73cbSSangeeta Misra 		mutex_exit(&ilbs->ilbs_sticky_timer_list[i].tid_lock);
1429dbed73cbSSangeeta Misra 		(void) untimeout(tid);
1430dbed73cbSSangeeta Misra 	}
1431dbed73cbSSangeeta Misra 	kmem_free(ilbs->ilbs_sticky_timer_list, sizeof (ilb_timer_t) *
1432dbed73cbSSangeeta Misra 	    ilb_sticky_timer_size);
1433dbed73cbSSangeeta Misra 	taskq_destroy(ilbs->ilbs_sticky_taskq);
1434dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_taskq = NULL;
1435dbed73cbSSangeeta Misra 
1436dbed73cbSSangeeta Misra 	for (i = 0; i < ilbs->ilbs_sticky_hash_size; i++) {
1437dbed73cbSSangeeta Misra 		while ((s = list_head(&ilbs->ilbs_sticky_hash[i].sticky_head))
1438dbed73cbSSangeeta Misra 		    != NULL) {
1439dbed73cbSSangeeta Misra 			list_remove(&ilbs->ilbs_sticky_hash[i].sticky_head, s);
1440dbed73cbSSangeeta Misra 			ILB_SERVER_REFRELE(s->server);
1441dbed73cbSSangeeta Misra 			kmem_free(s, sizeof (ilb_sticky_t));
1442dbed73cbSSangeeta Misra 		}
1443dbed73cbSSangeeta Misra 	}
1444dbed73cbSSangeeta Misra 	kmem_free(ilbs->ilbs_sticky_hash, ilbs->ilbs_sticky_hash_size *
1445dbed73cbSSangeeta Misra 	    sizeof (ilb_sticky_hash_t));
1446dbed73cbSSangeeta Misra }
1447dbed73cbSSangeeta Misra 
1448dbed73cbSSangeeta Misra /*
1449dbed73cbSSangeeta Misra  * This routine sends up the sticky hash table to user land.  Refer to
1450dbed73cbSSangeeta Misra  * the comments before ilb_list_nat().  Both routines assume similar
1451dbed73cbSSangeeta Misra  * conditions.
1452dbed73cbSSangeeta Misra  *
1453dbed73cbSSangeeta Misra  * It is assumed that the caller has checked the size of st so that it
1454dbed73cbSSangeeta Misra  * can hold num entries.
1455dbed73cbSSangeeta Misra  */
1456dbed73cbSSangeeta Misra /* ARGSUSED */
1457dbed73cbSSangeeta Misra int
1458dbed73cbSSangeeta Misra ilb_list_sticky(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_sticky_entry_t *st,
1459dbed73cbSSangeeta Misra     uint32_t *num, uint32_t *flags)
1460dbed73cbSSangeeta Misra {
1461dbed73cbSSangeeta Misra 	ilb_sticky_hash_t *hash;
1462dbed73cbSSangeeta Misra 	ilb_sticky_t *curp;
1463dbed73cbSSangeeta Misra 	uint32_t i, j;
1464dbed73cbSSangeeta Misra 	int ret = 0;
1465dbed73cbSSangeeta Misra 
1466dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_sticky_list_lock);
1467dbed73cbSSangeeta Misra 	while (ilbs->ilbs_sticky_list_busy) {
1468dbed73cbSSangeeta Misra 		if (cv_wait_sig(&ilbs->ilbs_sticky_list_cv,
1469dbed73cbSSangeeta Misra 		    &ilbs->ilbs_sticky_list_lock) == 0) {
1470dbed73cbSSangeeta Misra 			mutex_exit(&ilbs->ilbs_sticky_list_lock);
1471dbed73cbSSangeeta Misra 			return (EINTR);
1472dbed73cbSSangeeta Misra 		}
1473dbed73cbSSangeeta Misra 	}
1474dbed73cbSSangeeta Misra 	if ((hash = ilbs->ilbs_sticky_hash) == NULL) {
1475dbed73cbSSangeeta Misra 		mutex_exit(&ilbs->ilbs_sticky_list_lock);
1476dbed73cbSSangeeta Misra 		*num = 0;
1477dbed73cbSSangeeta Misra 		*flags |= ILB_LIST_END;
1478dbed73cbSSangeeta Misra 		return (0);
1479dbed73cbSSangeeta Misra 	}
1480dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_list_busy = B_TRUE;
1481dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_sticky_list_lock);
1482dbed73cbSSangeeta Misra 
1483dbed73cbSSangeeta Misra 	if (*flags & ILB_LIST_BEGIN) {
1484dbed73cbSSangeeta Misra 		i = 0;
1485dbed73cbSSangeeta Misra 		mutex_enter(&hash[0].sticky_lock);
1486dbed73cbSSangeeta Misra 		curp = list_head(&hash[0].sticky_head);
1487dbed73cbSSangeeta Misra 	} else if (*flags & ILB_LIST_CONT) {
1488dbed73cbSSangeeta Misra 		if (ilbs->ilbs_sticky_list_cur == ilbs->ilbs_sticky_hash_size) {
1489dbed73cbSSangeeta Misra 			*num = 0;
1490dbed73cbSSangeeta Misra 			*flags |= ILB_LIST_END;
1491dbed73cbSSangeeta Misra 			goto done;
1492dbed73cbSSangeeta Misra 		}
1493dbed73cbSSangeeta Misra 		i = ilbs->ilbs_sticky_list_cur;
1494dbed73cbSSangeeta Misra 		mutex_enter(&hash[i].sticky_lock);
1495dbed73cbSSangeeta Misra 		curp = ilbs->ilbs_sticky_list_curp;
1496dbed73cbSSangeeta Misra 	} else {
1497dbed73cbSSangeeta Misra 		ret = EINVAL;
1498dbed73cbSSangeeta Misra 		goto done;
1499dbed73cbSSangeeta Misra 	}
1500dbed73cbSSangeeta Misra 
1501dbed73cbSSangeeta Misra 	j = 0;
1502dbed73cbSSangeeta Misra 	while (j < *num) {
1503dbed73cbSSangeeta Misra 		if (curp == NULL) {
1504dbed73cbSSangeeta Misra 			mutex_exit(&hash[i].sticky_lock);
1505dbed73cbSSangeeta Misra 			if (++i == ilbs->ilbs_sticky_hash_size) {
1506dbed73cbSSangeeta Misra 				*flags |= ILB_LIST_END;
1507dbed73cbSSangeeta Misra 				break;
1508dbed73cbSSangeeta Misra 			}
1509dbed73cbSSangeeta Misra 			mutex_enter(&hash[i].sticky_lock);
1510dbed73cbSSangeeta Misra 			curp = list_head(&hash[i].sticky_head);
1511dbed73cbSSangeeta Misra 			continue;
1512dbed73cbSSangeeta Misra 		}
1513dbed73cbSSangeeta Misra 		(void) strcpy(st[j].rule_name, curp->rule_name);
1514dbed73cbSSangeeta Misra 		st[j].req_addr = curp->src;
1515dbed73cbSSangeeta Misra 		st[j].srv_addr = curp->server->iser_addr_v6;
1516dbed73cbSSangeeta Misra 		st[j].expiry_time = TICK_TO_MSEC(curp->expiry);
1517dbed73cbSSangeeta Misra 		j++;
1518dbed73cbSSangeeta Misra 		curp = list_next(&hash[i].sticky_head, curp);
1519dbed73cbSSangeeta Misra 	}
1520dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_list_curp = curp;
1521dbed73cbSSangeeta Misra 	if (j == *num)
1522dbed73cbSSangeeta Misra 		mutex_exit(&hash[i].sticky_lock);
1523dbed73cbSSangeeta Misra 
1524dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_list_cur = i;
1525dbed73cbSSangeeta Misra 
1526dbed73cbSSangeeta Misra 	*num = j;
1527dbed73cbSSangeeta Misra done:
1528dbed73cbSSangeeta Misra 	mutex_enter(&ilbs->ilbs_sticky_list_lock);
1529dbed73cbSSangeeta Misra 	ilbs->ilbs_sticky_list_busy = B_FALSE;
1530dbed73cbSSangeeta Misra 	cv_signal(&ilbs->ilbs_sticky_list_cv);
1531dbed73cbSSangeeta Misra 	mutex_exit(&ilbs->ilbs_sticky_list_lock);
1532dbed73cbSSangeeta Misra 
1533dbed73cbSSangeeta Misra 	return (ret);
1534dbed73cbSSangeeta Misra }
1535