1dbed73cbSSangeeta Misra /*
2dbed73cbSSangeeta Misra * CDDL HEADER START
3dbed73cbSSangeeta Misra *
4dbed73cbSSangeeta Misra * The contents of this file are subject to the terms of the
5dbed73cbSSangeeta Misra * Common Development and Distribution License (the "License").
6dbed73cbSSangeeta Misra * You may not use this file except in compliance with the License.
7dbed73cbSSangeeta Misra *
8dbed73cbSSangeeta Misra * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9dbed73cbSSangeeta Misra * or http://www.opensolaris.org/os/licensing.
10dbed73cbSSangeeta Misra * See the License for the specific language governing permissions
11dbed73cbSSangeeta Misra * and limitations under the License.
12dbed73cbSSangeeta Misra *
13dbed73cbSSangeeta Misra * When distributing Covered Code, include this CDDL HEADER in each
14dbed73cbSSangeeta Misra * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15dbed73cbSSangeeta Misra * If applicable, add the following below this CDDL HEADER, with the
16dbed73cbSSangeeta Misra * fields enclosed by brackets "[]" replaced with your own identifying
17dbed73cbSSangeeta Misra * information: Portions Copyright [yyyy] [name of copyright owner]
18dbed73cbSSangeeta Misra *
19dbed73cbSSangeeta Misra * CDDL HEADER END
20dbed73cbSSangeeta Misra */
21dbed73cbSSangeeta Misra
22dbed73cbSSangeeta Misra /*
23dbed73cbSSangeeta Misra * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24dbed73cbSSangeeta Misra * Use is subject to license terms.
25*d17b05b6SJerry Jelinek * Copyright 2014 Joyent, Inc. All rights reserved.
26dbed73cbSSangeeta Misra */
27dbed73cbSSangeeta Misra
28de710d24SJosef 'Jeff' Sipek #include <sys/sysmacros.h>
29dbed73cbSSangeeta Misra #include <sys/types.h>
30dbed73cbSSangeeta Misra #include <sys/conf.h>
31dbed73cbSSangeeta Misra #include <sys/time.h>
32dbed73cbSSangeeta Misra #include <sys/taskq.h>
33dbed73cbSSangeeta Misra #include <sys/cmn_err.h>
34dbed73cbSSangeeta Misra #include <sys/sdt.h>
35dbed73cbSSangeeta Misra #include <sys/atomic.h>
36dbed73cbSSangeeta Misra #include <netinet/in.h>
37dbed73cbSSangeeta Misra #include <inet/ip.h>
38dbed73cbSSangeeta Misra #include <inet/ip6.h>
39dbed73cbSSangeeta Misra #include <inet/tcp.h>
40dbed73cbSSangeeta Misra #include <inet/udp_impl.h>
41dbed73cbSSangeeta Misra #include <inet/ilb.h>
42dbed73cbSSangeeta Misra
43dbed73cbSSangeeta Misra #include "ilb_stack.h"
44dbed73cbSSangeeta Misra #include "ilb_impl.h"
45dbed73cbSSangeeta Misra #include "ilb_conn.h"
46dbed73cbSSangeeta Misra #include "ilb_nat.h"
47dbed73cbSSangeeta Misra
48dbed73cbSSangeeta Misra /*
49dbed73cbSSangeeta Misra * Timer struct for ilb_conn_t and ilb_sticky_t garbage collection
50dbed73cbSSangeeta Misra *
51dbed73cbSSangeeta Misra * start: starting index into the hash table to do gc
52dbed73cbSSangeeta Misra * end: ending index into the hash table to do gc
53dbed73cbSSangeeta Misra * ilbs: pointer to the ilb_stack_t of the IP stack
54dbed73cbSSangeeta Misra * tid_lock: mutex to protect the timer id.
55dbed73cbSSangeeta Misra * tid: timer id of the timer
56dbed73cbSSangeeta Misra */
57dbed73cbSSangeeta Misra typedef struct ilb_timer_s {
58dbed73cbSSangeeta Misra uint32_t start;
59dbed73cbSSangeeta Misra uint32_t end;
60dbed73cbSSangeeta Misra ilb_stack_t *ilbs;
61dbed73cbSSangeeta Misra kmutex_t tid_lock;
62dbed73cbSSangeeta Misra timeout_id_t tid;
63dbed73cbSSangeeta Misra } ilb_timer_t;
64dbed73cbSSangeeta Misra
/*
 * Hash macro for finding the index into the conn hash table.  saddr and
 * daddr are indexed as four-element arrays; the result is masked with
 * (hash_size - 1), so hash_size has to be a power of 2.
 */
#define	ILB_CONN_HASH(saddr, sport, daddr, dport, hash_size)		\
	((((saddr)[3] ^ (daddr)[3]) * 50653 +				\
	((saddr)[2] ^ (daddr)[2]) * 1369 +				\
	((saddr)[1] ^ (daddr)[1]) * 37 +				\
	((saddr)[0] ^ (daddr)[0]) + (sport) * 37 + (dport)) &		\
	((hash_size) - 1))
72dbed73cbSSangeeta Misra
/* Kmem cache from which conn hash entries (ilb_conn_t) are allocated */
static struct kmem_cache *ilb_conn_cache = NULL;

/*
 * Number of timers running to do conn cache garbage collection.  With the
 * default of 60, each gc task is responsible for 1/60 of the conn hash
 * table.
 */
static int ilb_conn_timer_size = 60;

/* Interval, in seconds, at which each of the above gc timers fires. */
static int ilb_conn_cache_timeout = 15;
84dbed73cbSSangeeta Misra
/*
 * Hash macro for finding the index into the sticky hash table.  saddr is
 * indexed as a four-element array and mixed with the bytes of rule; the
 * result is masked with (hash_size - 1), so hash_size has to be a power
 * of 2.
 */
#define	ILB_STICKY_HASH(saddr, rule, hash_size)				\
	((((saddr)[3] ^ ((rule) >> 24)) * 29791 +			\
	((saddr)[2] ^ ((rule) >> 16)) * 961 +				\
	((saddr)[1] ^ ((rule) >> 8)) * 31 +				\
	((saddr)[0] ^ (rule))) & ((hash_size) - 1))
90dbed73cbSSangeeta Misra
/* Kmem cache for the sticky hash entries */
static struct kmem_cache *ilb_sticky_cache = NULL;

/*
 * Number of timers running to do sticky cache garbage collection.  With
 * the default of 60, each gc task is responsible for 1/60 of the sticky
 * hash table.
 */
static int ilb_sticky_timer_size = 60;

/* Interval, in seconds, at which each of the above gc timers fires. */
static int ilb_sticky_timeout = 15;
101dbed73cbSSangeeta Misra
/*
 * Drop one reference on sticky entry s and stamp its access time with the
 * current lbolt, both while holding the sticky_lock of the hash entry that
 * s->hash points to.
 *
 * Every use of the argument is parenthesized so that any pointer
 * expression can be passed.  The argument is evaluated several times, so
 * it must not have side effects.
 */
#define	ILB_STICKY_REFRELE(s)				\
{							\
	mutex_enter(&(s)->hash->sticky_lock);		\
	(s)->refcnt--;					\
	(s)->atime = ddi_get_lbolt64();			\
	mutex_exit(&(s)->hash->sticky_lock);		\
}
109dbed73cbSSangeeta Misra
110dbed73cbSSangeeta Misra
111dbed73cbSSangeeta Misra static void
ilb_conn_cache_init(void)112dbed73cbSSangeeta Misra ilb_conn_cache_init(void)
113dbed73cbSSangeeta Misra {
114dbed73cbSSangeeta Misra ilb_conn_cache = kmem_cache_create("ilb_conn_cache",
115dbed73cbSSangeeta Misra sizeof (ilb_conn_t), 0, NULL, NULL, NULL, NULL, NULL,
116dbed73cbSSangeeta Misra ilb_kmem_flags);
117dbed73cbSSangeeta Misra }
118dbed73cbSSangeeta Misra
119dbed73cbSSangeeta Misra void
ilb_conn_cache_fini(void)120dbed73cbSSangeeta Misra ilb_conn_cache_fini(void)
121dbed73cbSSangeeta Misra {
122dbed73cbSSangeeta Misra if (ilb_conn_cache != NULL) {
123dbed73cbSSangeeta Misra kmem_cache_destroy(ilb_conn_cache);
124dbed73cbSSangeeta Misra ilb_conn_cache = NULL;
125dbed73cbSSangeeta Misra }
126dbed73cbSSangeeta Misra }
127dbed73cbSSangeeta Misra
128dbed73cbSSangeeta Misra static void
ilb_conn_remove_common(ilb_conn_t * connp,boolean_t c2s)129dbed73cbSSangeeta Misra ilb_conn_remove_common(ilb_conn_t *connp, boolean_t c2s)
130dbed73cbSSangeeta Misra {
131dbed73cbSSangeeta Misra ilb_conn_hash_t *hash;
132dbed73cbSSangeeta Misra ilb_conn_t **next, **prev;
133dbed73cbSSangeeta Misra ilb_conn_t **next_prev, **prev_next;
134dbed73cbSSangeeta Misra
135dbed73cbSSangeeta Misra if (c2s) {
136dbed73cbSSangeeta Misra hash = connp->conn_c2s_hash;
137dbed73cbSSangeeta Misra ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock));
138dbed73cbSSangeeta Misra next = &connp->conn_c2s_next;
139dbed73cbSSangeeta Misra prev = &connp->conn_c2s_prev;
140dbed73cbSSangeeta Misra if (*next != NULL)
141dbed73cbSSangeeta Misra next_prev = &(*next)->conn_c2s_prev;
142dbed73cbSSangeeta Misra if (*prev != NULL)
143dbed73cbSSangeeta Misra prev_next = &(*prev)->conn_c2s_next;
144dbed73cbSSangeeta Misra } else {
145dbed73cbSSangeeta Misra hash = connp->conn_s2c_hash;
146dbed73cbSSangeeta Misra ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock));
147dbed73cbSSangeeta Misra next = &connp->conn_s2c_next;
148dbed73cbSSangeeta Misra prev = &connp->conn_s2c_prev;
149dbed73cbSSangeeta Misra if (*next != NULL)
150dbed73cbSSangeeta Misra next_prev = &(*next)->conn_s2c_prev;
151dbed73cbSSangeeta Misra if (*prev != NULL)
152dbed73cbSSangeeta Misra prev_next = &(*prev)->conn_s2c_next;
153dbed73cbSSangeeta Misra }
154dbed73cbSSangeeta Misra
155dbed73cbSSangeeta Misra if (hash->ilb_connp == connp) {
156dbed73cbSSangeeta Misra hash->ilb_connp = *next;
157dbed73cbSSangeeta Misra if (*next != NULL)
158dbed73cbSSangeeta Misra *next_prev = NULL;
159dbed73cbSSangeeta Misra } else {
160dbed73cbSSangeeta Misra if (*prev != NULL)
161dbed73cbSSangeeta Misra *prev_next = *next;
162dbed73cbSSangeeta Misra if (*next != NULL)
163dbed73cbSSangeeta Misra *next_prev = *prev;
164dbed73cbSSangeeta Misra }
165dbed73cbSSangeeta Misra ASSERT(hash->ilb_conn_cnt > 0);
166dbed73cbSSangeeta Misra hash->ilb_conn_cnt--;
167dbed73cbSSangeeta Misra
168dbed73cbSSangeeta Misra *next = NULL;
169dbed73cbSSangeeta Misra *prev = NULL;
170dbed73cbSSangeeta Misra }
171dbed73cbSSangeeta Misra
172dbed73cbSSangeeta Misra static void
ilb_conn_remove(ilb_conn_t * connp)173dbed73cbSSangeeta Misra ilb_conn_remove(ilb_conn_t *connp)
174dbed73cbSSangeeta Misra {
175dbed73cbSSangeeta Misra ASSERT(MUTEX_HELD(&connp->conn_c2s_hash->ilb_conn_hash_lock));
176dbed73cbSSangeeta Misra ilb_conn_remove_common(connp, B_TRUE);
177dbed73cbSSangeeta Misra ASSERT(MUTEX_HELD(&connp->conn_s2c_hash->ilb_conn_hash_lock));
178dbed73cbSSangeeta Misra ilb_conn_remove_common(connp, B_FALSE);
179dbed73cbSSangeeta Misra
180dbed73cbSSangeeta Misra if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) {
181dbed73cbSSangeeta Misra in_port_t port;
182dbed73cbSSangeeta Misra
183dbed73cbSSangeeta Misra port = ntohs(connp->conn_rule_cache.info.nat_sport);
184dbed73cbSSangeeta Misra vmem_free(connp->conn_rule_cache.info.src_ent->nse_port_arena,
185dbed73cbSSangeeta Misra (void *)(uintptr_t)port, 1);
186dbed73cbSSangeeta Misra }
187dbed73cbSSangeeta Misra
188dbed73cbSSangeeta Misra if (connp->conn_sticky != NULL)
189dbed73cbSSangeeta Misra ILB_STICKY_REFRELE(connp->conn_sticky);
190dbed73cbSSangeeta Misra ILB_SERVER_REFRELE(connp->conn_server);
191dbed73cbSSangeeta Misra kmem_cache_free(ilb_conn_cache, connp);
192dbed73cbSSangeeta Misra }
193dbed73cbSSangeeta Misra
194dbed73cbSSangeeta Misra /*
195dbed73cbSSangeeta Misra * Routine to do periodic garbage collection of conn hash entries. When
196dbed73cbSSangeeta Misra * a conn hash timer fires, it dispatches a taskq to call this function
197dbed73cbSSangeeta Misra * to do the gc. Note that each taskq is responisble for a portion of
198dbed73cbSSangeeta Misra * the table. The portion is stored in timer->start, timer->end.
199dbed73cbSSangeeta Misra */
200dbed73cbSSangeeta Misra static void
ilb_conn_cleanup(void * arg)201dbed73cbSSangeeta Misra ilb_conn_cleanup(void *arg)
202dbed73cbSSangeeta Misra {
203dbed73cbSSangeeta Misra ilb_timer_t *timer = (ilb_timer_t *)arg;
204dbed73cbSSangeeta Misra uint32_t i;
205dbed73cbSSangeeta Misra ilb_stack_t *ilbs;
206dbed73cbSSangeeta Misra ilb_conn_hash_t *c2s_hash, *s2c_hash;
207dbed73cbSSangeeta Misra ilb_conn_t *connp, *nxt_connp;
208dbed73cbSSangeeta Misra int64_t now;
209dbed73cbSSangeeta Misra int64_t expiry;
210dbed73cbSSangeeta Misra boolean_t die_now;
211dbed73cbSSangeeta Misra
212dbed73cbSSangeeta Misra ilbs = timer->ilbs;
213dbed73cbSSangeeta Misra c2s_hash = ilbs->ilbs_c2s_conn_hash;
214dbed73cbSSangeeta Misra ASSERT(c2s_hash != NULL);
215dbed73cbSSangeeta Misra
216d3d50737SRafael Vanoni now = ddi_get_lbolt64();
217dbed73cbSSangeeta Misra for (i = timer->start; i < timer->end; i++) {
218dbed73cbSSangeeta Misra mutex_enter(&c2s_hash[i].ilb_conn_hash_lock);
219dbed73cbSSangeeta Misra if ((connp = c2s_hash[i].ilb_connp) == NULL) {
220dbed73cbSSangeeta Misra ASSERT(c2s_hash[i].ilb_conn_cnt == 0);
221dbed73cbSSangeeta Misra mutex_exit(&c2s_hash[i].ilb_conn_hash_lock);
222dbed73cbSSangeeta Misra continue;
223dbed73cbSSangeeta Misra }
224dbed73cbSSangeeta Misra do {
225dbed73cbSSangeeta Misra ASSERT(c2s_hash[i].ilb_conn_cnt > 0);
226dbed73cbSSangeeta Misra ASSERT(connp->conn_c2s_hash == &c2s_hash[i]);
227dbed73cbSSangeeta Misra nxt_connp = connp->conn_c2s_next;
228dbed73cbSSangeeta Misra expiry = now - SEC_TO_TICK(connp->conn_expiry);
229dbed73cbSSangeeta Misra if (connp->conn_server->iser_die_time != 0 &&
230dbed73cbSSangeeta Misra connp->conn_server->iser_die_time < now)
231dbed73cbSSangeeta Misra die_now = B_TRUE;
232dbed73cbSSangeeta Misra else
233dbed73cbSSangeeta Misra die_now = B_FALSE;
234dbed73cbSSangeeta Misra s2c_hash = connp->conn_s2c_hash;
235dbed73cbSSangeeta Misra mutex_enter(&s2c_hash->ilb_conn_hash_lock);
236dbed73cbSSangeeta Misra
237dbed73cbSSangeeta Misra if (connp->conn_gc || die_now ||
238dbed73cbSSangeeta Misra (connp->conn_c2s_atime < expiry &&
239dbed73cbSSangeeta Misra connp->conn_s2c_atime < expiry)) {
240dbed73cbSSangeeta Misra /* Need to update the nat list cur_connp */
241dbed73cbSSangeeta Misra if (connp == ilbs->ilbs_conn_list_connp) {
242dbed73cbSSangeeta Misra ilbs->ilbs_conn_list_connp =
243dbed73cbSSangeeta Misra connp->conn_c2s_next;
244dbed73cbSSangeeta Misra }
245dbed73cbSSangeeta Misra ilb_conn_remove(connp);
246dbed73cbSSangeeta Misra goto nxt_connp;
247dbed73cbSSangeeta Misra }
248dbed73cbSSangeeta Misra
249dbed73cbSSangeeta Misra if (connp->conn_l4 != IPPROTO_TCP)
250dbed73cbSSangeeta Misra goto nxt_connp;
251dbed73cbSSangeeta Misra
252dbed73cbSSangeeta Misra /* Update and check TCP related conn info */
253dbed73cbSSangeeta Misra if (connp->conn_c2s_tcp_fin_sent &&
254dbed73cbSSangeeta Misra SEQ_GT(connp->conn_s2c_tcp_ack,
255dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fss)) {
256dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fin_acked = B_TRUE;
257dbed73cbSSangeeta Misra }
258dbed73cbSSangeeta Misra if (connp->conn_s2c_tcp_fin_sent &&
259dbed73cbSSangeeta Misra SEQ_GT(connp->conn_c2s_tcp_ack,
260dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fss)) {
261dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_acked = B_TRUE;
262dbed73cbSSangeeta Misra }
263dbed73cbSSangeeta Misra if (connp->conn_c2s_tcp_fin_acked &&
264dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_acked) {
265dbed73cbSSangeeta Misra ilb_conn_remove(connp);
266dbed73cbSSangeeta Misra }
267dbed73cbSSangeeta Misra nxt_connp:
268dbed73cbSSangeeta Misra mutex_exit(&s2c_hash->ilb_conn_hash_lock);
269dbed73cbSSangeeta Misra connp = nxt_connp;
270dbed73cbSSangeeta Misra } while (connp != NULL);
271dbed73cbSSangeeta Misra mutex_exit(&c2s_hash[i].ilb_conn_hash_lock);
272dbed73cbSSangeeta Misra }
273dbed73cbSSangeeta Misra }
274dbed73cbSSangeeta Misra
275dbed73cbSSangeeta Misra /* Conn hash timer routine. It dispatches a taskq and restart the timer */
276dbed73cbSSangeeta Misra static void
ilb_conn_timer(void * arg)277dbed73cbSSangeeta Misra ilb_conn_timer(void *arg)
278dbed73cbSSangeeta Misra {
279dbed73cbSSangeeta Misra ilb_timer_t *timer = (ilb_timer_t *)arg;
280dbed73cbSSangeeta Misra
281dbed73cbSSangeeta Misra (void) taskq_dispatch(timer->ilbs->ilbs_conn_taskq, ilb_conn_cleanup,
282dbed73cbSSangeeta Misra arg, TQ_SLEEP);
283dbed73cbSSangeeta Misra mutex_enter(&timer->tid_lock);
284dbed73cbSSangeeta Misra if (timer->tid == 0) {
285dbed73cbSSangeeta Misra mutex_exit(&timer->tid_lock);
286dbed73cbSSangeeta Misra } else {
287dbed73cbSSangeeta Misra timer->tid = timeout(ilb_conn_timer, arg,
288dbed73cbSSangeeta Misra SEC_TO_TICK(ilb_conn_cache_timeout));
289dbed73cbSSangeeta Misra mutex_exit(&timer->tid_lock);
290dbed73cbSSangeeta Misra }
291dbed73cbSSangeeta Misra }
292dbed73cbSSangeeta Misra
293dbed73cbSSangeeta Misra void
ilb_conn_hash_init(ilb_stack_t * ilbs)294dbed73cbSSangeeta Misra ilb_conn_hash_init(ilb_stack_t *ilbs)
295dbed73cbSSangeeta Misra {
296dbed73cbSSangeeta Misra extern pri_t minclsyspri;
297dbed73cbSSangeeta Misra int i, part;
298dbed73cbSSangeeta Misra ilb_timer_t *tm;
299dbed73cbSSangeeta Misra char tq_name[TASKQ_NAMELEN];
300dbed73cbSSangeeta Misra
301dbed73cbSSangeeta Misra /*
302dbed73cbSSangeeta Misra * If ilbs->ilbs_conn_hash_size is not a power of 2, bump it up to
303dbed73cbSSangeeta Misra * the next power of 2.
304dbed73cbSSangeeta Misra */
305de710d24SJosef 'Jeff' Sipek if (!ISP2(ilbs->ilbs_conn_hash_size)) {
306dbed73cbSSangeeta Misra for (i = 0; i < 31; i++) {
307dbed73cbSSangeeta Misra if (ilbs->ilbs_conn_hash_size < (1 << i))
308dbed73cbSSangeeta Misra break;
309dbed73cbSSangeeta Misra }
310dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size = 1 << i;
311dbed73cbSSangeeta Misra }
312dbed73cbSSangeeta Misra
313dbed73cbSSangeeta Misra /*
314dbed73cbSSangeeta Misra * Can sleep since this should be called when a rule is being added,
315dbed73cbSSangeeta Misra * hence we are not in interrupt context.
316dbed73cbSSangeeta Misra */
317dbed73cbSSangeeta Misra ilbs->ilbs_c2s_conn_hash = kmem_zalloc(sizeof (ilb_conn_hash_t) *
318dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size, KM_SLEEP);
319dbed73cbSSangeeta Misra ilbs->ilbs_s2c_conn_hash = kmem_zalloc(sizeof (ilb_conn_hash_t) *
320dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size, KM_SLEEP);
321dbed73cbSSangeeta Misra
322dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) {
323dbed73cbSSangeeta Misra mutex_init(&ilbs->ilbs_c2s_conn_hash[i].ilb_conn_hash_lock,
324dbed73cbSSangeeta Misra NULL, MUTEX_DEFAULT, NULL);
325dbed73cbSSangeeta Misra }
326dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) {
327dbed73cbSSangeeta Misra mutex_init(&ilbs->ilbs_s2c_conn_hash[i].ilb_conn_hash_lock,
328dbed73cbSSangeeta Misra NULL, MUTEX_DEFAULT, NULL);
329dbed73cbSSangeeta Misra }
330dbed73cbSSangeeta Misra
331dbed73cbSSangeeta Misra if (ilb_conn_cache == NULL)
332dbed73cbSSangeeta Misra ilb_conn_cache_init();
333dbed73cbSSangeeta Misra
334dbed73cbSSangeeta Misra (void) snprintf(tq_name, sizeof (tq_name), "ilb_conn_taskq_%p",
3356e0672acSSangeeta Misra (void *)ilbs->ilbs_netstack);
336dbed73cbSSangeeta Misra ASSERT(ilbs->ilbs_conn_taskq == NULL);
337dbed73cbSSangeeta Misra ilbs->ilbs_conn_taskq = taskq_create(tq_name,
338dbed73cbSSangeeta Misra ilb_conn_timer_size * 2, minclsyspri, ilb_conn_timer_size,
339dbed73cbSSangeeta Misra ilb_conn_timer_size * 2, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
340dbed73cbSSangeeta Misra
341dbed73cbSSangeeta Misra ASSERT(ilbs->ilbs_conn_timer_list == NULL);
342dbed73cbSSangeeta Misra ilbs->ilbs_conn_timer_list = kmem_zalloc(sizeof (ilb_timer_t) *
343dbed73cbSSangeeta Misra ilb_conn_timer_size, KM_SLEEP);
344dbed73cbSSangeeta Misra
345dbed73cbSSangeeta Misra /*
346dbed73cbSSangeeta Misra * The hash table is divided in equal partition for those timers
347dbed73cbSSangeeta Misra * to do garbage collection.
348dbed73cbSSangeeta Misra */
349dbed73cbSSangeeta Misra part = ilbs->ilbs_conn_hash_size / ilb_conn_timer_size + 1;
350dbed73cbSSangeeta Misra for (i = 0; i < ilb_conn_timer_size; i++) {
351dbed73cbSSangeeta Misra tm = ilbs->ilbs_conn_timer_list + i;
352dbed73cbSSangeeta Misra tm->start = i * part;
353dbed73cbSSangeeta Misra tm->end = i * part + part;
354dbed73cbSSangeeta Misra if (tm->end > ilbs->ilbs_conn_hash_size)
355dbed73cbSSangeeta Misra tm->end = ilbs->ilbs_conn_hash_size;
356dbed73cbSSangeeta Misra tm->ilbs = ilbs;
357dbed73cbSSangeeta Misra mutex_init(&tm->tid_lock, NULL, MUTEX_DEFAULT, NULL);
358dbed73cbSSangeeta Misra /* Spread out the starting execution time of all the timers. */
359dbed73cbSSangeeta Misra tm->tid = timeout(ilb_conn_timer, tm,
360dbed73cbSSangeeta Misra SEC_TO_TICK(ilb_conn_cache_timeout + i));
361dbed73cbSSangeeta Misra }
362dbed73cbSSangeeta Misra }
363dbed73cbSSangeeta Misra
364dbed73cbSSangeeta Misra void
ilb_conn_hash_fini(ilb_stack_t * ilbs)365dbed73cbSSangeeta Misra ilb_conn_hash_fini(ilb_stack_t *ilbs)
366dbed73cbSSangeeta Misra {
367dbed73cbSSangeeta Misra uint32_t i;
368dbed73cbSSangeeta Misra ilb_conn_t *connp;
369*d17b05b6SJerry Jelinek ilb_conn_hash_t *hash;
370dbed73cbSSangeeta Misra
371dbed73cbSSangeeta Misra if (ilbs->ilbs_c2s_conn_hash == NULL) {
372dbed73cbSSangeeta Misra ASSERT(ilbs->ilbs_s2c_conn_hash == NULL);
373dbed73cbSSangeeta Misra return;
374dbed73cbSSangeeta Misra }
375dbed73cbSSangeeta Misra
376dbed73cbSSangeeta Misra /* Stop all the timers first. */
377dbed73cbSSangeeta Misra for (i = 0; i < ilb_conn_timer_size; i++) {
378dbed73cbSSangeeta Misra timeout_id_t tid;
379dbed73cbSSangeeta Misra
380dbed73cbSSangeeta Misra /* Setting tid to 0 tells the timer handler not to restart. */
381dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_conn_timer_list[i].tid_lock);
382dbed73cbSSangeeta Misra tid = ilbs->ilbs_conn_timer_list[i].tid;
383dbed73cbSSangeeta Misra ilbs->ilbs_conn_timer_list[i].tid = 0;
384dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_conn_timer_list[i].tid_lock);
385dbed73cbSSangeeta Misra (void) untimeout(tid);
386dbed73cbSSangeeta Misra }
387dbed73cbSSangeeta Misra kmem_free(ilbs->ilbs_conn_timer_list, sizeof (ilb_timer_t) *
388dbed73cbSSangeeta Misra ilb_conn_timer_size);
389dbed73cbSSangeeta Misra taskq_destroy(ilbs->ilbs_conn_taskq);
390dbed73cbSSangeeta Misra ilbs->ilbs_conn_taskq = NULL;
391dbed73cbSSangeeta Misra
392dbed73cbSSangeeta Misra /* Then remove all the conns. */
393*d17b05b6SJerry Jelinek hash = ilbs->ilbs_s2c_conn_hash;
394dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) {
395*d17b05b6SJerry Jelinek while ((connp = hash[i].ilb_connp) != NULL) {
396*d17b05b6SJerry Jelinek hash[i].ilb_connp = connp->conn_s2c_next;
397dbed73cbSSangeeta Misra ILB_SERVER_REFRELE(connp->conn_server);
398dbed73cbSSangeeta Misra if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) {
399dbed73cbSSangeeta Misra ilb_nat_src_entry_t *ent;
400dbed73cbSSangeeta Misra in_port_t port;
401dbed73cbSSangeeta Misra
402dbed73cbSSangeeta Misra /*
403dbed73cbSSangeeta Misra * src_ent will be freed in ilb_nat_src_fini().
404dbed73cbSSangeeta Misra */
405dbed73cbSSangeeta Misra port = ntohs(
406dbed73cbSSangeeta Misra connp->conn_rule_cache.info.nat_sport);
407dbed73cbSSangeeta Misra ent = connp->conn_rule_cache.info.src_ent;
408dbed73cbSSangeeta Misra vmem_free(ent->nse_port_arena,
409dbed73cbSSangeeta Misra (void *)(uintptr_t)port, 1);
410dbed73cbSSangeeta Misra }
411dbed73cbSSangeeta Misra kmem_cache_free(ilb_conn_cache, connp);
412dbed73cbSSangeeta Misra }
413dbed73cbSSangeeta Misra }
414dbed73cbSSangeeta Misra kmem_free(ilbs->ilbs_c2s_conn_hash, sizeof (ilb_conn_hash_t) *
415dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size);
416dbed73cbSSangeeta Misra kmem_free(ilbs->ilbs_s2c_conn_hash, sizeof (ilb_conn_hash_t) *
417dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size);
418dbed73cbSSangeeta Misra }
419dbed73cbSSangeeta Misra
420dbed73cbSSangeeta Misra /*
421dbed73cbSSangeeta Misra * Internet checksum adjustment calculation routines. We pre-calculate
422dbed73cbSSangeeta Misra * checksum adjustment so that we don't need to compute the checksum on
423dbed73cbSSangeeta Misra * the whole packet when we change address/port in the packet.
424dbed73cbSSangeeta Misra */
425dbed73cbSSangeeta Misra
/*
 * Half-NAT, IPv4: compute the checksum adjustment for replacing one
 * address/port pair.  oaddr and naddr each point to the two 16-bit words
 * of an IPv4 address.  The old words and old_port are summed and folded to
 * 16 bits; *adj_sum receives the 16-bit complement of that plus the new
 * words and new_port (left unfolded).
 */
static void
hnat_cksum_v4(uint16_t *oaddr, uint16_t *naddr, in_port_t old_port,
    in_port_t new_port, uint32_t *adj_sum)
{
	uint32_t old_sum = (uint32_t)oaddr[0] + oaddr[1] + old_port;

	/* Fold the carries back in until the sum fits in 16 bits. */
	while (old_sum > 0xffff)
		old_sum = (old_sum >> 16) + (old_sum & 0xffff);

	*adj_sum = (uint32_t)(uint16_t)~old_sum + naddr[0] + naddr[1] +
	    new_port;
}
437dbed73cbSSangeeta Misra
/*
 * Half-NAT, IPv6: same computation as the IPv4 variant, but oaddr and
 * naddr each point to the eight 16-bit words of an IPv6 address.
 */
static void
hnat_cksum_v6(uint16_t *oaddr, uint16_t *naddr, in_port_t old_port,
    in_port_t new_port, uint32_t *adj_sum)
{
	uint32_t old_sum = old_port;
	uint32_t new_sum = new_port;
	int w;

	for (w = 0; w < 8; w++) {
		old_sum += oaddr[w];
		new_sum += naddr[w];
	}

	/* Fold the carries back in until the sum fits in 16 bits. */
	while (old_sum > 0xffff)
		old_sum = (old_sum >> 16) + (old_sum & 0xffff);

	*adj_sum = (uint32_t)(uint16_t)~old_sum + new_sum;
}
453dbed73cbSSangeeta Misra
/*
 * Full NAT, IPv4: compute the checksum adjustment for replacing two
 * address/port pairs at once.  Each address pointer refers to the two
 * 16-bit words of an IPv4 address.  The old words and ports are summed and
 * folded to 16 bits; *adj_sum receives the 16-bit complement of that plus
 * all the new words and ports (left unfolded).
 */
static void
fnat_cksum_v4(uint16_t *oaddr1, uint16_t *oaddr2, uint16_t *naddr1,
    uint16_t *naddr2, in_port_t old_port1, in_port_t old_port2,
    in_port_t new_port1, in_port_t new_port2, uint32_t *adj_sum)
{
	uint32_t old_sum, new_sum;

	old_sum = (uint32_t)oaddr1[0] + oaddr1[1] + old_port1;
	old_sum += (uint32_t)oaddr2[0] + oaddr2[1] + old_port2;

	/* Fold the carries back in until the sum fits in 16 bits. */
	while (old_sum > 0xffff)
		old_sum = (old_sum >> 16) + (old_sum & 0xffff);

	new_sum = (uint32_t)naddr1[0] + naddr1[1] + new_port1;
	new_sum += (uint32_t)naddr2[0] + naddr2[1] + new_port2;

	*adj_sum = (uint32_t)(uint16_t)~old_sum + new_sum;
}
468dbed73cbSSangeeta Misra
/*
 * Full NAT, IPv6: same computation as the IPv4 variant, but each address
 * pointer refers to the eight 16-bit words of an IPv6 address.
 */
static void
fnat_cksum_v6(uint16_t *oaddr1, uint16_t *oaddr2, uint16_t *naddr1,
    uint16_t *naddr2, in_port_t old_port1, in_port_t old_port2,
    in_port_t new_port1, in_port_t new_port2, uint32_t *adj_sum)
{
	uint32_t old_sum = (uint32_t)old_port1 + old_port2;
	uint32_t new_sum = (uint32_t)new_port1 + new_port2;
	int w;

	for (w = 0; w < 8; w++) {
		old_sum += (uint32_t)oaddr1[w] + oaddr2[w];
		new_sum += (uint32_t)naddr1[w] + naddr2[w];
	}

	/* Fold the carries back in until the sum fits in 16 bits. */
	while (old_sum > 0xffff)
		old_sum = (old_sum >> 16) + (old_sum & 0xffff);

	*adj_sum = (uint32_t)(uint16_t)~old_sum + new_sum;
}
491dbed73cbSSangeeta Misra
/*
 * Add a conn hash entry to the tables.  Note that a conn hash entry
 * (ilb_conn_t) contains info on both directions.  And there are two hash
 * tables, one for client to server and the other for server to client.
 * So the same entry is added to both tables and can be accessed by two
 * threads simultaneously.  But each thread will only access data on one
 * direction, so there is no conflict.
 */
500dbed73cbSSangeeta Misra int
ilb_conn_add(ilb_stack_t * ilbs,ilb_rule_t * rule,ilb_server_t * server,in6_addr_t * src,in_port_t sport,in6_addr_t * dst,in_port_t dport,ilb_nat_info_t * info,uint32_t * ip_sum,uint32_t * tp_sum,ilb_sticky_t * s)501dbed73cbSSangeeta Misra ilb_conn_add(ilb_stack_t *ilbs, ilb_rule_t *rule, ilb_server_t *server,
502dbed73cbSSangeeta Misra in6_addr_t *src, in_port_t sport, in6_addr_t *dst, in_port_t dport,
503dbed73cbSSangeeta Misra ilb_nat_info_t *info, uint32_t *ip_sum, uint32_t *tp_sum, ilb_sticky_t *s)
504dbed73cbSSangeeta Misra {
505dbed73cbSSangeeta Misra ilb_conn_t *connp;
506dbed73cbSSangeeta Misra ilb_conn_hash_t *hash;
507dbed73cbSSangeeta Misra int i;
508dbed73cbSSangeeta Misra
509dbed73cbSSangeeta Misra connp = kmem_cache_alloc(ilb_conn_cache, KM_NOSLEEP);
510dbed73cbSSangeeta Misra if (connp == NULL) {
511dbed73cbSSangeeta Misra if (s != NULL) {
512dbed73cbSSangeeta Misra if (rule->ir_topo == ILB_TOPO_IMPL_NAT) {
513dbed73cbSSangeeta Misra ilb_nat_src_entry_t **entry;
514dbed73cbSSangeeta Misra
515dbed73cbSSangeeta Misra entry = s->server->iser_nat_src->src_list;
516dbed73cbSSangeeta Misra vmem_free(entry[s->nat_src_idx]->nse_port_arena,
517dbed73cbSSangeeta Misra (void *)(uintptr_t)ntohs(info->nat_sport),
518dbed73cbSSangeeta Misra 1);
519dbed73cbSSangeeta Misra }
520dbed73cbSSangeeta Misra ILB_STICKY_REFRELE(s);
521dbed73cbSSangeeta Misra }
522dbed73cbSSangeeta Misra return (ENOMEM);
523dbed73cbSSangeeta Misra }
524dbed73cbSSangeeta Misra
525dbed73cbSSangeeta Misra connp->conn_l4 = rule->ir_proto;
526dbed73cbSSangeeta Misra
527dbed73cbSSangeeta Misra connp->conn_server = server;
528dbed73cbSSangeeta Misra ILB_SERVER_REFHOLD(server);
529dbed73cbSSangeeta Misra connp->conn_sticky = s;
530dbed73cbSSangeeta Misra
531dbed73cbSSangeeta Misra connp->conn_rule_cache.topo = rule->ir_topo;
532dbed73cbSSangeeta Misra connp->conn_rule_cache.info = *info;
533dbed73cbSSangeeta Misra
534dbed73cbSSangeeta Misra connp->conn_gc = B_FALSE;
535dbed73cbSSangeeta Misra
536dbed73cbSSangeeta Misra connp->conn_expiry = rule->ir_nat_expiry;
537d3d50737SRafael Vanoni connp->conn_cr_time = ddi_get_lbolt64();
538dbed73cbSSangeeta Misra
539dbed73cbSSangeeta Misra /* Client to server info. */
540dbed73cbSSangeeta Misra connp->conn_c2s_saddr = *src;
541dbed73cbSSangeeta Misra connp->conn_c2s_sport = sport;
542dbed73cbSSangeeta Misra connp->conn_c2s_daddr = *dst;
543dbed73cbSSangeeta Misra connp->conn_c2s_dport = dport;
544dbed73cbSSangeeta Misra
545d3d50737SRafael Vanoni connp->conn_c2s_atime = ddi_get_lbolt64();
546dbed73cbSSangeeta Misra /* The packet ths triggers this creation should be counted */
547dbed73cbSSangeeta Misra connp->conn_c2s_pkt_cnt = 1;
548dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fin_sent = B_FALSE;
549dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fin_acked = B_FALSE;
550dbed73cbSSangeeta Misra
551dbed73cbSSangeeta Misra /* Server to client info, before NAT */
552dbed73cbSSangeeta Misra switch (rule->ir_topo) {
553dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_HALF_NAT:
554dbed73cbSSangeeta Misra connp->conn_s2c_saddr = info->nat_dst;
555dbed73cbSSangeeta Misra connp->conn_s2c_sport = info->nat_dport;
556dbed73cbSSangeeta Misra connp->conn_s2c_daddr = *src;
557dbed73cbSSangeeta Misra connp->conn_s2c_dport = sport;
558dbed73cbSSangeeta Misra
559dbed73cbSSangeeta Misra /* Pre-calculate checksum changes for both directions */
560dbed73cbSSangeeta Misra if (rule->ir_ipver == IPPROTO_IP) {
561dbed73cbSSangeeta Misra hnat_cksum_v4((uint16_t *)&dst->s6_addr32[3],
562dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3], 0, 0,
563dbed73cbSSangeeta Misra &connp->conn_c2s_ip_sum);
564dbed73cbSSangeeta Misra hnat_cksum_v4((uint16_t *)&dst->s6_addr32[3],
565dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3], dport,
566dbed73cbSSangeeta Misra info->nat_dport, &connp->conn_c2s_tp_sum);
567dbed73cbSSangeeta Misra *ip_sum = connp->conn_c2s_ip_sum;
568dbed73cbSSangeeta Misra *tp_sum = connp->conn_c2s_tp_sum;
569dbed73cbSSangeeta Misra
570dbed73cbSSangeeta Misra hnat_cksum_v4(
571dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3],
572dbed73cbSSangeeta Misra (uint16_t *)&dst->s6_addr32[3], 0, 0,
573dbed73cbSSangeeta Misra &connp->conn_s2c_ip_sum);
574dbed73cbSSangeeta Misra hnat_cksum_v4(
575dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3],
576dbed73cbSSangeeta Misra (uint16_t *)&dst->s6_addr32[3],
577dbed73cbSSangeeta Misra info->nat_dport, dport,
578dbed73cbSSangeeta Misra &connp->conn_s2c_tp_sum);
579dbed73cbSSangeeta Misra } else {
580dbed73cbSSangeeta Misra connp->conn_c2s_ip_sum = 0;
581dbed73cbSSangeeta Misra hnat_cksum_v6((uint16_t *)dst,
582dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst, dport,
583dbed73cbSSangeeta Misra info->nat_dport, &connp->conn_c2s_tp_sum);
584dbed73cbSSangeeta Misra *ip_sum = 0;
585dbed73cbSSangeeta Misra *tp_sum = connp->conn_c2s_tp_sum;
586dbed73cbSSangeeta Misra
587dbed73cbSSangeeta Misra connp->conn_s2c_ip_sum = 0;
588dbed73cbSSangeeta Misra hnat_cksum_v6((uint16_t *)&info->nat_dst,
589dbed73cbSSangeeta Misra (uint16_t *)dst, info->nat_dport, dport,
590dbed73cbSSangeeta Misra &connp->conn_s2c_tp_sum);
591dbed73cbSSangeeta Misra }
592dbed73cbSSangeeta Misra break;
593dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_NAT:
594dbed73cbSSangeeta Misra connp->conn_s2c_saddr = info->nat_dst;
595dbed73cbSSangeeta Misra connp->conn_s2c_sport = info->nat_dport;
596dbed73cbSSangeeta Misra connp->conn_s2c_daddr = info->nat_src;
597dbed73cbSSangeeta Misra connp->conn_s2c_dport = info->nat_sport;
598dbed73cbSSangeeta Misra
599dbed73cbSSangeeta Misra if (rule->ir_ipver == IPPROTO_IP) {
600dbed73cbSSangeeta Misra fnat_cksum_v4((uint16_t *)&src->s6_addr32[3],
601dbed73cbSSangeeta Misra (uint16_t *)&dst->s6_addr32[3],
602dbed73cbSSangeeta Misra (uint16_t *)&info->nat_src.s6_addr32[3],
603dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3],
604dbed73cbSSangeeta Misra 0, 0, 0, 0, &connp->conn_c2s_ip_sum);
605dbed73cbSSangeeta Misra fnat_cksum_v4((uint16_t *)&src->s6_addr32[3],
606dbed73cbSSangeeta Misra (uint16_t *)&dst->s6_addr32[3],
607dbed73cbSSangeeta Misra (uint16_t *)&info->nat_src.s6_addr32[3],
608dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3],
609dbed73cbSSangeeta Misra sport, dport, info->nat_sport,
610dbed73cbSSangeeta Misra info->nat_dport, &connp->conn_c2s_tp_sum);
611dbed73cbSSangeeta Misra *ip_sum = connp->conn_c2s_ip_sum;
612dbed73cbSSangeeta Misra *tp_sum = connp->conn_c2s_tp_sum;
613dbed73cbSSangeeta Misra
614dbed73cbSSangeeta Misra fnat_cksum_v4(
615dbed73cbSSangeeta Misra (uint16_t *)&info->nat_src.s6_addr32[3],
616dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3],
617dbed73cbSSangeeta Misra (uint16_t *)&src->s6_addr32[3],
618dbed73cbSSangeeta Misra (uint16_t *)&dst->s6_addr32[3],
619dbed73cbSSangeeta Misra 0, 0, 0, 0, &connp->conn_s2c_ip_sum);
620dbed73cbSSangeeta Misra fnat_cksum_v4(
621dbed73cbSSangeeta Misra (uint16_t *)&info->nat_src.s6_addr32[3],
622dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3],
623dbed73cbSSangeeta Misra (uint16_t *)&src->s6_addr32[3],
624dbed73cbSSangeeta Misra (uint16_t *)&dst->s6_addr32[3],
625dbed73cbSSangeeta Misra info->nat_sport, info->nat_dport,
626dbed73cbSSangeeta Misra sport, dport, &connp->conn_s2c_tp_sum);
627dbed73cbSSangeeta Misra } else {
628dbed73cbSSangeeta Misra fnat_cksum_v6((uint16_t *)src, (uint16_t *)dst,
629dbed73cbSSangeeta Misra (uint16_t *)&info->nat_src,
630dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst,
631dbed73cbSSangeeta Misra sport, dport, info->nat_sport,
632dbed73cbSSangeeta Misra info->nat_dport, &connp->conn_c2s_tp_sum);
633dbed73cbSSangeeta Misra connp->conn_c2s_ip_sum = 0;
634dbed73cbSSangeeta Misra *ip_sum = 0;
635dbed73cbSSangeeta Misra *tp_sum = connp->conn_c2s_tp_sum;
636dbed73cbSSangeeta Misra
637dbed73cbSSangeeta Misra fnat_cksum_v6((uint16_t *)&info->nat_src,
638dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst, (uint16_t *)src,
639dbed73cbSSangeeta Misra (uint16_t *)dst, info->nat_sport,
640dbed73cbSSangeeta Misra info->nat_dport, sport, dport,
641dbed73cbSSangeeta Misra &connp->conn_s2c_tp_sum);
642dbed73cbSSangeeta Misra connp->conn_s2c_ip_sum = 0;
643dbed73cbSSangeeta Misra }
644dbed73cbSSangeeta Misra break;
645dbed73cbSSangeeta Misra }
646dbed73cbSSangeeta Misra
647d3d50737SRafael Vanoni connp->conn_s2c_atime = ddi_get_lbolt64();
648dbed73cbSSangeeta Misra connp->conn_s2c_pkt_cnt = 1;
649dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_sent = B_FALSE;
650dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_acked = B_FALSE;
651dbed73cbSSangeeta Misra
652dbed73cbSSangeeta Misra /* Add it to the s2c hash table. */
653dbed73cbSSangeeta Misra hash = ilbs->ilbs_s2c_conn_hash;
654dbed73cbSSangeeta Misra i = ILB_CONN_HASH((uint8_t *)&connp->conn_s2c_saddr.s6_addr32[3],
655dbed73cbSSangeeta Misra ntohs(connp->conn_s2c_sport),
656dbed73cbSSangeeta Misra (uint8_t *)&connp->conn_s2c_daddr.s6_addr32[3],
657dbed73cbSSangeeta Misra ntohs(connp->conn_s2c_dport), ilbs->ilbs_conn_hash_size);
658dbed73cbSSangeeta Misra connp->conn_s2c_hash = &hash[i];
659dbed73cbSSangeeta Misra DTRACE_PROBE2(ilb__conn__hash__add__s2c, ilb_conn_t *, connp, int, i);
660dbed73cbSSangeeta Misra
661dbed73cbSSangeeta Misra mutex_enter(&hash[i].ilb_conn_hash_lock);
662dbed73cbSSangeeta Misra hash[i].ilb_conn_cnt++;
663dbed73cbSSangeeta Misra connp->conn_s2c_next = hash[i].ilb_connp;
664dbed73cbSSangeeta Misra if (hash[i].ilb_connp != NULL)
665dbed73cbSSangeeta Misra hash[i].ilb_connp->conn_s2c_prev = connp;
666dbed73cbSSangeeta Misra connp->conn_s2c_prev = NULL;
667dbed73cbSSangeeta Misra hash[i].ilb_connp = connp;
668dbed73cbSSangeeta Misra mutex_exit(&hash[i].ilb_conn_hash_lock);
669dbed73cbSSangeeta Misra
670dbed73cbSSangeeta Misra /* Add it to the c2s hash table. */
671dbed73cbSSangeeta Misra hash = ilbs->ilbs_c2s_conn_hash;
672dbed73cbSSangeeta Misra i = ILB_CONN_HASH((uint8_t *)&src->s6_addr32[3], ntohs(sport),
673dbed73cbSSangeeta Misra (uint8_t *)&dst->s6_addr32[3], ntohs(dport),
674dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size);
675dbed73cbSSangeeta Misra connp->conn_c2s_hash = &hash[i];
676dbed73cbSSangeeta Misra DTRACE_PROBE2(ilb__conn__hash__add__c2s, ilb_conn_t *, connp, int, i);
677dbed73cbSSangeeta Misra
678dbed73cbSSangeeta Misra mutex_enter(&hash[i].ilb_conn_hash_lock);
679dbed73cbSSangeeta Misra hash[i].ilb_conn_cnt++;
680dbed73cbSSangeeta Misra connp->conn_c2s_next = hash[i].ilb_connp;
681dbed73cbSSangeeta Misra if (hash[i].ilb_connp != NULL)
682dbed73cbSSangeeta Misra hash[i].ilb_connp->conn_c2s_prev = connp;
683dbed73cbSSangeeta Misra connp->conn_c2s_prev = NULL;
684dbed73cbSSangeeta Misra hash[i].ilb_connp = connp;
685dbed73cbSSangeeta Misra mutex_exit(&hash[i].ilb_conn_hash_lock);
686dbed73cbSSangeeta Misra
687dbed73cbSSangeeta Misra return (0);
688dbed73cbSSangeeta Misra }
689dbed73cbSSangeeta Misra
690dbed73cbSSangeeta Misra /*
691dbed73cbSSangeeta Misra * If a connection is using TCP, we keep track of simple TCP state transition
692dbed73cbSSangeeta Misra * so that we know when to clean up an entry.
693dbed73cbSSangeeta Misra */
694dbed73cbSSangeeta Misra static boolean_t
update_conn_tcp(ilb_conn_t * connp,void * iph,tcpha_t * tcpha,int32_t pkt_len,boolean_t c2s)695dbed73cbSSangeeta Misra update_conn_tcp(ilb_conn_t *connp, void *iph, tcpha_t *tcpha, int32_t pkt_len,
696dbed73cbSSangeeta Misra boolean_t c2s)
697dbed73cbSSangeeta Misra {
698dbed73cbSSangeeta Misra uint32_t ack, seq;
699dbed73cbSSangeeta Misra int32_t seg_len;
700dbed73cbSSangeeta Misra
701dbed73cbSSangeeta Misra if (tcpha->tha_flags & TH_RST)
702dbed73cbSSangeeta Misra return (B_FALSE);
703dbed73cbSSangeeta Misra
704dbed73cbSSangeeta Misra seg_len = pkt_len - ((uint8_t *)tcpha - (uint8_t *)iph) -
705dbed73cbSSangeeta Misra TCP_HDR_LENGTH((tcph_t *)tcpha);
706dbed73cbSSangeeta Misra
707dbed73cbSSangeeta Misra if (tcpha->tha_flags & TH_ACK)
708dbed73cbSSangeeta Misra ack = ntohl(tcpha->tha_ack);
709dbed73cbSSangeeta Misra seq = ntohl(tcpha->tha_seq);
710dbed73cbSSangeeta Misra if (c2s) {
711dbed73cbSSangeeta Misra ASSERT(MUTEX_HELD(&connp->conn_c2s_hash->ilb_conn_hash_lock));
712dbed73cbSSangeeta Misra if (tcpha->tha_flags & TH_FIN) {
713dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fss = seq + seg_len;
714dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fin_sent = B_TRUE;
715dbed73cbSSangeeta Misra }
716dbed73cbSSangeeta Misra connp->conn_c2s_tcp_ack = ack;
717dbed73cbSSangeeta Misra
718dbed73cbSSangeeta Misra /* Port reuse by the client, restart the conn. */
719dbed73cbSSangeeta Misra if (connp->conn_c2s_tcp_fin_sent &&
720dbed73cbSSangeeta Misra SEQ_GT(seq, connp->conn_c2s_tcp_fss + 1)) {
721dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fin_sent = B_FALSE;
722dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fin_acked = B_FALSE;
723dbed73cbSSangeeta Misra }
724dbed73cbSSangeeta Misra } else {
725dbed73cbSSangeeta Misra ASSERT(MUTEX_HELD(&connp->conn_s2c_hash->ilb_conn_hash_lock));
726dbed73cbSSangeeta Misra if (tcpha->tha_flags & TH_FIN) {
727dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fss = seq + seg_len;
728dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_sent = B_TRUE;
729dbed73cbSSangeeta Misra }
730dbed73cbSSangeeta Misra connp->conn_s2c_tcp_ack = ack;
731dbed73cbSSangeeta Misra
732dbed73cbSSangeeta Misra /* Port reuse by the client, restart the conn. */
733dbed73cbSSangeeta Misra if (connp->conn_s2c_tcp_fin_sent &&
734dbed73cbSSangeeta Misra SEQ_GT(seq, connp->conn_s2c_tcp_fss + 1)) {
735dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_sent = B_FALSE;
736dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_acked = B_FALSE;
737dbed73cbSSangeeta Misra }
738dbed73cbSSangeeta Misra }
739dbed73cbSSangeeta Misra
740dbed73cbSSangeeta Misra return (B_TRUE);
741dbed73cbSSangeeta Misra }
742dbed73cbSSangeeta Misra
743dbed73cbSSangeeta Misra /*
744dbed73cbSSangeeta Misra * Helper routint to find conn hash entry given some packet information and
745dbed73cbSSangeeta Misra * the traffic direction (c2s, client to server?)
746dbed73cbSSangeeta Misra */
747dbed73cbSSangeeta Misra static boolean_t
ilb_find_conn(ilb_stack_t * ilbs,void * iph,void * tph,int l4,in6_addr_t * src,in_port_t sport,in6_addr_t * dst,in_port_t dport,ilb_rule_info_t * rule_cache,uint32_t * ip_sum,uint32_t * tp_sum,int32_t pkt_len,boolean_t c2s)748dbed73cbSSangeeta Misra ilb_find_conn(ilb_stack_t *ilbs, void *iph, void *tph, int l4, in6_addr_t *src,
749dbed73cbSSangeeta Misra in_port_t sport, in6_addr_t *dst, in_port_t dport,
750dbed73cbSSangeeta Misra ilb_rule_info_t *rule_cache, uint32_t *ip_sum, uint32_t *tp_sum,
751dbed73cbSSangeeta Misra int32_t pkt_len, boolean_t c2s)
752dbed73cbSSangeeta Misra {
753dbed73cbSSangeeta Misra ilb_conn_hash_t *hash;
754dbed73cbSSangeeta Misra uint_t i;
755dbed73cbSSangeeta Misra ilb_conn_t *connp;
756dbed73cbSSangeeta Misra boolean_t tcp_alive;
757dbed73cbSSangeeta Misra boolean_t ret = B_FALSE;
758dbed73cbSSangeeta Misra
759dbed73cbSSangeeta Misra i = ILB_CONN_HASH((uint8_t *)&src->s6_addr32[3], ntohs(sport),
760dbed73cbSSangeeta Misra (uint8_t *)&dst->s6_addr32[3], ntohs(dport),
761dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size);
762dbed73cbSSangeeta Misra if (c2s) {
763dbed73cbSSangeeta Misra hash = ilbs->ilbs_c2s_conn_hash;
764dbed73cbSSangeeta Misra mutex_enter(&hash[i].ilb_conn_hash_lock);
765dbed73cbSSangeeta Misra for (connp = hash[i].ilb_connp; connp != NULL;
766dbed73cbSSangeeta Misra connp = connp->conn_c2s_next) {
767dbed73cbSSangeeta Misra if (connp->conn_l4 == l4 &&
768dbed73cbSSangeeta Misra connp->conn_c2s_dport == dport &&
769dbed73cbSSangeeta Misra connp->conn_c2s_sport == sport &&
770dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(src, &connp->conn_c2s_saddr) &&
771dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(dst, &connp->conn_c2s_daddr)) {
772d3d50737SRafael Vanoni connp->conn_c2s_atime = ddi_get_lbolt64();
773dbed73cbSSangeeta Misra connp->conn_c2s_pkt_cnt++;
774dbed73cbSSangeeta Misra *rule_cache = connp->conn_rule_cache;
775dbed73cbSSangeeta Misra *ip_sum = connp->conn_c2s_ip_sum;
776dbed73cbSSangeeta Misra *tp_sum = connp->conn_c2s_tp_sum;
777dbed73cbSSangeeta Misra ret = B_TRUE;
778dbed73cbSSangeeta Misra break;
779dbed73cbSSangeeta Misra }
780dbed73cbSSangeeta Misra }
781dbed73cbSSangeeta Misra } else {
782dbed73cbSSangeeta Misra hash = ilbs->ilbs_s2c_conn_hash;
783dbed73cbSSangeeta Misra mutex_enter(&hash[i].ilb_conn_hash_lock);
784dbed73cbSSangeeta Misra for (connp = hash[i].ilb_connp; connp != NULL;
785dbed73cbSSangeeta Misra connp = connp->conn_s2c_next) {
786dbed73cbSSangeeta Misra if (connp->conn_l4 == l4 &&
787dbed73cbSSangeeta Misra connp->conn_s2c_dport == dport &&
788dbed73cbSSangeeta Misra connp->conn_s2c_sport == sport &&
789dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(src, &connp->conn_s2c_saddr) &&
790dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(dst, &connp->conn_s2c_daddr)) {
791d3d50737SRafael Vanoni connp->conn_s2c_atime = ddi_get_lbolt64();
792dbed73cbSSangeeta Misra connp->conn_s2c_pkt_cnt++;
793dbed73cbSSangeeta Misra *rule_cache = connp->conn_rule_cache;
794dbed73cbSSangeeta Misra *ip_sum = connp->conn_s2c_ip_sum;
795dbed73cbSSangeeta Misra *tp_sum = connp->conn_s2c_tp_sum;
796dbed73cbSSangeeta Misra ret = B_TRUE;
797dbed73cbSSangeeta Misra break;
798dbed73cbSSangeeta Misra }
799dbed73cbSSangeeta Misra }
800dbed73cbSSangeeta Misra }
801dbed73cbSSangeeta Misra if (ret) {
802dbed73cbSSangeeta Misra ILB_S_KSTAT(connp->conn_server, pkt_processed);
803dbed73cbSSangeeta Misra ILB_S_KSTAT_UPDATE(connp->conn_server, bytes_processed,
804dbed73cbSSangeeta Misra pkt_len);
805dbed73cbSSangeeta Misra
806dbed73cbSSangeeta Misra switch (l4) {
807dbed73cbSSangeeta Misra case (IPPROTO_TCP):
808dbed73cbSSangeeta Misra tcp_alive = update_conn_tcp(connp, iph, tph, pkt_len,
809dbed73cbSSangeeta Misra c2s);
810dbed73cbSSangeeta Misra if (!tcp_alive) {
811dbed73cbSSangeeta Misra connp->conn_gc = B_TRUE;
812dbed73cbSSangeeta Misra }
813dbed73cbSSangeeta Misra break;
814dbed73cbSSangeeta Misra default:
815dbed73cbSSangeeta Misra break;
816dbed73cbSSangeeta Misra }
817dbed73cbSSangeeta Misra }
818dbed73cbSSangeeta Misra mutex_exit(&hash[i].ilb_conn_hash_lock);
819dbed73cbSSangeeta Misra
820dbed73cbSSangeeta Misra return (ret);
821dbed73cbSSangeeta Misra }
822dbed73cbSSangeeta Misra
823dbed73cbSSangeeta Misra /*
824dbed73cbSSangeeta Misra * To check if a give packet matches an existing conn hash entry. If it
825dbed73cbSSangeeta Misra * does, return the information about this entry so that the caller can
826dbed73cbSSangeeta Misra * do the proper NAT.
827dbed73cbSSangeeta Misra */
828dbed73cbSSangeeta Misra boolean_t
ilb_check_conn(ilb_stack_t * ilbs,int l3,void * iph,int l4,void * tph,in6_addr_t * src,in6_addr_t * dst,in_port_t sport,in_port_t dport,uint32_t pkt_len,in6_addr_t * lb_dst)829dbed73cbSSangeeta Misra ilb_check_conn(ilb_stack_t *ilbs, int l3, void *iph, int l4, void *tph,
830dbed73cbSSangeeta Misra in6_addr_t *src, in6_addr_t *dst, in_port_t sport, in_port_t dport,
831dbed73cbSSangeeta Misra uint32_t pkt_len, in6_addr_t *lb_dst)
832dbed73cbSSangeeta Misra {
833dbed73cbSSangeeta Misra ilb_rule_info_t rule_cache;
834dbed73cbSSangeeta Misra uint32_t adj_ip_sum, adj_tp_sum;
835dbed73cbSSangeeta Misra boolean_t ret;
836dbed73cbSSangeeta Misra
837dbed73cbSSangeeta Misra /* Check the incoming hash table. */
838dbed73cbSSangeeta Misra if (ilb_find_conn(ilbs, iph, tph, l4, src, sport, dst, dport,
839dbed73cbSSangeeta Misra &rule_cache, &adj_ip_sum, &adj_tp_sum, pkt_len, B_TRUE)) {
840dbed73cbSSangeeta Misra switch (rule_cache.topo) {
841dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_NAT:
842dbed73cbSSangeeta Misra *lb_dst = rule_cache.info.nat_dst;
843dbed73cbSSangeeta Misra ilb_full_nat(l3, iph, l4, tph, &rule_cache.info,
844dbed73cbSSangeeta Misra adj_ip_sum, adj_tp_sum, B_TRUE);
845dbed73cbSSangeeta Misra ret = B_TRUE;
846dbed73cbSSangeeta Misra break;
847dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_HALF_NAT:
848dbed73cbSSangeeta Misra *lb_dst = rule_cache.info.nat_dst;
849dbed73cbSSangeeta Misra ilb_half_nat(l3, iph, l4, tph, &rule_cache.info,
850dbed73cbSSangeeta Misra adj_ip_sum, adj_tp_sum, B_TRUE);
851dbed73cbSSangeeta Misra ret = B_TRUE;
852dbed73cbSSangeeta Misra break;
853dbed73cbSSangeeta Misra default:
854dbed73cbSSangeeta Misra ret = B_FALSE;
855dbed73cbSSangeeta Misra break;
856dbed73cbSSangeeta Misra }
857dbed73cbSSangeeta Misra return (ret);
858dbed73cbSSangeeta Misra }
859dbed73cbSSangeeta Misra if (ilb_find_conn(ilbs, iph, tph, l4, src, sport, dst, dport,
860dbed73cbSSangeeta Misra &rule_cache, &adj_ip_sum, &adj_tp_sum, pkt_len, B_FALSE)) {
861dbed73cbSSangeeta Misra switch (rule_cache.topo) {
862dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_NAT:
863dbed73cbSSangeeta Misra *lb_dst = rule_cache.info.src;
864dbed73cbSSangeeta Misra ilb_full_nat(l3, iph, l4, tph, &rule_cache.info,
865dbed73cbSSangeeta Misra adj_ip_sum, adj_tp_sum, B_FALSE);
866dbed73cbSSangeeta Misra ret = B_TRUE;
867dbed73cbSSangeeta Misra break;
868dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_HALF_NAT:
869dbed73cbSSangeeta Misra *lb_dst = *dst;
870dbed73cbSSangeeta Misra ilb_half_nat(l3, iph, l4, tph, &rule_cache.info,
871dbed73cbSSangeeta Misra adj_ip_sum, adj_tp_sum, B_FALSE);
872dbed73cbSSangeeta Misra ret = B_TRUE;
873dbed73cbSSangeeta Misra break;
874dbed73cbSSangeeta Misra default:
875dbed73cbSSangeeta Misra ret = B_FALSE;
876dbed73cbSSangeeta Misra break;
877dbed73cbSSangeeta Misra }
878dbed73cbSSangeeta Misra return (ret);
879dbed73cbSSangeeta Misra }
880dbed73cbSSangeeta Misra
881dbed73cbSSangeeta Misra return (B_FALSE);
882dbed73cbSSangeeta Misra }
883dbed73cbSSangeeta Misra
884dbed73cbSSangeeta Misra /*
885dbed73cbSSangeeta Misra * To check if an ICMP packet belongs to a connection in one of the conn
886dbed73cbSSangeeta Misra * hash entries.
887dbed73cbSSangeeta Misra */
888dbed73cbSSangeeta Misra boolean_t
ilb_check_icmp_conn(ilb_stack_t * ilbs,mblk_t * mp,int l3,void * out_iph,void * icmph,in6_addr_t * lb_dst)889dbed73cbSSangeeta Misra ilb_check_icmp_conn(ilb_stack_t *ilbs, mblk_t *mp, int l3, void *out_iph,
890dbed73cbSSangeeta Misra void *icmph, in6_addr_t *lb_dst)
891dbed73cbSSangeeta Misra {
892dbed73cbSSangeeta Misra ilb_conn_hash_t *hash;
893dbed73cbSSangeeta Misra ipha_t *in_iph4;
894dbed73cbSSangeeta Misra ip6_t *in_iph6;
895dbed73cbSSangeeta Misra icmph_t *icmph4;
896dbed73cbSSangeeta Misra icmp6_t *icmph6;
897dbed73cbSSangeeta Misra in6_addr_t *in_src_p, *in_dst_p;
898dbed73cbSSangeeta Misra in_port_t *sport, *dport;
899dbed73cbSSangeeta Misra int l4;
900dbed73cbSSangeeta Misra uint_t i;
901dbed73cbSSangeeta Misra ilb_conn_t *connp;
902dbed73cbSSangeeta Misra ilb_rule_info_t rule_cache;
903dbed73cbSSangeeta Misra uint32_t adj_ip_sum;
904dbed73cbSSangeeta Misra boolean_t full_nat;
905dbed73cbSSangeeta Misra
906dbed73cbSSangeeta Misra if (l3 == IPPROTO_IP) {
907dbed73cbSSangeeta Misra in6_addr_t in_src, in_dst;
908dbed73cbSSangeeta Misra
909dbed73cbSSangeeta Misra icmph4 = (icmph_t *)icmph;
910dbed73cbSSangeeta Misra in_iph4 = (ipha_t *)&icmph4[1];
911dbed73cbSSangeeta Misra
912dbed73cbSSangeeta Misra if ((uint8_t *)in_iph4 + IPH_HDR_LENGTH(in_iph4) +
913dbed73cbSSangeeta Misra ICMP_MIN_TP_HDR_LEN > mp->b_wptr) {
914dbed73cbSSangeeta Misra return (B_FALSE);
915dbed73cbSSangeeta Misra }
916dbed73cbSSangeeta Misra
917dbed73cbSSangeeta Misra IN6_IPADDR_TO_V4MAPPED(in_iph4->ipha_src, &in_src);
918dbed73cbSSangeeta Misra in_src_p = &in_src;
919dbed73cbSSangeeta Misra IN6_IPADDR_TO_V4MAPPED(in_iph4->ipha_dst, &in_dst);
920dbed73cbSSangeeta Misra in_dst_p = &in_dst;
921dbed73cbSSangeeta Misra
922dbed73cbSSangeeta Misra l4 = in_iph4->ipha_protocol;
923dbed73cbSSangeeta Misra if (l4 != IPPROTO_TCP && l4 != IPPROTO_UDP)
924dbed73cbSSangeeta Misra return (B_FALSE);
925dbed73cbSSangeeta Misra
926dbed73cbSSangeeta Misra sport = (in_port_t *)((char *)in_iph4 +
927dbed73cbSSangeeta Misra IPH_HDR_LENGTH(in_iph4));
928dbed73cbSSangeeta Misra dport = sport + 1;
929dbed73cbSSangeeta Misra
930dbed73cbSSangeeta Misra DTRACE_PROBE4(ilb__chk__icmp__conn__v4, uint32_t,
931dbed73cbSSangeeta Misra in_iph4->ipha_src, uint32_t, in_iph4->ipha_dst, uint16_t,
932dbed73cbSSangeeta Misra ntohs(*sport), uint16_t, ntohs(*dport));
933dbed73cbSSangeeta Misra } else {
934dbed73cbSSangeeta Misra ASSERT(l3 == IPPROTO_IPV6);
935dbed73cbSSangeeta Misra
936dbed73cbSSangeeta Misra icmph6 = (icmp6_t *)icmph;
937dbed73cbSSangeeta Misra in_iph6 = (ip6_t *)&icmph6[1];
938dbed73cbSSangeeta Misra in_src_p = &in_iph6->ip6_src;
939dbed73cbSSangeeta Misra in_dst_p = &in_iph6->ip6_dst;
940dbed73cbSSangeeta Misra
941dbed73cbSSangeeta Misra if ((uint8_t *)in_iph6 + sizeof (ip6_t) +
942dbed73cbSSangeeta Misra ICMP_MIN_TP_HDR_LEN > mp->b_wptr) {
943dbed73cbSSangeeta Misra return (B_FALSE);
944dbed73cbSSangeeta Misra }
945dbed73cbSSangeeta Misra
946dbed73cbSSangeeta Misra l4 = in_iph6->ip6_nxt;
947dbed73cbSSangeeta Misra /* We don't go deep inside an IPv6 packet yet. */
948dbed73cbSSangeeta Misra if (l4 != IPPROTO_TCP && l4 != IPPROTO_UDP)
949dbed73cbSSangeeta Misra return (B_FALSE);
950dbed73cbSSangeeta Misra
951dbed73cbSSangeeta Misra sport = (in_port_t *)&in_iph6[1];
952dbed73cbSSangeeta Misra dport = sport + 1;
953dbed73cbSSangeeta Misra
954dbed73cbSSangeeta Misra DTRACE_PROBE4(ilb__chk__icmp__conn__v6, in6_addr_t *,
955dbed73cbSSangeeta Misra &in_iph6->ip6_src, in6_addr_t *, &in_iph6->ip6_dst,
956dbed73cbSSangeeta Misra uint16_t, ntohs(*sport), uint16_t, ntohs(*dport));
957dbed73cbSSangeeta Misra }
958dbed73cbSSangeeta Misra
959dbed73cbSSangeeta Misra i = ILB_CONN_HASH((uint8_t *)&in_dst_p->s6_addr32[3], ntohs(*dport),
960dbed73cbSSangeeta Misra (uint8_t *)&in_src_p->s6_addr32[3], ntohs(*sport),
961dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size);
962dbed73cbSSangeeta Misra hash = ilbs->ilbs_c2s_conn_hash;
963dbed73cbSSangeeta Misra
964dbed73cbSSangeeta Misra mutex_enter(&hash[i].ilb_conn_hash_lock);
965dbed73cbSSangeeta Misra for (connp = hash[i].ilb_connp; connp != NULL;
966dbed73cbSSangeeta Misra connp = connp->conn_c2s_next) {
967dbed73cbSSangeeta Misra if (connp->conn_l4 == l4 &&
968dbed73cbSSangeeta Misra connp->conn_c2s_dport == *sport &&
969dbed73cbSSangeeta Misra connp->conn_c2s_sport == *dport &&
970dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(in_dst_p, &connp->conn_c2s_saddr) &&
971dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(in_src_p, &connp->conn_c2s_daddr)) {
972d3d50737SRafael Vanoni connp->conn_c2s_atime = ddi_get_lbolt64();
973dbed73cbSSangeeta Misra connp->conn_c2s_pkt_cnt++;
974dbed73cbSSangeeta Misra rule_cache = connp->conn_rule_cache;
975dbed73cbSSangeeta Misra adj_ip_sum = connp->conn_c2s_ip_sum;
976dbed73cbSSangeeta Misra break;
977dbed73cbSSangeeta Misra }
978dbed73cbSSangeeta Misra }
979dbed73cbSSangeeta Misra mutex_exit(&hash[i].ilb_conn_hash_lock);
980dbed73cbSSangeeta Misra
981dbed73cbSSangeeta Misra if (connp == NULL) {
982dbed73cbSSangeeta Misra DTRACE_PROBE(ilb__chk__icmp__conn__failed);
983dbed73cbSSangeeta Misra return (B_FALSE);
984dbed73cbSSangeeta Misra }
985dbed73cbSSangeeta Misra
986dbed73cbSSangeeta Misra switch (rule_cache.topo) {
987dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_NAT:
988dbed73cbSSangeeta Misra full_nat = B_TRUE;
989dbed73cbSSangeeta Misra break;
990dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_HALF_NAT:
991dbed73cbSSangeeta Misra full_nat = B_FALSE;
992dbed73cbSSangeeta Misra break;
993dbed73cbSSangeeta Misra default:
994dbed73cbSSangeeta Misra return (B_FALSE);
995dbed73cbSSangeeta Misra }
996dbed73cbSSangeeta Misra
997dbed73cbSSangeeta Misra *lb_dst = rule_cache.info.nat_dst;
998dbed73cbSSangeeta Misra if (l3 == IPPROTO_IP) {
999dbed73cbSSangeeta Misra ilb_nat_icmpv4(mp, out_iph, icmph4, in_iph4, sport, dport,
1000dbed73cbSSangeeta Misra &rule_cache.info, adj_ip_sum, full_nat);
1001dbed73cbSSangeeta Misra } else {
1002dbed73cbSSangeeta Misra ilb_nat_icmpv6(mp, out_iph, icmph6, in_iph6, sport, dport,
1003dbed73cbSSangeeta Misra &rule_cache.info, full_nat);
1004dbed73cbSSangeeta Misra }
1005dbed73cbSSangeeta Misra return (B_TRUE);
1006dbed73cbSSangeeta Misra }
1007dbed73cbSSangeeta Misra
1008dbed73cbSSangeeta Misra /*
1009dbed73cbSSangeeta Misra * This routine sends up the conn hash table to user land. Note that the
1010dbed73cbSSangeeta Misra * request is an ioctl, hence we cannot really differentiate requests
1011dbed73cbSSangeeta Misra * from different clients. There is no context shared between different
1012dbed73cbSSangeeta Misra * ioctls. Here we make the assumption that the user land ilbd will
1013dbed73cbSSangeeta Misra * only allow one client to show the conn hash table at any time.
1014dbed73cbSSangeeta Misra * Otherwise, the results will be "very" inconsistent.
1015dbed73cbSSangeeta Misra *
1016dbed73cbSSangeeta Misra * In each ioctl, a flag (ILB_LIST_BEGIN) indicates whether the client wants
1017dbed73cbSSangeeta Misra * to read from the beginning of the able. After a certain entries
1018dbed73cbSSangeeta Misra * are reported, the kernel remembers the position of the last returned
1019dbed73cbSSangeeta Misra * entry. When the next ioctl comes in with the ILB_LIST_BEGIN flag,
1020dbed73cbSSangeeta Misra * it will return entries starting from where it was left off. When
1021dbed73cbSSangeeta Misra * the end of table is reached, a flag (ILB_LIST_END) is set to tell
1022dbed73cbSSangeeta Misra * the client that there is no more entry.
1023dbed73cbSSangeeta Misra *
1024dbed73cbSSangeeta Misra * It is assumed that the caller has checked the size of nat so that it
1025dbed73cbSSangeeta Misra * can hold num entries.
1026dbed73cbSSangeeta Misra */
1027dbed73cbSSangeeta Misra /* ARGSUSED */
1028dbed73cbSSangeeta Misra int
ilb_list_nat(ilb_stack_t * ilbs,zoneid_t zoneid,ilb_nat_entry_t * nat,uint32_t * num,uint32_t * flags)1029dbed73cbSSangeeta Misra ilb_list_nat(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_nat_entry_t *nat,
1030dbed73cbSSangeeta Misra uint32_t *num, uint32_t *flags)
1031dbed73cbSSangeeta Misra {
1032dbed73cbSSangeeta Misra ilb_conn_hash_t *hash;
1033dbed73cbSSangeeta Misra ilb_conn_t *cur_connp;
1034dbed73cbSSangeeta Misra uint32_t i, j;
1035dbed73cbSSangeeta Misra int ret = 0;
1036dbed73cbSSangeeta Misra
1037dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_conn_list_lock);
1038dbed73cbSSangeeta Misra while (ilbs->ilbs_conn_list_busy) {
1039dbed73cbSSangeeta Misra if (cv_wait_sig(&ilbs->ilbs_conn_list_cv,
1040dbed73cbSSangeeta Misra &ilbs->ilbs_conn_list_lock) == 0) {
1041dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_conn_list_lock);
1042dbed73cbSSangeeta Misra return (EINTR);
1043dbed73cbSSangeeta Misra }
1044dbed73cbSSangeeta Misra }
1045dbed73cbSSangeeta Misra if ((hash = ilbs->ilbs_c2s_conn_hash) == NULL) {
1046dbed73cbSSangeeta Misra ASSERT(ilbs->ilbs_s2c_conn_hash == NULL);
1047dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_conn_list_lock);
1048dbed73cbSSangeeta Misra *num = 0;
1049dbed73cbSSangeeta Misra *flags |= ILB_LIST_END;
1050dbed73cbSSangeeta Misra return (0);
1051dbed73cbSSangeeta Misra }
1052dbed73cbSSangeeta Misra ilbs->ilbs_conn_list_busy = B_TRUE;
1053dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_conn_list_lock);
1054dbed73cbSSangeeta Misra
1055dbed73cbSSangeeta Misra if (*flags & ILB_LIST_BEGIN) {
1056dbed73cbSSangeeta Misra i = 0;
1057dbed73cbSSangeeta Misra mutex_enter(&hash[0].ilb_conn_hash_lock);
1058dbed73cbSSangeeta Misra cur_connp = hash[0].ilb_connp;
1059dbed73cbSSangeeta Misra } else if (*flags & ILB_LIST_CONT) {
1060dbed73cbSSangeeta Misra if (ilbs->ilbs_conn_list_cur == ilbs->ilbs_conn_hash_size) {
1061dbed73cbSSangeeta Misra *num = 0;
1062dbed73cbSSangeeta Misra *flags |= ILB_LIST_END;
1063dbed73cbSSangeeta Misra goto done;
1064dbed73cbSSangeeta Misra }
1065dbed73cbSSangeeta Misra i = ilbs->ilbs_conn_list_cur;
1066dbed73cbSSangeeta Misra mutex_enter(&hash[i].ilb_conn_hash_lock);
1067dbed73cbSSangeeta Misra cur_connp = ilbs->ilbs_conn_list_connp;
1068dbed73cbSSangeeta Misra } else {
1069dbed73cbSSangeeta Misra ret = EINVAL;
1070dbed73cbSSangeeta Misra goto done;
1071dbed73cbSSangeeta Misra }
1072dbed73cbSSangeeta Misra
1073dbed73cbSSangeeta Misra j = 0;
1074dbed73cbSSangeeta Misra while (j < *num) {
1075dbed73cbSSangeeta Misra if (cur_connp == NULL) {
1076dbed73cbSSangeeta Misra mutex_exit(&hash[i].ilb_conn_hash_lock);
1077dbed73cbSSangeeta Misra if (++i == ilbs->ilbs_conn_hash_size) {
1078dbed73cbSSangeeta Misra *flags |= ILB_LIST_END;
1079dbed73cbSSangeeta Misra break;
1080dbed73cbSSangeeta Misra }
1081dbed73cbSSangeeta Misra mutex_enter(&hash[i].ilb_conn_hash_lock);
1082dbed73cbSSangeeta Misra cur_connp = hash[i].ilb_connp;
1083dbed73cbSSangeeta Misra continue;
1084dbed73cbSSangeeta Misra }
1085dbed73cbSSangeeta Misra nat[j].proto = cur_connp->conn_l4;
1086dbed73cbSSangeeta Misra
1087dbed73cbSSangeeta Misra nat[j].in_global = cur_connp->conn_c2s_daddr;
1088dbed73cbSSangeeta Misra nat[j].in_global_port = cur_connp->conn_c2s_dport;
1089dbed73cbSSangeeta Misra nat[j].out_global = cur_connp->conn_c2s_saddr;
1090dbed73cbSSangeeta Misra nat[j].out_global_port = cur_connp->conn_c2s_sport;
1091dbed73cbSSangeeta Misra
1092dbed73cbSSangeeta Misra nat[j].in_local = cur_connp->conn_s2c_saddr;
1093dbed73cbSSangeeta Misra nat[j].in_local_port = cur_connp->conn_s2c_sport;
1094dbed73cbSSangeeta Misra nat[j].out_local = cur_connp->conn_s2c_daddr;
1095dbed73cbSSangeeta Misra nat[j].out_local_port = cur_connp->conn_s2c_dport;
1096dbed73cbSSangeeta Misra
1097dbed73cbSSangeeta Misra nat[j].create_time = TICK_TO_MSEC(cur_connp->conn_cr_time);
1098dbed73cbSSangeeta Misra nat[j].last_access_time =
1099dbed73cbSSangeeta Misra TICK_TO_MSEC(cur_connp->conn_c2s_atime);
1100dbed73cbSSangeeta Misra
1101dbed73cbSSangeeta Misra /*
1102dbed73cbSSangeeta Misra * The conn_s2c_pkt_cnt may not be accurate since we are not
1103dbed73cbSSangeeta Misra * holding the s2c hash lock.
1104dbed73cbSSangeeta Misra */
1105dbed73cbSSangeeta Misra nat[j].pkt_cnt = cur_connp->conn_c2s_pkt_cnt +
1106dbed73cbSSangeeta Misra cur_connp->conn_s2c_pkt_cnt;
1107dbed73cbSSangeeta Misra j++;
1108dbed73cbSSangeeta Misra
1109dbed73cbSSangeeta Misra cur_connp = cur_connp->conn_c2s_next;
1110dbed73cbSSangeeta Misra }
1111dbed73cbSSangeeta Misra ilbs->ilbs_conn_list_connp = cur_connp;
1112dbed73cbSSangeeta Misra if (j == *num)
1113dbed73cbSSangeeta Misra mutex_exit(&hash[i].ilb_conn_hash_lock);
1114dbed73cbSSangeeta Misra
1115dbed73cbSSangeeta Misra ilbs->ilbs_conn_list_cur = i;
1116dbed73cbSSangeeta Misra
1117dbed73cbSSangeeta Misra *num = j;
1118dbed73cbSSangeeta Misra done:
1119dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_conn_list_lock);
1120dbed73cbSSangeeta Misra ilbs->ilbs_conn_list_busy = B_FALSE;
1121dbed73cbSSangeeta Misra cv_signal(&ilbs->ilbs_conn_list_cv);
1122dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_conn_list_lock);
1123dbed73cbSSangeeta Misra
1124dbed73cbSSangeeta Misra return (ret);
1125dbed73cbSSangeeta Misra }
1126dbed73cbSSangeeta Misra
1127dbed73cbSSangeeta Misra
1128dbed73cbSSangeeta Misra /*
1129dbed73cbSSangeeta Misra * Stickiness (persistence) handling routines.
1130dbed73cbSSangeeta Misra */
1131dbed73cbSSangeeta Misra
1132dbed73cbSSangeeta Misra
1133dbed73cbSSangeeta Misra static void
ilb_sticky_cache_init(void)1134dbed73cbSSangeeta Misra ilb_sticky_cache_init(void)
1135dbed73cbSSangeeta Misra {
1136dbed73cbSSangeeta Misra ilb_sticky_cache = kmem_cache_create("ilb_sticky_cache",
1137dbed73cbSSangeeta Misra sizeof (ilb_sticky_t), 0, NULL, NULL, NULL, NULL, NULL,
1138dbed73cbSSangeeta Misra ilb_kmem_flags);
1139dbed73cbSSangeeta Misra }
1140dbed73cbSSangeeta Misra
1141dbed73cbSSangeeta Misra void
ilb_sticky_cache_fini(void)1142dbed73cbSSangeeta Misra ilb_sticky_cache_fini(void)
1143dbed73cbSSangeeta Misra {
1144dbed73cbSSangeeta Misra if (ilb_sticky_cache != NULL) {
1145dbed73cbSSangeeta Misra kmem_cache_destroy(ilb_sticky_cache);
1146dbed73cbSSangeeta Misra ilb_sticky_cache = NULL;
1147dbed73cbSSangeeta Misra }
1148dbed73cbSSangeeta Misra }
1149dbed73cbSSangeeta Misra
1150dbed73cbSSangeeta Misra void
ilb_sticky_refrele(ilb_sticky_t * s)1151dbed73cbSSangeeta Misra ilb_sticky_refrele(ilb_sticky_t *s)
1152dbed73cbSSangeeta Misra {
1153dbed73cbSSangeeta Misra ILB_STICKY_REFRELE(s);
1154dbed73cbSSangeeta Misra }
1155dbed73cbSSangeeta Misra
1156dbed73cbSSangeeta Misra static ilb_sticky_t *
ilb_sticky_lookup(ilb_sticky_hash_t * hash,ilb_rule_t * rule,in6_addr_t * src)1157dbed73cbSSangeeta Misra ilb_sticky_lookup(ilb_sticky_hash_t *hash, ilb_rule_t *rule, in6_addr_t *src)
1158dbed73cbSSangeeta Misra {
1159dbed73cbSSangeeta Misra ilb_sticky_t *s;
1160dbed73cbSSangeeta Misra
1161dbed73cbSSangeeta Misra ASSERT(mutex_owned(&hash->sticky_lock));
1162dbed73cbSSangeeta Misra
1163dbed73cbSSangeeta Misra for (s = list_head(&hash->sticky_head); s != NULL;
1164dbed73cbSSangeeta Misra s = list_next(&hash->sticky_head, s)) {
1165dbed73cbSSangeeta Misra if (s->rule_instance == rule->ir_ks_instance) {
1166dbed73cbSSangeeta Misra if (IN6_ARE_ADDR_EQUAL(src, &s->src))
1167dbed73cbSSangeeta Misra return (s);
1168dbed73cbSSangeeta Misra }
1169dbed73cbSSangeeta Misra }
1170dbed73cbSSangeeta Misra return (NULL);
1171dbed73cbSSangeeta Misra }
1172dbed73cbSSangeeta Misra
1173dbed73cbSSangeeta Misra static ilb_sticky_t *
ilb_sticky_add(ilb_sticky_hash_t * hash,ilb_rule_t * rule,ilb_server_t * server,in6_addr_t * src)1174dbed73cbSSangeeta Misra ilb_sticky_add(ilb_sticky_hash_t *hash, ilb_rule_t *rule, ilb_server_t *server,
1175dbed73cbSSangeeta Misra in6_addr_t *src)
1176dbed73cbSSangeeta Misra {
1177dbed73cbSSangeeta Misra ilb_sticky_t *s;
1178dbed73cbSSangeeta Misra
1179dbed73cbSSangeeta Misra ASSERT(mutex_owned(&hash->sticky_lock));
1180dbed73cbSSangeeta Misra
1181dbed73cbSSangeeta Misra if ((s = kmem_cache_alloc(ilb_sticky_cache, KM_NOSLEEP)) == NULL)
1182dbed73cbSSangeeta Misra return (NULL);
1183dbed73cbSSangeeta Misra
1184dbed73cbSSangeeta Misra /*
1185dbed73cbSSangeeta Misra * The rule instance is for handling the scenario when the same
1186dbed73cbSSangeeta Misra * client talks to different rules at the same time. Stickiness
1187dbed73cbSSangeeta Misra * is per rule so we can use the rule instance to differentiate
1188dbed73cbSSangeeta Misra * the client's request.
1189dbed73cbSSangeeta Misra */
1190dbed73cbSSangeeta Misra s->rule_instance = rule->ir_ks_instance;
1191dbed73cbSSangeeta Misra /*
1192dbed73cbSSangeeta Misra * Copy the rule name for listing all sticky cache entry. ir_name
1193dbed73cbSSangeeta Misra * is guaranteed to be NULL terminated.
1194dbed73cbSSangeeta Misra */
1195dbed73cbSSangeeta Misra (void) strcpy(s->rule_name, rule->ir_name);
1196dbed73cbSSangeeta Misra s->server = server;
1197dbed73cbSSangeeta Misra
1198dbed73cbSSangeeta Misra /*
1199dbed73cbSSangeeta Misra * Grab a ref cnt on the server so that it won't go away while
1200dbed73cbSSangeeta Misra * it is still in the sticky table.
1201dbed73cbSSangeeta Misra */
1202dbed73cbSSangeeta Misra ILB_SERVER_REFHOLD(server);
1203dbed73cbSSangeeta Misra s->src = *src;
1204dbed73cbSSangeeta Misra s->expiry = rule->ir_sticky_expiry;
1205dbed73cbSSangeeta Misra s->refcnt = 1;
1206dbed73cbSSangeeta Misra s->hash = hash;
1207dbed73cbSSangeeta Misra
1208dbed73cbSSangeeta Misra /*
1209dbed73cbSSangeeta Misra * There is no need to set atime here since the refcnt is not
1210dbed73cbSSangeeta Misra * zero. A sticky entry is removed only when the refcnt is
1211dbed73cbSSangeeta Misra * zero. But just set it here for debugging purpose. The
1212dbed73cbSSangeeta Misra * atime is set when a refrele is done on a sticky entry.
1213dbed73cbSSangeeta Misra */
1214d3d50737SRafael Vanoni s->atime = ddi_get_lbolt64();
1215dbed73cbSSangeeta Misra
1216dbed73cbSSangeeta Misra list_insert_head(&hash->sticky_head, s);
1217dbed73cbSSangeeta Misra hash->sticky_cnt++;
1218dbed73cbSSangeeta Misra return (s);
1219dbed73cbSSangeeta Misra }
1220dbed73cbSSangeeta Misra
1221dbed73cbSSangeeta Misra /*
1222dbed73cbSSangeeta Misra * This routine checks if there is an existing sticky entry which matches
1223dbed73cbSSangeeta Misra * a given packet. If there is one, return it. If there is not, create
1224dbed73cbSSangeeta Misra * a sticky entry using the packet's info.
1225dbed73cbSSangeeta Misra */
1226dbed73cbSSangeeta Misra ilb_server_t *
ilb_sticky_find_add(ilb_stack_t * ilbs,ilb_rule_t * rule,in6_addr_t * src,ilb_server_t * server,ilb_sticky_t ** res,uint16_t * src_ent_idx)1227dbed73cbSSangeeta Misra ilb_sticky_find_add(ilb_stack_t *ilbs, ilb_rule_t *rule, in6_addr_t *src,
1228dbed73cbSSangeeta Misra ilb_server_t *server, ilb_sticky_t **res, uint16_t *src_ent_idx)
1229dbed73cbSSangeeta Misra {
1230dbed73cbSSangeeta Misra int i;
1231dbed73cbSSangeeta Misra ilb_sticky_hash_t *hash;
1232dbed73cbSSangeeta Misra ilb_sticky_t *s;
1233dbed73cbSSangeeta Misra
1234dbed73cbSSangeeta Misra ASSERT(server != NULL);
1235dbed73cbSSangeeta Misra
1236dbed73cbSSangeeta Misra *res = NULL;
1237dbed73cbSSangeeta Misra
1238dbed73cbSSangeeta Misra i = ILB_STICKY_HASH((uint8_t *)&src->s6_addr32[3],
1239dbed73cbSSangeeta Misra (uint32_t)(uintptr_t)rule, ilbs->ilbs_sticky_hash_size);
1240dbed73cbSSangeeta Misra hash = &ilbs->ilbs_sticky_hash[i];
1241dbed73cbSSangeeta Misra
1242dbed73cbSSangeeta Misra /* First check if there is already an entry. */
1243dbed73cbSSangeeta Misra mutex_enter(&hash->sticky_lock);
1244dbed73cbSSangeeta Misra s = ilb_sticky_lookup(hash, rule, src);
1245dbed73cbSSangeeta Misra
1246dbed73cbSSangeeta Misra /* No sticky entry, add one. */
1247dbed73cbSSangeeta Misra if (s == NULL) {
1248dbed73cbSSangeeta Misra add_new_entry:
1249dbed73cbSSangeeta Misra s = ilb_sticky_add(hash, rule, server, src);
1250dbed73cbSSangeeta Misra if (s == NULL) {
1251dbed73cbSSangeeta Misra mutex_exit(&hash->sticky_lock);
1252dbed73cbSSangeeta Misra return (NULL);
1253dbed73cbSSangeeta Misra }
1254dbed73cbSSangeeta Misra /*
1255dbed73cbSSangeeta Misra * Find a source for this server. All subseqent requests from
1256dbed73cbSSangeeta Misra * the same client matching this sticky entry will use this
1257dbed73cbSSangeeta Misra * source address in doing NAT. The current algorithm is
1258dbed73cbSSangeeta Misra * simple, rotate the source address. Note that the
1259dbed73cbSSangeeta Misra * source address array does not change after it's created, so
1260dbed73cbSSangeeta Misra * it is OK to just increment the cur index.
1261dbed73cbSSangeeta Misra */
1262dbed73cbSSangeeta Misra if (server->iser_nat_src != NULL) {
1263dbed73cbSSangeeta Misra /* It is a hint, does not need to be atomic. */
1264dbed73cbSSangeeta Misra *src_ent_idx = (server->iser_nat_src->cur++ %
1265dbed73cbSSangeeta Misra server->iser_nat_src->num_src);
1266dbed73cbSSangeeta Misra s->nat_src_idx = *src_ent_idx;
1267dbed73cbSSangeeta Misra }
1268dbed73cbSSangeeta Misra mutex_exit(&hash->sticky_lock);
1269dbed73cbSSangeeta Misra *res = s;
1270dbed73cbSSangeeta Misra return (server);
1271dbed73cbSSangeeta Misra }
1272dbed73cbSSangeeta Misra
1273dbed73cbSSangeeta Misra /*
1274dbed73cbSSangeeta Misra * We don't hold any lock accessing iser_enabled. Refer to the
1275dbed73cbSSangeeta Misra * comment in ilb_server_add() about iser_lock.
1276dbed73cbSSangeeta Misra */
1277dbed73cbSSangeeta Misra if (!s->server->iser_enabled) {
1278dbed73cbSSangeeta Misra /*
1279dbed73cbSSangeeta Misra * s->server == server can only happen if there is a race in
1280dbed73cbSSangeeta Misra * toggling the iser_enabled flag (we don't hold a lock doing
1281dbed73cbSSangeeta Misra * that) so that the load balance algorithm still returns a
1282dbed73cbSSangeeta Misra * disabled server. In this case, just drop the packet...
1283dbed73cbSSangeeta Misra */
1284dbed73cbSSangeeta Misra if (s->server == server) {
1285dbed73cbSSangeeta Misra mutex_exit(&hash->sticky_lock);
1286dbed73cbSSangeeta Misra return (NULL);
1287dbed73cbSSangeeta Misra }
1288dbed73cbSSangeeta Misra
1289dbed73cbSSangeeta Misra /*
1290dbed73cbSSangeeta Misra * The old server is disabled and there is a new server, use
1291dbed73cbSSangeeta Misra * the new one to create a sticky entry. Since we will
1292dbed73cbSSangeeta Misra * add the entry at the beginning, subsequent lookup will
1293dbed73cbSSangeeta Misra * find this new entry instead of the old one.
1294dbed73cbSSangeeta Misra */
1295dbed73cbSSangeeta Misra goto add_new_entry;
1296dbed73cbSSangeeta Misra }
1297dbed73cbSSangeeta Misra
1298dbed73cbSSangeeta Misra s->refcnt++;
1299dbed73cbSSangeeta Misra *res = s;
1300dbed73cbSSangeeta Misra mutex_exit(&hash->sticky_lock);
1301dbed73cbSSangeeta Misra if (server->iser_nat_src != NULL)
1302dbed73cbSSangeeta Misra *src_ent_idx = s->nat_src_idx;
1303dbed73cbSSangeeta Misra return (s->server);
1304dbed73cbSSangeeta Misra }
1305dbed73cbSSangeeta Misra
1306dbed73cbSSangeeta Misra static void
ilb_sticky_cleanup(void * arg)1307dbed73cbSSangeeta Misra ilb_sticky_cleanup(void *arg)
1308dbed73cbSSangeeta Misra {
1309dbed73cbSSangeeta Misra ilb_timer_t *timer = (ilb_timer_t *)arg;
1310dbed73cbSSangeeta Misra uint32_t i;
1311dbed73cbSSangeeta Misra ilb_stack_t *ilbs;
1312dbed73cbSSangeeta Misra ilb_sticky_hash_t *hash;
1313dbed73cbSSangeeta Misra ilb_sticky_t *s, *nxt_s;
1314dbed73cbSSangeeta Misra int64_t now, expiry;
1315dbed73cbSSangeeta Misra
1316dbed73cbSSangeeta Misra ilbs = timer->ilbs;
1317dbed73cbSSangeeta Misra hash = ilbs->ilbs_sticky_hash;
1318dbed73cbSSangeeta Misra ASSERT(hash != NULL);
1319dbed73cbSSangeeta Misra
1320d3d50737SRafael Vanoni now = ddi_get_lbolt64();
1321dbed73cbSSangeeta Misra for (i = timer->start; i < timer->end; i++) {
1322dbed73cbSSangeeta Misra mutex_enter(&hash[i].sticky_lock);
1323dbed73cbSSangeeta Misra for (s = list_head(&hash[i].sticky_head); s != NULL;
1324dbed73cbSSangeeta Misra s = nxt_s) {
1325dbed73cbSSangeeta Misra nxt_s = list_next(&hash[i].sticky_head, s);
1326dbed73cbSSangeeta Misra if (s->refcnt != 0)
1327dbed73cbSSangeeta Misra continue;
1328dbed73cbSSangeeta Misra expiry = now - SEC_TO_TICK(s->expiry);
1329dbed73cbSSangeeta Misra if (s->atime < expiry) {
1330dbed73cbSSangeeta Misra ILB_SERVER_REFRELE(s->server);
1331dbed73cbSSangeeta Misra list_remove(&hash[i].sticky_head, s);
1332dbed73cbSSangeeta Misra kmem_cache_free(ilb_sticky_cache, s);
1333dbed73cbSSangeeta Misra hash[i].sticky_cnt--;
1334dbed73cbSSangeeta Misra }
1335dbed73cbSSangeeta Misra }
1336dbed73cbSSangeeta Misra mutex_exit(&hash[i].sticky_lock);
1337dbed73cbSSangeeta Misra }
1338dbed73cbSSangeeta Misra }
1339dbed73cbSSangeeta Misra
1340dbed73cbSSangeeta Misra static void
ilb_sticky_timer(void * arg)1341dbed73cbSSangeeta Misra ilb_sticky_timer(void *arg)
1342dbed73cbSSangeeta Misra {
1343dbed73cbSSangeeta Misra ilb_timer_t *timer = (ilb_timer_t *)arg;
1344dbed73cbSSangeeta Misra
1345dbed73cbSSangeeta Misra (void) taskq_dispatch(timer->ilbs->ilbs_sticky_taskq,
1346dbed73cbSSangeeta Misra ilb_sticky_cleanup, arg, TQ_SLEEP);
1347dbed73cbSSangeeta Misra mutex_enter(&timer->tid_lock);
1348dbed73cbSSangeeta Misra if (timer->tid == 0) {
1349dbed73cbSSangeeta Misra mutex_exit(&timer->tid_lock);
1350dbed73cbSSangeeta Misra } else {
1351dbed73cbSSangeeta Misra timer->tid = timeout(ilb_sticky_timer, arg,
1352dbed73cbSSangeeta Misra SEC_TO_TICK(ilb_sticky_timeout));
1353dbed73cbSSangeeta Misra mutex_exit(&timer->tid_lock);
1354dbed73cbSSangeeta Misra }
1355dbed73cbSSangeeta Misra }
1356dbed73cbSSangeeta Misra
1357dbed73cbSSangeeta Misra void
ilb_sticky_hash_init(ilb_stack_t * ilbs)1358dbed73cbSSangeeta Misra ilb_sticky_hash_init(ilb_stack_t *ilbs)
1359dbed73cbSSangeeta Misra {
1360dbed73cbSSangeeta Misra extern pri_t minclsyspri;
1361dbed73cbSSangeeta Misra int i, part;
1362dbed73cbSSangeeta Misra char tq_name[TASKQ_NAMELEN];
1363dbed73cbSSangeeta Misra ilb_timer_t *tm;
1364dbed73cbSSangeeta Misra
1365de710d24SJosef 'Jeff' Sipek if (!ISP2(ilbs->ilbs_sticky_hash_size)) {
1366dbed73cbSSangeeta Misra for (i = 0; i < 31; i++) {
1367dbed73cbSSangeeta Misra if (ilbs->ilbs_sticky_hash_size < (1 << i))
1368dbed73cbSSangeeta Misra break;
1369dbed73cbSSangeeta Misra }
1370dbed73cbSSangeeta Misra ilbs->ilbs_sticky_hash_size = 1 << i;
1371dbed73cbSSangeeta Misra }
1372dbed73cbSSangeeta Misra
1373dbed73cbSSangeeta Misra ilbs->ilbs_sticky_hash = kmem_zalloc(sizeof (ilb_sticky_hash_t) *
1374dbed73cbSSangeeta Misra ilbs->ilbs_sticky_hash_size, KM_SLEEP);
1375dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_sticky_hash_size; i++) {
1376dbed73cbSSangeeta Misra mutex_init(&ilbs->ilbs_sticky_hash[i].sticky_lock, NULL,
1377dbed73cbSSangeeta Misra MUTEX_DEFAULT, NULL);
1378dbed73cbSSangeeta Misra list_create(&ilbs->ilbs_sticky_hash[i].sticky_head,
1379dbed73cbSSangeeta Misra sizeof (ilb_sticky_t),
1380dbed73cbSSangeeta Misra offsetof(ilb_sticky_t, list));
1381dbed73cbSSangeeta Misra }
1382dbed73cbSSangeeta Misra
1383dbed73cbSSangeeta Misra if (ilb_sticky_cache == NULL)
1384dbed73cbSSangeeta Misra ilb_sticky_cache_init();
1385dbed73cbSSangeeta Misra
1386dbed73cbSSangeeta Misra (void) snprintf(tq_name, sizeof (tq_name), "ilb_sticky_taskq_%p",
13876e0672acSSangeeta Misra (void *)ilbs->ilbs_netstack);
1388dbed73cbSSangeeta Misra ASSERT(ilbs->ilbs_sticky_taskq == NULL);
1389dbed73cbSSangeeta Misra ilbs->ilbs_sticky_taskq = taskq_create(tq_name,
1390dbed73cbSSangeeta Misra ilb_sticky_timer_size * 2, minclsyspri, ilb_sticky_timer_size,
1391dbed73cbSSangeeta Misra ilb_sticky_timer_size * 2, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
1392dbed73cbSSangeeta Misra
1393dbed73cbSSangeeta Misra ASSERT(ilbs->ilbs_sticky_timer_list == NULL);
1394dbed73cbSSangeeta Misra ilbs->ilbs_sticky_timer_list = kmem_zalloc(sizeof (ilb_timer_t) *
1395dbed73cbSSangeeta Misra ilb_sticky_timer_size, KM_SLEEP);
1396dbed73cbSSangeeta Misra part = ilbs->ilbs_sticky_hash_size / ilb_sticky_timer_size + 1;
1397dbed73cbSSangeeta Misra for (i = 0; i < ilb_sticky_timer_size; i++) {
1398dbed73cbSSangeeta Misra tm = ilbs->ilbs_sticky_timer_list + i;
1399dbed73cbSSangeeta Misra tm->start = i * part;
1400dbed73cbSSangeeta Misra tm->end = i * part + part;
1401dbed73cbSSangeeta Misra if (tm->end > ilbs->ilbs_sticky_hash_size)
1402dbed73cbSSangeeta Misra tm->end = ilbs->ilbs_sticky_hash_size;
1403dbed73cbSSangeeta Misra tm->ilbs = ilbs;
1404dbed73cbSSangeeta Misra mutex_init(&tm->tid_lock, NULL, MUTEX_DEFAULT, NULL);
1405dbed73cbSSangeeta Misra /* Spread out the starting execution time of all the timers. */
1406dbed73cbSSangeeta Misra tm->tid = timeout(ilb_sticky_timer, tm,
1407dbed73cbSSangeeta Misra SEC_TO_TICK(ilb_sticky_timeout + i));
1408dbed73cbSSangeeta Misra }
1409dbed73cbSSangeeta Misra }
1410dbed73cbSSangeeta Misra
1411dbed73cbSSangeeta Misra void
ilb_sticky_hash_fini(ilb_stack_t * ilbs)1412dbed73cbSSangeeta Misra ilb_sticky_hash_fini(ilb_stack_t *ilbs)
1413dbed73cbSSangeeta Misra {
1414dbed73cbSSangeeta Misra int i;
1415dbed73cbSSangeeta Misra ilb_sticky_t *s;
1416dbed73cbSSangeeta Misra
1417dbed73cbSSangeeta Misra if (ilbs->ilbs_sticky_hash == NULL)
1418dbed73cbSSangeeta Misra return;
1419dbed73cbSSangeeta Misra
1420dbed73cbSSangeeta Misra /* Stop all the timers first. */
1421dbed73cbSSangeeta Misra for (i = 0; i < ilb_sticky_timer_size; i++) {
1422dbed73cbSSangeeta Misra timeout_id_t tid;
1423dbed73cbSSangeeta Misra
1424dbed73cbSSangeeta Misra /* Setting tid to 0 tells the timer handler not to restart. */
1425dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_sticky_timer_list[i].tid_lock);
1426dbed73cbSSangeeta Misra tid = ilbs->ilbs_sticky_timer_list[i].tid;
1427dbed73cbSSangeeta Misra ilbs->ilbs_sticky_timer_list[i].tid = 0;
1428dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_sticky_timer_list[i].tid_lock);
1429dbed73cbSSangeeta Misra (void) untimeout(tid);
1430dbed73cbSSangeeta Misra }
1431dbed73cbSSangeeta Misra kmem_free(ilbs->ilbs_sticky_timer_list, sizeof (ilb_timer_t) *
1432dbed73cbSSangeeta Misra ilb_sticky_timer_size);
1433dbed73cbSSangeeta Misra taskq_destroy(ilbs->ilbs_sticky_taskq);
1434dbed73cbSSangeeta Misra ilbs->ilbs_sticky_taskq = NULL;
1435dbed73cbSSangeeta Misra
1436dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_sticky_hash_size; i++) {
1437dbed73cbSSangeeta Misra while ((s = list_head(&ilbs->ilbs_sticky_hash[i].sticky_head))
1438dbed73cbSSangeeta Misra != NULL) {
1439dbed73cbSSangeeta Misra list_remove(&ilbs->ilbs_sticky_hash[i].sticky_head, s);
1440dbed73cbSSangeeta Misra ILB_SERVER_REFRELE(s->server);
1441dbed73cbSSangeeta Misra kmem_free(s, sizeof (ilb_sticky_t));
1442dbed73cbSSangeeta Misra }
1443dbed73cbSSangeeta Misra }
1444dbed73cbSSangeeta Misra kmem_free(ilbs->ilbs_sticky_hash, ilbs->ilbs_sticky_hash_size *
1445dbed73cbSSangeeta Misra sizeof (ilb_sticky_hash_t));
1446dbed73cbSSangeeta Misra }
1447dbed73cbSSangeeta Misra
1448dbed73cbSSangeeta Misra /*
1449dbed73cbSSangeeta Misra * This routine sends up the sticky hash table to user land. Refer to
1450dbed73cbSSangeeta Misra * the comments before ilb_list_nat(). Both routines assume similar
1451dbed73cbSSangeeta Misra * conditions.
1452dbed73cbSSangeeta Misra *
1453dbed73cbSSangeeta Misra * It is assumed that the caller has checked the size of st so that it
1454dbed73cbSSangeeta Misra * can hold num entries.
1455dbed73cbSSangeeta Misra */
1456dbed73cbSSangeeta Misra /* ARGSUSED */
1457dbed73cbSSangeeta Misra int
ilb_list_sticky(ilb_stack_t * ilbs,zoneid_t zoneid,ilb_sticky_entry_t * st,uint32_t * num,uint32_t * flags)1458dbed73cbSSangeeta Misra ilb_list_sticky(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_sticky_entry_t *st,
1459dbed73cbSSangeeta Misra uint32_t *num, uint32_t *flags)
1460dbed73cbSSangeeta Misra {
1461dbed73cbSSangeeta Misra ilb_sticky_hash_t *hash;
1462dbed73cbSSangeeta Misra ilb_sticky_t *curp;
1463dbed73cbSSangeeta Misra uint32_t i, j;
1464dbed73cbSSangeeta Misra int ret = 0;
1465dbed73cbSSangeeta Misra
1466dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_sticky_list_lock);
1467dbed73cbSSangeeta Misra while (ilbs->ilbs_sticky_list_busy) {
1468dbed73cbSSangeeta Misra if (cv_wait_sig(&ilbs->ilbs_sticky_list_cv,
1469dbed73cbSSangeeta Misra &ilbs->ilbs_sticky_list_lock) == 0) {
1470dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_sticky_list_lock);
1471dbed73cbSSangeeta Misra return (EINTR);
1472dbed73cbSSangeeta Misra }
1473dbed73cbSSangeeta Misra }
1474dbed73cbSSangeeta Misra if ((hash = ilbs->ilbs_sticky_hash) == NULL) {
1475dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_sticky_list_lock);
1476dbed73cbSSangeeta Misra *num = 0;
1477dbed73cbSSangeeta Misra *flags |= ILB_LIST_END;
1478dbed73cbSSangeeta Misra return (0);
1479dbed73cbSSangeeta Misra }
1480dbed73cbSSangeeta Misra ilbs->ilbs_sticky_list_busy = B_TRUE;
1481dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_sticky_list_lock);
1482dbed73cbSSangeeta Misra
1483dbed73cbSSangeeta Misra if (*flags & ILB_LIST_BEGIN) {
1484dbed73cbSSangeeta Misra i = 0;
1485dbed73cbSSangeeta Misra mutex_enter(&hash[0].sticky_lock);
1486dbed73cbSSangeeta Misra curp = list_head(&hash[0].sticky_head);
1487dbed73cbSSangeeta Misra } else if (*flags & ILB_LIST_CONT) {
1488dbed73cbSSangeeta Misra if (ilbs->ilbs_sticky_list_cur == ilbs->ilbs_sticky_hash_size) {
1489dbed73cbSSangeeta Misra *num = 0;
1490dbed73cbSSangeeta Misra *flags |= ILB_LIST_END;
1491dbed73cbSSangeeta Misra goto done;
1492dbed73cbSSangeeta Misra }
1493dbed73cbSSangeeta Misra i = ilbs->ilbs_sticky_list_cur;
1494dbed73cbSSangeeta Misra mutex_enter(&hash[i].sticky_lock);
1495dbed73cbSSangeeta Misra curp = ilbs->ilbs_sticky_list_curp;
1496dbed73cbSSangeeta Misra } else {
1497dbed73cbSSangeeta Misra ret = EINVAL;
1498dbed73cbSSangeeta Misra goto done;
1499dbed73cbSSangeeta Misra }
1500dbed73cbSSangeeta Misra
1501dbed73cbSSangeeta Misra j = 0;
1502dbed73cbSSangeeta Misra while (j < *num) {
1503dbed73cbSSangeeta Misra if (curp == NULL) {
1504dbed73cbSSangeeta Misra mutex_exit(&hash[i].sticky_lock);
1505dbed73cbSSangeeta Misra if (++i == ilbs->ilbs_sticky_hash_size) {
1506dbed73cbSSangeeta Misra *flags |= ILB_LIST_END;
1507dbed73cbSSangeeta Misra break;
1508dbed73cbSSangeeta Misra }
1509dbed73cbSSangeeta Misra mutex_enter(&hash[i].sticky_lock);
1510dbed73cbSSangeeta Misra curp = list_head(&hash[i].sticky_head);
1511dbed73cbSSangeeta Misra continue;
1512dbed73cbSSangeeta Misra }
1513dbed73cbSSangeeta Misra (void) strcpy(st[j].rule_name, curp->rule_name);
1514dbed73cbSSangeeta Misra st[j].req_addr = curp->src;
1515dbed73cbSSangeeta Misra st[j].srv_addr = curp->server->iser_addr_v6;
1516dbed73cbSSangeeta Misra st[j].expiry_time = TICK_TO_MSEC(curp->expiry);
1517dbed73cbSSangeeta Misra j++;
1518dbed73cbSSangeeta Misra curp = list_next(&hash[i].sticky_head, curp);
1519dbed73cbSSangeeta Misra }
1520dbed73cbSSangeeta Misra ilbs->ilbs_sticky_list_curp = curp;
1521dbed73cbSSangeeta Misra if (j == *num)
1522dbed73cbSSangeeta Misra mutex_exit(&hash[i].sticky_lock);
1523dbed73cbSSangeeta Misra
1524dbed73cbSSangeeta Misra ilbs->ilbs_sticky_list_cur = i;
1525dbed73cbSSangeeta Misra
1526dbed73cbSSangeeta Misra *num = j;
1527dbed73cbSSangeeta Misra done:
1528dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_sticky_list_lock);
1529dbed73cbSSangeeta Misra ilbs->ilbs_sticky_list_busy = B_FALSE;
1530dbed73cbSSangeeta Misra cv_signal(&ilbs->ilbs_sticky_list_cv);
1531dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_sticky_list_lock);
1532dbed73cbSSangeeta Misra
1533dbed73cbSSangeeta Misra return (ret);
1534dbed73cbSSangeeta Misra }
1535