1dbed73cbSSangeeta Misra /* 2dbed73cbSSangeeta Misra * CDDL HEADER START 3dbed73cbSSangeeta Misra * 4dbed73cbSSangeeta Misra * The contents of this file are subject to the terms of the 5dbed73cbSSangeeta Misra * Common Development and Distribution License (the "License"). 6dbed73cbSSangeeta Misra * You may not use this file except in compliance with the License. 7dbed73cbSSangeeta Misra * 8dbed73cbSSangeeta Misra * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9dbed73cbSSangeeta Misra * or http://www.opensolaris.org/os/licensing. 10dbed73cbSSangeeta Misra * See the License for the specific language governing permissions 11dbed73cbSSangeeta Misra * and limitations under the License. 12dbed73cbSSangeeta Misra * 13dbed73cbSSangeeta Misra * When distributing Covered Code, include this CDDL HEADER in each 14dbed73cbSSangeeta Misra * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15dbed73cbSSangeeta Misra * If applicable, add the following below this CDDL HEADER, with the 16dbed73cbSSangeeta Misra * fields enclosed by brackets "[]" replaced with your own identifying 17dbed73cbSSangeeta Misra * information: Portions Copyright [yyyy] [name of copyright owner] 18dbed73cbSSangeeta Misra * 19dbed73cbSSangeeta Misra * CDDL HEADER END 20dbed73cbSSangeeta Misra */ 21dbed73cbSSangeeta Misra 22dbed73cbSSangeeta Misra /* 23dbed73cbSSangeeta Misra * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24dbed73cbSSangeeta Misra * Use is subject to license terms. 25*d17b05b6SJerry Jelinek * Copyright 2014 Joyent, Inc. All rights reserved. 
26dbed73cbSSangeeta Misra */ 27dbed73cbSSangeeta Misra 28de710d24SJosef 'Jeff' Sipek #include <sys/sysmacros.h> 29dbed73cbSSangeeta Misra #include <sys/types.h> 30dbed73cbSSangeeta Misra #include <sys/conf.h> 31dbed73cbSSangeeta Misra #include <sys/time.h> 32dbed73cbSSangeeta Misra #include <sys/taskq.h> 33dbed73cbSSangeeta Misra #include <sys/cmn_err.h> 34dbed73cbSSangeeta Misra #include <sys/sdt.h> 35dbed73cbSSangeeta Misra #include <sys/atomic.h> 36dbed73cbSSangeeta Misra #include <netinet/in.h> 37dbed73cbSSangeeta Misra #include <inet/ip.h> 38dbed73cbSSangeeta Misra #include <inet/ip6.h> 39dbed73cbSSangeeta Misra #include <inet/tcp.h> 40dbed73cbSSangeeta Misra #include <inet/udp_impl.h> 41dbed73cbSSangeeta Misra #include <inet/ilb.h> 42dbed73cbSSangeeta Misra 43dbed73cbSSangeeta Misra #include "ilb_stack.h" 44dbed73cbSSangeeta Misra #include "ilb_impl.h" 45dbed73cbSSangeeta Misra #include "ilb_conn.h" 46dbed73cbSSangeeta Misra #include "ilb_nat.h" 47dbed73cbSSangeeta Misra 48dbed73cbSSangeeta Misra /* 49dbed73cbSSangeeta Misra * Timer struct for ilb_conn_t and ilb_sticky_t garbage collection 50dbed73cbSSangeeta Misra * 51dbed73cbSSangeeta Misra * start: starting index into the hash table to do gc 52dbed73cbSSangeeta Misra * end: ending index into the hash table to do gc 53dbed73cbSSangeeta Misra * ilbs: pointer to the ilb_stack_t of the IP stack 54dbed73cbSSangeeta Misra * tid_lock: mutex to protect the timer id. 
55dbed73cbSSangeeta Misra * tid: timer id of the timer 56dbed73cbSSangeeta Misra */ 57dbed73cbSSangeeta Misra typedef struct ilb_timer_s { 58dbed73cbSSangeeta Misra uint32_t start; 59dbed73cbSSangeeta Misra uint32_t end; 60dbed73cbSSangeeta Misra ilb_stack_t *ilbs; 61dbed73cbSSangeeta Misra kmutex_t tid_lock; 62dbed73cbSSangeeta Misra timeout_id_t tid; 63dbed73cbSSangeeta Misra } ilb_timer_t; 64dbed73cbSSangeeta Misra 65dbed73cbSSangeeta Misra /* Hash macro for finding the index to the conn hash table */ 66dbed73cbSSangeeta Misra #define ILB_CONN_HASH(saddr, sport, daddr, dport, hash_size) \ 67dbed73cbSSangeeta Misra (((*((saddr) + 3) ^ *((daddr) + 3)) * 50653 + \ 68dbed73cbSSangeeta Misra (*((saddr) + 2) ^ *((daddr) + 2)) * 1369 + \ 69dbed73cbSSangeeta Misra (*((saddr) + 1) ^ *((daddr) + 1)) * 37 + \ 70dbed73cbSSangeeta Misra (*(saddr) ^ *(daddr)) + (sport) * 37 + (dport)) & \ 71dbed73cbSSangeeta Misra ((hash_size) - 1)) 72dbed73cbSSangeeta Misra 73dbed73cbSSangeeta Misra /* Kmem cache for the conn hash entry */ 74dbed73cbSSangeeta Misra static struct kmem_cache *ilb_conn_cache = NULL; 75dbed73cbSSangeeta Misra 76dbed73cbSSangeeta Misra /* 77dbed73cbSSangeeta Misra * There are 60 timers running to do conn cache garbage collection. Each 78dbed73cbSSangeeta Misra * gc thread is responsible for 1/60 of the conn hash table. 79dbed73cbSSangeeta Misra */ 80dbed73cbSSangeeta Misra static int ilb_conn_timer_size = 60; 81dbed73cbSSangeeta Misra 82dbed73cbSSangeeta Misra /* Each of the above gc timers wake up every 15s to do the gc. 
*/ 83dbed73cbSSangeeta Misra static int ilb_conn_cache_timeout = 15; 84dbed73cbSSangeeta Misra 85dbed73cbSSangeeta Misra #define ILB_STICKY_HASH(saddr, rule, hash_size) \ 86dbed73cbSSangeeta Misra (((*((saddr) + 3) ^ ((rule) >> 24)) * 29791 + \ 87dbed73cbSSangeeta Misra (*((saddr) + 2) ^ ((rule) >> 16)) * 961 + \ 88dbed73cbSSangeeta Misra (*((saddr) + 1) ^ ((rule) >> 8)) * 31 + \ 89dbed73cbSSangeeta Misra (*(saddr) ^ (rule))) & ((hash_size) - 1)) 90dbed73cbSSangeeta Misra 91dbed73cbSSangeeta Misra static struct kmem_cache *ilb_sticky_cache = NULL; 92dbed73cbSSangeeta Misra 93dbed73cbSSangeeta Misra /* 94dbed73cbSSangeeta Misra * There are 60 timers running to do sticky cache garbage collection. Each 95dbed73cbSSangeeta Misra * gc thread is responsible for 1/60 of the sticky hash table. 96dbed73cbSSangeeta Misra */ 97dbed73cbSSangeeta Misra static int ilb_sticky_timer_size = 60; 98dbed73cbSSangeeta Misra 99dbed73cbSSangeeta Misra /* Each of the above gc timers wake up every 15s to do the gc. 
*/ 100dbed73cbSSangeeta Misra static int ilb_sticky_timeout = 15; 101dbed73cbSSangeeta Misra 102dbed73cbSSangeeta Misra #define ILB_STICKY_REFRELE(s) \ 103dbed73cbSSangeeta Misra { \ 104dbed73cbSSangeeta Misra mutex_enter(&(s)->hash->sticky_lock); \ 105dbed73cbSSangeeta Misra (s)->refcnt--; \ 106d3d50737SRafael Vanoni (s)->atime = ddi_get_lbolt64(); \ 107dbed73cbSSangeeta Misra mutex_exit(&s->hash->sticky_lock); \ 108dbed73cbSSangeeta Misra } 109dbed73cbSSangeeta Misra 110dbed73cbSSangeeta Misra 111dbed73cbSSangeeta Misra static void 112dbed73cbSSangeeta Misra ilb_conn_cache_init(void) 113dbed73cbSSangeeta Misra { 114dbed73cbSSangeeta Misra ilb_conn_cache = kmem_cache_create("ilb_conn_cache", 115dbed73cbSSangeeta Misra sizeof (ilb_conn_t), 0, NULL, NULL, NULL, NULL, NULL, 116dbed73cbSSangeeta Misra ilb_kmem_flags); 117dbed73cbSSangeeta Misra } 118dbed73cbSSangeeta Misra 119dbed73cbSSangeeta Misra void 120dbed73cbSSangeeta Misra ilb_conn_cache_fini(void) 121dbed73cbSSangeeta Misra { 122dbed73cbSSangeeta Misra if (ilb_conn_cache != NULL) { 123dbed73cbSSangeeta Misra kmem_cache_destroy(ilb_conn_cache); 124dbed73cbSSangeeta Misra ilb_conn_cache = NULL; 125dbed73cbSSangeeta Misra } 126dbed73cbSSangeeta Misra } 127dbed73cbSSangeeta Misra 128dbed73cbSSangeeta Misra static void 129dbed73cbSSangeeta Misra ilb_conn_remove_common(ilb_conn_t *connp, boolean_t c2s) 130dbed73cbSSangeeta Misra { 131dbed73cbSSangeeta Misra ilb_conn_hash_t *hash; 132dbed73cbSSangeeta Misra ilb_conn_t **next, **prev; 133dbed73cbSSangeeta Misra ilb_conn_t **next_prev, **prev_next; 134dbed73cbSSangeeta Misra 135dbed73cbSSangeeta Misra if (c2s) { 136dbed73cbSSangeeta Misra hash = connp->conn_c2s_hash; 137dbed73cbSSangeeta Misra ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock)); 138dbed73cbSSangeeta Misra next = &connp->conn_c2s_next; 139dbed73cbSSangeeta Misra prev = &connp->conn_c2s_prev; 140dbed73cbSSangeeta Misra if (*next != NULL) 141dbed73cbSSangeeta Misra next_prev = &(*next)->conn_c2s_prev; 
142dbed73cbSSangeeta Misra if (*prev != NULL) 143dbed73cbSSangeeta Misra prev_next = &(*prev)->conn_c2s_next; 144dbed73cbSSangeeta Misra } else { 145dbed73cbSSangeeta Misra hash = connp->conn_s2c_hash; 146dbed73cbSSangeeta Misra ASSERT(MUTEX_HELD(&hash->ilb_conn_hash_lock)); 147dbed73cbSSangeeta Misra next = &connp->conn_s2c_next; 148dbed73cbSSangeeta Misra prev = &connp->conn_s2c_prev; 149dbed73cbSSangeeta Misra if (*next != NULL) 150dbed73cbSSangeeta Misra next_prev = &(*next)->conn_s2c_prev; 151dbed73cbSSangeeta Misra if (*prev != NULL) 152dbed73cbSSangeeta Misra prev_next = &(*prev)->conn_s2c_next; 153dbed73cbSSangeeta Misra } 154dbed73cbSSangeeta Misra 155dbed73cbSSangeeta Misra if (hash->ilb_connp == connp) { 156dbed73cbSSangeeta Misra hash->ilb_connp = *next; 157dbed73cbSSangeeta Misra if (*next != NULL) 158dbed73cbSSangeeta Misra *next_prev = NULL; 159dbed73cbSSangeeta Misra } else { 160dbed73cbSSangeeta Misra if (*prev != NULL) 161dbed73cbSSangeeta Misra *prev_next = *next; 162dbed73cbSSangeeta Misra if (*next != NULL) 163dbed73cbSSangeeta Misra *next_prev = *prev; 164dbed73cbSSangeeta Misra } 165dbed73cbSSangeeta Misra ASSERT(hash->ilb_conn_cnt > 0); 166dbed73cbSSangeeta Misra hash->ilb_conn_cnt--; 167dbed73cbSSangeeta Misra 168dbed73cbSSangeeta Misra *next = NULL; 169dbed73cbSSangeeta Misra *prev = NULL; 170dbed73cbSSangeeta Misra } 171dbed73cbSSangeeta Misra 172dbed73cbSSangeeta Misra static void 173dbed73cbSSangeeta Misra ilb_conn_remove(ilb_conn_t *connp) 174dbed73cbSSangeeta Misra { 175dbed73cbSSangeeta Misra ASSERT(MUTEX_HELD(&connp->conn_c2s_hash->ilb_conn_hash_lock)); 176dbed73cbSSangeeta Misra ilb_conn_remove_common(connp, B_TRUE); 177dbed73cbSSangeeta Misra ASSERT(MUTEX_HELD(&connp->conn_s2c_hash->ilb_conn_hash_lock)); 178dbed73cbSSangeeta Misra ilb_conn_remove_common(connp, B_FALSE); 179dbed73cbSSangeeta Misra 180dbed73cbSSangeeta Misra if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) { 181dbed73cbSSangeeta Misra in_port_t port; 
182dbed73cbSSangeeta Misra 183dbed73cbSSangeeta Misra port = ntohs(connp->conn_rule_cache.info.nat_sport); 184dbed73cbSSangeeta Misra vmem_free(connp->conn_rule_cache.info.src_ent->nse_port_arena, 185dbed73cbSSangeeta Misra (void *)(uintptr_t)port, 1); 186dbed73cbSSangeeta Misra } 187dbed73cbSSangeeta Misra 188dbed73cbSSangeeta Misra if (connp->conn_sticky != NULL) 189dbed73cbSSangeeta Misra ILB_STICKY_REFRELE(connp->conn_sticky); 190dbed73cbSSangeeta Misra ILB_SERVER_REFRELE(connp->conn_server); 191dbed73cbSSangeeta Misra kmem_cache_free(ilb_conn_cache, connp); 192dbed73cbSSangeeta Misra } 193dbed73cbSSangeeta Misra 194dbed73cbSSangeeta Misra /* 195dbed73cbSSangeeta Misra * Routine to do periodic garbage collection of conn hash entries. When 196dbed73cbSSangeeta Misra * a conn hash timer fires, it dispatches a taskq to call this function 197dbed73cbSSangeeta Misra * to do the gc. Note that each taskq is responisble for a portion of 198dbed73cbSSangeeta Misra * the table. The portion is stored in timer->start, timer->end. 
199dbed73cbSSangeeta Misra */ 200dbed73cbSSangeeta Misra static void 201dbed73cbSSangeeta Misra ilb_conn_cleanup(void *arg) 202dbed73cbSSangeeta Misra { 203dbed73cbSSangeeta Misra ilb_timer_t *timer = (ilb_timer_t *)arg; 204dbed73cbSSangeeta Misra uint32_t i; 205dbed73cbSSangeeta Misra ilb_stack_t *ilbs; 206dbed73cbSSangeeta Misra ilb_conn_hash_t *c2s_hash, *s2c_hash; 207dbed73cbSSangeeta Misra ilb_conn_t *connp, *nxt_connp; 208dbed73cbSSangeeta Misra int64_t now; 209dbed73cbSSangeeta Misra int64_t expiry; 210dbed73cbSSangeeta Misra boolean_t die_now; 211dbed73cbSSangeeta Misra 212dbed73cbSSangeeta Misra ilbs = timer->ilbs; 213dbed73cbSSangeeta Misra c2s_hash = ilbs->ilbs_c2s_conn_hash; 214dbed73cbSSangeeta Misra ASSERT(c2s_hash != NULL); 215dbed73cbSSangeeta Misra 216d3d50737SRafael Vanoni now = ddi_get_lbolt64(); 217dbed73cbSSangeeta Misra for (i = timer->start; i < timer->end; i++) { 218dbed73cbSSangeeta Misra mutex_enter(&c2s_hash[i].ilb_conn_hash_lock); 219dbed73cbSSangeeta Misra if ((connp = c2s_hash[i].ilb_connp) == NULL) { 220dbed73cbSSangeeta Misra ASSERT(c2s_hash[i].ilb_conn_cnt == 0); 221dbed73cbSSangeeta Misra mutex_exit(&c2s_hash[i].ilb_conn_hash_lock); 222dbed73cbSSangeeta Misra continue; 223dbed73cbSSangeeta Misra } 224dbed73cbSSangeeta Misra do { 225dbed73cbSSangeeta Misra ASSERT(c2s_hash[i].ilb_conn_cnt > 0); 226dbed73cbSSangeeta Misra ASSERT(connp->conn_c2s_hash == &c2s_hash[i]); 227dbed73cbSSangeeta Misra nxt_connp = connp->conn_c2s_next; 228dbed73cbSSangeeta Misra expiry = now - SEC_TO_TICK(connp->conn_expiry); 229dbed73cbSSangeeta Misra if (connp->conn_server->iser_die_time != 0 && 230dbed73cbSSangeeta Misra connp->conn_server->iser_die_time < now) 231dbed73cbSSangeeta Misra die_now = B_TRUE; 232dbed73cbSSangeeta Misra else 233dbed73cbSSangeeta Misra die_now = B_FALSE; 234dbed73cbSSangeeta Misra s2c_hash = connp->conn_s2c_hash; 235dbed73cbSSangeeta Misra mutex_enter(&s2c_hash->ilb_conn_hash_lock); 236dbed73cbSSangeeta Misra 237dbed73cbSSangeeta 
Misra if (connp->conn_gc || die_now || 238dbed73cbSSangeeta Misra (connp->conn_c2s_atime < expiry && 239dbed73cbSSangeeta Misra connp->conn_s2c_atime < expiry)) { 240dbed73cbSSangeeta Misra /* Need to update the nat list cur_connp */ 241dbed73cbSSangeeta Misra if (connp == ilbs->ilbs_conn_list_connp) { 242dbed73cbSSangeeta Misra ilbs->ilbs_conn_list_connp = 243dbed73cbSSangeeta Misra connp->conn_c2s_next; 244dbed73cbSSangeeta Misra } 245dbed73cbSSangeeta Misra ilb_conn_remove(connp); 246dbed73cbSSangeeta Misra goto nxt_connp; 247dbed73cbSSangeeta Misra } 248dbed73cbSSangeeta Misra 249dbed73cbSSangeeta Misra if (connp->conn_l4 != IPPROTO_TCP) 250dbed73cbSSangeeta Misra goto nxt_connp; 251dbed73cbSSangeeta Misra 252dbed73cbSSangeeta Misra /* Update and check TCP related conn info */ 253dbed73cbSSangeeta Misra if (connp->conn_c2s_tcp_fin_sent && 254dbed73cbSSangeeta Misra SEQ_GT(connp->conn_s2c_tcp_ack, 255dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fss)) { 256dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fin_acked = B_TRUE; 257dbed73cbSSangeeta Misra } 258dbed73cbSSangeeta Misra if (connp->conn_s2c_tcp_fin_sent && 259dbed73cbSSangeeta Misra SEQ_GT(connp->conn_c2s_tcp_ack, 260dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fss)) { 261dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_acked = B_TRUE; 262dbed73cbSSangeeta Misra } 263dbed73cbSSangeeta Misra if (connp->conn_c2s_tcp_fin_acked && 264dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_acked) { 265dbed73cbSSangeeta Misra ilb_conn_remove(connp); 266dbed73cbSSangeeta Misra } 267dbed73cbSSangeeta Misra nxt_connp: 268dbed73cbSSangeeta Misra mutex_exit(&s2c_hash->ilb_conn_hash_lock); 269dbed73cbSSangeeta Misra connp = nxt_connp; 270dbed73cbSSangeeta Misra } while (connp != NULL); 271dbed73cbSSangeeta Misra mutex_exit(&c2s_hash[i].ilb_conn_hash_lock); 272dbed73cbSSangeeta Misra } 273dbed73cbSSangeeta Misra } 274dbed73cbSSangeeta Misra 275dbed73cbSSangeeta Misra /* Conn hash timer routine. 
It dispatches a taskq and restart the timer */ 276dbed73cbSSangeeta Misra static void 277dbed73cbSSangeeta Misra ilb_conn_timer(void *arg) 278dbed73cbSSangeeta Misra { 279dbed73cbSSangeeta Misra ilb_timer_t *timer = (ilb_timer_t *)arg; 280dbed73cbSSangeeta Misra 281dbed73cbSSangeeta Misra (void) taskq_dispatch(timer->ilbs->ilbs_conn_taskq, ilb_conn_cleanup, 282dbed73cbSSangeeta Misra arg, TQ_SLEEP); 283dbed73cbSSangeeta Misra mutex_enter(&timer->tid_lock); 284dbed73cbSSangeeta Misra if (timer->tid == 0) { 285dbed73cbSSangeeta Misra mutex_exit(&timer->tid_lock); 286dbed73cbSSangeeta Misra } else { 287dbed73cbSSangeeta Misra timer->tid = timeout(ilb_conn_timer, arg, 288dbed73cbSSangeeta Misra SEC_TO_TICK(ilb_conn_cache_timeout)); 289dbed73cbSSangeeta Misra mutex_exit(&timer->tid_lock); 290dbed73cbSSangeeta Misra } 291dbed73cbSSangeeta Misra } 292dbed73cbSSangeeta Misra 293dbed73cbSSangeeta Misra void 294dbed73cbSSangeeta Misra ilb_conn_hash_init(ilb_stack_t *ilbs) 295dbed73cbSSangeeta Misra { 296dbed73cbSSangeeta Misra extern pri_t minclsyspri; 297dbed73cbSSangeeta Misra int i, part; 298dbed73cbSSangeeta Misra ilb_timer_t *tm; 299dbed73cbSSangeeta Misra char tq_name[TASKQ_NAMELEN]; 300dbed73cbSSangeeta Misra 301dbed73cbSSangeeta Misra /* 302dbed73cbSSangeeta Misra * If ilbs->ilbs_conn_hash_size is not a power of 2, bump it up to 303dbed73cbSSangeeta Misra * the next power of 2. 304dbed73cbSSangeeta Misra */ 305de710d24SJosef 'Jeff' Sipek if (!ISP2(ilbs->ilbs_conn_hash_size)) { 306dbed73cbSSangeeta Misra for (i = 0; i < 31; i++) { 307dbed73cbSSangeeta Misra if (ilbs->ilbs_conn_hash_size < (1 << i)) 308dbed73cbSSangeeta Misra break; 309dbed73cbSSangeeta Misra } 310dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size = 1 << i; 311dbed73cbSSangeeta Misra } 312dbed73cbSSangeeta Misra 313dbed73cbSSangeeta Misra /* 314dbed73cbSSangeeta Misra * Can sleep since this should be called when a rule is being added, 315dbed73cbSSangeeta Misra * hence we are not in interrupt context. 
316dbed73cbSSangeeta Misra */ 317dbed73cbSSangeeta Misra ilbs->ilbs_c2s_conn_hash = kmem_zalloc(sizeof (ilb_conn_hash_t) * 318dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size, KM_SLEEP); 319dbed73cbSSangeeta Misra ilbs->ilbs_s2c_conn_hash = kmem_zalloc(sizeof (ilb_conn_hash_t) * 320dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size, KM_SLEEP); 321dbed73cbSSangeeta Misra 322dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) { 323dbed73cbSSangeeta Misra mutex_init(&ilbs->ilbs_c2s_conn_hash[i].ilb_conn_hash_lock, 324dbed73cbSSangeeta Misra NULL, MUTEX_DEFAULT, NULL); 325dbed73cbSSangeeta Misra } 326dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) { 327dbed73cbSSangeeta Misra mutex_init(&ilbs->ilbs_s2c_conn_hash[i].ilb_conn_hash_lock, 328dbed73cbSSangeeta Misra NULL, MUTEX_DEFAULT, NULL); 329dbed73cbSSangeeta Misra } 330dbed73cbSSangeeta Misra 331dbed73cbSSangeeta Misra if (ilb_conn_cache == NULL) 332dbed73cbSSangeeta Misra ilb_conn_cache_init(); 333dbed73cbSSangeeta Misra 334dbed73cbSSangeeta Misra (void) snprintf(tq_name, sizeof (tq_name), "ilb_conn_taskq_%p", 3356e0672acSSangeeta Misra (void *)ilbs->ilbs_netstack); 336dbed73cbSSangeeta Misra ASSERT(ilbs->ilbs_conn_taskq == NULL); 337dbed73cbSSangeeta Misra ilbs->ilbs_conn_taskq = taskq_create(tq_name, 338dbed73cbSSangeeta Misra ilb_conn_timer_size * 2, minclsyspri, ilb_conn_timer_size, 339dbed73cbSSangeeta Misra ilb_conn_timer_size * 2, TASKQ_PREPOPULATE|TASKQ_DYNAMIC); 340dbed73cbSSangeeta Misra 341dbed73cbSSangeeta Misra ASSERT(ilbs->ilbs_conn_timer_list == NULL); 342dbed73cbSSangeeta Misra ilbs->ilbs_conn_timer_list = kmem_zalloc(sizeof (ilb_timer_t) * 343dbed73cbSSangeeta Misra ilb_conn_timer_size, KM_SLEEP); 344dbed73cbSSangeeta Misra 345dbed73cbSSangeeta Misra /* 346dbed73cbSSangeeta Misra * The hash table is divided in equal partition for those timers 347dbed73cbSSangeeta Misra * to do garbage collection. 
348dbed73cbSSangeeta Misra */ 349dbed73cbSSangeeta Misra part = ilbs->ilbs_conn_hash_size / ilb_conn_timer_size + 1; 350dbed73cbSSangeeta Misra for (i = 0; i < ilb_conn_timer_size; i++) { 351dbed73cbSSangeeta Misra tm = ilbs->ilbs_conn_timer_list + i; 352dbed73cbSSangeeta Misra tm->start = i * part; 353dbed73cbSSangeeta Misra tm->end = i * part + part; 354dbed73cbSSangeeta Misra if (tm->end > ilbs->ilbs_conn_hash_size) 355dbed73cbSSangeeta Misra tm->end = ilbs->ilbs_conn_hash_size; 356dbed73cbSSangeeta Misra tm->ilbs = ilbs; 357dbed73cbSSangeeta Misra mutex_init(&tm->tid_lock, NULL, MUTEX_DEFAULT, NULL); 358dbed73cbSSangeeta Misra /* Spread out the starting execution time of all the timers. */ 359dbed73cbSSangeeta Misra tm->tid = timeout(ilb_conn_timer, tm, 360dbed73cbSSangeeta Misra SEC_TO_TICK(ilb_conn_cache_timeout + i)); 361dbed73cbSSangeeta Misra } 362dbed73cbSSangeeta Misra } 363dbed73cbSSangeeta Misra 364dbed73cbSSangeeta Misra void 365dbed73cbSSangeeta Misra ilb_conn_hash_fini(ilb_stack_t *ilbs) 366dbed73cbSSangeeta Misra { 367dbed73cbSSangeeta Misra uint32_t i; 368dbed73cbSSangeeta Misra ilb_conn_t *connp; 369*d17b05b6SJerry Jelinek ilb_conn_hash_t *hash; 370dbed73cbSSangeeta Misra 371dbed73cbSSangeeta Misra if (ilbs->ilbs_c2s_conn_hash == NULL) { 372dbed73cbSSangeeta Misra ASSERT(ilbs->ilbs_s2c_conn_hash == NULL); 373dbed73cbSSangeeta Misra return; 374dbed73cbSSangeeta Misra } 375dbed73cbSSangeeta Misra 376dbed73cbSSangeeta Misra /* Stop all the timers first. */ 377dbed73cbSSangeeta Misra for (i = 0; i < ilb_conn_timer_size; i++) { 378dbed73cbSSangeeta Misra timeout_id_t tid; 379dbed73cbSSangeeta Misra 380dbed73cbSSangeeta Misra /* Setting tid to 0 tells the timer handler not to restart. 
*/ 381dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_conn_timer_list[i].tid_lock); 382dbed73cbSSangeeta Misra tid = ilbs->ilbs_conn_timer_list[i].tid; 383dbed73cbSSangeeta Misra ilbs->ilbs_conn_timer_list[i].tid = 0; 384dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_conn_timer_list[i].tid_lock); 385dbed73cbSSangeeta Misra (void) untimeout(tid); 386dbed73cbSSangeeta Misra } 387dbed73cbSSangeeta Misra kmem_free(ilbs->ilbs_conn_timer_list, sizeof (ilb_timer_t) * 388dbed73cbSSangeeta Misra ilb_conn_timer_size); 389dbed73cbSSangeeta Misra taskq_destroy(ilbs->ilbs_conn_taskq); 390dbed73cbSSangeeta Misra ilbs->ilbs_conn_taskq = NULL; 391dbed73cbSSangeeta Misra 392dbed73cbSSangeeta Misra /* Then remove all the conns. */ 393*d17b05b6SJerry Jelinek hash = ilbs->ilbs_s2c_conn_hash; 394dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_conn_hash_size; i++) { 395*d17b05b6SJerry Jelinek while ((connp = hash[i].ilb_connp) != NULL) { 396*d17b05b6SJerry Jelinek hash[i].ilb_connp = connp->conn_s2c_next; 397dbed73cbSSangeeta Misra ILB_SERVER_REFRELE(connp->conn_server); 398dbed73cbSSangeeta Misra if (connp->conn_rule_cache.topo == ILB_TOPO_IMPL_NAT) { 399dbed73cbSSangeeta Misra ilb_nat_src_entry_t *ent; 400dbed73cbSSangeeta Misra in_port_t port; 401dbed73cbSSangeeta Misra 402dbed73cbSSangeeta Misra /* 403dbed73cbSSangeeta Misra * src_ent will be freed in ilb_nat_src_fini(). 
404dbed73cbSSangeeta Misra */ 405dbed73cbSSangeeta Misra port = ntohs( 406dbed73cbSSangeeta Misra connp->conn_rule_cache.info.nat_sport); 407dbed73cbSSangeeta Misra ent = connp->conn_rule_cache.info.src_ent; 408dbed73cbSSangeeta Misra vmem_free(ent->nse_port_arena, 409dbed73cbSSangeeta Misra (void *)(uintptr_t)port, 1); 410dbed73cbSSangeeta Misra } 411dbed73cbSSangeeta Misra kmem_cache_free(ilb_conn_cache, connp); 412dbed73cbSSangeeta Misra } 413dbed73cbSSangeeta Misra } 414dbed73cbSSangeeta Misra kmem_free(ilbs->ilbs_c2s_conn_hash, sizeof (ilb_conn_hash_t) * 415dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size); 416dbed73cbSSangeeta Misra kmem_free(ilbs->ilbs_s2c_conn_hash, sizeof (ilb_conn_hash_t) * 417dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size); 418dbed73cbSSangeeta Misra } 419dbed73cbSSangeeta Misra 420dbed73cbSSangeeta Misra /* 421dbed73cbSSangeeta Misra * Internet checksum adjustment calculation routines. We pre-calculate 422dbed73cbSSangeeta Misra * checksum adjustment so that we don't need to compute the checksum on 423dbed73cbSSangeeta Misra * the whole packet when we change address/port in the packet. 
/*
 * Each checksum adjustment routine sums the 16-bit words of the old
 * address(es) and port(s), folds that sum to 16 bits and complements it,
 * and then adds the 16-bit words of the new address(es) and port(s).
 * The result is stored unfolded in *adj_sum.
 */

/* Add up nwords consecutive 16-bit words starting at words. */
static uint32_t
ilb_cksum_sum_words(uint16_t *words, int nwords)
{
	uint32_t total = 0;
	int k;

	for (k = 0; k < nwords; k++)
		total += words[k];
	return (total);
}

/* Fold sum to 16 bits with end-around carry and return its complement. */
static uint16_t
ilb_cksum_fold_comp(uint32_t sum)
{
	while ((sum >> 16) != 0)
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)~sum);
}

/*
 * Half NAT, IPv4: one address (2 words) and one port are replaced.
 * oaddr/old_port are the old values, naddr/new_port the new ones.
 */
static void
hnat_cksum_v4(uint16_t *oaddr, uint16_t *naddr, in_port_t old_port,
    in_port_t new_port, uint32_t *adj_sum)
{
	uint32_t sum;

	sum = ilb_cksum_sum_words(oaddr, 2) + old_port;
	*adj_sum = (uint32_t)ilb_cksum_fold_comp(sum) +
	    ilb_cksum_sum_words(naddr, 2) + new_port;
}

/*
 * Half NAT, IPv6: one address (8 words) and one port are replaced.
 * oaddr/old_port are the old values, naddr/new_port the new ones.
 */
static void
hnat_cksum_v6(uint16_t *oaddr, uint16_t *naddr, in_port_t old_port,
    in_port_t new_port, uint32_t *adj_sum)
{
	uint32_t sum;

	sum = ilb_cksum_sum_words(oaddr, 8) + old_port;
	*adj_sum = (uint32_t)ilb_cksum_fold_comp(sum) +
	    ilb_cksum_sum_words(naddr, 8) + new_port;
}

/*
 * Full NAT, IPv4: two addresses (2 words each) and two ports are replaced.
 * oaddr1/oaddr2/old_port1/old_port2 are the old values,
 * naddr1/naddr2/new_port1/new_port2 the new ones.
 */
static void
fnat_cksum_v4(uint16_t *oaddr1, uint16_t *oaddr2, uint16_t *naddr1,
    uint16_t *naddr2, in_port_t old_port1, in_port_t old_port2,
    in_port_t new_port1, in_port_t new_port2, uint32_t *adj_sum)
{
	uint32_t sum;

	sum = ilb_cksum_sum_words(oaddr1, 2) + old_port1 +
	    ilb_cksum_sum_words(oaddr2, 2) + old_port2;
	*adj_sum = (uint32_t)ilb_cksum_fold_comp(sum) +
	    ilb_cksum_sum_words(naddr1, 2) + new_port1 +
	    ilb_cksum_sum_words(naddr2, 2) + new_port2;
}

/*
 * Full NAT, IPv6: two addresses (8 words each) and two ports are replaced.
 * oaddr1/oaddr2/old_port1/old_port2 are the old values,
 * naddr1/naddr2/new_port1/new_port2 the new ones.
 */
static void
fnat_cksum_v6(uint16_t *oaddr1, uint16_t *oaddr2, uint16_t *naddr1,
    uint16_t *naddr2, in_port_t old_port1, in_port_t old_port2,
    in_port_t new_port1, in_port_t new_port2, uint32_t *adj_sum)
{
	uint32_t sum;

	sum = ilb_cksum_sum_words(oaddr1, 8) + old_port1 +
	    ilb_cksum_sum_words(oaddr2, 8) + old_port2;
	*adj_sum = (uint32_t)ilb_cksum_fold_comp(sum) +
	    ilb_cksum_sum_words(naddr1, 8) + new_port1 +
	    ilb_cksum_sum_words(naddr2, 8) + new_port2;
}
493dbed73cbSSangeeta Misra * Add a conn hash entry to the tables. Note that a conn hash entry 494dbed73cbSSangeeta Misra * (ilb_conn_t) contains info on both directions. And there are two hash 495dbed73cbSSangeeta Misra * tables, one for client to server and the other for server to client. 496dbed73cbSSangeeta Misra * So the same entry is added to both tables and can be ccessed by two 497dbed73cbSSangeeta Misra * thread simultaneously. But each thread will only access data on one 498dbed73cbSSangeeta Misra * direction, so there is no conflict. 499dbed73cbSSangeeta Misra */ 500dbed73cbSSangeeta Misra int 501dbed73cbSSangeeta Misra ilb_conn_add(ilb_stack_t *ilbs, ilb_rule_t *rule, ilb_server_t *server, 502dbed73cbSSangeeta Misra in6_addr_t *src, in_port_t sport, in6_addr_t *dst, in_port_t dport, 503dbed73cbSSangeeta Misra ilb_nat_info_t *info, uint32_t *ip_sum, uint32_t *tp_sum, ilb_sticky_t *s) 504dbed73cbSSangeeta Misra { 505dbed73cbSSangeeta Misra ilb_conn_t *connp; 506dbed73cbSSangeeta Misra ilb_conn_hash_t *hash; 507dbed73cbSSangeeta Misra int i; 508dbed73cbSSangeeta Misra 509dbed73cbSSangeeta Misra connp = kmem_cache_alloc(ilb_conn_cache, KM_NOSLEEP); 510dbed73cbSSangeeta Misra if (connp == NULL) { 511dbed73cbSSangeeta Misra if (s != NULL) { 512dbed73cbSSangeeta Misra if (rule->ir_topo == ILB_TOPO_IMPL_NAT) { 513dbed73cbSSangeeta Misra ilb_nat_src_entry_t **entry; 514dbed73cbSSangeeta Misra 515dbed73cbSSangeeta Misra entry = s->server->iser_nat_src->src_list; 516dbed73cbSSangeeta Misra vmem_free(entry[s->nat_src_idx]->nse_port_arena, 517dbed73cbSSangeeta Misra (void *)(uintptr_t)ntohs(info->nat_sport), 518dbed73cbSSangeeta Misra 1); 519dbed73cbSSangeeta Misra } 520dbed73cbSSangeeta Misra ILB_STICKY_REFRELE(s); 521dbed73cbSSangeeta Misra } 522dbed73cbSSangeeta Misra return (ENOMEM); 523dbed73cbSSangeeta Misra } 524dbed73cbSSangeeta Misra 525dbed73cbSSangeeta Misra connp->conn_l4 = rule->ir_proto; 526dbed73cbSSangeeta Misra 527dbed73cbSSangeeta Misra 
connp->conn_server = server; 528dbed73cbSSangeeta Misra ILB_SERVER_REFHOLD(server); 529dbed73cbSSangeeta Misra connp->conn_sticky = s; 530dbed73cbSSangeeta Misra 531dbed73cbSSangeeta Misra connp->conn_rule_cache.topo = rule->ir_topo; 532dbed73cbSSangeeta Misra connp->conn_rule_cache.info = *info; 533dbed73cbSSangeeta Misra 534dbed73cbSSangeeta Misra connp->conn_gc = B_FALSE; 535dbed73cbSSangeeta Misra 536dbed73cbSSangeeta Misra connp->conn_expiry = rule->ir_nat_expiry; 537d3d50737SRafael Vanoni connp->conn_cr_time = ddi_get_lbolt64(); 538dbed73cbSSangeeta Misra 539dbed73cbSSangeeta Misra /* Client to server info. */ 540dbed73cbSSangeeta Misra connp->conn_c2s_saddr = *src; 541dbed73cbSSangeeta Misra connp->conn_c2s_sport = sport; 542dbed73cbSSangeeta Misra connp->conn_c2s_daddr = *dst; 543dbed73cbSSangeeta Misra connp->conn_c2s_dport = dport; 544dbed73cbSSangeeta Misra 545d3d50737SRafael Vanoni connp->conn_c2s_atime = ddi_get_lbolt64(); 546dbed73cbSSangeeta Misra /* The packet ths triggers this creation should be counted */ 547dbed73cbSSangeeta Misra connp->conn_c2s_pkt_cnt = 1; 548dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fin_sent = B_FALSE; 549dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fin_acked = B_FALSE; 550dbed73cbSSangeeta Misra 551dbed73cbSSangeeta Misra /* Server to client info, before NAT */ 552dbed73cbSSangeeta Misra switch (rule->ir_topo) { 553dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_HALF_NAT: 554dbed73cbSSangeeta Misra connp->conn_s2c_saddr = info->nat_dst; 555dbed73cbSSangeeta Misra connp->conn_s2c_sport = info->nat_dport; 556dbed73cbSSangeeta Misra connp->conn_s2c_daddr = *src; 557dbed73cbSSangeeta Misra connp->conn_s2c_dport = sport; 558dbed73cbSSangeeta Misra 559dbed73cbSSangeeta Misra /* Pre-calculate checksum changes for both directions */ 560dbed73cbSSangeeta Misra if (rule->ir_ipver == IPPROTO_IP) { 561dbed73cbSSangeeta Misra hnat_cksum_v4((uint16_t *)&dst->s6_addr32[3], 562dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3], 0, 
0, 563dbed73cbSSangeeta Misra &connp->conn_c2s_ip_sum); 564dbed73cbSSangeeta Misra hnat_cksum_v4((uint16_t *)&dst->s6_addr32[3], 565dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3], dport, 566dbed73cbSSangeeta Misra info->nat_dport, &connp->conn_c2s_tp_sum); 567dbed73cbSSangeeta Misra *ip_sum = connp->conn_c2s_ip_sum; 568dbed73cbSSangeeta Misra *tp_sum = connp->conn_c2s_tp_sum; 569dbed73cbSSangeeta Misra 570dbed73cbSSangeeta Misra hnat_cksum_v4( 571dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3], 572dbed73cbSSangeeta Misra (uint16_t *)&dst->s6_addr32[3], 0, 0, 573dbed73cbSSangeeta Misra &connp->conn_s2c_ip_sum); 574dbed73cbSSangeeta Misra hnat_cksum_v4( 575dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3], 576dbed73cbSSangeeta Misra (uint16_t *)&dst->s6_addr32[3], 577dbed73cbSSangeeta Misra info->nat_dport, dport, 578dbed73cbSSangeeta Misra &connp->conn_s2c_tp_sum); 579dbed73cbSSangeeta Misra } else { 580dbed73cbSSangeeta Misra connp->conn_c2s_ip_sum = 0; 581dbed73cbSSangeeta Misra hnat_cksum_v6((uint16_t *)dst, 582dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst, dport, 583dbed73cbSSangeeta Misra info->nat_dport, &connp->conn_c2s_tp_sum); 584dbed73cbSSangeeta Misra *ip_sum = 0; 585dbed73cbSSangeeta Misra *tp_sum = connp->conn_c2s_tp_sum; 586dbed73cbSSangeeta Misra 587dbed73cbSSangeeta Misra connp->conn_s2c_ip_sum = 0; 588dbed73cbSSangeeta Misra hnat_cksum_v6((uint16_t *)&info->nat_dst, 589dbed73cbSSangeeta Misra (uint16_t *)dst, info->nat_dport, dport, 590dbed73cbSSangeeta Misra &connp->conn_s2c_tp_sum); 591dbed73cbSSangeeta Misra } 592dbed73cbSSangeeta Misra break; 593dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_NAT: 594dbed73cbSSangeeta Misra connp->conn_s2c_saddr = info->nat_dst; 595dbed73cbSSangeeta Misra connp->conn_s2c_sport = info->nat_dport; 596dbed73cbSSangeeta Misra connp->conn_s2c_daddr = info->nat_src; 597dbed73cbSSangeeta Misra connp->conn_s2c_dport = info->nat_sport; 598dbed73cbSSangeeta Misra 
599dbed73cbSSangeeta Misra if (rule->ir_ipver == IPPROTO_IP) { 600dbed73cbSSangeeta Misra fnat_cksum_v4((uint16_t *)&src->s6_addr32[3], 601dbed73cbSSangeeta Misra (uint16_t *)&dst->s6_addr32[3], 602dbed73cbSSangeeta Misra (uint16_t *)&info->nat_src.s6_addr32[3], 603dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3], 604dbed73cbSSangeeta Misra 0, 0, 0, 0, &connp->conn_c2s_ip_sum); 605dbed73cbSSangeeta Misra fnat_cksum_v4((uint16_t *)&src->s6_addr32[3], 606dbed73cbSSangeeta Misra (uint16_t *)&dst->s6_addr32[3], 607dbed73cbSSangeeta Misra (uint16_t *)&info->nat_src.s6_addr32[3], 608dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3], 609dbed73cbSSangeeta Misra sport, dport, info->nat_sport, 610dbed73cbSSangeeta Misra info->nat_dport, &connp->conn_c2s_tp_sum); 611dbed73cbSSangeeta Misra *ip_sum = connp->conn_c2s_ip_sum; 612dbed73cbSSangeeta Misra *tp_sum = connp->conn_c2s_tp_sum; 613dbed73cbSSangeeta Misra 614dbed73cbSSangeeta Misra fnat_cksum_v4( 615dbed73cbSSangeeta Misra (uint16_t *)&info->nat_src.s6_addr32[3], 616dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3], 617dbed73cbSSangeeta Misra (uint16_t *)&src->s6_addr32[3], 618dbed73cbSSangeeta Misra (uint16_t *)&dst->s6_addr32[3], 619dbed73cbSSangeeta Misra 0, 0, 0, 0, &connp->conn_s2c_ip_sum); 620dbed73cbSSangeeta Misra fnat_cksum_v4( 621dbed73cbSSangeeta Misra (uint16_t *)&info->nat_src.s6_addr32[3], 622dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst.s6_addr32[3], 623dbed73cbSSangeeta Misra (uint16_t *)&src->s6_addr32[3], 624dbed73cbSSangeeta Misra (uint16_t *)&dst->s6_addr32[3], 625dbed73cbSSangeeta Misra info->nat_sport, info->nat_dport, 626dbed73cbSSangeeta Misra sport, dport, &connp->conn_s2c_tp_sum); 627dbed73cbSSangeeta Misra } else { 628dbed73cbSSangeeta Misra fnat_cksum_v6((uint16_t *)src, (uint16_t *)dst, 629dbed73cbSSangeeta Misra (uint16_t *)&info->nat_src, 630dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst, 631dbed73cbSSangeeta Misra sport, dport, 
info->nat_sport, 632dbed73cbSSangeeta Misra info->nat_dport, &connp->conn_c2s_tp_sum); 633dbed73cbSSangeeta Misra connp->conn_c2s_ip_sum = 0; 634dbed73cbSSangeeta Misra *ip_sum = 0; 635dbed73cbSSangeeta Misra *tp_sum = connp->conn_c2s_tp_sum; 636dbed73cbSSangeeta Misra 637dbed73cbSSangeeta Misra fnat_cksum_v6((uint16_t *)&info->nat_src, 638dbed73cbSSangeeta Misra (uint16_t *)&info->nat_dst, (uint16_t *)src, 639dbed73cbSSangeeta Misra (uint16_t *)dst, info->nat_sport, 640dbed73cbSSangeeta Misra info->nat_dport, sport, dport, 641dbed73cbSSangeeta Misra &connp->conn_s2c_tp_sum); 642dbed73cbSSangeeta Misra connp->conn_s2c_ip_sum = 0; 643dbed73cbSSangeeta Misra } 644dbed73cbSSangeeta Misra break; 645dbed73cbSSangeeta Misra } 646dbed73cbSSangeeta Misra 647d3d50737SRafael Vanoni connp->conn_s2c_atime = ddi_get_lbolt64(); 648dbed73cbSSangeeta Misra connp->conn_s2c_pkt_cnt = 1; 649dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_sent = B_FALSE; 650dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_acked = B_FALSE; 651dbed73cbSSangeeta Misra 652dbed73cbSSangeeta Misra /* Add it to the s2c hash table. 
*/ 653dbed73cbSSangeeta Misra hash = ilbs->ilbs_s2c_conn_hash; 654dbed73cbSSangeeta Misra i = ILB_CONN_HASH((uint8_t *)&connp->conn_s2c_saddr.s6_addr32[3], 655dbed73cbSSangeeta Misra ntohs(connp->conn_s2c_sport), 656dbed73cbSSangeeta Misra (uint8_t *)&connp->conn_s2c_daddr.s6_addr32[3], 657dbed73cbSSangeeta Misra ntohs(connp->conn_s2c_dport), ilbs->ilbs_conn_hash_size); 658dbed73cbSSangeeta Misra connp->conn_s2c_hash = &hash[i]; 659dbed73cbSSangeeta Misra DTRACE_PROBE2(ilb__conn__hash__add__s2c, ilb_conn_t *, connp, int, i); 660dbed73cbSSangeeta Misra 661dbed73cbSSangeeta Misra mutex_enter(&hash[i].ilb_conn_hash_lock); 662dbed73cbSSangeeta Misra hash[i].ilb_conn_cnt++; 663dbed73cbSSangeeta Misra connp->conn_s2c_next = hash[i].ilb_connp; 664dbed73cbSSangeeta Misra if (hash[i].ilb_connp != NULL) 665dbed73cbSSangeeta Misra hash[i].ilb_connp->conn_s2c_prev = connp; 666dbed73cbSSangeeta Misra connp->conn_s2c_prev = NULL; 667dbed73cbSSangeeta Misra hash[i].ilb_connp = connp; 668dbed73cbSSangeeta Misra mutex_exit(&hash[i].ilb_conn_hash_lock); 669dbed73cbSSangeeta Misra 670dbed73cbSSangeeta Misra /* Add it to the c2s hash table. 
*/ 671dbed73cbSSangeeta Misra hash = ilbs->ilbs_c2s_conn_hash; 672dbed73cbSSangeeta Misra i = ILB_CONN_HASH((uint8_t *)&src->s6_addr32[3], ntohs(sport), 673dbed73cbSSangeeta Misra (uint8_t *)&dst->s6_addr32[3], ntohs(dport), 674dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size); 675dbed73cbSSangeeta Misra connp->conn_c2s_hash = &hash[i]; 676dbed73cbSSangeeta Misra DTRACE_PROBE2(ilb__conn__hash__add__c2s, ilb_conn_t *, connp, int, i); 677dbed73cbSSangeeta Misra 678dbed73cbSSangeeta Misra mutex_enter(&hash[i].ilb_conn_hash_lock); 679dbed73cbSSangeeta Misra hash[i].ilb_conn_cnt++; 680dbed73cbSSangeeta Misra connp->conn_c2s_next = hash[i].ilb_connp; 681dbed73cbSSangeeta Misra if (hash[i].ilb_connp != NULL) 682dbed73cbSSangeeta Misra hash[i].ilb_connp->conn_c2s_prev = connp; 683dbed73cbSSangeeta Misra connp->conn_c2s_prev = NULL; 684dbed73cbSSangeeta Misra hash[i].ilb_connp = connp; 685dbed73cbSSangeeta Misra mutex_exit(&hash[i].ilb_conn_hash_lock); 686dbed73cbSSangeeta Misra 687dbed73cbSSangeeta Misra return (0); 688dbed73cbSSangeeta Misra } 689dbed73cbSSangeeta Misra 690dbed73cbSSangeeta Misra /* 691dbed73cbSSangeeta Misra * If a connection is using TCP, we keep track of simple TCP state transition 692dbed73cbSSangeeta Misra * so that we know when to clean up an entry. 
693dbed73cbSSangeeta Misra */ 694dbed73cbSSangeeta Misra static boolean_t 695dbed73cbSSangeeta Misra update_conn_tcp(ilb_conn_t *connp, void *iph, tcpha_t *tcpha, int32_t pkt_len, 696dbed73cbSSangeeta Misra boolean_t c2s) 697dbed73cbSSangeeta Misra { 698dbed73cbSSangeeta Misra uint32_t ack, seq; 699dbed73cbSSangeeta Misra int32_t seg_len; 700dbed73cbSSangeeta Misra 701dbed73cbSSangeeta Misra if (tcpha->tha_flags & TH_RST) 702dbed73cbSSangeeta Misra return (B_FALSE); 703dbed73cbSSangeeta Misra 704dbed73cbSSangeeta Misra seg_len = pkt_len - ((uint8_t *)tcpha - (uint8_t *)iph) - 705dbed73cbSSangeeta Misra TCP_HDR_LENGTH((tcph_t *)tcpha); 706dbed73cbSSangeeta Misra 707dbed73cbSSangeeta Misra if (tcpha->tha_flags & TH_ACK) 708dbed73cbSSangeeta Misra ack = ntohl(tcpha->tha_ack); 709dbed73cbSSangeeta Misra seq = ntohl(tcpha->tha_seq); 710dbed73cbSSangeeta Misra if (c2s) { 711dbed73cbSSangeeta Misra ASSERT(MUTEX_HELD(&connp->conn_c2s_hash->ilb_conn_hash_lock)); 712dbed73cbSSangeeta Misra if (tcpha->tha_flags & TH_FIN) { 713dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fss = seq + seg_len; 714dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fin_sent = B_TRUE; 715dbed73cbSSangeeta Misra } 716dbed73cbSSangeeta Misra connp->conn_c2s_tcp_ack = ack; 717dbed73cbSSangeeta Misra 718dbed73cbSSangeeta Misra /* Port reuse by the client, restart the conn. 
*/ 719dbed73cbSSangeeta Misra if (connp->conn_c2s_tcp_fin_sent && 720dbed73cbSSangeeta Misra SEQ_GT(seq, connp->conn_c2s_tcp_fss + 1)) { 721dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fin_sent = B_FALSE; 722dbed73cbSSangeeta Misra connp->conn_c2s_tcp_fin_acked = B_FALSE; 723dbed73cbSSangeeta Misra } 724dbed73cbSSangeeta Misra } else { 725dbed73cbSSangeeta Misra ASSERT(MUTEX_HELD(&connp->conn_s2c_hash->ilb_conn_hash_lock)); 726dbed73cbSSangeeta Misra if (tcpha->tha_flags & TH_FIN) { 727dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fss = seq + seg_len; 728dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_sent = B_TRUE; 729dbed73cbSSangeeta Misra } 730dbed73cbSSangeeta Misra connp->conn_s2c_tcp_ack = ack; 731dbed73cbSSangeeta Misra 732dbed73cbSSangeeta Misra /* Port reuse by the client, restart the conn. */ 733dbed73cbSSangeeta Misra if (connp->conn_s2c_tcp_fin_sent && 734dbed73cbSSangeeta Misra SEQ_GT(seq, connp->conn_s2c_tcp_fss + 1)) { 735dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_sent = B_FALSE; 736dbed73cbSSangeeta Misra connp->conn_s2c_tcp_fin_acked = B_FALSE; 737dbed73cbSSangeeta Misra } 738dbed73cbSSangeeta Misra } 739dbed73cbSSangeeta Misra 740dbed73cbSSangeeta Misra return (B_TRUE); 741dbed73cbSSangeeta Misra } 742dbed73cbSSangeeta Misra 743dbed73cbSSangeeta Misra /* 744dbed73cbSSangeeta Misra * Helper routint to find conn hash entry given some packet information and 745dbed73cbSSangeeta Misra * the traffic direction (c2s, client to server?) 
746dbed73cbSSangeeta Misra */ 747dbed73cbSSangeeta Misra static boolean_t 748dbed73cbSSangeeta Misra ilb_find_conn(ilb_stack_t *ilbs, void *iph, void *tph, int l4, in6_addr_t *src, 749dbed73cbSSangeeta Misra in_port_t sport, in6_addr_t *dst, in_port_t dport, 750dbed73cbSSangeeta Misra ilb_rule_info_t *rule_cache, uint32_t *ip_sum, uint32_t *tp_sum, 751dbed73cbSSangeeta Misra int32_t pkt_len, boolean_t c2s) 752dbed73cbSSangeeta Misra { 753dbed73cbSSangeeta Misra ilb_conn_hash_t *hash; 754dbed73cbSSangeeta Misra uint_t i; 755dbed73cbSSangeeta Misra ilb_conn_t *connp; 756dbed73cbSSangeeta Misra boolean_t tcp_alive; 757dbed73cbSSangeeta Misra boolean_t ret = B_FALSE; 758dbed73cbSSangeeta Misra 759dbed73cbSSangeeta Misra i = ILB_CONN_HASH((uint8_t *)&src->s6_addr32[3], ntohs(sport), 760dbed73cbSSangeeta Misra (uint8_t *)&dst->s6_addr32[3], ntohs(dport), 761dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size); 762dbed73cbSSangeeta Misra if (c2s) { 763dbed73cbSSangeeta Misra hash = ilbs->ilbs_c2s_conn_hash; 764dbed73cbSSangeeta Misra mutex_enter(&hash[i].ilb_conn_hash_lock); 765dbed73cbSSangeeta Misra for (connp = hash[i].ilb_connp; connp != NULL; 766dbed73cbSSangeeta Misra connp = connp->conn_c2s_next) { 767dbed73cbSSangeeta Misra if (connp->conn_l4 == l4 && 768dbed73cbSSangeeta Misra connp->conn_c2s_dport == dport && 769dbed73cbSSangeeta Misra connp->conn_c2s_sport == sport && 770dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(src, &connp->conn_c2s_saddr) && 771dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(dst, &connp->conn_c2s_daddr)) { 772d3d50737SRafael Vanoni connp->conn_c2s_atime = ddi_get_lbolt64(); 773dbed73cbSSangeeta Misra connp->conn_c2s_pkt_cnt++; 774dbed73cbSSangeeta Misra *rule_cache = connp->conn_rule_cache; 775dbed73cbSSangeeta Misra *ip_sum = connp->conn_c2s_ip_sum; 776dbed73cbSSangeeta Misra *tp_sum = connp->conn_c2s_tp_sum; 777dbed73cbSSangeeta Misra ret = B_TRUE; 778dbed73cbSSangeeta Misra break; 779dbed73cbSSangeeta Misra } 780dbed73cbSSangeeta Misra } 
781dbed73cbSSangeeta Misra } else { 782dbed73cbSSangeeta Misra hash = ilbs->ilbs_s2c_conn_hash; 783dbed73cbSSangeeta Misra mutex_enter(&hash[i].ilb_conn_hash_lock); 784dbed73cbSSangeeta Misra for (connp = hash[i].ilb_connp; connp != NULL; 785dbed73cbSSangeeta Misra connp = connp->conn_s2c_next) { 786dbed73cbSSangeeta Misra if (connp->conn_l4 == l4 && 787dbed73cbSSangeeta Misra connp->conn_s2c_dport == dport && 788dbed73cbSSangeeta Misra connp->conn_s2c_sport == sport && 789dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(src, &connp->conn_s2c_saddr) && 790dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(dst, &connp->conn_s2c_daddr)) { 791d3d50737SRafael Vanoni connp->conn_s2c_atime = ddi_get_lbolt64(); 792dbed73cbSSangeeta Misra connp->conn_s2c_pkt_cnt++; 793dbed73cbSSangeeta Misra *rule_cache = connp->conn_rule_cache; 794dbed73cbSSangeeta Misra *ip_sum = connp->conn_s2c_ip_sum; 795dbed73cbSSangeeta Misra *tp_sum = connp->conn_s2c_tp_sum; 796dbed73cbSSangeeta Misra ret = B_TRUE; 797dbed73cbSSangeeta Misra break; 798dbed73cbSSangeeta Misra } 799dbed73cbSSangeeta Misra } 800dbed73cbSSangeeta Misra } 801dbed73cbSSangeeta Misra if (ret) { 802dbed73cbSSangeeta Misra ILB_S_KSTAT(connp->conn_server, pkt_processed); 803dbed73cbSSangeeta Misra ILB_S_KSTAT_UPDATE(connp->conn_server, bytes_processed, 804dbed73cbSSangeeta Misra pkt_len); 805dbed73cbSSangeeta Misra 806dbed73cbSSangeeta Misra switch (l4) { 807dbed73cbSSangeeta Misra case (IPPROTO_TCP): 808dbed73cbSSangeeta Misra tcp_alive = update_conn_tcp(connp, iph, tph, pkt_len, 809dbed73cbSSangeeta Misra c2s); 810dbed73cbSSangeeta Misra if (!tcp_alive) { 811dbed73cbSSangeeta Misra connp->conn_gc = B_TRUE; 812dbed73cbSSangeeta Misra } 813dbed73cbSSangeeta Misra break; 814dbed73cbSSangeeta Misra default: 815dbed73cbSSangeeta Misra break; 816dbed73cbSSangeeta Misra } 817dbed73cbSSangeeta Misra } 818dbed73cbSSangeeta Misra mutex_exit(&hash[i].ilb_conn_hash_lock); 819dbed73cbSSangeeta Misra 820dbed73cbSSangeeta Misra return (ret); 
821dbed73cbSSangeeta Misra } 822dbed73cbSSangeeta Misra 823dbed73cbSSangeeta Misra /* 824dbed73cbSSangeeta Misra * To check if a give packet matches an existing conn hash entry. If it 825dbed73cbSSangeeta Misra * does, return the information about this entry so that the caller can 826dbed73cbSSangeeta Misra * do the proper NAT. 827dbed73cbSSangeeta Misra */ 828dbed73cbSSangeeta Misra boolean_t 829dbed73cbSSangeeta Misra ilb_check_conn(ilb_stack_t *ilbs, int l3, void *iph, int l4, void *tph, 830dbed73cbSSangeeta Misra in6_addr_t *src, in6_addr_t *dst, in_port_t sport, in_port_t dport, 831dbed73cbSSangeeta Misra uint32_t pkt_len, in6_addr_t *lb_dst) 832dbed73cbSSangeeta Misra { 833dbed73cbSSangeeta Misra ilb_rule_info_t rule_cache; 834dbed73cbSSangeeta Misra uint32_t adj_ip_sum, adj_tp_sum; 835dbed73cbSSangeeta Misra boolean_t ret; 836dbed73cbSSangeeta Misra 837dbed73cbSSangeeta Misra /* Check the incoming hash table. */ 838dbed73cbSSangeeta Misra if (ilb_find_conn(ilbs, iph, tph, l4, src, sport, dst, dport, 839dbed73cbSSangeeta Misra &rule_cache, &adj_ip_sum, &adj_tp_sum, pkt_len, B_TRUE)) { 840dbed73cbSSangeeta Misra switch (rule_cache.topo) { 841dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_NAT: 842dbed73cbSSangeeta Misra *lb_dst = rule_cache.info.nat_dst; 843dbed73cbSSangeeta Misra ilb_full_nat(l3, iph, l4, tph, &rule_cache.info, 844dbed73cbSSangeeta Misra adj_ip_sum, adj_tp_sum, B_TRUE); 845dbed73cbSSangeeta Misra ret = B_TRUE; 846dbed73cbSSangeeta Misra break; 847dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_HALF_NAT: 848dbed73cbSSangeeta Misra *lb_dst = rule_cache.info.nat_dst; 849dbed73cbSSangeeta Misra ilb_half_nat(l3, iph, l4, tph, &rule_cache.info, 850dbed73cbSSangeeta Misra adj_ip_sum, adj_tp_sum, B_TRUE); 851dbed73cbSSangeeta Misra ret = B_TRUE; 852dbed73cbSSangeeta Misra break; 853dbed73cbSSangeeta Misra default: 854dbed73cbSSangeeta Misra ret = B_FALSE; 855dbed73cbSSangeeta Misra break; 856dbed73cbSSangeeta Misra } 857dbed73cbSSangeeta Misra return (ret); 
858dbed73cbSSangeeta Misra } 859dbed73cbSSangeeta Misra if (ilb_find_conn(ilbs, iph, tph, l4, src, sport, dst, dport, 860dbed73cbSSangeeta Misra &rule_cache, &adj_ip_sum, &adj_tp_sum, pkt_len, B_FALSE)) { 861dbed73cbSSangeeta Misra switch (rule_cache.topo) { 862dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_NAT: 863dbed73cbSSangeeta Misra *lb_dst = rule_cache.info.src; 864dbed73cbSSangeeta Misra ilb_full_nat(l3, iph, l4, tph, &rule_cache.info, 865dbed73cbSSangeeta Misra adj_ip_sum, adj_tp_sum, B_FALSE); 866dbed73cbSSangeeta Misra ret = B_TRUE; 867dbed73cbSSangeeta Misra break; 868dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_HALF_NAT: 869dbed73cbSSangeeta Misra *lb_dst = *dst; 870dbed73cbSSangeeta Misra ilb_half_nat(l3, iph, l4, tph, &rule_cache.info, 871dbed73cbSSangeeta Misra adj_ip_sum, adj_tp_sum, B_FALSE); 872dbed73cbSSangeeta Misra ret = B_TRUE; 873dbed73cbSSangeeta Misra break; 874dbed73cbSSangeeta Misra default: 875dbed73cbSSangeeta Misra ret = B_FALSE; 876dbed73cbSSangeeta Misra break; 877dbed73cbSSangeeta Misra } 878dbed73cbSSangeeta Misra return (ret); 879dbed73cbSSangeeta Misra } 880dbed73cbSSangeeta Misra 881dbed73cbSSangeeta Misra return (B_FALSE); 882dbed73cbSSangeeta Misra } 883dbed73cbSSangeeta Misra 884dbed73cbSSangeeta Misra /* 885dbed73cbSSangeeta Misra * To check if an ICMP packet belongs to a connection in one of the conn 886dbed73cbSSangeeta Misra * hash entries. 
887dbed73cbSSangeeta Misra */ 888dbed73cbSSangeeta Misra boolean_t 889dbed73cbSSangeeta Misra ilb_check_icmp_conn(ilb_stack_t *ilbs, mblk_t *mp, int l3, void *out_iph, 890dbed73cbSSangeeta Misra void *icmph, in6_addr_t *lb_dst) 891dbed73cbSSangeeta Misra { 892dbed73cbSSangeeta Misra ilb_conn_hash_t *hash; 893dbed73cbSSangeeta Misra ipha_t *in_iph4; 894dbed73cbSSangeeta Misra ip6_t *in_iph6; 895dbed73cbSSangeeta Misra icmph_t *icmph4; 896dbed73cbSSangeeta Misra icmp6_t *icmph6; 897dbed73cbSSangeeta Misra in6_addr_t *in_src_p, *in_dst_p; 898dbed73cbSSangeeta Misra in_port_t *sport, *dport; 899dbed73cbSSangeeta Misra int l4; 900dbed73cbSSangeeta Misra uint_t i; 901dbed73cbSSangeeta Misra ilb_conn_t *connp; 902dbed73cbSSangeeta Misra ilb_rule_info_t rule_cache; 903dbed73cbSSangeeta Misra uint32_t adj_ip_sum; 904dbed73cbSSangeeta Misra boolean_t full_nat; 905dbed73cbSSangeeta Misra 906dbed73cbSSangeeta Misra if (l3 == IPPROTO_IP) { 907dbed73cbSSangeeta Misra in6_addr_t in_src, in_dst; 908dbed73cbSSangeeta Misra 909dbed73cbSSangeeta Misra icmph4 = (icmph_t *)icmph; 910dbed73cbSSangeeta Misra in_iph4 = (ipha_t *)&icmph4[1]; 911dbed73cbSSangeeta Misra 912dbed73cbSSangeeta Misra if ((uint8_t *)in_iph4 + IPH_HDR_LENGTH(in_iph4) + 913dbed73cbSSangeeta Misra ICMP_MIN_TP_HDR_LEN > mp->b_wptr) { 914dbed73cbSSangeeta Misra return (B_FALSE); 915dbed73cbSSangeeta Misra } 916dbed73cbSSangeeta Misra 917dbed73cbSSangeeta Misra IN6_IPADDR_TO_V4MAPPED(in_iph4->ipha_src, &in_src); 918dbed73cbSSangeeta Misra in_src_p = &in_src; 919dbed73cbSSangeeta Misra IN6_IPADDR_TO_V4MAPPED(in_iph4->ipha_dst, &in_dst); 920dbed73cbSSangeeta Misra in_dst_p = &in_dst; 921dbed73cbSSangeeta Misra 922dbed73cbSSangeeta Misra l4 = in_iph4->ipha_protocol; 923dbed73cbSSangeeta Misra if (l4 != IPPROTO_TCP && l4 != IPPROTO_UDP) 924dbed73cbSSangeeta Misra return (B_FALSE); 925dbed73cbSSangeeta Misra 926dbed73cbSSangeeta Misra sport = (in_port_t *)((char *)in_iph4 + 927dbed73cbSSangeeta Misra 
IPH_HDR_LENGTH(in_iph4)); 928dbed73cbSSangeeta Misra dport = sport + 1; 929dbed73cbSSangeeta Misra 930dbed73cbSSangeeta Misra DTRACE_PROBE4(ilb__chk__icmp__conn__v4, uint32_t, 931dbed73cbSSangeeta Misra in_iph4->ipha_src, uint32_t, in_iph4->ipha_dst, uint16_t, 932dbed73cbSSangeeta Misra ntohs(*sport), uint16_t, ntohs(*dport)); 933dbed73cbSSangeeta Misra } else { 934dbed73cbSSangeeta Misra ASSERT(l3 == IPPROTO_IPV6); 935dbed73cbSSangeeta Misra 936dbed73cbSSangeeta Misra icmph6 = (icmp6_t *)icmph; 937dbed73cbSSangeeta Misra in_iph6 = (ip6_t *)&icmph6[1]; 938dbed73cbSSangeeta Misra in_src_p = &in_iph6->ip6_src; 939dbed73cbSSangeeta Misra in_dst_p = &in_iph6->ip6_dst; 940dbed73cbSSangeeta Misra 941dbed73cbSSangeeta Misra if ((uint8_t *)in_iph6 + sizeof (ip6_t) + 942dbed73cbSSangeeta Misra ICMP_MIN_TP_HDR_LEN > mp->b_wptr) { 943dbed73cbSSangeeta Misra return (B_FALSE); 944dbed73cbSSangeeta Misra } 945dbed73cbSSangeeta Misra 946dbed73cbSSangeeta Misra l4 = in_iph6->ip6_nxt; 947dbed73cbSSangeeta Misra /* We don't go deep inside an IPv6 packet yet. 
*/ 948dbed73cbSSangeeta Misra if (l4 != IPPROTO_TCP && l4 != IPPROTO_UDP) 949dbed73cbSSangeeta Misra return (B_FALSE); 950dbed73cbSSangeeta Misra 951dbed73cbSSangeeta Misra sport = (in_port_t *)&in_iph6[1]; 952dbed73cbSSangeeta Misra dport = sport + 1; 953dbed73cbSSangeeta Misra 954dbed73cbSSangeeta Misra DTRACE_PROBE4(ilb__chk__icmp__conn__v6, in6_addr_t *, 955dbed73cbSSangeeta Misra &in_iph6->ip6_src, in6_addr_t *, &in_iph6->ip6_dst, 956dbed73cbSSangeeta Misra uint16_t, ntohs(*sport), uint16_t, ntohs(*dport)); 957dbed73cbSSangeeta Misra } 958dbed73cbSSangeeta Misra 959dbed73cbSSangeeta Misra i = ILB_CONN_HASH((uint8_t *)&in_dst_p->s6_addr32[3], ntohs(*dport), 960dbed73cbSSangeeta Misra (uint8_t *)&in_src_p->s6_addr32[3], ntohs(*sport), 961dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size); 962dbed73cbSSangeeta Misra hash = ilbs->ilbs_c2s_conn_hash; 963dbed73cbSSangeeta Misra 964dbed73cbSSangeeta Misra mutex_enter(&hash[i].ilb_conn_hash_lock); 965dbed73cbSSangeeta Misra for (connp = hash[i].ilb_connp; connp != NULL; 966dbed73cbSSangeeta Misra connp = connp->conn_c2s_next) { 967dbed73cbSSangeeta Misra if (connp->conn_l4 == l4 && 968dbed73cbSSangeeta Misra connp->conn_c2s_dport == *sport && 969dbed73cbSSangeeta Misra connp->conn_c2s_sport == *dport && 970dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(in_dst_p, &connp->conn_c2s_saddr) && 971dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(in_src_p, &connp->conn_c2s_daddr)) { 972d3d50737SRafael Vanoni connp->conn_c2s_atime = ddi_get_lbolt64(); 973dbed73cbSSangeeta Misra connp->conn_c2s_pkt_cnt++; 974dbed73cbSSangeeta Misra rule_cache = connp->conn_rule_cache; 975dbed73cbSSangeeta Misra adj_ip_sum = connp->conn_c2s_ip_sum; 976dbed73cbSSangeeta Misra break; 977dbed73cbSSangeeta Misra } 978dbed73cbSSangeeta Misra } 979dbed73cbSSangeeta Misra mutex_exit(&hash[i].ilb_conn_hash_lock); 980dbed73cbSSangeeta Misra 981dbed73cbSSangeeta Misra if (connp == NULL) { 982dbed73cbSSangeeta Misra DTRACE_PROBE(ilb__chk__icmp__conn__failed); 
983dbed73cbSSangeeta Misra return (B_FALSE); 984dbed73cbSSangeeta Misra } 985dbed73cbSSangeeta Misra 986dbed73cbSSangeeta Misra switch (rule_cache.topo) { 987dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_NAT: 988dbed73cbSSangeeta Misra full_nat = B_TRUE; 989dbed73cbSSangeeta Misra break; 990dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_HALF_NAT: 991dbed73cbSSangeeta Misra full_nat = B_FALSE; 992dbed73cbSSangeeta Misra break; 993dbed73cbSSangeeta Misra default: 994dbed73cbSSangeeta Misra return (B_FALSE); 995dbed73cbSSangeeta Misra } 996dbed73cbSSangeeta Misra 997dbed73cbSSangeeta Misra *lb_dst = rule_cache.info.nat_dst; 998dbed73cbSSangeeta Misra if (l3 == IPPROTO_IP) { 999dbed73cbSSangeeta Misra ilb_nat_icmpv4(mp, out_iph, icmph4, in_iph4, sport, dport, 1000dbed73cbSSangeeta Misra &rule_cache.info, adj_ip_sum, full_nat); 1001dbed73cbSSangeeta Misra } else { 1002dbed73cbSSangeeta Misra ilb_nat_icmpv6(mp, out_iph, icmph6, in_iph6, sport, dport, 1003dbed73cbSSangeeta Misra &rule_cache.info, full_nat); 1004dbed73cbSSangeeta Misra } 1005dbed73cbSSangeeta Misra return (B_TRUE); 1006dbed73cbSSangeeta Misra } 1007dbed73cbSSangeeta Misra 1008dbed73cbSSangeeta Misra /* 1009dbed73cbSSangeeta Misra * This routine sends up the conn hash table to user land. Note that the 1010dbed73cbSSangeeta Misra * request is an ioctl, hence we cannot really differentiate requests 1011dbed73cbSSangeeta Misra * from different clients. There is no context shared between different 1012dbed73cbSSangeeta Misra * ioctls. Here we make the assumption that the user land ilbd will 1013dbed73cbSSangeeta Misra * only allow one client to show the conn hash table at any time. 1014dbed73cbSSangeeta Misra * Otherwise, the results will be "very" inconsistent. 1015dbed73cbSSangeeta Misra * 1016dbed73cbSSangeeta Misra * In each ioctl, a flag (ILB_LIST_BEGIN) indicates whether the client wants 1017dbed73cbSSangeeta Misra * to read from the beginning of the able. 
 * After a certain number of entries
 * are reported, the kernel remembers the position of the last returned
 * entry.  When the next ioctl comes in with the ILB_LIST_CONT flag,
 * it will return entries starting from where it was left off.  When
 * the end of table is reached, a flag (ILB_LIST_END) is set to tell
 * the client that there is no more entry.
 *
 * It is assumed that the caller has checked the size of nat so that it
 * can hold num entries.
 *
 * On entry *num is the maximum number of entries to report; on a successful
 * return it is the number of entries actually written to nat.  The zoneid
 * argument is not used.
 *
 * Returns 0 on success, EINTR if cv_wait_sig() returns 0 while waiting for
 * another listing to finish, and EINVAL if neither ILB_LIST_BEGIN nor
 * ILB_LIST_CONT is set in *flags.
 */
/* ARGSUSED */
int
ilb_list_nat(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_nat_entry_t *nat,
    uint32_t *num, uint32_t *flags)
{
	ilb_conn_hash_t *hash;
	ilb_conn_t *cur_connp;
	uint32_t i, j;
	int ret = 0;

	/* Only one listing may walk the table at a time. */
	mutex_enter(&ilbs->ilbs_conn_list_lock);
	while (ilbs->ilbs_conn_list_busy) {
		if (cv_wait_sig(&ilbs->ilbs_conn_list_cv,
		    &ilbs->ilbs_conn_list_lock) == 0) {
			mutex_exit(&ilbs->ilbs_conn_list_lock);
			return (EINTR);
		}
	}
	/* No hash table: report an empty, finished listing. */
	if ((hash = ilbs->ilbs_c2s_conn_hash) == NULL) {
		ASSERT(ilbs->ilbs_s2c_conn_hash == NULL);
		mutex_exit(&ilbs->ilbs_conn_list_lock);
		*num = 0;
		*flags |= ILB_LIST_END;
		return (0);
	}
	ilbs->ilbs_conn_list_busy = B_TRUE;
	mutex_exit(&ilbs->ilbs_conn_list_lock);

	/*
	 * Pick the starting bucket and entry.  Except on the paths that
	 * jump to done, the lock of bucket i is held when this if/else
	 * chain is left.
	 */
	if (*flags & ILB_LIST_BEGIN) {
		i = 0;
		mutex_enter(&hash[0].ilb_conn_hash_lock);
		cur_connp = hash[0].ilb_connp;
	} else if (*flags & ILB_LIST_CONT) {
		/* The previous call already reached the end of the table. */
		if (ilbs->ilbs_conn_list_cur == ilbs->ilbs_conn_hash_size) {
			*num = 0;
			*flags |= ILB_LIST_END;
			goto done;
		}
		/* Resume from the bucket and entry saved by the last call. */
		i = ilbs->ilbs_conn_list_cur;
		mutex_enter(&hash[i].ilb_conn_hash_lock);
		cur_connp = ilbs->ilbs_conn_list_connp;
	} else {
		ret = EINVAL;
		goto done;
	}

	j = 0;
	while (j < *num) {
		if (cur_connp == NULL) {
			/*
			 * End of this bucket's chain: drop its lock and move
			 * on to the next bucket.  When the last bucket is
			 * done the loop is left with no bucket lock held.
			 */
			mutex_exit(&hash[i].ilb_conn_hash_lock);
			if (++i == ilbs->ilbs_conn_hash_size) {
				*flags |= ILB_LIST_END;
				break;
			}
			mutex_enter(&hash[i].ilb_conn_hash_lock);
			cur_connp = hash[i].ilb_connp;
			continue;
		}
		nat[j].proto = cur_connp->conn_l4;

		/* The "global" fields come from the c2s side of the conn. */
		nat[j].in_global = cur_connp->conn_c2s_daddr;
		nat[j].in_global_port = cur_connp->conn_c2s_dport;
		nat[j].out_global = cur_connp->conn_c2s_saddr;
		nat[j].out_global_port = cur_connp->conn_c2s_sport;

		/* The "local" fields come from the s2c side of the conn. */
		nat[j].in_local = cur_connp->conn_s2c_saddr;
		nat[j].in_local_port = cur_connp->conn_s2c_sport;
		nat[j].out_local = cur_connp->conn_s2c_daddr;
		nat[j].out_local_port = cur_connp->conn_s2c_dport;

		nat[j].create_time = TICK_TO_MSEC(cur_connp->conn_cr_time);
		nat[j].last_access_time =
		    TICK_TO_MSEC(cur_connp->conn_c2s_atime);

		/*
		 * The conn_s2c_pkt_cnt may not be accurate since we are not
		 * holding the s2c hash lock.
		 */
		nat[j].pkt_cnt = cur_connp->conn_c2s_pkt_cnt +
		    cur_connp->conn_s2c_pkt_cnt;
		j++;

		cur_connp = cur_connp->conn_c2s_next;
	}
	/* Remember where to resume for a later ILB_LIST_CONT request. */
	ilbs->ilbs_conn_list_connp = cur_connp;
	/*
	 * If the loop ended because the buffer is full, the lock of bucket i
	 * is still held.  If it ended at the end of the table (j < *num),
	 * the lock was already dropped inside the loop.
	 */
	if (j == *num)
		mutex_exit(&hash[i].ilb_conn_hash_lock);

	ilbs->ilbs_conn_list_cur = i;

	*num = j;
done:
	/* Let the next waiting lister in. */
	mutex_enter(&ilbs->ilbs_conn_list_lock);
	ilbs->ilbs_conn_list_busy = B_FALSE;
	cv_signal(&ilbs->ilbs_conn_list_cv);
	mutex_exit(&ilbs->ilbs_conn_list_lock);

	return (ret);
}


/*
Misra * Stickiness (persistence) handling routines. 1130dbed73cbSSangeeta Misra */ 1131dbed73cbSSangeeta Misra 1132dbed73cbSSangeeta Misra 1133dbed73cbSSangeeta Misra static void 1134dbed73cbSSangeeta Misra ilb_sticky_cache_init(void) 1135dbed73cbSSangeeta Misra { 1136dbed73cbSSangeeta Misra ilb_sticky_cache = kmem_cache_create("ilb_sticky_cache", 1137dbed73cbSSangeeta Misra sizeof (ilb_sticky_t), 0, NULL, NULL, NULL, NULL, NULL, 1138dbed73cbSSangeeta Misra ilb_kmem_flags); 1139dbed73cbSSangeeta Misra } 1140dbed73cbSSangeeta Misra 1141dbed73cbSSangeeta Misra void 1142dbed73cbSSangeeta Misra ilb_sticky_cache_fini(void) 1143dbed73cbSSangeeta Misra { 1144dbed73cbSSangeeta Misra if (ilb_sticky_cache != NULL) { 1145dbed73cbSSangeeta Misra kmem_cache_destroy(ilb_sticky_cache); 1146dbed73cbSSangeeta Misra ilb_sticky_cache = NULL; 1147dbed73cbSSangeeta Misra } 1148dbed73cbSSangeeta Misra } 1149dbed73cbSSangeeta Misra 1150dbed73cbSSangeeta Misra void 1151dbed73cbSSangeeta Misra ilb_sticky_refrele(ilb_sticky_t *s) 1152dbed73cbSSangeeta Misra { 1153dbed73cbSSangeeta Misra ILB_STICKY_REFRELE(s); 1154dbed73cbSSangeeta Misra } 1155dbed73cbSSangeeta Misra 1156dbed73cbSSangeeta Misra static ilb_sticky_t * 1157dbed73cbSSangeeta Misra ilb_sticky_lookup(ilb_sticky_hash_t *hash, ilb_rule_t *rule, in6_addr_t *src) 1158dbed73cbSSangeeta Misra { 1159dbed73cbSSangeeta Misra ilb_sticky_t *s; 1160dbed73cbSSangeeta Misra 1161dbed73cbSSangeeta Misra ASSERT(mutex_owned(&hash->sticky_lock)); 1162dbed73cbSSangeeta Misra 1163dbed73cbSSangeeta Misra for (s = list_head(&hash->sticky_head); s != NULL; 1164dbed73cbSSangeeta Misra s = list_next(&hash->sticky_head, s)) { 1165dbed73cbSSangeeta Misra if (s->rule_instance == rule->ir_ks_instance) { 1166dbed73cbSSangeeta Misra if (IN6_ARE_ADDR_EQUAL(src, &s->src)) 1167dbed73cbSSangeeta Misra return (s); 1168dbed73cbSSangeeta Misra } 1169dbed73cbSSangeeta Misra } 1170dbed73cbSSangeeta Misra return (NULL); 1171dbed73cbSSangeeta Misra } 1172dbed73cbSSangeeta 
Misra 1173dbed73cbSSangeeta Misra static ilb_sticky_t * 1174dbed73cbSSangeeta Misra ilb_sticky_add(ilb_sticky_hash_t *hash, ilb_rule_t *rule, ilb_server_t *server, 1175dbed73cbSSangeeta Misra in6_addr_t *src) 1176dbed73cbSSangeeta Misra { 1177dbed73cbSSangeeta Misra ilb_sticky_t *s; 1178dbed73cbSSangeeta Misra 1179dbed73cbSSangeeta Misra ASSERT(mutex_owned(&hash->sticky_lock)); 1180dbed73cbSSangeeta Misra 1181dbed73cbSSangeeta Misra if ((s = kmem_cache_alloc(ilb_sticky_cache, KM_NOSLEEP)) == NULL) 1182dbed73cbSSangeeta Misra return (NULL); 1183dbed73cbSSangeeta Misra 1184dbed73cbSSangeeta Misra /* 1185dbed73cbSSangeeta Misra * The rule instance is for handling the scenario when the same 1186dbed73cbSSangeeta Misra * client talks to different rules at the same time. Stickiness 1187dbed73cbSSangeeta Misra * is per rule so we can use the rule instance to differentiate 1188dbed73cbSSangeeta Misra * the client's request. 1189dbed73cbSSangeeta Misra */ 1190dbed73cbSSangeeta Misra s->rule_instance = rule->ir_ks_instance; 1191dbed73cbSSangeeta Misra /* 1192dbed73cbSSangeeta Misra * Copy the rule name for listing all sticky cache entry. ir_name 1193dbed73cbSSangeeta Misra * is guaranteed to be NULL terminated. 1194dbed73cbSSangeeta Misra */ 1195dbed73cbSSangeeta Misra (void) strcpy(s->rule_name, rule->ir_name); 1196dbed73cbSSangeeta Misra s->server = server; 1197dbed73cbSSangeeta Misra 1198dbed73cbSSangeeta Misra /* 1199dbed73cbSSangeeta Misra * Grab a ref cnt on the server so that it won't go away while 1200dbed73cbSSangeeta Misra * it is still in the sticky table. 
1201dbed73cbSSangeeta Misra */ 1202dbed73cbSSangeeta Misra ILB_SERVER_REFHOLD(server); 1203dbed73cbSSangeeta Misra s->src = *src; 1204dbed73cbSSangeeta Misra s->expiry = rule->ir_sticky_expiry; 1205dbed73cbSSangeeta Misra s->refcnt = 1; 1206dbed73cbSSangeeta Misra s->hash = hash; 1207dbed73cbSSangeeta Misra 1208dbed73cbSSangeeta Misra /* 1209dbed73cbSSangeeta Misra * There is no need to set atime here since the refcnt is not 1210dbed73cbSSangeeta Misra * zero. A sticky entry is removed only when the refcnt is 1211dbed73cbSSangeeta Misra * zero. But just set it here for debugging purpose. The 1212dbed73cbSSangeeta Misra * atime is set when a refrele is done on a sticky entry. 1213dbed73cbSSangeeta Misra */ 1214d3d50737SRafael Vanoni s->atime = ddi_get_lbolt64(); 1215dbed73cbSSangeeta Misra 1216dbed73cbSSangeeta Misra list_insert_head(&hash->sticky_head, s); 1217dbed73cbSSangeeta Misra hash->sticky_cnt++; 1218dbed73cbSSangeeta Misra return (s); 1219dbed73cbSSangeeta Misra } 1220dbed73cbSSangeeta Misra 1221dbed73cbSSangeeta Misra /* 1222dbed73cbSSangeeta Misra * This routine checks if there is an existing sticky entry which matches 1223dbed73cbSSangeeta Misra * a given packet. If there is one, return it. If there is not, create 1224dbed73cbSSangeeta Misra * a sticky entry using the packet's info. 
1225dbed73cbSSangeeta Misra */ 1226dbed73cbSSangeeta Misra ilb_server_t * 1227dbed73cbSSangeeta Misra ilb_sticky_find_add(ilb_stack_t *ilbs, ilb_rule_t *rule, in6_addr_t *src, 1228dbed73cbSSangeeta Misra ilb_server_t *server, ilb_sticky_t **res, uint16_t *src_ent_idx) 1229dbed73cbSSangeeta Misra { 1230dbed73cbSSangeeta Misra int i; 1231dbed73cbSSangeeta Misra ilb_sticky_hash_t *hash; 1232dbed73cbSSangeeta Misra ilb_sticky_t *s; 1233dbed73cbSSangeeta Misra 1234dbed73cbSSangeeta Misra ASSERT(server != NULL); 1235dbed73cbSSangeeta Misra 1236dbed73cbSSangeeta Misra *res = NULL; 1237dbed73cbSSangeeta Misra 1238dbed73cbSSangeeta Misra i = ILB_STICKY_HASH((uint8_t *)&src->s6_addr32[3], 1239dbed73cbSSangeeta Misra (uint32_t)(uintptr_t)rule, ilbs->ilbs_sticky_hash_size); 1240dbed73cbSSangeeta Misra hash = &ilbs->ilbs_sticky_hash[i]; 1241dbed73cbSSangeeta Misra 1242dbed73cbSSangeeta Misra /* First check if there is already an entry. */ 1243dbed73cbSSangeeta Misra mutex_enter(&hash->sticky_lock); 1244dbed73cbSSangeeta Misra s = ilb_sticky_lookup(hash, rule, src); 1245dbed73cbSSangeeta Misra 1246dbed73cbSSangeeta Misra /* No sticky entry, add one. */ 1247dbed73cbSSangeeta Misra if (s == NULL) { 1248dbed73cbSSangeeta Misra add_new_entry: 1249dbed73cbSSangeeta Misra s = ilb_sticky_add(hash, rule, server, src); 1250dbed73cbSSangeeta Misra if (s == NULL) { 1251dbed73cbSSangeeta Misra mutex_exit(&hash->sticky_lock); 1252dbed73cbSSangeeta Misra return (NULL); 1253dbed73cbSSangeeta Misra } 1254dbed73cbSSangeeta Misra /* 1255dbed73cbSSangeeta Misra * Find a source for this server. All subseqent requests from 1256dbed73cbSSangeeta Misra * the same client matching this sticky entry will use this 1257dbed73cbSSangeeta Misra * source address in doing NAT. The current algorithm is 1258dbed73cbSSangeeta Misra * simple, rotate the source address. 
Note that the 1259dbed73cbSSangeeta Misra * source address array does not change after it's created, so 1260dbed73cbSSangeeta Misra * it is OK to just increment the cur index. 1261dbed73cbSSangeeta Misra */ 1262dbed73cbSSangeeta Misra if (server->iser_nat_src != NULL) { 1263dbed73cbSSangeeta Misra /* It is a hint, does not need to be atomic. */ 1264dbed73cbSSangeeta Misra *src_ent_idx = (server->iser_nat_src->cur++ % 1265dbed73cbSSangeeta Misra server->iser_nat_src->num_src); 1266dbed73cbSSangeeta Misra s->nat_src_idx = *src_ent_idx; 1267dbed73cbSSangeeta Misra } 1268dbed73cbSSangeeta Misra mutex_exit(&hash->sticky_lock); 1269dbed73cbSSangeeta Misra *res = s; 1270dbed73cbSSangeeta Misra return (server); 1271dbed73cbSSangeeta Misra } 1272dbed73cbSSangeeta Misra 1273dbed73cbSSangeeta Misra /* 1274dbed73cbSSangeeta Misra * We don't hold any lock accessing iser_enabled. Refer to the 1275dbed73cbSSangeeta Misra * comment in ilb_server_add() about iser_lock. 1276dbed73cbSSangeeta Misra */ 1277dbed73cbSSangeeta Misra if (!s->server->iser_enabled) { 1278dbed73cbSSangeeta Misra /* 1279dbed73cbSSangeeta Misra * s->server == server can only happen if there is a race in 1280dbed73cbSSangeeta Misra * toggling the iser_enabled flag (we don't hold a lock doing 1281dbed73cbSSangeeta Misra * that) so that the load balance algorithm still returns a 1282dbed73cbSSangeeta Misra * disabled server. In this case, just drop the packet... 1283dbed73cbSSangeeta Misra */ 1284dbed73cbSSangeeta Misra if (s->server == server) { 1285dbed73cbSSangeeta Misra mutex_exit(&hash->sticky_lock); 1286dbed73cbSSangeeta Misra return (NULL); 1287dbed73cbSSangeeta Misra } 1288dbed73cbSSangeeta Misra 1289dbed73cbSSangeeta Misra /* 1290dbed73cbSSangeeta Misra * The old server is disabled and there is a new server, use 1291dbed73cbSSangeeta Misra * the new one to create a sticky entry. 
Since we will 1292dbed73cbSSangeeta Misra * add the entry at the beginning, subsequent lookup will 1293dbed73cbSSangeeta Misra * find this new entry instead of the old one. 1294dbed73cbSSangeeta Misra */ 1295dbed73cbSSangeeta Misra goto add_new_entry; 1296dbed73cbSSangeeta Misra } 1297dbed73cbSSangeeta Misra 1298dbed73cbSSangeeta Misra s->refcnt++; 1299dbed73cbSSangeeta Misra *res = s; 1300dbed73cbSSangeeta Misra mutex_exit(&hash->sticky_lock); 1301dbed73cbSSangeeta Misra if (server->iser_nat_src != NULL) 1302dbed73cbSSangeeta Misra *src_ent_idx = s->nat_src_idx; 1303dbed73cbSSangeeta Misra return (s->server); 1304dbed73cbSSangeeta Misra } 1305dbed73cbSSangeeta Misra 1306dbed73cbSSangeeta Misra static void 1307dbed73cbSSangeeta Misra ilb_sticky_cleanup(void *arg) 1308dbed73cbSSangeeta Misra { 1309dbed73cbSSangeeta Misra ilb_timer_t *timer = (ilb_timer_t *)arg; 1310dbed73cbSSangeeta Misra uint32_t i; 1311dbed73cbSSangeeta Misra ilb_stack_t *ilbs; 1312dbed73cbSSangeeta Misra ilb_sticky_hash_t *hash; 1313dbed73cbSSangeeta Misra ilb_sticky_t *s, *nxt_s; 1314dbed73cbSSangeeta Misra int64_t now, expiry; 1315dbed73cbSSangeeta Misra 1316dbed73cbSSangeeta Misra ilbs = timer->ilbs; 1317dbed73cbSSangeeta Misra hash = ilbs->ilbs_sticky_hash; 1318dbed73cbSSangeeta Misra ASSERT(hash != NULL); 1319dbed73cbSSangeeta Misra 1320d3d50737SRafael Vanoni now = ddi_get_lbolt64(); 1321dbed73cbSSangeeta Misra for (i = timer->start; i < timer->end; i++) { 1322dbed73cbSSangeeta Misra mutex_enter(&hash[i].sticky_lock); 1323dbed73cbSSangeeta Misra for (s = list_head(&hash[i].sticky_head); s != NULL; 1324dbed73cbSSangeeta Misra s = nxt_s) { 1325dbed73cbSSangeeta Misra nxt_s = list_next(&hash[i].sticky_head, s); 1326dbed73cbSSangeeta Misra if (s->refcnt != 0) 1327dbed73cbSSangeeta Misra continue; 1328dbed73cbSSangeeta Misra expiry = now - SEC_TO_TICK(s->expiry); 1329dbed73cbSSangeeta Misra if (s->atime < expiry) { 1330dbed73cbSSangeeta Misra ILB_SERVER_REFRELE(s->server); 1331dbed73cbSSangeeta 
Misra list_remove(&hash[i].sticky_head, s); 1332dbed73cbSSangeeta Misra kmem_cache_free(ilb_sticky_cache, s); 1333dbed73cbSSangeeta Misra hash[i].sticky_cnt--; 1334dbed73cbSSangeeta Misra } 1335dbed73cbSSangeeta Misra } 1336dbed73cbSSangeeta Misra mutex_exit(&hash[i].sticky_lock); 1337dbed73cbSSangeeta Misra } 1338dbed73cbSSangeeta Misra } 1339dbed73cbSSangeeta Misra 1340dbed73cbSSangeeta Misra static void 1341dbed73cbSSangeeta Misra ilb_sticky_timer(void *arg) 1342dbed73cbSSangeeta Misra { 1343dbed73cbSSangeeta Misra ilb_timer_t *timer = (ilb_timer_t *)arg; 1344dbed73cbSSangeeta Misra 1345dbed73cbSSangeeta Misra (void) taskq_dispatch(timer->ilbs->ilbs_sticky_taskq, 1346dbed73cbSSangeeta Misra ilb_sticky_cleanup, arg, TQ_SLEEP); 1347dbed73cbSSangeeta Misra mutex_enter(&timer->tid_lock); 1348dbed73cbSSangeeta Misra if (timer->tid == 0) { 1349dbed73cbSSangeeta Misra mutex_exit(&timer->tid_lock); 1350dbed73cbSSangeeta Misra } else { 1351dbed73cbSSangeeta Misra timer->tid = timeout(ilb_sticky_timer, arg, 1352dbed73cbSSangeeta Misra SEC_TO_TICK(ilb_sticky_timeout)); 1353dbed73cbSSangeeta Misra mutex_exit(&timer->tid_lock); 1354dbed73cbSSangeeta Misra } 1355dbed73cbSSangeeta Misra } 1356dbed73cbSSangeeta Misra 1357dbed73cbSSangeeta Misra void 1358dbed73cbSSangeeta Misra ilb_sticky_hash_init(ilb_stack_t *ilbs) 1359dbed73cbSSangeeta Misra { 1360dbed73cbSSangeeta Misra extern pri_t minclsyspri; 1361dbed73cbSSangeeta Misra int i, part; 1362dbed73cbSSangeeta Misra char tq_name[TASKQ_NAMELEN]; 1363dbed73cbSSangeeta Misra ilb_timer_t *tm; 1364dbed73cbSSangeeta Misra 1365de710d24SJosef 'Jeff' Sipek if (!ISP2(ilbs->ilbs_sticky_hash_size)) { 1366dbed73cbSSangeeta Misra for (i = 0; i < 31; i++) { 1367dbed73cbSSangeeta Misra if (ilbs->ilbs_sticky_hash_size < (1 << i)) 1368dbed73cbSSangeeta Misra break; 1369dbed73cbSSangeeta Misra } 1370dbed73cbSSangeeta Misra ilbs->ilbs_sticky_hash_size = 1 << i; 1371dbed73cbSSangeeta Misra } 1372dbed73cbSSangeeta Misra 1373dbed73cbSSangeeta Misra 
ilbs->ilbs_sticky_hash = kmem_zalloc(sizeof (ilb_sticky_hash_t) * 1374dbed73cbSSangeeta Misra ilbs->ilbs_sticky_hash_size, KM_SLEEP); 1375dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_sticky_hash_size; i++) { 1376dbed73cbSSangeeta Misra mutex_init(&ilbs->ilbs_sticky_hash[i].sticky_lock, NULL, 1377dbed73cbSSangeeta Misra MUTEX_DEFAULT, NULL); 1378dbed73cbSSangeeta Misra list_create(&ilbs->ilbs_sticky_hash[i].sticky_head, 1379dbed73cbSSangeeta Misra sizeof (ilb_sticky_t), 1380dbed73cbSSangeeta Misra offsetof(ilb_sticky_t, list)); 1381dbed73cbSSangeeta Misra } 1382dbed73cbSSangeeta Misra 1383dbed73cbSSangeeta Misra if (ilb_sticky_cache == NULL) 1384dbed73cbSSangeeta Misra ilb_sticky_cache_init(); 1385dbed73cbSSangeeta Misra 1386dbed73cbSSangeeta Misra (void) snprintf(tq_name, sizeof (tq_name), "ilb_sticky_taskq_%p", 13876e0672acSSangeeta Misra (void *)ilbs->ilbs_netstack); 1388dbed73cbSSangeeta Misra ASSERT(ilbs->ilbs_sticky_taskq == NULL); 1389dbed73cbSSangeeta Misra ilbs->ilbs_sticky_taskq = taskq_create(tq_name, 1390dbed73cbSSangeeta Misra ilb_sticky_timer_size * 2, minclsyspri, ilb_sticky_timer_size, 1391dbed73cbSSangeeta Misra ilb_sticky_timer_size * 2, TASKQ_PREPOPULATE|TASKQ_DYNAMIC); 1392dbed73cbSSangeeta Misra 1393dbed73cbSSangeeta Misra ASSERT(ilbs->ilbs_sticky_timer_list == NULL); 1394dbed73cbSSangeeta Misra ilbs->ilbs_sticky_timer_list = kmem_zalloc(sizeof (ilb_timer_t) * 1395dbed73cbSSangeeta Misra ilb_sticky_timer_size, KM_SLEEP); 1396dbed73cbSSangeeta Misra part = ilbs->ilbs_sticky_hash_size / ilb_sticky_timer_size + 1; 1397dbed73cbSSangeeta Misra for (i = 0; i < ilb_sticky_timer_size; i++) { 1398dbed73cbSSangeeta Misra tm = ilbs->ilbs_sticky_timer_list + i; 1399dbed73cbSSangeeta Misra tm->start = i * part; 1400dbed73cbSSangeeta Misra tm->end = i * part + part; 1401dbed73cbSSangeeta Misra if (tm->end > ilbs->ilbs_sticky_hash_size) 1402dbed73cbSSangeeta Misra tm->end = ilbs->ilbs_sticky_hash_size; 1403dbed73cbSSangeeta Misra tm->ilbs = ilbs; 
1404dbed73cbSSangeeta Misra mutex_init(&tm->tid_lock, NULL, MUTEX_DEFAULT, NULL); 1405dbed73cbSSangeeta Misra /* Spread out the starting execution time of all the timers. */ 1406dbed73cbSSangeeta Misra tm->tid = timeout(ilb_sticky_timer, tm, 1407dbed73cbSSangeeta Misra SEC_TO_TICK(ilb_sticky_timeout + i)); 1408dbed73cbSSangeeta Misra } 1409dbed73cbSSangeeta Misra } 1410dbed73cbSSangeeta Misra 1411dbed73cbSSangeeta Misra void 1412dbed73cbSSangeeta Misra ilb_sticky_hash_fini(ilb_stack_t *ilbs) 1413dbed73cbSSangeeta Misra { 1414dbed73cbSSangeeta Misra int i; 1415dbed73cbSSangeeta Misra ilb_sticky_t *s; 1416dbed73cbSSangeeta Misra 1417dbed73cbSSangeeta Misra if (ilbs->ilbs_sticky_hash == NULL) 1418dbed73cbSSangeeta Misra return; 1419dbed73cbSSangeeta Misra 1420dbed73cbSSangeeta Misra /* Stop all the timers first. */ 1421dbed73cbSSangeeta Misra for (i = 0; i < ilb_sticky_timer_size; i++) { 1422dbed73cbSSangeeta Misra timeout_id_t tid; 1423dbed73cbSSangeeta Misra 1424dbed73cbSSangeeta Misra /* Setting tid to 0 tells the timer handler not to restart. 
*/ 1425dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_sticky_timer_list[i].tid_lock); 1426dbed73cbSSangeeta Misra tid = ilbs->ilbs_sticky_timer_list[i].tid; 1427dbed73cbSSangeeta Misra ilbs->ilbs_sticky_timer_list[i].tid = 0; 1428dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_sticky_timer_list[i].tid_lock); 1429dbed73cbSSangeeta Misra (void) untimeout(tid); 1430dbed73cbSSangeeta Misra } 1431dbed73cbSSangeeta Misra kmem_free(ilbs->ilbs_sticky_timer_list, sizeof (ilb_timer_t) * 1432dbed73cbSSangeeta Misra ilb_sticky_timer_size); 1433dbed73cbSSangeeta Misra taskq_destroy(ilbs->ilbs_sticky_taskq); 1434dbed73cbSSangeeta Misra ilbs->ilbs_sticky_taskq = NULL; 1435dbed73cbSSangeeta Misra 1436dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_sticky_hash_size; i++) { 1437dbed73cbSSangeeta Misra while ((s = list_head(&ilbs->ilbs_sticky_hash[i].sticky_head)) 1438dbed73cbSSangeeta Misra != NULL) { 1439dbed73cbSSangeeta Misra list_remove(&ilbs->ilbs_sticky_hash[i].sticky_head, s); 1440dbed73cbSSangeeta Misra ILB_SERVER_REFRELE(s->server); 1441dbed73cbSSangeeta Misra kmem_free(s, sizeof (ilb_sticky_t)); 1442dbed73cbSSangeeta Misra } 1443dbed73cbSSangeeta Misra } 1444dbed73cbSSangeeta Misra kmem_free(ilbs->ilbs_sticky_hash, ilbs->ilbs_sticky_hash_size * 1445dbed73cbSSangeeta Misra sizeof (ilb_sticky_hash_t)); 1446dbed73cbSSangeeta Misra } 1447dbed73cbSSangeeta Misra 1448dbed73cbSSangeeta Misra /* 1449dbed73cbSSangeeta Misra * This routine sends up the sticky hash table to user land. Refer to 1450dbed73cbSSangeeta Misra * the comments before ilb_list_nat(). Both routines assume similar 1451dbed73cbSSangeeta Misra * conditions. 1452dbed73cbSSangeeta Misra * 1453dbed73cbSSangeeta Misra * It is assumed that the caller has checked the size of st so that it 1454dbed73cbSSangeeta Misra * can hold num entries. 
1455dbed73cbSSangeeta Misra */ 1456dbed73cbSSangeeta Misra /* ARGSUSED */ 1457dbed73cbSSangeeta Misra int 1458dbed73cbSSangeeta Misra ilb_list_sticky(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_sticky_entry_t *st, 1459dbed73cbSSangeeta Misra uint32_t *num, uint32_t *flags) 1460dbed73cbSSangeeta Misra { 1461dbed73cbSSangeeta Misra ilb_sticky_hash_t *hash; 1462dbed73cbSSangeeta Misra ilb_sticky_t *curp; 1463dbed73cbSSangeeta Misra uint32_t i, j; 1464dbed73cbSSangeeta Misra int ret = 0; 1465dbed73cbSSangeeta Misra 1466dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_sticky_list_lock); 1467dbed73cbSSangeeta Misra while (ilbs->ilbs_sticky_list_busy) { 1468dbed73cbSSangeeta Misra if (cv_wait_sig(&ilbs->ilbs_sticky_list_cv, 1469dbed73cbSSangeeta Misra &ilbs->ilbs_sticky_list_lock) == 0) { 1470dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_sticky_list_lock); 1471dbed73cbSSangeeta Misra return (EINTR); 1472dbed73cbSSangeeta Misra } 1473dbed73cbSSangeeta Misra } 1474dbed73cbSSangeeta Misra if ((hash = ilbs->ilbs_sticky_hash) == NULL) { 1475dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_sticky_list_lock); 1476dbed73cbSSangeeta Misra *num = 0; 1477dbed73cbSSangeeta Misra *flags |= ILB_LIST_END; 1478dbed73cbSSangeeta Misra return (0); 1479dbed73cbSSangeeta Misra } 1480dbed73cbSSangeeta Misra ilbs->ilbs_sticky_list_busy = B_TRUE; 1481dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_sticky_list_lock); 1482dbed73cbSSangeeta Misra 1483dbed73cbSSangeeta Misra if (*flags & ILB_LIST_BEGIN) { 1484dbed73cbSSangeeta Misra i = 0; 1485dbed73cbSSangeeta Misra mutex_enter(&hash[0].sticky_lock); 1486dbed73cbSSangeeta Misra curp = list_head(&hash[0].sticky_head); 1487dbed73cbSSangeeta Misra } else if (*flags & ILB_LIST_CONT) { 1488dbed73cbSSangeeta Misra if (ilbs->ilbs_sticky_list_cur == ilbs->ilbs_sticky_hash_size) { 1489dbed73cbSSangeeta Misra *num = 0; 1490dbed73cbSSangeeta Misra *flags |= ILB_LIST_END; 1491dbed73cbSSangeeta Misra goto done; 1492dbed73cbSSangeeta Misra } 1493dbed73cbSSangeeta 
Misra i = ilbs->ilbs_sticky_list_cur; 1494dbed73cbSSangeeta Misra mutex_enter(&hash[i].sticky_lock); 1495dbed73cbSSangeeta Misra curp = ilbs->ilbs_sticky_list_curp; 1496dbed73cbSSangeeta Misra } else { 1497dbed73cbSSangeeta Misra ret = EINVAL; 1498dbed73cbSSangeeta Misra goto done; 1499dbed73cbSSangeeta Misra } 1500dbed73cbSSangeeta Misra 1501dbed73cbSSangeeta Misra j = 0; 1502dbed73cbSSangeeta Misra while (j < *num) { 1503dbed73cbSSangeeta Misra if (curp == NULL) { 1504dbed73cbSSangeeta Misra mutex_exit(&hash[i].sticky_lock); 1505dbed73cbSSangeeta Misra if (++i == ilbs->ilbs_sticky_hash_size) { 1506dbed73cbSSangeeta Misra *flags |= ILB_LIST_END; 1507dbed73cbSSangeeta Misra break; 1508dbed73cbSSangeeta Misra } 1509dbed73cbSSangeeta Misra mutex_enter(&hash[i].sticky_lock); 1510dbed73cbSSangeeta Misra curp = list_head(&hash[i].sticky_head); 1511dbed73cbSSangeeta Misra continue; 1512dbed73cbSSangeeta Misra } 1513dbed73cbSSangeeta Misra (void) strcpy(st[j].rule_name, curp->rule_name); 1514dbed73cbSSangeeta Misra st[j].req_addr = curp->src; 1515dbed73cbSSangeeta Misra st[j].srv_addr = curp->server->iser_addr_v6; 1516dbed73cbSSangeeta Misra st[j].expiry_time = TICK_TO_MSEC(curp->expiry); 1517dbed73cbSSangeeta Misra j++; 1518dbed73cbSSangeeta Misra curp = list_next(&hash[i].sticky_head, curp); 1519dbed73cbSSangeeta Misra } 1520dbed73cbSSangeeta Misra ilbs->ilbs_sticky_list_curp = curp; 1521dbed73cbSSangeeta Misra if (j == *num) 1522dbed73cbSSangeeta Misra mutex_exit(&hash[i].sticky_lock); 1523dbed73cbSSangeeta Misra 1524dbed73cbSSangeeta Misra ilbs->ilbs_sticky_list_cur = i; 1525dbed73cbSSangeeta Misra 1526dbed73cbSSangeeta Misra *num = j; 1527dbed73cbSSangeeta Misra done: 1528dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_sticky_list_lock); 1529dbed73cbSSangeeta Misra ilbs->ilbs_sticky_list_busy = B_FALSE; 1530dbed73cbSSangeeta Misra cv_signal(&ilbs->ilbs_sticky_list_cv); 1531dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_sticky_list_lock); 1532dbed73cbSSangeeta Misra 
1533dbed73cbSSangeeta Misra return (ret); 1534dbed73cbSSangeeta Misra } 1535