1dbed73cbSSangeeta Misra /* 2dbed73cbSSangeeta Misra * CDDL HEADER START 3dbed73cbSSangeeta Misra * 4dbed73cbSSangeeta Misra * The contents of this file are subject to the terms of the 5dbed73cbSSangeeta Misra * Common Development and Distribution License (the "License"). 6dbed73cbSSangeeta Misra * You may not use this file except in compliance with the License. 7dbed73cbSSangeeta Misra * 8dbed73cbSSangeeta Misra * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9dbed73cbSSangeeta Misra * or http://www.opensolaris.org/os/licensing. 10dbed73cbSSangeeta Misra * See the License for the specific language governing permissions 11dbed73cbSSangeeta Misra * and limitations under the License. 12dbed73cbSSangeeta Misra * 13dbed73cbSSangeeta Misra * When distributing Covered Code, include this CDDL HEADER in each 14dbed73cbSSangeeta Misra * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15dbed73cbSSangeeta Misra * If applicable, add the following below this CDDL HEADER, with the 16dbed73cbSSangeeta Misra * fields enclosed by brackets "[]" replaced with your own identifying 17dbed73cbSSangeeta Misra * information: Portions Copyright [yyyy] [name of copyright owner] 18dbed73cbSSangeeta Misra * 19dbed73cbSSangeeta Misra * CDDL HEADER END 20dbed73cbSSangeeta Misra */ 21dbed73cbSSangeeta Misra 22dbed73cbSSangeeta Misra /* 23dbed73cbSSangeeta Misra * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24dbed73cbSSangeeta Misra * Use is subject to license terms. 25dbed73cbSSangeeta Misra */ 26dbed73cbSSangeeta Misra 27*de710d24SJosef 'Jeff' Sipek #include <sys/sysmacros.h> 28dbed73cbSSangeeta Misra #include <sys/kmem.h> 29dbed73cbSSangeeta Misra #include <sys/ksynch.h> 30dbed73cbSSangeeta Misra #include <sys/systm.h> 31dbed73cbSSangeeta Misra #include <sys/socket.h> 32dbed73cbSSangeeta Misra #include <sys/disp.h> 33dbed73cbSSangeeta Misra #include <sys/taskq.h> 34dbed73cbSSangeeta Misra #include <sys/cmn_err.h> 35dbed73cbSSangeeta Misra #include <sys/strsun.h> 36dbed73cbSSangeeta Misra #include <sys/sdt.h> 37dbed73cbSSangeeta Misra #include <sys/atomic.h> 38dbed73cbSSangeeta Misra #include <netinet/in.h> 39dbed73cbSSangeeta Misra #include <inet/ip.h> 40dbed73cbSSangeeta Misra #include <inet/ip6.h> 41dbed73cbSSangeeta Misra #include <inet/tcp.h> 42dbed73cbSSangeeta Misra #include <inet/udp_impl.h> 43dbed73cbSSangeeta Misra #include <inet/kstatcom.h> 44dbed73cbSSangeeta Misra 45dbed73cbSSangeeta Misra #include <inet/ilb_ip.h> 46dbed73cbSSangeeta Misra #include "ilb_alg.h" 47dbed73cbSSangeeta Misra #include "ilb_nat.h" 48dbed73cbSSangeeta Misra #include "ilb_conn.h" 49dbed73cbSSangeeta Misra 50dbed73cbSSangeeta Misra /* ILB kmem cache flag */ 51dbed73cbSSangeeta Misra int ilb_kmem_flags = 0; 52dbed73cbSSangeeta Misra 53dbed73cbSSangeeta Misra /* 54dbed73cbSSangeeta Misra * The default size for the different hash tables. Global for all stacks. 55dbed73cbSSangeeta Misra * But each stack has its own table, just that their sizes are the same. 56dbed73cbSSangeeta Misra */ 57dbed73cbSSangeeta Misra static size_t ilb_rule_hash_size = 2048; 58dbed73cbSSangeeta Misra 59dbed73cbSSangeeta Misra static size_t ilb_conn_hash_size = 262144; 60dbed73cbSSangeeta Misra 61dbed73cbSSangeeta Misra static size_t ilb_sticky_hash_size = 262144; 62dbed73cbSSangeeta Misra 63dbed73cbSSangeeta Misra /* This should be a prime number. */ 64dbed73cbSSangeeta Misra static size_t ilb_nat_src_hash_size = 97; 65dbed73cbSSangeeta Misra 66dbed73cbSSangeeta Misra /* Default NAT cache entry expiry time. */ 67dbed73cbSSangeeta Misra static uint32_t ilb_conn_tcp_expiry = 120; 68dbed73cbSSangeeta Misra static uint32_t ilb_conn_udp_expiry = 60; 69dbed73cbSSangeeta Misra 70dbed73cbSSangeeta Misra /* Default sticky entry expiry time. */ 71dbed73cbSSangeeta Misra static uint32_t ilb_sticky_expiry = 60; 72dbed73cbSSangeeta Misra 73dbed73cbSSangeeta Misra /* addr is assumed to be a uint8_t * to an ipaddr_t. */ 74dbed73cbSSangeeta Misra #define ILB_RULE_HASH(addr, hash_size) \ 75dbed73cbSSangeeta Misra ((*((addr) + 3) * 29791 + *((addr) + 2) * 961 + *((addr) + 1) * 31 + \ 76dbed73cbSSangeeta Misra *(addr)) & ((hash_size) - 1)) 77dbed73cbSSangeeta Misra 78dbed73cbSSangeeta Misra /* 79dbed73cbSSangeeta Misra * Note on ILB delayed processing 80dbed73cbSSangeeta Misra * 81dbed73cbSSangeeta Misra * To avoid in line removal on some of the data structures, such as rules, 82dbed73cbSSangeeta Misra * servers and ilb_conn_hash entries, ILB delays such processing to a taskq. 83dbed73cbSSangeeta Misra * There are three types of ILB taskq: 84dbed73cbSSangeeta Misra * 85dbed73cbSSangeeta Misra * 1. rule handling: created at stack initialialization time, ilb_stack_init() 86dbed73cbSSangeeta Misra * 2. conn hash handling: created at conn hash initialization time, 87dbed73cbSSangeeta Misra * ilb_conn_hash_init() 88dbed73cbSSangeeta Misra * 3. sticky hash handling: created at sticky hash initialization time, 89dbed73cbSSangeeta Misra * ilb_sticky_hash_init() 90dbed73cbSSangeeta Misra * 91dbed73cbSSangeeta Misra * The rule taskq is for processing rule and server removal. When a user 92dbed73cbSSangeeta Misra * land rule/server removal request comes in, a taskq is dispatched after 93dbed73cbSSangeeta Misra * removing the rule/server from all related hashes. This taskq will wait 94dbed73cbSSangeeta Misra * until all references to the rule/server are gone before removing it. 95dbed73cbSSangeeta Misra * So the user land thread requesting the removal does not need to wait 96dbed73cbSSangeeta Misra * for the removal completion. 97dbed73cbSSangeeta Misra * 98dbed73cbSSangeeta Misra * The conn hash/sticky hash taskq is for processing ilb_conn_hash and 99dbed73cbSSangeeta Misra * ilb_sticky_hash table entry removal. There are ilb_conn_timer_size timers 100dbed73cbSSangeeta Misra * and ilb_sticky_timer_size timers running for ilb_conn_hash and 101dbed73cbSSangeeta Misra * ilb_sticky_hash cleanup respectively. Each timer is responsible for one 102dbed73cbSSangeeta Misra * portion (same size) of the hash table. When a timer fires, it dispatches 103dbed73cbSSangeeta Misra * a conn hash taskq to clean up its portion of the table. This avoids in 104dbed73cbSSangeeta Misra * line processing of the removal. 105dbed73cbSSangeeta Misra * 106dbed73cbSSangeeta Misra * There is another delayed processing, the clean up of NAT source address 107dbed73cbSSangeeta Misra * table. We just use the timer to directly handle it instead of using 108dbed73cbSSangeeta Misra * a taskq. The reason is that the table is small so it is OK to use the 109dbed73cbSSangeeta Misra * timer. 110dbed73cbSSangeeta Misra */ 111dbed73cbSSangeeta Misra 112dbed73cbSSangeeta Misra /* ILB rule taskq constants. */ 113dbed73cbSSangeeta Misra #define ILB_RULE_TASKQ_NUM_THR 20 114dbed73cbSSangeeta Misra 115dbed73cbSSangeeta Misra /* Argument passed to ILB rule taskq routines. */ 116dbed73cbSSangeeta Misra typedef struct { 117dbed73cbSSangeeta Misra ilb_stack_t *ilbs; 118dbed73cbSSangeeta Misra ilb_rule_t *rule; 119dbed73cbSSangeeta Misra } ilb_rule_tq_t; 120dbed73cbSSangeeta Misra 121dbed73cbSSangeeta Misra /* kstat handling routines. */ 122dbed73cbSSangeeta Misra static kstat_t *ilb_kstat_g_init(netstackid_t, ilb_stack_t *); 123dbed73cbSSangeeta Misra static void ilb_kstat_g_fini(netstackid_t, ilb_stack_t *); 124dbed73cbSSangeeta Misra static kstat_t *ilb_rule_kstat_init(netstackid_t, ilb_rule_t *); 125dbed73cbSSangeeta Misra static kstat_t *ilb_server_kstat_init(netstackid_t, ilb_rule_t *, 126dbed73cbSSangeeta Misra ilb_server_t *); 127dbed73cbSSangeeta Misra 128dbed73cbSSangeeta Misra /* Rule hash handling routines. */ 129dbed73cbSSangeeta Misra static void ilb_rule_hash_init(ilb_stack_t *); 130dbed73cbSSangeeta Misra static void ilb_rule_hash_fini(ilb_stack_t *); 131dbed73cbSSangeeta Misra static void ilb_rule_hash_add(ilb_stack_t *, ilb_rule_t *, const in6_addr_t *); 132dbed73cbSSangeeta Misra static void ilb_rule_hash_del(ilb_rule_t *); 133dbed73cbSSangeeta Misra static ilb_rule_t *ilb_rule_hash(ilb_stack_t *, int, int, in6_addr_t *, 134dbed73cbSSangeeta Misra in_port_t, zoneid_t, uint32_t, boolean_t *); 135dbed73cbSSangeeta Misra 136dbed73cbSSangeeta Misra static void ilb_rule_g_add(ilb_stack_t *, ilb_rule_t *); 137dbed73cbSSangeeta Misra static void ilb_rule_g_del(ilb_stack_t *, ilb_rule_t *); 138dbed73cbSSangeeta Misra static void ilb_del_rule_common(ilb_stack_t *, ilb_rule_t *); 139dbed73cbSSangeeta Misra static ilb_rule_t *ilb_find_rule_locked(ilb_stack_t *, zoneid_t, const char *, 140dbed73cbSSangeeta Misra int *); 141dbed73cbSSangeeta Misra static boolean_t ilb_match_rule(ilb_stack_t *, zoneid_t, const char *, int, 142dbed73cbSSangeeta Misra int, in_port_t, in_port_t, const in6_addr_t *); 143dbed73cbSSangeeta Misra 144dbed73cbSSangeeta Misra /* Back end server handling routines. */ 145dbed73cbSSangeeta Misra static void ilb_server_free(ilb_server_t *); 146dbed73cbSSangeeta Misra 147dbed73cbSSangeeta Misra /* Network stack handling routines. */ 148dbed73cbSSangeeta Misra static void *ilb_stack_init(netstackid_t, netstack_t *); 149dbed73cbSSangeeta Misra static void ilb_stack_shutdown(netstackid_t, void *); 150dbed73cbSSangeeta Misra static void ilb_stack_fini(netstackid_t, void *); 151dbed73cbSSangeeta Misra 152dbed73cbSSangeeta Misra /* Sticky connection handling routines. */ 153dbed73cbSSangeeta Misra static void ilb_rule_sticky_init(ilb_rule_t *); 154dbed73cbSSangeeta Misra static void ilb_rule_sticky_fini(ilb_rule_t *); 155dbed73cbSSangeeta Misra 156dbed73cbSSangeeta Misra /* Handy macro to check for unspecified address. */ 157dbed73cbSSangeeta Misra #define IS_ADDR_UNSPEC(addr) \ 158dbed73cbSSangeeta Misra (IN6_IS_ADDR_V4MAPPED(addr) ? IN6_IS_ADDR_V4MAPPED_ANY(addr) : \ 159dbed73cbSSangeeta Misra IN6_IS_ADDR_UNSPECIFIED(addr)) 160dbed73cbSSangeeta Misra 161dbed73cbSSangeeta Misra /* 162dbed73cbSSangeeta Misra * Global kstat instance counter. When a rule is created, its kstat instance 163dbed73cbSSangeeta Misra * number is assigned by ilb_kstat_instance and ilb_kstat_instance is 164dbed73cbSSangeeta Misra * incremented. 165dbed73cbSSangeeta Misra */ 166dbed73cbSSangeeta Misra static uint_t ilb_kstat_instance = 0; 167dbed73cbSSangeeta Misra 168dbed73cbSSangeeta Misra /* 169dbed73cbSSangeeta Misra * The ILB global kstat has name ILB_G_KS_NAME and class name ILB_G_KS_CNAME. 170dbed73cbSSangeeta Misra * A rule's kstat has ILB_RULE_KS_CNAME class name. 171dbed73cbSSangeeta Misra */ 172dbed73cbSSangeeta Misra #define ILB_G_KS_NAME "global" 173dbed73cbSSangeeta Misra #define ILB_G_KS_CNAME "kstat" 174dbed73cbSSangeeta Misra #define ILB_RULE_KS_CNAME "rulestat" 175dbed73cbSSangeeta Misra 176dbed73cbSSangeeta Misra static kstat_t * 177dbed73cbSSangeeta Misra ilb_kstat_g_init(netstackid_t stackid, ilb_stack_t *ilbs) 178dbed73cbSSangeeta Misra { 179dbed73cbSSangeeta Misra kstat_t *ksp; 180dbed73cbSSangeeta Misra ilb_g_kstat_t template = { 181dbed73cbSSangeeta Misra { "num_rules", KSTAT_DATA_UINT64, 0 }, 182dbed73cbSSangeeta Misra { "ip_frag_in", KSTAT_DATA_UINT64, 0 }, 183dbed73cbSSangeeta Misra { "ip_frag_dropped", KSTAT_DATA_UINT64, 0 } 184dbed73cbSSangeeta Misra }; 185dbed73cbSSangeeta Misra 186dbed73cbSSangeeta Misra ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, 0, ILB_G_KS_NAME, 187dbed73cbSSangeeta Misra ILB_G_KS_CNAME, KSTAT_TYPE_NAMED, NUM_OF_FIELDS(ilb_g_kstat_t), 188dbed73cbSSangeeta Misra KSTAT_FLAG_VIRTUAL, stackid); 189dbed73cbSSangeeta Misra if (ksp == NULL) 190dbed73cbSSangeeta Misra return (NULL); 191dbed73cbSSangeeta Misra bcopy(&template, ilbs->ilbs_kstat, sizeof (template)); 192dbed73cbSSangeeta Misra ksp->ks_data = ilbs->ilbs_kstat; 193dbed73cbSSangeeta Misra ksp->ks_private = (void *)(uintptr_t)stackid; 194dbed73cbSSangeeta Misra 195dbed73cbSSangeeta Misra kstat_install(ksp); 196dbed73cbSSangeeta Misra return (ksp); 197dbed73cbSSangeeta Misra } 198dbed73cbSSangeeta Misra 199dbed73cbSSangeeta Misra static void 200dbed73cbSSangeeta Misra ilb_kstat_g_fini(netstackid_t stackid, ilb_stack_t *ilbs) 201dbed73cbSSangeeta Misra { 202dbed73cbSSangeeta Misra if (ilbs->ilbs_ksp != NULL) { 203dbed73cbSSangeeta Misra ASSERT(stackid == (netstackid_t)(uintptr_t) 204dbed73cbSSangeeta Misra ilbs->ilbs_ksp->ks_private); 205dbed73cbSSangeeta Misra kstat_delete_netstack(ilbs->ilbs_ksp, stackid); 206dbed73cbSSangeeta Misra ilbs->ilbs_ksp = NULL; 207dbed73cbSSangeeta Misra } 208dbed73cbSSangeeta Misra } 209dbed73cbSSangeeta Misra 210dbed73cbSSangeeta Misra static kstat_t * 211dbed73cbSSangeeta Misra ilb_rule_kstat_init(netstackid_t stackid, ilb_rule_t *rule) 212dbed73cbSSangeeta Misra { 213dbed73cbSSangeeta Misra kstat_t *ksp; 214dbed73cbSSangeeta Misra ilb_rule_kstat_t template = { 215dbed73cbSSangeeta Misra { "num_servers", KSTAT_DATA_UINT64, 0 }, 216dbed73cbSSangeeta Misra { "bytes_not_processed", KSTAT_DATA_UINT64, 0 }, 217dbed73cbSSangeeta Misra { "pkt_not_processed", KSTAT_DATA_UINT64, 0 }, 218dbed73cbSSangeeta Misra { "bytes_dropped", KSTAT_DATA_UINT64, 0 }, 219dbed73cbSSangeeta Misra { "pkt_dropped", KSTAT_DATA_UINT64, 0 }, 220dbed73cbSSangeeta Misra { "nomem_bytes_dropped", KSTAT_DATA_UINT64, 0 }, 221dbed73cbSSangeeta Misra { "nomem_pkt_dropped", KSTAT_DATA_UINT64, 0 }, 222dbed73cbSSangeeta Misra { "noport_bytes_dropped", KSTAT_DATA_UINT64, 0 }, 223dbed73cbSSangeeta Misra { "noport_pkt_dropped", KSTAT_DATA_UINT64, 0 }, 224dbed73cbSSangeeta Misra { "icmp_echo_processed", KSTAT_DATA_UINT64, 0 }, 225dbed73cbSSangeeta Misra { "icmp_dropped", KSTAT_DATA_UINT64, 0 }, 226dbed73cbSSangeeta Misra { "icmp_too_big_processed", KSTAT_DATA_UINT64, 0 }, 227dbed73cbSSangeeta Misra { "icmp_too_big_dropped", KSTAT_DATA_UINT64, 0 } 228dbed73cbSSangeeta Misra }; 229dbed73cbSSangeeta Misra 230dbed73cbSSangeeta Misra ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, rule->ir_ks_instance, 231dbed73cbSSangeeta Misra rule->ir_name, ILB_RULE_KS_CNAME, KSTAT_TYPE_NAMED, 232dbed73cbSSangeeta Misra NUM_OF_FIELDS(ilb_rule_kstat_t), KSTAT_FLAG_VIRTUAL, stackid); 233dbed73cbSSangeeta Misra if (ksp == NULL) 234dbed73cbSSangeeta Misra return (NULL); 235dbed73cbSSangeeta Misra 236dbed73cbSSangeeta Misra bcopy(&template, &rule->ir_kstat, sizeof (template)); 237dbed73cbSSangeeta Misra ksp->ks_data = &rule->ir_kstat; 238dbed73cbSSangeeta Misra ksp->ks_private = (void *)(uintptr_t)stackid; 239dbed73cbSSangeeta Misra 240dbed73cbSSangeeta Misra kstat_install(ksp); 241dbed73cbSSangeeta Misra return (ksp); 242dbed73cbSSangeeta Misra } 243dbed73cbSSangeeta Misra 244dbed73cbSSangeeta Misra static kstat_t * 245dbed73cbSSangeeta Misra ilb_server_kstat_init(netstackid_t stackid, ilb_rule_t *rule, 246dbed73cbSSangeeta Misra ilb_server_t *server) 247dbed73cbSSangeeta Misra { 248dbed73cbSSangeeta Misra kstat_t *ksp; 249dbed73cbSSangeeta Misra ilb_server_kstat_t template = { 250dbed73cbSSangeeta Misra { "bytes_processed", KSTAT_DATA_UINT64, 0 }, 251dbed73cbSSangeeta Misra { "pkt_processed", KSTAT_DATA_UINT64, 0 }, 252dbed73cbSSangeeta Misra { "ip_address", KSTAT_DATA_STRING, 0 } 253dbed73cbSSangeeta Misra }; 254dbed73cbSSangeeta Misra char cname_buf[KSTAT_STRLEN]; 255dbed73cbSSangeeta Misra 256dbed73cbSSangeeta Misra /* 7 is "-sstat" */ 257dbed73cbSSangeeta Misra ASSERT(strlen(rule->ir_name) + 7 < KSTAT_STRLEN); 258dbed73cbSSangeeta Misra (void) sprintf(cname_buf, "%s-sstat", rule->ir_name); 259dbed73cbSSangeeta Misra ksp = kstat_create_netstack(ILB_KSTAT_MOD_NAME, rule->ir_ks_instance, 260dbed73cbSSangeeta Misra server->iser_name, cname_buf, KSTAT_TYPE_NAMED, 261dbed73cbSSangeeta Misra NUM_OF_FIELDS(ilb_server_kstat_t), KSTAT_FLAG_VIRTUAL, stackid); 262dbed73cbSSangeeta Misra if (ksp == NULL) 263dbed73cbSSangeeta Misra return (NULL); 264dbed73cbSSangeeta Misra 265dbed73cbSSangeeta Misra bcopy(&template, &server->iser_kstat, sizeof (template)); 266dbed73cbSSangeeta Misra ksp->ks_data = &server->iser_kstat; 267dbed73cbSSangeeta Misra ksp->ks_private = (void *)(uintptr_t)stackid; 268dbed73cbSSangeeta Misra 269dbed73cbSSangeeta Misra kstat_named_setstr(&server->iser_kstat.ip_address, 270dbed73cbSSangeeta Misra server->iser_ip_addr); 271dbed73cbSSangeeta Misra /* We never change the IP address */ 272dbed73cbSSangeeta Misra ksp->ks_data_size += strlen(server->iser_ip_addr) + 1; 273dbed73cbSSangeeta Misra 274dbed73cbSSangeeta Misra kstat_install(ksp); 275dbed73cbSSangeeta Misra return (ksp); 276dbed73cbSSangeeta Misra } 277dbed73cbSSangeeta Misra 278dbed73cbSSangeeta Misra /* Initialize the rule hash table. */ 279dbed73cbSSangeeta Misra static void 280dbed73cbSSangeeta Misra ilb_rule_hash_init(ilb_stack_t *ilbs) 281dbed73cbSSangeeta Misra { 282dbed73cbSSangeeta Misra int i; 283dbed73cbSSangeeta Misra 284dbed73cbSSangeeta Misra /* 285dbed73cbSSangeeta Misra * If ilbs->ilbs_rule_hash_size is not a power of 2, bump it up to 286dbed73cbSSangeeta Misra * the next power of 2. 287dbed73cbSSangeeta Misra */ 288*de710d24SJosef 'Jeff' Sipek if (!ISP2(ilbs->ilbs_rule_hash_size)) { 289dbed73cbSSangeeta Misra for (i = 0; i < 31; i++) { 290dbed73cbSSangeeta Misra if (ilbs->ilbs_rule_hash_size < (1 << i)) 291dbed73cbSSangeeta Misra break; 292dbed73cbSSangeeta Misra } 293dbed73cbSSangeeta Misra ilbs->ilbs_rule_hash_size = 1 << i; 294dbed73cbSSangeeta Misra } 295dbed73cbSSangeeta Misra ilbs->ilbs_g_hash = kmem_zalloc(sizeof (ilb_hash_t) * 296dbed73cbSSangeeta Misra ilbs->ilbs_rule_hash_size, KM_SLEEP); 297dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_rule_hash_size; i++) { 298dbed73cbSSangeeta Misra mutex_init(&ilbs->ilbs_g_hash[i].ilb_hash_lock, NULL, 299dbed73cbSSangeeta Misra MUTEX_DEFAULT, NULL); 300dbed73cbSSangeeta Misra } 301dbed73cbSSangeeta Misra } 302dbed73cbSSangeeta Misra 303dbed73cbSSangeeta Misra /* Clean up the rule hash table. */ 304dbed73cbSSangeeta Misra static void 305dbed73cbSSangeeta Misra ilb_rule_hash_fini(ilb_stack_t *ilbs) 306dbed73cbSSangeeta Misra { 307dbed73cbSSangeeta Misra if (ilbs->ilbs_g_hash == NULL) 308dbed73cbSSangeeta Misra return; 309dbed73cbSSangeeta Misra kmem_free(ilbs->ilbs_g_hash, sizeof (ilb_hash_t) * 310dbed73cbSSangeeta Misra ilbs->ilbs_rule_hash_size); 311dbed73cbSSangeeta Misra } 312dbed73cbSSangeeta Misra 313dbed73cbSSangeeta Misra /* Add a rule to the rule hash table. */ 314dbed73cbSSangeeta Misra static void 315dbed73cbSSangeeta Misra ilb_rule_hash_add(ilb_stack_t *ilbs, ilb_rule_t *rule, const in6_addr_t *addr) 316dbed73cbSSangeeta Misra { 317dbed73cbSSangeeta Misra int i; 318dbed73cbSSangeeta Misra 319dbed73cbSSangeeta Misra i = ILB_RULE_HASH((uint8_t *)&addr->s6_addr32[3], 320dbed73cbSSangeeta Misra ilbs->ilbs_rule_hash_size); 321dbed73cbSSangeeta Misra DTRACE_PROBE2(ilb__rule__hash__add, ilb_rule_t *, rule, int, i); 322dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock); 323dbed73cbSSangeeta Misra rule->ir_hash_next = ilbs->ilbs_g_hash[i].ilb_hash_rule; 324dbed73cbSSangeeta Misra if (ilbs->ilbs_g_hash[i].ilb_hash_rule != NULL) 325dbed73cbSSangeeta Misra ilbs->ilbs_g_hash[i].ilb_hash_rule->ir_hash_prev = rule; 326dbed73cbSSangeeta Misra rule->ir_hash_prev = NULL; 327dbed73cbSSangeeta Misra ilbs->ilbs_g_hash[i].ilb_hash_rule = rule; 328dbed73cbSSangeeta Misra 329dbed73cbSSangeeta Misra rule->ir_hash = &ilbs->ilbs_g_hash[i]; 330dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock); 331dbed73cbSSangeeta Misra } 332dbed73cbSSangeeta Misra 333dbed73cbSSangeeta Misra /* 334dbed73cbSSangeeta Misra * Remove a rule from the rule hash table. Note that the rule is not freed 335dbed73cbSSangeeta Misra * in this routine. 336dbed73cbSSangeeta Misra */ 337dbed73cbSSangeeta Misra static void 338dbed73cbSSangeeta Misra ilb_rule_hash_del(ilb_rule_t *rule) 339dbed73cbSSangeeta Misra { 340dbed73cbSSangeeta Misra mutex_enter(&rule->ir_hash->ilb_hash_lock); 341dbed73cbSSangeeta Misra if (rule->ir_hash->ilb_hash_rule == rule) { 342dbed73cbSSangeeta Misra rule->ir_hash->ilb_hash_rule = rule->ir_hash_next; 343dbed73cbSSangeeta Misra if (rule->ir_hash_next != NULL) 344dbed73cbSSangeeta Misra rule->ir_hash_next->ir_hash_prev = NULL; 345dbed73cbSSangeeta Misra } else { 346dbed73cbSSangeeta Misra if (rule->ir_hash_prev != NULL) 347dbed73cbSSangeeta Misra rule->ir_hash_prev->ir_hash_next = 348dbed73cbSSangeeta Misra rule->ir_hash_next; 349dbed73cbSSangeeta Misra if (rule->ir_hash_next != NULL) { 350dbed73cbSSangeeta Misra rule->ir_hash_next->ir_hash_prev = 351dbed73cbSSangeeta Misra rule->ir_hash_prev; 352dbed73cbSSangeeta Misra } 353dbed73cbSSangeeta Misra } 354dbed73cbSSangeeta Misra mutex_exit(&rule->ir_hash->ilb_hash_lock); 355dbed73cbSSangeeta Misra 356dbed73cbSSangeeta Misra rule->ir_hash_next = NULL; 357dbed73cbSSangeeta Misra rule->ir_hash_prev = NULL; 358dbed73cbSSangeeta Misra rule->ir_hash = NULL; 359dbed73cbSSangeeta Misra } 360dbed73cbSSangeeta Misra 361dbed73cbSSangeeta Misra /* 362dbed73cbSSangeeta Misra * Given the info of a packet, look for a match in the rule hash table. 363dbed73cbSSangeeta Misra */ 364dbed73cbSSangeeta Misra static ilb_rule_t * 365dbed73cbSSangeeta Misra ilb_rule_hash(ilb_stack_t *ilbs, int l3, int l4, in6_addr_t *addr, 366dbed73cbSSangeeta Misra in_port_t port, zoneid_t zoneid, uint32_t len, boolean_t *busy) 367dbed73cbSSangeeta Misra { 368dbed73cbSSangeeta Misra int i; 369dbed73cbSSangeeta Misra ilb_rule_t *rule; 370dbed73cbSSangeeta Misra ipaddr_t v4_addr; 371dbed73cbSSangeeta Misra 372dbed73cbSSangeeta Misra *busy = B_FALSE; 373dbed73cbSSangeeta Misra IN6_V4MAPPED_TO_IPADDR(addr, v4_addr); 374dbed73cbSSangeeta Misra i = ILB_RULE_HASH((uint8_t *)&v4_addr, ilbs->ilbs_rule_hash_size); 375dbed73cbSSangeeta Misra port = ntohs(port); 376dbed73cbSSangeeta Misra 377dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock); 378dbed73cbSSangeeta Misra for (rule = ilbs->ilbs_g_hash[i].ilb_hash_rule; rule != NULL; 379dbed73cbSSangeeta Misra rule = rule->ir_hash_next) { 380dbed73cbSSangeeta Misra if (!rule->ir_port_range) { 381dbed73cbSSangeeta Misra if (rule->ir_min_port != port) 382dbed73cbSSangeeta Misra continue; 383dbed73cbSSangeeta Misra } else { 384dbed73cbSSangeeta Misra if (port < rule->ir_min_port || 385dbed73cbSSangeeta Misra port > rule->ir_max_port) { 386dbed73cbSSangeeta Misra continue; 387dbed73cbSSangeeta Misra } 388dbed73cbSSangeeta Misra } 389dbed73cbSSangeeta Misra if (rule->ir_ipver != l3 || rule->ir_proto != l4 || 390dbed73cbSSangeeta Misra rule->ir_zoneid != zoneid) { 391dbed73cbSSangeeta Misra continue; 392dbed73cbSSangeeta Misra } 393dbed73cbSSangeeta Misra 394dbed73cbSSangeeta Misra if (l3 == IPPROTO_IP) { 395dbed73cbSSangeeta Misra if (rule->ir_target_v4 != INADDR_ANY && 396dbed73cbSSangeeta Misra rule->ir_target_v4 != v4_addr) { 397dbed73cbSSangeeta Misra continue; 398dbed73cbSSangeeta Misra } 399dbed73cbSSangeeta Misra } else { 400dbed73cbSSangeeta Misra if (!IN6_IS_ADDR_UNSPECIFIED(&rule->ir_target_v6) && 401dbed73cbSSangeeta Misra !IN6_ARE_ADDR_EQUAL(addr, &rule->ir_target_v6)) { 402dbed73cbSSangeeta Misra continue; 403dbed73cbSSangeeta Misra } 404dbed73cbSSangeeta Misra } 405dbed73cbSSangeeta Misra 406dbed73cbSSangeeta Misra /* 407dbed73cbSSangeeta Misra * Just update the stats if the rule is disabled. 408dbed73cbSSangeeta Misra */ 409dbed73cbSSangeeta Misra mutex_enter(&rule->ir_lock); 410dbed73cbSSangeeta Misra if (!(rule->ir_flags & ILB_RULE_ENABLED)) { 411dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, pkt_not_processed); 412dbed73cbSSangeeta Misra ILB_R_KSTAT_UPDATE(rule, bytes_not_processed, len); 413dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 414dbed73cbSSangeeta Misra rule = NULL; 415dbed73cbSSangeeta Misra break; 416dbed73cbSSangeeta Misra } else if (rule->ir_flags & ILB_RULE_BUSY) { 417dbed73cbSSangeeta Misra /* 418dbed73cbSSangeeta Misra * If we are busy... 419dbed73cbSSangeeta Misra * 420dbed73cbSSangeeta Misra * XXX we should have a queue to postpone the 421dbed73cbSSangeeta Misra * packet processing. But this requires a 422dbed73cbSSangeeta Misra * mechanism in IP to re-start the packet 423dbed73cbSSangeeta Misra * processing. So for now, just drop the packet. 424dbed73cbSSangeeta Misra */ 425dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, pkt_dropped); 426dbed73cbSSangeeta Misra ILB_R_KSTAT_UPDATE(rule, bytes_dropped, len); 427dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 428dbed73cbSSangeeta Misra *busy = B_TRUE; 429dbed73cbSSangeeta Misra rule = NULL; 430dbed73cbSSangeeta Misra break; 431dbed73cbSSangeeta Misra } else { 432dbed73cbSSangeeta Misra rule->ir_refcnt++; 433dbed73cbSSangeeta Misra ASSERT(rule->ir_refcnt != 1); 434dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 435dbed73cbSSangeeta Misra break; 436dbed73cbSSangeeta Misra } 437dbed73cbSSangeeta Misra } 438dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock); 439dbed73cbSSangeeta Misra return (rule); 440dbed73cbSSangeeta Misra } 441dbed73cbSSangeeta Misra 442dbed73cbSSangeeta Misra /* 443dbed73cbSSangeeta Misra * Add a rule to the global rule list. This list is for finding all rules 444dbed73cbSSangeeta Misra * in an IP stack. The caller is assumed to hold the ilbs_g_lock. 445dbed73cbSSangeeta Misra */ 446dbed73cbSSangeeta Misra static void 447dbed73cbSSangeeta Misra ilb_rule_g_add(ilb_stack_t *ilbs, ilb_rule_t *rule) 448dbed73cbSSangeeta Misra { 449dbed73cbSSangeeta Misra ASSERT(mutex_owned(&ilbs->ilbs_g_lock)); 450dbed73cbSSangeeta Misra rule->ir_next = ilbs->ilbs_rule_head; 451dbed73cbSSangeeta Misra ilbs->ilbs_rule_head = rule; 452dbed73cbSSangeeta Misra ILB_KSTAT_UPDATE(ilbs, num_rules, 1); 453dbed73cbSSangeeta Misra } 454dbed73cbSSangeeta Misra 455dbed73cbSSangeeta Misra /* The call is assumed to hold the ilbs_g_lock. */ 456dbed73cbSSangeeta Misra static void 457dbed73cbSSangeeta Misra ilb_rule_g_del(ilb_stack_t *ilbs, ilb_rule_t *rule) 458dbed73cbSSangeeta Misra { 459dbed73cbSSangeeta Misra ilb_rule_t *tmp_rule; 460dbed73cbSSangeeta Misra ilb_rule_t *prev_rule; 461dbed73cbSSangeeta Misra 462dbed73cbSSangeeta Misra ASSERT(mutex_owned(&ilbs->ilbs_g_lock)); 463dbed73cbSSangeeta Misra prev_rule = NULL; 464dbed73cbSSangeeta Misra for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL; 465dbed73cbSSangeeta Misra prev_rule = tmp_rule, tmp_rule = tmp_rule->ir_next) { 466dbed73cbSSangeeta Misra if (tmp_rule == rule) 467dbed73cbSSangeeta Misra break; 468dbed73cbSSangeeta Misra } 469dbed73cbSSangeeta Misra if (tmp_rule == NULL) { 470dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 471dbed73cbSSangeeta Misra return; 472dbed73cbSSangeeta Misra } 473dbed73cbSSangeeta Misra if (prev_rule == NULL) 474dbed73cbSSangeeta Misra ilbs->ilbs_rule_head = tmp_rule->ir_next; 475dbed73cbSSangeeta Misra else 476dbed73cbSSangeeta Misra prev_rule->ir_next = tmp_rule->ir_next; 477dbed73cbSSangeeta Misra ILB_KSTAT_UPDATE(ilbs, num_rules, -1); 478dbed73cbSSangeeta Misra } 479dbed73cbSSangeeta Misra 480dbed73cbSSangeeta Misra /* 481dbed73cbSSangeeta Misra * Helper routine to calculate how many source addresses are in a given 482dbed73cbSSangeeta Misra * range. 483dbed73cbSSangeeta Misra */ 484dbed73cbSSangeeta Misra static int64_t 485dbed73cbSSangeeta Misra num_nat_src_v6(const in6_addr_t *a1, const in6_addr_t *a2) 486dbed73cbSSangeeta Misra { 487dbed73cbSSangeeta Misra int64_t ret; 488dbed73cbSSangeeta Misra uint32_t addr1, addr2; 489dbed73cbSSangeeta Misra 490dbed73cbSSangeeta Misra /* 491dbed73cbSSangeeta Misra * Here we assume that the max number of NAT source cannot be 492dbed73cbSSangeeta Misra * large such that the most significant 2 s6_addr32 must be 493dbed73cbSSangeeta Misra * equal. 494dbed73cbSSangeeta Misra */ 495dbed73cbSSangeeta Misra addr1 = ntohl(a1->s6_addr32[3]); 496dbed73cbSSangeeta Misra addr2 = ntohl(a2->s6_addr32[3]); 497dbed73cbSSangeeta Misra if (a1->s6_addr32[0] != a2->s6_addr32[0] || 498dbed73cbSSangeeta Misra a1->s6_addr32[1] != a2->s6_addr32[1] || 499dbed73cbSSangeeta Misra a1->s6_addr32[2] > a2->s6_addr32[2] || 500dbed73cbSSangeeta Misra (a1->s6_addr32[2] == a2->s6_addr32[2] && addr1 > addr2)) { 501dbed73cbSSangeeta Misra return (-1); 502dbed73cbSSangeeta Misra } 503dbed73cbSSangeeta Misra if (a1->s6_addr32[2] == a2->s6_addr32[2]) { 504dbed73cbSSangeeta Misra return (addr2 - addr1 + 1); 505dbed73cbSSangeeta Misra } else { 506dbed73cbSSangeeta Misra ret = (ntohl(a2->s6_addr32[2]) - ntohl(a1->s6_addr32[2])); 507dbed73cbSSangeeta Misra ret <<= 32; 508dbed73cbSSangeeta Misra ret = ret + addr1 - addr2; 509dbed73cbSSangeeta Misra return (ret + 1); 510dbed73cbSSangeeta Misra } 511dbed73cbSSangeeta Misra } 512dbed73cbSSangeeta Misra 513dbed73cbSSangeeta Misra /* 514dbed73cbSSangeeta Misra * Add an ILB rule. 515dbed73cbSSangeeta Misra */ 516dbed73cbSSangeeta Misra int 517dbed73cbSSangeeta Misra ilb_rule_add(ilb_stack_t *ilbs, zoneid_t zoneid, const ilb_rule_cmd_t *cmd) 518dbed73cbSSangeeta Misra { 519dbed73cbSSangeeta Misra ilb_rule_t *rule; 520dbed73cbSSangeeta Misra netstackid_t stackid; 521dbed73cbSSangeeta Misra int ret; 522dbed73cbSSangeeta Misra in_port_t min_port, max_port; 523dbed73cbSSangeeta Misra int64_t num_src; 524dbed73cbSSangeeta Misra 525dbed73cbSSangeeta Misra /* Sanity checks. */ 526dbed73cbSSangeeta Misra if (cmd->ip_ver != IPPROTO_IP && cmd->ip_ver != IPPROTO_IPV6) 527dbed73cbSSangeeta Misra return (EINVAL); 528dbed73cbSSangeeta Misra 529dbed73cbSSangeeta Misra /* Need to support SCTP... */ 530dbed73cbSSangeeta Misra if (cmd->proto != IPPROTO_TCP && cmd->proto != IPPROTO_UDP) 531dbed73cbSSangeeta Misra return (EINVAL); 532dbed73cbSSangeeta Misra 533dbed73cbSSangeeta Misra /* For full NAT, the NAT source must be supplied. */ 534dbed73cbSSangeeta Misra if (cmd->topo == ILB_TOPO_IMPL_NAT) { 535dbed73cbSSangeeta Misra if (IS_ADDR_UNSPEC(&cmd->nat_src_start) || 536dbed73cbSSangeeta Misra IS_ADDR_UNSPEC(&cmd->nat_src_end)) { 537dbed73cbSSangeeta Misra return (EINVAL); 538dbed73cbSSangeeta Misra } 539dbed73cbSSangeeta Misra } 540dbed73cbSSangeeta Misra 541dbed73cbSSangeeta Misra /* Check invalid mask */ 542dbed73cbSSangeeta Misra if ((cmd->flags & ILB_RULE_STICKY) && 543dbed73cbSSangeeta Misra IS_ADDR_UNSPEC(&cmd->sticky_mask)) { 544dbed73cbSSangeeta Misra return (EINVAL); 545dbed73cbSSangeeta Misra } 546dbed73cbSSangeeta Misra 547dbed73cbSSangeeta Misra /* Port is passed in network byte order. */ 548dbed73cbSSangeeta Misra min_port = ntohs(cmd->min_port); 549dbed73cbSSangeeta Misra max_port = ntohs(cmd->max_port); 550dbed73cbSSangeeta Misra if (min_port > max_port) 551dbed73cbSSangeeta Misra return (EINVAL); 552dbed73cbSSangeeta Misra 553dbed73cbSSangeeta Misra /* min_port == 0 means "all ports". Make it so */ 554dbed73cbSSangeeta Misra if (min_port == 0) { 555dbed73cbSSangeeta Misra min_port = 1; 556dbed73cbSSangeeta Misra max_port = 65535; 557dbed73cbSSangeeta Misra } 558dbed73cbSSangeeta Misra 559dbed73cbSSangeeta Misra /* Funny address checking. */ 560dbed73cbSSangeeta Misra if (cmd->ip_ver == IPPROTO_IP) { 561dbed73cbSSangeeta Misra in_addr_t v4_addr1, v4_addr2; 562dbed73cbSSangeeta Misra 563dbed73cbSSangeeta Misra v4_addr1 = cmd->vip.s6_addr32[3]; 564dbed73cbSSangeeta Misra if ((*(uchar_t *)&v4_addr1) == IN_LOOPBACKNET || 565dbed73cbSSangeeta Misra CLASSD(v4_addr1) || v4_addr1 == INADDR_BROADCAST || 566dbed73cbSSangeeta Misra v4_addr1 == INADDR_ANY || 567dbed73cbSSangeeta Misra !IN6_IS_ADDR_V4MAPPED(&cmd->vip)) { 568dbed73cbSSangeeta Misra return (EINVAL); 569dbed73cbSSangeeta Misra } 570dbed73cbSSangeeta Misra 571dbed73cbSSangeeta Misra if (cmd->topo == ILB_TOPO_IMPL_NAT) { 572dbed73cbSSangeeta Misra v4_addr1 = ntohl(cmd->nat_src_start.s6_addr32[3]); 573dbed73cbSSangeeta Misra v4_addr2 = ntohl(cmd->nat_src_end.s6_addr32[3]); 574dbed73cbSSangeeta Misra if ((*(uchar_t *)&v4_addr1) == IN_LOOPBACKNET || 575dbed73cbSSangeeta Misra (*(uchar_t *)&v4_addr2) == IN_LOOPBACKNET || 576dbed73cbSSangeeta Misra v4_addr1 == INADDR_BROADCAST || 577dbed73cbSSangeeta Misra v4_addr2 == INADDR_BROADCAST || 578dbed73cbSSangeeta Misra v4_addr1 == INADDR_ANY || v4_addr2 == INADDR_ANY || 579dbed73cbSSangeeta Misra CLASSD(v4_addr1) || CLASSD(v4_addr2) || 580dbed73cbSSangeeta Misra !IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_start) || 581dbed73cbSSangeeta Misra !IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_end)) { 582dbed73cbSSangeeta Misra return (EINVAL); 583dbed73cbSSangeeta Misra } 584dbed73cbSSangeeta Misra 585dbed73cbSSangeeta Misra num_src = v4_addr2 - v4_addr1 + 1; 586dbed73cbSSangeeta Misra if (v4_addr1 > v4_addr2 || num_src > ILB_MAX_NAT_SRC) 587dbed73cbSSangeeta Misra return (EINVAL); 588dbed73cbSSangeeta Misra } 589dbed73cbSSangeeta Misra } else { 590dbed73cbSSangeeta Misra if (IN6_IS_ADDR_LOOPBACK(&cmd->vip) || 591dbed73cbSSangeeta Misra IN6_IS_ADDR_MULTICAST(&cmd->vip) || 592dbed73cbSSangeeta Misra IN6_IS_ADDR_UNSPECIFIED(&cmd->vip) || 593dbed73cbSSangeeta Misra IN6_IS_ADDR_V4MAPPED(&cmd->vip)) { 594dbed73cbSSangeeta Misra return (EINVAL); 595dbed73cbSSangeeta Misra } 596dbed73cbSSangeeta Misra 597dbed73cbSSangeeta Misra if (cmd->topo == ILB_TOPO_IMPL_NAT) { 598dbed73cbSSangeeta Misra if (IN6_IS_ADDR_LOOPBACK(&cmd->nat_src_start) || 599dbed73cbSSangeeta Misra IN6_IS_ADDR_LOOPBACK(&cmd->nat_src_end) || 600dbed73cbSSangeeta Misra IN6_IS_ADDR_MULTICAST(&cmd->nat_src_start) || 601dbed73cbSSangeeta Misra IN6_IS_ADDR_MULTICAST(&cmd->nat_src_end) || 602dbed73cbSSangeeta Misra IN6_IS_ADDR_UNSPECIFIED(&cmd->nat_src_start) || 603dbed73cbSSangeeta Misra IN6_IS_ADDR_UNSPECIFIED(&cmd->nat_src_end) || 604dbed73cbSSangeeta Misra IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_start) || 605dbed73cbSSangeeta Misra IN6_IS_ADDR_V4MAPPED(&cmd->nat_src_end)) { 606dbed73cbSSangeeta Misra return (EINVAL); 607dbed73cbSSangeeta Misra } 608dbed73cbSSangeeta Misra 609dbed73cbSSangeeta Misra if ((num_src = num_nat_src_v6(&cmd->nat_src_start, 610dbed73cbSSangeeta Misra &cmd->nat_src_end)) < 0 || 611dbed73cbSSangeeta Misra num_src > ILB_MAX_NAT_SRC) { 612dbed73cbSSangeeta Misra return (EINVAL); 613dbed73cbSSangeeta Misra } 614dbed73cbSSangeeta Misra } 615dbed73cbSSangeeta Misra } 616dbed73cbSSangeeta Misra 617dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_lock); 618dbed73cbSSangeeta Misra if (ilbs->ilbs_g_hash == NULL) 619dbed73cbSSangeeta Misra ilb_rule_hash_init(ilbs); 620dbed73cbSSangeeta Misra if (ilbs->ilbs_c2s_conn_hash == NULL) { 621dbed73cbSSangeeta Misra ASSERT(ilbs->ilbs_s2c_conn_hash == NULL); 622dbed73cbSSangeeta Misra ilb_conn_hash_init(ilbs); 623dbed73cbSSangeeta Misra ilb_nat_src_init(ilbs); 624dbed73cbSSangeeta Misra } 625dbed73cbSSangeeta Misra 626dbed73cbSSangeeta Misra /* Make sure that the new rule does not duplicate an existing one. */ 627dbed73cbSSangeeta Misra if (ilb_match_rule(ilbs, zoneid, cmd->name, cmd->ip_ver, cmd->proto, 628dbed73cbSSangeeta Misra min_port, max_port, &cmd->vip)) { 629dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 630dbed73cbSSangeeta Misra return (EEXIST); 631dbed73cbSSangeeta Misra } 632dbed73cbSSangeeta Misra 633dbed73cbSSangeeta Misra rule = kmem_zalloc(sizeof (ilb_rule_t), KM_NOSLEEP); 634dbed73cbSSangeeta Misra if (rule == NULL) { 635dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 636dbed73cbSSangeeta Misra return (ENOMEM); 637dbed73cbSSangeeta Misra } 638dbed73cbSSangeeta Misra 639dbed73cbSSangeeta Misra /* ir_name is all 0 to begin with */ 640dbed73cbSSangeeta Misra (void) memcpy(rule->ir_name, cmd->name, ILB_RULE_NAMESZ - 1); 641dbed73cbSSangeeta Misra 6421a5e258fSJosef 'Jeff' Sipek rule->ir_ks_instance = atomic_inc_uint_nv(&ilb_kstat_instance); 643dbed73cbSSangeeta Misra stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private; 644dbed73cbSSangeeta Misra if ((rule->ir_ksp = ilb_rule_kstat_init(stackid, rule)) == NULL) { 645dbed73cbSSangeeta Misra ret = ENOMEM; 646dbed73cbSSangeeta Misra goto error; 647dbed73cbSSangeeta Misra } 648dbed73cbSSangeeta Misra 649dbed73cbSSangeeta Misra if (cmd->topo == ILB_TOPO_IMPL_NAT) { 650dbed73cbSSangeeta Misra rule->ir_nat_src_start = cmd->nat_src_start; 651dbed73cbSSangeeta Misra rule->ir_nat_src_end = cmd->nat_src_end; 652dbed73cbSSangeeta Misra } 653dbed73cbSSangeeta Misra 654dbed73cbSSangeeta Misra rule->ir_ipver = cmd->ip_ver; 655dbed73cbSSangeeta Misra rule->ir_proto = cmd->proto; 656dbed73cbSSangeeta Misra rule->ir_topo = cmd->topo; 657dbed73cbSSangeeta Misra 658dbed73cbSSangeeta Misra rule->ir_min_port = min_port; 659dbed73cbSSangeeta Misra rule->ir_max_port = max_port; 660dbed73cbSSangeeta Misra if (rule->ir_min_port != rule->ir_max_port) 661dbed73cbSSangeeta Misra rule->ir_port_range = B_TRUE; 662dbed73cbSSangeeta Misra else 663dbed73cbSSangeeta Misra rule->ir_port_range = B_FALSE; 664dbed73cbSSangeeta Misra 665dbed73cbSSangeeta Misra rule->ir_zoneid = zoneid; 666dbed73cbSSangeeta Misra 667dbed73cbSSangeeta Misra rule->ir_target_v6 = cmd->vip; 668dbed73cbSSangeeta Misra rule->ir_servers = NULL; 669dbed73cbSSangeeta Misra 670dbed73cbSSangeeta Misra /* 671dbed73cbSSangeeta Misra * The default connection drain timeout is indefinite (value 0), 672dbed73cbSSangeeta Misra * meaning we will wait for all connections to finish. So we 673dbed73cbSSangeeta Misra * can assign cmd->conn_drain_timeout to it directly. 674dbed73cbSSangeeta Misra */ 675dbed73cbSSangeeta Misra rule->ir_conn_drain_timeout = cmd->conn_drain_timeout; 676dbed73cbSSangeeta Misra if (cmd->nat_expiry != 0) { 677dbed73cbSSangeeta Misra rule->ir_nat_expiry = cmd->nat_expiry; 678dbed73cbSSangeeta Misra } else { 679dbed73cbSSangeeta Misra switch (rule->ir_proto) { 680dbed73cbSSangeeta Misra case IPPROTO_TCP: 681dbed73cbSSangeeta Misra rule->ir_nat_expiry = ilb_conn_tcp_expiry; 682dbed73cbSSangeeta Misra break; 683dbed73cbSSangeeta Misra case IPPROTO_UDP: 684dbed73cbSSangeeta Misra rule->ir_nat_expiry = ilb_conn_udp_expiry; 685dbed73cbSSangeeta Misra break; 686dbed73cbSSangeeta Misra default: 687dbed73cbSSangeeta Misra cmn_err(CE_PANIC, "data corruption: wrong ir_proto: %p", 688dbed73cbSSangeeta Misra (void *)rule); 689dbed73cbSSangeeta Misra break; 690dbed73cbSSangeeta Misra } 691dbed73cbSSangeeta Misra } 692dbed73cbSSangeeta Misra if (cmd->sticky_expiry != 0) 693dbed73cbSSangeeta Misra rule->ir_sticky_expiry = cmd->sticky_expiry; 694dbed73cbSSangeeta Misra else 695dbed73cbSSangeeta Misra rule->ir_sticky_expiry = ilb_sticky_expiry; 696dbed73cbSSangeeta Misra 697dbed73cbSSangeeta Misra if (cmd->flags & ILB_RULE_STICKY) { 698dbed73cbSSangeeta Misra rule->ir_flags |= ILB_RULE_STICKY; 699dbed73cbSSangeeta Misra rule->ir_sticky_mask = cmd->sticky_mask; 700dbed73cbSSangeeta Misra if (ilbs->ilbs_sticky_hash == NULL) 701dbed73cbSSangeeta Misra ilb_sticky_hash_init(ilbs); 702dbed73cbSSangeeta Misra } 703dbed73cbSSangeeta Misra if (cmd->flags & ILB_RULE_ENABLED) 704dbed73cbSSangeeta Misra rule->ir_flags |= ILB_RULE_ENABLED; 705dbed73cbSSangeeta Misra 706dbed73cbSSangeeta Misra mutex_init(&rule->ir_lock, NULL, MUTEX_DEFAULT, NULL); 707dbed73cbSSangeeta Misra cv_init(&rule->ir_cv, NULL, CV_DEFAULT, NULL); 708dbed73cbSSangeeta Misra 709dbed73cbSSangeeta Misra rule->ir_refcnt = 1; 710dbed73cbSSangeeta Misra 711dbed73cbSSangeeta Misra switch (cmd->algo) { 712dbed73cbSSangeeta Misra case ILB_ALG_IMPL_ROUNDROBIN: 713dbed73cbSSangeeta Misra if ((rule->ir_alg = ilb_alg_rr_init(rule, NULL)) == NULL) { 714dbed73cbSSangeeta Misra ret = ENOMEM; 715dbed73cbSSangeeta Misra goto error; 716dbed73cbSSangeeta Misra } 717dbed73cbSSangeeta Misra rule->ir_alg_type = ILB_ALG_IMPL_ROUNDROBIN; 718dbed73cbSSangeeta Misra break; 719dbed73cbSSangeeta Misra case ILB_ALG_IMPL_HASH_IP: 720dbed73cbSSangeeta Misra case ILB_ALG_IMPL_HASH_IP_SPORT: 721dbed73cbSSangeeta Misra case ILB_ALG_IMPL_HASH_IP_VIP: 722dbed73cbSSangeeta Misra if ((rule->ir_alg = ilb_alg_hash_init(rule, 723dbed73cbSSangeeta Misra &cmd->algo)) == NULL) { 724dbed73cbSSangeeta Misra ret = ENOMEM; 725dbed73cbSSangeeta Misra goto error; 726dbed73cbSSangeeta Misra } 727dbed73cbSSangeeta Misra rule->ir_alg_type = cmd->algo; 728dbed73cbSSangeeta Misra break; 729dbed73cbSSangeeta Misra default: 730dbed73cbSSangeeta Misra ret = EINVAL; 731dbed73cbSSangeeta Misra goto error; 732dbed73cbSSangeeta Misra } 733dbed73cbSSangeeta Misra 734dbed73cbSSangeeta Misra /* Add it to the global list and hash array at the end. */ 735dbed73cbSSangeeta Misra ilb_rule_g_add(ilbs, rule); 736dbed73cbSSangeeta Misra ilb_rule_hash_add(ilbs, rule, &cmd->vip); 737dbed73cbSSangeeta Misra 738dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 739dbed73cbSSangeeta Misra 740dbed73cbSSangeeta Misra return (0); 741dbed73cbSSangeeta Misra 742dbed73cbSSangeeta Misra error: 743dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 744dbed73cbSSangeeta Misra if (rule->ir_ksp != NULL) { 745dbed73cbSSangeeta Misra /* stackid must be initialized if ir_ksp != NULL */ 746dbed73cbSSangeeta Misra kstat_delete_netstack(rule->ir_ksp, stackid); 747dbed73cbSSangeeta Misra } 748dbed73cbSSangeeta Misra kmem_free(rule, sizeof (ilb_rule_t)); 749dbed73cbSSangeeta Misra return (ret); 750dbed73cbSSangeeta Misra } 751dbed73cbSSangeeta Misra 752dbed73cbSSangeeta Misra /* 753dbed73cbSSangeeta Misra * The final part in deleting a rule. Either called directly or by the 754dbed73cbSSangeeta Misra * taskq dispatched. 755dbed73cbSSangeeta Misra */ 756dbed73cbSSangeeta Misra static void 757dbed73cbSSangeeta Misra ilb_rule_del_common(ilb_stack_t *ilbs, ilb_rule_t *tmp_rule) 758dbed73cbSSangeeta Misra { 759dbed73cbSSangeeta Misra netstackid_t stackid; 760dbed73cbSSangeeta Misra ilb_server_t *server; 761dbed73cbSSangeeta Misra 762dbed73cbSSangeeta Misra stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private; 763dbed73cbSSangeeta Misra 764dbed73cbSSangeeta Misra /* 765dbed73cbSSangeeta Misra * Let the algorithm know that the rule is going away. The 766dbed73cbSSangeeta Misra * algorithm fini routine will free all its resources with this 767dbed73cbSSangeeta Misra * rule. 768dbed73cbSSangeeta Misra */ 769dbed73cbSSangeeta Misra tmp_rule->ir_alg->ilb_alg_fini(&tmp_rule->ir_alg); 770dbed73cbSSangeeta Misra 771dbed73cbSSangeeta Misra while ((server = tmp_rule->ir_servers) != NULL) { 772dbed73cbSSangeeta Misra mutex_enter(&server->iser_lock); 773dbed73cbSSangeeta Misra ilb_destroy_nat_src(&server->iser_nat_src); 774dbed73cbSSangeeta Misra if (tmp_rule->ir_conn_drain_timeout != 0) { 775dbed73cbSSangeeta Misra /* 776dbed73cbSSangeeta Misra * The garbage collection thread checks this value 777dbed73cbSSangeeta Misra * without grabing a lock. So we need to use 778dbed73cbSSangeeta Misra * atomic_swap_64() to make sure that the value seen 779dbed73cbSSangeeta Misra * by gc thread is intact. 780dbed73cbSSangeeta Misra */ 781dbed73cbSSangeeta Misra (void) atomic_swap_64( 782d3d50737SRafael Vanoni (uint64_t *)&server->iser_die_time, 783d3d50737SRafael Vanoni ddi_get_lbolt64() + 784dbed73cbSSangeeta Misra SEC_TO_TICK(tmp_rule->ir_conn_drain_timeout)); 785dbed73cbSSangeeta Misra } 786dbed73cbSSangeeta Misra while (server->iser_refcnt > 1) 787dbed73cbSSangeeta Misra cv_wait(&server->iser_cv, &server->iser_lock); 788dbed73cbSSangeeta Misra tmp_rule->ir_servers = server->iser_next; 789dbed73cbSSangeeta Misra kstat_delete_netstack(server->iser_ksp, stackid); 790dbed73cbSSangeeta Misra kmem_free(server, sizeof (ilb_server_t)); 791dbed73cbSSangeeta Misra } 792dbed73cbSSangeeta Misra 793dbed73cbSSangeeta Misra ASSERT(tmp_rule->ir_ksp != NULL); 794dbed73cbSSangeeta Misra kstat_delete_netstack(tmp_rule->ir_ksp, stackid); 795dbed73cbSSangeeta Misra 796dbed73cbSSangeeta Misra kmem_free(tmp_rule, sizeof (ilb_rule_t)); 797dbed73cbSSangeeta Misra } 798dbed73cbSSangeeta Misra 799dbed73cbSSangeeta Misra /* The routine executed by the delayed rule taskq. */ 800dbed73cbSSangeeta Misra static void 801dbed73cbSSangeeta Misra ilb_rule_del_tq(void *arg) 802dbed73cbSSangeeta Misra { 803dbed73cbSSangeeta Misra ilb_stack_t *ilbs = ((ilb_rule_tq_t *)arg)->ilbs; 804dbed73cbSSangeeta Misra ilb_rule_t *rule = ((ilb_rule_tq_t *)arg)->rule; 805dbed73cbSSangeeta Misra 806dbed73cbSSangeeta Misra mutex_enter(&rule->ir_lock); 807dbed73cbSSangeeta Misra while (rule->ir_refcnt > 1) 808dbed73cbSSangeeta Misra cv_wait(&rule->ir_cv, &rule->ir_lock); 809dbed73cbSSangeeta Misra ilb_rule_del_common(ilbs, rule); 810dbed73cbSSangeeta Misra kmem_free(arg, sizeof (ilb_rule_tq_t)); 811dbed73cbSSangeeta Misra } 812dbed73cbSSangeeta Misra 813dbed73cbSSangeeta Misra /* Routine to delete a rule. */ 814dbed73cbSSangeeta Misra int 815dbed73cbSSangeeta Misra ilb_rule_del(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name) 816dbed73cbSSangeeta Misra { 817dbed73cbSSangeeta Misra ilb_rule_t *tmp_rule; 818dbed73cbSSangeeta Misra ilb_rule_tq_t *arg; 819dbed73cbSSangeeta Misra int err; 820dbed73cbSSangeeta Misra 821dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_lock); 822dbed73cbSSangeeta Misra if ((tmp_rule = ilb_find_rule_locked(ilbs, zoneid, name, 823dbed73cbSSangeeta Misra &err)) == NULL) { 824dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 825dbed73cbSSangeeta Misra return (err); 826dbed73cbSSangeeta Misra } 827dbed73cbSSangeeta Misra 828dbed73cbSSangeeta Misra /* 829dbed73cbSSangeeta Misra * First remove the rule from the hash array and the global list so 830dbed73cbSSangeeta Misra * that no one can find this rule any more. 831dbed73cbSSangeeta Misra */ 832dbed73cbSSangeeta Misra ilb_rule_hash_del(tmp_rule); 833dbed73cbSSangeeta Misra ilb_rule_g_del(ilbs, tmp_rule); 834dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 835dbed73cbSSangeeta Misra ILB_RULE_REFRELE(tmp_rule); 836dbed73cbSSangeeta Misra 837dbed73cbSSangeeta Misra /* 838dbed73cbSSangeeta Misra * Now no one can find this rule, we can remove it once all 839dbed73cbSSangeeta Misra * references to it are dropped and all references to the list 840dbed73cbSSangeeta Misra * of servers are dropped. So dispatch a task to finish the deletion. 841dbed73cbSSangeeta Misra * We do this instead of letting the last one referencing the 842dbed73cbSSangeeta Misra * rule do it. The reason is that the last one may be the 843dbed73cbSSangeeta Misra * interrupt thread. We want to minimize the work it needs to 844dbed73cbSSangeeta Misra * do. Rule deletion is not a critical task so it can be delayed. 845dbed73cbSSangeeta Misra */ 846dbed73cbSSangeeta Misra arg = kmem_alloc(sizeof (ilb_rule_tq_t), KM_SLEEP); 847dbed73cbSSangeeta Misra arg->ilbs = ilbs; 848dbed73cbSSangeeta Misra arg->rule = tmp_rule; 849dbed73cbSSangeeta Misra (void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_rule_del_tq, arg, 850dbed73cbSSangeeta Misra TQ_SLEEP); 851dbed73cbSSangeeta Misra 852dbed73cbSSangeeta Misra return (0); 853dbed73cbSSangeeta Misra } 854dbed73cbSSangeeta Misra 855dbed73cbSSangeeta Misra /* 856dbed73cbSSangeeta Misra * Given an IP address, check to see if there is a rule using this 857dbed73cbSSangeeta Misra * as the VIP. It can be used to check if we need to drop a fragment. 858dbed73cbSSangeeta Misra */ 859dbed73cbSSangeeta Misra boolean_t 860dbed73cbSSangeeta Misra ilb_rule_match_vip_v6(ilb_stack_t *ilbs, in6_addr_t *vip, ilb_rule_t **ret_rule) 861dbed73cbSSangeeta Misra { 862dbed73cbSSangeeta Misra int i; 863dbed73cbSSangeeta Misra ilb_rule_t *rule; 864dbed73cbSSangeeta Misra boolean_t ret = B_FALSE; 865dbed73cbSSangeeta Misra 866dbed73cbSSangeeta Misra i = ILB_RULE_HASH((uint8_t *)&vip->s6_addr32[3], 867dbed73cbSSangeeta Misra ilbs->ilbs_rule_hash_size); 868dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock); 869dbed73cbSSangeeta Misra for (rule = ilbs->ilbs_g_hash[i].ilb_hash_rule; rule != NULL; 870dbed73cbSSangeeta Misra rule = rule->ir_hash_next) { 871dbed73cbSSangeeta Misra if (IN6_ARE_ADDR_EQUAL(vip, &rule->ir_target_v6)) { 872dbed73cbSSangeeta Misra mutex_enter(&rule->ir_lock); 873dbed73cbSSangeeta Misra if (rule->ir_flags & ILB_RULE_BUSY) { 874dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 875dbed73cbSSangeeta Misra break; 876dbed73cbSSangeeta Misra } 877dbed73cbSSangeeta Misra if (ret_rule != NULL) { 878dbed73cbSSangeeta Misra rule->ir_refcnt++; 879dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 880dbed73cbSSangeeta Misra *ret_rule = rule; 881dbed73cbSSangeeta Misra } else { 882dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 883dbed73cbSSangeeta Misra } 884dbed73cbSSangeeta Misra ret = B_TRUE; 885dbed73cbSSangeeta Misra break; 886dbed73cbSSangeeta Misra } 887dbed73cbSSangeeta Misra } 888dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock); 889dbed73cbSSangeeta Misra return (ret); 890dbed73cbSSangeeta Misra } 891dbed73cbSSangeeta Misra 892dbed73cbSSangeeta Misra boolean_t 893dbed73cbSSangeeta Misra ilb_rule_match_vip_v4(ilb_stack_t *ilbs, ipaddr_t addr, ilb_rule_t **ret_rule) 894dbed73cbSSangeeta Misra { 895dbed73cbSSangeeta Misra int i; 896dbed73cbSSangeeta Misra ilb_rule_t *rule; 897dbed73cbSSangeeta Misra boolean_t ret = B_FALSE; 898dbed73cbSSangeeta Misra 899dbed73cbSSangeeta Misra i = ILB_RULE_HASH((uint8_t *)&addr, ilbs->ilbs_rule_hash_size); 900dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_hash[i].ilb_hash_lock); 901dbed73cbSSangeeta Misra for (rule = ilbs->ilbs_g_hash[i].ilb_hash_rule; rule != NULL; 902dbed73cbSSangeeta Misra rule = rule->ir_hash_next) { 903dbed73cbSSangeeta Misra if (rule->ir_target_v6.s6_addr32[3] == addr) { 904dbed73cbSSangeeta Misra mutex_enter(&rule->ir_lock); 905dbed73cbSSangeeta Misra if (rule->ir_flags & ILB_RULE_BUSY) { 906dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 907dbed73cbSSangeeta Misra break; 908dbed73cbSSangeeta Misra } 909dbed73cbSSangeeta Misra if (ret_rule != NULL) { 910dbed73cbSSangeeta Misra rule->ir_refcnt++; 911dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 912dbed73cbSSangeeta Misra *ret_rule = rule; 913dbed73cbSSangeeta Misra } else { 914dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 915dbed73cbSSangeeta Misra } 916dbed73cbSSangeeta Misra ret = B_TRUE; 917dbed73cbSSangeeta Misra break; 918dbed73cbSSangeeta Misra } 919dbed73cbSSangeeta Misra } 920dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_hash[i].ilb_hash_lock); 921dbed73cbSSangeeta Misra return (ret); 922dbed73cbSSangeeta Misra } 923dbed73cbSSangeeta Misra 924dbed73cbSSangeeta Misra static ilb_rule_t * 925dbed73cbSSangeeta Misra ilb_find_rule_locked(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, 926dbed73cbSSangeeta Misra int *err) 927dbed73cbSSangeeta Misra { 928dbed73cbSSangeeta Misra ilb_rule_t *tmp_rule; 929dbed73cbSSangeeta Misra 930dbed73cbSSangeeta Misra ASSERT(mutex_owned(&ilbs->ilbs_g_lock)); 931dbed73cbSSangeeta Misra 932dbed73cbSSangeeta Misra for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL; 933dbed73cbSSangeeta Misra tmp_rule = tmp_rule->ir_next) { 934dbed73cbSSangeeta Misra if (tmp_rule->ir_zoneid != zoneid) 935dbed73cbSSangeeta Misra continue; 936dbed73cbSSangeeta Misra if (strcasecmp(tmp_rule->ir_name, name) == 0) { 937dbed73cbSSangeeta Misra mutex_enter(&tmp_rule->ir_lock); 938dbed73cbSSangeeta Misra if (tmp_rule->ir_flags & ILB_RULE_BUSY) { 939dbed73cbSSangeeta Misra mutex_exit(&tmp_rule->ir_lock); 940dbed73cbSSangeeta Misra *err = EINPROGRESS; 941dbed73cbSSangeeta Misra return (NULL); 942dbed73cbSSangeeta Misra } 943dbed73cbSSangeeta Misra tmp_rule->ir_refcnt++; 944dbed73cbSSangeeta Misra mutex_exit(&tmp_rule->ir_lock); 945dbed73cbSSangeeta Misra *err = 0; 946dbed73cbSSangeeta Misra return (tmp_rule); 947dbed73cbSSangeeta Misra } 948dbed73cbSSangeeta Misra } 949dbed73cbSSangeeta Misra *err = ENOENT; 950dbed73cbSSangeeta Misra return (NULL); 951dbed73cbSSangeeta Misra } 952dbed73cbSSangeeta Misra 953dbed73cbSSangeeta Misra /* To find a rule with a given name and zone in the global rule list. */ 954dbed73cbSSangeeta Misra ilb_rule_t * 955dbed73cbSSangeeta Misra ilb_find_rule(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, 956dbed73cbSSangeeta Misra int *err) 957dbed73cbSSangeeta Misra { 958dbed73cbSSangeeta Misra ilb_rule_t *tmp_rule; 959dbed73cbSSangeeta Misra 960dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_lock); 961dbed73cbSSangeeta Misra tmp_rule = ilb_find_rule_locked(ilbs, zoneid, name, err); 962dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 963dbed73cbSSangeeta Misra return (tmp_rule); 964dbed73cbSSangeeta Misra } 965dbed73cbSSangeeta Misra 966dbed73cbSSangeeta Misra /* Try to match the given packet info and zone ID with a rule. */ 967dbed73cbSSangeeta Misra static boolean_t 968dbed73cbSSangeeta Misra ilb_match_rule(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, int l3, 969dbed73cbSSangeeta Misra int l4, in_port_t min_port, in_port_t max_port, const in6_addr_t *addr) 970dbed73cbSSangeeta Misra { 971dbed73cbSSangeeta Misra ilb_rule_t *tmp_rule; 972dbed73cbSSangeeta Misra 973dbed73cbSSangeeta Misra ASSERT(mutex_owned(&ilbs->ilbs_g_lock)); 974dbed73cbSSangeeta Misra 975dbed73cbSSangeeta Misra for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL; 976dbed73cbSSangeeta Misra tmp_rule = tmp_rule->ir_next) { 977dbed73cbSSangeeta Misra if (tmp_rule->ir_zoneid != zoneid) 978dbed73cbSSangeeta Misra continue; 979dbed73cbSSangeeta Misra 980dbed73cbSSangeeta Misra /* 981dbed73cbSSangeeta Misra * We don't allow the same name in different rules even if all 982dbed73cbSSangeeta Misra * the other rule components are different. 983dbed73cbSSangeeta Misra */ 984dbed73cbSSangeeta Misra if (strcasecmp(tmp_rule->ir_name, name) == 0) 985dbed73cbSSangeeta Misra return (B_TRUE); 986dbed73cbSSangeeta Misra 987dbed73cbSSangeeta Misra if (tmp_rule->ir_ipver != l3 || tmp_rule->ir_proto != l4) 988dbed73cbSSangeeta Misra continue; 989dbed73cbSSangeeta Misra 990dbed73cbSSangeeta Misra /* 991dbed73cbSSangeeta Misra * ir_min_port and ir_max_port are the same if ir_port_range 992dbed73cbSSangeeta Misra * is false. In this case, if the ir_min|max_port (same) is 993dbed73cbSSangeeta Misra * outside of the given port range, it is OK. In other cases, 994dbed73cbSSangeeta Misra * check if min and max port are outside a rule's range. 995dbed73cbSSangeeta Misra */ 996dbed73cbSSangeeta Misra if (tmp_rule->ir_max_port < min_port || 997dbed73cbSSangeeta Misra tmp_rule->ir_min_port > max_port) { 998dbed73cbSSangeeta Misra continue; 999dbed73cbSSangeeta Misra } 1000dbed73cbSSangeeta Misra 1001dbed73cbSSangeeta Misra /* 1002dbed73cbSSangeeta Misra * If l3 is IPv4, the addr passed in is assumed to be 1003dbed73cbSSangeeta Misra * mapped address. 1004dbed73cbSSangeeta Misra */ 1005dbed73cbSSangeeta Misra if (V6_OR_V4_INADDR_ANY(*addr) || 1006dbed73cbSSangeeta Misra V6_OR_V4_INADDR_ANY(tmp_rule->ir_target_v6) || 1007dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(addr, &tmp_rule->ir_target_v6)) { 1008dbed73cbSSangeeta Misra return (B_TRUE); 1009dbed73cbSSangeeta Misra } 1010dbed73cbSSangeeta Misra } 1011dbed73cbSSangeeta Misra return (B_FALSE); 1012dbed73cbSSangeeta Misra } 1013dbed73cbSSangeeta Misra 1014dbed73cbSSangeeta Misra int 1015dbed73cbSSangeeta Misra ilb_rule_enable(ilb_stack_t *ilbs, zoneid_t zoneid, 1016dbed73cbSSangeeta Misra const char *rule_name, ilb_rule_t *in_rule) 1017dbed73cbSSangeeta Misra { 1018dbed73cbSSangeeta Misra ilb_rule_t *rule; 1019dbed73cbSSangeeta Misra int err; 1020dbed73cbSSangeeta Misra 1021dbed73cbSSangeeta Misra ASSERT((in_rule == NULL && rule_name != NULL) || 1022dbed73cbSSangeeta Misra (in_rule != NULL && rule_name == NULL)); 1023dbed73cbSSangeeta Misra if ((rule = in_rule) == NULL) { 1024dbed73cbSSangeeta Misra if ((rule = ilb_find_rule(ilbs, zoneid, rule_name, 1025dbed73cbSSangeeta Misra &err)) == NULL) { 1026dbed73cbSSangeeta Misra return (err); 1027dbed73cbSSangeeta Misra } 1028dbed73cbSSangeeta Misra } 1029dbed73cbSSangeeta Misra mutex_enter(&rule->ir_lock); 1030dbed73cbSSangeeta Misra rule->ir_flags |= ILB_RULE_ENABLED; 1031dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 1032dbed73cbSSangeeta Misra 1033dbed73cbSSangeeta Misra /* Only refrele if the rule is passed in. */ 1034dbed73cbSSangeeta Misra if (in_rule == NULL) 1035dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1036dbed73cbSSangeeta Misra return (0); 1037dbed73cbSSangeeta Misra } 1038dbed73cbSSangeeta Misra 1039dbed73cbSSangeeta Misra int 1040dbed73cbSSangeeta Misra ilb_rule_disable(ilb_stack_t *ilbs, zoneid_t zoneid, 1041dbed73cbSSangeeta Misra const char *rule_name, ilb_rule_t *in_rule) 1042dbed73cbSSangeeta Misra { 1043dbed73cbSSangeeta Misra ilb_rule_t *rule; 1044dbed73cbSSangeeta Misra int err; 1045dbed73cbSSangeeta Misra 1046dbed73cbSSangeeta Misra ASSERT((in_rule == NULL && rule_name != NULL) || 1047dbed73cbSSangeeta Misra (in_rule != NULL && rule_name == NULL)); 1048dbed73cbSSangeeta Misra if ((rule = in_rule) == NULL) { 1049dbed73cbSSangeeta Misra if ((rule = ilb_find_rule(ilbs, zoneid, rule_name, 1050dbed73cbSSangeeta Misra &err)) == NULL) { 1051dbed73cbSSangeeta Misra return (err); 1052dbed73cbSSangeeta Misra } 1053dbed73cbSSangeeta Misra } 1054dbed73cbSSangeeta Misra mutex_enter(&rule->ir_lock); 1055dbed73cbSSangeeta Misra rule->ir_flags &= ~ILB_RULE_ENABLED; 1056dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 1057dbed73cbSSangeeta Misra 1058dbed73cbSSangeeta Misra /* Only refrele if the rule is passed in. */ 1059dbed73cbSSangeeta Misra if (in_rule == NULL) 1060dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1061dbed73cbSSangeeta Misra return (0); 1062dbed73cbSSangeeta Misra } 1063dbed73cbSSangeeta Misra 1064dbed73cbSSangeeta Misra /* 1065dbed73cbSSangeeta Misra * XXX We should probably have a walker function to walk all rules. For 1066dbed73cbSSangeeta Misra * now, just add a simple loop for enable/disable/del. 1067dbed73cbSSangeeta Misra */ 1068dbed73cbSSangeeta Misra void 1069dbed73cbSSangeeta Misra ilb_rule_enable_all(ilb_stack_t *ilbs, zoneid_t zoneid) 1070dbed73cbSSangeeta Misra { 1071dbed73cbSSangeeta Misra ilb_rule_t *rule; 1072dbed73cbSSangeeta Misra 1073dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_lock); 1074dbed73cbSSangeeta Misra for (rule = ilbs->ilbs_rule_head; rule != NULL; rule = rule->ir_next) { 1075dbed73cbSSangeeta Misra if (rule->ir_zoneid != zoneid) 1076dbed73cbSSangeeta Misra continue; 1077dbed73cbSSangeeta Misra /* 1078dbed73cbSSangeeta Misra * No need to hold the rule as we are holding the global 1079dbed73cbSSangeeta Misra * lock so it won't go away. Ignore the return value here 1080dbed73cbSSangeeta Misra * as the rule is provided so the call cannot fail. 1081dbed73cbSSangeeta Misra */ 1082dbed73cbSSangeeta Misra (void) ilb_rule_enable(ilbs, zoneid, NULL, rule); 1083dbed73cbSSangeeta Misra } 1084dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 1085dbed73cbSSangeeta Misra } 1086dbed73cbSSangeeta Misra 1087dbed73cbSSangeeta Misra void 1088dbed73cbSSangeeta Misra ilb_rule_disable_all(ilb_stack_t *ilbs, zoneid_t zoneid) 1089dbed73cbSSangeeta Misra { 1090dbed73cbSSangeeta Misra ilb_rule_t *rule; 1091dbed73cbSSangeeta Misra 1092dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_lock); 1093dbed73cbSSangeeta Misra for (rule = ilbs->ilbs_rule_head; rule != NULL; 1094dbed73cbSSangeeta Misra rule = rule->ir_next) { 1095dbed73cbSSangeeta Misra if (rule->ir_zoneid != zoneid) 1096dbed73cbSSangeeta Misra continue; 1097dbed73cbSSangeeta Misra (void) ilb_rule_disable(ilbs, zoneid, NULL, rule); 1098dbed73cbSSangeeta Misra } 1099dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 1100dbed73cbSSangeeta Misra } 1101dbed73cbSSangeeta Misra 1102dbed73cbSSangeeta Misra void 1103dbed73cbSSangeeta Misra ilb_rule_del_all(ilb_stack_t *ilbs, zoneid_t zoneid) 1104dbed73cbSSangeeta Misra { 1105dbed73cbSSangeeta Misra ilb_rule_t *rule; 1106dbed73cbSSangeeta Misra ilb_rule_tq_t *arg; 1107dbed73cbSSangeeta Misra 1108dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_lock); 1109dbed73cbSSangeeta Misra while ((rule = ilbs->ilbs_rule_head) != NULL) { 1110dbed73cbSSangeeta Misra if (rule->ir_zoneid != zoneid) 1111dbed73cbSSangeeta Misra continue; 1112dbed73cbSSangeeta Misra ilb_rule_hash_del(rule); 1113dbed73cbSSangeeta Misra ilb_rule_g_del(ilbs, rule); 1114dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 1115dbed73cbSSangeeta Misra 1116dbed73cbSSangeeta Misra arg = kmem_alloc(sizeof (ilb_rule_tq_t), KM_SLEEP); 1117dbed73cbSSangeeta Misra arg->ilbs = ilbs; 1118dbed73cbSSangeeta Misra arg->rule = rule; 1119dbed73cbSSangeeta Misra (void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_rule_del_tq, 1120dbed73cbSSangeeta Misra arg, TQ_SLEEP); 1121dbed73cbSSangeeta Misra 1122dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_lock); 1123dbed73cbSSangeeta Misra } 1124dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 1125dbed73cbSSangeeta Misra } 1126dbed73cbSSangeeta Misra 1127dbed73cbSSangeeta Misra /* 1128dbed73cbSSangeeta Misra * This is just an optimization, so don't grab the global lock. The 1129dbed73cbSSangeeta Misra * worst case is that we missed a couple packets. 1130dbed73cbSSangeeta Misra */ 1131dbed73cbSSangeeta Misra boolean_t 1132dbed73cbSSangeeta Misra ilb_has_rules(ilb_stack_t *ilbs) 1133dbed73cbSSangeeta Misra { 1134dbed73cbSSangeeta Misra return (ilbs->ilbs_rule_head != NULL); 1135dbed73cbSSangeeta Misra } 1136dbed73cbSSangeeta Misra 1137dbed73cbSSangeeta Misra 1138dbed73cbSSangeeta Misra static int 1139dbed73cbSSangeeta Misra ilb_server_toggle(ilb_stack_t *ilbs, zoneid_t zoneid, const char *rule_name, 1140dbed73cbSSangeeta Misra ilb_rule_t *rule, in6_addr_t *addr, boolean_t enable) 1141dbed73cbSSangeeta Misra { 1142dbed73cbSSangeeta Misra ilb_server_t *tmp_server; 1143dbed73cbSSangeeta Misra int ret; 1144dbed73cbSSangeeta Misra 1145dbed73cbSSangeeta Misra ASSERT((rule == NULL && rule_name != NULL) || 1146dbed73cbSSangeeta Misra (rule != NULL && rule_name == NULL)); 1147dbed73cbSSangeeta Misra 1148dbed73cbSSangeeta Misra if (rule == NULL) { 1149dbed73cbSSangeeta Misra if ((rule = ilb_find_rule(ilbs, zoneid, rule_name, 1150dbed73cbSSangeeta Misra &ret)) == NULL) { 1151dbed73cbSSangeeta Misra return (ret); 1152dbed73cbSSangeeta Misra } 1153dbed73cbSSangeeta Misra } 1154dbed73cbSSangeeta Misra 1155dbed73cbSSangeeta Misra /* Once we get a hold on the rule, no server can be added/deleted. */ 1156dbed73cbSSangeeta Misra for (tmp_server = rule->ir_servers; tmp_server != NULL; 1157dbed73cbSSangeeta Misra tmp_server = tmp_server->iser_next) { 1158dbed73cbSSangeeta Misra if (IN6_ARE_ADDR_EQUAL(&tmp_server->iser_addr_v6, addr)) 1159dbed73cbSSangeeta Misra break; 1160dbed73cbSSangeeta Misra } 1161dbed73cbSSangeeta Misra if (tmp_server == NULL) { 1162dbed73cbSSangeeta Misra ret = ENOENT; 1163dbed73cbSSangeeta Misra goto done; 1164dbed73cbSSangeeta Misra } 1165dbed73cbSSangeeta Misra 1166dbed73cbSSangeeta Misra if (enable) { 1167dbed73cbSSangeeta Misra ret = rule->ir_alg->ilb_alg_server_enable(tmp_server, 1168dbed73cbSSangeeta Misra rule->ir_alg->ilb_alg_data); 1169dbed73cbSSangeeta Misra if (ret == 0) { 1170dbed73cbSSangeeta Misra tmp_server->iser_enabled = B_TRUE; 1171dbed73cbSSangeeta Misra tmp_server->iser_die_time = 0; 1172dbed73cbSSangeeta Misra } 1173dbed73cbSSangeeta Misra } else { 1174dbed73cbSSangeeta Misra ret = rule->ir_alg->ilb_alg_server_disable(tmp_server, 1175dbed73cbSSangeeta Misra rule->ir_alg->ilb_alg_data); 1176dbed73cbSSangeeta Misra if (ret == 0) { 1177dbed73cbSSangeeta Misra tmp_server->iser_enabled = B_FALSE; 1178dbed73cbSSangeeta Misra if (rule->ir_conn_drain_timeout != 0) { 1179dbed73cbSSangeeta Misra (void) atomic_swap_64( 1180dbed73cbSSangeeta Misra (uint64_t *)&tmp_server->iser_die_time, 1181d3d50737SRafael Vanoni ddi_get_lbolt64() + SEC_TO_TICK( 1182dbed73cbSSangeeta Misra rule->ir_conn_drain_timeout)); 1183dbed73cbSSangeeta Misra } 1184dbed73cbSSangeeta Misra } 1185dbed73cbSSangeeta Misra } 1186dbed73cbSSangeeta Misra 1187dbed73cbSSangeeta Misra done: 1188dbed73cbSSangeeta Misra if (rule_name != NULL) 1189dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1190dbed73cbSSangeeta Misra return (ret); 1191dbed73cbSSangeeta Misra } 1192dbed73cbSSangeeta Misra int 1193dbed73cbSSangeeta Misra ilb_server_enable(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, 1194dbed73cbSSangeeta Misra ilb_rule_t *rule, in6_addr_t *addr) 1195dbed73cbSSangeeta Misra { 1196dbed73cbSSangeeta Misra return (ilb_server_toggle(ilbs, zoneid, name, rule, addr, B_TRUE)); 1197dbed73cbSSangeeta Misra } 1198dbed73cbSSangeeta Misra 1199dbed73cbSSangeeta Misra int 1200dbed73cbSSangeeta Misra ilb_server_disable(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, 1201dbed73cbSSangeeta Misra ilb_rule_t *rule, in6_addr_t *addr) 1202dbed73cbSSangeeta Misra { 1203dbed73cbSSangeeta Misra return (ilb_server_toggle(ilbs, zoneid, name, rule, addr, B_FALSE)); 1204dbed73cbSSangeeta Misra } 1205dbed73cbSSangeeta Misra 1206dbed73cbSSangeeta Misra /* 1207dbed73cbSSangeeta Misra * Add a back end server to a rule. If the address is IPv4, it is assumed 1208dbed73cbSSangeeta Misra * to be passed in as a mapped address. 1209dbed73cbSSangeeta Misra */ 1210dbed73cbSSangeeta Misra int 1211dbed73cbSSangeeta Misra ilb_server_add(ilb_stack_t *ilbs, ilb_rule_t *rule, ilb_server_info_t *info) 1212dbed73cbSSangeeta Misra { 1213dbed73cbSSangeeta Misra ilb_server_t *server; 1214dbed73cbSSangeeta Misra netstackid_t stackid; 1215dbed73cbSSangeeta Misra int ret = 0; 1216dbed73cbSSangeeta Misra in_port_t min_port, max_port; 1217dbed73cbSSangeeta Misra in_port_t range; 1218dbed73cbSSangeeta Misra 1219dbed73cbSSangeeta Misra /* Port is passed in network byte order. */ 1220dbed73cbSSangeeta Misra min_port = ntohs(info->min_port); 1221dbed73cbSSangeeta Misra max_port = ntohs(info->max_port); 1222dbed73cbSSangeeta Misra if (min_port > max_port) 1223dbed73cbSSangeeta Misra return (EINVAL); 1224dbed73cbSSangeeta Misra 1225dbed73cbSSangeeta Misra /* min_port == 0 means "all ports". Make it so */ 1226dbed73cbSSangeeta Misra if (min_port == 0) { 1227dbed73cbSSangeeta Misra min_port = 1; 1228dbed73cbSSangeeta Misra max_port = 65535; 1229dbed73cbSSangeeta Misra } 1230dbed73cbSSangeeta Misra range = max_port - min_port; 1231dbed73cbSSangeeta Misra 1232dbed73cbSSangeeta Misra mutex_enter(&rule->ir_lock); 1233dbed73cbSSangeeta Misra /* If someone is already doing server add/del, sleeps and wait. */ 1234dbed73cbSSangeeta Misra while (rule->ir_flags & ILB_RULE_BUSY) { 1235dbed73cbSSangeeta Misra if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) { 1236dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 1237dbed73cbSSangeeta Misra return (EINTR); 1238dbed73cbSSangeeta Misra } 1239dbed73cbSSangeeta Misra } 1240dbed73cbSSangeeta Misra 1241dbed73cbSSangeeta Misra /* 1242dbed73cbSSangeeta Misra * Set the rule to be busy to make sure that no new packet can 1243dbed73cbSSangeeta Misra * use this rule. 1244dbed73cbSSangeeta Misra */ 1245dbed73cbSSangeeta Misra rule->ir_flags |= ILB_RULE_BUSY; 1246dbed73cbSSangeeta Misra 1247dbed73cbSSangeeta Misra /* Now wait for all other guys to finish their work. */ 1248dbed73cbSSangeeta Misra while (rule->ir_refcnt > 2) { 1249dbed73cbSSangeeta Misra if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) { 1250dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 1251dbed73cbSSangeeta Misra ret = EINTR; 1252dbed73cbSSangeeta Misra goto end; 1253dbed73cbSSangeeta Misra } 1254dbed73cbSSangeeta Misra } 1255dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 1256dbed73cbSSangeeta Misra 1257dbed73cbSSangeeta Misra /* Sanity checks... */ 1258dbed73cbSSangeeta Misra if ((IN6_IS_ADDR_V4MAPPED(&info->addr) && 1259dbed73cbSSangeeta Misra rule->ir_ipver != IPPROTO_IP) || 1260dbed73cbSSangeeta Misra (!IN6_IS_ADDR_V4MAPPED(&info->addr) && 1261dbed73cbSSangeeta Misra rule->ir_ipver != IPPROTO_IPV6)) { 1262dbed73cbSSangeeta Misra ret = EINVAL; 1263dbed73cbSSangeeta Misra goto end; 1264dbed73cbSSangeeta Misra } 1265dbed73cbSSangeeta Misra 1266dbed73cbSSangeeta Misra /* 1267dbed73cbSSangeeta Misra * Check for valid port range. 1268dbed73cbSSangeeta Misra * 1269dbed73cbSSangeeta Misra * For DSR, there can be no port shifting. Hence the server 1270dbed73cbSSangeeta Misra * specification must be the same as the rule's. 1271dbed73cbSSangeeta Misra * 1272dbed73cbSSangeeta Misra * For half-NAT/NAT, the range must either be 0 (port collapsing) or 1273dbed73cbSSangeeta Misra * it must be equal to the same value as the rule port range. 1274dbed73cbSSangeeta Misra * 1275dbed73cbSSangeeta Misra */ 1276dbed73cbSSangeeta Misra if (rule->ir_topo == ILB_TOPO_IMPL_DSR) { 1277dbed73cbSSangeeta Misra if (rule->ir_max_port != max_port || 1278dbed73cbSSangeeta Misra rule->ir_min_port != min_port) { 1279dbed73cbSSangeeta Misra ret = EINVAL; 1280dbed73cbSSangeeta Misra goto end; 1281dbed73cbSSangeeta Misra } 1282dbed73cbSSangeeta Misra } else { 1283dbed73cbSSangeeta Misra if ((range != rule->ir_max_port - rule->ir_min_port) && 1284dbed73cbSSangeeta Misra range != 0) { 1285dbed73cbSSangeeta Misra ret = EINVAL; 1286dbed73cbSSangeeta Misra goto end; 1287dbed73cbSSangeeta Misra } 1288dbed73cbSSangeeta Misra } 1289dbed73cbSSangeeta Misra 1290dbed73cbSSangeeta Misra /* Check for duplicate. */ 1291dbed73cbSSangeeta Misra for (server = rule->ir_servers; server != NULL; 1292dbed73cbSSangeeta Misra server = server->iser_next) { 1293dbed73cbSSangeeta Misra if (IN6_ARE_ADDR_EQUAL(&server->iser_addr_v6, &info->addr) || 1294dbed73cbSSangeeta Misra strcasecmp(server->iser_name, info->name) == 0) { 1295dbed73cbSSangeeta Misra break; 1296dbed73cbSSangeeta Misra } 1297dbed73cbSSangeeta Misra } 1298dbed73cbSSangeeta Misra if (server != NULL) { 1299dbed73cbSSangeeta Misra ret = EEXIST; 1300dbed73cbSSangeeta Misra goto end; 1301dbed73cbSSangeeta Misra } 1302dbed73cbSSangeeta Misra 1303dbed73cbSSangeeta Misra if ((server = kmem_zalloc(sizeof (ilb_server_t), KM_NOSLEEP)) == NULL) { 1304dbed73cbSSangeeta Misra ret = ENOMEM; 1305dbed73cbSSangeeta Misra goto end; 1306dbed73cbSSangeeta Misra } 1307dbed73cbSSangeeta Misra 1308dbed73cbSSangeeta Misra (void) memcpy(server->iser_name, info->name, ILB_SERVER_NAMESZ - 1); 1309dbed73cbSSangeeta Misra (void) inet_ntop(AF_INET6, &info->addr, server->iser_ip_addr, 1310dbed73cbSSangeeta Misra sizeof (server->iser_ip_addr)); 1311dbed73cbSSangeeta Misra stackid = (netstackid_t)(uintptr_t)ilbs->ilbs_ksp->ks_private; 1312dbed73cbSSangeeta Misra server->iser_ksp = ilb_server_kstat_init(stackid, rule, server); 1313dbed73cbSSangeeta Misra if (server->iser_ksp == NULL) { 1314dbed73cbSSangeeta Misra kmem_free(server, sizeof (ilb_server_t)); 1315dbed73cbSSangeeta Misra ret = EINVAL; 1316dbed73cbSSangeeta Misra goto end; 1317dbed73cbSSangeeta Misra } 1318dbed73cbSSangeeta Misra 1319dbed73cbSSangeeta Misra server->iser_stackid = stackid; 1320dbed73cbSSangeeta Misra server->iser_addr_v6 = info->addr; 1321dbed73cbSSangeeta Misra server->iser_min_port = min_port; 1322dbed73cbSSangeeta Misra server->iser_max_port = max_port; 1323dbed73cbSSangeeta Misra if (min_port != max_port) 1324dbed73cbSSangeeta Misra server->iser_port_range = B_TRUE; 1325dbed73cbSSangeeta Misra else 1326dbed73cbSSangeeta Misra server->iser_port_range = B_FALSE; 1327dbed73cbSSangeeta Misra 1328dbed73cbSSangeeta Misra /* 1329dbed73cbSSangeeta Misra * If the rule uses NAT, find/create the NAT source entry to use 1330dbed73cbSSangeeta Misra * for this server. 1331dbed73cbSSangeeta Misra */ 1332dbed73cbSSangeeta Misra if (rule->ir_topo == ILB_TOPO_IMPL_NAT) { 1333dbed73cbSSangeeta Misra in_port_t port; 1334dbed73cbSSangeeta Misra 1335dbed73cbSSangeeta Misra /* 1336dbed73cbSSangeeta Misra * If the server uses a port range, our port allocation 1337dbed73cbSSangeeta Misra * scheme needs to treat it as a wildcard. Refer to the 1338dbed73cbSSangeeta Misra * comments in ilb_nat.c about the scheme. 1339dbed73cbSSangeeta Misra */ 1340dbed73cbSSangeeta Misra if (server->iser_port_range) 1341dbed73cbSSangeeta Misra port = 0; 1342dbed73cbSSangeeta Misra else 1343dbed73cbSSangeeta Misra port = server->iser_min_port; 1344dbed73cbSSangeeta Misra 1345dbed73cbSSangeeta Misra if ((ret = ilb_create_nat_src(ilbs, &server->iser_nat_src, 1346dbed73cbSSangeeta Misra &server->iser_addr_v6, port, &rule->ir_nat_src_start, 1347dbed73cbSSangeeta Misra num_nat_src_v6(&rule->ir_nat_src_start, 1348dbed73cbSSangeeta Misra &rule->ir_nat_src_end))) != 0) { 1349dbed73cbSSangeeta Misra kstat_delete_netstack(server->iser_ksp, stackid); 1350dbed73cbSSangeeta Misra kmem_free(server, sizeof (ilb_server_t)); 1351dbed73cbSSangeeta Misra goto end; 1352dbed73cbSSangeeta Misra } 1353dbed73cbSSangeeta Misra } 1354dbed73cbSSangeeta Misra 1355dbed73cbSSangeeta Misra /* 1356dbed73cbSSangeeta Misra * The iser_lock is only used to protect iser_refcnt. All the other 1357dbed73cbSSangeeta Misra * fields in ilb_server_t should not change, except for iser_enabled. 1358dbed73cbSSangeeta Misra * The worst thing that can happen if iser_enabled is messed up is 1359dbed73cbSSangeeta Misra * that one or two packets may not be load balanced to a server 1360dbed73cbSSangeeta Misra * correctly. 1361dbed73cbSSangeeta Misra */ 1362dbed73cbSSangeeta Misra server->iser_refcnt = 1; 1363dbed73cbSSangeeta Misra server->iser_enabled = info->flags & ILB_SERVER_ENABLED ? B_TRUE : 1364dbed73cbSSangeeta Misra B_FALSE; 1365dbed73cbSSangeeta Misra mutex_init(&server->iser_lock, NULL, MUTEX_DEFAULT, NULL); 1366dbed73cbSSangeeta Misra cv_init(&server->iser_cv, NULL, CV_DEFAULT, NULL); 1367dbed73cbSSangeeta Misra 1368dbed73cbSSangeeta Misra /* Let the load balancing algorithm know about the addition. */ 1369dbed73cbSSangeeta Misra ASSERT(rule->ir_alg != NULL); 1370dbed73cbSSangeeta Misra if ((ret = rule->ir_alg->ilb_alg_server_add(server, 1371dbed73cbSSangeeta Misra rule->ir_alg->ilb_alg_data)) != 0) { 1372dbed73cbSSangeeta Misra kstat_delete_netstack(server->iser_ksp, stackid); 1373dbed73cbSSangeeta Misra kmem_free(server, sizeof (ilb_server_t)); 1374dbed73cbSSangeeta Misra goto end; 1375dbed73cbSSangeeta Misra } 1376dbed73cbSSangeeta Misra 1377dbed73cbSSangeeta Misra /* 1378dbed73cbSSangeeta Misra * No need to hold ir_lock since no other thread should manipulate 1379dbed73cbSSangeeta Misra * the following fields until ILB_RULE_BUSY is cleared. 1380dbed73cbSSangeeta Misra */ 1381dbed73cbSSangeeta Misra if (rule->ir_servers == NULL) { 1382dbed73cbSSangeeta Misra server->iser_next = NULL; 1383dbed73cbSSangeeta Misra } else { 1384dbed73cbSSangeeta Misra server->iser_next = rule->ir_servers; 1385dbed73cbSSangeeta Misra } 1386dbed73cbSSangeeta Misra rule->ir_servers = server; 1387dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, num_servers); 1388dbed73cbSSangeeta Misra 1389dbed73cbSSangeeta Misra end: 1390dbed73cbSSangeeta Misra mutex_enter(&rule->ir_lock); 1391dbed73cbSSangeeta Misra rule->ir_flags &= ~ILB_RULE_BUSY; 1392dbed73cbSSangeeta Misra cv_signal(&rule->ir_cv); 1393dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 1394dbed73cbSSangeeta Misra return (ret); 1395dbed73cbSSangeeta Misra } 1396dbed73cbSSangeeta Misra 1397dbed73cbSSangeeta Misra /* The routine executed by the delayed rule processing taskq. */ 1398dbed73cbSSangeeta Misra static void 1399dbed73cbSSangeeta Misra ilb_server_del_tq(void *arg) 1400dbed73cbSSangeeta Misra { 1401dbed73cbSSangeeta Misra ilb_server_t *server = (ilb_server_t *)arg; 1402dbed73cbSSangeeta Misra 1403dbed73cbSSangeeta Misra mutex_enter(&server->iser_lock); 1404dbed73cbSSangeeta Misra while (server->iser_refcnt > 1) 1405dbed73cbSSangeeta Misra cv_wait(&server->iser_cv, &server->iser_lock); 1406dbed73cbSSangeeta Misra kstat_delete_netstack(server->iser_ksp, server->iser_stackid); 1407dbed73cbSSangeeta Misra kmem_free(server, sizeof (ilb_server_t)); 1408dbed73cbSSangeeta Misra } 1409dbed73cbSSangeeta Misra 1410dbed73cbSSangeeta Misra /* 1411dbed73cbSSangeeta Misra * Delete a back end server from a rule. If the address is IPv4, it is assumed 1412dbed73cbSSangeeta Misra * to be passed in as a mapped address. 1413dbed73cbSSangeeta Misra */ 1414dbed73cbSSangeeta Misra int 1415dbed73cbSSangeeta Misra ilb_server_del(ilb_stack_t *ilbs, zoneid_t zoneid, const char *rule_name, 1416dbed73cbSSangeeta Misra ilb_rule_t *rule, in6_addr_t *addr) 1417dbed73cbSSangeeta Misra { 1418dbed73cbSSangeeta Misra ilb_server_t *server; 1419dbed73cbSSangeeta Misra ilb_server_t *prev_server; 1420dbed73cbSSangeeta Misra int ret = 0; 1421dbed73cbSSangeeta Misra 1422dbed73cbSSangeeta Misra ASSERT((rule == NULL && rule_name != NULL) || 1423dbed73cbSSangeeta Misra (rule != NULL && rule_name == NULL)); 1424dbed73cbSSangeeta Misra if (rule == NULL) { 1425dbed73cbSSangeeta Misra if ((rule = ilb_find_rule(ilbs, zoneid, rule_name, 1426dbed73cbSSangeeta Misra &ret)) == NULL) { 1427dbed73cbSSangeeta Misra return (ret); 1428dbed73cbSSangeeta Misra } 1429dbed73cbSSangeeta Misra } 1430dbed73cbSSangeeta Misra 1431dbed73cbSSangeeta Misra mutex_enter(&rule->ir_lock); 1432dbed73cbSSangeeta Misra /* If someone is already doing server add/del, sleeps and wait. */ 1433dbed73cbSSangeeta Misra while (rule->ir_flags & ILB_RULE_BUSY) { 1434dbed73cbSSangeeta Misra if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) { 1435dbed73cbSSangeeta Misra if (rule_name != NULL) { 1436dbed73cbSSangeeta Misra if (--rule->ir_refcnt <= 2) 1437dbed73cbSSangeeta Misra cv_signal(&rule->ir_cv); 1438dbed73cbSSangeeta Misra } 1439dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 1440dbed73cbSSangeeta Misra return (EINTR); 1441dbed73cbSSangeeta Misra } 1442dbed73cbSSangeeta Misra } 1443dbed73cbSSangeeta Misra /* 1444dbed73cbSSangeeta Misra * Set the rule to be busy to make sure that no new packet can 1445dbed73cbSSangeeta Misra * use this rule. 1446dbed73cbSSangeeta Misra */ 1447dbed73cbSSangeeta Misra rule->ir_flags |= ILB_RULE_BUSY; 1448dbed73cbSSangeeta Misra 1449dbed73cbSSangeeta Misra /* Now wait for all other guys to finish their work. */ 1450dbed73cbSSangeeta Misra while (rule->ir_refcnt > 2) { 1451dbed73cbSSangeeta Misra if (cv_wait_sig(&rule->ir_cv, &rule->ir_lock) == 0) { 1452dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 1453dbed73cbSSangeeta Misra ret = EINTR; 1454dbed73cbSSangeeta Misra goto end; 1455dbed73cbSSangeeta Misra } 1456dbed73cbSSangeeta Misra } 1457dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 1458dbed73cbSSangeeta Misra 1459dbed73cbSSangeeta Misra prev_server = NULL; 1460dbed73cbSSangeeta Misra for (server = rule->ir_servers; server != NULL; 1461dbed73cbSSangeeta Misra prev_server = server, server = server->iser_next) { 1462dbed73cbSSangeeta Misra if (IN6_ARE_ADDR_EQUAL(&server->iser_addr_v6, addr)) 1463dbed73cbSSangeeta Misra break; 1464dbed73cbSSangeeta Misra } 1465dbed73cbSSangeeta Misra if (server == NULL) { 1466dbed73cbSSangeeta Misra ret = ENOENT; 1467dbed73cbSSangeeta Misra goto end; 1468dbed73cbSSangeeta Misra } 1469dbed73cbSSangeeta Misra 1470dbed73cbSSangeeta Misra /* 1471dbed73cbSSangeeta Misra * Let the load balancing algorithm know about the removal. 1472dbed73cbSSangeeta Misra * The algorithm may disallow the removal... 1473dbed73cbSSangeeta Misra */ 1474dbed73cbSSangeeta Misra if ((ret = rule->ir_alg->ilb_alg_server_del(server, 1475dbed73cbSSangeeta Misra rule->ir_alg->ilb_alg_data)) != 0) { 1476dbed73cbSSangeeta Misra goto end; 1477dbed73cbSSangeeta Misra } 1478dbed73cbSSangeeta Misra 1479dbed73cbSSangeeta Misra if (prev_server == NULL) 1480dbed73cbSSangeeta Misra rule->ir_servers = server->iser_next; 1481dbed73cbSSangeeta Misra else 1482dbed73cbSSangeeta Misra prev_server->iser_next = server->iser_next; 1483dbed73cbSSangeeta Misra 1484dbed73cbSSangeeta Misra ILB_R_KSTAT_UPDATE(rule, num_servers, -1); 1485dbed73cbSSangeeta Misra 1486dbed73cbSSangeeta Misra /* 1487dbed73cbSSangeeta Misra * Mark the server as disabled so that if there is any sticky cache 1488dbed73cbSSangeeta Misra * using this server around, it won't be used. 1489dbed73cbSSangeeta Misra */ 1490dbed73cbSSangeeta Misra server->iser_enabled = B_FALSE; 1491dbed73cbSSangeeta Misra 1492dbed73cbSSangeeta Misra mutex_enter(&server->iser_lock); 1493dbed73cbSSangeeta Misra 1494dbed73cbSSangeeta Misra /* 1495dbed73cbSSangeeta Misra * De-allocate the NAT source array. The indiviual ilb_nat_src_entry_t 1496dbed73cbSSangeeta Misra * may not go away if there is still a conn using it. The NAT source 1497dbed73cbSSangeeta Misra * timer will do the garbage collection. 1498dbed73cbSSangeeta Misra */ 1499dbed73cbSSangeeta Misra ilb_destroy_nat_src(&server->iser_nat_src); 1500dbed73cbSSangeeta Misra 1501dbed73cbSSangeeta Misra /* If there is a hard limit on when a server should die, set it. */ 1502dbed73cbSSangeeta Misra if (rule->ir_conn_drain_timeout != 0) { 1503dbed73cbSSangeeta Misra (void) atomic_swap_64((uint64_t *)&server->iser_die_time, 1504d3d50737SRafael Vanoni ddi_get_lbolt64() + 1505d3d50737SRafael Vanoni SEC_TO_TICK(rule->ir_conn_drain_timeout)); 1506dbed73cbSSangeeta Misra } 1507dbed73cbSSangeeta Misra 1508dbed73cbSSangeeta Misra if (server->iser_refcnt > 1) { 1509dbed73cbSSangeeta Misra (void) taskq_dispatch(ilbs->ilbs_rule_taskq, ilb_server_del_tq, 1510dbed73cbSSangeeta Misra server, TQ_SLEEP); 1511dbed73cbSSangeeta Misra mutex_exit(&server->iser_lock); 1512dbed73cbSSangeeta Misra } else { 1513dbed73cbSSangeeta Misra kstat_delete_netstack(server->iser_ksp, server->iser_stackid); 1514dbed73cbSSangeeta Misra kmem_free(server, sizeof (ilb_server_t)); 1515dbed73cbSSangeeta Misra } 1516dbed73cbSSangeeta Misra 1517dbed73cbSSangeeta Misra end: 1518dbed73cbSSangeeta Misra mutex_enter(&rule->ir_lock); 1519dbed73cbSSangeeta Misra rule->ir_flags &= ~ILB_RULE_BUSY; 1520dbed73cbSSangeeta Misra if (rule_name != NULL) 1521dbed73cbSSangeeta Misra rule->ir_refcnt--; 1522dbed73cbSSangeeta Misra cv_signal(&rule->ir_cv); 1523dbed73cbSSangeeta Misra mutex_exit(&rule->ir_lock); 1524dbed73cbSSangeeta Misra return (ret); 1525dbed73cbSSangeeta Misra } 1526dbed73cbSSangeeta Misra 1527dbed73cbSSangeeta Misra /* 1528dbed73cbSSangeeta Misra * First check if the destination of the ICMP message matches a VIP of 1529dbed73cbSSangeeta Misra * a rule. If it does not, just return ILB_PASSED. 1530dbed73cbSSangeeta Misra * 1531dbed73cbSSangeeta Misra * If the destination matches a VIP: 1532dbed73cbSSangeeta Misra * 1533dbed73cbSSangeeta Misra * For ICMP_ECHO_REQUEST, generate a response on behalf of the back end 1534dbed73cbSSangeeta Misra * server. 1535dbed73cbSSangeeta Misra * 1536dbed73cbSSangeeta Misra * For ICMP_DEST_UNREACHABLE fragmentation needed, check inside the payload 1537dbed73cbSSangeeta Misra * and see which back end server we should send this message to. And we 1538dbed73cbSSangeeta Misra * need to do NAT on both the payload message and the outside IP packet. 1539dbed73cbSSangeeta Misra * 1540dbed73cbSSangeeta Misra * For other ICMP messages, drop them. 1541dbed73cbSSangeeta Misra */ 1542dbed73cbSSangeeta Misra /* ARGSUSED */ 1543dbed73cbSSangeeta Misra static int 1544dbed73cbSSangeeta Misra ilb_icmp_v4(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ipha_t *ipha, 1545dbed73cbSSangeeta Misra icmph_t *icmph, ipaddr_t *lb_dst) 1546dbed73cbSSangeeta Misra { 1547dbed73cbSSangeeta Misra ipaddr_t vip; 1548dbed73cbSSangeeta Misra ilb_rule_t *rule; 1549dbed73cbSSangeeta Misra in6_addr_t addr6; 1550dbed73cbSSangeeta Misra 1551dbed73cbSSangeeta Misra if (!ilb_rule_match_vip_v4(ilbs, ipha->ipha_dst, &rule)) 1552dbed73cbSSangeeta Misra return (ILB_PASSED); 1553dbed73cbSSangeeta Misra 1554dbed73cbSSangeeta Misra 1555dbed73cbSSangeeta Misra if ((uint8_t *)icmph + sizeof (icmph_t) > mp->b_wptr) { 1556dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, icmp_dropped); 1557dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1558dbed73cbSSangeeta Misra return (ILB_DROPPED); 1559dbed73cbSSangeeta Misra } 1560dbed73cbSSangeeta Misra 1561dbed73cbSSangeeta Misra switch (icmph->icmph_type) { 1562dbed73cbSSangeeta Misra case ICMP_ECHO_REQUEST: 1563dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, icmp_echo_processed); 1564dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1565dbed73cbSSangeeta Misra 1566dbed73cbSSangeeta Misra icmph->icmph_type = ICMP_ECHO_REPLY; 1567dbed73cbSSangeeta Misra icmph->icmph_checksum = 0; 1568dbed73cbSSangeeta Misra icmph->icmph_checksum = IP_CSUM(mp, IPH_HDR_LENGTH(ipha), 0); 1569dbed73cbSSangeeta Misra ipha->ipha_ttl = 1570dbed73cbSSangeeta Misra ilbs->ilbs_netstack->netstack_ip->ips_ip_def_ttl; 1571dbed73cbSSangeeta Misra *lb_dst = ipha->ipha_src; 1572dbed73cbSSangeeta Misra vip = ipha->ipha_dst; 1573dbed73cbSSangeeta Misra ipha->ipha_dst = ipha->ipha_src; 1574dbed73cbSSangeeta Misra ipha->ipha_src = vip; 1575dbed73cbSSangeeta Misra return (ILB_BALANCED); 1576dbed73cbSSangeeta Misra case ICMP_DEST_UNREACHABLE: { 1577dbed73cbSSangeeta Misra int ret; 1578dbed73cbSSangeeta Misra 1579dbed73cbSSangeeta Misra if (icmph->icmph_code != ICMP_FRAGMENTATION_NEEDED) { 1580dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, icmp_dropped); 1581dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1582dbed73cbSSangeeta Misra return (ILB_DROPPED); 1583dbed73cbSSangeeta Misra } 1584dbed73cbSSangeeta Misra if (ilb_check_icmp_conn(ilbs, mp, IPPROTO_IP, ipha, icmph, 1585dbed73cbSSangeeta Misra &addr6)) { 1586dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, icmp_2big_processed); 1587dbed73cbSSangeeta Misra ret = ILB_BALANCED; 1588dbed73cbSSangeeta Misra } else { 1589dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, icmp_2big_dropped); 1590dbed73cbSSangeeta Misra ret = ILB_DROPPED; 1591dbed73cbSSangeeta Misra } 1592dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1593dbed73cbSSangeeta Misra IN6_V4MAPPED_TO_IPADDR(&addr6, *lb_dst); 1594dbed73cbSSangeeta Misra return (ret); 1595dbed73cbSSangeeta Misra } 1596dbed73cbSSangeeta Misra default: 1597dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, icmp_dropped); 1598dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1599dbed73cbSSangeeta Misra return (ILB_DROPPED); 1600dbed73cbSSangeeta Misra } 1601dbed73cbSSangeeta Misra } 1602dbed73cbSSangeeta Misra 1603dbed73cbSSangeeta Misra /* ARGSUSED */ 1604dbed73cbSSangeeta Misra static int 1605dbed73cbSSangeeta Misra ilb_icmp_v6(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ip6_t *ip6h, 1606dbed73cbSSangeeta Misra icmp6_t *icmp6, in6_addr_t *lb_dst) 1607dbed73cbSSangeeta Misra { 1608dbed73cbSSangeeta Misra ilb_rule_t *rule; 1609dbed73cbSSangeeta Misra 1610dbed73cbSSangeeta Misra if (!ilb_rule_match_vip_v6(ilbs, &ip6h->ip6_dst, &rule)) 1611dbed73cbSSangeeta Misra return (ILB_PASSED); 1612dbed73cbSSangeeta Misra 1613dbed73cbSSangeeta Misra if ((uint8_t *)icmp6 + sizeof (icmp6_t) > mp->b_wptr) { 1614dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, icmp_dropped); 1615dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1616dbed73cbSSangeeta Misra return (ILB_DROPPED); 1617dbed73cbSSangeeta Misra } 1618dbed73cbSSangeeta Misra 1619dbed73cbSSangeeta Misra switch (icmp6->icmp6_type) { 1620dbed73cbSSangeeta Misra case ICMP6_ECHO_REQUEST: { 1621dbed73cbSSangeeta Misra int hdr_len; 1622dbed73cbSSangeeta Misra 1623dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, icmp_echo_processed); 1624dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1625dbed73cbSSangeeta Misra 1626dbed73cbSSangeeta Misra icmp6->icmp6_type = ICMP6_ECHO_REPLY; 1627dbed73cbSSangeeta Misra icmp6->icmp6_cksum = ip6h->ip6_plen; 1628dbed73cbSSangeeta Misra hdr_len = (char *)icmp6 - (char *)ip6h; 1629dbed73cbSSangeeta Misra icmp6->icmp6_cksum = IP_CSUM(mp, hdr_len, 1630dbed73cbSSangeeta Misra ilb_pseudo_sum_v6(ip6h, IPPROTO_ICMPV6)); 1631dbed73cbSSangeeta Misra ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 1632dbed73cbSSangeeta Misra ip6h->ip6_hops = 1633dbed73cbSSangeeta Misra ilbs->ilbs_netstack->netstack_ip->ips_ipv6_def_hops; 1634dbed73cbSSangeeta Misra *lb_dst = ip6h->ip6_src; 1635dbed73cbSSangeeta Misra ip6h->ip6_src = ip6h->ip6_dst; 1636dbed73cbSSangeeta Misra ip6h->ip6_dst = *lb_dst; 1637dbed73cbSSangeeta Misra return (ILB_BALANCED); 1638dbed73cbSSangeeta Misra } 1639dbed73cbSSangeeta Misra case ICMP6_PACKET_TOO_BIG: { 1640dbed73cbSSangeeta Misra int ret; 1641dbed73cbSSangeeta Misra 1642dbed73cbSSangeeta Misra if (ilb_check_icmp_conn(ilbs, mp, IPPROTO_IPV6, ip6h, icmp6, 1643dbed73cbSSangeeta Misra lb_dst)) { 1644dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, icmp_2big_processed); 1645dbed73cbSSangeeta Misra ret = ILB_BALANCED; 1646dbed73cbSSangeeta Misra } else { 1647dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, icmp_2big_dropped); 1648dbed73cbSSangeeta Misra ret = ILB_DROPPED; 1649dbed73cbSSangeeta Misra } 1650dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1651dbed73cbSSangeeta Misra return (ret); 1652dbed73cbSSangeeta Misra } 1653dbed73cbSSangeeta Misra default: 1654dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, icmp_dropped); 1655dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1656dbed73cbSSangeeta Misra return (ILB_DROPPED); 1657dbed73cbSSangeeta Misra } 1658dbed73cbSSangeeta Misra } 1659dbed73cbSSangeeta Misra 1660dbed73cbSSangeeta Misra /* 1661dbed73cbSSangeeta Misra * Common routine to check an incoming packet and decide what to do with it. 1662dbed73cbSSangeeta Misra * called by ilb_check_v4|v6(). 1663dbed73cbSSangeeta Misra */ 1664dbed73cbSSangeeta Misra static int 1665dbed73cbSSangeeta Misra ilb_check(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, in6_addr_t *src, 1666dbed73cbSSangeeta Misra in6_addr_t *dst, int l3, int l4, void *iph, uint8_t *tph, uint32_t pkt_len, 1667dbed73cbSSangeeta Misra in6_addr_t *lb_dst) 1668dbed73cbSSangeeta Misra { 1669dbed73cbSSangeeta Misra in_port_t sport, dport; 1670dbed73cbSSangeeta Misra tcpha_t *tcph; 1671dbed73cbSSangeeta Misra udpha_t *udph; 1672dbed73cbSSangeeta Misra ilb_rule_t *rule; 1673dbed73cbSSangeeta Misra ilb_server_t *server; 1674dbed73cbSSangeeta Misra boolean_t balanced; 1675dbed73cbSSangeeta Misra struct ilb_sticky_s *s = NULL; 1676dbed73cbSSangeeta Misra int ret; 1677dbed73cbSSangeeta Misra uint32_t ip_sum, tp_sum; 1678dbed73cbSSangeeta Misra ilb_nat_info_t info; 1679dbed73cbSSangeeta Misra uint16_t nat_src_idx; 1680dbed73cbSSangeeta Misra boolean_t busy; 1681dbed73cbSSangeeta Misra 1682dbed73cbSSangeeta Misra /* 1683dbed73cbSSangeeta Misra * We don't really need to switch here since both protocols's 1684dbed73cbSSangeeta Misra * ports are at the same offset. Just prepare for future protocol 1685dbed73cbSSangeeta Misra * specific processing. 1686dbed73cbSSangeeta Misra */ 1687dbed73cbSSangeeta Misra switch (l4) { 1688dbed73cbSSangeeta Misra case IPPROTO_TCP: 1689dbed73cbSSangeeta Misra if (tph + TCP_MIN_HEADER_LENGTH > mp->b_wptr) 1690dbed73cbSSangeeta Misra return (ILB_DROPPED); 1691dbed73cbSSangeeta Misra tcph = (tcpha_t *)tph; 1692dbed73cbSSangeeta Misra sport = tcph->tha_lport; 1693dbed73cbSSangeeta Misra dport = tcph->tha_fport; 1694dbed73cbSSangeeta Misra break; 1695dbed73cbSSangeeta Misra case IPPROTO_UDP: 1696dbed73cbSSangeeta Misra if (tph + sizeof (udpha_t) > mp->b_wptr) 1697dbed73cbSSangeeta Misra return (ILB_DROPPED); 1698dbed73cbSSangeeta Misra udph = (udpha_t *)tph; 1699dbed73cbSSangeeta Misra sport = udph->uha_src_port; 1700dbed73cbSSangeeta Misra dport = udph->uha_dst_port; 1701dbed73cbSSangeeta Misra break; 1702dbed73cbSSangeeta Misra default: 1703dbed73cbSSangeeta Misra return (ILB_PASSED); 1704dbed73cbSSangeeta Misra } 1705dbed73cbSSangeeta Misra 1706dbed73cbSSangeeta Misra /* Fast path, there is an existing conn. */ 1707dbed73cbSSangeeta Misra if (ilb_check_conn(ilbs, l3, iph, l4, tph, src, dst, sport, dport, 1708dbed73cbSSangeeta Misra pkt_len, lb_dst)) { 1709dbed73cbSSangeeta Misra return (ILB_BALANCED); 1710dbed73cbSSangeeta Misra } 1711dbed73cbSSangeeta Misra 1712dbed73cbSSangeeta Misra /* 1713dbed73cbSSangeeta Misra * If there is no existing connection for the incoming packet, check 1714dbed73cbSSangeeta Misra * to see if the packet matches a rule. If not, just let IP decide 1715dbed73cbSSangeeta Misra * what to do with it. 1716dbed73cbSSangeeta Misra * 1717dbed73cbSSangeeta Misra * Note: a reply from back end server should not match a rule. A 1718dbed73cbSSangeeta Misra * reply should match one existing conn. 1719dbed73cbSSangeeta Misra */ 1720dbed73cbSSangeeta Misra rule = ilb_rule_hash(ilbs, l3, l4, dst, dport, ill->ill_zoneid, 1721dbed73cbSSangeeta Misra pkt_len, &busy); 1722dbed73cbSSangeeta Misra if (rule == NULL) { 1723dbed73cbSSangeeta Misra /* If the rule is busy, just drop the packet. */ 1724dbed73cbSSangeeta Misra if (busy) 1725dbed73cbSSangeeta Misra return (ILB_DROPPED); 1726dbed73cbSSangeeta Misra else 1727dbed73cbSSangeeta Misra return (ILB_PASSED); 1728dbed73cbSSangeeta Misra } 1729dbed73cbSSangeeta Misra 1730dbed73cbSSangeeta Misra /* 1731dbed73cbSSangeeta Misra * The packet matches a rule, use the rule load balance algorithm 1732dbed73cbSSangeeta Misra * to find a server. 1733dbed73cbSSangeeta Misra */ 1734dbed73cbSSangeeta Misra balanced = rule->ir_alg->ilb_alg_lb(src, sport, dst, dport, 1735dbed73cbSSangeeta Misra rule->ir_alg->ilb_alg_data, &server); 1736dbed73cbSSangeeta Misra /* 1737dbed73cbSSangeeta Misra * This can only happen if there is no server in a rule or all 1738dbed73cbSSangeeta Misra * the servers are currently disabled. 1739dbed73cbSSangeeta Misra */ 1740dbed73cbSSangeeta Misra if (!balanced) 1741dbed73cbSSangeeta Misra goto no_server; 1742dbed73cbSSangeeta Misra 1743dbed73cbSSangeeta Misra /* 1744dbed73cbSSangeeta Misra * If the rule is sticky enabled, we need to check the sticky table. 1745dbed73cbSSangeeta Misra * If there is a sticky entry for the client, use the previous server 1746dbed73cbSSangeeta Misra * instead of the one found above (note that both can be the same). 1747dbed73cbSSangeeta Misra * If there is no entry for that client, add an entry to the sticky 1748dbed73cbSSangeeta Misra * table. Both the find and add are done in ilb_sticky_find_add() 1749dbed73cbSSangeeta Misra * to avoid checking for duplicate when adding an entry. 1750dbed73cbSSangeeta Misra */ 1751dbed73cbSSangeeta Misra if (rule->ir_flags & ILB_RULE_STICKY) { 1752dbed73cbSSangeeta Misra in6_addr_t addr; 1753dbed73cbSSangeeta Misra 1754dbed73cbSSangeeta Misra V6_MASK_COPY(*src, rule->ir_sticky_mask, addr); 1755dbed73cbSSangeeta Misra if ((server = ilb_sticky_find_add(ilbs, rule, &addr, server, 1756dbed73cbSSangeeta Misra &s, &nat_src_idx)) == NULL) { 1757dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, nomem_pkt_dropped); 1758dbed73cbSSangeeta Misra ILB_R_KSTAT_UPDATE(rule, nomem_bytes_dropped, pkt_len); 1759dbed73cbSSangeeta Misra goto no_server; 1760dbed73cbSSangeeta Misra } 1761dbed73cbSSangeeta Misra } 1762dbed73cbSSangeeta Misra 1763dbed73cbSSangeeta Misra /* 1764dbed73cbSSangeeta Misra * We are holding a reference on the rule, so the server 1765dbed73cbSSangeeta Misra * cannot go away. 1766dbed73cbSSangeeta Misra */ 1767dbed73cbSSangeeta Misra *lb_dst = server->iser_addr_v6; 1768dbed73cbSSangeeta Misra ILB_S_KSTAT(server, pkt_processed); 1769dbed73cbSSangeeta Misra ILB_S_KSTAT_UPDATE(server, bytes_processed, pkt_len); 1770dbed73cbSSangeeta Misra 1771dbed73cbSSangeeta Misra switch (rule->ir_topo) { 1772dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_NAT: { 1773dbed73cbSSangeeta Misra ilb_nat_src_entry_t *src_ent; 1774dbed73cbSSangeeta Misra uint16_t *src_idx; 1775dbed73cbSSangeeta Misra 1776dbed73cbSSangeeta Misra /* 1777dbed73cbSSangeeta Misra * We create a cache even if it is not a SYN segment. 1778dbed73cbSSangeeta Misra * The server should return a RST. When we see the 1779dbed73cbSSangeeta Misra * RST, we will destroy this cache. But by having 1780dbed73cbSSangeeta Misra * a cache, we know how to NAT the returned RST. 1781dbed73cbSSangeeta Misra */ 1782dbed73cbSSangeeta Misra info.vip = *dst; 1783dbed73cbSSangeeta Misra info.dport = dport; 1784dbed73cbSSangeeta Misra info.src = *src; 1785dbed73cbSSangeeta Misra info.sport = sport; 1786dbed73cbSSangeeta Misra 1787dbed73cbSSangeeta Misra /* If stickiness is enabled, use the same source address */ 1788dbed73cbSSangeeta Misra if (s != NULL) 1789dbed73cbSSangeeta Misra src_idx = &nat_src_idx; 1790dbed73cbSSangeeta Misra else 1791dbed73cbSSangeeta Misra src_idx = NULL; 1792dbed73cbSSangeeta Misra 1793dbed73cbSSangeeta Misra if ((src_ent = ilb_alloc_nat_addr(server->iser_nat_src, 1794dbed73cbSSangeeta Misra &info.nat_src, &info.nat_sport, src_idx)) == NULL) { 1795dbed73cbSSangeeta Misra if (s != NULL) 1796dbed73cbSSangeeta Misra ilb_sticky_refrele(s); 1797dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, pkt_dropped); 1798dbed73cbSSangeeta Misra ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len); 1799dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, noport_pkt_dropped); 1800dbed73cbSSangeeta Misra ILB_R_KSTAT_UPDATE(rule, noport_bytes_dropped, pkt_len); 1801dbed73cbSSangeeta Misra ret = ILB_DROPPED; 1802dbed73cbSSangeeta Misra break; 1803dbed73cbSSangeeta Misra } 1804dbed73cbSSangeeta Misra info.src_ent = src_ent; 1805dbed73cbSSangeeta Misra info.nat_dst = server->iser_addr_v6; 1806dbed73cbSSangeeta Misra if (rule->ir_port_range && server->iser_port_range) { 1807dbed73cbSSangeeta Misra info.nat_dport = htons(ntohs(dport) - 1808dbed73cbSSangeeta Misra rule->ir_min_port + server->iser_min_port); 1809dbed73cbSSangeeta Misra } else { 1810dbed73cbSSangeeta Misra info.nat_dport = htons(server->iser_min_port); 1811dbed73cbSSangeeta Misra } 1812dbed73cbSSangeeta Misra 1813dbed73cbSSangeeta Misra /* 1814dbed73cbSSangeeta Misra * If ilb_conn_add() fails, it will release the reference on 1815dbed73cbSSangeeta Misra * sticky info and de-allocate the NAT source port allocated 1816dbed73cbSSangeeta Misra * above. 1817dbed73cbSSangeeta Misra */ 1818dbed73cbSSangeeta Misra if (ilb_conn_add(ilbs, rule, server, src, sport, dst, 1819dbed73cbSSangeeta Misra dport, &info, &ip_sum, &tp_sum, s) != 0) { 1820dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, pkt_dropped); 1821dbed73cbSSangeeta Misra ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len); 1822dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, nomem_pkt_dropped); 1823dbed73cbSSangeeta Misra ILB_R_KSTAT_UPDATE(rule, nomem_bytes_dropped, pkt_len); 1824dbed73cbSSangeeta Misra ret = ILB_DROPPED; 1825dbed73cbSSangeeta Misra break; 1826dbed73cbSSangeeta Misra } 1827dbed73cbSSangeeta Misra ilb_full_nat(l3, iph, l4, tph, &info, ip_sum, tp_sum, B_TRUE); 1828dbed73cbSSangeeta Misra ret = ILB_BALANCED; 1829dbed73cbSSangeeta Misra break; 1830dbed73cbSSangeeta Misra } 1831dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_HALF_NAT: 1832dbed73cbSSangeeta Misra info.vip = *dst; 1833dbed73cbSSangeeta Misra info.nat_dst = server->iser_addr_v6; 1834dbed73cbSSangeeta Misra info.dport = dport; 1835dbed73cbSSangeeta Misra if (rule->ir_port_range && server->iser_port_range) { 1836dbed73cbSSangeeta Misra info.nat_dport = htons(ntohs(dport) - 1837dbed73cbSSangeeta Misra rule->ir_min_port + server->iser_min_port); 1838dbed73cbSSangeeta Misra } else { 1839dbed73cbSSangeeta Misra info.nat_dport = htons(server->iser_min_port); 1840dbed73cbSSangeeta Misra } 1841dbed73cbSSangeeta Misra 1842dbed73cbSSangeeta Misra if (ilb_conn_add(ilbs, rule, server, src, sport, dst, 1843dbed73cbSSangeeta Misra dport, &info, &ip_sum, &tp_sum, s) != 0) { 1844dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, pkt_dropped); 1845dbed73cbSSangeeta Misra ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len); 1846dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, nomem_pkt_dropped); 1847dbed73cbSSangeeta Misra ILB_R_KSTAT_UPDATE(rule, nomem_bytes_dropped, pkt_len); 1848dbed73cbSSangeeta Misra ret = ILB_DROPPED; 1849dbed73cbSSangeeta Misra break; 1850dbed73cbSSangeeta Misra } 1851dbed73cbSSangeeta Misra ilb_half_nat(l3, iph, l4, tph, &info, ip_sum, tp_sum, B_TRUE); 1852dbed73cbSSangeeta Misra 1853dbed73cbSSangeeta Misra ret = ILB_BALANCED; 1854dbed73cbSSangeeta Misra break; 1855dbed73cbSSangeeta Misra case ILB_TOPO_IMPL_DSR: 1856dbed73cbSSangeeta Misra /* 1857dbed73cbSSangeeta Misra * By decrementing the sticky refcnt, the period of 1858dbed73cbSSangeeta Misra * stickiness (life time of ilb_sticky_t) will be 1859dbed73cbSSangeeta Misra * from now to (now + default expiry time). 1860dbed73cbSSangeeta Misra */ 1861dbed73cbSSangeeta Misra if (s != NULL) 1862dbed73cbSSangeeta Misra ilb_sticky_refrele(s); 1863dbed73cbSSangeeta Misra ret = ILB_BALANCED; 1864dbed73cbSSangeeta Misra break; 1865dbed73cbSSangeeta Misra default: 1866dbed73cbSSangeeta Misra cmn_err(CE_PANIC, "data corruption unknown topology: %p", 1867dbed73cbSSangeeta Misra (void *) rule); 1868dbed73cbSSangeeta Misra break; 1869dbed73cbSSangeeta Misra } 1870dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1871dbed73cbSSangeeta Misra return (ret); 1872dbed73cbSSangeeta Misra 1873dbed73cbSSangeeta Misra no_server: 1874dbed73cbSSangeeta Misra /* This can only happen if there is no server available. */ 1875dbed73cbSSangeeta Misra ILB_R_KSTAT(rule, pkt_dropped); 1876dbed73cbSSangeeta Misra ILB_R_KSTAT_UPDATE(rule, bytes_dropped, pkt_len); 1877dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1878dbed73cbSSangeeta Misra return (ILB_DROPPED); 1879dbed73cbSSangeeta Misra } 1880dbed73cbSSangeeta Misra 1881dbed73cbSSangeeta Misra int 1882dbed73cbSSangeeta Misra ilb_check_v4(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ipha_t *ipha, int l4, 1883dbed73cbSSangeeta Misra uint8_t *tph, ipaddr_t *lb_dst) 1884dbed73cbSSangeeta Misra { 1885dbed73cbSSangeeta Misra in6_addr_t v6_src, v6_dst, v6_lb_dst; 1886dbed73cbSSangeeta Misra int ret; 1887dbed73cbSSangeeta Misra 1888dbed73cbSSangeeta Misra ASSERT(DB_REF(mp) == 1); 1889dbed73cbSSangeeta Misra 1890dbed73cbSSangeeta Misra if (l4 == IPPROTO_ICMP) { 1891dbed73cbSSangeeta Misra return (ilb_icmp_v4(ilbs, ill, mp, ipha, (icmph_t *)tph, 1892dbed73cbSSangeeta Misra lb_dst)); 1893dbed73cbSSangeeta Misra } 1894dbed73cbSSangeeta Misra 1895dbed73cbSSangeeta Misra IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &v6_src); 1896dbed73cbSSangeeta Misra IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &v6_dst); 1897dbed73cbSSangeeta Misra ret = ilb_check(ilbs, ill, mp, &v6_src, &v6_dst, IPPROTO_IP, l4, ipha, 1898dbed73cbSSangeeta Misra tph, ntohs(ipha->ipha_length), &v6_lb_dst); 1899dbed73cbSSangeeta Misra if (ret == ILB_BALANCED) 1900dbed73cbSSangeeta Misra IN6_V4MAPPED_TO_IPADDR(&v6_lb_dst, *lb_dst); 1901dbed73cbSSangeeta Misra return (ret); 1902dbed73cbSSangeeta Misra } 1903dbed73cbSSangeeta Misra 1904dbed73cbSSangeeta Misra int 1905dbed73cbSSangeeta Misra ilb_check_v6(ilb_stack_t *ilbs, ill_t *ill, mblk_t *mp, ip6_t *ip6h, int l4, 1906dbed73cbSSangeeta Misra uint8_t *tph, in6_addr_t *lb_dst) 1907dbed73cbSSangeeta Misra { 1908dbed73cbSSangeeta Misra uint32_t pkt_len; 1909dbed73cbSSangeeta Misra 1910dbed73cbSSangeeta Misra ASSERT(DB_REF(mp) == 1); 1911dbed73cbSSangeeta Misra 1912dbed73cbSSangeeta Misra if (l4 == IPPROTO_ICMPV6) { 1913dbed73cbSSangeeta Misra return (ilb_icmp_v6(ilbs, ill, mp, ip6h, (icmp6_t *)tph, 1914dbed73cbSSangeeta Misra lb_dst)); 1915dbed73cbSSangeeta Misra } 1916dbed73cbSSangeeta Misra 1917dbed73cbSSangeeta Misra pkt_len = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 1918dbed73cbSSangeeta Misra return (ilb_check(ilbs, ill, mp, &ip6h->ip6_src, &ip6h->ip6_dst, 1919dbed73cbSSangeeta Misra IPPROTO_IPV6, l4, ip6h, tph, pkt_len, lb_dst)); 1920dbed73cbSSangeeta Misra } 1921dbed73cbSSangeeta Misra 1922dbed73cbSSangeeta Misra void 1923dbed73cbSSangeeta Misra ilb_get_num_rules(ilb_stack_t *ilbs, zoneid_t zoneid, uint32_t *num_rules) 1924dbed73cbSSangeeta Misra { 1925dbed73cbSSangeeta Misra ilb_rule_t *tmp_rule; 1926dbed73cbSSangeeta Misra 1927dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_lock); 1928dbed73cbSSangeeta Misra *num_rules = 0; 1929dbed73cbSSangeeta Misra for (tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL; 1930dbed73cbSSangeeta Misra tmp_rule = tmp_rule->ir_next) { 1931dbed73cbSSangeeta Misra if (tmp_rule->ir_zoneid == zoneid) 1932dbed73cbSSangeeta Misra *num_rules += 1; 1933dbed73cbSSangeeta Misra } 1934dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 1935dbed73cbSSangeeta Misra } 1936dbed73cbSSangeeta Misra 1937dbed73cbSSangeeta Misra int 1938dbed73cbSSangeeta Misra ilb_get_num_servers(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, 1939dbed73cbSSangeeta Misra uint32_t *num_servers) 1940dbed73cbSSangeeta Misra { 1941dbed73cbSSangeeta Misra ilb_rule_t *rule; 1942dbed73cbSSangeeta Misra int err; 1943dbed73cbSSangeeta Misra 1944dbed73cbSSangeeta Misra if ((rule = ilb_find_rule(ilbs, zoneid, name, &err)) == NULL) 1945dbed73cbSSangeeta Misra return (err); 1946dbed73cbSSangeeta Misra *num_servers = rule->ir_kstat.num_servers.value.ui64; 1947dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1948dbed73cbSSangeeta Misra return (0); 1949dbed73cbSSangeeta Misra } 1950dbed73cbSSangeeta Misra 1951dbed73cbSSangeeta Misra int 1952dbed73cbSSangeeta Misra ilb_get_servers(ilb_stack_t *ilbs, zoneid_t zoneid, const char *name, 1953dbed73cbSSangeeta Misra ilb_server_info_t *servers, uint32_t *num_servers) 1954dbed73cbSSangeeta Misra { 1955dbed73cbSSangeeta Misra ilb_rule_t *rule; 1956dbed73cbSSangeeta Misra ilb_server_t *server; 1957dbed73cbSSangeeta Misra size_t cnt; 1958dbed73cbSSangeeta Misra int err; 1959dbed73cbSSangeeta Misra 1960dbed73cbSSangeeta Misra if ((rule = ilb_find_rule(ilbs, zoneid, name, &err)) == NULL) 1961dbed73cbSSangeeta Misra return (err); 1962dbed73cbSSangeeta Misra for (server = rule->ir_servers, cnt = *num_servers; 1963dbed73cbSSangeeta Misra server != NULL && cnt > 0; 1964dbed73cbSSangeeta Misra server = server->iser_next, cnt--, servers++) { 1965dbed73cbSSangeeta Misra (void) memcpy(servers->name, server->iser_name, 1966dbed73cbSSangeeta Misra ILB_SERVER_NAMESZ); 1967dbed73cbSSangeeta Misra servers->addr = server->iser_addr_v6; 1968dbed73cbSSangeeta Misra servers->min_port = htons(server->iser_min_port); 1969dbed73cbSSangeeta Misra servers->max_port = htons(server->iser_max_port); 1970dbed73cbSSangeeta Misra servers->flags = server->iser_enabled ? ILB_SERVER_ENABLED : 0; 1971dbed73cbSSangeeta Misra servers->err = 0; 1972dbed73cbSSangeeta Misra } 1973dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 1974dbed73cbSSangeeta Misra *num_servers -= cnt; 1975dbed73cbSSangeeta Misra 1976dbed73cbSSangeeta Misra return (0); 1977dbed73cbSSangeeta Misra } 1978dbed73cbSSangeeta Misra 1979dbed73cbSSangeeta Misra void 1980dbed73cbSSangeeta Misra ilb_get_rulenames(ilb_stack_t *ilbs, zoneid_t zoneid, uint32_t *num_names, 1981dbed73cbSSangeeta Misra char *buf) 1982dbed73cbSSangeeta Misra { 1983dbed73cbSSangeeta Misra ilb_rule_t *tmp_rule; 1984dbed73cbSSangeeta Misra int cnt; 1985dbed73cbSSangeeta Misra 1986dbed73cbSSangeeta Misra if (*num_names == 0) 1987dbed73cbSSangeeta Misra return; 1988dbed73cbSSangeeta Misra 1989dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_lock); 1990dbed73cbSSangeeta Misra for (cnt = 0, tmp_rule = ilbs->ilbs_rule_head; tmp_rule != NULL; 1991dbed73cbSSangeeta Misra tmp_rule = tmp_rule->ir_next) { 1992dbed73cbSSangeeta Misra if (tmp_rule->ir_zoneid != zoneid) 1993dbed73cbSSangeeta Misra continue; 1994dbed73cbSSangeeta Misra 1995dbed73cbSSangeeta Misra (void) memcpy(buf, tmp_rule->ir_name, ILB_RULE_NAMESZ); 1996dbed73cbSSangeeta Misra buf += ILB_RULE_NAMESZ; 1997dbed73cbSSangeeta Misra if (++cnt == *num_names) 1998dbed73cbSSangeeta Misra break; 1999dbed73cbSSangeeta Misra } 2000dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 2001dbed73cbSSangeeta Misra *num_names = cnt; 2002dbed73cbSSangeeta Misra } 2003dbed73cbSSangeeta Misra 2004dbed73cbSSangeeta Misra int 2005dbed73cbSSangeeta Misra ilb_rule_list(ilb_stack_t *ilbs, zoneid_t zoneid, ilb_rule_cmd_t *cmd) 2006dbed73cbSSangeeta Misra { 2007dbed73cbSSangeeta Misra ilb_rule_t *rule; 2008dbed73cbSSangeeta Misra int err; 2009dbed73cbSSangeeta Misra 2010dbed73cbSSangeeta Misra if ((rule = ilb_find_rule(ilbs, zoneid, cmd->name, &err)) == NULL) { 2011dbed73cbSSangeeta Misra return (err); 2012dbed73cbSSangeeta Misra } 2013dbed73cbSSangeeta Misra 2014dbed73cbSSangeeta Misra /* 2015dbed73cbSSangeeta Misra * Except the enabled flags, none of the following will change 2016dbed73cbSSangeeta Misra * in the life time of a rule. So we don't hold the mutex when 2017dbed73cbSSangeeta Misra * reading them. The worst is to report a wrong enabled flags. 2018dbed73cbSSangeeta Misra */ 2019dbed73cbSSangeeta Misra cmd->ip_ver = rule->ir_ipver; 2020dbed73cbSSangeeta Misra cmd->proto = rule->ir_proto; 2021dbed73cbSSangeeta Misra cmd->min_port = htons(rule->ir_min_port); 2022dbed73cbSSangeeta Misra cmd->max_port = htons(rule->ir_max_port); 2023dbed73cbSSangeeta Misra 2024dbed73cbSSangeeta Misra cmd->vip = rule->ir_target_v6; 2025dbed73cbSSangeeta Misra cmd->algo = rule->ir_alg_type; 2026dbed73cbSSangeeta Misra cmd->topo = rule->ir_topo; 2027dbed73cbSSangeeta Misra 2028dbed73cbSSangeeta Misra cmd->nat_src_start = rule->ir_nat_src_start; 2029dbed73cbSSangeeta Misra cmd->nat_src_end = rule->ir_nat_src_end; 2030dbed73cbSSangeeta Misra 2031dbed73cbSSangeeta Misra cmd->conn_drain_timeout = rule->ir_conn_drain_timeout; 2032dbed73cbSSangeeta Misra cmd->nat_expiry = rule->ir_nat_expiry; 2033dbed73cbSSangeeta Misra cmd->sticky_expiry = rule->ir_sticky_expiry; 2034dbed73cbSSangeeta Misra 2035dbed73cbSSangeeta Misra cmd->flags = 0; 2036dbed73cbSSangeeta Misra if (rule->ir_flags & ILB_RULE_ENABLED) 2037dbed73cbSSangeeta Misra cmd->flags |= ILB_RULE_ENABLED; 2038dbed73cbSSangeeta Misra if (rule->ir_flags & ILB_RULE_STICKY) { 2039dbed73cbSSangeeta Misra cmd->flags |= ILB_RULE_STICKY; 2040dbed73cbSSangeeta Misra cmd->sticky_mask = rule->ir_sticky_mask; 2041dbed73cbSSangeeta Misra } 2042dbed73cbSSangeeta Misra 2043dbed73cbSSangeeta Misra ILB_RULE_REFRELE(rule); 2044dbed73cbSSangeeta Misra return (0); 2045dbed73cbSSangeeta Misra } 2046dbed73cbSSangeeta Misra 2047dbed73cbSSangeeta Misra static void * 2048dbed73cbSSangeeta Misra ilb_stack_init(netstackid_t stackid, netstack_t *ns) 2049dbed73cbSSangeeta Misra { 2050dbed73cbSSangeeta Misra ilb_stack_t *ilbs; 2051dbed73cbSSangeeta Misra char tq_name[TASKQ_NAMELEN]; 2052dbed73cbSSangeeta Misra 2053dbed73cbSSangeeta Misra ilbs = kmem_alloc(sizeof (ilb_stack_t), KM_SLEEP); 2054dbed73cbSSangeeta Misra ilbs->ilbs_netstack = ns; 2055dbed73cbSSangeeta Misra 2056dbed73cbSSangeeta Misra ilbs->ilbs_rule_head = NULL; 2057dbed73cbSSangeeta Misra ilbs->ilbs_g_hash = NULL; 2058dbed73cbSSangeeta Misra mutex_init(&ilbs->ilbs_g_lock, NULL, MUTEX_DEFAULT, NULL); 2059dbed73cbSSangeeta Misra 2060dbed73cbSSangeeta Misra ilbs->ilbs_kstat = kmem_alloc(sizeof (ilb_g_kstat_t), KM_SLEEP); 2061dbed73cbSSangeeta Misra if ((ilbs->ilbs_ksp = ilb_kstat_g_init(stackid, ilbs)) == NULL) { 2062dbed73cbSSangeeta Misra kmem_free(ilbs, sizeof (ilb_stack_t)); 2063dbed73cbSSangeeta Misra return (NULL); 2064dbed73cbSSangeeta Misra } 2065dbed73cbSSangeeta Misra 2066dbed73cbSSangeeta Misra /* 2067dbed73cbSSangeeta Misra * ilbs_conn/sticky_hash related info is initialized in 2068dbed73cbSSangeeta Misra * ilb_conn/sticky_hash_init(). 2069dbed73cbSSangeeta Misra */ 2070dbed73cbSSangeeta Misra ilbs->ilbs_conn_taskq = NULL; 2071dbed73cbSSangeeta Misra ilbs->ilbs_rule_hash_size = ilb_rule_hash_size; 2072dbed73cbSSangeeta Misra ilbs->ilbs_conn_hash_size = ilb_conn_hash_size; 2073dbed73cbSSangeeta Misra ilbs->ilbs_c2s_conn_hash = NULL; 2074dbed73cbSSangeeta Misra ilbs->ilbs_s2c_conn_hash = NULL; 2075dbed73cbSSangeeta Misra ilbs->ilbs_conn_timer_list = NULL; 2076dbed73cbSSangeeta Misra 2077dbed73cbSSangeeta Misra ilbs->ilbs_sticky_hash = NULL; 2078dbed73cbSSangeeta Misra ilbs->ilbs_sticky_hash_size = ilb_sticky_hash_size; 2079dbed73cbSSangeeta Misra ilbs->ilbs_sticky_timer_list = NULL; 2080dbed73cbSSangeeta Misra ilbs->ilbs_sticky_taskq = NULL; 2081dbed73cbSSangeeta Misra 2082dbed73cbSSangeeta Misra /* The allocation is done later when there is a rule using NAT mode. */ 2083dbed73cbSSangeeta Misra ilbs->ilbs_nat_src = NULL; 2084dbed73cbSSangeeta Misra ilbs->ilbs_nat_src_hash_size = ilb_nat_src_hash_size; 2085dbed73cbSSangeeta Misra mutex_init(&ilbs->ilbs_nat_src_lock, NULL, MUTEX_DEFAULT, NULL); 2086dbed73cbSSangeeta Misra ilbs->ilbs_nat_src_tid = 0; 2087dbed73cbSSangeeta Misra 2088dbed73cbSSangeeta Misra /* For listing the conn hash table */ 2089dbed73cbSSangeeta Misra mutex_init(&ilbs->ilbs_conn_list_lock, NULL, MUTEX_DEFAULT, NULL); 2090dbed73cbSSangeeta Misra cv_init(&ilbs->ilbs_conn_list_cv, NULL, CV_DEFAULT, NULL); 2091dbed73cbSSangeeta Misra ilbs->ilbs_conn_list_busy = B_FALSE; 2092dbed73cbSSangeeta Misra ilbs->ilbs_conn_list_cur = 0; 2093dbed73cbSSangeeta Misra ilbs->ilbs_conn_list_connp = NULL; 2094dbed73cbSSangeeta Misra 2095dbed73cbSSangeeta Misra /* For listing the sticky hash table */ 2096dbed73cbSSangeeta Misra mutex_init(&ilbs->ilbs_sticky_list_lock, NULL, MUTEX_DEFAULT, NULL); 2097dbed73cbSSangeeta Misra cv_init(&ilbs->ilbs_sticky_list_cv, NULL, CV_DEFAULT, NULL); 2098dbed73cbSSangeeta Misra ilbs->ilbs_sticky_list_busy = B_FALSE; 2099dbed73cbSSangeeta Misra ilbs->ilbs_sticky_list_cur = 0; 2100dbed73cbSSangeeta Misra ilbs->ilbs_sticky_list_curp = NULL; 2101dbed73cbSSangeeta Misra 21026e0672acSSangeeta Misra (void) snprintf(tq_name, sizeof (tq_name), "ilb_rule_taskq_%p", 21036e0672acSSangeeta Misra (void *)ns); 2104dbed73cbSSangeeta Misra ilbs->ilbs_rule_taskq = taskq_create(tq_name, ILB_RULE_TASKQ_NUM_THR, 2105dbed73cbSSangeeta Misra minclsyspri, 1, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC); 2106dbed73cbSSangeeta Misra 2107dbed73cbSSangeeta Misra return (ilbs); 2108dbed73cbSSangeeta Misra } 2109dbed73cbSSangeeta Misra 2110dbed73cbSSangeeta Misra /* ARGSUSED */ 2111dbed73cbSSangeeta Misra static void 2112dbed73cbSSangeeta Misra ilb_stack_shutdown(netstackid_t stackid, void *arg) 2113dbed73cbSSangeeta Misra { 2114dbed73cbSSangeeta Misra ilb_stack_t *ilbs = (ilb_stack_t *)arg; 2115dbed73cbSSangeeta Misra ilb_rule_t *tmp_rule; 2116dbed73cbSSangeeta Misra 2117dbed73cbSSangeeta Misra ilb_sticky_hash_fini(ilbs); 2118dbed73cbSSangeeta Misra ilb_conn_hash_fini(ilbs); 2119dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_lock); 2120dbed73cbSSangeeta Misra while ((tmp_rule = ilbs->ilbs_rule_head) != NULL) { 2121dbed73cbSSangeeta Misra ilb_rule_hash_del(tmp_rule); 2122dbed73cbSSangeeta Misra ilb_rule_g_del(ilbs, tmp_rule); 2123dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 2124dbed73cbSSangeeta Misra ilb_rule_del_common(ilbs, tmp_rule); 2125dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_g_lock); 2126dbed73cbSSangeeta Misra } 2127dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_g_lock); 2128dbed73cbSSangeeta Misra if (ilbs->ilbs_nat_src != NULL) 2129dbed73cbSSangeeta Misra ilb_nat_src_fini(ilbs); 2130dbed73cbSSangeeta Misra } 2131dbed73cbSSangeeta Misra 2132dbed73cbSSangeeta Misra static void 2133dbed73cbSSangeeta Misra ilb_stack_fini(netstackid_t stackid, void * arg) 2134dbed73cbSSangeeta Misra { 2135dbed73cbSSangeeta Misra ilb_stack_t *ilbs = (ilb_stack_t *)arg; 2136dbed73cbSSangeeta Misra 2137dbed73cbSSangeeta Misra ilb_rule_hash_fini(ilbs); 2138dbed73cbSSangeeta Misra taskq_destroy(ilbs->ilbs_rule_taskq); 2139dbed73cbSSangeeta Misra ilb_kstat_g_fini(stackid, ilbs); 2140dbed73cbSSangeeta Misra kmem_free(ilbs->ilbs_kstat, sizeof (ilb_g_kstat_t)); 2141dbed73cbSSangeeta Misra kmem_free(ilbs, sizeof (ilb_stack_t)); 2142dbed73cbSSangeeta Misra } 2143dbed73cbSSangeeta Misra 2144dbed73cbSSangeeta Misra void 2145dbed73cbSSangeeta Misra ilb_ddi_g_init(void) 2146dbed73cbSSangeeta Misra { 2147dbed73cbSSangeeta Misra netstack_register(NS_ILB, ilb_stack_init, ilb_stack_shutdown, 2148dbed73cbSSangeeta Misra ilb_stack_fini); 2149dbed73cbSSangeeta Misra } 2150dbed73cbSSangeeta Misra 2151dbed73cbSSangeeta Misra void 2152dbed73cbSSangeeta Misra ilb_ddi_g_destroy(void) 2153dbed73cbSSangeeta Misra { 2154dbed73cbSSangeeta Misra netstack_unregister(NS_ILB); 2155dbed73cbSSangeeta Misra ilb_conn_cache_fini(); 2156dbed73cbSSangeeta Misra ilb_sticky_cache_fini(); 2157dbed73cbSSangeeta Misra } 2158