1dbed73cbSSangeeta Misra /* 2dbed73cbSSangeeta Misra * CDDL HEADER START 3dbed73cbSSangeeta Misra * 4dbed73cbSSangeeta Misra * The contents of this file are subject to the terms of the 5dbed73cbSSangeeta Misra * Common Development and Distribution License (the "License"). 6dbed73cbSSangeeta Misra * You may not use this file except in compliance with the License. 7dbed73cbSSangeeta Misra * 8dbed73cbSSangeeta Misra * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9dbed73cbSSangeeta Misra * or http://www.opensolaris.org/os/licensing. 10dbed73cbSSangeeta Misra * See the License for the specific language governing permissions 11dbed73cbSSangeeta Misra * and limitations under the License. 12dbed73cbSSangeeta Misra * 13dbed73cbSSangeeta Misra * When distributing Covered Code, include this CDDL HEADER in each 14dbed73cbSSangeeta Misra * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15dbed73cbSSangeeta Misra * If applicable, add the following below this CDDL HEADER, with the 16dbed73cbSSangeeta Misra * fields enclosed by brackets "[]" replaced with your own identifying 17dbed73cbSSangeeta Misra * information: Portions Copyright [yyyy] [name of copyright owner] 18dbed73cbSSangeeta Misra * 19dbed73cbSSangeeta Misra * CDDL HEADER END 20dbed73cbSSangeeta Misra */ 21dbed73cbSSangeeta Misra 22dbed73cbSSangeeta Misra /* 2347b75f87SKacheong Poon * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24dbed73cbSSangeeta Misra * Use is subject to license terms. 
25dbed73cbSSangeeta Misra */ 26dbed73cbSSangeeta Misra 27dbed73cbSSangeeta Misra #include <sys/types.h> 28dbed73cbSSangeeta Misra #include <sys/cmn_err.h> 29dbed73cbSSangeeta Misra #include <sys/crc32.h> 30dbed73cbSSangeeta Misra #include <netinet/in.h> 31dbed73cbSSangeeta Misra #include <inet/ip.h> 32dbed73cbSSangeeta Misra #include <inet/ip6.h> 33dbed73cbSSangeeta Misra #include <inet/tcp.h> 34dbed73cbSSangeeta Misra #include <inet/udp_impl.h> 35dbed73cbSSangeeta Misra #include <inet/ilb.h> 36dbed73cbSSangeeta Misra 37dbed73cbSSangeeta Misra #include "ilb_impl.h" 38dbed73cbSSangeeta Misra #include "ilb_stack.h" 39dbed73cbSSangeeta Misra #include "ilb_nat.h" 40dbed73cbSSangeeta Misra 41dbed73cbSSangeeta Misra /* 42dbed73cbSSangeeta Misra * NAT source entry garbarge collection timeout. The actual timeout value 43dbed73cbSSangeeta Misra * includes a random jitter bounded by the ILB_NAT_SRC_TIMEOUT_JITTER. 44dbed73cbSSangeeta Misra */ 45dbed73cbSSangeeta Misra #define ILB_NAT_SRC_TIMEOUT 30 46dbed73cbSSangeeta Misra #define ILB_NAT_SRC_TIMEOUT_JITTER 5 47dbed73cbSSangeeta Misra 48dbed73cbSSangeeta Misra /* key1/2 are assumed to be uint32_t. */ 49dbed73cbSSangeeta Misra #define ILB_NAT_SRC_HASH(hash, key1, key2, hash_size) \ 50dbed73cbSSangeeta Misra { \ 51dbed73cbSSangeeta Misra CRC32((hash), (key1), sizeof (uint32_t), -1U, crc32_table); \ 52dbed73cbSSangeeta Misra CRC32((hash), (key2), sizeof (uint32_t), (hash), crc32_table); \ 53dbed73cbSSangeeta Misra (hash) %= (hash_size); \ 54dbed73cbSSangeeta Misra } 55dbed73cbSSangeeta Misra 56dbed73cbSSangeeta Misra /* NAT source port space instance number. 
*/ 57dbed73cbSSangeeta Misra static uint32_t ilb_nat_src_instance = 0; 58dbed73cbSSangeeta Misra 59dbed73cbSSangeeta Misra static void 60dbed73cbSSangeeta Misra incr_addr(in6_addr_t *a) 61dbed73cbSSangeeta Misra { 62dbed73cbSSangeeta Misra uint32_t i; 63dbed73cbSSangeeta Misra 64dbed73cbSSangeeta Misra i = ntohl(a->s6_addr32[3]); 65dbed73cbSSangeeta Misra if (IN6_IS_ADDR_V4MAPPED(a)) { 66dbed73cbSSangeeta Misra a->s6_addr32[3] = htonl(++i); 67dbed73cbSSangeeta Misra ASSERT(i != 0); 68dbed73cbSSangeeta Misra return; 69dbed73cbSSangeeta Misra } 70dbed73cbSSangeeta Misra 71dbed73cbSSangeeta Misra if (++i != 0) { 72dbed73cbSSangeeta Misra a->s6_addr32[3] = htonl(i); 73dbed73cbSSangeeta Misra return; 74dbed73cbSSangeeta Misra } 75dbed73cbSSangeeta Misra a->s6_addr32[3] = 0; 76dbed73cbSSangeeta Misra i = ntohl(a->s6_addr[2]); 77dbed73cbSSangeeta Misra if (++i != 0) { 78dbed73cbSSangeeta Misra a->s6_addr32[2] = htonl(i); 79dbed73cbSSangeeta Misra return; 80dbed73cbSSangeeta Misra } 81dbed73cbSSangeeta Misra a->s6_addr32[2] = 0; 82dbed73cbSSangeeta Misra i = ntohl(a->s6_addr[1]); 83dbed73cbSSangeeta Misra if (++i != 0) { 84dbed73cbSSangeeta Misra a->s6_addr32[1] = htonl(i); 85dbed73cbSSangeeta Misra return; 86dbed73cbSSangeeta Misra } 87dbed73cbSSangeeta Misra a->s6_addr32[1] = 0; 88dbed73cbSSangeeta Misra i = ntohl(a->s6_addr[0]); 89dbed73cbSSangeeta Misra a->s6_addr[0] = htonl(++i); 90dbed73cbSSangeeta Misra ASSERT(i != 0); 91dbed73cbSSangeeta Misra } 92dbed73cbSSangeeta Misra 93dbed73cbSSangeeta Misra /* 94dbed73cbSSangeeta Misra * When ILB does full NAT, it first picks one source address from the rule's 95dbed73cbSSangeeta Misra * specified NAT source address list (currently done in round robin fashion). 96dbed73cbSSangeeta Misra * Then it needs to allocate a port. This source port must make the tuple 97dbed73cbSSangeeta Misra * (source address:source port:destination address:destination port) 98dbed73cbSSangeeta Misra * unique. 
* The destination part of the tuple is determined by the back
 * end server, and cannot be changed.
 *
 * To handle the above source port number allocation, ILB sets up a table
 * of entries identified by source address:back end server address:server port
 * tuple.  This table is used by all rules for NAT source port allocation.
 * Each tuple has an associated vmem arena used for managing the NAT source
 * port space between the source address and back end server address/port.
 * Each back end server (ilb_server_t) has an array of pointers (iser_nat_src)
 * to the different entries in this table for NAT source port allocation.
 * When ILB needs to allocate a NAT source address and port to talk to a back
 * end server, it picks a source address and uses the array pointer to get
 * to an entry.  Then it calls vmem_alloc() on the associated vmem arena to
 * find an unused port.
 *
 * When a back end server is added, ILB sets up the aforementioned array.
 * For each source address specified in the rule, ILB checks if there is any
 * existing entry which matches this source address:back end server address:
 * port tuple.  The server port is either a specific port or 0 (meaning wild
 * card port).  Normally, a back end server uses the same port as in the rule.
 * If a back end server is used to serve two different rules, there will be
 * two different ports.
Source port allocation for these two rules do not 120dbed73cbSSangeeta Misra * conflict, hence we can use two vmem arenas (two different entries in the 121dbed73cbSSangeeta Misra * table). But if a server uses port range in one rule, we will treat it as 122dbed73cbSSangeeta Misra * a wild card port. Wild card poart matches with any port. If this server 123dbed73cbSSangeeta Misra * is used to serve more than one rules and those rules use the same set of 124dbed73cbSSangeeta Misra * NAT source addresses, this means that they must share the same set of vmem 125dbed73cbSSangeeta Misra * arenas (source port spaces). We do this for simplicity reason. If not, 126dbed73cbSSangeeta Misra * we need to partition the port range so that we can identify different forms 127dbed73cbSSangeeta Misra * of source port number collision. 128dbed73cbSSangeeta Misra */ 129dbed73cbSSangeeta Misra 130dbed73cbSSangeeta Misra /* 131dbed73cbSSangeeta Misra * NAT source address initialization routine. 132dbed73cbSSangeeta Misra */ 133dbed73cbSSangeeta Misra void 134dbed73cbSSangeeta Misra ilb_nat_src_init(ilb_stack_t *ilbs) 135dbed73cbSSangeeta Misra { 136dbed73cbSSangeeta Misra int i; 137dbed73cbSSangeeta Misra 138dbed73cbSSangeeta Misra ilbs->ilbs_nat_src = kmem_zalloc(sizeof (ilb_nat_src_hash_t) * 139dbed73cbSSangeeta Misra ilbs->ilbs_nat_src_hash_size, KM_SLEEP); 140dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_nat_src_hash_size; i++) { 141dbed73cbSSangeeta Misra list_create(&ilbs->ilbs_nat_src[i].nsh_head, 142dbed73cbSSangeeta Misra sizeof (ilb_nat_src_entry_t), 143dbed73cbSSangeeta Misra offsetof(ilb_nat_src_entry_t, nse_link)); 144dbed73cbSSangeeta Misra mutex_init(&ilbs->ilbs_nat_src[i].nsh_lock, NULL, 145dbed73cbSSangeeta Misra MUTEX_DEFAULT, NULL); 146dbed73cbSSangeeta Misra } 14747b75f87SKacheong Poon ilbs->ilbs_nat_src_tid = timeout(ilb_nat_src_timer, ilbs, 14847b75f87SKacheong Poon SEC_TO_TICK(ILB_NAT_SRC_TIMEOUT + 14947b75f87SKacheong Poon gethrtime() % 
ILB_NAT_SRC_TIMEOUT_JITTER)); 150dbed73cbSSangeeta Misra } 151dbed73cbSSangeeta Misra 152dbed73cbSSangeeta Misra /* 153dbed73cbSSangeeta Misra * NAT source address clean up routine. 154dbed73cbSSangeeta Misra */ 155dbed73cbSSangeeta Misra void 156dbed73cbSSangeeta Misra ilb_nat_src_fini(ilb_stack_t *ilbs) 157dbed73cbSSangeeta Misra { 158dbed73cbSSangeeta Misra ilb_nat_src_entry_t *cur; 159dbed73cbSSangeeta Misra timeout_id_t tid; 160dbed73cbSSangeeta Misra int i; 161dbed73cbSSangeeta Misra 162dbed73cbSSangeeta Misra /* 163dbed73cbSSangeeta Misra * By setting ilbs_nat_src_tid to 0, the timer handler will not 164dbed73cbSSangeeta Misra * restart the timer. 165dbed73cbSSangeeta Misra */ 166dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_nat_src_lock); 167dbed73cbSSangeeta Misra tid = ilbs->ilbs_nat_src_tid; 168dbed73cbSSangeeta Misra ilbs->ilbs_nat_src_tid = 0; 169dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_nat_src_lock); 170dbed73cbSSangeeta Misra if (tid != 0) 171dbed73cbSSangeeta Misra (void) untimeout(tid); 172dbed73cbSSangeeta Misra 173dbed73cbSSangeeta Misra mutex_destroy(&ilbs->ilbs_nat_src_lock); 174dbed73cbSSangeeta Misra 175dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_nat_src_hash_size; i++) { 176dbed73cbSSangeeta Misra while ((cur = list_remove_head(&ilbs->ilbs_nat_src[i].nsh_head)) 177dbed73cbSSangeeta Misra != NULL) { 178dbed73cbSSangeeta Misra vmem_destroy(cur->nse_port_arena); 179dbed73cbSSangeeta Misra kmem_free(cur, sizeof (ilb_nat_src_entry_t)); 180dbed73cbSSangeeta Misra } 181dbed73cbSSangeeta Misra mutex_destroy(&ilbs->ilbs_nat_src[i].nsh_lock); 182dbed73cbSSangeeta Misra } 183dbed73cbSSangeeta Misra 184dbed73cbSSangeeta Misra kmem_free(ilbs->ilbs_nat_src, sizeof (ilb_nat_src_hash_t) * 185dbed73cbSSangeeta Misra ilbs->ilbs_nat_src_hash_size); 186dbed73cbSSangeeta Misra ilbs->ilbs_nat_src = NULL; 187dbed73cbSSangeeta Misra } 188dbed73cbSSangeeta Misra 189dbed73cbSSangeeta Misra /* An arena name is "ilb_ns" + "_xxxxxxxxxx" */ 
190dbed73cbSSangeeta Misra #define ARENA_NAMESZ 18 191dbed73cbSSangeeta Misra #define NAT_PORT_START 4096 192dbed73cbSSangeeta Misra #define NAT_PORT_SIZE 65535 - NAT_PORT_START 193dbed73cbSSangeeta Misra 194dbed73cbSSangeeta Misra /* 195dbed73cbSSangeeta Misra * Check if the NAT source and back end server pair ilb_nat_src_entry_t 196dbed73cbSSangeeta Misra * exists. If it does, increment the refcnt and return it. If not, create 197dbed73cbSSangeeta Misra * one and return it. 198dbed73cbSSangeeta Misra */ 199dbed73cbSSangeeta Misra static ilb_nat_src_entry_t * 200dbed73cbSSangeeta Misra ilb_find_nat_src(ilb_stack_t *ilbs, const in6_addr_t *nat_src, 201dbed73cbSSangeeta Misra const in6_addr_t *serv_addr, in_port_t port) 202dbed73cbSSangeeta Misra { 203dbed73cbSSangeeta Misra ilb_nat_src_entry_t *tmp; 204dbed73cbSSangeeta Misra uint32_t idx; 205dbed73cbSSangeeta Misra char arena_name[ARENA_NAMESZ]; 206dbed73cbSSangeeta Misra list_t *head; 207dbed73cbSSangeeta Misra 208dbed73cbSSangeeta Misra ILB_NAT_SRC_HASH(idx, &nat_src->s6_addr32[3], &serv_addr->s6_addr32[3], 209dbed73cbSSangeeta Misra ilbs->ilbs_nat_src_hash_size); 210dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_nat_src[idx].nsh_lock); 211dbed73cbSSangeeta Misra head = &ilbs->ilbs_nat_src[idx].nsh_head; 212dbed73cbSSangeeta Misra for (tmp = list_head(head); tmp != NULL; tmp = list_next(head, tmp)) { 213dbed73cbSSangeeta Misra if (IN6_ARE_ADDR_EQUAL(&tmp->nse_src_addr, nat_src) && 214dbed73cbSSangeeta Misra IN6_ARE_ADDR_EQUAL(&tmp->nse_serv_addr, serv_addr) && 215dbed73cbSSangeeta Misra (port == tmp->nse_port || port == 0 || 216dbed73cbSSangeeta Misra tmp->nse_port == 0)) { 217dbed73cbSSangeeta Misra break; 218dbed73cbSSangeeta Misra } 219dbed73cbSSangeeta Misra } 220dbed73cbSSangeeta Misra /* Found one, return it. 
*/ 221dbed73cbSSangeeta Misra if (tmp != NULL) { 222dbed73cbSSangeeta Misra tmp->nse_refcnt++; 223dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_nat_src[idx].nsh_lock); 224dbed73cbSSangeeta Misra return (tmp); 225dbed73cbSSangeeta Misra } 226dbed73cbSSangeeta Misra 227dbed73cbSSangeeta Misra tmp = kmem_alloc(sizeof (ilb_nat_src_entry_t), KM_NOSLEEP); 228dbed73cbSSangeeta Misra if (tmp == NULL) { 229dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_nat_src[idx].nsh_lock); 230dbed73cbSSangeeta Misra return (NULL); 231dbed73cbSSangeeta Misra } 232dbed73cbSSangeeta Misra tmp->nse_src_addr = *nat_src; 233dbed73cbSSangeeta Misra tmp->nse_serv_addr = *serv_addr; 234dbed73cbSSangeeta Misra tmp->nse_port = port; 235dbed73cbSSangeeta Misra tmp->nse_nsh_lock = &ilbs->ilbs_nat_src[idx].nsh_lock; 236dbed73cbSSangeeta Misra tmp->nse_refcnt = 1; 237dbed73cbSSangeeta Misra 238dbed73cbSSangeeta Misra (void) snprintf(arena_name, ARENA_NAMESZ, "ilb_ns_%u", 239*1a5e258fSJosef 'Jeff' Sipek atomic_inc_32_nv(&ilb_nat_src_instance)); 240dbed73cbSSangeeta Misra if ((tmp->nse_port_arena = vmem_create(arena_name, 241dbed73cbSSangeeta Misra (void *)NAT_PORT_START, NAT_PORT_SIZE, 1, NULL, NULL, NULL, 1, 242dbed73cbSSangeeta Misra VM_SLEEP | VMC_IDENTIFIER)) == NULL) { 243dbed73cbSSangeeta Misra kmem_free(tmp, sizeof (*tmp)); 244dbed73cbSSangeeta Misra return (NULL); 245dbed73cbSSangeeta Misra } 246dbed73cbSSangeeta Misra 247dbed73cbSSangeeta Misra list_insert_tail(head, tmp); 248dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_nat_src[idx].nsh_lock); 249dbed73cbSSangeeta Misra 250dbed73cbSSangeeta Misra return (tmp); 251dbed73cbSSangeeta Misra } 252dbed73cbSSangeeta Misra 253dbed73cbSSangeeta Misra /* 254dbed73cbSSangeeta Misra * Create ilb_nat_src_t struct for a ilb_server_t struct. 
255dbed73cbSSangeeta Misra */ 256dbed73cbSSangeeta Misra int 257dbed73cbSSangeeta Misra ilb_create_nat_src(ilb_stack_t *ilbs, ilb_nat_src_t **nat_src, 258dbed73cbSSangeeta Misra const in6_addr_t *srv_addr, in_port_t port, const in6_addr_t *start, 259dbed73cbSSangeeta Misra int num) 260dbed73cbSSangeeta Misra { 261dbed73cbSSangeeta Misra ilb_nat_src_t *src; 262dbed73cbSSangeeta Misra in6_addr_t cur_addr; 263dbed73cbSSangeeta Misra int i; 264dbed73cbSSangeeta Misra 265dbed73cbSSangeeta Misra if ((src = kmem_zalloc(sizeof (ilb_nat_src_t), KM_NOSLEEP)) == NULL) { 266dbed73cbSSangeeta Misra *nat_src = NULL; 267dbed73cbSSangeeta Misra return (ENOMEM); 268dbed73cbSSangeeta Misra } 269dbed73cbSSangeeta Misra cur_addr = *start; 270dbed73cbSSangeeta Misra for (i = 0; i < num && i < ILB_MAX_NAT_SRC; i++) { 271dbed73cbSSangeeta Misra src->src_list[i] = ilb_find_nat_src(ilbs, &cur_addr, srv_addr, 272dbed73cbSSangeeta Misra port); 273dbed73cbSSangeeta Misra if (src->src_list[i] == NULL) { 274dbed73cbSSangeeta Misra ilb_destroy_nat_src(&src); 275dbed73cbSSangeeta Misra *nat_src = NULL; 276dbed73cbSSangeeta Misra return (ENOMEM); 277dbed73cbSSangeeta Misra } 278dbed73cbSSangeeta Misra incr_addr(&cur_addr); 279dbed73cbSSangeeta Misra /* 280dbed73cbSSangeeta Misra * Increment num_src here so that we can call 281dbed73cbSSangeeta Misra * ilb_destroy_nat_src() when we need to do cleanup. 282dbed73cbSSangeeta Misra */ 283dbed73cbSSangeeta Misra src->num_src++; 284dbed73cbSSangeeta Misra } 285dbed73cbSSangeeta Misra *nat_src = src; 286dbed73cbSSangeeta Misra return (0); 287dbed73cbSSangeeta Misra } 288dbed73cbSSangeeta Misra 289dbed73cbSSangeeta Misra /* 290dbed73cbSSangeeta Misra * Timer routine for garbage collecting unneeded NAT source entry. We 291dbed73cbSSangeeta Misra * don't use a taskq for this since the table should be relatively small 292dbed73cbSSangeeta Misra * and should be OK for a timer to handle. 
293dbed73cbSSangeeta Misra */ 294dbed73cbSSangeeta Misra void 295dbed73cbSSangeeta Misra ilb_nat_src_timer(void *arg) 296dbed73cbSSangeeta Misra { 297dbed73cbSSangeeta Misra ilb_stack_t *ilbs = (ilb_stack_t *)arg; 298dbed73cbSSangeeta Misra ilb_nat_src_entry_t *cur, *tmp; 299dbed73cbSSangeeta Misra list_t *head; 300dbed73cbSSangeeta Misra int i; 301dbed73cbSSangeeta Misra 302dbed73cbSSangeeta Misra for (i = 0; i < ilbs->ilbs_nat_src_hash_size; i++) { 303dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_nat_src[i].nsh_lock); 304dbed73cbSSangeeta Misra head = &ilbs->ilbs_nat_src[i].nsh_head; 305dbed73cbSSangeeta Misra cur = list_head(head); 306dbed73cbSSangeeta Misra while (cur != NULL) { 307dbed73cbSSangeeta Misra /* 308dbed73cbSSangeeta Misra * When a server is removed, it will release its 309dbed73cbSSangeeta Misra * reference on an entry. But there may still be 310dbed73cbSSangeeta Misra * conn using some ports. So check the size also. 311dbed73cbSSangeeta Misra */ 312dbed73cbSSangeeta Misra if (cur->nse_refcnt != 0 || 313dbed73cbSSangeeta Misra vmem_size(cur->nse_port_arena, VMEM_ALLOC) != 0) { 314dbed73cbSSangeeta Misra cur = list_next(head, cur); 315dbed73cbSSangeeta Misra continue; 316dbed73cbSSangeeta Misra } 317dbed73cbSSangeeta Misra tmp = cur; 318dbed73cbSSangeeta Misra cur = list_next(head, cur); 319dbed73cbSSangeeta Misra list_remove(head, tmp); 320dbed73cbSSangeeta Misra vmem_destroy(tmp->nse_port_arena); 321dbed73cbSSangeeta Misra kmem_free(tmp, sizeof (ilb_nat_src_entry_t)); 322dbed73cbSSangeeta Misra } 323dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_nat_src[i].nsh_lock); 324dbed73cbSSangeeta Misra } 325dbed73cbSSangeeta Misra 326dbed73cbSSangeeta Misra mutex_enter(&ilbs->ilbs_nat_src_lock); 327dbed73cbSSangeeta Misra if (ilbs->ilbs_nat_src_tid == 0) { 328dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_nat_src_lock); 329dbed73cbSSangeeta Misra } else { 330dbed73cbSSangeeta Misra ilbs->ilbs_nat_src_tid = timeout(ilb_nat_src_timer, ilbs, 
331dbed73cbSSangeeta Misra SEC_TO_TICK(ILB_NAT_SRC_TIMEOUT + 332dbed73cbSSangeeta Misra gethrtime() % ILB_NAT_SRC_TIMEOUT_JITTER)); 333dbed73cbSSangeeta Misra mutex_exit(&ilbs->ilbs_nat_src_lock); 334dbed73cbSSangeeta Misra } 335dbed73cbSSangeeta Misra } 336dbed73cbSSangeeta Misra 337dbed73cbSSangeeta Misra /* 338dbed73cbSSangeeta Misra * Destroy a given ilb_nat_src_t struct. It will also release the reference 339dbed73cbSSangeeta Misra * hold on all its ilb_nat_src_entry_t. 340dbed73cbSSangeeta Misra */ 341dbed73cbSSangeeta Misra void 342dbed73cbSSangeeta Misra ilb_destroy_nat_src(ilb_nat_src_t **nat_src) 343dbed73cbSSangeeta Misra { 344dbed73cbSSangeeta Misra int i, size; 345dbed73cbSSangeeta Misra ilb_nat_src_t *src; 346dbed73cbSSangeeta Misra ilb_nat_src_entry_t *entry; 347dbed73cbSSangeeta Misra 348dbed73cbSSangeeta Misra src = *nat_src; 349dbed73cbSSangeeta Misra if (src == NULL) 350dbed73cbSSangeeta Misra return; 351dbed73cbSSangeeta Misra size = src->num_src; 352dbed73cbSSangeeta Misra /* 353dbed73cbSSangeeta Misra * Set each entry to be condemned and the garbarge collector will 354dbed73cbSSangeeta Misra * clean them up. 355dbed73cbSSangeeta Misra */ 356dbed73cbSSangeeta Misra for (i = 0; i < size; i++) { 357dbed73cbSSangeeta Misra entry = src->src_list[i]; 358dbed73cbSSangeeta Misra mutex_enter(entry->nse_nsh_lock); 359dbed73cbSSangeeta Misra entry->nse_refcnt--; 360dbed73cbSSangeeta Misra mutex_exit(entry->nse_nsh_lock); 361dbed73cbSSangeeta Misra } 362dbed73cbSSangeeta Misra kmem_free(src, sizeof (ilb_nat_src_t)); 363dbed73cbSSangeeta Misra *nat_src = NULL; 364dbed73cbSSangeeta Misra } 365dbed73cbSSangeeta Misra 366dbed73cbSSangeeta Misra /* 367dbed73cbSSangeeta Misra * Given a backend server address and its ilb_nat_src_t, allocate a source 368dbed73cbSSangeeta Misra * address and port for NAT usage. 
369dbed73cbSSangeeta Misra */ 370dbed73cbSSangeeta Misra ilb_nat_src_entry_t * 371dbed73cbSSangeeta Misra ilb_alloc_nat_addr(ilb_nat_src_t *src, in6_addr_t *addr, in_port_t *port, 372dbed73cbSSangeeta Misra uint16_t *nat_src_idx) 373dbed73cbSSangeeta Misra { 374dbed73cbSSangeeta Misra int i, try, size; 375dbed73cbSSangeeta Misra in_port_t p; 376dbed73cbSSangeeta Misra 377dbed73cbSSangeeta Misra size = src->num_src; 378dbed73cbSSangeeta Misra /* Increment of cur does not need to be atomic. It is just a hint. */ 379dbed73cbSSangeeta Misra if (nat_src_idx == NULL) 380dbed73cbSSangeeta Misra i = (++src->cur) % size; 381dbed73cbSSangeeta Misra else 382dbed73cbSSangeeta Misra i = *nat_src_idx; 383dbed73cbSSangeeta Misra 384dbed73cbSSangeeta Misra for (try = 0; try < size; try++) { 385dbed73cbSSangeeta Misra p = (in_port_t)(uintptr_t)vmem_alloc( 386dbed73cbSSangeeta Misra src->src_list[i]->nse_port_arena, 1, VM_NOSLEEP); 387dbed73cbSSangeeta Misra if (p != 0) 388dbed73cbSSangeeta Misra break; 389dbed73cbSSangeeta Misra /* 390dbed73cbSSangeeta Misra * If an index is given and we cannot allocate a port using 391dbed73cbSSangeeta Misra * that entry, return NULL. 392dbed73cbSSangeeta Misra */ 393dbed73cbSSangeeta Misra if (nat_src_idx != NULL) 394dbed73cbSSangeeta Misra return (NULL); 395dbed73cbSSangeeta Misra i = (i + 1) % size; 396dbed73cbSSangeeta Misra } 397dbed73cbSSangeeta Misra if (try == size) 398dbed73cbSSangeeta Misra return (NULL); 399dbed73cbSSangeeta Misra *addr = src->src_list[i]->nse_src_addr; 400dbed73cbSSangeeta Misra *port = htons(p); 401dbed73cbSSangeeta Misra return (src->src_list[i]); 402dbed73cbSSangeeta Misra } 403dbed73cbSSangeeta Misra 404dbed73cbSSangeeta Misra /* 405dbed73cbSSangeeta Misra * Use the pre-calculated checksum to adjust the checksum of a packet after 406dbed73cbSSangeeta Misra * NAT. 
407dbed73cbSSangeeta Misra */ 408dbed73cbSSangeeta Misra static void 409dbed73cbSSangeeta Misra adj_cksum(uint16_t *chksum, uint32_t adj_sum) 410dbed73cbSSangeeta Misra { 411dbed73cbSSangeeta Misra adj_sum += (uint16_t)~(*chksum); 412dbed73cbSSangeeta Misra while ((adj_sum >> 16) != 0) 413dbed73cbSSangeeta Misra adj_sum = (adj_sum & 0xffff) + (adj_sum >> 16); 414dbed73cbSSangeeta Misra *chksum = (uint16_t)~adj_sum; 415dbed73cbSSangeeta Misra } 416dbed73cbSSangeeta Misra 417dbed73cbSSangeeta Misra /* Do full NAT (replace both source and desination info) on a packet. */ 418dbed73cbSSangeeta Misra void 419dbed73cbSSangeeta Misra ilb_full_nat(int l3, void *iph, int l4, void *tph, ilb_nat_info_t *info, 420dbed73cbSSangeeta Misra uint32_t adj_ip_sum, uint32_t adj_tp_sum, boolean_t c2s) 421dbed73cbSSangeeta Misra { 422dbed73cbSSangeeta Misra in_port_t *orig_sport, *orig_dport; 423dbed73cbSSangeeta Misra uint16_t *tp_cksum; 424dbed73cbSSangeeta Misra 425dbed73cbSSangeeta Misra switch (l4) { 426dbed73cbSSangeeta Misra case IPPROTO_TCP: 427dbed73cbSSangeeta Misra orig_sport = &((tcpha_t *)tph)->tha_lport; 428dbed73cbSSangeeta Misra orig_dport = &((tcpha_t *)tph)->tha_fport; 429dbed73cbSSangeeta Misra tp_cksum = &((tcpha_t *)tph)->tha_sum; 430dbed73cbSSangeeta Misra break; 431dbed73cbSSangeeta Misra case IPPROTO_UDP: 432dbed73cbSSangeeta Misra orig_sport = &((udpha_t *)tph)->uha_src_port; 433dbed73cbSSangeeta Misra orig_dport = &((udpha_t *)tph)->uha_dst_port; 434dbed73cbSSangeeta Misra tp_cksum = &((udpha_t *)tph)->uha_checksum; 435dbed73cbSSangeeta Misra break; 436dbed73cbSSangeeta Misra default: 437dbed73cbSSangeeta Misra ASSERT(0); 438dbed73cbSSangeeta Misra return; 439dbed73cbSSangeeta Misra } 440dbed73cbSSangeeta Misra 441dbed73cbSSangeeta Misra switch (l3) { 442dbed73cbSSangeeta Misra case IPPROTO_IP: { 443dbed73cbSSangeeta Misra ipha_t *ipha; 444dbed73cbSSangeeta Misra 445dbed73cbSSangeeta Misra ipha = iph; 446dbed73cbSSangeeta Misra if (c2s) { 447dbed73cbSSangeeta 
Misra IN6_V4MAPPED_TO_IPADDR(&info->nat_src, 448dbed73cbSSangeeta Misra ipha->ipha_src); 449dbed73cbSSangeeta Misra IN6_V4MAPPED_TO_IPADDR(&info->nat_dst, 450dbed73cbSSangeeta Misra ipha->ipha_dst); 451dbed73cbSSangeeta Misra *orig_sport = info->nat_sport; 452dbed73cbSSangeeta Misra *orig_dport = info->nat_dport; 453dbed73cbSSangeeta Misra } else { 454dbed73cbSSangeeta Misra IN6_V4MAPPED_TO_IPADDR(&info->vip, ipha->ipha_src); 455dbed73cbSSangeeta Misra IN6_V4MAPPED_TO_IPADDR(&info->src, ipha->ipha_dst); 456dbed73cbSSangeeta Misra *orig_sport = info->dport; 457dbed73cbSSangeeta Misra *orig_dport = info->sport; 458dbed73cbSSangeeta Misra } 459dbed73cbSSangeeta Misra adj_cksum(&ipha->ipha_hdr_checksum, adj_ip_sum); 460dbed73cbSSangeeta Misra adj_cksum(tp_cksum, adj_tp_sum); 461dbed73cbSSangeeta Misra break; 462dbed73cbSSangeeta Misra } 463dbed73cbSSangeeta Misra case IPPROTO_IPV6: { 464dbed73cbSSangeeta Misra ip6_t *ip6h; 465dbed73cbSSangeeta Misra 466dbed73cbSSangeeta Misra ip6h = iph; 467dbed73cbSSangeeta Misra if (c2s) { 468dbed73cbSSangeeta Misra ip6h->ip6_src = info->nat_src; 469dbed73cbSSangeeta Misra ip6h->ip6_dst = info->nat_dst; 470dbed73cbSSangeeta Misra *orig_sport = info->nat_sport; 471dbed73cbSSangeeta Misra *orig_dport = info->nat_dport; 472dbed73cbSSangeeta Misra } else { 473dbed73cbSSangeeta Misra ip6h->ip6_src = info->vip; 474dbed73cbSSangeeta Misra ip6h->ip6_dst = info->src; 475dbed73cbSSangeeta Misra *orig_sport = info->dport; 476dbed73cbSSangeeta Misra *orig_dport = info->sport; 477dbed73cbSSangeeta Misra } 478dbed73cbSSangeeta Misra /* No checksum for IPv6 header */ 479dbed73cbSSangeeta Misra adj_cksum(tp_cksum, adj_tp_sum); 480dbed73cbSSangeeta Misra break; 481dbed73cbSSangeeta Misra } 482dbed73cbSSangeeta Misra default: 483dbed73cbSSangeeta Misra ASSERT(0); 484dbed73cbSSangeeta Misra break; 485dbed73cbSSangeeta Misra } 486dbed73cbSSangeeta Misra } 487dbed73cbSSangeeta Misra 488dbed73cbSSangeeta Misra /* Do half NAT (only replace the destination 
info) on a packet. */ 489dbed73cbSSangeeta Misra void 490dbed73cbSSangeeta Misra ilb_half_nat(int l3, void *iph, int l4, void *tph, ilb_nat_info_t *info, 491dbed73cbSSangeeta Misra uint32_t adj_ip_sum, uint32_t adj_tp_sum, boolean_t c2s) 492dbed73cbSSangeeta Misra { 493dbed73cbSSangeeta Misra in_port_t *orig_port; 494dbed73cbSSangeeta Misra uint16_t *tp_cksum; 495dbed73cbSSangeeta Misra 496dbed73cbSSangeeta Misra switch (l4) { 497dbed73cbSSangeeta Misra case IPPROTO_TCP: 498dbed73cbSSangeeta Misra if (c2s) 499dbed73cbSSangeeta Misra orig_port = &((tcpha_t *)tph)->tha_fport; 500dbed73cbSSangeeta Misra else 501dbed73cbSSangeeta Misra orig_port = &((tcpha_t *)tph)->tha_lport; 502dbed73cbSSangeeta Misra tp_cksum = &((tcpha_t *)tph)->tha_sum; 503dbed73cbSSangeeta Misra break; 504dbed73cbSSangeeta Misra case IPPROTO_UDP: 505dbed73cbSSangeeta Misra if (c2s) 506dbed73cbSSangeeta Misra orig_port = &((udpha_t *)tph)->uha_dst_port; 507dbed73cbSSangeeta Misra else 508dbed73cbSSangeeta Misra orig_port = &((udpha_t *)tph)->uha_src_port; 509dbed73cbSSangeeta Misra tp_cksum = &((udpha_t *)tph)->uha_checksum; 510dbed73cbSSangeeta Misra break; 511dbed73cbSSangeeta Misra default: 512dbed73cbSSangeeta Misra ASSERT(0); 513dbed73cbSSangeeta Misra return; 514dbed73cbSSangeeta Misra } 515dbed73cbSSangeeta Misra 516dbed73cbSSangeeta Misra switch (l3) { 517dbed73cbSSangeeta Misra case IPPROTO_IP: { 518dbed73cbSSangeeta Misra ipha_t *ipha; 519dbed73cbSSangeeta Misra 520dbed73cbSSangeeta Misra ipha = iph; 521dbed73cbSSangeeta Misra if (c2s) { 522dbed73cbSSangeeta Misra IN6_V4MAPPED_TO_IPADDR(&info->nat_dst, 523dbed73cbSSangeeta Misra ipha->ipha_dst); 524dbed73cbSSangeeta Misra *orig_port = info->nat_dport; 525dbed73cbSSangeeta Misra } else { 526dbed73cbSSangeeta Misra IN6_V4MAPPED_TO_IPADDR(&info->vip, ipha->ipha_src); 527dbed73cbSSangeeta Misra *orig_port = info->dport; 528dbed73cbSSangeeta Misra } 529dbed73cbSSangeeta Misra adj_cksum(&ipha->ipha_hdr_checksum, adj_ip_sum); 
530dbed73cbSSangeeta Misra adj_cksum(tp_cksum, adj_tp_sum); 531dbed73cbSSangeeta Misra break; 532dbed73cbSSangeeta Misra } 533dbed73cbSSangeeta Misra case IPPROTO_IPV6: { 534dbed73cbSSangeeta Misra ip6_t *ip6h; 535dbed73cbSSangeeta Misra 536dbed73cbSSangeeta Misra ip6h = iph; 537dbed73cbSSangeeta Misra if (c2s) { 538dbed73cbSSangeeta Misra ip6h->ip6_dst = info->nat_dst; 539dbed73cbSSangeeta Misra *orig_port = info->nat_dport; 540dbed73cbSSangeeta Misra } else { 541dbed73cbSSangeeta Misra ip6h->ip6_src = info->vip; 542dbed73cbSSangeeta Misra *orig_port = info->dport; 543dbed73cbSSangeeta Misra } 544dbed73cbSSangeeta Misra /* No checksum for IPv6 header */ 545dbed73cbSSangeeta Misra adj_cksum(tp_cksum, adj_tp_sum); 546dbed73cbSSangeeta Misra break; 547dbed73cbSSangeeta Misra } 548dbed73cbSSangeeta Misra default: 549dbed73cbSSangeeta Misra ASSERT(0); 550dbed73cbSSangeeta Misra break; 551dbed73cbSSangeeta Misra } 552dbed73cbSSangeeta Misra } 553dbed73cbSSangeeta Misra 554dbed73cbSSangeeta Misra /* Calculate the IPv6 pseudo checksum, used for ICMPv6 NAT. */ 555dbed73cbSSangeeta Misra uint32_t 556dbed73cbSSangeeta Misra ilb_pseudo_sum_v6(ip6_t *ip6h, uint8_t nxt_hdr) 557dbed73cbSSangeeta Misra { 558dbed73cbSSangeeta Misra uint32_t sum; 559dbed73cbSSangeeta Misra uint16_t *cur; 560dbed73cbSSangeeta Misra 561dbed73cbSSangeeta Misra cur = (uint16_t *)&ip6h->ip6_src; 562dbed73cbSSangeeta Misra sum = cur[0] + cur[1] + cur[2] + cur[3] + cur[4] + cur[5] + cur[6] + 563dbed73cbSSangeeta Misra cur[7] + cur[8] + cur[9] + cur[10] + cur[11] + cur[12] + cur[13] + 564dbed73cbSSangeeta Misra cur[14] + cur[15] + htons(nxt_hdr); 565dbed73cbSSangeeta Misra return ((sum & 0xffff) + (sum >> 16)); 566dbed73cbSSangeeta Misra } 567dbed73cbSSangeeta Misra 568dbed73cbSSangeeta Misra /* Do NAT on an ICMPv4 packet. 
*/ 569dbed73cbSSangeeta Misra void 570dbed73cbSSangeeta Misra ilb_nat_icmpv4(mblk_t *mp, ipha_t *out_iph, icmph_t *icmph, ipha_t *in_iph, 571dbed73cbSSangeeta Misra in_port_t *sport, in_port_t *dport, ilb_nat_info_t *info, uint32_t sum, 572dbed73cbSSangeeta Misra boolean_t full_nat) 573dbed73cbSSangeeta Misra { 574dbed73cbSSangeeta Misra if (full_nat) { 575dbed73cbSSangeeta Misra IN6_V4MAPPED_TO_IPADDR(&info->nat_src, out_iph->ipha_src); 576dbed73cbSSangeeta Misra IN6_V4MAPPED_TO_IPADDR(&info->nat_src, in_iph->ipha_dst); 577dbed73cbSSangeeta Misra *dport = info->nat_sport; 578dbed73cbSSangeeta Misra } 579dbed73cbSSangeeta Misra IN6_V4MAPPED_TO_IPADDR(&info->nat_dst, out_iph->ipha_dst); 580dbed73cbSSangeeta Misra adj_cksum(&out_iph->ipha_hdr_checksum, sum); 581dbed73cbSSangeeta Misra IN6_V4MAPPED_TO_IPADDR(&info->nat_dst, in_iph->ipha_src); 582dbed73cbSSangeeta Misra *sport = info->nat_dport; 583dbed73cbSSangeeta Misra 584dbed73cbSSangeeta Misra icmph->icmph_checksum = 0; 585dbed73cbSSangeeta Misra icmph->icmph_checksum = IP_CSUM(mp, IPH_HDR_LENGTH(out_iph), 0); 586dbed73cbSSangeeta Misra } 587dbed73cbSSangeeta Misra 588dbed73cbSSangeeta Misra /* Do NAT on an ICMPv6 packet. 
*/
/*
 * out_ip6h is the outer IPv6 header; in_ip6h, sport and dport belong to
 * the packet header embedded in the ICMPv6 message, whose source and
 * destination are swapped relative to the outer header.  The ICMPv6
 * checksum is recomputed: the field is seeded with the payload length and
 * the rest of the pseudo header is supplied as the initial sum.
 */
void
ilb_nat_icmpv6(mblk_t *mp, ip6_t *out_ip6h, icmp6_t *icmp6h, ip6_t *in_ip6h,
    in_port_t *sport, in_port_t *dport, ilb_nat_info_t *info,
    boolean_t full_nat)
{
	int icmp_off;

	/* Source side, only rewritten for full NAT. */
	if (full_nat) {
		out_ip6h->ip6_src = info->nat_src;
		in_ip6h->ip6_dst = info->nat_src;
		*dport = info->nat_sport;
	}

	/* Destination side, always rewritten. */
	out_ip6h->ip6_dst = info->nat_dst;
	in_ip6h->ip6_src = info->nat_dst;
	*sport = info->nat_dport;

	/* Offset of the ICMPv6 header from the start of the IPv6 header. */
	icmp_off = (char *)icmp6h - (char *)out_ip6h;

	icmp6h->icmp6_cksum = out_ip6h->ip6_plen;
	icmp6h->icmp6_cksum = IP_CSUM(mp, icmp_off,
	    ilb_pseudo_sum_v6(out_ip6h, IPPROTO_ICMPV6));
}