/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/strsun.h>
#include <sys/zone.h>
#include <netinet/in.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip6_asp.h>
#include <inet/ip_ire.h>
#include <inet/ipclassifier.h>

/*
 * Static initializers for the netmasks used by the default policy table
 * below, each written as four 32-bit words: /128, /96 and /16.
 *
 * The /16 mask is spelled differently per byte order because its 0xffff
 * has to land in the first two bytes of the word as laid out in memory,
 * which is a different integer value on big- and little-endian hosts.
 * The all-ones and all-zero words of the other masks read the same either
 * way.
 */
#define	IN6ADDR_MASK128_INIT \
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }
#define	IN6ADDR_MASK96_INIT	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0 }
#ifdef _BIG_ENDIAN
#define	IN6ADDR_MASK16_INIT	{ 0xffff0000U, 0, 0, 0 }
#else
#define	IN6ADDR_MASK16_INIT	{ 0x0000ffffU, 0, 0, 0 }
#endif


/*
 * This table is ordered such that longest prefix matches are hit first
 * (longer prefix lengths first).  The last entry must be the "default"
 * entry (::0/0).
60 */ 61 static ip6_asp_t default_ip6_asp_table[] = { 62 { IN6ADDR_LOOPBACK_INIT, IN6ADDR_MASK128_INIT, 63 "Loopback", 50 }, 64 { IN6ADDR_ANY_INIT, IN6ADDR_MASK96_INIT, 65 "IPv4_Compatible", 20 }, 66 #ifdef _BIG_ENDIAN 67 { { 0, 0, 0x0000ffffU, 0 }, IN6ADDR_MASK96_INIT, 68 "IPv4", 10 }, 69 { { 0x20020000U, 0, 0, 0 }, IN6ADDR_MASK16_INIT, 70 "6to4", 30 }, 71 #else 72 { { 0, 0, 0xffff0000U, 0 }, IN6ADDR_MASK96_INIT, 73 "IPv4", 10 }, 74 { { 0x00000220U, 0, 0, 0 }, IN6ADDR_MASK16_INIT, 75 "6to4", 30 }, 76 #endif 77 { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, 78 "Default", 40 } 79 }; 80 81 /* 82 * The IPv6 Default Address Selection policy table. 83 * Until someone up above reconfigures the policy table, use the global 84 * default. The table needs no lock since the only way to alter it is 85 * through the SIOCSIP6ADDRPOLICY which is exclusive in ip. 86 */ 87 static void ip6_asp_copy(ip6_asp_t *, ip6_asp_t *, uint_t); 88 static void ip6_asp_check_for_updates(ip_stack_t *); 89 90 void 91 ip6_asp_init(ip_stack_t *ipst) 92 { 93 /* Initialize the table lock */ 94 mutex_init(&ipst->ips_ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL); 95 96 ipst->ips_ip6_asp_table = default_ip6_asp_table; 97 98 ipst->ips_ip6_asp_table_count = 99 sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t); 100 } 101 102 void 103 ip6_asp_free(ip_stack_t *ipst) 104 { 105 if (ipst->ips_ip6_asp_table != default_ip6_asp_table) { 106 kmem_free(ipst->ips_ip6_asp_table, 107 ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t)); 108 ipst->ips_ip6_asp_table = NULL; 109 } 110 mutex_destroy(&ipst->ips_ip6_asp_lock); 111 } 112 113 /* 114 * Return false if the table is being updated. Else, increment the ref 115 * count and return true. 
116 */ 117 boolean_t 118 ip6_asp_can_lookup(ip_stack_t *ipst) 119 { 120 mutex_enter(&ipst->ips_ip6_asp_lock); 121 if (ipst->ips_ip6_asp_uip) { 122 mutex_exit(&ipst->ips_ip6_asp_lock); 123 return (B_FALSE); 124 } 125 IP6_ASP_TABLE_REFHOLD(ipst); 126 mutex_exit(&ipst->ips_ip6_asp_lock); 127 return (B_TRUE); 128 129 } 130 131 void 132 ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func) 133 { 134 conn_t *connp = Q_TO_CONN(q); 135 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 136 137 ASSERT((mp->b_prev == NULL) && (mp->b_queue == NULL) && 138 (mp->b_next == NULL)); 139 mp->b_queue = (void *)q; 140 mp->b_prev = (void *)func; 141 mp->b_next = NULL; 142 143 mutex_enter(&ipst->ips_ip6_asp_lock); 144 if (ipst->ips_ip6_asp_pending_ops == NULL) { 145 ASSERT(ipst->ips_ip6_asp_pending_ops_tail == NULL); 146 ipst->ips_ip6_asp_pending_ops = 147 ipst->ips_ip6_asp_pending_ops_tail = mp; 148 } else { 149 ipst->ips_ip6_asp_pending_ops_tail->b_next = mp; 150 ipst->ips_ip6_asp_pending_ops_tail = mp; 151 } 152 mutex_exit(&ipst->ips_ip6_asp_lock); 153 } 154 155 static void 156 ip6_asp_complete_op(ip_stack_t *ipst) 157 { 158 mblk_t *mp; 159 queue_t *q; 160 aspfunc_t func; 161 162 mutex_enter(&ipst->ips_ip6_asp_lock); 163 while (ipst->ips_ip6_asp_pending_ops != NULL) { 164 mp = ipst->ips_ip6_asp_pending_ops; 165 ipst->ips_ip6_asp_pending_ops = mp->b_next; 166 mp->b_next = NULL; 167 if (ipst->ips_ip6_asp_pending_ops == NULL) 168 ipst->ips_ip6_asp_pending_ops_tail = NULL; 169 mutex_exit(&ipst->ips_ip6_asp_lock); 170 171 q = (queue_t *)mp->b_queue; 172 func = (aspfunc_t)mp->b_prev; 173 174 mp->b_prev = NULL; 175 mp->b_queue = NULL; 176 177 178 (*func)(NULL, q, mp, NULL); 179 mutex_enter(&ipst->ips_ip6_asp_lock); 180 } 181 mutex_exit(&ipst->ips_ip6_asp_lock); 182 } 183 184 /* 185 * Decrement reference count. When it gets to 0, we check for (pending) 186 * saved update to the table, if any. 
187 */ 188 void 189 ip6_asp_table_refrele(ip_stack_t *ipst) 190 { 191 IP6_ASP_TABLE_REFRELE(ipst); 192 } 193 194 /* 195 * This function is guaranteed never to return a NULL pointer. It 196 * will always return information from one of the entries in the 197 * asp_table (which will never be empty). If a pointer is passed 198 * in for the precedence, the precedence value will be set; a 199 * pointer to the label will be returned by the function. 200 * 201 * Since the table is only anticipated to have five or six entries 202 * total, the lookup algorithm hasn't been optimized to anything 203 * better than O(n). 204 */ 205 char * 206 ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence, ip_stack_t *ipst) 207 { 208 ip6_asp_t *aspp; 209 ip6_asp_t *match = NULL; 210 ip6_asp_t *default_policy; 211 212 aspp = ipst->ips_ip6_asp_table; 213 /* The default entry must always be the last one */ 214 default_policy = aspp + ipst->ips_ip6_asp_table_count - 1; 215 216 while (match == NULL) { 217 if (aspp == default_policy) { 218 match = aspp; 219 } else { 220 if (V6_MASK_EQ(*addr, aspp->ip6_asp_mask, 221 aspp->ip6_asp_prefix)) 222 match = aspp; 223 else 224 aspp++; 225 } 226 } 227 228 if (precedence != NULL) 229 *precedence = match->ip6_asp_precedence; 230 return (match->ip6_asp_label); 231 } 232 233 /* 234 * If we had deferred updating the table because of outstanding references, 235 * do it now. Note, we don't do error checking on the queued IOCTL mblk, since 236 * ip_sioctl_ip6addrpolicy() has already done it for us. 
237 */ 238 void 239 ip6_asp_check_for_updates(ip_stack_t *ipst) 240 { 241 ip6_asp_t *table; 242 size_t table_size; 243 mblk_t *data_mp, *mp; 244 struct iocblk *iocp; 245 246 mutex_enter(&ipst->ips_ip6_asp_lock); 247 if (ipst->ips_ip6_asp_pending_update == NULL || 248 ipst->ips_ip6_asp_refcnt > 0) { 249 mutex_exit(&ipst->ips_ip6_asp_lock); 250 return; 251 } 252 253 mp = ipst->ips_ip6_asp_pending_update; 254 ipst->ips_ip6_asp_pending_update = NULL; 255 ASSERT(mp->b_prev != NULL); 256 257 ipst->ips_ip6_asp_uip = B_TRUE; 258 259 iocp = (struct iocblk *)mp->b_rptr; 260 data_mp = mp->b_cont; 261 if (data_mp == NULL) { 262 table = NULL; 263 table_size = iocp->ioc_count; 264 } else { 265 table = (ip6_asp_t *)data_mp->b_rptr; 266 table_size = iocp->ioc_count; 267 } 268 269 ip6_asp_replace(mp, table, table_size, B_TRUE, ipst, 270 iocp->ioc_flag & IOC_MODELS); 271 } 272 273 /* 274 * ip6_asp_replace replaces the contents of the IPv6 address selection 275 * policy table with those specified in new_table. If new_table is NULL, 276 * this indicates that the caller wishes ip to use the default policy 277 * table. The caller is responsible for making sure that there are exactly 278 * new_count policy entries in new_table. 
*/
/*
 * Parameters, as used below:
 *   mp        - the ioctl message.  mp->b_prev carries the queue to reply
 *               on (NULL means no reply is sent and mp is freed) and
 *               mp->b_rptr is treated as a struct iocblk for the reply.
 *   new_table - entries to install, or NULL to revert to the default.
 *   new_size  - size of new_table in bytes; must be a whole multiple of
 *               the per-entry size for the caller's data model.
 *   locked    - B_TRUE when the caller already holds ips_ip6_asp_lock and
 *               has set ips_ip6_asp_uip, as ip6_asp_check_for_updates()
 *               does.  The lock is not held on return in either case.
 *   ipst      - IP stack instance whose table is replaced.
 *   datamodel - data model of the requester; only consulted when 32-bit
 *               syscall emulation is compiled in.
 *
 * Nothing is returned; the outcome travels in the ioctl reply, whose
 * ioc_error is 0, EINVAL, ENOMEM or EAGAIN.  If the table is still
 * referenced by lookups and no other update is waiting, mp is parked in
 * ips_ip6_asp_pending_update and this call sends no reply at all.
 * On every path that does reply (or frees mp), operations queued with
 * ip6_asp_pending_op() are run afterwards.
 */
/*ARGSUSED5*/
void
ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size,
    boolean_t locked, ip_stack_t *ipst, model_t datamodel)
{
	int			ret_val = 0;
	ip6_asp_t		*tmp_table;
	uint_t			count;
	queue_t			*q;
	struct iocblk		*iocp;
	/* Size of one entry as laid out by the requesting process. */
#if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
	size_t ip6_asp_size = SIZEOF_STRUCT(ip6_asp, datamodel);
#else
	const size_t ip6_asp_size = sizeof (ip6_asp_t);
#endif

	if (new_size % ip6_asp_size != 0) {
		ip1dbg(("ip6_asp_replace: invalid table size\n"));
		ret_val = EINVAL;
		/* Only a caller that came in locked has a lock to drop. */
		if (locked)
			goto unlock_end;
		goto replace_end;
	} else {
		count = new_size / ip6_asp_size;
	}


	if (!locked)
		mutex_enter(&ipst->ips_ip6_asp_lock);
	/*
	 * Check if we are in the process of creating any IRE using the
	 * current information. If so, wait till that is done.
	 */
	if (!locked && ipst->ips_ip6_asp_refcnt > 0) {
		/* Save this request for later processing */
		if (ipst->ips_ip6_asp_pending_update == NULL) {
			ipst->ips_ip6_asp_pending_update = mp;
		} else {
			/* Let's not queue multiple requests for now */
			ip1dbg(("ip6_asp_replace: discarding request\n"));
			mutex_exit(&ipst->ips_ip6_asp_lock);
			ret_val = EAGAIN;
			goto replace_end;
		}
		/* Deferred: mp now belongs to the pending-update slot. */
		mutex_exit(&ipst->ips_ip6_asp_lock);
		return;
	}

	/* Prevent lookups till the table has been updated */
	if (!locked)
		ipst->ips_ip6_asp_uip = B_TRUE;

	ASSERT(ipst->ips_ip6_asp_refcnt == 0);

	if (new_table == NULL) {
		/*
		 * This is a special case.  The user wants to revert
		 * back to using the default table.
		 *
		 * NOTE(review): this path leaves without the ire_walk_v6()
		 * cache flush that follows the installation of a
		 * user-supplied table further down -- confirm that is
		 * intended.
		 */
		if (ipst->ips_ip6_asp_table == default_ip6_asp_table)
			goto unlock_end;

		kmem_free(ipst->ips_ip6_asp_table,
		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
		ipst->ips_ip6_asp_table = default_ip6_asp_table;
		ipst->ips_ip6_asp_table_count =
		    sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
		goto unlock_end;
	}

	if (count == 0) {
		ret_val = EINVAL;
		ip1dbg(("ip6_asp_replace: empty table\n"));
		goto unlock_end;
	}

	/* The table lock is held, so the allocation must not sleep. */
	if ((tmp_table = kmem_alloc(count * sizeof (ip6_asp_t), KM_NOSLEEP)) ==
	    NULL) {
		ret_val = ENOMEM;
		goto unlock_end;
	}

#if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4

	/*
	 * If 'new_table' -actually- originates from a 32-bit process
	 * then the nicely aligned ip6_asp_label array will be
	 * subtly misaligned on this kernel, because the structure
	 * is 8 byte aligned in the kernel, but only 4 byte aligned in
	 * userland.  Fix it up here.
	 *
	 * XX64	See the notes in ip_sioctl_ip6addrpolicy.  Perhaps we could
	 *	do the datamodel transformation (below) there instead of here?
	 */
	if (datamodel == IOC_ILP32) {
		ip6_asp_t *dst;
		ip6_asp32_t *src;
		int i;

		if ((dst = kmem_zalloc(count * sizeof (*dst),
		    KM_NOSLEEP)) == NULL) {
			kmem_free(tmp_table, count * sizeof (ip6_asp_t));
			ret_val = ENOMEM;
			goto unlock_end;
		}

		/*
		 * Copy each element of the table from ip6_asp32_t
		 * format into ip6_asp_t format.  Fortunately, since
		 * we're just dealing with a trailing structure pad,
		 * we can do this straightforwardly with a flurry of
		 * bcopying.
		 */
		src = (void *)new_table;
		for (i = 0; i < count; i++)
			bcopy(src + i, dst + i, sizeof (*src));

		/* Sort from the widened copy, then drop the scratch table. */
		ip6_asp_copy(dst, tmp_table, count);
		kmem_free(dst, count * sizeof (*dst));
	} else
#endif
	ip6_asp_copy(new_table, tmp_table, count);

	/*
	 * Make sure the last entry is the default entry.  ip6_asp_copy()
	 * has sorted tmp_table by descending prefix length, so this looks
	 * at the entry with the shortest mask.
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_prefix) ||
	    !IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_mask)) {
		ret_val = EINVAL;
		kmem_free(tmp_table, count * sizeof (ip6_asp_t));
		ip1dbg(("ip6_asp_replace: bad table: no default entry\n"));
		goto unlock_end;
	}
	/* The static default table was never allocated; don't free it. */
	if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
		kmem_free(ipst->ips_ip6_asp_table,
		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
	}
	ipst->ips_ip6_asp_table = tmp_table;
	ipst->ips_ip6_asp_table_count = count;

	/*
	 * The user has changed the address selection policy table.  IPv6
	 * source address selection for existing IRE_CACHE and
	 * IRE_HOST_REDIRECT entries used the old table, so we need to
	 * clear the cache.
	 */
	ire_walk_v6(ire_delete_cache_v6, NULL, ALL_ZONES, ipst);

unlock_end:
	/* Lookups may proceed again. */
	ipst->ips_ip6_asp_uip = B_FALSE;
	mutex_exit(&ipst->ips_ip6_asp_lock);

replace_end:
	/* Reply to the ioctl */
	q = (queue_t *)mp->b_prev;
	mp->b_prev = NULL;
	if (q == NULL) {
		/* No queue to answer on: just release the message. */
		freemsg(mp);
		goto check_binds;
	}
	iocp = (struct iocblk *)mp->b_rptr;
	iocp->ioc_error = ret_val;
	iocp->ioc_count = 0;
	DB_TYPE(mp) = (iocp->ioc_error == 0) ? M_IOCACK : M_IOCNAK;
	qreply(q, mp);
check_binds:
	/* Run whatever was queued with ip6_asp_pending_op() meanwhile. */
	ip6_asp_complete_op(ipst);
}

/*
 * Copies the contents of src_table to dst_table, and sorts the
 * entries in descending order of prefix lengths.  It assumes that both
 * tables are appropriately sized to contain count entries.
451 */ 452 static void 453 ip6_asp_copy(ip6_asp_t *src_table, ip6_asp_t *dst_table, uint_t count) 454 { 455 ip6_asp_t *src_ptr, *src_limit, *dst_ptr, *dst_limit, *dp; 456 457 dst_table[0] = src_table[0]; 458 if (count == 1) 459 return; 460 461 /* 462 * Sort the entries in descending order of prefix lengths. 463 * 464 * Note: this should be a small table. In 99% of cases, we 465 * expect the table to have 5 entries. In the remaining 1% 466 * of cases, we expect the table to have one or two more 467 * entries. It would be very rare for the table to have 468 * double-digit entries. 469 */ 470 src_limit = src_table + count; 471 dst_limit = dst_table + 1; 472 for (src_ptr = src_table + 1; src_ptr != src_limit; 473 src_ptr++, dst_limit++) { 474 for (dst_ptr = dst_table; dst_ptr < dst_limit; dst_ptr++) { 475 if (ip_mask_to_plen_v6(&src_ptr->ip6_asp_mask) > 476 ip_mask_to_plen_v6(&dst_ptr->ip6_asp_mask)) { 477 /* 478 * Make room to insert the source entry 479 * before dst_ptr by shifting entries to 480 * the right. 481 */ 482 for (dp = dst_limit - 1; dp >= dst_ptr; dp--) 483 *(dp + 1) = *dp; 484 break; 485 } 486 } 487 *dst_ptr = *src_ptr; 488 } 489 } 490 491 /* 492 * This function copies as many entries from ip6_asp_table as will fit 493 * into dtable. The dtable_size parameter is the size of dtable 494 * in bytes. This function returns the number of entries in 495 * ip6_asp_table, even if it's not able to fit all of the entries into 496 * dtable. 497 */ 498 int 499 ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size, ip_stack_t *ipst) 500 { 501 uint_t dtable_count; 502 503 if (dtable != NULL) { 504 if (dtable_size < sizeof (ip6_asp_t)) 505 return (-1); 506 507 dtable_count = dtable_size / sizeof (ip6_asp_t); 508 bcopy(ipst->ips_ip6_asp_table, dtable, 509 MIN(ipst->ips_ip6_asp_table_count, dtable_count) * 510 sizeof (ip6_asp_t)); 511 } 512 513 return (ipst->ips_ip6_asp_table_count); 514 } 515 516 /* 517 * Compare two labels. 
Return B_TRUE if they are equal, B_FALSE
 * otherwise.
 *
 * Labels are compared as fixed 16-byte buffers rather than as
 * NUL-terminated strings, so both label1 and label2 must reference at
 * least 16 readable bytes.
 */
boolean_t
ip6_asp_labelcmp(const char *label1, const char *label2)
{
	/*
	 * The common case, the two labels are actually the same string
	 * from the policy table.
	 */
	if (label1 == label2)
		return (B_TRUE);

	/*
	 * Since we know the labels are at most 16 bytes long, compare
	 * the full 16 bytes (two 8-byte words' worth).  A byte-wise
	 * compare is used instead of reading the buffers through
	 * int64_t pointers, so nothing here depends on the labels being
	 * 8-byte aligned and const is not cast away.
	 */
	if (bcmp(label1, label2, 2 * sizeof (int64_t)) == 0)
		return (B_TRUE);
	return (B_FALSE);
}