1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/socket.h> 31 #include <sys/ksynch.h> 32 #include <sys/kmem.h> 33 #include <sys/errno.h> 34 #include <sys/systm.h> 35 #include <sys/sysmacros.h> 36 #include <sys/cmn_err.h> 37 #include <sys/strsun.h> 38 #include <sys/zone.h> 39 #include <netinet/in.h> 40 #include <inet/common.h> 41 #include <inet/ip.h> 42 #include <inet/ip6.h> 43 #include <inet/ip6_asp.h> 44 #include <inet/ip_ire.h> 45 46 #define IN6ADDR_MASK128_INIT \ 47 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU } 48 #define IN6ADDR_MASK96_INIT { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0 } 49 #ifdef _BIG_ENDIAN 50 #define IN6ADDR_MASK16_INIT { 0xffff0000U, 0, 0, 0 } 51 #else 52 #define IN6ADDR_MASK16_INIT { 0x0000ffffU, 0, 0, 0 } 53 #endif 54 55 56 /* 57 * This table is ordered such that longest prefix matches are hit first 58 * (longer prefix lengths first). The last entry must be the "default" 59 * entry (::0/0). 60 */ 61 static ip6_asp_t default_ip6_asp_table[] = { 62 { IN6ADDR_LOOPBACK_INIT, IN6ADDR_MASK128_INIT, 63 "Loopback", 50 }, 64 { IN6ADDR_ANY_INIT, IN6ADDR_MASK96_INIT, 65 "IPv4_Compatible", 20 }, 66 #ifdef _BIG_ENDIAN 67 { { 0, 0, 0x0000ffffU, 0 }, IN6ADDR_MASK96_INIT, 68 "IPv4", 10 }, 69 { { 0x20020000U, 0, 0, 0 }, IN6ADDR_MASK16_INIT, 70 "6to4", 30 }, 71 #else 72 { { 0, 0, 0xffff0000U, 0 }, IN6ADDR_MASK96_INIT, 73 "IPv4", 10 }, 74 { { 0x00000220U, 0, 0, 0 }, IN6ADDR_MASK16_INIT, 75 "6to4", 30 }, 76 #endif 77 { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, 78 "Default", 40 } 79 }; 80 81 /* pending binds */ 82 static mblk_t *ip6_asp_pending_ops = NULL, *ip6_asp_pending_ops_tail = NULL; 83 84 /* Synchronize updates with table usage */ 85 static mblk_t *ip6_asp_pending_update = NULL; /* pending table updates */ 86 87 static boolean_t ip6_asp_uip = B_FALSE; /* table update in progress */ 88 static kmutex_t ip6_asp_lock; /* protect all the above */ 89 static uint32_t ip6_asp_refcnt = 0; /* outstanding references */ 90 91 /* 92 * The IPv6 Default Address Selection policy table. 93 * Until someone up above reconfigures the policy table, use the global 94 * default. The table needs no lock since the only way to alter it is 95 * through the SIOCSIP6ADDRPOLICY which is exclusive in ip. 96 */ 97 static ip6_asp_t *ip6_asp_table = default_ip6_asp_table; 98 /* The number of policy entries in the table */ 99 static uint_t ip6_asp_table_count = 100 sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t); 101 102 static void ip6_asp_copy(ip6_asp_t *, ip6_asp_t *, uint_t); 103 static void ip6_asp_check_for_updates(); 104 105 void 106 ip6_asp_init(void) 107 { 108 /* Initialize the table lock */ 109 mutex_init(&ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL); 110 } 111 112 void 113 ip6_asp_free(void) 114 { 115 if (ip6_asp_table != default_ip6_asp_table) { 116 kmem_free(ip6_asp_table, 117 ip6_asp_table_count * sizeof (ip6_asp_t)); 118 } 119 mutex_destroy(&ip6_asp_lock); 120 } 121 122 /* 123 * Return false if the table is being updated. Else, increment the ref 124 * count and return true. 125 */ 126 boolean_t 127 ip6_asp_can_lookup() 128 { 129 mutex_enter(&ip6_asp_lock); 130 if (ip6_asp_uip) { 131 mutex_exit(&ip6_asp_lock); 132 return (B_FALSE); 133 } 134 IP6_ASP_TABLE_REFHOLD(); 135 mutex_exit(&ip6_asp_lock); 136 return (B_TRUE); 137 138 } 139 140 void 141 ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func) 142 { 143 144 ASSERT((mp->b_prev == NULL) && (mp->b_queue == NULL) && 145 (mp->b_next == NULL)); 146 mp->b_queue = (void *)q; 147 mp->b_prev = (void *)func; 148 mp->b_next = NULL; 149 150 mutex_enter(&ip6_asp_lock); 151 if (ip6_asp_pending_ops == NULL) { 152 ASSERT(ip6_asp_pending_ops_tail == NULL); 153 ip6_asp_pending_ops = ip6_asp_pending_ops_tail = mp; 154 } else { 155 ip6_asp_pending_ops_tail->b_next = mp; 156 ip6_asp_pending_ops_tail = mp; 157 } 158 mutex_exit(&ip6_asp_lock); 159 } 160 161 static void 162 ip6_asp_complete_op() 163 { 164 mblk_t *mp; 165 queue_t *q; 166 aspfunc_t func; 167 168 mutex_enter(&ip6_asp_lock); 169 while (ip6_asp_pending_ops != NULL) { 170 mp = ip6_asp_pending_ops; 171 ip6_asp_pending_ops = mp->b_next; 172 mp->b_next = NULL; 173 if (ip6_asp_pending_ops == NULL) 174 ip6_asp_pending_ops_tail = NULL; 175 mutex_exit(&ip6_asp_lock); 176 177 q = (queue_t *)mp->b_queue; 178 func = (aspfunc_t)mp->b_prev; 179 180 mp->b_prev = NULL; 181 mp->b_queue = NULL; 182 183 184 (*func)(NULL, q, mp, NULL); 185 mutex_enter(&ip6_asp_lock); 186 } 187 mutex_exit(&ip6_asp_lock); 188 } 189 190 /* 191 * Decrement reference count. When it gets to 0, we check for (pending) 192 * saved update to the table, if any. 193 */ 194 void 195 ip6_asp_table_refrele() 196 { 197 IP6_ASP_TABLE_REFRELE(); 198 } 199 200 /* 201 * This function is guaranteed never to return a NULL pointer. It 202 * will always return information from one of the entries in the 203 * asp_table (which will never be empty). If a pointer is passed 204 * in for the precedence, the precedence value will be set; a 205 * pointer to the label will be returned by the function. 206 * 207 * Since the table is only anticipated to have five or six entries 208 * total, the lookup algorithm hasn't been optimized to anything 209 * better than O(n). 210 */ 211 char * 212 ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence) 213 { 214 ip6_asp_t *aspp; 215 ip6_asp_t *match = NULL; 216 ip6_asp_t *default_policy; 217 218 aspp = ip6_asp_table; 219 /* The default entry must always be the last one */ 220 default_policy = aspp + ip6_asp_table_count - 1; 221 222 while (match == NULL) { 223 if (aspp == default_policy) { 224 match = aspp; 225 } else { 226 if (V6_MASK_EQ(*addr, aspp->ip6_asp_mask, 227 aspp->ip6_asp_prefix)) 228 match = aspp; 229 else 230 aspp++; 231 } 232 } 233 234 if (precedence != NULL) 235 *precedence = match->ip6_asp_precedence; 236 return (match->ip6_asp_label); 237 } 238 239 /* 240 * If we had deferred updating the table because of outstanding references, 241 * do it now. Note, we don't do error checking on the queued IOCTL mblk, since 242 * ip_sioctl_ip6addrpolicy() has already done it for us. 243 */ 244 void 245 ip6_asp_check_for_updates() 246 { 247 ip6_asp_t *table; 248 size_t table_size; 249 mblk_t *data_mp, *mp; 250 struct iocblk *iocp; 251 252 mutex_enter(&ip6_asp_lock); 253 if (ip6_asp_pending_update == NULL || ip6_asp_refcnt > 0) { 254 mutex_exit(&ip6_asp_lock); 255 return; 256 } 257 258 mp = ip6_asp_pending_update; 259 ip6_asp_pending_update = NULL; 260 ASSERT(mp->b_prev != NULL); 261 262 ip6_asp_uip = B_TRUE; 263 264 iocp = (struct iocblk *)mp->b_rptr; 265 data_mp = mp->b_cont; 266 if (data_mp == NULL) { 267 table = NULL; 268 table_size = iocp->ioc_count; 269 } else { 270 table = (ip6_asp_t *)data_mp->b_rptr; 271 table_size = iocp->ioc_count; 272 } 273 274 ip6_asp_replace(mp, table, table_size, B_TRUE, 275 iocp->ioc_flag & IOC_MODELS); 276 } 277 278 /* 279 * ip6_asp_replace replaces the contents of the IPv6 address selection 280 * policy table with those specified in new_table. If new_table is NULL, 281 * this indicates that the caller wishes ip to use the default policy 282 * table. The caller is responsible for making sure that there are exactly 283 * new_count policy entries in new_table. 284 */ 285 /*ARGSUSED4*/ 286 void 287 ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size, 288 boolean_t locked, model_t datamodel) 289 { 290 int ret_val = 0; 291 ip6_asp_t *tmp_table; 292 uint_t count; 293 queue_t *q; 294 struct iocblk *iocp; 295 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4 296 size_t ip6_asp_size = SIZEOF_STRUCT(ip6_asp, datamodel); 297 #else 298 const size_t ip6_asp_size = sizeof (ip6_asp_t); 299 #endif 300 301 if (new_size % ip6_asp_size != 0) { 302 ip1dbg(("ip6_asp_replace: invalid table size\n")); 303 ret_val = EINVAL; 304 if (locked) 305 goto unlock_end; 306 goto replace_end; 307 } else { 308 count = new_size / ip6_asp_size; 309 } 310 311 312 if (!locked) 313 mutex_enter(&ip6_asp_lock); 314 /* 315 * Check if we are in the process of creating any IRE using the 316 * current information. If so, wait till that is done. 317 */ 318 if (!locked && ip6_asp_refcnt > 0) { 319 /* Save this request for later processing */ 320 if (ip6_asp_pending_update == NULL) { 321 ip6_asp_pending_update = mp; 322 } else { 323 /* Let's not queue multiple requests for now */ 324 ip1dbg(("ip6_asp_replace: discarding request\n")); 325 mutex_exit(&ip6_asp_lock); 326 ret_val = EAGAIN; 327 goto replace_end; 328 } 329 mutex_exit(&ip6_asp_lock); 330 return; 331 } 332 333 /* Prevent lookups till the table have been updated */ 334 if (!locked) 335 ip6_asp_uip = B_TRUE; 336 337 ASSERT(ip6_asp_refcnt == 0); 338 339 if (new_table == NULL) { 340 /* 341 * This is a special case. The user wants to revert 342 * back to using the default table. 343 */ 344 if (ip6_asp_table == default_ip6_asp_table) 345 goto unlock_end; 346 347 kmem_free(ip6_asp_table, 348 ip6_asp_table_count * sizeof (ip6_asp_t)); 349 ip6_asp_table = default_ip6_asp_table; 350 ip6_asp_table_count = 351 sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t); 352 goto unlock_end; 353 } 354 355 if (count == 0) { 356 ret_val = EINVAL; 357 ip1dbg(("ip6_asp_replace: empty table\n")); 358 goto unlock_end; 359 } 360 361 if ((tmp_table = kmem_alloc(count * sizeof (ip6_asp_t), KM_NOSLEEP)) == 362 NULL) { 363 ret_val = ENOMEM; 364 goto unlock_end; 365 } 366 367 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4 368 369 /* 370 * If 'new_table' -actually- originates from a 32-bit process 371 * then the nicely aligned ip6_asp_label array will be 372 * subtlely misaligned on this kernel, because the structure 373 * is 8 byte aligned in the kernel, but only 4 byte aligned in 374 * userland. Fix it up here. 375 * 376 * XX64 See the notes in ip_sioctl_ip6addrpolicy. Perhaps we could 377 * do the datamodel transformation (below) there instead of here? 378 */ 379 if (datamodel == IOC_ILP32) { 380 ip6_asp_t *dst; 381 ip6_asp32_t *src; 382 int i; 383 384 if ((dst = kmem_zalloc(count * sizeof (*dst), 385 KM_NOSLEEP)) == NULL) { 386 kmem_free(tmp_table, count * sizeof (ip6_asp_t)); 387 ret_val = ENOMEM; 388 goto unlock_end; 389 } 390 391 /* 392 * Copy each element of the table from ip6_asp32_t 393 * format into ip6_asp_t format. Fortunately, since 394 * we're just dealing with a trailing structure pad, 395 * we can do this straightforwardly with a flurry of 396 * bcopying. 397 */ 398 src = (void *)new_table; 399 for (i = 0; i < count; i++) 400 bcopy(src + i, dst + i, sizeof (*src)); 401 402 ip6_asp_copy(dst, tmp_table, count); 403 kmem_free(dst, count * sizeof (*dst)); 404 } else 405 #endif 406 ip6_asp_copy(new_table, tmp_table, count); 407 408 /* Make sure the last entry is the default entry */ 409 if (!IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_prefix) || 410 !IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_mask)) { 411 ret_val = EINVAL; 412 kmem_free(tmp_table, count * sizeof (ip6_asp_t)); 413 ip1dbg(("ip6_asp_replace: bad table: no default entry\n")); 414 goto unlock_end; 415 } 416 if (ip6_asp_table != default_ip6_asp_table) { 417 kmem_free(ip6_asp_table, 418 ip6_asp_table_count * sizeof (ip6_asp_t)); 419 } 420 ip6_asp_table = tmp_table; 421 ip6_asp_table_count = count; 422 423 /* 424 * The user has changed the address selection policy table. IPv6 425 * source address selection for existing IRE_CACHE and 426 * IRE_HOST_REDIRECT entries used the old table, so we need to 427 * clear the cache. 428 */ 429 ire_walk_v6(ire_delete_cache_v6, NULL, ALL_ZONES); 430 431 unlock_end: 432 ip6_asp_uip = B_FALSE; 433 mutex_exit(&ip6_asp_lock); 434 435 replace_end: 436 /* Reply to the ioctl */ 437 q = (queue_t *)mp->b_prev; 438 mp->b_prev = NULL; 439 if (q == NULL) { 440 freemsg(mp); 441 goto check_binds; 442 } 443 iocp = (struct iocblk *)mp->b_rptr; 444 iocp->ioc_error = ret_val; 445 iocp->ioc_count = 0; 446 DB_TYPE(mp) = (iocp->ioc_error == 0) ? M_IOCACK : M_IOCNAK; 447 qreply(q, mp); 448 check_binds: 449 ip6_asp_complete_op(); 450 } 451 452 /* 453 * Copies the contents of src_table to dst_table, and sorts the 454 * entries in decending order of prefix lengths. It assumes that both 455 * tables are appropriately sized to contain count entries. 456 */ 457 static void 458 ip6_asp_copy(ip6_asp_t *src_table, ip6_asp_t *dst_table, uint_t count) 459 { 460 ip6_asp_t *src_ptr, *src_limit, *dst_ptr, *dst_limit, *dp; 461 462 dst_table[0] = src_table[0]; 463 if (count == 1) 464 return; 465 466 /* 467 * Sort the entries in descending order of prefix lengths. 468 * 469 * Note: this should be a small table. In 99% of cases, we 470 * expect the table to have 5 entries. In the remaining 1% 471 * of cases, we expect the table to have one or two more 472 * entries. It would be very rare for the table to have 473 * double-digit entries. 474 */ 475 src_limit = src_table + count; 476 dst_limit = dst_table + 1; 477 for (src_ptr = src_table + 1; src_ptr != src_limit; 478 src_ptr++, dst_limit++) { 479 for (dst_ptr = dst_table; dst_ptr < dst_limit; dst_ptr++) { 480 if (ip_mask_to_plen_v6(&src_ptr->ip6_asp_mask) > 481 ip_mask_to_plen_v6(&dst_ptr->ip6_asp_mask)) { 482 /* 483 * Make room to insert the source entry 484 * before dst_ptr by shifting entries to 485 * the right. 486 */ 487 for (dp = dst_limit - 1; dp >= dst_ptr; dp--) 488 *(dp + 1) = *dp; 489 break; 490 } 491 } 492 *dst_ptr = *src_ptr; 493 } 494 } 495 496 /* 497 * This function copies as many entries from ip6_asp_table as will fit 498 * into dtable. The dtable_size parameter is the size of dtable 499 * in bytes. This function returns the number of entries in 500 * ip6_asp_table, even if it's not able to fit all of the entries into 501 * dtable. 502 */ 503 int 504 ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size) 505 { 506 uint_t dtable_count; 507 508 if (dtable != NULL) { 509 if (dtable_size < sizeof (ip6_asp_t)) 510 return (-1); 511 512 dtable_count = dtable_size / sizeof (ip6_asp_t); 513 bcopy(ip6_asp_table, dtable, 514 MIN(ip6_asp_table_count, dtable_count) * 515 sizeof (ip6_asp_t)); 516 } 517 518 return (ip6_asp_table_count); 519 } 520 521 /* 522 * Compare two labels. Return B_TRUE if they are equal, B_FALSE 523 * otherwise. 524 */ 525 boolean_t 526 ip6_asp_labelcmp(const char *label1, const char *label2) 527 { 528 int64_t *llptr1, *llptr2; 529 530 /* 531 * The common case, the two labels are actually the same string 532 * from the policy table. 533 */ 534 if (label1 == label2) 535 return (B_TRUE); 536 537 /* 538 * Since we know the labels are at most 16 bytes long, compare 539 * the two strings as two 8-byte long integers. The ip6_asp_t 540 * structure guarantees that the labels are 8 byte alligned. 541 */ 542 llptr1 = (int64_t *)label1; 543 llptr2 = (int64_t *)label2; 544 if (llptr1[0] == llptr2[0] && llptr1[1] == llptr2[1]) 545 return (B_TRUE); 546 return (B_FALSE); 547 } 548