/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Copyright 2017 Sebastian Wiedenroth
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/strsun.h>
#include <sys/zone.h>
#include <netinet/in.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip6_asp.h>
#include <inet/ip_ire.h>
#include <inet/ip_if.h>
#include <inet/ipclassifier.h>

/*
 * Initializers for IPv6 netmasks of the given prefix length, written as
 * four 32-bit words.  The /128, /96 and /32 masks consist only of
 * all-ones or all-zero words, so one spelling serves both byte orders.
 * The /16, /10 and /7 masks end inside the first word, so that word is
 * spelled once per host byte order; the two spellings are byte-swapped
 * images of each other.
 */
#define	IN6ADDR_MASK128_INIT \
	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }
#define	IN6ADDR_MASK96_INIT	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0 }
#define	IN6ADDR_MASK32_INIT	{ 0xffffffffU, 0, 0, 0 }
#ifdef _BIG_ENDIAN
#define	IN6ADDR_MASK16_INIT	{ 0xffff0000U, 0, 0, 0 }
#define	IN6ADDR_MASK10_INIT	{ 0xffc00000U, 0, 0, 0 }
#define	IN6ADDR_MASK7_INIT	{ 0xfe000000U, 0, 0, 0 }
#else
#define	IN6ADDR_MASK16_INIT	{ 0x0000ffffU, 0, 0, 0 }
#define	IN6ADDR_MASK10_INIT	{ 0x0000c0ffU, 0, 0, 0 }
#define	IN6ADDR_MASK7_INIT	{ 0x000000feU, 0, 0, 0 }
#endif

/*
 * This table is ordered such that longest prefix matches are hit first
 * (longer prefix lengths first).  The last entry must be the "default"
 * entry (::0/0).
 *
 * NOTE(review): the positional initializers are presumed to be
 * { prefix, mask, label, precedence }, matching the ip6_asp_prefix,
 * ip6_asp_mask, ip6_asp_label and ip6_asp_precedence members used later
 * in this file -- confirm against the ip6_asp_t declaration.
 *
 * The prefix words follow the same convention as the mask macros above:
 * the entries whose prefix is not all zeroes are spelled once per host
 * byte order.
 */
static ip6_asp_t default_ip6_asp_table[] = {
	{ IN6ADDR_LOOPBACK_INIT,	IN6ADDR_MASK128_INIT,
	    "Loopback", 50 },
	{ IN6ADDR_ANY_INIT,		IN6ADDR_MASK96_INIT,
	    "IPv4_Compatible", 1 },
#ifdef _BIG_ENDIAN
	{ { 0, 0, 0x0000ffffU, 0 },	IN6ADDR_MASK96_INIT,
	    "IPv4", 35 },
	{ { 0x20010000U, 0, 0, 0 },	IN6ADDR_MASK32_INIT,
	    "Teredo", 5 },
	{ { 0x20020000U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
	    "6to4", 30 },
	{ { 0x3ffe0000U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
	    "6bone", 1 },
	{ { 0xfec00000U, 0, 0, 0 },	IN6ADDR_MASK10_INIT,
	    "Site_Local", 1 },
	{ { 0xfc000000U, 0, 0, 0 },	IN6ADDR_MASK7_INIT,
	    "ULA", 3 },
#else
	{ { 0, 0, 0xffff0000U, 0 },	IN6ADDR_MASK96_INIT,
	    "IPv4", 35 },
	{ { 0x00000120U, 0, 0, 0 },	IN6ADDR_MASK32_INIT,
	    "Teredo", 5 },
	{ { 0x00000220U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
	    "6to4", 30 },
	{ { 0x0000fe3fU, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
	    "6bone", 1 },
	{ { 0x0000c0feU, 0, 0, 0 },	IN6ADDR_MASK10_INIT,
	    "Site_Local", 1 },
	{ { 0x000000fcU, 0, 0, 0 },	IN6ADDR_MASK7_INIT,
	    "ULA", 3 },
#endif
	/* Catch-all: an all-zero prefix with an all-zero mask. */
	{ IN6ADDR_ANY_INIT,		IN6ADDR_ANY_INIT,
	    "Default", 40 }
};

/*
 * The IPv6 Default Address Selection policy table.
 * Until someone up above reconfigures the policy table, use the global
 * default.  The table needs no lock since the only way to alter it is
 * through the SIOCSIP6ADDRPOLICY which is exclusive in ip.
106 */ 107 static void ip6_asp_copy(ip6_asp_t *, ip6_asp_t *, uint_t); 108 static void ip6_asp_check_for_updates(ip_stack_t *); 109 110 void 111 ip6_asp_init(ip_stack_t *ipst) 112 { 113 /* Initialize the table lock */ 114 mutex_init(&ipst->ips_ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL); 115 116 ipst->ips_ip6_asp_table = default_ip6_asp_table; 117 118 ipst->ips_ip6_asp_table_count = 119 sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t); 120 } 121 122 void 123 ip6_asp_free(ip_stack_t *ipst) 124 { 125 if (ipst->ips_ip6_asp_table != default_ip6_asp_table) { 126 kmem_free(ipst->ips_ip6_asp_table, 127 ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t)); 128 ipst->ips_ip6_asp_table = NULL; 129 } 130 mutex_destroy(&ipst->ips_ip6_asp_lock); 131 } 132 133 /* 134 * Return false if the table is being updated. Else, increment the ref 135 * count and return true. 136 */ 137 boolean_t 138 ip6_asp_can_lookup(ip_stack_t *ipst) 139 { 140 mutex_enter(&ipst->ips_ip6_asp_lock); 141 if (ipst->ips_ip6_asp_uip) { 142 mutex_exit(&ipst->ips_ip6_asp_lock); 143 return (B_FALSE); 144 } 145 IP6_ASP_TABLE_REFHOLD(ipst); 146 mutex_exit(&ipst->ips_ip6_asp_lock); 147 return (B_TRUE); 148 149 } 150 151 void 152 ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func) 153 { 154 conn_t *connp = Q_TO_CONN(q); 155 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 156 157 ASSERT((mp->b_prev == NULL) && (mp->b_queue == NULL) && 158 (mp->b_next == NULL)); 159 mp->b_queue = (void *)q; 160 mp->b_prev = (void *)func; 161 mp->b_next = NULL; 162 163 mutex_enter(&ipst->ips_ip6_asp_lock); 164 if (ipst->ips_ip6_asp_pending_ops == NULL) { 165 ASSERT(ipst->ips_ip6_asp_pending_ops_tail == NULL); 166 ipst->ips_ip6_asp_pending_ops = 167 ipst->ips_ip6_asp_pending_ops_tail = mp; 168 } else { 169 ipst->ips_ip6_asp_pending_ops_tail->b_next = mp; 170 ipst->ips_ip6_asp_pending_ops_tail = mp; 171 } 172 mutex_exit(&ipst->ips_ip6_asp_lock); 173 } 174 175 static void 176 ip6_asp_complete_op(ip_stack_t *ipst) 177 { 178 
mblk_t *mp; 179 queue_t *q; 180 aspfunc_t func; 181 182 mutex_enter(&ipst->ips_ip6_asp_lock); 183 while (ipst->ips_ip6_asp_pending_ops != NULL) { 184 mp = ipst->ips_ip6_asp_pending_ops; 185 ipst->ips_ip6_asp_pending_ops = mp->b_next; 186 mp->b_next = NULL; 187 if (ipst->ips_ip6_asp_pending_ops == NULL) 188 ipst->ips_ip6_asp_pending_ops_tail = NULL; 189 mutex_exit(&ipst->ips_ip6_asp_lock); 190 191 q = (queue_t *)mp->b_queue; 192 func = (aspfunc_t)mp->b_prev; 193 194 mp->b_prev = NULL; 195 mp->b_queue = NULL; 196 197 198 (*func)(NULL, q, mp, NULL); 199 mutex_enter(&ipst->ips_ip6_asp_lock); 200 } 201 mutex_exit(&ipst->ips_ip6_asp_lock); 202 } 203 204 /* 205 * Decrement reference count. When it gets to 0, we check for (pending) 206 * saved update to the table, if any. 207 */ 208 void 209 ip6_asp_table_refrele(ip_stack_t *ipst) 210 { 211 IP6_ASP_TABLE_REFRELE(ipst); 212 } 213 214 /* 215 * This function is guaranteed never to return a NULL pointer. It 216 * will always return information from one of the entries in the 217 * asp_table (which will never be empty). If a pointer is passed 218 * in for the precedence, the precedence value will be set; a 219 * pointer to the label will be returned by the function. 220 * 221 * Since the table is only anticipated to have about 10 entries 222 * total, the lookup algorithm hasn't been optimized to anything 223 * better than O(n). 
224 */ 225 char * 226 ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence, ip_stack_t *ipst) 227 { 228 ip6_asp_t *aspp; 229 ip6_asp_t *match = NULL; 230 ip6_asp_t *default_policy; 231 232 aspp = ipst->ips_ip6_asp_table; 233 /* The default entry must always be the last one */ 234 default_policy = aspp + ipst->ips_ip6_asp_table_count - 1; 235 236 while (match == NULL) { 237 if (aspp == default_policy) { 238 match = aspp; 239 } else { 240 if (V6_MASK_EQ(*addr, aspp->ip6_asp_mask, 241 aspp->ip6_asp_prefix)) 242 match = aspp; 243 else 244 aspp++; 245 } 246 } 247 248 if (precedence != NULL) 249 *precedence = match->ip6_asp_precedence; 250 return (match->ip6_asp_label); 251 } 252 253 /* 254 * If we had deferred updating the table because of outstanding references, 255 * do it now. Note, we don't do error checking on the queued IOCTL mblk, since 256 * ip_sioctl_ip6addrpolicy() has already done it for us. 257 */ 258 void 259 ip6_asp_check_for_updates(ip_stack_t *ipst) 260 { 261 ip6_asp_t *table; 262 size_t table_size; 263 mblk_t *data_mp, *mp; 264 struct iocblk *iocp; 265 266 mutex_enter(&ipst->ips_ip6_asp_lock); 267 if (ipst->ips_ip6_asp_pending_update == NULL || 268 ipst->ips_ip6_asp_refcnt > 0) { 269 mutex_exit(&ipst->ips_ip6_asp_lock); 270 return; 271 } 272 273 mp = ipst->ips_ip6_asp_pending_update; 274 ipst->ips_ip6_asp_pending_update = NULL; 275 ASSERT(mp->b_prev != NULL); 276 277 ipst->ips_ip6_asp_uip = B_TRUE; 278 279 iocp = (struct iocblk *)mp->b_rptr; 280 data_mp = mp->b_cont; 281 if (data_mp == NULL) { 282 table = NULL; 283 table_size = iocp->ioc_count; 284 } else { 285 table = (ip6_asp_t *)data_mp->b_rptr; 286 table_size = iocp->ioc_count; 287 } 288 289 ip6_asp_replace(mp, table, table_size, B_TRUE, ipst, 290 iocp->ioc_flag & IOC_MODELS); 291 } 292 293 /* 294 * ip6_asp_replace replaces the contents of the IPv6 address selection 295 * policy table with those specified in new_table. 
 * If new_table is NULL,
 * this indicates that the caller wishes ip to use the default policy
 * table.  The caller is responsible for making sure that new_size is the
 * size of new_table in bytes; it must be a whole multiple of the entry
 * size for the given data model or the request fails with EINVAL.
 *
 * Parameters:
 *	mp		the ioctl message.  mp->b_prev carries the queue to
 *			reply on; if it is NULL the message is freed and no
 *			reply is sent.
 *	new_table	the replacement entries, or NULL for the default.
 *	new_size	size of new_table in bytes.
 *	locked		B_TRUE when the caller already holds
 *			ips_ip6_asp_lock, as ip6_asp_check_for_updates()
 *			does (it also sets ips_ip6_asp_uip beforehand).
 *			The lock is released here in either case.
 *	ipst		the IP stack instance whose table is replaced.
 *	datamodel	the caller's data model; only consulted when
 *			32-bit callers need their entries re-laid-out.
 *
 * Outcomes, reported through ioc_error in the reply:
 *	0	table replaced (or already/now the default)
 *	EINVAL	bad size, empty table, or last entry is not ::0/0
 *	ENOMEM	KM_NOSLEEP allocation failed
 *	EAGAIN	table is referenced and another update is already queued
 * If the table is referenced and no update is queued yet, mp is saved as
 * the pending update and the function returns without replying.
 *
 * Every path that replies (or frees mp) finishes by draining the
 * pending-operations list via ip6_asp_complete_op().
 */
/*ARGSUSED5*/
void
ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size,
    boolean_t locked, ip_stack_t *ipst, model_t datamodel)
{
	int			ret_val = 0;
	ip6_asp_t		*tmp_table;
	uint_t			count;
	queue_t			*q;
	struct iocblk		*iocp;
	/* Per-entry size as laid out by the caller's data model. */
#if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
	size_t ip6_asp_size = SIZEOF_STRUCT(ip6_asp, datamodel);
#else
	const size_t ip6_asp_size = sizeof (ip6_asp_t);
#endif

	if (new_size % ip6_asp_size != 0) {
		ip1dbg(("ip6_asp_replace: invalid table size\n"));
		ret_val = EINVAL;
		/*
		 * A caller that came in holding the lock must have it
		 * dropped (and the update flag cleared) before the reply.
		 */
		if (locked)
			goto unlock_end;
		goto replace_end;
	} else {
		count = new_size / ip6_asp_size;
	}


	if (!locked)
		mutex_enter(&ipst->ips_ip6_asp_lock);
	/*
	 * Check if we are in the process of creating any IRE using the
	 * current information. If so, wait till that is done.
	 */
	if (!locked && ipst->ips_ip6_asp_refcnt > 0) {
		/* Save this request for later processing */
		if (ipst->ips_ip6_asp_pending_update == NULL) {
			ipst->ips_ip6_asp_pending_update = mp;
		} else {
			/* Let's not queue multiple requests for now */
			ip1dbg(("ip6_asp_replace: discarding request\n"));
			mutex_exit(&ipst->ips_ip6_asp_lock);
			ret_val = EAGAIN;
			goto replace_end;
		}
		/* Deferred: mp now belongs to the pending slot, no reply. */
		mutex_exit(&ipst->ips_ip6_asp_lock);
		return;
	}

	/* Prevent lookups till the table have been updated */
	if (!locked)
		ipst->ips_ip6_asp_uip = B_TRUE;

	ASSERT(ipst->ips_ip6_asp_refcnt == 0);

	if (new_table == NULL) {
		/*
		 * This is a special case.  The user wants to revert
		 * back to using the default table.
		 */
		if (ipst->ips_ip6_asp_table == default_ip6_asp_table)
			goto unlock_end;

		kmem_free(ipst->ips_ip6_asp_table,
		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
		ipst->ips_ip6_asp_table = default_ip6_asp_table;
		ipst->ips_ip6_asp_table_count =
		    sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
		goto unlock_end;
	}

	if (count == 0) {
		ret_val = EINVAL;
		ip1dbg(("ip6_asp_replace: empty table\n"));
		goto unlock_end;
	}

	/* The lock is held here, hence the non-sleeping allocation. */
	if ((tmp_table = kmem_alloc(count * sizeof (ip6_asp_t), KM_NOSLEEP)) ==
	    NULL) {
		ret_val = ENOMEM;
		goto unlock_end;
	}

#if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4

	/*
	 * If 'new_table' -actually- originates from a 32-bit process
	 * then the nicely aligned ip6_asp_label array will be
	 * subtly misaligned on this kernel, because the structure
	 * is 8 byte aligned in the kernel, but only 4 byte aligned in
	 * userland.  Fix it up here.
	 *
	 * XX64	See the notes in ip_sioctl_ip6addrpolicy.  Perhaps we could
	 *	do the datamodel transformation (below) there instead of here?
	 */
	if (datamodel == IOC_ILP32) {
		ip6_asp_t *dst;
		ip6_asp32_t *src;
		int i;

		if ((dst = kmem_zalloc(count * sizeof (*dst),
		    KM_NOSLEEP)) == NULL) {
			kmem_free(tmp_table, count * sizeof (ip6_asp_t));
			ret_val = ENOMEM;
			goto unlock_end;
		}

		/*
		 * Copy each element of the table from ip6_asp32_t
		 * format into ip6_asp_t format.  Fortunately, since
		 * we're just dealing with a trailing structure pad,
		 * we can do this straightforwardly with a flurry of
		 * bcopying.
		 */
		src = (void *)new_table;
		for (i = 0; i < count; i++)
			bcopy(src + i, dst + i, sizeof (*src));

		/* dst is only scratch space for the sorted copy. */
		ip6_asp_copy(dst, tmp_table, count);
		kmem_free(dst, count * sizeof (*dst));
	} else
#endif
		ip6_asp_copy(new_table, tmp_table, count);

	/*
	 * Make sure the last entry is the default entry.  This is checked
	 * on the sorted copy, so the caller's own ordering does not matter.
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_prefix) ||
	    !IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_mask)) {
		ret_val = EINVAL;
		kmem_free(tmp_table, count * sizeof (ip6_asp_t));
		ip1dbg(("ip6_asp_replace: bad table: no default entry\n"));
		goto unlock_end;
	}
	/* The static default table must never be handed to kmem_free(). */
	if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
		kmem_free(ipst->ips_ip6_asp_table,
		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
	}
	ipst->ips_ip6_asp_table = tmp_table;
	ipst->ips_ip6_asp_table_count = count;

unlock_end:
	/* Reached with the lock held, on success and on failure alike. */
	ipst->ips_ip6_asp_uip = B_FALSE;
	mutex_exit(&ipst->ips_ip6_asp_lock);

	/* Let conn_ixa caching know that source address selection changed */
	ip_update_source_selection(ipst);

replace_end:
	/* Reply to the ioctl */
	q = (queue_t *)mp->b_prev;
	mp->b_prev = NULL;
	if (q == NULL) {
		/* Nobody to answer; just dispose of the request. */
		freemsg(mp);
		goto check_binds;
	}
	iocp = (struct iocblk *)mp->b_rptr;
	iocp->ioc_error = ret_val;
	iocp->ioc_count = 0;
	DB_TYPE(mp) = (iocp->ioc_error == 0) ? M_IOCACK : M_IOCNAK;
	qreply(q, mp);
check_binds:
	/* Resume whatever was parked by ip6_asp_pending_op(). */
	ip6_asp_complete_op(ipst);
}

/*
 * Copies the contents of src_table to dst_table, and sorts the
 * entries in descending order of prefix lengths.  It assumes that both
 * tables are appropriately sized to contain count entries.
466 */ 467 static void 468 ip6_asp_copy(ip6_asp_t *src_table, ip6_asp_t *dst_table, uint_t count) 469 { 470 ip6_asp_t *src_ptr, *src_limit, *dst_ptr, *dst_limit, *dp; 471 472 dst_table[0] = src_table[0]; 473 if (count == 1) 474 return; 475 476 /* 477 * Sort the entries in descending order of prefix lengths. 478 * 479 * Note: this should be a small table. In 99% of cases, we 480 * expect the table to have 9 entries. In the remaining 1% 481 * of cases, we expect the table to have one or two more 482 * entries. 483 */ 484 src_limit = src_table + count; 485 dst_limit = dst_table + 1; 486 for (src_ptr = src_table + 1; src_ptr != src_limit; 487 src_ptr++, dst_limit++) { 488 for (dst_ptr = dst_table; dst_ptr < dst_limit; dst_ptr++) { 489 if (ip_mask_to_plen_v6(&src_ptr->ip6_asp_mask) > 490 ip_mask_to_plen_v6(&dst_ptr->ip6_asp_mask)) { 491 /* 492 * Make room to insert the source entry 493 * before dst_ptr by shifting entries to 494 * the right. 495 */ 496 for (dp = dst_limit - 1; dp >= dst_ptr; dp--) 497 *(dp + 1) = *dp; 498 break; 499 } 500 } 501 *dst_ptr = *src_ptr; 502 } 503 } 504 505 /* 506 * This function copies as many entries from ip6_asp_table as will fit 507 * into dtable. The dtable_size parameter is the size of dtable 508 * in bytes. This function returns the number of entries in 509 * ip6_asp_table, even if it's not able to fit all of the entries into 510 * dtable. 511 */ 512 int 513 ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size, ip_stack_t *ipst) 514 { 515 uint_t dtable_count; 516 517 if (dtable != NULL) { 518 if (dtable_size < sizeof (ip6_asp_t)) 519 return (-1); 520 521 dtable_count = dtable_size / sizeof (ip6_asp_t); 522 bcopy(ipst->ips_ip6_asp_table, dtable, 523 MIN(ipst->ips_ip6_asp_table_count, dtable_count) * 524 sizeof (ip6_asp_t)); 525 } 526 527 return (ipst->ips_ip6_asp_table_count); 528 } 529 530 /* 531 * Compare two labels. Return B_TRUE if they are equal, B_FALSE 532 * otherwise. 
533 */ 534 boolean_t 535 ip6_asp_labelcmp(const char *label1, const char *label2) 536 { 537 int64_t *llptr1, *llptr2; 538 539 /* 540 * The common case, the two labels are actually the same string 541 * from the policy table. 542 */ 543 if (label1 == label2) 544 return (B_TRUE); 545 546 /* 547 * Since we know the labels are at most 16 bytes long, compare 548 * the two strings as two 8-byte long integers. The ip6_asp_t 549 * structure guarantees that the labels are 8 byte alligned. 550 */ 551 llptr1 = (int64_t *)label1; 552 llptr2 = (int64_t *)label2; 553 if (llptr1[0] == llptr2[0] && llptr1[1] == llptr2[1]) 554 return (B_TRUE); 555 return (B_FALSE); 556 } 557