1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/strsubr.h> 31 #include <sys/stropts.h> 32 #include <sys/sunddi.h> 33 #include <sys/cred.h> 34 #include <sys/debug.h> 35 #include <sys/kmem.h> 36 #include <sys/errno.h> 37 #include <sys/disp.h> 38 #include <netinet/in.h> 39 #include <netinet/in_systm.h> 40 #include <netinet/ip.h> 41 #include <netinet/ip_icmp.h> 42 #include <netinet/tcp.h> 43 #include <inet/common.h> 44 #include <inet/ipclassifier.h> 45 #include <inet/ip.h> 46 #include <inet/mib2.h> 47 #include <inet/nd.h> 48 #include <inet/tcp.h> 49 #include <inet/ip_rts.h> 50 #include <inet/ip_ire.h> 51 #include <inet/ip_if.h> 52 #include <sys/modhash.h> 53 54 #include <sys/tsol/label.h> 55 #include <sys/tsol/label_macro.h> 56 #include <sys/tsol/tnet.h> 57 #include <sys/tsol/tndb.h> 58 #include <sys/strsun.h> 59 60 /* tunable for strict error-reply behavior (TCP RST and ICMP Unreachable) */ 61 int tsol_strict_error; 62 63 /* 64 * Some notes on the Trusted 
Solaris IRE gateway security attributes:
 *
 * When running in Trusted mode, the routing subsystem determines whether or
 * not a packet can be delivered to an off-link host (not directly reachable
 * through an interface) based on the accreditation checks of the packet's
 * security attributes against those associated with the next-hop gateway.
 *
 * The next-hop gateway's security attributes can be derived from two sources
 * (in order of preference): route-related and the host database.  A Trusted
 * system must be configured with at least the host database containing an
 * entry for the next-hop gateway, or otherwise no accreditation checks can
 * be performed, which may result in the inability to send packets to any
 * off-link destination host.
 *
 * The major differences between the two sources are the number and type of
 * security attributes used for accreditation checks.  A host database entry
 * can contain at most one set of security attributes, specific only to the
 * next-hop gateway.  In contrast, route-related security attributes are made
 * up of a collection of security attributes for the distant networks, and
 * are grouped together per next-hop gateway used to reach those networks.
 * This is the preferred method, and the routing subsystem will fall back to
 * the host database entry only if there are no route-related attributes
 * associated with the next-hop gateway.
 *
 * In Trusted mode, all of the IRE entries (except LOCAL/LOOPBACK/BROADCAST/
 * INTERFACE type) are initialized to contain a placeholder to store this
 * information.  The ire_gw_secattr structure gets allocated, initialized
 * and associated with the IRE during the time of the IRE creation.  The
 * initialization process also includes resolving the host database entry
 * of the next-hop gateway for fallback purposes.
It does not include any
 * route-related attribute setup, as that process comes separately as part
 * of the route requests (add/change) made to the routing subsystem.
 *
 * The underlying logic which involves associating IREs with the gateway
 * security attributes is represented by the following data structures:
 *
 * tsol_gcdb_t, or "gcdb"
 *
 *	- This is a system-wide collection of records containing the
 *	  currently used route-related security attributes, which are fed
 *	  through the routing socket interface, e.g. "route add/change".
 *
 * tsol_gc_t, or "gc"
 *
 *	- This is the gateway credential structure, and it provides the
 *	  only mechanism to access the contents of gcdb.  More than one gc
 *	  entry may refer to the same gcdb record.  gc's in the system are
 *	  grouped according to the next-hop gateway address.
 *
 * tsol_gcgrp_t, or "gcgrp"
 *
 *	- Group of gateway credentials, and is unique per next-hop gateway
 *	  address.  When the group is not empty, i.e. when gcgrp_count is
 *	  greater than zero, it contains one or more gc's, each pointing to
 *	  a gcdb record which indicates the gateway security attributes
 *	  associated with the next-hop gateway.
 *
 * The fields of the tsol_ire_gw_secattr_t used from within the IRE are:
 *
 * igsa_lock
 *
 *	- Lock that protects all fields within tsol_ire_gw_secattr_t.
 *
 * igsa_rhc
 *
 *	- Remote host cache database entry of next-hop gateway.  This is
 *	  used in the case when there are no route-related attributes
 *	  configured for the IRE.
 *
 * igsa_gc
 *
 *	- A set of route-related attributes that only get set for prefix
 *	  IREs.  If this is non-NULL, the prefix IRE has been associated
 *	  with a set of gateway security attributes by way of route add/
 *	  change functionality.  This field stays NULL for IRE_CACHEs.
139 * 140 * igsa_gcgrp 141 * 142 * - Group of gc's which only gets set for IRE_CACHEs. Each of the gc 143 * points to a gcdb record that contains the security attributes 144 * used to perform the credential checks of the packet which uses 145 * the IRE. If the group is not empty, the list of gc's can be 146 * traversed starting at gcgrp_head. This field stays NULL for 147 * prefix IREs. 148 */ 149 150 static kmem_cache_t *ire_gw_secattr_cache; 151 152 #define GCDB_HASH_SIZE 101 153 #define GCGRP_HASH_SIZE 101 154 155 #define GCDB_REFRELE(p) { \ 156 mutex_enter(&gcdb_lock); \ 157 ASSERT((p)->gcdb_refcnt > 0); \ 158 if (--((p)->gcdb_refcnt) == 0) \ 159 gcdb_inactive(p); \ 160 ASSERT(MUTEX_HELD(&gcdb_lock)); \ 161 mutex_exit(&gcdb_lock); \ 162 } 163 164 static int gcdb_hash_size = GCDB_HASH_SIZE; 165 static int gcgrp_hash_size = GCGRP_HASH_SIZE; 166 static mod_hash_t *gcdb_hash; 167 static mod_hash_t *gcgrp4_hash; 168 static mod_hash_t *gcgrp6_hash; 169 170 static kmutex_t gcdb_lock; 171 kmutex_t gcgrp_lock; 172 173 static uint_t gcdb_hash_by_secattr(void *, mod_hash_key_t); 174 static int gcdb_hash_cmp(mod_hash_key_t, mod_hash_key_t); 175 static tsol_gcdb_t *gcdb_lookup(struct rtsa_s *, boolean_t); 176 static void gcdb_inactive(tsol_gcdb_t *); 177 178 static uint_t gcgrp_hash_by_addr(void *, mod_hash_key_t); 179 static int gcgrp_hash_cmp(mod_hash_key_t, mod_hash_key_t); 180 181 static int ire_gw_secattr_constructor(void *, void *, int); 182 static void ire_gw_secattr_destructor(void *, void *); 183 184 void 185 tnet_init(void) 186 { 187 ire_gw_secattr_cache = kmem_cache_create("ire_gw_secattr_cache", 188 sizeof (tsol_ire_gw_secattr_t), 64, ire_gw_secattr_constructor, 189 ire_gw_secattr_destructor, NULL, NULL, NULL, 0); 190 191 gcdb_hash = mod_hash_create_extended("gcdb_hash", 192 gcdb_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor, 193 gcdb_hash_by_secattr, NULL, gcdb_hash_cmp, KM_SLEEP); 194 195 gcgrp4_hash = mod_hash_create_extended("gcgrp4_hash", 196 
gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor, 197 gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP); 198 199 gcgrp6_hash = mod_hash_create_extended("gcgrp6_hash", 200 gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor, 201 gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP); 202 203 mutex_init(&gcdb_lock, NULL, MUTEX_DEFAULT, NULL); 204 mutex_init(&gcgrp_lock, NULL, MUTEX_DEFAULT, NULL); 205 } 206 207 void 208 tnet_fini(void) 209 { 210 kmem_cache_destroy(ire_gw_secattr_cache); 211 mod_hash_destroy_hash(gcdb_hash); 212 mod_hash_destroy_hash(gcgrp4_hash); 213 mod_hash_destroy_hash(gcgrp6_hash); 214 mutex_destroy(&gcdb_lock); 215 mutex_destroy(&gcgrp_lock); 216 } 217 218 /* ARGSUSED */ 219 static int 220 ire_gw_secattr_constructor(void *buf, void *cdrarg, int kmflags) 221 { 222 tsol_ire_gw_secattr_t *attrp = buf; 223 224 mutex_init(&attrp->igsa_lock, NULL, MUTEX_DEFAULT, NULL); 225 226 attrp->igsa_rhc = NULL; 227 attrp->igsa_gc = NULL; 228 attrp->igsa_gcgrp = NULL; 229 230 return (0); 231 } 232 233 /* ARGSUSED */ 234 static void 235 ire_gw_secattr_destructor(void *buf, void *cdrarg) 236 { 237 tsol_ire_gw_secattr_t *attrp = (tsol_ire_gw_secattr_t *)buf; 238 239 mutex_destroy(&attrp->igsa_lock); 240 } 241 242 tsol_ire_gw_secattr_t * 243 ire_gw_secattr_alloc(int kmflags) 244 { 245 return (kmem_cache_alloc(ire_gw_secattr_cache, kmflags)); 246 } 247 248 void 249 ire_gw_secattr_free(tsol_ire_gw_secattr_t *attrp) 250 { 251 ASSERT(MUTEX_NOT_HELD(&attrp->igsa_lock)); 252 253 if (attrp->igsa_rhc != NULL) { 254 TNRHC_RELE(attrp->igsa_rhc); 255 attrp->igsa_rhc = NULL; 256 } 257 258 if (attrp->igsa_gc != NULL) { 259 GC_REFRELE(attrp->igsa_gc); 260 attrp->igsa_gc = NULL; 261 } 262 if (attrp->igsa_gcgrp != NULL) { 263 GCGRP_REFRELE(attrp->igsa_gcgrp); 264 attrp->igsa_gcgrp = NULL; 265 } 266 267 ASSERT(attrp->igsa_rhc == NULL); 268 ASSERT(attrp->igsa_gc == NULL); 269 ASSERT(attrp->igsa_gcgrp == NULL); 270 271 kmem_cache_free(ire_gw_secattr_cache, attrp); 
272 } 273 274 /* ARGSUSED */ 275 static uint_t 276 gcdb_hash_by_secattr(void *hash_data, mod_hash_key_t key) 277 { 278 const struct rtsa_s *rp = (struct rtsa_s *)key; 279 const uint32_t *up, *ue; 280 uint_t hash; 281 int i; 282 283 ASSERT(rp != NULL); 284 285 /* See comments in hash_bylabel in zone.c for details */ 286 hash = rp->rtsa_doi + (rp->rtsa_doi << 1); 287 up = (const uint32_t *)&rp->rtsa_slrange; 288 ue = up + sizeof (rp->rtsa_slrange) / sizeof (*up); 289 i = 1; 290 while (up < ue) { 291 /* using 2^n + 1, 1 <= n <= 16 as source of many primes */ 292 hash += *up + (*up << ((i % 16) + 1)); 293 up++; 294 i++; 295 } 296 return (hash); 297 } 298 299 static int 300 gcdb_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 301 { 302 struct rtsa_s *rp1 = (struct rtsa_s *)key1; 303 struct rtsa_s *rp2 = (struct rtsa_s *)key2; 304 305 ASSERT(rp1 != NULL && rp2 != NULL); 306 307 if (blequal(&rp1->rtsa_slrange.lower_bound, 308 &rp2->rtsa_slrange.lower_bound) && 309 blequal(&rp1->rtsa_slrange.upper_bound, 310 &rp2->rtsa_slrange.upper_bound) && 311 rp1->rtsa_doi == rp2->rtsa_doi) 312 return (0); 313 314 /* No match; not found */ 315 return (-1); 316 } 317 318 /* ARGSUSED */ 319 static uint_t 320 gcgrp_hash_by_addr(void *hash_data, mod_hash_key_t key) 321 { 322 tsol_gcgrp_addr_t *ga = (tsol_gcgrp_addr_t *)key; 323 uint_t idx = 0; 324 uint32_t *ap; 325 326 ASSERT(ga != NULL); 327 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6); 328 329 ap = (uint32_t *)&ga->ga_addr.s6_addr32[0]; 330 idx ^= *ap++; 331 idx ^= *ap++; 332 idx ^= *ap++; 333 idx ^= *ap; 334 335 return (idx); 336 } 337 338 static int 339 gcgrp_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 340 { 341 tsol_gcgrp_addr_t *ga1 = (tsol_gcgrp_addr_t *)key1; 342 tsol_gcgrp_addr_t *ga2 = (tsol_gcgrp_addr_t *)key2; 343 344 ASSERT(ga1 != NULL && ga2 != NULL); 345 346 /* Address family must match */ 347 if (ga1->ga_af != ga2->ga_af) 348 return (-1); 349 350 if (ga1->ga_addr.s6_addr32[0] == ga2->ga_addr.s6_addr32[0] 
&& 351 ga1->ga_addr.s6_addr32[1] == ga2->ga_addr.s6_addr32[1] && 352 ga1->ga_addr.s6_addr32[2] == ga2->ga_addr.s6_addr32[2] && 353 ga1->ga_addr.s6_addr32[3] == ga2->ga_addr.s6_addr32[3]) 354 return (0); 355 356 /* No match; not found */ 357 return (-1); 358 } 359 360 #define RTSAFLAGS "\20\11cipso\3doi\2max_sl\1min_sl" 361 362 int 363 rtsa_validate(const struct rtsa_s *rp) 364 { 365 uint32_t mask = rp->rtsa_mask; 366 367 /* RTSA_CIPSO must be set, and DOI must not be zero */ 368 if ((mask & RTSA_CIPSO) == 0 || rp->rtsa_doi == 0) { 369 DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *, 370 "rtsa(1) lacks flag or has 0 doi.", 371 rtsa_s *, rp); 372 return (EINVAL); 373 } 374 /* 375 * SL range must be specified, and it must have its 376 * upper bound dominating its lower bound. 377 */ 378 if ((mask & RTSA_SLRANGE) != RTSA_SLRANGE || 379 !bldominates(&rp->rtsa_slrange.upper_bound, 380 &rp->rtsa_slrange.lower_bound)) { 381 DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *, 382 "rtsa(1) min_sl and max_sl not set or max_sl is " 383 "not dominating.", rtsa_s *, rp); 384 return (EINVAL); 385 } 386 return (0); 387 } 388 389 /* 390 * A brief explanation of the reference counting scheme: 391 * 392 * Prefix IREs have a non-NULL igsa_gc and a NULL igsa_gcgrp; 393 * IRE_CACHEs have it vice-versa. 394 * 395 * Apart from dynamic references due to to reference holds done 396 * actively by threads, we have the following references: 397 * 398 * gcdb_refcnt: 399 * - Every tsol_gc_t pointing to a tsol_gcdb_t contributes a reference 400 * to the gcdb_refcnt. 401 * 402 * gc_refcnt: 403 * - A prefix IRE that points to an igsa_gc contributes a reference 404 * to the gc_refcnt. 405 * 406 * gcgrp_refcnt: 407 * - An IRE_CACHE that points to an igsa_gcgrp contributes a reference 408 * to the gcgrp_refcnt of the associated tsol_gcgrp_t. 409 * - Every tsol_gc_t in the chain headed by tsol_gcgrp_t contributes 410 * a reference to the gcgrp_refcnt. 
411 */ 412 static tsol_gcdb_t * 413 gcdb_lookup(struct rtsa_s *rp, boolean_t alloc) 414 { 415 tsol_gcdb_t *gcdb = NULL; 416 417 if (rtsa_validate(rp) != 0) 418 return (NULL); 419 420 mutex_enter(&gcdb_lock); 421 /* Find a copy in the cache; otherwise, create one and cache it */ 422 if (mod_hash_find(gcdb_hash, (mod_hash_key_t)rp, 423 (mod_hash_val_t *)&gcdb) == 0) { 424 gcdb->gcdb_refcnt++; 425 ASSERT(gcdb->gcdb_refcnt != 0); 426 427 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__lookup, char *, 428 "gcdb(1) is in gcdb_hash(global)", tsol_gcdb_t *, gcdb); 429 } else if (alloc) { 430 gcdb = kmem_zalloc(sizeof (*gcdb), KM_NOSLEEP); 431 if (gcdb != NULL) { 432 gcdb->gcdb_refcnt = 1; 433 gcdb->gcdb_mask = rp->rtsa_mask; 434 gcdb->gcdb_doi = rp->rtsa_doi; 435 gcdb->gcdb_slrange = rp->rtsa_slrange; 436 437 if (mod_hash_insert(gcdb_hash, 438 (mod_hash_key_t)&gcdb->gcdb_attr, 439 (mod_hash_val_t)gcdb) != 0) { 440 mutex_exit(&gcdb_lock); 441 kmem_free(gcdb, sizeof (*gcdb)); 442 return (NULL); 443 } 444 445 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__insert, char *, 446 "gcdb(1) inserted in gcdb_hash(global)", 447 tsol_gcdb_t *, gcdb); 448 } 449 } 450 mutex_exit(&gcdb_lock); 451 return (gcdb); 452 } 453 454 static void 455 gcdb_inactive(tsol_gcdb_t *gcdb) 456 { 457 ASSERT(MUTEX_HELD(&gcdb_lock)); 458 ASSERT(gcdb != NULL && gcdb->gcdb_refcnt == 0); 459 460 (void) mod_hash_remove(gcdb_hash, (mod_hash_key_t)&gcdb->gcdb_attr, 461 (mod_hash_val_t *)&gcdb); 462 463 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__remove, char *, 464 "gcdb(1) removed from gcdb_hash(global)", 465 tsol_gcdb_t *, gcdb); 466 kmem_free(gcdb, sizeof (*gcdb)); 467 } 468 469 tsol_gc_t * 470 gc_create(struct rtsa_s *rp, tsol_gcgrp_t *gcgrp, boolean_t *gcgrp_xtrarefp) 471 { 472 tsol_gc_t *gc; 473 tsol_gcdb_t *gcdb; 474 475 *gcgrp_xtrarefp = B_TRUE; 476 477 rw_enter(&gcgrp->gcgrp_rwlock, RW_WRITER); 478 if ((gcdb = gcdb_lookup(rp, B_TRUE)) == NULL) { 479 rw_exit(&gcgrp->gcgrp_rwlock); 480 return (NULL); 481 } 482 483 for (gc 
= gcgrp->gcgrp_head; gc != NULL; gc = gc->gc_next) { 484 if (gc->gc_db == gcdb) { 485 ASSERT(gc->gc_grp == gcgrp); 486 487 gc->gc_refcnt++; 488 ASSERT(gc->gc_refcnt != 0); 489 490 GCDB_REFRELE(gcdb); 491 492 DTRACE_PROBE3(tx__gcdb__log__info__gc__create, 493 char *, "found gc(1) in gcgrp(2)", 494 tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp); 495 rw_exit(&gcgrp->gcgrp_rwlock); 496 return (gc); 497 } 498 } 499 500 gc = kmem_zalloc(sizeof (*gc), KM_NOSLEEP); 501 if (gc != NULL) { 502 if (gcgrp->gcgrp_head == NULL) { 503 gcgrp->gcgrp_head = gcgrp->gcgrp_tail = gc; 504 } else { 505 gcgrp->gcgrp_tail->gc_next = gc; 506 gc->gc_prev = gcgrp->gcgrp_tail; 507 gcgrp->gcgrp_tail = gc; 508 } 509 gcgrp->gcgrp_count++; 510 ASSERT(gcgrp->gcgrp_count != 0); 511 512 /* caller has incremented gcgrp reference for us */ 513 gc->gc_grp = gcgrp; 514 515 gc->gc_db = gcdb; 516 gc->gc_refcnt = 1; 517 518 DTRACE_PROBE3(tx__gcdb__log__info__gc__create, char *, 519 "added gc(1) to gcgrp(2)", tsol_gc_t *, gc, 520 tsol_gcgrp_t *, gcgrp); 521 522 *gcgrp_xtrarefp = B_FALSE; 523 } 524 rw_exit(&gcgrp->gcgrp_rwlock); 525 526 return (gc); 527 } 528 529 void 530 gc_inactive(tsol_gc_t *gc) 531 { 532 tsol_gcgrp_t *gcgrp = gc->gc_grp; 533 534 ASSERT(gcgrp != NULL); 535 ASSERT(RW_WRITE_HELD(&gcgrp->gcgrp_rwlock)); 536 ASSERT(gc->gc_refcnt == 0); 537 538 if (gc->gc_prev != NULL) 539 gc->gc_prev->gc_next = gc->gc_next; 540 else 541 gcgrp->gcgrp_head = gc->gc_next; 542 if (gc->gc_next != NULL) 543 gc->gc_next->gc_prev = gc->gc_prev; 544 else 545 gcgrp->gcgrp_tail = gc->gc_prev; 546 ASSERT(gcgrp->gcgrp_count > 0); 547 gcgrp->gcgrp_count--; 548 549 /* drop lock before it's destroyed */ 550 rw_exit(&gcgrp->gcgrp_rwlock); 551 552 DTRACE_PROBE3(tx__gcdb__log__info__gc__remove, char *, 553 "removed inactive gc(1) from gcgrp(2)", 554 tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp); 555 556 GCGRP_REFRELE(gcgrp); 557 558 gc->gc_grp = NULL; 559 gc->gc_prev = gc->gc_next = NULL; 560 561 if (gc->gc_db != NULL) 562 
GCDB_REFRELE(gc->gc_db); 563 564 kmem_free(gc, sizeof (*gc)); 565 } 566 567 tsol_gcgrp_t * 568 gcgrp_lookup(tsol_gcgrp_addr_t *ga, boolean_t alloc) 569 { 570 tsol_gcgrp_t *gcgrp = NULL; 571 mod_hash_t *hashp; 572 573 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6); 574 575 hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash; 576 577 mutex_enter(&gcgrp_lock); 578 if (mod_hash_find(hashp, (mod_hash_key_t)ga, 579 (mod_hash_val_t *)&gcgrp) == 0) { 580 gcgrp->gcgrp_refcnt++; 581 ASSERT(gcgrp->gcgrp_refcnt != 0); 582 583 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__lookup, char *, 584 "found gcgrp(1) in hash(2)", tsol_gcgrp_t *, gcgrp, 585 mod_hash_t *, hashp); 586 587 } else if (alloc) { 588 gcgrp = kmem_zalloc(sizeof (*gcgrp), KM_NOSLEEP); 589 if (gcgrp != NULL) { 590 gcgrp->gcgrp_refcnt = 1; 591 rw_init(&gcgrp->gcgrp_rwlock, NULL, RW_DEFAULT, NULL); 592 bcopy(ga, &gcgrp->gcgrp_addr, sizeof (*ga)); 593 594 if (mod_hash_insert(hashp, 595 (mod_hash_key_t)&gcgrp->gcgrp_addr, 596 (mod_hash_val_t)gcgrp) != 0) { 597 mutex_exit(&gcgrp_lock); 598 kmem_free(gcgrp, sizeof (*gcgrp)); 599 return (NULL); 600 } 601 602 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__insert, 603 char *, "inserted gcgrp(1) in hash(2)", 604 tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp); 605 } 606 } 607 mutex_exit(&gcgrp_lock); 608 return (gcgrp); 609 } 610 611 void 612 gcgrp_inactive(tsol_gcgrp_t *gcgrp) 613 { 614 tsol_gcgrp_addr_t *ga; 615 mod_hash_t *hashp; 616 617 ASSERT(MUTEX_HELD(&gcgrp_lock)); 618 ASSERT(!RW_LOCK_HELD(&gcgrp->gcgrp_rwlock)); 619 ASSERT(gcgrp != NULL && gcgrp->gcgrp_refcnt == 0); 620 ASSERT(gcgrp->gcgrp_head == NULL && gcgrp->gcgrp_count == 0); 621 622 ga = &gcgrp->gcgrp_addr; 623 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6); 624 625 hashp = (ga->ga_af == AF_INET) ? 
gcgrp4_hash : gcgrp6_hash; 626 (void) mod_hash_remove(hashp, (mod_hash_key_t)ga, 627 (mod_hash_val_t *)&gcgrp); 628 rw_destroy(&gcgrp->gcgrp_rwlock); 629 630 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__remove, char *, 631 "removed inactive gcgrp(1) from hash(2)", 632 tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp); 633 634 kmem_free(gcgrp, sizeof (*gcgrp)); 635 } 636 637 /* 638 * Converts CIPSO option to sensitivity label. 639 * Validity checks based on restrictions defined in 640 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) (draft-ietf-cipso-ipsecurity) 641 */ 642 static boolean_t 643 cipso_to_sl(const uchar_t *option, bslabel_t *sl) 644 { 645 const struct cipso_option *co = (const struct cipso_option *)option; 646 const struct cipso_tag_type_1 *tt1; 647 648 tt1 = (struct cipso_tag_type_1 *)&co->cipso_tag_type[0]; 649 if (tt1->tag_type != 1 || 650 tt1->tag_length < TSOL_TT1_MIN_LENGTH || 651 tt1->tag_length > TSOL_TT1_MAX_LENGTH || 652 tt1->tag_length + TSOL_CIPSO_TAG_OFFSET > co->cipso_length) 653 return (B_FALSE); 654 655 bsllow(sl); /* assumed: sets compartments to all zeroes */ 656 LCLASS_SET((_bslabel_impl_t *)sl, tt1->tag_sl); 657 bcopy(tt1->tag_cat, &((_bslabel_impl_t *)sl)->compartments, 658 tt1->tag_length - TSOL_TT1_MIN_LENGTH); 659 return (B_TRUE); 660 } 661 662 /* 663 * Parse the CIPSO label in the incoming packet and construct a ts_label_t 664 * that reflects the CIPSO label and attach it to the dblk cred. Later as 665 * the mblk flows up through the stack any code that needs to examine the 666 * packet label can inspect the label from the dblk cred. This function is 667 * called right in ip_rput for all packets, i.e. locally destined and 668 * to be forwarded packets. The forwarding path needs to examine the label 669 * to determine how to forward the packet. 670 * 671 * For IPv4, IP header options have been pulled up, but other headers might not 672 * have been. 
For IPv6, any hop-by-hop options have been pulled up, but any
 * other headers might not be present.
 *
 * Returns B_TRUE when a label cred was attached to mp, or when the packet
 * is one of the deliberately unlabeled cases (IGMP, IPv4 ICMP router
 * discovery, IPv6 MLD/neighbor discovery range).  Returns B_FALSE when the
 * option is malformed, the source has no template entry, a labeled source
 * sent an unlabeled packet, or an allocation/pullup failed.
 *
 * pullupmsg() may replace the data block of mp, so every pointer derived
 * from mp->b_rptr (ipha, ip6h, src) is re-derived after a pullup.
 */
boolean_t
tsol_get_pkt_label(mblk_t *mp, int version)
{
	tsol_tpc_t	*src_rhtp;
	uchar_t		*opt_ptr = NULL;
	const ipha_t	*ipha;
	bslabel_t	sl;
	uint32_t	doi;
	tsol_ip_label_t	label_type;
	const cipso_option_t *co;
	const void	*src;
	const ip6_t	*ip6h;

	ASSERT(DB_TYPE(mp) == M_DATA);

	if (version == IPV4_VERSION) {
		ipha = (const ipha_t *)mp->b_rptr;
		src = &ipha->ipha_src;
		label_type = tsol_get_option(mp, &opt_ptr);
	} else {
		uchar_t		*after_secopt;
		boolean_t	hbh_needed;
		const uchar_t	*ip6hbh;
		size_t		optlen;

		label_type = OPT_NONE;
		ip6h = (const ip6_t *)mp->b_rptr;
		src = &ip6h->ip6_src;
		if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
			ip6hbh = (const uchar_t *)&ip6h[1];
			optlen = (ip6hbh[1] + 1) << 3;
			ASSERT(ip6hbh + optlen <= mp->b_wptr);
			opt_ptr = tsol_find_secopt_v6(ip6hbh, optlen,
			    &after_secopt, &hbh_needed);
			/* tsol_find_secopt_v6 guarantees some sanity */
			if (opt_ptr != NULL &&
			    (optlen = opt_ptr[1]) >= 8) {
				opt_ptr += 2;
				bcopy(opt_ptr, &doi, sizeof (doi));
				doi = ntohl(doi);
				if (doi == IP6LS_DOI_V4 &&
				    opt_ptr[4] == IP6LS_TT_V4 &&
				    opt_ptr[5] <= optlen - 4 &&
				    opt_ptr[7] <= optlen - 6) {
					opt_ptr += sizeof (doi) + 2;
					label_type = OPT_CIPSO;
				}
			}
		}
	}

	switch (label_type) {
	case OPT_CIPSO:
		/*
		 * Convert the CIPSO label to the internal format
		 * and attach it to the dblk cred.
		 * Validity checks based on restrictions defined in
		 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2)
		 * (draft-ietf-cipso-ipsecurity)
		 */
		if (version == IPV6_VERSION && ip6opt_ls == 0)
			return (B_FALSE);
		co = (const struct cipso_option *)opt_ptr;
		if ((co->cipso_length <
		    TSOL_CIPSO_TAG_OFFSET + TSOL_TT1_MIN_LENGTH) ||
		    (co->cipso_length > IP_MAX_OPT_LENGTH))
			return (B_FALSE);
		bcopy(co->cipso_doi, &doi, sizeof (doi));
		doi = ntohl(doi);
		if (!cipso_to_sl(opt_ptr, &sl))
			return (B_FALSE);
		setbltype(&sl, SUN_SL_ID);
		break;

	case OPT_NONE:
		/*
		 * Handle special cases that are not currently labeled, even
		 * though the sending system may otherwise be configured as
		 * labeled.
		 *	- IGMP
		 *	- IPv4 ICMP Router Discovery
		 *	- IPv6 Neighbor Discovery
		 */
		if (version == IPV4_VERSION) {
			if (ipha->ipha_protocol == IPPROTO_IGMP)
				return (B_TRUE);
			if (ipha->ipha_protocol == IPPROTO_ICMP) {
				const struct icmp *icmp;
				ssize_t icmp_off = IPH_HDR_LENGTH(ipha);

				/*
				 * icmp_type is the first byte of the ICMP
				 * header, so at least one byte past the IP
				 * header must be present.  ">=" matters:
				 * when the ICMP header starts exactly at
				 * b_wptr there are zero bytes available.
				 */
				if (mp->b_rptr + icmp_off >= mp->b_wptr) {
					if (!pullupmsg(mp, icmp_off + 1))
						return (B_FALSE);
					/* data block may have moved */
					ipha = (const ipha_t *)mp->b_rptr;
				}
				icmp = (const struct icmp *)
				    (mp->b_rptr + icmp_off);
				if (icmp->icmp_type == ICMP_ROUTERADVERT ||
				    icmp->icmp_type == ICMP_ROUTERSOLICIT)
					return (B_TRUE);
			}
			src = &ipha->ipha_src;
		} else {
			if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
				const icmp6_t *icmp6 = (const icmp6_t *)
				    (mp->b_rptr + IPV6_HDR_LEN);

				if ((uchar_t *)icmp6 + ICMP6_MINLEN >
				    mp->b_wptr) {
					if (!pullupmsg(mp,
					    (uchar_t *)icmp6 - mp->b_rptr +
					    ICMP6_MINLEN))
						return (B_FALSE);
					/* data block may have moved */
					ip6h = (const ip6_t *)mp->b_rptr;
					icmp6 = (const icmp6_t *)
					    (mp->b_rptr + IPV6_HDR_LEN);
				}
				if (icmp6->icmp6_type >= MLD_LISTENER_QUERY &&
				    icmp6->icmp6_type <= ICMP6_MAX_INFO_TYPE)
					return (B_TRUE);
			}
			src = &ip6h->ip6_src;
		}

		/*
		 * Look up the tnrhtp database and get the implicit label
		 * that is associated with this unlabeled host and attach
		 * it to the packet.
		 */
		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
			return (B_FALSE);

		/* If the sender is labeled, drop the unlabeled packet. */
		if (src_rhtp->tpc_tp.host_type != UNLABELED) {
			TPC_RELE(src_rhtp);
			pr_addr_dbg("unlabeled packet forged from %s\n",
			    version == IPV4_VERSION ? AF_INET : AF_INET6, src);
			return (B_FALSE);
		}

		sl = src_rhtp->tpc_tp.tp_def_label;
		setbltype(&sl, SUN_SL_ID);
		doi = src_rhtp->tpc_tp.tp_doi;
		TPC_RELE(src_rhtp);
		break;

	default:
		return (B_FALSE);
	}

	/* Make sure no other thread is messing with this mblk */
	ASSERT(DB_REF(mp) == 1);
	if (DB_CRED(mp) == NULL) {
		DB_CRED(mp) = newcred_from_bslabel(&sl, doi, KM_NOSLEEP);
		if (DB_CRED(mp) == NULL)
			return (B_FALSE);
	} else {
		cred_t	*newcr;

		newcr = copycred_from_bslabel(DB_CRED(mp), &sl, doi,
		    KM_NOSLEEP);
		if (newcr == NULL)
			return (B_FALSE);
		crfree(DB_CRED(mp));
		DB_CRED(mp) = newcr;
	}

	/*
	 * If the source was unlabeled, then flag as such,
	 * while remembering that CIPSO routers add headers.
	 */
	if (label_type == OPT_NONE)
		crgetlabel(DB_CRED(mp))->tsl_flags |= TSLF_UNLABELED;
	else if (label_type == OPT_CIPSO) {
		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
			return (B_FALSE);
		if (src_rhtp->tpc_tp.host_type == UNLABELED)
			crgetlabel(DB_CRED(mp))->tsl_flags |=
			    TSLF_UNLABELED;
		TPC_RELE(src_rhtp);
	}

	return (B_TRUE);
}

/*
 * This routine determines whether the given packet should be accepted locally.
 * It does a range/set check on the packet's label by looking up the given
 * address in the remote host database.
864 */ 865 boolean_t 866 tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version, 867 boolean_t shared_addr, const conn_t *connp) 868 { 869 const cred_t *credp; 870 ts_label_t *plabel, *conn_plabel; 871 tsol_tpc_t *tp; 872 boolean_t retv; 873 const bslabel_t *label, *conn_label; 874 875 /* 876 * The cases in which this can happen are: 877 * - IPv6 Router Alert, where ip_rput_data_v6 deliberately skips 878 * over the label attachment process. 879 * - MLD output looped-back to ourselves. 880 * - IPv4 Router Discovery, where tsol_get_pkt_label intentionally 881 * avoids the labeling process. 882 * We trust that all valid paths in the code set the cred pointer when 883 * needed. 884 */ 885 if ((credp = DB_CRED(mp)) == NULL) 886 return (B_TRUE); 887 888 /* 889 * If this packet is from the inside (not a remote host) and has the 890 * same zoneid as the selected destination, then no checks are 891 * necessary. Membership in the zone is enough proof. This is 892 * intended to be a hot path through this function. 893 */ 894 if (!crisremote(credp) && 895 crgetzone(credp) == crgetzone(connp->conn_cred)) 896 return (B_TRUE); 897 898 plabel = crgetlabel(credp); 899 conn_plabel = crgetlabel(connp->conn_cred); 900 ASSERT(plabel != NULL && conn_plabel != NULL); 901 902 label = label2bslabel(plabel); 903 conn_label = label2bslabel(crgetlabel(connp->conn_cred)); 904 905 /* 906 * MLPs are always validated using the range and set of the local 907 * address, even when the remote host is unlabeled. 908 */ 909 if (connp->conn_mlp_type == mlptBoth || 910 /* LINTED: no consequent */ 911 connp->conn_mlp_type == (shared_addr ? mlptShared : mlptPrivate)) { 912 ; 913 914 /* 915 * If this is a packet from an unlabeled sender, then we must apply 916 * different rules. If the label is equal to the zone's label, then 917 * it's allowed. 
If it's not equal, but the zone is either the global 918 * zone or the label is dominated by the zone's label, then allow it 919 * as long as it's in the range configured for the destination. 920 */ 921 } else if (plabel->tsl_flags & TSLF_UNLABELED) { 922 if (plabel->tsl_doi == conn_plabel->tsl_doi && 923 blequal(label, conn_label)) 924 return (B_TRUE); 925 926 /* 927 * conn_zoneid is global for an exclusive stack, thus we use 928 * conn_cred to get the zoneid 929 */ 930 if (!connp->conn_mac_exempt || 931 (crgetzoneid(connp->conn_cred) != GLOBAL_ZONEID && 932 (plabel->tsl_doi != conn_plabel->tsl_doi || 933 !bldominates(conn_label, label)))) { 934 DTRACE_PROBE3( 935 tx__ip__log__drop__receivelocal__mac_unl, 936 char *, 937 "unlabeled packet mp(1) fails mac for conn(2)", 938 mblk_t *, mp, conn_t *, connp); 939 return (B_FALSE); 940 } 941 942 /* 943 * If this is a packet from a labeled sender, verify the 944 * label on the packet matches the connection label. 945 */ 946 } else { 947 if (plabel->tsl_doi != conn_plabel->tsl_doi || 948 !blequal(label, conn_label)) { 949 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac__slp, 950 char *, 951 "packet mp(1) failed label match to SLP conn(2)", 952 mblk_t *, mp, conn_t *, connp); 953 return (B_FALSE); 954 } 955 /* 956 * No further checks will be needed if this is a zone- 957 * specific address because (1) The process for bringing up 958 * the interface ensures the zone's label is within the zone- 959 * specific address's valid label range; (2) For cases where 960 * the conn is bound to the unspecified addresses, ip fanout 961 * logic ensures conn's zoneid equals the dest addr's zoneid; 962 * (3) Mac-exempt and mlp logic above already handle all 963 * cases where the zone label may not be the same as the 964 * conn label. 
965 */ 966 if (!shared_addr) 967 return (B_TRUE); 968 } 969 970 tp = find_tpc(addr, version, B_FALSE); 971 if (tp == NULL) { 972 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__no__tnr, 973 char *, "dropping mp(1), host(2) lacks entry", 974 mblk_t *, mp, void *, addr); 975 return (B_FALSE); 976 } 977 978 /* 979 * The local host address should not be unlabeled at this point. The 980 * only way this can happen is that the destination isn't unicast. We 981 * assume that the packet should not have had a label, and thus should 982 * have been handled by the TSLF_UNLABELED logic above. 983 */ 984 if (tp->tpc_tp.host_type == UNLABELED) { 985 retv = B_FALSE; 986 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__flag, char *, 987 "mp(1) unlabeled source, but tp is not unlabeled.", 988 mblk_t *, mp, tsol_tpc_t *, tp); 989 990 } else if (tp->tpc_tp.host_type != SUN_CIPSO) { 991 retv = B_FALSE; 992 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__tptype, char *, 993 "delivering mp(1), found unrecognized tpc(2) type.", 994 mblk_t *, mp, tsol_tpc_t *, tp); 995 996 } else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) { 997 retv = B_FALSE; 998 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *, 999 "mp(1) could not be delievered to tp(2), doi mismatch", 1000 mblk_t *, mp, tsol_tpc_t *, tp); 1001 1002 } else if (!_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) && 1003 !blinlset(label, tp->tpc_tp.tp_sl_set_cipso)) { 1004 retv = B_FALSE; 1005 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *, 1006 "mp(1) could not be delievered to tp(2), bad mac", 1007 mblk_t *, mp, tsol_tpc_t *, tp); 1008 } else { 1009 retv = B_TRUE; 1010 } 1011 1012 TPC_RELE(tp); 1013 1014 return (retv); 1015 } 1016 1017 boolean_t 1018 tsol_can_accept_raw(mblk_t *mp, boolean_t check_host) 1019 { 1020 ts_label_t *plabel = NULL; 1021 tsol_tpc_t *src_rhtp, *dst_rhtp; 1022 boolean_t retv; 1023 1024 if (DB_CRED(mp) != NULL) 1025 plabel = crgetlabel(DB_CRED(mp)); 1026 1027 /* We are bootstrapping or the internal 
template was never deleted */ 1028 if (plabel == NULL) 1029 return (B_TRUE); 1030 1031 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { 1032 ipha_t *ipha = (ipha_t *)mp->b_rptr; 1033 1034 src_rhtp = find_tpc(&ipha->ipha_src, IPV4_VERSION, 1035 B_FALSE); 1036 if (src_rhtp == NULL) 1037 return (B_FALSE); 1038 dst_rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, 1039 B_FALSE); 1040 } else { 1041 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1042 1043 src_rhtp = find_tpc(&ip6h->ip6_src, IPV6_VERSION, 1044 B_FALSE); 1045 if (src_rhtp == NULL) 1046 return (B_FALSE); 1047 dst_rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION, 1048 B_FALSE); 1049 } 1050 if (dst_rhtp == NULL) { 1051 TPC_RELE(src_rhtp); 1052 return (B_FALSE); 1053 } 1054 1055 if (label2doi(plabel) != src_rhtp->tpc_tp.tp_doi) { 1056 retv = B_FALSE; 1057 1058 /* 1059 * Check that the packet's label is in the correct range for labeled 1060 * sender, or is equal to the default label for unlabeled sender. 1061 */ 1062 } else if ((src_rhtp->tpc_tp.host_type != UNLABELED && 1063 !_blinrange(label2bslabel(plabel), 1064 &src_rhtp->tpc_tp.tp_sl_range_cipso) && 1065 !blinlset(label2bslabel(plabel), 1066 src_rhtp->tpc_tp.tp_sl_set_cipso)) || 1067 (src_rhtp->tpc_tp.host_type == UNLABELED && 1068 !blequal(&plabel->tsl_label, &src_rhtp->tpc_tp.tp_def_label))) { 1069 retv = B_FALSE; 1070 1071 } else if (check_host) { 1072 retv = B_TRUE; 1073 1074 /* 1075 * Until we have SL range in the Zone structure, pass it 1076 * when our own address lookup returned an internal entry. 
1077 */ 1078 } else switch (dst_rhtp->tpc_tp.host_type) { 1079 case UNLABELED: 1080 retv = B_TRUE; 1081 break; 1082 1083 case SUN_CIPSO: 1084 retv = _blinrange(label2bslabel(plabel), 1085 &dst_rhtp->tpc_tp.tp_sl_range_cipso) || 1086 blinlset(label2bslabel(plabel), 1087 dst_rhtp->tpc_tp.tp_sl_set_cipso); 1088 break; 1089 1090 default: 1091 retv = B_FALSE; 1092 } 1093 TPC_RELE(src_rhtp); 1094 TPC_RELE(dst_rhtp); 1095 return (retv); 1096 } 1097 1098 /* 1099 * This routine determines whether a response to a failed packet delivery or 1100 * connection should be sent back. By default, the policy is to allow such 1101 * messages to be sent at all times, as these messages reveal little useful 1102 * information and are healthy parts of TCP/IP networking. 1103 * 1104 * If tsol_strict_error is set, then we do strict tests: if the packet label is 1105 * within the label range/set of this host/zone, return B_TRUE; otherwise 1106 * return B_FALSE, which causes the packet to be dropped silently. 1107 * 1108 * Note that tsol_get_pkt_label will cause the packet to drop if the sender is 1109 * marked as labeled in the remote host database, but the packet lacks a label. 1110 * This means that we don't need to do a lookup on the source; the 1111 * TSLF_UNLABELED flag is sufficient. 1112 */ 1113 boolean_t 1114 tsol_can_reply_error(const mblk_t *mp) 1115 { 1116 ts_label_t *plabel = NULL; 1117 tsol_tpc_t *rhtp; 1118 const ipha_t *ipha; 1119 const ip6_t *ip6h; 1120 boolean_t retv; 1121 bslabel_t *pktbs; 1122 1123 /* Caller must pull up at least the IP header */ 1124 ASSERT(MBLKL(mp) >= (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION ? 
1125 sizeof (*ipha) : sizeof (*ip6h))); 1126 1127 if (!tsol_strict_error) 1128 return (B_TRUE); 1129 1130 if (DB_CRED(mp) != NULL) 1131 plabel = crgetlabel(DB_CRED(mp)); 1132 1133 /* We are bootstrapping or the internal template was never deleted */ 1134 if (plabel == NULL) 1135 return (B_TRUE); 1136 1137 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { 1138 ipha = (const ipha_t *)mp->b_rptr; 1139 rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, B_FALSE); 1140 } else { 1141 ip6h = (const ip6_t *)mp->b_rptr; 1142 rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION, B_FALSE); 1143 } 1144 1145 if (rhtp == NULL || label2doi(plabel) != rhtp->tpc_tp.tp_doi) { 1146 retv = B_FALSE; 1147 } else { 1148 /* 1149 * If we're in the midst of forwarding, then the destination 1150 * address might not be labeled. In that case, allow unlabeled 1151 * packets through only if the default label is the same, and 1152 * labeled ones if they dominate. 1153 */ 1154 pktbs = label2bslabel(plabel); 1155 switch (rhtp->tpc_tp.host_type) { 1156 case UNLABELED: 1157 if (plabel->tsl_flags & TSLF_UNLABELED) { 1158 retv = blequal(pktbs, 1159 &rhtp->tpc_tp.tp_def_label); 1160 } else { 1161 retv = bldominates(pktbs, 1162 &rhtp->tpc_tp.tp_def_label); 1163 } 1164 break; 1165 1166 case SUN_CIPSO: 1167 retv = _blinrange(pktbs, 1168 &rhtp->tpc_tp.tp_sl_range_cipso) || 1169 blinlset(pktbs, rhtp->tpc_tp.tp_sl_set_cipso); 1170 break; 1171 1172 default: 1173 retv = B_FALSE; 1174 break; 1175 } 1176 } 1177 1178 if (rhtp != NULL) 1179 TPC_RELE(rhtp); 1180 1181 return (retv); 1182 } 1183 1184 /* 1185 * Finds the zone associated with the given packet. Returns GLOBAL_ZONEID if 1186 * the zone cannot be located. 1187 * 1188 * This is used by the classifier when the packet matches an ALL_ZONES IRE, and 1189 * there's no MLP defined. 1190 * 1191 * Note that we assume that this is only invoked in the ALL_ZONES case. 
1192 * Handling other cases would require handle exclusive stack zones where either 1193 * this routine or the callers would have to map from 1194 * the zoneid (zone->zone_id) to what IP uses in conn_zoneid etc. 1195 */ 1196 zoneid_t 1197 tsol_packet_to_zoneid(const mblk_t *mp) 1198 { 1199 cred_t *cr = DB_CRED(mp); 1200 zone_t *zone; 1201 ts_label_t *label; 1202 1203 if (cr != NULL) { 1204 if ((label = crgetlabel(cr)) != NULL) { 1205 zone = zone_find_by_label(label); 1206 if (zone != NULL) { 1207 zoneid_t zoneid = zone->zone_id; 1208 1209 zone_rele(zone); 1210 return (zoneid); 1211 } 1212 } 1213 } 1214 return (GLOBAL_ZONEID); 1215 } 1216 1217 int 1218 tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl) 1219 { 1220 int error = 0; 1221 tsol_ire_gw_secattr_t *attrp = NULL; 1222 tsol_tnrhc_t *gw_rhc = NULL; 1223 tsol_gcgrp_t *gcgrp = NULL; 1224 tsol_gc_t *gc = NULL; 1225 in_addr_t ga_addr4; 1226 void *paddr = NULL; 1227 1228 /* Not in Trusted mode or IRE is local/loopback/broadcast/interface */ 1229 if (!is_system_labeled() || 1230 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST | 1231 IRE_INTERFACE))) 1232 goto done; 1233 1234 /* 1235 * If we don't have a label to compare with, or the IRE does not 1236 * contain any gateway security attributes, there's not much that 1237 * we can do. We let the former case pass, and the latter fail, 1238 * since the IRE doesn't qualify for a match due to the lack of 1239 * security attributes. 1240 */ 1241 if (tsl == NULL || ire->ire_gw_secattr == NULL) { 1242 if (tsl != NULL) { 1243 DTRACE_PROBE3(tx__ip__log__drop__irematch__nogwsec, 1244 char *, 1245 "ire(1) lacks ire_gw_secattr matching label(2)", 1246 ire_t *, ire, ts_label_t *, tsl); 1247 error = EACCES; 1248 } 1249 goto done; 1250 } 1251 1252 attrp = ire->ire_gw_secattr; 1253 1254 /* 1255 * The possible lock order scenarios related to the tsol gateway 1256 * attribute locks are documented at the beginning of ip.c in the 1257 * lock order scenario section. 
1258 */ 1259 mutex_enter(&attrp->igsa_lock); 1260 1261 /* 1262 * Depending on the IRE type (prefix vs. cache), we seek the group 1263 * structure which contains all security credentials of the gateway. 1264 * A prefix IRE is associated with at most one gateway credential, 1265 * while a cache IRE is associated with every credentials that the 1266 * gateway has. 1267 */ 1268 if ((gc = attrp->igsa_gc) != NULL) { /* prefix */ 1269 gcgrp = gc->gc_grp; 1270 ASSERT(gcgrp != NULL); 1271 rw_enter(&gcgrp->gcgrp_rwlock, RW_READER); 1272 } else if ((gcgrp = attrp->igsa_gcgrp) != NULL) { /* cache */ 1273 rw_enter(&gcgrp->gcgrp_rwlock, RW_READER); 1274 gc = gcgrp->gcgrp_head; 1275 if (gc == NULL) { 1276 /* gc group is empty, so the drop lock now */ 1277 ASSERT(gcgrp->gcgrp_count == 0); 1278 rw_exit(&gcgrp->gcgrp_rwlock); 1279 gcgrp = NULL; 1280 } 1281 } 1282 1283 if (gcgrp != NULL) 1284 GCGRP_REFHOLD(gcgrp); 1285 1286 if ((gw_rhc = attrp->igsa_rhc) != NULL) { 1287 /* 1288 * If our cached entry has grown stale, then discard it so we 1289 * can get a new one. 
1290 */ 1291 if (gw_rhc->rhc_invalid || gw_rhc->rhc_tpc->tpc_invalid) { 1292 TNRHC_RELE(gw_rhc); 1293 attrp->igsa_rhc = gw_rhc = NULL; 1294 } else { 1295 TNRHC_HOLD(gw_rhc) 1296 } 1297 } 1298 1299 /* Last attempt at loading the template had failed; try again */ 1300 if (gw_rhc == NULL) { 1301 if (gcgrp != NULL) { 1302 tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr; 1303 1304 if (ire->ire_ipversion == IPV4_VERSION) { 1305 ASSERT(ga->ga_af == AF_INET); 1306 IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4); 1307 paddr = &ga_addr4; 1308 } else { 1309 ASSERT(ga->ga_af == AF_INET6); 1310 paddr = &ga->ga_addr; 1311 } 1312 } else if (ire->ire_ipversion == IPV6_VERSION && 1313 !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) { 1314 paddr = &ire->ire_gateway_addr_v6; 1315 } else if (ire->ire_ipversion == IPV4_VERSION && 1316 ire->ire_gateway_addr != INADDR_ANY) { 1317 paddr = &ire->ire_gateway_addr; 1318 } 1319 1320 /* We've found a gateway address to do the template lookup */ 1321 if (paddr != NULL) { 1322 ASSERT(gw_rhc == NULL); 1323 gw_rhc = find_rhc(paddr, ire->ire_ipversion, B_FALSE); 1324 if (gw_rhc != NULL) { 1325 /* 1326 * Note that if the lookup above returned an 1327 * internal template, we'll use it for the 1328 * time being, and do another lookup next 1329 * time around. 1330 */ 1331 /* Another thread has loaded the template? */ 1332 if (attrp->igsa_rhc != NULL) { 1333 TNRHC_RELE(gw_rhc) 1334 /* reload, it could be different */ 1335 gw_rhc = attrp->igsa_rhc; 1336 } else { 1337 attrp->igsa_rhc = gw_rhc; 1338 } 1339 /* 1340 * Hold an extra reference just like we did 1341 * above prior to dropping the igsa_lock. 1342 */ 1343 TNRHC_HOLD(gw_rhc) 1344 } 1345 } 1346 } 1347 1348 mutex_exit(&attrp->igsa_lock); 1349 /* Gateway template not found */ 1350 if (gw_rhc == NULL) { 1351 /* 1352 * If destination address is directly reachable through an 1353 * interface rather than through a learned route, pass it. 
1354 */ 1355 if (paddr != NULL) { 1356 DTRACE_PROBE3( 1357 tx__ip__log__drop__irematch__nogwtmpl, char *, 1358 "ire(1), label(2) off-link with no gw_rhc", 1359 ire_t *, ire, ts_label_t *, tsl); 1360 error = EINVAL; 1361 } 1362 goto done; 1363 } 1364 1365 if (gc != NULL) { 1366 tsol_gcdb_t *gcdb; 1367 /* 1368 * In the case of IRE_CACHE we've got one or more gateway 1369 * security credentials to compare against the passed in label. 1370 * Perform label range comparison against each security 1371 * credential of the gateway. In the case of a prefix ire 1372 * we need to match against the security attributes of 1373 * just the route itself, so the loop is executed only once. 1374 */ 1375 ASSERT(gcgrp != NULL); 1376 do { 1377 gcdb = gc->gc_db; 1378 if (tsl->tsl_doi == gcdb->gcdb_doi && 1379 _blinrange(&tsl->tsl_label, &gcdb->gcdb_slrange)) 1380 break; 1381 if (ire->ire_type == IRE_CACHE) 1382 gc = gc->gc_next; 1383 else 1384 gc = NULL; 1385 } while (gc != NULL); 1386 1387 if (gc == NULL) { 1388 DTRACE_PROBE3( 1389 tx__ip__log__drop__irematch__nogcmatched, 1390 char *, "ire(1), tsl(2): all gc failed match", 1391 ire_t *, ire, ts_label_t *, tsl); 1392 error = EACCES; 1393 } 1394 } else { 1395 /* 1396 * We didn't find any gateway credentials in the IRE 1397 * attributes; fall back to the gateway's template for 1398 * label range checks, if we are required to do so. 
1399 */ 1400 ASSERT(gw_rhc != NULL); 1401 switch (gw_rhc->rhc_tpc->tpc_tp.host_type) { 1402 case SUN_CIPSO: 1403 if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi || 1404 (!_blinrange(&tsl->tsl_label, 1405 &gw_rhc->rhc_tpc->tpc_tp.tp_sl_range_cipso) && 1406 !blinlset(&tsl->tsl_label, 1407 gw_rhc->rhc_tpc->tpc_tp.tp_sl_set_cipso))) { 1408 error = EACCES; 1409 DTRACE_PROBE4( 1410 tx__ip__log__drop__irematch__deftmpl, 1411 char *, "ire(1), tsl(2), gw_rhc(3) " 1412 "failed match (cipso gw)", 1413 ire_t *, ire, ts_label_t *, tsl, 1414 tsol_tnrhc_t *, gw_rhc); 1415 } 1416 break; 1417 1418 case UNLABELED: 1419 if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi || 1420 (!_blinrange(&tsl->tsl_label, 1421 &gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_range) && 1422 !blinlset(&tsl->tsl_label, 1423 gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_set))) { 1424 error = EACCES; 1425 DTRACE_PROBE4( 1426 tx__ip__log__drop__irematch__deftmpl, 1427 char *, "ire(1), tsl(2), gw_rhc(3) " 1428 "failed match (unlabeled gw)", 1429 ire_t *, ire, ts_label_t *, tsl, 1430 tsol_tnrhc_t *, gw_rhc); 1431 } 1432 break; 1433 } 1434 } 1435 1436 done: 1437 1438 if (gcgrp != NULL) { 1439 rw_exit(&gcgrp->gcgrp_rwlock); 1440 GCGRP_REFRELE(gcgrp); 1441 } 1442 1443 if (gw_rhc != NULL) 1444 TNRHC_RELE(gw_rhc) 1445 1446 return (error); 1447 } 1448 1449 /* 1450 * Performs label accreditation checks for packet forwarding. 1451 * 1452 * Returns a pointer to the modified mblk if allowed for forwarding, 1453 * or NULL if the packet must be dropped. 
1454 */ 1455 mblk_t * 1456 tsol_ip_forward(ire_t *ire, mblk_t *mp) 1457 { 1458 tsol_ire_gw_secattr_t *attrp = NULL; 1459 ipha_t *ipha; 1460 ip6_t *ip6h; 1461 const void *pdst; 1462 const void *psrc; 1463 boolean_t off_link; 1464 tsol_tpc_t *dst_rhtp, *gw_rhtp; 1465 tsol_ip_label_t label_type; 1466 uchar_t *opt_ptr = NULL; 1467 ts_label_t *tsl; 1468 uint8_t proto; 1469 int af, adjust; 1470 uint16_t iplen; 1471 boolean_t need_tpc_rele = B_FALSE; 1472 ipaddr_t *gw; 1473 ip_stack_t *ipst = ire->ire_ipst; 1474 1475 ASSERT(ire != NULL && mp != NULL); 1476 ASSERT(ire->ire_stq != NULL); 1477 1478 af = (ire->ire_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6; 1479 1480 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { 1481 ASSERT(ire->ire_ipversion == IPV4_VERSION); 1482 ipha = (ipha_t *)mp->b_rptr; 1483 psrc = &ipha->ipha_src; 1484 pdst = &ipha->ipha_dst; 1485 proto = ipha->ipha_protocol; 1486 1487 /* 1488 * off_link is TRUE if destination not directly reachable. 1489 * Surya note: we avoid creation of per-dst IRE_CACHE entries 1490 * for forwarded packets, so we set off_link to be TRUE 1491 * if the packet dst is different from the ire_addr of 1492 * the ire for the nexthop. 1493 */ 1494 off_link = ((ipha->ipha_dst != ire->ire_addr) || 1495 (ire->ire_gateway_addr != INADDR_ANY)); 1496 } else { 1497 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1498 ip6h = (ip6_t *)mp->b_rptr; 1499 psrc = &ip6h->ip6_src; 1500 pdst = &ip6h->ip6_dst; 1501 proto = ip6h->ip6_nxt; 1502 1503 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP && 1504 proto != IPPROTO_ICMPV6) { 1505 uint8_t *nexthdrp; 1506 uint16_t hdr_len; 1507 1508 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, 1509 &nexthdrp)) { 1510 /* malformed packet; drop it */ 1511 return (NULL); 1512 } 1513 proto = *nexthdrp; 1514 } 1515 1516 /* destination not directly reachable? 
*/ 1517 off_link = !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6); 1518 } 1519 1520 if ((tsl = MBLK_GETLABEL(mp)) == NULL) 1521 return (mp); 1522 1523 label_type = tsol_get_option(mp, &opt_ptr); 1524 1525 ASSERT(psrc != NULL && pdst != NULL); 1526 dst_rhtp = find_tpc(pdst, ire->ire_ipversion, B_FALSE); 1527 1528 if (dst_rhtp == NULL) { 1529 /* 1530 * Without a template we do not know if forwarding 1531 * violates MAC 1532 */ 1533 DTRACE_PROBE3(tx__ip__log__drop__forward__nodst, char *, 1534 "mp(1) dropped, no template for destination ip4|6(2)", 1535 mblk_t *, mp, void *, pdst); 1536 return (NULL); 1537 } 1538 1539 /* 1540 * Gateway template must have existed for off-link destinations, 1541 * since tsol_ire_match_gwattr has ensured such condition. 1542 */ 1543 if (ire->ire_ipversion == IPV4_VERSION && off_link) { 1544 /* 1545 * Surya note: first check if we can get the gw_rhtp from 1546 * the ire_gw_secattr->igsa_rhc; if this is null, then 1547 * do a lookup based on the ire_addr (address of gw) 1548 */ 1549 if (ire->ire_gw_secattr != NULL && 1550 ire->ire_gw_secattr->igsa_rhc != NULL) { 1551 attrp = ire->ire_gw_secattr; 1552 gw_rhtp = attrp->igsa_rhc->rhc_tpc; 1553 } else { 1554 /* 1555 * use the ire_addr if this is the IRE_CACHE of nexthop 1556 */ 1557 gw = (ire->ire_gateway_addr == NULL? 
&ire->ire_addr : 1558 &ire->ire_gateway_addr); 1559 gw_rhtp = find_tpc(gw, ire->ire_ipversion, B_FALSE); 1560 need_tpc_rele = B_TRUE; 1561 } 1562 if (gw_rhtp == NULL) { 1563 DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *, 1564 "mp(1) dropped, no gateway in ire attributes(2)", 1565 mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp); 1566 mp = NULL; 1567 goto keep_label; 1568 } 1569 } 1570 if (ire->ire_ipversion == IPV6_VERSION && 1571 ((attrp = ire->ire_gw_secattr) == NULL || attrp->igsa_rhc == NULL || 1572 (gw_rhtp = attrp->igsa_rhc->rhc_tpc) == NULL) && off_link) { 1573 DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *, 1574 "mp(1) dropped, no gateway in ire attributes(2)", 1575 mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp); 1576 mp = NULL; 1577 goto keep_label; 1578 } 1579 1580 /* 1581 * Check that the label for the packet is acceptable 1582 * by destination host; otherwise, drop it. 1583 */ 1584 switch (dst_rhtp->tpc_tp.host_type) { 1585 case SUN_CIPSO: 1586 if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi || 1587 (!_blinrange(&tsl->tsl_label, 1588 &dst_rhtp->tpc_tp.tp_sl_range_cipso) && 1589 !blinlset(&tsl->tsl_label, 1590 dst_rhtp->tpc_tp.tp_sl_set_cipso))) { 1591 DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *, 1592 "labeled packet mp(1) dropped, label(2) fails " 1593 "destination(3) accredation check", 1594 mblk_t *, mp, ts_label_t *, tsl, 1595 tsol_tpc_t *, dst_rhtp); 1596 mp = NULL; 1597 goto keep_label; 1598 } 1599 break; 1600 1601 1602 case UNLABELED: 1603 if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi || 1604 !blequal(&dst_rhtp->tpc_tp.tp_def_label, 1605 &tsl->tsl_label)) { 1606 DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *, 1607 "unlabeled packet mp(1) dropped, label(2) fails " 1608 "destination(3) accredation check", 1609 mblk_t *, mp, ts_label_t *, tsl, 1610 tsol_tpc_t *, dst_rhtp); 1611 mp = NULL; 1612 goto keep_label; 1613 } 1614 break; 1615 } 1616 if (label_type == OPT_CIPSO) { 1617 /* 1618 * We keep the label on any of the following 
cases: 1619 * 1620 * 1. The destination is labeled (on/off-link). 1621 * 2. The unlabeled destination is off-link, 1622 * and the next hop gateway is labeled. 1623 */ 1624 if (dst_rhtp->tpc_tp.host_type != UNLABELED || 1625 (off_link && 1626 gw_rhtp->tpc_tp.host_type != UNLABELED)) 1627 goto keep_label; 1628 1629 /* 1630 * Strip off the CIPSO option from the packet because: the 1631 * unlabeled destination host is directly reachable through 1632 * an interface (on-link); or, the unlabeled destination host 1633 * is not directly reachable (off-link), and the next hop 1634 * gateway is unlabeled. 1635 */ 1636 adjust = (af == AF_INET) ? tsol_remove_secopt(ipha, MBLKL(mp)) : 1637 tsol_remove_secopt_v6(ip6h, MBLKL(mp)); 1638 1639 ASSERT(adjust <= 0); 1640 if (adjust != 0) { 1641 1642 /* adjust is negative */ 1643 ASSERT((mp->b_wptr + adjust) >= mp->b_rptr); 1644 mp->b_wptr += adjust; 1645 1646 if (af == AF_INET) { 1647 ipha = (ipha_t *)mp->b_rptr; 1648 iplen = ntohs(ipha->ipha_length) + adjust; 1649 ipha->ipha_length = htons(iplen); 1650 ipha->ipha_hdr_checksum = 0; 1651 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1652 } 1653 DTRACE_PROBE3(tx__ip__log__info__forward__adjust, 1654 char *, 1655 "mp(1) adjusted(2) for CIPSO option removal", 1656 mblk_t *, mp, int, adjust); 1657 } 1658 goto keep_label; 1659 } 1660 1661 ASSERT(label_type == OPT_NONE); 1662 ASSERT(dst_rhtp != NULL); 1663 1664 /* 1665 * We need to add CIPSO option if the destination or the next hop 1666 * gateway is labeled. Otherwise, pass the packet as is. 
1667 */ 1668 if (dst_rhtp->tpc_tp.host_type == UNLABELED && 1669 (!off_link || gw_rhtp->tpc_tp.host_type == UNLABELED)) 1670 goto keep_label; 1671 1672 if ((af == AF_INET && 1673 tsol_check_label(DB_CRED(mp), &mp, B_FALSE, ipst) != 0) || 1674 (af == AF_INET6 && 1675 tsol_check_label_v6(DB_CRED(mp), &mp, B_FALSE, ipst) != 0)) { 1676 mp = NULL; 1677 goto keep_label; 1678 } 1679 1680 if (af == AF_INET) { 1681 ipha = (ipha_t *)mp->b_rptr; 1682 ipha->ipha_hdr_checksum = 0; 1683 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1684 } 1685 1686 keep_label: 1687 TPC_RELE(dst_rhtp); 1688 if (need_tpc_rele && gw_rhtp != NULL) 1689 TPC_RELE(gw_rhtp); 1690 return (mp); 1691 } 1692 1693 /* 1694 * Name: tsol_pmtu_adjust() 1695 * 1696 * Returns the adjusted mtu after removing security option. 1697 * Removes/subtracts the option if the packet's cred indicates an unlabeled 1698 * sender or if pkt_diff indicates this system enlarged the packet. 1699 */ 1700 uint32_t 1701 tsol_pmtu_adjust(mblk_t *mp, uint32_t mtu, int pkt_diff, int af) 1702 { 1703 int label_adj = 0; 1704 uint32_t min_mtu = IP_MIN_MTU; 1705 tsol_tpc_t *src_rhtp; 1706 void *src; 1707 1708 /* 1709 * Note: label_adj is non-positive, indicating the number of 1710 * bytes removed by removing the security option from the 1711 * header. 
1712 */ 1713 if (af == AF_INET6) { 1714 ip6_t *ip6h; 1715 1716 min_mtu = IPV6_MIN_MTU; 1717 ip6h = (ip6_t *)mp->b_rptr; 1718 src = &ip6h->ip6_src; 1719 if ((src_rhtp = find_tpc(src, IPV6_VERSION, B_FALSE)) == NULL) 1720 return (mtu); 1721 if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED) { 1722 label_adj = tsol_remove_secopt_v6( 1723 (ip6_t *)mp->b_rptr, MBLKL(mp)); 1724 } 1725 } else { 1726 ipha_t *ipha; 1727 1728 ASSERT(af == AF_INET); 1729 ipha = (ipha_t *)mp->b_rptr; 1730 src = &ipha->ipha_src; 1731 if ((src_rhtp = find_tpc(src, IPV4_VERSION, B_FALSE)) == NULL) 1732 return (mtu); 1733 if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED) 1734 label_adj = tsol_remove_secopt( 1735 (ipha_t *)mp->b_rptr, MBLKL(mp)); 1736 } 1737 /* 1738 * Make pkt_diff non-negative and the larger of the bytes 1739 * previously added (if any) or just removed, since label 1740 * addition + subtraction may not be completely idempotent. 1741 */ 1742 if (pkt_diff < -label_adj) 1743 pkt_diff = -label_adj; 1744 if (pkt_diff > 0 && pkt_diff < mtu) 1745 mtu -= pkt_diff; 1746 1747 TPC_RELE(src_rhtp); 1748 return (MAX(mtu, min_mtu)); 1749 } 1750 1751 /* 1752 * Name: tsol_rtsa_init() 1753 * 1754 * Normal: Sanity checks on the route security attributes provided by 1755 * user. Convert it into a route security parameter list to 1756 * be returned to caller. 1757 * 1758 * Output: EINVAL if bad security attributes in the routing message 1759 * ENOMEM if unable to allocate data structures 1760 * 0 otherwise. 1761 * 1762 * Note: On input, cp must point to the end of any addresses in 1763 * the rt_msghdr_t structure. 1764 */ 1765 int 1766 tsol_rtsa_init(rt_msghdr_t *rtm, tsol_rtsecattr_t *sp, caddr_t cp) 1767 { 1768 uint_t sacnt; 1769 int err; 1770 caddr_t lim; 1771 tsol_rtsecattr_t *tp; 1772 1773 ASSERT((cp >= (caddr_t)&rtm[1]) && sp != NULL); 1774 1775 /* 1776 * In theory, we could accept as many security attributes configured 1777 * per route destination. 
However, the current design is limited 1778 * such that at most only one set security attributes is allowed to 1779 * be associated with a prefix IRE. We therefore assert for now. 1780 */ 1781 /* LINTED */ 1782 ASSERT(TSOL_RTSA_REQUEST_MAX == 1); 1783 1784 sp->rtsa_cnt = 0; 1785 lim = (caddr_t)rtm + rtm->rtm_msglen; 1786 ASSERT(cp <= lim); 1787 1788 if ((lim - cp) < sizeof (rtm_ext_t) || 1789 ((rtm_ext_t *)cp)->rtmex_type != RTMEX_GATEWAY_SECATTR) 1790 return (0); 1791 1792 if (((rtm_ext_t *)cp)->rtmex_len < sizeof (tsol_rtsecattr_t)) 1793 return (EINVAL); 1794 1795 cp += sizeof (rtm_ext_t); 1796 1797 if ((lim - cp) < sizeof (*tp) || 1798 (tp = (tsol_rtsecattr_t *)cp, (sacnt = tp->rtsa_cnt) == 0) || 1799 (lim - cp) < TSOL_RTSECATTR_SIZE(sacnt)) 1800 return (EINVAL); 1801 1802 /* 1803 * Trying to add route security attributes when system 1804 * labeling service is not available, or when user supllies 1805 * more than the maximum number of security attributes 1806 * allowed per request. 1807 */ 1808 if ((sacnt > 0 && !is_system_labeled()) || 1809 sacnt > TSOL_RTSA_REQUEST_MAX) 1810 return (EINVAL); 1811 1812 /* Ensure valid credentials */ 1813 if ((err = rtsa_validate(&((tsol_rtsecattr_t *)cp)-> 1814 rtsa_attr[0])) != 0) { 1815 cp += sizeof (*sp); 1816 return (err); 1817 } 1818 1819 bcopy(cp, sp, sizeof (*sp)); 1820 cp += sizeof (*sp); 1821 return (0); 1822 } 1823 1824 int 1825 tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc, 1826 tsol_gcgrp_t *gcgrp) 1827 { 1828 tsol_ire_gw_secattr_t *attrp; 1829 boolean_t exists = B_FALSE; 1830 in_addr_t ga_addr4; 1831 void *paddr = NULL; 1832 1833 ASSERT(ire != NULL); 1834 1835 /* 1836 * The only time that attrp can be NULL is when this routine is 1837 * called for the first time during the creation/initialization 1838 * of the corresponding IRE. It will only get cleared when the 1839 * IRE is deleted. 
1840 */ 1841 if ((attrp = ire->ire_gw_secattr) == NULL) { 1842 attrp = ire_gw_secattr_alloc(KM_NOSLEEP); 1843 if (attrp == NULL) 1844 return (ENOMEM); 1845 ire->ire_gw_secattr = attrp; 1846 } else { 1847 exists = B_TRUE; 1848 mutex_enter(&attrp->igsa_lock); 1849 1850 if (attrp->igsa_rhc != NULL) { 1851 TNRHC_RELE(attrp->igsa_rhc); 1852 attrp->igsa_rhc = NULL; 1853 } 1854 1855 if (attrp->igsa_gc != NULL) 1856 GC_REFRELE(attrp->igsa_gc); 1857 if (attrp->igsa_gcgrp != NULL) 1858 GCGRP_REFRELE(attrp->igsa_gcgrp); 1859 } 1860 ASSERT(!exists || MUTEX_HELD(&attrp->igsa_lock)); 1861 1862 /* 1863 * References already held by caller and we keep them; 1864 * note that both gc and gcgrp may be set to NULL to 1865 * clear out igsa_gc and igsa_gcgrp, respectively. 1866 */ 1867 attrp->igsa_gc = gc; 1868 attrp->igsa_gcgrp = gcgrp; 1869 1870 if (gcgrp == NULL && gc != NULL) { 1871 gcgrp = gc->gc_grp; 1872 ASSERT(gcgrp != NULL); 1873 } 1874 1875 /* 1876 * Intialize the template for gateway; we use the gateway's 1877 * address found in either the passed in gateway credential 1878 * or group pointer, or the ire_gateway_addr{_v6} field. 1879 */ 1880 if (gcgrp != NULL) { 1881 tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr; 1882 1883 /* 1884 * Caller is holding a reference, and that we don't 1885 * need to hold any lock to access the address. 1886 */ 1887 if (ipversion == IPV4_VERSION) { 1888 ASSERT(ga->ga_af == AF_INET); 1889 IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4); 1890 paddr = &ga_addr4; 1891 } else { 1892 ASSERT(ga->ga_af == AF_INET6); 1893 paddr = &ga->ga_addr; 1894 } 1895 } else if (ipversion == IPV6_VERSION && 1896 !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) { 1897 paddr = &ire->ire_gateway_addr_v6; 1898 } else if (ipversion == IPV4_VERSION && 1899 ire->ire_gateway_addr != INADDR_ANY) { 1900 paddr = &ire->ire_gateway_addr; 1901 } 1902 1903 /* 1904 * Lookup the gateway template; note that we could get an internal 1905 * template here, which we cache anyway. 
During IRE matching, we'll 1906 * try to update this gateway template cache and hopefully get a 1907 * real one. 1908 */ 1909 if (paddr != NULL) { 1910 attrp->igsa_rhc = find_rhc(paddr, ipversion, B_FALSE); 1911 } 1912 1913 if (exists) 1914 mutex_exit(&attrp->igsa_lock); 1915 1916 return (0); 1917 } 1918 1919 /* 1920 * This function figures the type of MLP that we'll be using based on the 1921 * address that the user is binding and the zone. If the address is 1922 * unspecified, then we're looking at both private and shared. If it's one 1923 * of the zone's private addresses, then it's private only. If it's one 1924 * of the global addresses, then it's shared only. 1925 * 1926 * If we can't figure out what it is, then return mlptSingle. That's actually 1927 * an error case. 1928 * 1929 * The callers are assume to pass in zone->zone_id and not the zoneid that 1930 * is stored in a conn_t (since the latter will be GLOBAL_ZONEID in an 1931 * exclusive stack zone). 1932 */ 1933 mlp_type_t 1934 tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr, 1935 ip_stack_t *ipst) 1936 { 1937 in_addr_t in4; 1938 ire_t *ire; 1939 ipif_t *ipif; 1940 zoneid_t addrzone; 1941 zoneid_t ip_zoneid; 1942 1943 ASSERT(addr != NULL); 1944 1945 /* 1946 * For exclusive stacks we set the zoneid to zero 1947 * to operate as if in the global zone for IRE and conn_t comparisons. 
1948 */ 1949 if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) 1950 ip_zoneid = GLOBAL_ZONEID; 1951 else 1952 ip_zoneid = zoneid; 1953 1954 if (version == IPV6_VERSION && 1955 IN6_IS_ADDR_V4MAPPED((const in6_addr_t *)addr)) { 1956 IN6_V4MAPPED_TO_IPADDR((const in6_addr_t *)addr, in4); 1957 addr = &in4; 1958 version = IPV4_VERSION; 1959 } 1960 1961 if (version == IPV4_VERSION) { 1962 in4 = *(const in_addr_t *)addr; 1963 if (in4 == INADDR_ANY) { 1964 return (mlptBoth); 1965 } 1966 ire = ire_cache_lookup(in4, ip_zoneid, NULL, ipst); 1967 } else { 1968 if (IN6_IS_ADDR_UNSPECIFIED((const in6_addr_t *)addr)) { 1969 return (mlptBoth); 1970 } 1971 ire = ire_cache_lookup_v6(addr, ip_zoneid, NULL, ipst); 1972 } 1973 /* 1974 * If we can't find the IRE, then we have to behave exactly like 1975 * ip_bind_laddr{,_v6}. That means looking up the IPIF so that users 1976 * can bind to addresses on "down" interfaces. 1977 * 1978 * If we can't find that either, then the bind is going to fail, so 1979 * just give up. Note that there's a miniscule chance that the address 1980 * is in transition, but we don't bother handling that. 1981 */ 1982 if (ire == NULL) { 1983 if (version == IPV4_VERSION) 1984 ipif = ipif_lookup_addr(*(const in_addr_t *)addr, NULL, 1985 ip_zoneid, NULL, NULL, NULL, NULL, ipst); 1986 else 1987 ipif = ipif_lookup_addr_v6((const in6_addr_t *)addr, 1988 NULL, ip_zoneid, NULL, NULL, NULL, NULL, ipst); 1989 if (ipif == NULL) { 1990 return (mlptSingle); 1991 } 1992 addrzone = ipif->ipif_zoneid; 1993 ipif_refrele(ipif); 1994 } else { 1995 addrzone = ire->ire_zoneid; 1996 ire_refrele(ire); 1997 } 1998 return (addrzone == ALL_ZONES ? mlptShared : mlptPrivate); 1999 } 2000 2001 /* 2002 * Since we are configuring local interfaces, and we know trusted 2003 * extension CDE requires local interfaces to be cipso host type in 2004 * order to function correctly, we'll associate a cipso template 2005 * to each local interface and let the interface come up. 
Configuring 2006 * a local interface to be "unlabeled" host type is a configuration error. 2007 * We'll override that error and make the interface host type to be cipso 2008 * here. 2009 * 2010 * The code is optimized for the usual "success" case and unwinds things on 2011 * error. We don't want to go to the trouble and expense of formatting the 2012 * interface name for the usual case where everything is configured correctly. 2013 */ 2014 boolean_t 2015 tsol_check_interface_address(const ipif_t *ipif) 2016 { 2017 tsol_tpc_t *tp; 2018 char addrbuf[INET6_ADDRSTRLEN]; 2019 int af; 2020 const void *addr; 2021 zone_t *zone; 2022 ts_label_t *plabel; 2023 const bslabel_t *label; 2024 char ifbuf[LIFNAMSIZ + 10]; 2025 const char *ifname; 2026 boolean_t retval; 2027 tsol_rhent_t rhent; 2028 netstack_t *ns = ipif->ipif_ill->ill_ipst->ips_netstack; 2029 2030 if (IN6_IS_ADDR_V4MAPPED(&ipif->ipif_v6lcl_addr)) { 2031 af = AF_INET; 2032 addr = &V4_PART_OF_V6(ipif->ipif_v6lcl_addr); 2033 } else { 2034 af = AF_INET6; 2035 addr = &ipif->ipif_v6lcl_addr; 2036 } 2037 2038 tp = find_tpc(&ipif->ipif_v6lcl_addr, IPV6_VERSION, B_FALSE); 2039 2040 /* assumes that ALL_ZONES implies that there is no exclusive stack */ 2041 if (ipif->ipif_zoneid == ALL_ZONES) { 2042 zone = NULL; 2043 } else if (ns->netstack_stackid == GLOBAL_NETSTACKID) { 2044 /* Shared stack case */ 2045 zone = zone_find_by_id(ipif->ipif_zoneid); 2046 } else { 2047 /* Exclusive stack case */ 2048 zone = zone_find_by_id(crgetzoneid(ipif->ipif_ill->ill_credp)); 2049 } 2050 if (zone != NULL) { 2051 plabel = zone->zone_slabel; 2052 ASSERT(plabel != NULL); 2053 label = label2bslabel(plabel); 2054 } 2055 2056 /* 2057 * If it's CIPSO and an all-zones address, then we're done. 2058 * If it's a CIPSO zone specific address, the zone's label 2059 * must be in the range or set specified in the template. 
2060 * When the remote host entry is missing or the template 2061 * type is incorrect for this interface, we create a 2062 * CIPSO host entry in kernel and allow the interface to be 2063 * brought up as CIPSO type. 2064 */ 2065 if (tp != NULL && ( 2066 /* The all-zones case */ 2067 (tp->tpc_tp.host_type == SUN_CIPSO && 2068 tp->tpc_tp.tp_doi == default_doi && 2069 ipif->ipif_zoneid == ALL_ZONES) || 2070 /* The local-zone case */ 2071 (zone != NULL && plabel->tsl_doi == tp->tpc_tp.tp_doi && 2072 ((tp->tpc_tp.host_type == SUN_CIPSO && 2073 (_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) || 2074 blinlset(label, tp->tpc_tp.tp_sl_set_cipso))))))) { 2075 if (zone != NULL) 2076 zone_rele(zone); 2077 TPC_RELE(tp); 2078 return (B_TRUE); 2079 } 2080 2081 ifname = ipif->ipif_ill->ill_name; 2082 if (ipif->ipif_id != 0) { 2083 (void) snprintf(ifbuf, sizeof (ifbuf), "%s:%u", ifname, 2084 ipif->ipif_id); 2085 ifname = ifbuf; 2086 } 2087 (void) inet_ntop(af, addr, addrbuf, sizeof (addrbuf)); 2088 2089 if (tp == NULL) { 2090 cmn_err(CE_NOTE, "template entry for %s missing. Default to " 2091 "CIPSO type for %s", ifname, addrbuf); 2092 retval = B_TRUE; 2093 } else if (tp->tpc_tp.host_type == UNLABELED) { 2094 cmn_err(CE_NOTE, "template type for %s incorrectly configured. " 2095 "Change to CIPSO type for %s", ifname, addrbuf); 2096 retval = B_TRUE; 2097 } else if (ipif->ipif_zoneid == ALL_ZONES) { 2098 if (tp->tpc_tp.host_type != SUN_CIPSO) { 2099 cmn_err(CE_NOTE, "%s failed: %s isn't set to CIPSO for " 2100 "all-zones. 
Converted to CIPSO.", ifname, addrbuf); 2101 retval = B_TRUE; 2102 } else { 2103 cmn_err(CE_NOTE, "%s failed: %s has wrong DOI %d " 2104 "instead of %d", ifname, addrbuf, 2105 tp->tpc_tp.tp_doi, default_doi); 2106 retval = B_FALSE; 2107 } 2108 } else if (zone == NULL) { 2109 cmn_err(CE_NOTE, "%s failed: zoneid %d unknown", 2110 ifname, ipif->ipif_zoneid); 2111 retval = B_FALSE; 2112 } else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) { 2113 cmn_err(CE_NOTE, "%s failed: zone %s has DOI %d but %s has " 2114 "DOI %d", ifname, zone->zone_name, plabel->tsl_doi, 2115 addrbuf, tp->tpc_tp.tp_doi); 2116 retval = B_FALSE; 2117 } else { 2118 cmn_err(CE_NOTE, "%s failed: zone %s label incompatible with " 2119 "%s", ifname, zone->zone_name, addrbuf); 2120 tsol_print_label(label, "zone label"); 2121 retval = B_FALSE; 2122 } 2123 2124 if (zone != NULL) 2125 zone_rele(zone); 2126 if (tp != NULL) 2127 TPC_RELE(tp); 2128 if (retval) { 2129 /* 2130 * we've corrected a config error and let the interface 2131 * come up as cipso. Need to insert an rhent. 2132 */ 2133 if ((rhent.rh_address.ta_family = af) == AF_INET) { 2134 rhent.rh_prefix = 32; 2135 rhent.rh_address.ta_addr_v4 = *(struct in_addr *)addr; 2136 } else { 2137 rhent.rh_prefix = 128; 2138 rhent.rh_address.ta_addr_v6 = *(in6_addr_t *)addr; 2139 } 2140 (void) strcpy(rhent.rh_template, "cipso"); 2141 if (tnrh_load(&rhent) != 0) { 2142 cmn_err(CE_NOTE, "%s failed: Cannot insert CIPSO " 2143 "template for local addr %s", ifname, addrbuf); 2144 retval = B_FALSE; 2145 } 2146 } 2147 return (retval); 2148 } 2149