1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/strsubr.h> 29 #include <sys/stropts.h> 30 #include <sys/sunddi.h> 31 #include <sys/cred.h> 32 #include <sys/debug.h> 33 #include <sys/kmem.h> 34 #include <sys/errno.h> 35 #include <sys/disp.h> 36 #include <netinet/in.h> 37 #include <netinet/in_systm.h> 38 #include <netinet/ip.h> 39 #include <netinet/ip_icmp.h> 40 #include <netinet/tcp.h> 41 #include <inet/common.h> 42 #include <inet/ipclassifier.h> 43 #include <inet/ip.h> 44 #include <inet/mib2.h> 45 #include <inet/nd.h> 46 #include <inet/tcp.h> 47 #include <inet/ip_rts.h> 48 #include <inet/ip_ire.h> 49 #include <inet/ip_if.h> 50 #include <sys/modhash.h> 51 52 #include <sys/tsol/label.h> 53 #include <sys/tsol/label_macro.h> 54 #include <sys/tsol/tnet.h> 55 #include <sys/tsol/tndb.h> 56 #include <sys/strsun.h> 57 58 /* tunable for strict error-reply behavior (TCP RST and ICMP Unreachable) */ 59 int tsol_strict_error; 60 61 /* 62 * Some notes on the Trusted Solaris IRE gateway security attributes: 
 *
 * When running in Trusted mode, the routing subsystem determines whether or
 * not a packet can be delivered to an off-link host (not directly reachable
 * through an interface) based on the accreditation checks of the packet's
 * security attributes against those associated with the next-hop gateway.
 *
 * The next-hop gateway's security attributes can be derived from two sources
 * (in order of preference): route-related and the host database.  A Trusted
 * system must be configured with at least the host database containing an
 * entry for the next-hop gateway, or otherwise no accreditation checks can
 * be performed, which may result in the inability to send packets to any
 * off-link destination host.
 *
 * The major differences between the two sources are the number and type of
 * security attributes used for accreditation checks.  A host database entry
 * can contain at most one set of security attributes, specific only to the
 * next-hop gateway.  In contrast, route-related security attributes are made
 * up of a collection of security attributes for the distant networks, and
 * are grouped together per next-hop gateway used to reach those networks.
 * This is the preferred method, and the routing subsystem will fall back to
 * the host database entry only if there are no route-related attributes
 * associated with the next-hop gateway.
 *
 * In Trusted mode, all of the IRE entries (except LOCAL/LOOPBACK/BROADCAST/
 * INTERFACE type) are initialized to contain a placeholder to store this
 * information.  The ire_gw_secattr structure gets allocated, initialized
 * and associated with the IRE during the time of the IRE creation.  The
 * initialization process also includes resolving the host database entry
 * of the next-hop gateway for fallback purposes.
 * It does not include any
 * route-related attribute setup, as that process comes separately as part
 * of the route requests (add/change) made to the routing subsystem.
 *
 * The underlying logic which involves associating IREs with the gateway
 * security attributes is represented by the following data structures:
 *
 * tsol_gcdb_t, or "gcdb"
 *
 *	- This is a system-wide collection of records containing the
 *	  currently used route-related security attributes, which are fed
 *	  through the routing socket interface, e.g. "route add/change".
 *
 * tsol_gc_t, or "gc"
 *
 *	- This is the gateway credential structure, and it provides the
 *	  only mechanism to access the contents of gcdb.  More than one gc
 *	  entry may refer to the same gcdb record.  gc's in the system are
 *	  grouped according to the next-hop gateway address.
 *
 * tsol_gcgrp_t, or "gcgrp"
 *
 *	- Group of gateway credentials, and is unique per next-hop gateway
 *	  address.  When the group is not empty, i.e. when gcgrp_count is
 *	  greater than zero, it contains one or more gc's, each pointing to
 *	  a gcdb record which indicates the gateway security attributes
 *	  associated with the next-hop gateway.
 *
 * The fields of the tsol_ire_gw_secattr_t used from within the IRE are:
 *
 * igsa_lock
 *
 *	- Lock that protects all fields within tsol_ire_gw_secattr_t.
 *
 * igsa_rhc
 *
 *	- Remote host cache database entry of next-hop gateway.  This is
 *	  used in the case when there are no route-related attributes
 *	  configured for the IRE.
 *
 * igsa_gc
 *
 *	- A set of route-related attributes that only get set for prefix
 *	  IREs.  If this is non-NULL, the prefix IRE has been associated
 *	  with a set of gateway security attributes by way of route add/
 *	  change functionality.
137 */ 138 139 static kmem_cache_t *ire_gw_secattr_cache; 140 141 #define GCDB_HASH_SIZE 101 142 #define GCGRP_HASH_SIZE 101 143 144 #define GCDB_REFRELE(p) { \ 145 mutex_enter(&gcdb_lock); \ 146 ASSERT((p)->gcdb_refcnt > 0); \ 147 if (--((p)->gcdb_refcnt) == 0) \ 148 gcdb_inactive(p); \ 149 ASSERT(MUTEX_HELD(&gcdb_lock)); \ 150 mutex_exit(&gcdb_lock); \ 151 } 152 153 static int gcdb_hash_size = GCDB_HASH_SIZE; 154 static int gcgrp_hash_size = GCGRP_HASH_SIZE; 155 static mod_hash_t *gcdb_hash; 156 static mod_hash_t *gcgrp4_hash; 157 static mod_hash_t *gcgrp6_hash; 158 159 static kmutex_t gcdb_lock; 160 kmutex_t gcgrp_lock; 161 162 static uint_t gcdb_hash_by_secattr(void *, mod_hash_key_t); 163 static int gcdb_hash_cmp(mod_hash_key_t, mod_hash_key_t); 164 static tsol_gcdb_t *gcdb_lookup(struct rtsa_s *, boolean_t); 165 static void gcdb_inactive(tsol_gcdb_t *); 166 167 static uint_t gcgrp_hash_by_addr(void *, mod_hash_key_t); 168 static int gcgrp_hash_cmp(mod_hash_key_t, mod_hash_key_t); 169 170 static int ire_gw_secattr_constructor(void *, void *, int); 171 static void ire_gw_secattr_destructor(void *, void *); 172 173 void 174 tnet_init(void) 175 { 176 ire_gw_secattr_cache = kmem_cache_create("ire_gw_secattr_cache", 177 sizeof (tsol_ire_gw_secattr_t), 64, ire_gw_secattr_constructor, 178 ire_gw_secattr_destructor, NULL, NULL, NULL, 0); 179 180 gcdb_hash = mod_hash_create_extended("gcdb_hash", 181 gcdb_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor, 182 gcdb_hash_by_secattr, NULL, gcdb_hash_cmp, KM_SLEEP); 183 184 gcgrp4_hash = mod_hash_create_extended("gcgrp4_hash", 185 gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor, 186 gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP); 187 188 gcgrp6_hash = mod_hash_create_extended("gcgrp6_hash", 189 gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor, 190 gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP); 191 192 mutex_init(&gcdb_lock, NULL, MUTEX_DEFAULT, NULL); 193 mutex_init(&gcgrp_lock, 
NULL, MUTEX_DEFAULT, NULL); 194 } 195 196 void 197 tnet_fini(void) 198 { 199 kmem_cache_destroy(ire_gw_secattr_cache); 200 mod_hash_destroy_hash(gcdb_hash); 201 mod_hash_destroy_hash(gcgrp4_hash); 202 mod_hash_destroy_hash(gcgrp6_hash); 203 mutex_destroy(&gcdb_lock); 204 mutex_destroy(&gcgrp_lock); 205 } 206 207 /* ARGSUSED */ 208 static int 209 ire_gw_secattr_constructor(void *buf, void *cdrarg, int kmflags) 210 { 211 tsol_ire_gw_secattr_t *attrp = buf; 212 213 mutex_init(&attrp->igsa_lock, NULL, MUTEX_DEFAULT, NULL); 214 215 attrp->igsa_rhc = NULL; 216 attrp->igsa_gc = NULL; 217 218 return (0); 219 } 220 221 /* ARGSUSED */ 222 static void 223 ire_gw_secattr_destructor(void *buf, void *cdrarg) 224 { 225 tsol_ire_gw_secattr_t *attrp = (tsol_ire_gw_secattr_t *)buf; 226 227 mutex_destroy(&attrp->igsa_lock); 228 } 229 230 tsol_ire_gw_secattr_t * 231 ire_gw_secattr_alloc(int kmflags) 232 { 233 return (kmem_cache_alloc(ire_gw_secattr_cache, kmflags)); 234 } 235 236 void 237 ire_gw_secattr_free(tsol_ire_gw_secattr_t *attrp) 238 { 239 ASSERT(MUTEX_NOT_HELD(&attrp->igsa_lock)); 240 241 if (attrp->igsa_rhc != NULL) { 242 TNRHC_RELE(attrp->igsa_rhc); 243 attrp->igsa_rhc = NULL; 244 } 245 246 if (attrp->igsa_gc != NULL) { 247 GC_REFRELE(attrp->igsa_gc); 248 attrp->igsa_gc = NULL; 249 } 250 251 ASSERT(attrp->igsa_rhc == NULL); 252 ASSERT(attrp->igsa_gc == NULL); 253 254 kmem_cache_free(ire_gw_secattr_cache, attrp); 255 } 256 257 /* ARGSUSED */ 258 static uint_t 259 gcdb_hash_by_secattr(void *hash_data, mod_hash_key_t key) 260 { 261 const struct rtsa_s *rp = (struct rtsa_s *)key; 262 const uint32_t *up, *ue; 263 uint_t hash; 264 int i; 265 266 ASSERT(rp != NULL); 267 268 /* See comments in hash_bylabel in zone.c for details */ 269 hash = rp->rtsa_doi + (rp->rtsa_doi << 1); 270 up = (const uint32_t *)&rp->rtsa_slrange; 271 ue = up + sizeof (rp->rtsa_slrange) / sizeof (*up); 272 i = 1; 273 while (up < ue) { 274 /* using 2^n + 1, 1 <= n <= 16 as source of many primes */ 275 hash 
+= *up + (*up << ((i % 16) + 1)); 276 up++; 277 i++; 278 } 279 return (hash); 280 } 281 282 static int 283 gcdb_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 284 { 285 struct rtsa_s *rp1 = (struct rtsa_s *)key1; 286 struct rtsa_s *rp2 = (struct rtsa_s *)key2; 287 288 ASSERT(rp1 != NULL && rp2 != NULL); 289 290 if (blequal(&rp1->rtsa_slrange.lower_bound, 291 &rp2->rtsa_slrange.lower_bound) && 292 blequal(&rp1->rtsa_slrange.upper_bound, 293 &rp2->rtsa_slrange.upper_bound) && 294 rp1->rtsa_doi == rp2->rtsa_doi) 295 return (0); 296 297 /* No match; not found */ 298 return (-1); 299 } 300 301 /* ARGSUSED */ 302 static uint_t 303 gcgrp_hash_by_addr(void *hash_data, mod_hash_key_t key) 304 { 305 tsol_gcgrp_addr_t *ga = (tsol_gcgrp_addr_t *)key; 306 uint_t idx = 0; 307 uint32_t *ap; 308 309 ASSERT(ga != NULL); 310 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6); 311 312 ap = (uint32_t *)&ga->ga_addr.s6_addr32[0]; 313 idx ^= *ap++; 314 idx ^= *ap++; 315 idx ^= *ap++; 316 idx ^= *ap; 317 318 return (idx); 319 } 320 321 static int 322 gcgrp_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 323 { 324 tsol_gcgrp_addr_t *ga1 = (tsol_gcgrp_addr_t *)key1; 325 tsol_gcgrp_addr_t *ga2 = (tsol_gcgrp_addr_t *)key2; 326 327 ASSERT(ga1 != NULL && ga2 != NULL); 328 329 /* Address family must match */ 330 if (ga1->ga_af != ga2->ga_af) 331 return (-1); 332 333 if (ga1->ga_addr.s6_addr32[0] == ga2->ga_addr.s6_addr32[0] && 334 ga1->ga_addr.s6_addr32[1] == ga2->ga_addr.s6_addr32[1] && 335 ga1->ga_addr.s6_addr32[2] == ga2->ga_addr.s6_addr32[2] && 336 ga1->ga_addr.s6_addr32[3] == ga2->ga_addr.s6_addr32[3]) 337 return (0); 338 339 /* No match; not found */ 340 return (-1); 341 } 342 343 #define RTSAFLAGS "\20\11cipso\3doi\2max_sl\1min_sl" 344 345 int 346 rtsa_validate(const struct rtsa_s *rp) 347 { 348 uint32_t mask = rp->rtsa_mask; 349 350 /* RTSA_CIPSO must be set, and DOI must not be zero */ 351 if ((mask & RTSA_CIPSO) == 0 || rp->rtsa_doi == 0) { 352 
DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *, 353 "rtsa(1) lacks flag or has 0 doi.", 354 rtsa_s *, rp); 355 return (EINVAL); 356 } 357 /* 358 * SL range must be specified, and it must have its 359 * upper bound dominating its lower bound. 360 */ 361 if ((mask & RTSA_SLRANGE) != RTSA_SLRANGE || 362 !bldominates(&rp->rtsa_slrange.upper_bound, 363 &rp->rtsa_slrange.lower_bound)) { 364 DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *, 365 "rtsa(1) min_sl and max_sl not set or max_sl is " 366 "not dominating.", rtsa_s *, rp); 367 return (EINVAL); 368 } 369 return (0); 370 } 371 372 /* 373 * A brief explanation of the reference counting scheme: 374 * 375 * Apart from dynamic references due to to reference holds done 376 * actively by threads, we have the following references: 377 * 378 * gcdb_refcnt: 379 * - Every tsol_gc_t pointing to a tsol_gcdb_t contributes a reference 380 * to the gcdb_refcnt. 381 * 382 * gc_refcnt: 383 * - A prefix IRE that points to an igsa_gc contributes a reference 384 * to the gc_refcnt. 385 * 386 * gcgrp_refcnt: 387 * - Every tsol_gc_t in the chain headed by tsol_gcgrp_t contributes 388 * a reference to the gcgrp_refcnt. 
389 */ 390 static tsol_gcdb_t * 391 gcdb_lookup(struct rtsa_s *rp, boolean_t alloc) 392 { 393 tsol_gcdb_t *gcdb = NULL; 394 395 if (rtsa_validate(rp) != 0) 396 return (NULL); 397 398 mutex_enter(&gcdb_lock); 399 /* Find a copy in the cache; otherwise, create one and cache it */ 400 if (mod_hash_find(gcdb_hash, (mod_hash_key_t)rp, 401 (mod_hash_val_t *)&gcdb) == 0) { 402 gcdb->gcdb_refcnt++; 403 ASSERT(gcdb->gcdb_refcnt != 0); 404 405 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__lookup, char *, 406 "gcdb(1) is in gcdb_hash(global)", tsol_gcdb_t *, gcdb); 407 } else if (alloc) { 408 gcdb = kmem_zalloc(sizeof (*gcdb), KM_NOSLEEP); 409 if (gcdb != NULL) { 410 gcdb->gcdb_refcnt = 1; 411 gcdb->gcdb_mask = rp->rtsa_mask; 412 gcdb->gcdb_doi = rp->rtsa_doi; 413 gcdb->gcdb_slrange = rp->rtsa_slrange; 414 415 if (mod_hash_insert(gcdb_hash, 416 (mod_hash_key_t)&gcdb->gcdb_attr, 417 (mod_hash_val_t)gcdb) != 0) { 418 mutex_exit(&gcdb_lock); 419 kmem_free(gcdb, sizeof (*gcdb)); 420 return (NULL); 421 } 422 423 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__insert, char *, 424 "gcdb(1) inserted in gcdb_hash(global)", 425 tsol_gcdb_t *, gcdb); 426 } 427 } 428 mutex_exit(&gcdb_lock); 429 return (gcdb); 430 } 431 432 static void 433 gcdb_inactive(tsol_gcdb_t *gcdb) 434 { 435 ASSERT(MUTEX_HELD(&gcdb_lock)); 436 ASSERT(gcdb != NULL && gcdb->gcdb_refcnt == 0); 437 438 (void) mod_hash_remove(gcdb_hash, (mod_hash_key_t)&gcdb->gcdb_attr, 439 (mod_hash_val_t *)&gcdb); 440 441 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__remove, char *, 442 "gcdb(1) removed from gcdb_hash(global)", 443 tsol_gcdb_t *, gcdb); 444 kmem_free(gcdb, sizeof (*gcdb)); 445 } 446 447 tsol_gc_t * 448 gc_create(struct rtsa_s *rp, tsol_gcgrp_t *gcgrp, boolean_t *gcgrp_xtrarefp) 449 { 450 tsol_gc_t *gc; 451 tsol_gcdb_t *gcdb; 452 453 *gcgrp_xtrarefp = B_TRUE; 454 455 rw_enter(&gcgrp->gcgrp_rwlock, RW_WRITER); 456 if ((gcdb = gcdb_lookup(rp, B_TRUE)) == NULL) { 457 rw_exit(&gcgrp->gcgrp_rwlock); 458 return (NULL); 459 } 460 461 for (gc 
= gcgrp->gcgrp_head; gc != NULL; gc = gc->gc_next) { 462 if (gc->gc_db == gcdb) { 463 ASSERT(gc->gc_grp == gcgrp); 464 465 gc->gc_refcnt++; 466 ASSERT(gc->gc_refcnt != 0); 467 468 GCDB_REFRELE(gcdb); 469 470 DTRACE_PROBE3(tx__gcdb__log__info__gc__create, 471 char *, "found gc(1) in gcgrp(2)", 472 tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp); 473 rw_exit(&gcgrp->gcgrp_rwlock); 474 return (gc); 475 } 476 } 477 478 gc = kmem_zalloc(sizeof (*gc), KM_NOSLEEP); 479 if (gc != NULL) { 480 if (gcgrp->gcgrp_head == NULL) { 481 gcgrp->gcgrp_head = gcgrp->gcgrp_tail = gc; 482 } else { 483 gcgrp->gcgrp_tail->gc_next = gc; 484 gc->gc_prev = gcgrp->gcgrp_tail; 485 gcgrp->gcgrp_tail = gc; 486 } 487 gcgrp->gcgrp_count++; 488 ASSERT(gcgrp->gcgrp_count != 0); 489 490 /* caller has incremented gcgrp reference for us */ 491 gc->gc_grp = gcgrp; 492 493 gc->gc_db = gcdb; 494 gc->gc_refcnt = 1; 495 496 DTRACE_PROBE3(tx__gcdb__log__info__gc__create, char *, 497 "added gc(1) to gcgrp(2)", tsol_gc_t *, gc, 498 tsol_gcgrp_t *, gcgrp); 499 500 *gcgrp_xtrarefp = B_FALSE; 501 } 502 rw_exit(&gcgrp->gcgrp_rwlock); 503 504 return (gc); 505 } 506 507 void 508 gc_inactive(tsol_gc_t *gc) 509 { 510 tsol_gcgrp_t *gcgrp = gc->gc_grp; 511 512 ASSERT(gcgrp != NULL); 513 ASSERT(RW_WRITE_HELD(&gcgrp->gcgrp_rwlock)); 514 ASSERT(gc->gc_refcnt == 0); 515 516 if (gc->gc_prev != NULL) 517 gc->gc_prev->gc_next = gc->gc_next; 518 else 519 gcgrp->gcgrp_head = gc->gc_next; 520 if (gc->gc_next != NULL) 521 gc->gc_next->gc_prev = gc->gc_prev; 522 else 523 gcgrp->gcgrp_tail = gc->gc_prev; 524 ASSERT(gcgrp->gcgrp_count > 0); 525 gcgrp->gcgrp_count--; 526 527 /* drop lock before it's destroyed */ 528 rw_exit(&gcgrp->gcgrp_rwlock); 529 530 DTRACE_PROBE3(tx__gcdb__log__info__gc__remove, char *, 531 "removed inactive gc(1) from gcgrp(2)", 532 tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp); 533 534 GCGRP_REFRELE(gcgrp); 535 536 gc->gc_grp = NULL; 537 gc->gc_prev = gc->gc_next = NULL; 538 539 if (gc->gc_db != NULL) 540 
GCDB_REFRELE(gc->gc_db); 541 542 kmem_free(gc, sizeof (*gc)); 543 } 544 545 tsol_gcgrp_t * 546 gcgrp_lookup(tsol_gcgrp_addr_t *ga, boolean_t alloc) 547 { 548 tsol_gcgrp_t *gcgrp = NULL; 549 mod_hash_t *hashp; 550 551 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6); 552 553 hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash; 554 555 mutex_enter(&gcgrp_lock); 556 if (mod_hash_find(hashp, (mod_hash_key_t)ga, 557 (mod_hash_val_t *)&gcgrp) == 0) { 558 gcgrp->gcgrp_refcnt++; 559 ASSERT(gcgrp->gcgrp_refcnt != 0); 560 561 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__lookup, char *, 562 "found gcgrp(1) in hash(2)", tsol_gcgrp_t *, gcgrp, 563 mod_hash_t *, hashp); 564 565 } else if (alloc) { 566 gcgrp = kmem_zalloc(sizeof (*gcgrp), KM_NOSLEEP); 567 if (gcgrp != NULL) { 568 gcgrp->gcgrp_refcnt = 1; 569 rw_init(&gcgrp->gcgrp_rwlock, NULL, RW_DEFAULT, NULL); 570 bcopy(ga, &gcgrp->gcgrp_addr, sizeof (*ga)); 571 572 if (mod_hash_insert(hashp, 573 (mod_hash_key_t)&gcgrp->gcgrp_addr, 574 (mod_hash_val_t)gcgrp) != 0) { 575 mutex_exit(&gcgrp_lock); 576 kmem_free(gcgrp, sizeof (*gcgrp)); 577 return (NULL); 578 } 579 580 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__insert, 581 char *, "inserted gcgrp(1) in hash(2)", 582 tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp); 583 } 584 } 585 mutex_exit(&gcgrp_lock); 586 return (gcgrp); 587 } 588 589 void 590 gcgrp_inactive(tsol_gcgrp_t *gcgrp) 591 { 592 tsol_gcgrp_addr_t *ga; 593 mod_hash_t *hashp; 594 595 ASSERT(MUTEX_HELD(&gcgrp_lock)); 596 ASSERT(gcgrp != NULL && gcgrp->gcgrp_refcnt == 0); 597 ASSERT(gcgrp->gcgrp_head == NULL && gcgrp->gcgrp_count == 0); 598 599 ga = &gcgrp->gcgrp_addr; 600 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6); 601 602 hashp = (ga->ga_af == AF_INET) ? 
gcgrp4_hash : gcgrp6_hash; 603 (void) mod_hash_remove(hashp, (mod_hash_key_t)ga, 604 (mod_hash_val_t *)&gcgrp); 605 rw_destroy(&gcgrp->gcgrp_rwlock); 606 607 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__remove, char *, 608 "removed inactive gcgrp(1) from hash(2)", 609 tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp); 610 611 kmem_free(gcgrp, sizeof (*gcgrp)); 612 } 613 614 615 /* 616 * Assign a sensitivity label to inbound traffic which arrived without 617 * an explicit on-the-wire label. 618 * 619 * In the case of CIPSO-type hosts, we assume packets arriving without 620 * a label are at the most sensitive label known for the host, most 621 * likely involving out-of-band key management traffic (such as IKE, 622 * etc.,) 623 */ 624 static boolean_t 625 tsol_find_unlabeled_label(tsol_tpc_t *rhtp, bslabel_t *sl, uint32_t *doi) 626 { 627 *doi = rhtp->tpc_tp.tp_doi; 628 switch (rhtp->tpc_tp.host_type) { 629 case UNLABELED: 630 *sl = rhtp->tpc_tp.tp_def_label; 631 break; 632 case SUN_CIPSO: 633 *sl = rhtp->tpc_tp.tp_sl_range_cipso.upper_bound; 634 break; 635 default: 636 return (B_FALSE); 637 } 638 setbltype(sl, SUN_SL_ID); 639 return (B_TRUE); 640 } 641 642 /* 643 * Converts CIPSO option to sensitivity label. 
644 * Validity checks based on restrictions defined in 645 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) (draft-ietf-cipso-ipsecurity) 646 */ 647 static boolean_t 648 cipso_to_sl(const uchar_t *option, bslabel_t *sl) 649 { 650 const struct cipso_option *co = (const struct cipso_option *)option; 651 const struct cipso_tag_type_1 *tt1; 652 653 tt1 = (struct cipso_tag_type_1 *)&co->cipso_tag_type[0]; 654 if (tt1->tag_type != 1 || 655 tt1->tag_length < TSOL_TT1_MIN_LENGTH || 656 tt1->tag_length > TSOL_TT1_MAX_LENGTH || 657 tt1->tag_length + TSOL_CIPSO_TAG_OFFSET > co->cipso_length) 658 return (B_FALSE); 659 660 bsllow(sl); /* assumed: sets compartments to all zeroes */ 661 LCLASS_SET((_bslabel_impl_t *)sl, tt1->tag_sl); 662 bcopy(tt1->tag_cat, &((_bslabel_impl_t *)sl)->compartments, 663 tt1->tag_length - TSOL_TT1_MIN_LENGTH); 664 return (B_TRUE); 665 } 666 667 /* 668 * If present, parse the CIPSO label in the incoming packet and 669 * construct a ts_label_t that reflects the CIPSO label and put it in 670 * the ip_recv_attr_t. Later as the packet flows up through the stack any 671 * code that needs to examine the packet label can inspect the label 672 * from the ira_tsl. This function is 673 * called right in ip_input for all packets, i.e. locally destined and 674 * to be forwarded packets. The forwarding path needs to examine the label 675 * to determine how to forward the packet. 676 * 677 * This routine pulls all message text up into the first mblk. 678 * For IPv4, only the first 20 bytes of the IP header are guaranteed 679 * to exist. For IPv6, only the IPv6 header is guaranteed to exist. 
680 */ 681 boolean_t 682 tsol_get_pkt_label(mblk_t *mp, int version, ip_recv_attr_t *ira) 683 { 684 tsol_tpc_t *src_rhtp = NULL; 685 uchar_t *opt_ptr = NULL; 686 const ipha_t *ipha; 687 bslabel_t sl; 688 uint32_t doi; 689 tsol_ip_label_t label_type; 690 uint32_t label_flags = 0; /* flags to set in label */ 691 const cipso_option_t *co; 692 const void *src; 693 const ip6_t *ip6h; 694 cred_t *credp; 695 int proto; 696 697 ASSERT(DB_TYPE(mp) == M_DATA); 698 699 if (mp->b_cont != NULL && !pullupmsg(mp, -1)) 700 return (B_FALSE); 701 702 if (version == IPV4_VERSION) { 703 ASSERT(MBLKL(mp) >= IP_SIMPLE_HDR_LENGTH); 704 ipha = (const ipha_t *)mp->b_rptr; 705 src = &ipha->ipha_src; 706 if (!tsol_get_option_v4(mp, &label_type, &opt_ptr)) 707 return (B_FALSE); 708 } else { 709 ASSERT(MBLKL(mp) >= IPV6_HDR_LEN); 710 ip6h = (const ip6_t *)mp->b_rptr; 711 src = &ip6h->ip6_src; 712 if (!tsol_get_option_v6(mp, &label_type, &opt_ptr)) 713 return (B_FALSE); 714 } 715 716 switch (label_type) { 717 case OPT_CIPSO: 718 /* 719 * Convert the CIPSO label to the internal format 720 * and attach it to the dblk cred. 
721 * Validity checks based on restrictions defined in 722 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) 723 * (draft-ietf-cipso-ipsecurity) 724 */ 725 if (version == IPV6_VERSION && ip6opt_ls == 0) 726 return (B_FALSE); 727 co = (const struct cipso_option *)opt_ptr; 728 if ((co->cipso_length < 729 TSOL_CIPSO_TAG_OFFSET + TSOL_TT1_MIN_LENGTH) || 730 (co->cipso_length > IP_MAX_OPT_LENGTH)) 731 return (B_FALSE); 732 bcopy(co->cipso_doi, &doi, sizeof (doi)); 733 doi = ntohl(doi); 734 if (!cipso_to_sl(opt_ptr, &sl)) 735 return (B_FALSE); 736 setbltype(&sl, SUN_SL_ID); 737 738 /* 739 * If the source was unlabeled, then flag as such, 740 * (since CIPSO routers may add headers) 741 */ 742 743 if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL) 744 return (B_FALSE); 745 746 if (src_rhtp->tpc_tp.host_type == UNLABELED) 747 label_flags = TSLF_UNLABELED; 748 749 TPC_RELE(src_rhtp); 750 751 break; 752 753 case OPT_NONE: 754 /* 755 * Handle special cases that may not be labeled, even 756 * though the sending system may otherwise be configured as 757 * labeled. 
758 * - IGMP 759 * - IPv4 ICMP Router Discovery 760 * - IPv6 Neighbor Discovery 761 * - IPsec ESP 762 */ 763 if (version == IPV4_VERSION) { 764 proto = ipha->ipha_protocol; 765 if (proto == IPPROTO_IGMP) 766 return (B_TRUE); 767 if (proto == IPPROTO_ICMP) { 768 const struct icmp *icmp = (const struct icmp *) 769 (mp->b_rptr + IPH_HDR_LENGTH(ipha)); 770 771 if ((uchar_t *)icmp + ICMP_MINLEN > mp->b_wptr) 772 return (B_FALSE); 773 if (icmp->icmp_type == ICMP_ROUTERADVERT || 774 icmp->icmp_type == ICMP_ROUTERSOLICIT) 775 return (B_TRUE); 776 } 777 } else { 778 proto = ip6h->ip6_nxt; 779 if (proto == IPPROTO_ICMPV6) { 780 const icmp6_t *icmp6 = (const icmp6_t *) 781 (mp->b_rptr + IPV6_HDR_LEN); 782 783 if ((uchar_t *)icmp6 + ICMP6_MINLEN > 784 mp->b_wptr) 785 return (B_FALSE); 786 if (icmp6->icmp6_type >= MLD_LISTENER_QUERY && 787 icmp6->icmp6_type <= ICMP6_MAX_INFO_TYPE) 788 return (B_TRUE); 789 } 790 } 791 792 /* 793 * Look up the tnrhtp database and get the implicit label 794 * that is associated with the sending host and attach 795 * it to the packet. 796 */ 797 if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL) 798 return (B_FALSE); 799 800 /* 801 * If peer is label-aware, mark as "implicit" rather than 802 * "unlabeled" to cause appropriate mac-exempt processing 803 * to happen. 
804 */ 805 if (src_rhtp->tpc_tp.host_type == SUN_CIPSO) 806 label_flags = TSLF_IMPLICIT_IN; 807 else if (src_rhtp->tpc_tp.host_type == UNLABELED) 808 label_flags = TSLF_UNLABELED; 809 else { 810 DTRACE_PROBE2(tx__get__pkt__label, char *, 811 "template(1) has unknown hosttype", 812 tsol_tpc_t *, src_rhtp); 813 } 814 815 816 if (!tsol_find_unlabeled_label(src_rhtp, &sl, &doi)) { 817 TPC_RELE(src_rhtp); 818 return (B_FALSE); 819 } 820 TPC_RELE(src_rhtp); 821 break; 822 823 default: 824 return (B_FALSE); 825 } 826 827 if (ira->ira_cred == NULL) { 828 credp = newcred_from_bslabel(&sl, doi, KM_NOSLEEP); 829 if (credp == NULL) 830 return (B_FALSE); 831 } else { 832 credp = copycred_from_bslabel(ira->ira_cred, &sl, doi, 833 KM_NOSLEEP); 834 if (credp == NULL) 835 return (B_FALSE); 836 if (ira->ira_free_flags & IRA_FREE_CRED) { 837 crfree(ira->ira_cred); 838 ira->ira_free_flags &= ~IRA_FREE_CRED; 839 ira->ira_cred = NULL; 840 } 841 } 842 843 /* 844 * Put the label in ira_tsl for convinience, while keeping 845 * the cred in ira_cred for getpeerucred which is used to get 846 * labels with TX. 847 * Note: no explicit refcnt/free_flag for ira_tsl. The free_flag 848 * for IRA_FREE_CRED is sufficient for both. 849 */ 850 ira->ira_tsl = crgetlabel(credp); 851 ira->ira_cred = credp; 852 ira->ira_free_flags |= IRA_FREE_CRED; 853 854 ira->ira_tsl->tsl_flags |= label_flags; 855 return (B_TRUE); 856 } 857 858 /* 859 * This routine determines whether the given packet should be accepted locally. 860 * It does a range/set check on the packet's label by looking up the given 861 * address in the remote host database. 
862 */ 863 boolean_t 864 tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version, 865 ip_recv_attr_t *ira, const conn_t *connp) 866 { 867 const cred_t *credp; 868 ts_label_t *plabel, *conn_plabel; 869 tsol_tpc_t *tp; 870 boolean_t retv; 871 const bslabel_t *label, *conn_label; 872 boolean_t shared_addr = (ira->ira_flags & IRAF_TX_SHARED_ADDR); 873 874 /* 875 * tsol_get_pkt_label intentionally avoids the labeling process for: 876 * - IPv6 router and neighbor discovery as well as redirects. 877 * - MLD packets. (Anything between ICMPv6 code 130 and 138.) 878 * - IGMP packets. 879 * - IPv4 router discovery. 880 * In those cases ira_cred is NULL. 881 */ 882 credp = ira->ira_cred; 883 if (credp == NULL) 884 return (B_TRUE); 885 886 /* 887 * If this packet is from the inside (not a remote host) and has the 888 * same zoneid as the selected destination, then no checks are 889 * necessary. Membership in the zone is enough proof. This is 890 * intended to be a hot path through this function. 891 * Note: Using crgetzone here is ok since the peer is local. 892 */ 893 if (!crisremote(credp) && 894 crgetzone(credp) == crgetzone(connp->conn_cred)) 895 return (B_TRUE); 896 897 plabel = ira->ira_tsl; 898 conn_plabel = crgetlabel(connp->conn_cred); 899 ASSERT(plabel != NULL && conn_plabel != NULL); 900 901 label = label2bslabel(plabel); 902 conn_label = label2bslabel(conn_plabel); 903 904 905 /* 906 * Implicitly labeled packets from label-aware sources 907 * go only to privileged receivers 908 */ 909 if ((plabel->tsl_flags & TSLF_IMPLICIT_IN) && 910 (connp->conn_mac_mode != CONN_MAC_IMPLICIT)) { 911 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac_impl, 912 char *, 913 "implicitly labeled packet mp(1) for conn(2) " 914 "which isn't in implicit mac mode", 915 mblk_t *, mp, conn_t *, connp); 916 917 return (B_FALSE); 918 } 919 920 921 /* 922 * MLPs are always validated using the range and set of the local 923 * address, even when the remote host is unlabeled. 
924 */ 925 if (connp->conn_mlp_type == mlptBoth || 926 /* LINTED: no consequent */ 927 connp->conn_mlp_type == (shared_addr ? mlptShared : mlptPrivate)) { 928 ; 929 930 /* 931 * If this is a packet from an unlabeled sender, then we must apply 932 * different rules. If the label is equal to the zone's label, then 933 * it's allowed. If it's not equal, but the zone is either the global 934 * zone or the label is dominated by the zone's label, then allow it 935 * as long as it's in the range configured for the destination. 936 */ 937 } else if (plabel->tsl_flags & TSLF_UNLABELED) { 938 if (plabel->tsl_doi == conn_plabel->tsl_doi && 939 blequal(label, conn_label)) 940 return (B_TRUE); 941 942 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) || 943 (!connp->conn_zone_is_global && 944 (plabel->tsl_doi != conn_plabel->tsl_doi || 945 !bldominates(conn_label, label)))) { 946 DTRACE_PROBE3( 947 tx__ip__log__drop__receivelocal__mac_unl, 948 char *, 949 "unlabeled packet mp(1) fails mac for conn(2)", 950 mblk_t *, mp, conn_t *, connp); 951 return (B_FALSE); 952 } 953 954 /* 955 * If this is a packet from a labeled sender, verify the 956 * label on the packet matches the connection label. 
957 */ 958 } else { 959 if (plabel->tsl_doi != conn_plabel->tsl_doi || 960 !blequal(label, conn_label)) { 961 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac__slp, 962 char *, 963 "packet mp(1) failed label match to SLP conn(2)", 964 mblk_t *, mp, conn_t *, connp); 965 return (B_FALSE); 966 } 967 /* 968 * No further checks will be needed if this is a zone- 969 * specific address because (1) The process for bringing up 970 * the interface ensures the zone's label is within the zone- 971 * specific address's valid label range; (2) For cases where 972 * the conn is bound to the unspecified addresses, ip fanout 973 * logic ensures conn's zoneid equals the dest addr's zoneid; 974 * (3) Mac-exempt and mlp logic above already handle all 975 * cases where the zone label may not be the same as the 976 * conn label. 977 */ 978 if (!shared_addr) 979 return (B_TRUE); 980 } 981 982 tp = find_tpc(addr, version, B_FALSE); 983 if (tp == NULL) { 984 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__no__tnr, 985 char *, "dropping mp(1), host(2) lacks entry", 986 mblk_t *, mp, void *, addr); 987 return (B_FALSE); 988 } 989 990 /* 991 * The local host address should not be unlabeled at this point. The 992 * only way this can happen is that the destination isn't unicast. We 993 * assume that the packet should not have had a label, and thus should 994 * have been handled by the TSLF_UNLABELED logic above. 
995 */ 996 if (tp->tpc_tp.host_type == UNLABELED) { 997 retv = B_FALSE; 998 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__flag, char *, 999 "mp(1) unlabeled source, but tp is not unlabeled.", 1000 mblk_t *, mp, tsol_tpc_t *, tp); 1001 1002 } else if (tp->tpc_tp.host_type != SUN_CIPSO) { 1003 retv = B_FALSE; 1004 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__tptype, char *, 1005 "delivering mp(1), found unrecognized tpc(2) type.", 1006 mblk_t *, mp, tsol_tpc_t *, tp); 1007 1008 } else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) { 1009 retv = B_FALSE; 1010 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *, 1011 "mp(1) could not be delievered to tp(2), doi mismatch", 1012 mblk_t *, mp, tsol_tpc_t *, tp); 1013 1014 } else if (!_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) && 1015 !blinlset(label, tp->tpc_tp.tp_sl_set_cipso)) { 1016 retv = B_FALSE; 1017 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *, 1018 "mp(1) could not be delievered to tp(2), bad mac", 1019 mblk_t *, mp, tsol_tpc_t *, tp); 1020 } else { 1021 retv = B_TRUE; 1022 } 1023 1024 TPC_RELE(tp); 1025 1026 return (retv); 1027 } 1028 1029 boolean_t 1030 tsol_can_accept_raw(mblk_t *mp, ip_recv_attr_t *ira, boolean_t check_host) 1031 { 1032 ts_label_t *plabel = NULL; 1033 tsol_tpc_t *src_rhtp, *dst_rhtp; 1034 boolean_t retv; 1035 1036 plabel = ira->ira_tsl; 1037 1038 /* We are bootstrapping or the internal template was never deleted */ 1039 if (plabel == NULL) 1040 return (B_TRUE); 1041 1042 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { 1043 ipha_t *ipha = (ipha_t *)mp->b_rptr; 1044 1045 src_rhtp = find_tpc(&ipha->ipha_src, IPV4_VERSION, 1046 B_FALSE); 1047 if (src_rhtp == NULL) 1048 return (B_FALSE); 1049 dst_rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, 1050 B_FALSE); 1051 } else { 1052 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1053 1054 src_rhtp = find_tpc(&ip6h->ip6_src, IPV6_VERSION, 1055 B_FALSE); 1056 if (src_rhtp == NULL) 1057 return (B_FALSE); 1058 dst_rhtp = 
find_tpc(&ip6h->ip6_dst, IPV6_VERSION, 1059 B_FALSE); 1060 } 1061 if (dst_rhtp == NULL) { 1062 TPC_RELE(src_rhtp); 1063 return (B_FALSE); 1064 } 1065 1066 if (label2doi(plabel) != src_rhtp->tpc_tp.tp_doi) { 1067 retv = B_FALSE; 1068 1069 /* 1070 * Check that the packet's label is in the correct range for labeled 1071 * sender, or is equal to the default label for unlabeled sender. 1072 */ 1073 } else if ((src_rhtp->tpc_tp.host_type != UNLABELED && 1074 !_blinrange(label2bslabel(plabel), 1075 &src_rhtp->tpc_tp.tp_sl_range_cipso) && 1076 !blinlset(label2bslabel(plabel), 1077 src_rhtp->tpc_tp.tp_sl_set_cipso)) || 1078 (src_rhtp->tpc_tp.host_type == UNLABELED && 1079 !blequal(&plabel->tsl_label, &src_rhtp->tpc_tp.tp_def_label))) { 1080 retv = B_FALSE; 1081 1082 } else if (check_host) { 1083 retv = B_TRUE; 1084 1085 /* 1086 * Until we have SL range in the Zone structure, pass it 1087 * when our own address lookup returned an internal entry. 1088 */ 1089 } else switch (dst_rhtp->tpc_tp.host_type) { 1090 case UNLABELED: 1091 retv = B_TRUE; 1092 break; 1093 1094 case SUN_CIPSO: 1095 retv = _blinrange(label2bslabel(plabel), 1096 &dst_rhtp->tpc_tp.tp_sl_range_cipso) || 1097 blinlset(label2bslabel(plabel), 1098 dst_rhtp->tpc_tp.tp_sl_set_cipso); 1099 break; 1100 1101 default: 1102 retv = B_FALSE; 1103 } 1104 TPC_RELE(src_rhtp); 1105 TPC_RELE(dst_rhtp); 1106 return (retv); 1107 } 1108 1109 /* 1110 * This routine determines whether a response to a failed packet delivery or 1111 * connection should be sent back. By default, the policy is to allow such 1112 * messages to be sent at all times, as these messages reveal little useful 1113 * information and are healthy parts of TCP/IP networking. 1114 * 1115 * If tsol_strict_error is set, then we do strict tests: if the packet label is 1116 * within the label range/set of this host/zone, return B_TRUE; otherwise 1117 * return B_FALSE, which causes the packet to be dropped silently. 
1118 * 1119 * Note that tsol_get_pkt_label will cause the packet to drop if the sender is 1120 * marked as labeled in the remote host database, but the packet lacks a label. 1121 * This means that we don't need to do a lookup on the source; the 1122 * TSLF_UNLABELED flag is sufficient. 1123 */ 1124 boolean_t 1125 tsol_can_reply_error(const mblk_t *mp, ip_recv_attr_t *ira) 1126 { 1127 ts_label_t *plabel = NULL; 1128 tsol_tpc_t *rhtp; 1129 const ipha_t *ipha; 1130 const ip6_t *ip6h; 1131 boolean_t retv; 1132 bslabel_t *pktbs; 1133 1134 /* Caller must pull up at least the IP header */ 1135 ASSERT(MBLKL(mp) >= (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION ? 1136 sizeof (*ipha) : sizeof (*ip6h))); 1137 1138 if (!tsol_strict_error) 1139 return (B_TRUE); 1140 1141 plabel = ira->ira_tsl; 1142 1143 /* We are bootstrapping or the internal template was never deleted */ 1144 if (plabel == NULL) 1145 return (B_TRUE); 1146 1147 if (plabel->tsl_flags & TSLF_IMPLICIT_IN) { 1148 DTRACE_PROBE3(tx__ip__log__drop__replyerror__unresolved__label, 1149 char *, 1150 "cannot send error report for packet mp(1) with " 1151 "unresolved security label sl(2)", 1152 mblk_t *, mp, ts_label_t *, plabel); 1153 return (B_FALSE); 1154 } 1155 1156 1157 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { 1158 ipha = (const ipha_t *)mp->b_rptr; 1159 rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, B_FALSE); 1160 } else { 1161 ip6h = (const ip6_t *)mp->b_rptr; 1162 rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION, B_FALSE); 1163 } 1164 1165 if (rhtp == NULL || label2doi(plabel) != rhtp->tpc_tp.tp_doi) { 1166 retv = B_FALSE; 1167 } else { 1168 /* 1169 * If we're in the midst of forwarding, then the destination 1170 * address might not be labeled. In that case, allow unlabeled 1171 * packets through only if the default label is the same, and 1172 * labeled ones if they dominate. 
1173 */ 1174 pktbs = label2bslabel(plabel); 1175 switch (rhtp->tpc_tp.host_type) { 1176 case UNLABELED: 1177 if (plabel->tsl_flags & TSLF_UNLABELED) { 1178 retv = blequal(pktbs, 1179 &rhtp->tpc_tp.tp_def_label); 1180 } else { 1181 retv = bldominates(pktbs, 1182 &rhtp->tpc_tp.tp_def_label); 1183 } 1184 break; 1185 1186 case SUN_CIPSO: 1187 retv = _blinrange(pktbs, 1188 &rhtp->tpc_tp.tp_sl_range_cipso) || 1189 blinlset(pktbs, rhtp->tpc_tp.tp_sl_set_cipso); 1190 break; 1191 1192 default: 1193 retv = B_FALSE; 1194 break; 1195 } 1196 } 1197 1198 if (rhtp != NULL) 1199 TPC_RELE(rhtp); 1200 1201 return (retv); 1202 } 1203 1204 /* 1205 * Finds the zone associated with the receive attributes. Returns GLOBAL_ZONEID 1206 * if the zone cannot be located. 1207 * 1208 * This is used by the classifier when the packet matches an ALL_ZONES IRE, and 1209 * there's no MLP defined. 1210 * 1211 * Note that we assume that this is only invoked in the ALL_ZONES case. 1212 * Handling other cases would require handling exclusive IP zones where either 1213 * this routine or the callers would have to map from 1214 * the zoneid (zone->zone_id) to what IP uses in conn_zoneid etc. 
 */
zoneid_t
tsol_attr_to_zoneid(const ip_recv_attr_t *ira)
{
	zone_t		*zone;
	ts_label_t	*label;

	/* Without a label on the receive attributes there is nothing to map. */
	if ((label = ira->ira_tsl) != NULL) {
		zone = zone_find_by_label(label);
		if (zone != NULL) {
			/* Read the id before releasing the zone. */
			zoneid_t zoneid = zone->zone_id;

			zone_rele(zone);
			return (zoneid);
		}
	}
	return (GLOBAL_ZONEID);
}

/*
 * Checks whether the label tsl is acceptable for the gateway behind ire.
 *
 * Returns 0 when the system is not labeled, when the IRE is one of the
 * local/loopback/broadcast/interface/multicast/noroute types, when tsl is
 * NULL, when no gateway address could be determined (and hence no template
 * looked up), or when the label passes the checks below.
 *
 * Returns EACCES when tsl is non-NULL but the IRE has no gateway security
 * attributes, or when the DOI / label range (or set) comparison fails.
 * Returns EINVAL when a gateway address is known but no remote-host entry
 * can be found for it.
 *
 * igsa_lock is held while the cached gateway entry (igsa_rhc) is inspected
 * and refreshed. If the IRE has a gateway credential, its group is
 * read-locked and reference-held until the "done" label.
 */
int
tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl)
{
	int		error = 0;
	tsol_ire_gw_secattr_t *attrp = NULL;
	tsol_tnrhc_t	*gw_rhc = NULL;
	tsol_gcgrp_t	*gcgrp = NULL;
	tsol_gc_t	*gc = NULL;
	in_addr_t	ga_addr4;
	void		*paddr = NULL;

	/* Not in Trusted mode or IRE is local/loopback/broadcast/interface */
	if (!is_system_labeled() ||
	    (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST |
	    IRE_IF_ALL | IRE_MULTICAST | IRE_NOROUTE)))
		goto done;

	/*
	 * If we don't have a label to compare with, or the IRE does not
	 * contain any gateway security attributes, there's not much that
	 * we can do. We let the former case pass, and the latter fail,
	 * since the IRE doesn't qualify for a match due to the lack of
	 * security attributes.
	 */
	if (tsl == NULL || ire->ire_gw_secattr == NULL) {
		if (tsl != NULL) {
			DTRACE_PROBE3(
			    tx__ip__log__drop__irematch__nogwsec, char *,
			    "ire(1) lacks ire_gw_secattr when matching "
			    "label(2)", ire_t *, ire, ts_label_t *, tsl);
			error = EACCES;
		}
		goto done;
	}

	attrp = ire->ire_gw_secattr;

	/*
	 * The possible lock order scenarios related to the tsol gateway
	 * attribute locks are documented at the beginning of ip.c in the
	 * lock order scenario section.
	 */
	mutex_enter(&attrp->igsa_lock);

	/*
	 * We seek the group
	 * structure which contains all security credentials of the gateway.
	 * An offline IRE is associated with at most one gateway credential.
	 */
	if ((gc = attrp->igsa_gc) != NULL) {
		gcgrp = gc->gc_grp;
		ASSERT(gcgrp != NULL);
		/* Both the read lock and the hold are dropped at "done". */
		rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
		GCGRP_REFHOLD(gcgrp);
	}

	if ((gw_rhc = attrp->igsa_rhc) != NULL) {
		/*
		 * If our cached entry has grown stale, then discard it so we
		 * can get a new one.
		 */
		if (gw_rhc->rhc_invalid || gw_rhc->rhc_tpc->tpc_invalid) {
			TNRHC_RELE(gw_rhc);
			attrp->igsa_rhc = gw_rhc = NULL;
		} else {
			/* Extra hold for our own use; released at "done". */
			TNRHC_HOLD(gw_rhc)
		}
	}

	/* Last attempt at loading the template had failed; try again */
	if (gw_rhc == NULL) {
		if (gcgrp != NULL) {
			/* Prefer the gateway address kept in the group. */
			tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr;

			if (ire->ire_ipversion == IPV4_VERSION) {
				ASSERT(ga->ga_af == AF_INET);
				IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4);
				paddr = &ga_addr4;
			} else {
				ASSERT(ga->ga_af == AF_INET6);
				paddr = &ga->ga_addr;
			}
		} else if (ire->ire_type & IRE_OFFLINK) {
			/* Otherwise use the gateway address in the IRE. */
			if (ire->ire_ipversion == IPV6_VERSION)
				paddr = &ire->ire_gateway_addr_v6;
			else if (ire->ire_ipversion == IPV4_VERSION)
				paddr = &ire->ire_gateway_addr;
		}

		/* We've found a gateway address to do the template lookup */
		if (paddr != NULL) {
			ASSERT(gw_rhc == NULL);
			gw_rhc = find_rhc(paddr, ire->ire_ipversion, B_FALSE);
			if (gw_rhc != NULL) {
				/*
				 * Note that if the lookup above returned an
				 * internal template, we'll use it for the
				 * time being, and do another lookup next
				 * time around.
				 */
				/* Another thread has loaded the template? */
				if (attrp->igsa_rhc != NULL) {
					TNRHC_RELE(gw_rhc)
					/* reload, it could be different */
					gw_rhc = attrp->igsa_rhc;
				} else {
					attrp->igsa_rhc = gw_rhc;
				}
				/*
				 * Hold an extra reference just like we did
				 * above prior to dropping the igsa_lock.
				 */
				TNRHC_HOLD(gw_rhc)
			}
		}
	}

	mutex_exit(&attrp->igsa_lock);
	/* Gateway template not found */
	if (gw_rhc == NULL) {
		/*
		 * If destination address is directly reachable through an
		 * interface rather than through a learned route, pass it.
		 * (paddr stays NULL in that case, so error remains 0.)
		 */
		if (paddr != NULL) {
			DTRACE_PROBE3(
			    tx__ip__log__drop__irematch__nogwtmpl, char *,
			    "ire(1), label(2) off-link with no gw_rhc",
			    ire_t *, ire, ts_label_t *, tsl);
			error = EINVAL;
		}
		goto done;
	}

	if (gc != NULL) {

		tsol_gcdb_t *gcdb;
		/*
		 * The IRE carries a gateway security credential. Compare the
		 * DOI and sensitivity label of the passed in label against
		 * the DOI and label range recorded in that single credential;
		 * no other credentials of the group are examined here.
		 */
		ASSERT(gcgrp != NULL);
		gcdb = gc->gc_db;
		if (tsl->tsl_doi != gcdb->gcdb_doi ||
		    !_blinrange(&tsl->tsl_label, &gcdb->gcdb_slrange)) {
			DTRACE_PROBE3(
			    tx__ip__log__drop__irematch__nogcmatched,
			    char *, "ire(1), tsl(2): all gc failed match",
			    ire_t *, ire, ts_label_t *, tsl);
			error = EACCES;
		}
	} else {
		/*
		 * We didn't find any gateway credentials in the IRE
		 * attributes; fall back to the gateway's template for
		 * label range checks, if we are required to do so.
		 * Host types other than SUN_CIPSO and UNLABELED are not
		 * checked and leave error at 0.
		 */
		ASSERT(gw_rhc != NULL);
		switch (gw_rhc->rhc_tpc->tpc_tp.host_type) {
		case SUN_CIPSO:
			if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
			    (!_blinrange(&tsl->tsl_label,
			    &gw_rhc->rhc_tpc->tpc_tp.tp_sl_range_cipso) &&
			    !blinlset(&tsl->tsl_label,
			    gw_rhc->rhc_tpc->tpc_tp.tp_sl_set_cipso))) {
				error = EACCES;
				DTRACE_PROBE4(
				    tx__ip__log__drop__irematch__deftmpl,
				    char *, "ire(1), tsl(2), gw_rhc(3) "
				    "failed match (cipso gw)",
				    ire_t *, ire, ts_label_t *, tsl,
				    tsol_tnrhc_t *, gw_rhc);
			}
			break;

		case UNLABELED:
			/* Unlabeled gateways use the tp_gw_sl_* range/set. */
			if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
			    (!_blinrange(&tsl->tsl_label,
			    &gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_range) &&
			    !blinlset(&tsl->tsl_label,
			    gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_set))) {
				error = EACCES;
				DTRACE_PROBE4(
				    tx__ip__log__drop__irematch__deftmpl,
				    char *, "ire(1), tsl(2), gw_rhc(3) "
				    "failed match (unlabeled gw)",
				    ire_t *, ire, ts_label_t *, tsl,
				    tsol_tnrhc_t *, gw_rhc);
			}
			break;
		}
	}

done:

	/* Undo the read lock and hold taken on the credential group. */
	if (gcgrp != NULL) {
		rw_exit(&gcgrp->gcgrp_rwlock);
		GCGRP_REFRELE(gcgrp);
	}

	/* Drop the extra hold taken while igsa_lock was held. */
	if (gw_rhc != NULL)
		TNRHC_RELE(gw_rhc)

	return (error);
}

/*
 * Performs label accreditation checks for packet forwarding.
 * Add or remove a CIPSO option as needed.
 *
 * Returns a pointer to the modified mblk if allowed for forwarding,
 * or NULL if the packet must be dropped.
1450 */ 1451 mblk_t * 1452 tsol_ip_forward(ire_t *ire, mblk_t *mp, const ip_recv_attr_t *ira) 1453 { 1454 tsol_ire_gw_secattr_t *attrp = NULL; 1455 ipha_t *ipha; 1456 ip6_t *ip6h; 1457 const void *pdst; 1458 const void *psrc; 1459 boolean_t off_link; 1460 tsol_tpc_t *dst_rhtp, *gw_rhtp; 1461 tsol_ip_label_t label_type; 1462 uchar_t *opt_ptr = NULL; 1463 ts_label_t *tsl; 1464 uint8_t proto; 1465 int af, adjust; 1466 uint16_t iplen; 1467 boolean_t need_tpc_rele = B_FALSE; 1468 ipaddr_t *gw; 1469 ip_stack_t *ipst = ire->ire_ipst; 1470 int err; 1471 ts_label_t *effective_tsl = NULL; 1472 1473 ASSERT(ire != NULL && mp != NULL); 1474 /* 1475 * Note that the ire is the first one found, i.e., an IRE_OFFLINK if 1476 * the destination is offlink. 1477 */ 1478 1479 af = (ire->ire_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6; 1480 1481 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { 1482 ASSERT(ire->ire_ipversion == IPV4_VERSION); 1483 ipha = (ipha_t *)mp->b_rptr; 1484 psrc = &ipha->ipha_src; 1485 pdst = &ipha->ipha_dst; 1486 proto = ipha->ipha_protocol; 1487 if (!tsol_get_option_v4(mp, &label_type, &opt_ptr)) 1488 return (NULL); 1489 } else { 1490 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1491 ip6h = (ip6_t *)mp->b_rptr; 1492 psrc = &ip6h->ip6_src; 1493 pdst = &ip6h->ip6_dst; 1494 proto = ip6h->ip6_nxt; 1495 1496 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP && 1497 proto != IPPROTO_ICMPV6) { 1498 uint8_t *nexthdrp; 1499 uint16_t hdr_len; 1500 1501 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, 1502 &nexthdrp)) { 1503 /* malformed packet; drop it */ 1504 return (NULL); 1505 } 1506 proto = *nexthdrp; 1507 } 1508 if (!tsol_get_option_v6(mp, &label_type, &opt_ptr)) 1509 return (NULL); 1510 } 1511 /* 1512 * off_link is TRUE if destination not directly reachable. 
1513 */ 1514 off_link = (ire->ire_type & IRE_OFFLINK); 1515 1516 if ((tsl = ira->ira_tsl) == NULL) 1517 return (mp); 1518 1519 if (tsl->tsl_flags & TSLF_IMPLICIT_IN) { 1520 DTRACE_PROBE3(tx__ip__log__drop__forward__unresolved__label, 1521 char *, 1522 "cannot forward packet mp(1) with unresolved " 1523 "security label sl(2)", 1524 mblk_t *, mp, ts_label_t *, tsl); 1525 1526 return (NULL); 1527 } 1528 1529 1530 ASSERT(psrc != NULL && pdst != NULL); 1531 dst_rhtp = find_tpc(pdst, ire->ire_ipversion, B_FALSE); 1532 1533 if (dst_rhtp == NULL) { 1534 /* 1535 * Without a template we do not know if forwarding 1536 * violates MAC 1537 */ 1538 DTRACE_PROBE3(tx__ip__log__drop__forward__nodst, char *, 1539 "mp(1) dropped, no template for destination ip4|6(2)", 1540 mblk_t *, mp, void *, pdst); 1541 return (NULL); 1542 } 1543 1544 /* 1545 * Gateway template must have existed for off-link destinations, 1546 * since tsol_ire_match_gwattr has ensured such condition. 1547 */ 1548 if (ire->ire_ipversion == IPV4_VERSION && off_link) { 1549 /* 1550 * Surya note: first check if we can get the gw_rhtp from 1551 * the ire_gw_secattr->igsa_rhc; if this is null, then 1552 * do a lookup based on the ire_addr (address of gw) 1553 */ 1554 if (ire->ire_gw_secattr != NULL && 1555 ire->ire_gw_secattr->igsa_rhc != NULL) { 1556 attrp = ire->ire_gw_secattr; 1557 gw_rhtp = attrp->igsa_rhc->rhc_tpc; 1558 } else { 1559 gw = &ire->ire_gateway_addr; 1560 gw_rhtp = find_tpc(gw, ire->ire_ipversion, B_FALSE); 1561 need_tpc_rele = B_TRUE; 1562 } 1563 if (gw_rhtp == NULL) { 1564 DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *, 1565 "mp(1) dropped, no gateway in ire attributes(2)", 1566 mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp); 1567 mp = NULL; 1568 goto keep_label; 1569 } 1570 } 1571 if (ire->ire_ipversion == IPV6_VERSION && 1572 ((attrp = ire->ire_gw_secattr) == NULL || attrp->igsa_rhc == NULL || 1573 (gw_rhtp = attrp->igsa_rhc->rhc_tpc) == NULL) && off_link) { 1574 
DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *, 1575 "mp(1) dropped, no gateway in ire attributes(2)", 1576 mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp); 1577 mp = NULL; 1578 goto keep_label; 1579 } 1580 1581 /* 1582 * Check that the label for the packet is acceptable 1583 * by destination host; otherwise, drop it. 1584 */ 1585 switch (dst_rhtp->tpc_tp.host_type) { 1586 case SUN_CIPSO: 1587 if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi || 1588 (!_blinrange(&tsl->tsl_label, 1589 &dst_rhtp->tpc_tp.tp_sl_range_cipso) && 1590 !blinlset(&tsl->tsl_label, 1591 dst_rhtp->tpc_tp.tp_sl_set_cipso))) { 1592 DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *, 1593 "labeled packet mp(1) dropped, label(2) fails " 1594 "destination(3) accredation check", 1595 mblk_t *, mp, ts_label_t *, tsl, 1596 tsol_tpc_t *, dst_rhtp); 1597 mp = NULL; 1598 goto keep_label; 1599 } 1600 break; 1601 1602 1603 case UNLABELED: 1604 if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi || 1605 !blequal(&dst_rhtp->tpc_tp.tp_def_label, 1606 &tsl->tsl_label)) { 1607 DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *, 1608 "unlabeled packet mp(1) dropped, label(2) fails " 1609 "destination(3) accredation check", 1610 mblk_t *, mp, ts_label_t *, tsl, 1611 tsol_tpc_t *, dst_rhtp); 1612 mp = NULL; 1613 goto keep_label; 1614 } 1615 break; 1616 } 1617 if (label_type == OPT_CIPSO) { 1618 /* 1619 * We keep the label on any of the following cases: 1620 * 1621 * 1. The destination is labeled (on/off-link). 1622 * 2. The unlabeled destination is off-link, 1623 * and the next hop gateway is labeled. 
1624 */ 1625 if (dst_rhtp->tpc_tp.host_type != UNLABELED || 1626 (off_link && 1627 gw_rhtp->tpc_tp.host_type != UNLABELED)) 1628 goto keep_label; 1629 1630 /* 1631 * Strip off the CIPSO option from the packet because: the 1632 * unlabeled destination host is directly reachable through 1633 * an interface (on-link); or, the unlabeled destination host 1634 * is not directly reachable (off-link), and the next hop 1635 * gateway is unlabeled. 1636 */ 1637 adjust = (af == AF_INET) ? tsol_remove_secopt(ipha, MBLKL(mp)) : 1638 tsol_remove_secopt_v6(ip6h, MBLKL(mp)); 1639 1640 ASSERT(adjust <= 0); 1641 if (adjust != 0) { 1642 1643 /* adjust is negative */ 1644 ASSERT((mp->b_wptr + adjust) >= mp->b_rptr); 1645 mp->b_wptr += adjust; 1646 /* 1647 * Note that caller adjusts ira_pktlen and 1648 * ira_ip_hdr_length 1649 * 1650 * For AF_INET6 note that tsol_remove_secopt_v6 1651 * adjusted ip6_plen. 1652 */ 1653 if (af == AF_INET) { 1654 ipha = (ipha_t *)mp->b_rptr; 1655 iplen = ntohs(ipha->ipha_length) + adjust; 1656 ipha->ipha_length = htons(iplen); 1657 ipha->ipha_hdr_checksum = 0; 1658 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1659 } 1660 DTRACE_PROBE3(tx__ip__log__info__forward__adjust, 1661 char *, 1662 "mp(1) adjusted(2) for CIPSO option removal", 1663 mblk_t *, mp, int, adjust); 1664 } 1665 goto keep_label; 1666 } 1667 1668 ASSERT(label_type == OPT_NONE); 1669 ASSERT(dst_rhtp != NULL); 1670 1671 /* 1672 * We need to add CIPSO option if the destination or the next hop 1673 * gateway is labeled. Otherwise, pass the packet as is. 1674 */ 1675 if (dst_rhtp->tpc_tp.host_type == UNLABELED && 1676 (!off_link || gw_rhtp->tpc_tp.host_type == UNLABELED)) 1677 goto keep_label; 1678 1679 /* 1680 * Since we are forwarding packets we use GLOBAL_ZONEID for 1681 * the IRE lookup in tsol_check_label. 1682 * Since mac_exempt is false the zoneid isn't used for anything 1683 * but the IRE lookup, hence we set zone_is_global to false. 
1684 */ 1685 if (af == AF_INET) { 1686 err = tsol_check_label_v4(tsl, GLOBAL_ZONEID, &mp, 1687 CONN_MAC_DEFAULT, B_FALSE, ipst, &effective_tsl); 1688 } else { 1689 err = tsol_check_label_v6(tsl, GLOBAL_ZONEID, &mp, 1690 CONN_MAC_DEFAULT, B_FALSE, ipst, &effective_tsl); 1691 } 1692 if (err != 0) { 1693 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 1694 ip_drop_output("tsol_check_label", mp, NULL); 1695 freemsg(mp); 1696 mp = NULL; 1697 goto keep_label; 1698 } 1699 1700 /* 1701 * The effective_tsl must never affect the routing decision, hence 1702 * we ignore it here. 1703 */ 1704 if (effective_tsl != NULL) 1705 label_rele(effective_tsl); 1706 1707 if (af == AF_INET) { 1708 ipha = (ipha_t *)mp->b_rptr; 1709 ipha->ipha_hdr_checksum = 0; 1710 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1711 } 1712 1713 keep_label: 1714 TPC_RELE(dst_rhtp); 1715 if (need_tpc_rele && gw_rhtp != NULL) 1716 TPC_RELE(gw_rhtp); 1717 return (mp); 1718 } 1719 1720 /* 1721 * Name: tsol_pmtu_adjust() 1722 * 1723 * Returns the adjusted mtu after removing security option. 1724 * Removes/subtracts the option if the packet's cred indicates an unlabeled 1725 * sender or if pkt_diff indicates this system enlarged the packet. 1726 */ 1727 uint32_t 1728 tsol_pmtu_adjust(mblk_t *mp, uint32_t mtu, int pkt_diff, int af) 1729 { 1730 int label_adj = 0; 1731 uint32_t min_mtu = IP_MIN_MTU; 1732 tsol_tpc_t *src_rhtp; 1733 void *src; 1734 1735 /* 1736 * Note: label_adj is non-positive, indicating the number of 1737 * bytes removed by removing the security option from the 1738 * header. 
1739 */ 1740 if (af == AF_INET6) { 1741 ip6_t *ip6h; 1742 1743 min_mtu = IPV6_MIN_MTU; 1744 ip6h = (ip6_t *)mp->b_rptr; 1745 src = &ip6h->ip6_src; 1746 if ((src_rhtp = find_tpc(src, IPV6_VERSION, B_FALSE)) == NULL) 1747 return (mtu); 1748 if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED) { 1749 label_adj = tsol_remove_secopt_v6( 1750 (ip6_t *)mp->b_rptr, MBLKL(mp)); 1751 } 1752 } else { 1753 ipha_t *ipha; 1754 1755 ASSERT(af == AF_INET); 1756 ipha = (ipha_t *)mp->b_rptr; 1757 src = &ipha->ipha_src; 1758 if ((src_rhtp = find_tpc(src, IPV4_VERSION, B_FALSE)) == NULL) 1759 return (mtu); 1760 if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED) 1761 label_adj = tsol_remove_secopt( 1762 (ipha_t *)mp->b_rptr, MBLKL(mp)); 1763 } 1764 /* 1765 * Make pkt_diff non-negative and the larger of the bytes 1766 * previously added (if any) or just removed, since label 1767 * addition + subtraction may not be completely idempotent. 1768 */ 1769 if (pkt_diff < -label_adj) 1770 pkt_diff = -label_adj; 1771 if (pkt_diff > 0 && pkt_diff < mtu) 1772 mtu -= pkt_diff; 1773 1774 TPC_RELE(src_rhtp); 1775 return (MAX(mtu, min_mtu)); 1776 } 1777 1778 /* 1779 * Name: tsol_rtsa_init() 1780 * 1781 * Normal: Sanity checks on the route security attributes provided by 1782 * user. Convert it into a route security parameter list to 1783 * be returned to caller. 1784 * 1785 * Output: EINVAL if bad security attributes in the routing message 1786 * ENOMEM if unable to allocate data structures 1787 * 0 otherwise. 1788 * 1789 * Note: On input, cp must point to the end of any addresses in 1790 * the rt_msghdr_t structure. 1791 */ 1792 int 1793 tsol_rtsa_init(rt_msghdr_t *rtm, tsol_rtsecattr_t *sp, caddr_t cp) 1794 { 1795 uint_t sacnt; 1796 int err; 1797 caddr_t lim; 1798 tsol_rtsecattr_t *tp; 1799 1800 ASSERT((cp >= (caddr_t)&rtm[1]) && sp != NULL); 1801 1802 /* 1803 * In theory, we could accept as many security attributes configured 1804 * per route destination. 
However, the current design is limited 1805 * such that at most only one set security attributes is allowed to 1806 * be associated with a prefix IRE. We therefore assert for now. 1807 */ 1808 /* LINTED */ 1809 ASSERT(TSOL_RTSA_REQUEST_MAX == 1); 1810 1811 sp->rtsa_cnt = 0; 1812 lim = (caddr_t)rtm + rtm->rtm_msglen; 1813 ASSERT(cp <= lim); 1814 1815 if ((lim - cp) < sizeof (rtm_ext_t) || 1816 ((rtm_ext_t *)cp)->rtmex_type != RTMEX_GATEWAY_SECATTR) 1817 return (0); 1818 1819 if (((rtm_ext_t *)cp)->rtmex_len < sizeof (tsol_rtsecattr_t)) 1820 return (EINVAL); 1821 1822 cp += sizeof (rtm_ext_t); 1823 1824 if ((lim - cp) < sizeof (*tp) || 1825 (tp = (tsol_rtsecattr_t *)cp, (sacnt = tp->rtsa_cnt) == 0) || 1826 (lim - cp) < TSOL_RTSECATTR_SIZE(sacnt)) 1827 return (EINVAL); 1828 1829 /* 1830 * Trying to add route security attributes when system 1831 * labeling service is not available, or when user supllies 1832 * more than the maximum number of security attributes 1833 * allowed per request. 1834 */ 1835 if ((sacnt > 0 && !is_system_labeled()) || 1836 sacnt > TSOL_RTSA_REQUEST_MAX) 1837 return (EINVAL); 1838 1839 /* Ensure valid credentials */ 1840 if ((err = rtsa_validate(&((tsol_rtsecattr_t *)cp)-> 1841 rtsa_attr[0])) != 0) { 1842 cp += sizeof (*sp); 1843 return (err); 1844 } 1845 1846 bcopy(cp, sp, sizeof (*sp)); 1847 cp += sizeof (*sp); 1848 return (0); 1849 } 1850 1851 int 1852 tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc) 1853 { 1854 tsol_ire_gw_secattr_t *attrp; 1855 boolean_t exists = B_FALSE; 1856 in_addr_t ga_addr4; 1857 void *paddr = NULL; 1858 tsol_gcgrp_t *gcgrp = NULL; 1859 1860 ASSERT(ire != NULL); 1861 1862 /* 1863 * The only time that attrp can be NULL is when this routine is 1864 * called for the first time during the creation/initialization 1865 * of the corresponding IRE. It will only get cleared when the 1866 * IRE is deleted. 
1867 */ 1868 if ((attrp = ire->ire_gw_secattr) == NULL) { 1869 attrp = ire_gw_secattr_alloc(KM_NOSLEEP); 1870 if (attrp == NULL) 1871 return (ENOMEM); 1872 ire->ire_gw_secattr = attrp; 1873 } else { 1874 exists = B_TRUE; 1875 mutex_enter(&attrp->igsa_lock); 1876 1877 if (attrp->igsa_rhc != NULL) { 1878 TNRHC_RELE(attrp->igsa_rhc); 1879 attrp->igsa_rhc = NULL; 1880 } 1881 1882 if (attrp->igsa_gc != NULL) 1883 GC_REFRELE(attrp->igsa_gc); 1884 } 1885 ASSERT(!exists || MUTEX_HELD(&attrp->igsa_lock)); 1886 1887 /* 1888 * References already held by caller and we keep them; 1889 * note that gc may be set to NULL to clear out igsa_gc. 1890 */ 1891 attrp->igsa_gc = gc; 1892 1893 if (gc != NULL) { 1894 gcgrp = gc->gc_grp; 1895 ASSERT(gcgrp != NULL); 1896 } 1897 1898 /* 1899 * Intialize the template for gateway; we use the gateway's 1900 * address found in either the passed in gateway credential 1901 * or group pointer, or the ire_gateway_addr{_v6} field. 1902 */ 1903 if (gcgrp != NULL) { 1904 tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr; 1905 1906 /* 1907 * Caller is holding a reference, and that we don't 1908 * need to hold any lock to access the address. 1909 */ 1910 if (ipversion == IPV4_VERSION) { 1911 ASSERT(ga->ga_af == AF_INET); 1912 IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4); 1913 paddr = &ga_addr4; 1914 } else { 1915 ASSERT(ga->ga_af == AF_INET6); 1916 paddr = &ga->ga_addr; 1917 } 1918 } else if (ire->ire_type & IRE_OFFLINK) { 1919 if (ipversion == IPV6_VERSION) 1920 paddr = &ire->ire_gateway_addr_v6; 1921 else if (ipversion == IPV4_VERSION) 1922 paddr = &ire->ire_gateway_addr; 1923 } 1924 1925 /* 1926 * Lookup the gateway template; note that we could get an internal 1927 * template here, which we cache anyway. During IRE matching, we'll 1928 * try to update this gateway template cache and hopefully get a 1929 * real one. 
1930 */ 1931 if (paddr != NULL) { 1932 attrp->igsa_rhc = find_rhc(paddr, ipversion, B_FALSE); 1933 } 1934 1935 if (exists) 1936 mutex_exit(&attrp->igsa_lock); 1937 1938 return (0); 1939 } 1940 1941 /* 1942 * This function figures the type of MLP that we'll be using based on the 1943 * address that the user is binding and the zone. If the address is 1944 * unspecified, then we're looking at both private and shared. If it's one 1945 * of the zone's private addresses, then it's private only. If it's one 1946 * of the global addresses, then it's shared only. Multicast addresses are 1947 * treated same as unspecified address. 1948 * 1949 * If we can't figure out what it is, then return mlptSingle. That's actually 1950 * an error case. 1951 * 1952 * The callers are assumed to pass in zone->zone_id and not the zoneid that 1953 * is stored in a conn_t (since the latter will be GLOBAL_ZONEID in an 1954 * exclusive stack zone). 1955 */ 1956 mlp_type_t 1957 tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr, 1958 ip_stack_t *ipst) 1959 { 1960 in_addr_t in4; 1961 ire_t *ire; 1962 ipif_t *ipif; 1963 zoneid_t addrzone; 1964 zoneid_t ip_zoneid; 1965 1966 ASSERT(addr != NULL); 1967 1968 /* 1969 * For exclusive stacks we set the zoneid to zero 1970 * to operate as if in the global zone for IRE and conn_t comparisons. 
1971 */ 1972 if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) 1973 ip_zoneid = GLOBAL_ZONEID; 1974 else 1975 ip_zoneid = zoneid; 1976 1977 if (version == IPV6_VERSION && 1978 IN6_IS_ADDR_V4MAPPED((const in6_addr_t *)addr)) { 1979 IN6_V4MAPPED_TO_IPADDR((const in6_addr_t *)addr, in4); 1980 addr = &in4; 1981 version = IPV4_VERSION; 1982 } 1983 1984 /* Check whether the IRE_LOCAL (or ipif) is ALL_ZONES */ 1985 if (version == IPV4_VERSION) { 1986 in4 = *(const in_addr_t *)addr; 1987 if ((in4 == INADDR_ANY) || CLASSD(in4)) { 1988 return (mlptBoth); 1989 } 1990 ire = ire_ftable_lookup_v4(in4, 0, 0, IRE_LOCAL|IRE_LOOPBACK, 1991 NULL, ip_zoneid, NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, 1992 0, ipst, NULL); 1993 } else { 1994 if (IN6_IS_ADDR_UNSPECIFIED((const in6_addr_t *)addr) || 1995 IN6_IS_ADDR_MULTICAST((const in6_addr_t *)addr)) { 1996 return (mlptBoth); 1997 } 1998 ire = ire_ftable_lookup_v6(addr, 0, 0, IRE_LOCAL|IRE_LOOPBACK, 1999 NULL, ip_zoneid, NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY, 2000 0, ipst, NULL); 2001 } 2002 /* 2003 * If we can't find the IRE, then we have to behave exactly like 2004 * ip_laddr_verify_{v4,v6}. That means looking up the IPIF so that 2005 * users can bind to addresses on "down" interfaces. 2006 * 2007 * If we can't find that either, then the bind is going to fail, so 2008 * just give up. Note that there's a miniscule chance that the address 2009 * is in transition, but we don't bother handling that. 2010 */ 2011 if (ire == NULL) { 2012 if (version == IPV4_VERSION) 2013 ipif = ipif_lookup_addr(*(const in_addr_t *)addr, NULL, 2014 ip_zoneid, ipst); 2015 else 2016 ipif = ipif_lookup_addr_v6((const in6_addr_t *)addr, 2017 NULL, ip_zoneid, ipst); 2018 if (ipif == NULL) { 2019 return (mlptSingle); 2020 } 2021 addrzone = ipif->ipif_zoneid; 2022 ipif_refrele(ipif); 2023 } else { 2024 addrzone = ire->ire_zoneid; 2025 ire_refrele(ire); 2026 } 2027 return (addrzone == ALL_ZONES ? 
mlptShared : mlptPrivate); 2028 } 2029 2030 /* 2031 * Since we are configuring local interfaces, and we know trusted 2032 * extension CDE requires local interfaces to be cipso host type in 2033 * order to function correctly, we'll associate a cipso template 2034 * to each local interface and let the interface come up. Configuring 2035 * a local interface to be "unlabeled" host type is a configuration error. 2036 * We'll override that error and make the interface host type to be cipso 2037 * here. 2038 * 2039 * The code is optimized for the usual "success" case and unwinds things on 2040 * error. We don't want to go to the trouble and expense of formatting the 2041 * interface name for the usual case where everything is configured correctly. 2042 */ 2043 boolean_t 2044 tsol_check_interface_address(const ipif_t *ipif) 2045 { 2046 tsol_tpc_t *tp; 2047 char addrbuf[INET6_ADDRSTRLEN]; 2048 int af; 2049 const void *addr; 2050 zone_t *zone; 2051 ts_label_t *plabel; 2052 const bslabel_t *label; 2053 char ifbuf[LIFNAMSIZ + 10]; 2054 const char *ifname; 2055 boolean_t retval; 2056 tsol_rhent_t rhent; 2057 netstack_t *ns = ipif->ipif_ill->ill_ipst->ips_netstack; 2058 2059 if (IN6_IS_ADDR_V4MAPPED(&ipif->ipif_v6lcl_addr)) { 2060 af = AF_INET; 2061 addr = &V4_PART_OF_V6(ipif->ipif_v6lcl_addr); 2062 } else { 2063 af = AF_INET6; 2064 addr = &ipif->ipif_v6lcl_addr; 2065 } 2066 2067 tp = find_tpc(&ipif->ipif_v6lcl_addr, IPV6_VERSION, B_FALSE); 2068 2069 /* assumes that ALL_ZONES implies that there is no exclusive stack */ 2070 if (ipif->ipif_zoneid == ALL_ZONES) { 2071 zone = NULL; 2072 } else if (ns->netstack_stackid == GLOBAL_NETSTACKID) { 2073 /* Shared stack case */ 2074 zone = zone_find_by_id(ipif->ipif_zoneid); 2075 } else { 2076 /* Exclusive stack case */ 2077 zone = zone_find_by_id(crgetzoneid(ipif->ipif_ill->ill_credp)); 2078 } 2079 if (zone != NULL) { 2080 plabel = zone->zone_slabel; 2081 ASSERT(plabel != NULL); 2082 label = label2bslabel(plabel); 2083 } 2084 2085 /* 2086 
* If it's CIPSO and an all-zones address, then we're done. 2087 * If it's a CIPSO zone specific address, the zone's label 2088 * must be in the range or set specified in the template. 2089 * When the remote host entry is missing or the template 2090 * type is incorrect for this interface, we create a 2091 * CIPSO host entry in kernel and allow the interface to be 2092 * brought up as CIPSO type. 2093 */ 2094 if (tp != NULL && ( 2095 /* The all-zones case */ 2096 (tp->tpc_tp.host_type == SUN_CIPSO && 2097 tp->tpc_tp.tp_doi == default_doi && 2098 ipif->ipif_zoneid == ALL_ZONES) || 2099 /* The local-zone case */ 2100 (zone != NULL && plabel->tsl_doi == tp->tpc_tp.tp_doi && 2101 ((tp->tpc_tp.host_type == SUN_CIPSO && 2102 (_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) || 2103 blinlset(label, tp->tpc_tp.tp_sl_set_cipso))))))) { 2104 if (zone != NULL) 2105 zone_rele(zone); 2106 TPC_RELE(tp); 2107 return (B_TRUE); 2108 } 2109 2110 ifname = ipif->ipif_ill->ill_name; 2111 if (ipif->ipif_id != 0) { 2112 (void) snprintf(ifbuf, sizeof (ifbuf), "%s:%u", ifname, 2113 ipif->ipif_id); 2114 ifname = ifbuf; 2115 } 2116 (void) inet_ntop(af, addr, addrbuf, sizeof (addrbuf)); 2117 2118 if (tp == NULL) { 2119 cmn_err(CE_NOTE, "template entry for %s missing. Default to " 2120 "CIPSO type for %s", ifname, addrbuf); 2121 retval = B_TRUE; 2122 } else if (tp->tpc_tp.host_type == UNLABELED) { 2123 cmn_err(CE_NOTE, "template type for %s incorrectly configured. " 2124 "Change to CIPSO type for %s", ifname, addrbuf); 2125 retval = B_TRUE; 2126 } else if (ipif->ipif_zoneid == ALL_ZONES) { 2127 if (tp->tpc_tp.host_type != SUN_CIPSO) { 2128 cmn_err(CE_NOTE, "%s failed: %s isn't set to CIPSO for " 2129 "all-zones. 
Converted to CIPSO.", ifname, addrbuf); 2130 retval = B_TRUE; 2131 } else { 2132 cmn_err(CE_NOTE, "%s failed: %s has wrong DOI %d " 2133 "instead of %d", ifname, addrbuf, 2134 tp->tpc_tp.tp_doi, default_doi); 2135 retval = B_FALSE; 2136 } 2137 } else if (zone == NULL) { 2138 cmn_err(CE_NOTE, "%s failed: zoneid %d unknown", 2139 ifname, ipif->ipif_zoneid); 2140 retval = B_FALSE; 2141 } else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) { 2142 cmn_err(CE_NOTE, "%s failed: zone %s has DOI %d but %s has " 2143 "DOI %d", ifname, zone->zone_name, plabel->tsl_doi, 2144 addrbuf, tp->tpc_tp.tp_doi); 2145 retval = B_FALSE; 2146 } else { 2147 cmn_err(CE_NOTE, "%s failed: zone %s label incompatible with " 2148 "%s", ifname, zone->zone_name, addrbuf); 2149 tsol_print_label(label, "zone label"); 2150 retval = B_FALSE; 2151 } 2152 2153 if (zone != NULL) 2154 zone_rele(zone); 2155 if (tp != NULL) 2156 TPC_RELE(tp); 2157 if (retval) { 2158 /* 2159 * we've corrected a config error and let the interface 2160 * come up as cipso. Need to insert an rhent. 2161 */ 2162 if ((rhent.rh_address.ta_family = af) == AF_INET) { 2163 rhent.rh_prefix = 32; 2164 rhent.rh_address.ta_addr_v4 = *(struct in_addr *)addr; 2165 } else { 2166 rhent.rh_prefix = 128; 2167 rhent.rh_address.ta_addr_v6 = *(in6_addr_t *)addr; 2168 } 2169 (void) strcpy(rhent.rh_template, "cipso"); 2170 if (tnrh_load(&rhent) != 0) { 2171 cmn_err(CE_NOTE, "%s failed: Cannot insert CIPSO " 2172 "template for local addr %s", ifname, addrbuf); 2173 retval = B_FALSE; 2174 } 2175 } 2176 return (retval); 2177 } 2178