1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/strsubr.h> 29 #include <sys/stropts.h> 30 #include <sys/sunddi.h> 31 #include <sys/cred.h> 32 #include <sys/debug.h> 33 #include <sys/kmem.h> 34 #include <sys/errno.h> 35 #include <sys/disp.h> 36 #include <netinet/in.h> 37 #include <netinet/in_systm.h> 38 #include <netinet/ip.h> 39 #include <netinet/ip_icmp.h> 40 #include <netinet/tcp.h> 41 #include <inet/common.h> 42 #include <inet/ipclassifier.h> 43 #include <inet/ip.h> 44 #include <inet/mib2.h> 45 #include <inet/nd.h> 46 #include <inet/tcp.h> 47 #include <inet/ip_rts.h> 48 #include <inet/ip_ire.h> 49 #include <inet/ip_if.h> 50 #include <sys/modhash.h> 51 52 #include <sys/tsol/label.h> 53 #include <sys/tsol/label_macro.h> 54 #include <sys/tsol/tnet.h> 55 #include <sys/tsol/tndb.h> 56 #include <sys/strsun.h> 57 58 /* tunable for strict error-reply behavior (TCP RST and ICMP Unreachable) */ 59 int tsol_strict_error; 60 61 /* 62 * Some notes on the Trusted Solaris IRE gateway security attributes: 
63 * 64 * When running in Trusted mode, the routing subsystem determines whether or 65 * not a packet can be delivered to an off-link host (not directly reachable 66 * through an interface) based on the accreditation checks of the packet's 67 * security attributes against those associated with the next-hop gateway. 68 * 69 * The next-hop gateway's security attributes can be derived from two sources 70 * (in order of preference): route-related and the host database. A Trusted 71 * system must be configured with at least the host database containing an 72 * entry for the next-hop gateway, or otherwise no accreditation checks can 73 * be performed, which may result in the inability to send packets to any 74 * off-link destination host. 75 * 76 * The major differences between the two sources are the number and type of 77 * security attributes used for accreditation checks. A host database entry 78 * can contain at most one set of security attributes, specific only to the 79 * next-hop gateway. In contrast, route-related security attributes are made 80 * up of a collection of security attributes for the distant networks, and 81 * are grouped together per next-hop gateway used to reach those networks. 82 * This is the preferred method, and the routing subsystem will fall back to 83 * the host database entry only if there are no route-related attributes 84 * associated with the next-hop gateway. 85 * 86 * In Trusted mode, all of the IRE entries (except LOCAL/LOOPBACK/BROADCAST/ 87 * INTERFACE type) are initialized to contain a placeholder to store this 88 * information. The ire_gw_secattr structure gets allocated, initialized 89 * and associated with the IRE during the time of the IRE creation. The 90 * initialization process also includes resolving the host database entry 91 * of the next-hop gateway for fallback purposes. 
It does not include any 92 * route-related attribute setup, as that process comes separately as part 93 * of the route requests (add/change) made to the routing subsystem. 94 * 95 * The underlying logic which involves associating IREs with the gateway 96 * security attributes is represented by the following data structures: 97 * 98 * tsol_gcdb_t, or "gcdb" 99 * 100 * - This is a system-wide collection of records containing the 101 * currently used route-related security attributes, which are fed 102 * through the routing socket interface, e.g. "route add/change". 103 * 104 * tsol_gc_t, or "gc" 105 * 106 * - This is the gateway credential structure, and it provides the 107 * only mechanism to access the contents of gcdb. More than one gc 108 * entry may refer to the same gcdb record. gc's in the system are 109 * grouped according to the next-hop gateway address. 110 * 111 * tsol_gcgrp_t, or "gcgrp" 112 * 113 * - Group of gateway credentials, and is unique per next-hop gateway 114 * address. When the group is not empty, i.e. when gcgrp_count is 115 * greater than zero, it contains one or more gc's, each pointing to 116 * a gcdb record which indicates the gateway security attributes 117 * associated with the next-hop gateway. 118 * 119 * The fields of the tsol_ire_gw_secattr_t used from within the IRE are: 120 * 121 * igsa_lock 122 * 123 * - Lock that protects all fields within tsol_ire_gw_secattr_t. 124 * 125 * igsa_rhc 126 * 127 * - Remote host cache database entry of next-hop gateway. This is 128 * used in the case when there are no route-related attributes 129 * configured for the IRE. 130 * 131 * igsa_gc 132 * 133 * - A set of route-related attributes that only get set for prefix 134 * IREs. If this is non-NULL, the prefix IRE has been associated 135 * with a set of gateway security attributes by way of route add/ 136 * change functionality. This field stays NULL for IRE_CACHEs. 
137 * 138 * igsa_gcgrp 139 * 140 * - Group of gc's which only gets set for IRE_CACHEs. Each of the gc 141 * points to a gcdb record that contains the security attributes 142 * used to perform the credential checks of the packet which uses 143 * the IRE. If the group is not empty, the list of gc's can be 144 * traversed starting at gcgrp_head. This field stays NULL for 145 * prefix IREs. 146 */ 147 148 static kmem_cache_t *ire_gw_secattr_cache; 149 150 #define GCDB_HASH_SIZE 101 151 #define GCGRP_HASH_SIZE 101 152 153 #define GCDB_REFRELE(p) { \ 154 mutex_enter(&gcdb_lock); \ 155 ASSERT((p)->gcdb_refcnt > 0); \ 156 if (--((p)->gcdb_refcnt) == 0) \ 157 gcdb_inactive(p); \ 158 ASSERT(MUTEX_HELD(&gcdb_lock)); \ 159 mutex_exit(&gcdb_lock); \ 160 } 161 162 static int gcdb_hash_size = GCDB_HASH_SIZE; 163 static int gcgrp_hash_size = GCGRP_HASH_SIZE; 164 static mod_hash_t *gcdb_hash; 165 static mod_hash_t *gcgrp4_hash; 166 static mod_hash_t *gcgrp6_hash; 167 168 static kmutex_t gcdb_lock; 169 kmutex_t gcgrp_lock; 170 171 static uint_t gcdb_hash_by_secattr(void *, mod_hash_key_t); 172 static int gcdb_hash_cmp(mod_hash_key_t, mod_hash_key_t); 173 static tsol_gcdb_t *gcdb_lookup(struct rtsa_s *, boolean_t); 174 static void gcdb_inactive(tsol_gcdb_t *); 175 176 static uint_t gcgrp_hash_by_addr(void *, mod_hash_key_t); 177 static int gcgrp_hash_cmp(mod_hash_key_t, mod_hash_key_t); 178 179 static int ire_gw_secattr_constructor(void *, void *, int); 180 static void ire_gw_secattr_destructor(void *, void *); 181 182 void 183 tnet_init(void) 184 { 185 ire_gw_secattr_cache = kmem_cache_create("ire_gw_secattr_cache", 186 sizeof (tsol_ire_gw_secattr_t), 64, ire_gw_secattr_constructor, 187 ire_gw_secattr_destructor, NULL, NULL, NULL, 0); 188 189 gcdb_hash = mod_hash_create_extended("gcdb_hash", 190 gcdb_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor, 191 gcdb_hash_by_secattr, NULL, gcdb_hash_cmp, KM_SLEEP); 192 193 gcgrp4_hash = mod_hash_create_extended("gcgrp4_hash", 194 
gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor, 195 gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP); 196 197 gcgrp6_hash = mod_hash_create_extended("gcgrp6_hash", 198 gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor, 199 gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP); 200 201 mutex_init(&gcdb_lock, NULL, MUTEX_DEFAULT, NULL); 202 mutex_init(&gcgrp_lock, NULL, MUTEX_DEFAULT, NULL); 203 } 204 205 void 206 tnet_fini(void) 207 { 208 kmem_cache_destroy(ire_gw_secattr_cache); 209 mod_hash_destroy_hash(gcdb_hash); 210 mod_hash_destroy_hash(gcgrp4_hash); 211 mod_hash_destroy_hash(gcgrp6_hash); 212 mutex_destroy(&gcdb_lock); 213 mutex_destroy(&gcgrp_lock); 214 } 215 216 /* ARGSUSED */ 217 static int 218 ire_gw_secattr_constructor(void *buf, void *cdrarg, int kmflags) 219 { 220 tsol_ire_gw_secattr_t *attrp = buf; 221 222 mutex_init(&attrp->igsa_lock, NULL, MUTEX_DEFAULT, NULL); 223 224 attrp->igsa_rhc = NULL; 225 attrp->igsa_gc = NULL; 226 attrp->igsa_gcgrp = NULL; 227 228 return (0); 229 } 230 231 /* ARGSUSED */ 232 static void 233 ire_gw_secattr_destructor(void *buf, void *cdrarg) 234 { 235 tsol_ire_gw_secattr_t *attrp = (tsol_ire_gw_secattr_t *)buf; 236 237 mutex_destroy(&attrp->igsa_lock); 238 } 239 240 tsol_ire_gw_secattr_t * 241 ire_gw_secattr_alloc(int kmflags) 242 { 243 return (kmem_cache_alloc(ire_gw_secattr_cache, kmflags)); 244 } 245 246 void 247 ire_gw_secattr_free(tsol_ire_gw_secattr_t *attrp) 248 { 249 ASSERT(MUTEX_NOT_HELD(&attrp->igsa_lock)); 250 251 if (attrp->igsa_rhc != NULL) { 252 TNRHC_RELE(attrp->igsa_rhc); 253 attrp->igsa_rhc = NULL; 254 } 255 256 if (attrp->igsa_gc != NULL) { 257 GC_REFRELE(attrp->igsa_gc); 258 attrp->igsa_gc = NULL; 259 } 260 if (attrp->igsa_gcgrp != NULL) { 261 GCGRP_REFRELE(attrp->igsa_gcgrp); 262 attrp->igsa_gcgrp = NULL; 263 } 264 265 ASSERT(attrp->igsa_rhc == NULL); 266 ASSERT(attrp->igsa_gc == NULL); 267 ASSERT(attrp->igsa_gcgrp == NULL); 268 269 kmem_cache_free(ire_gw_secattr_cache, attrp); 
270 } 271 272 /* ARGSUSED */ 273 static uint_t 274 gcdb_hash_by_secattr(void *hash_data, mod_hash_key_t key) 275 { 276 const struct rtsa_s *rp = (struct rtsa_s *)key; 277 const uint32_t *up, *ue; 278 uint_t hash; 279 int i; 280 281 ASSERT(rp != NULL); 282 283 /* See comments in hash_bylabel in zone.c for details */ 284 hash = rp->rtsa_doi + (rp->rtsa_doi << 1); 285 up = (const uint32_t *)&rp->rtsa_slrange; 286 ue = up + sizeof (rp->rtsa_slrange) / sizeof (*up); 287 i = 1; 288 while (up < ue) { 289 /* using 2^n + 1, 1 <= n <= 16 as source of many primes */ 290 hash += *up + (*up << ((i % 16) + 1)); 291 up++; 292 i++; 293 } 294 return (hash); 295 } 296 297 static int 298 gcdb_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 299 { 300 struct rtsa_s *rp1 = (struct rtsa_s *)key1; 301 struct rtsa_s *rp2 = (struct rtsa_s *)key2; 302 303 ASSERT(rp1 != NULL && rp2 != NULL); 304 305 if (blequal(&rp1->rtsa_slrange.lower_bound, 306 &rp2->rtsa_slrange.lower_bound) && 307 blequal(&rp1->rtsa_slrange.upper_bound, 308 &rp2->rtsa_slrange.upper_bound) && 309 rp1->rtsa_doi == rp2->rtsa_doi) 310 return (0); 311 312 /* No match; not found */ 313 return (-1); 314 } 315 316 /* ARGSUSED */ 317 static uint_t 318 gcgrp_hash_by_addr(void *hash_data, mod_hash_key_t key) 319 { 320 tsol_gcgrp_addr_t *ga = (tsol_gcgrp_addr_t *)key; 321 uint_t idx = 0; 322 uint32_t *ap; 323 324 ASSERT(ga != NULL); 325 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6); 326 327 ap = (uint32_t *)&ga->ga_addr.s6_addr32[0]; 328 idx ^= *ap++; 329 idx ^= *ap++; 330 idx ^= *ap++; 331 idx ^= *ap; 332 333 return (idx); 334 } 335 336 static int 337 gcgrp_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2) 338 { 339 tsol_gcgrp_addr_t *ga1 = (tsol_gcgrp_addr_t *)key1; 340 tsol_gcgrp_addr_t *ga2 = (tsol_gcgrp_addr_t *)key2; 341 342 ASSERT(ga1 != NULL && ga2 != NULL); 343 344 /* Address family must match */ 345 if (ga1->ga_af != ga2->ga_af) 346 return (-1); 347 348 if (ga1->ga_addr.s6_addr32[0] == ga2->ga_addr.s6_addr32[0] 
&& 349 ga1->ga_addr.s6_addr32[1] == ga2->ga_addr.s6_addr32[1] && 350 ga1->ga_addr.s6_addr32[2] == ga2->ga_addr.s6_addr32[2] && 351 ga1->ga_addr.s6_addr32[3] == ga2->ga_addr.s6_addr32[3]) 352 return (0); 353 354 /* No match; not found */ 355 return (-1); 356 } 357 358 #define RTSAFLAGS "\20\11cipso\3doi\2max_sl\1min_sl" 359 360 int 361 rtsa_validate(const struct rtsa_s *rp) 362 { 363 uint32_t mask = rp->rtsa_mask; 364 365 /* RTSA_CIPSO must be set, and DOI must not be zero */ 366 if ((mask & RTSA_CIPSO) == 0 || rp->rtsa_doi == 0) { 367 DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *, 368 "rtsa(1) lacks flag or has 0 doi.", 369 rtsa_s *, rp); 370 return (EINVAL); 371 } 372 /* 373 * SL range must be specified, and it must have its 374 * upper bound dominating its lower bound. 375 */ 376 if ((mask & RTSA_SLRANGE) != RTSA_SLRANGE || 377 !bldominates(&rp->rtsa_slrange.upper_bound, 378 &rp->rtsa_slrange.lower_bound)) { 379 DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *, 380 "rtsa(1) min_sl and max_sl not set or max_sl is " 381 "not dominating.", rtsa_s *, rp); 382 return (EINVAL); 383 } 384 return (0); 385 } 386 387 /* 388 * A brief explanation of the reference counting scheme: 389 * 390 * Prefix IREs have a non-NULL igsa_gc and a NULL igsa_gcgrp; 391 * IRE_CACHEs have it vice-versa. 392 * 393 * Apart from dynamic references due to to reference holds done 394 * actively by threads, we have the following references: 395 * 396 * gcdb_refcnt: 397 * - Every tsol_gc_t pointing to a tsol_gcdb_t contributes a reference 398 * to the gcdb_refcnt. 399 * 400 * gc_refcnt: 401 * - A prefix IRE that points to an igsa_gc contributes a reference 402 * to the gc_refcnt. 403 * 404 * gcgrp_refcnt: 405 * - An IRE_CACHE that points to an igsa_gcgrp contributes a reference 406 * to the gcgrp_refcnt of the associated tsol_gcgrp_t. 407 * - Every tsol_gc_t in the chain headed by tsol_gcgrp_t contributes 408 * a reference to the gcgrp_refcnt. 
409 */ 410 static tsol_gcdb_t * 411 gcdb_lookup(struct rtsa_s *rp, boolean_t alloc) 412 { 413 tsol_gcdb_t *gcdb = NULL; 414 415 if (rtsa_validate(rp) != 0) 416 return (NULL); 417 418 mutex_enter(&gcdb_lock); 419 /* Find a copy in the cache; otherwise, create one and cache it */ 420 if (mod_hash_find(gcdb_hash, (mod_hash_key_t)rp, 421 (mod_hash_val_t *)&gcdb) == 0) { 422 gcdb->gcdb_refcnt++; 423 ASSERT(gcdb->gcdb_refcnt != 0); 424 425 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__lookup, char *, 426 "gcdb(1) is in gcdb_hash(global)", tsol_gcdb_t *, gcdb); 427 } else if (alloc) { 428 gcdb = kmem_zalloc(sizeof (*gcdb), KM_NOSLEEP); 429 if (gcdb != NULL) { 430 gcdb->gcdb_refcnt = 1; 431 gcdb->gcdb_mask = rp->rtsa_mask; 432 gcdb->gcdb_doi = rp->rtsa_doi; 433 gcdb->gcdb_slrange = rp->rtsa_slrange; 434 435 if (mod_hash_insert(gcdb_hash, 436 (mod_hash_key_t)&gcdb->gcdb_attr, 437 (mod_hash_val_t)gcdb) != 0) { 438 mutex_exit(&gcdb_lock); 439 kmem_free(gcdb, sizeof (*gcdb)); 440 return (NULL); 441 } 442 443 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__insert, char *, 444 "gcdb(1) inserted in gcdb_hash(global)", 445 tsol_gcdb_t *, gcdb); 446 } 447 } 448 mutex_exit(&gcdb_lock); 449 return (gcdb); 450 } 451 452 static void 453 gcdb_inactive(tsol_gcdb_t *gcdb) 454 { 455 ASSERT(MUTEX_HELD(&gcdb_lock)); 456 ASSERT(gcdb != NULL && gcdb->gcdb_refcnt == 0); 457 458 (void) mod_hash_remove(gcdb_hash, (mod_hash_key_t)&gcdb->gcdb_attr, 459 (mod_hash_val_t *)&gcdb); 460 461 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__remove, char *, 462 "gcdb(1) removed from gcdb_hash(global)", 463 tsol_gcdb_t *, gcdb); 464 kmem_free(gcdb, sizeof (*gcdb)); 465 } 466 467 tsol_gc_t * 468 gc_create(struct rtsa_s *rp, tsol_gcgrp_t *gcgrp, boolean_t *gcgrp_xtrarefp) 469 { 470 tsol_gc_t *gc; 471 tsol_gcdb_t *gcdb; 472 473 *gcgrp_xtrarefp = B_TRUE; 474 475 rw_enter(&gcgrp->gcgrp_rwlock, RW_WRITER); 476 if ((gcdb = gcdb_lookup(rp, B_TRUE)) == NULL) { 477 rw_exit(&gcgrp->gcgrp_rwlock); 478 return (NULL); 479 } 480 481 for (gc 
= gcgrp->gcgrp_head; gc != NULL; gc = gc->gc_next) { 482 if (gc->gc_db == gcdb) { 483 ASSERT(gc->gc_grp == gcgrp); 484 485 gc->gc_refcnt++; 486 ASSERT(gc->gc_refcnt != 0); 487 488 GCDB_REFRELE(gcdb); 489 490 DTRACE_PROBE3(tx__gcdb__log__info__gc__create, 491 char *, "found gc(1) in gcgrp(2)", 492 tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp); 493 rw_exit(&gcgrp->gcgrp_rwlock); 494 return (gc); 495 } 496 } 497 498 gc = kmem_zalloc(sizeof (*gc), KM_NOSLEEP); 499 if (gc != NULL) { 500 if (gcgrp->gcgrp_head == NULL) { 501 gcgrp->gcgrp_head = gcgrp->gcgrp_tail = gc; 502 } else { 503 gcgrp->gcgrp_tail->gc_next = gc; 504 gc->gc_prev = gcgrp->gcgrp_tail; 505 gcgrp->gcgrp_tail = gc; 506 } 507 gcgrp->gcgrp_count++; 508 ASSERT(gcgrp->gcgrp_count != 0); 509 510 /* caller has incremented gcgrp reference for us */ 511 gc->gc_grp = gcgrp; 512 513 gc->gc_db = gcdb; 514 gc->gc_refcnt = 1; 515 516 DTRACE_PROBE3(tx__gcdb__log__info__gc__create, char *, 517 "added gc(1) to gcgrp(2)", tsol_gc_t *, gc, 518 tsol_gcgrp_t *, gcgrp); 519 520 *gcgrp_xtrarefp = B_FALSE; 521 } 522 rw_exit(&gcgrp->gcgrp_rwlock); 523 524 return (gc); 525 } 526 527 void 528 gc_inactive(tsol_gc_t *gc) 529 { 530 tsol_gcgrp_t *gcgrp = gc->gc_grp; 531 532 ASSERT(gcgrp != NULL); 533 ASSERT(RW_WRITE_HELD(&gcgrp->gcgrp_rwlock)); 534 ASSERT(gc->gc_refcnt == 0); 535 536 if (gc->gc_prev != NULL) 537 gc->gc_prev->gc_next = gc->gc_next; 538 else 539 gcgrp->gcgrp_head = gc->gc_next; 540 if (gc->gc_next != NULL) 541 gc->gc_next->gc_prev = gc->gc_prev; 542 else 543 gcgrp->gcgrp_tail = gc->gc_prev; 544 ASSERT(gcgrp->gcgrp_count > 0); 545 gcgrp->gcgrp_count--; 546 547 /* drop lock before it's destroyed */ 548 rw_exit(&gcgrp->gcgrp_rwlock); 549 550 DTRACE_PROBE3(tx__gcdb__log__info__gc__remove, char *, 551 "removed inactive gc(1) from gcgrp(2)", 552 tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp); 553 554 GCGRP_REFRELE(gcgrp); 555 556 gc->gc_grp = NULL; 557 gc->gc_prev = gc->gc_next = NULL; 558 559 if (gc->gc_db != NULL) 560 
GCDB_REFRELE(gc->gc_db); 561 562 kmem_free(gc, sizeof (*gc)); 563 } 564 565 tsol_gcgrp_t * 566 gcgrp_lookup(tsol_gcgrp_addr_t *ga, boolean_t alloc) 567 { 568 tsol_gcgrp_t *gcgrp = NULL; 569 mod_hash_t *hashp; 570 571 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6); 572 573 hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash; 574 575 mutex_enter(&gcgrp_lock); 576 if (mod_hash_find(hashp, (mod_hash_key_t)ga, 577 (mod_hash_val_t *)&gcgrp) == 0) { 578 gcgrp->gcgrp_refcnt++; 579 ASSERT(gcgrp->gcgrp_refcnt != 0); 580 581 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__lookup, char *, 582 "found gcgrp(1) in hash(2)", tsol_gcgrp_t *, gcgrp, 583 mod_hash_t *, hashp); 584 585 } else if (alloc) { 586 gcgrp = kmem_zalloc(sizeof (*gcgrp), KM_NOSLEEP); 587 if (gcgrp != NULL) { 588 gcgrp->gcgrp_refcnt = 1; 589 rw_init(&gcgrp->gcgrp_rwlock, NULL, RW_DEFAULT, NULL); 590 bcopy(ga, &gcgrp->gcgrp_addr, sizeof (*ga)); 591 592 if (mod_hash_insert(hashp, 593 (mod_hash_key_t)&gcgrp->gcgrp_addr, 594 (mod_hash_val_t)gcgrp) != 0) { 595 mutex_exit(&gcgrp_lock); 596 kmem_free(gcgrp, sizeof (*gcgrp)); 597 return (NULL); 598 } 599 600 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__insert, 601 char *, "inserted gcgrp(1) in hash(2)", 602 tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp); 603 } 604 } 605 mutex_exit(&gcgrp_lock); 606 return (gcgrp); 607 } 608 609 void 610 gcgrp_inactive(tsol_gcgrp_t *gcgrp) 611 { 612 tsol_gcgrp_addr_t *ga; 613 mod_hash_t *hashp; 614 615 ASSERT(MUTEX_HELD(&gcgrp_lock)); 616 ASSERT(!RW_LOCK_HELD(&gcgrp->gcgrp_rwlock)); 617 ASSERT(gcgrp != NULL && gcgrp->gcgrp_refcnt == 0); 618 ASSERT(gcgrp->gcgrp_head == NULL && gcgrp->gcgrp_count == 0); 619 620 ga = &gcgrp->gcgrp_addr; 621 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6); 622 623 hashp = (ga->ga_af == AF_INET) ? 
gcgrp4_hash : gcgrp6_hash; 624 (void) mod_hash_remove(hashp, (mod_hash_key_t)ga, 625 (mod_hash_val_t *)&gcgrp); 626 rw_destroy(&gcgrp->gcgrp_rwlock); 627 628 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__remove, char *, 629 "removed inactive gcgrp(1) from hash(2)", 630 tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp); 631 632 kmem_free(gcgrp, sizeof (*gcgrp)); 633 } 634 635 636 /* 637 * Assign a sensitivity label to inbound traffic which arrived without 638 * an explicit on-the-wire label. 639 * 640 * In the case of CIPSO-type hosts, we assume packets arriving without 641 * a label are at the most sensitive label known for the host, most 642 * likely involving out-of-band key management traffic (such as IKE, 643 * etc.,) 644 */ 645 static boolean_t 646 tsol_find_unlabeled_label(tsol_tpc_t *rhtp, bslabel_t *sl, uint32_t *doi) 647 { 648 *doi = rhtp->tpc_tp.tp_doi; 649 switch (rhtp->tpc_tp.host_type) { 650 case UNLABELED: 651 *sl = rhtp->tpc_tp.tp_def_label; 652 break; 653 case SUN_CIPSO: 654 *sl = rhtp->tpc_tp.tp_sl_range_cipso.upper_bound; 655 break; 656 default: 657 return (B_FALSE); 658 } 659 setbltype(sl, SUN_SL_ID); 660 return (B_TRUE); 661 } 662 663 /* 664 * Converts CIPSO option to sensitivity label. 
665 * Validity checks based on restrictions defined in 666 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) (draft-ietf-cipso-ipsecurity) 667 */ 668 static boolean_t 669 cipso_to_sl(const uchar_t *option, bslabel_t *sl) 670 { 671 const struct cipso_option *co = (const struct cipso_option *)option; 672 const struct cipso_tag_type_1 *tt1; 673 674 tt1 = (struct cipso_tag_type_1 *)&co->cipso_tag_type[0]; 675 if (tt1->tag_type != 1 || 676 tt1->tag_length < TSOL_TT1_MIN_LENGTH || 677 tt1->tag_length > TSOL_TT1_MAX_LENGTH || 678 tt1->tag_length + TSOL_CIPSO_TAG_OFFSET > co->cipso_length) 679 return (B_FALSE); 680 681 bsllow(sl); /* assumed: sets compartments to all zeroes */ 682 LCLASS_SET((_bslabel_impl_t *)sl, tt1->tag_sl); 683 bcopy(tt1->tag_cat, &((_bslabel_impl_t *)sl)->compartments, 684 tt1->tag_length - TSOL_TT1_MIN_LENGTH); 685 return (B_TRUE); 686 } 687 688 /* 689 * If present, parse a CIPSO label in the incoming packet and 690 * construct a ts_label_t that reflects the CIPSO label and attach it 691 * to the dblk cred. Later as the mblk flows up through the stack any 692 * code that needs to examine the packet label can inspect the label 693 * from the dblk cred. This function is called right in ip_rput for 694 * all packets, i.e. locally destined and to be forwarded packets. The 695 * forwarding path needs to examine the label to determine how to 696 * forward the packet. 697 * 698 * This routine pulls all message text up into the first mblk. 699 * For IPv4, only the first 20 bytes of the IP header are guaranteed 700 * to exist. For IPv6, only the IPv6 header is guaranteed to exist. 
701 */ 702 boolean_t 703 tsol_get_pkt_label(mblk_t *mp, int version) 704 { 705 tsol_tpc_t *src_rhtp = NULL; 706 uchar_t *opt_ptr = NULL; 707 const ipha_t *ipha; 708 bslabel_t sl; 709 uint32_t doi; 710 tsol_ip_label_t label_type; 711 uint32_t label_flags = 0; /* flags to set in label */ 712 const cipso_option_t *co; 713 const void *src; 714 const ip6_t *ip6h; 715 cred_t *credp; 716 pid_t cpid; 717 int proto; 718 719 ASSERT(DB_TYPE(mp) == M_DATA); 720 721 if (mp->b_cont != NULL && !pullupmsg(mp, -1)) 722 return (B_FALSE); 723 724 if (version == IPV4_VERSION) { 725 ASSERT(MBLKL(mp) >= IP_SIMPLE_HDR_LENGTH); 726 ipha = (const ipha_t *)mp->b_rptr; 727 src = &ipha->ipha_src; 728 if (!tsol_get_option_v4(mp, &label_type, &opt_ptr)) 729 return (B_FALSE); 730 } else { 731 ASSERT(MBLKL(mp) >= IPV6_HDR_LEN); 732 ip6h = (const ip6_t *)mp->b_rptr; 733 src = &ip6h->ip6_src; 734 if (!tsol_get_option_v6(mp, &label_type, &opt_ptr)) 735 return (B_FALSE); 736 } 737 738 switch (label_type) { 739 case OPT_CIPSO: 740 /* 741 * Convert the CIPSO label to the internal format 742 * and attach it to the dblk cred. 
743 * Validity checks based on restrictions defined in 744 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) 745 * (draft-ietf-cipso-ipsecurity) 746 */ 747 if (version == IPV6_VERSION && ip6opt_ls == 0) 748 return (B_FALSE); 749 co = (const struct cipso_option *)opt_ptr; 750 if ((co->cipso_length < 751 TSOL_CIPSO_TAG_OFFSET + TSOL_TT1_MIN_LENGTH) || 752 (co->cipso_length > IP_MAX_OPT_LENGTH)) 753 return (B_FALSE); 754 bcopy(co->cipso_doi, &doi, sizeof (doi)); 755 doi = ntohl(doi); 756 if (!cipso_to_sl(opt_ptr, &sl)) 757 return (B_FALSE); 758 setbltype(&sl, SUN_SL_ID); 759 760 /* 761 * If the source was unlabeled, then flag as such, 762 * (since CIPSO routers may add headers) 763 */ 764 765 if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL) 766 return (B_FALSE); 767 768 if (src_rhtp->tpc_tp.host_type == UNLABELED) 769 label_flags = TSLF_UNLABELED; 770 771 TPC_RELE(src_rhtp); 772 773 break; 774 775 case OPT_NONE: 776 /* 777 * Handle special cases that may not be labeled, even 778 * though the sending system may otherwise be configured as 779 * labeled. 
780 * - IGMP 781 * - IPv4 ICMP Router Discovery 782 * - IPv6 Neighbor Discovery 783 * - IPsec ESP 784 */ 785 if (version == IPV4_VERSION) { 786 proto = ipha->ipha_protocol; 787 if (proto == IPPROTO_IGMP) 788 return (B_TRUE); 789 if (proto == IPPROTO_ICMP) { 790 const struct icmp *icmp = (const struct icmp *) 791 (mp->b_rptr + IPH_HDR_LENGTH(ipha)); 792 793 if ((uchar_t *)icmp + ICMP_MINLEN > mp->b_wptr) 794 return (B_FALSE); 795 if (icmp->icmp_type == ICMP_ROUTERADVERT || 796 icmp->icmp_type == ICMP_ROUTERSOLICIT) 797 return (B_TRUE); 798 } 799 } else { 800 proto = ip6h->ip6_nxt; 801 if (proto == IPPROTO_ICMPV6) { 802 const icmp6_t *icmp6 = (const icmp6_t *) 803 (mp->b_rptr + IPV6_HDR_LEN); 804 805 if ((uchar_t *)icmp6 + ICMP6_MINLEN > 806 mp->b_wptr) 807 return (B_FALSE); 808 if (icmp6->icmp6_type >= MLD_LISTENER_QUERY && 809 icmp6->icmp6_type <= ICMP6_MAX_INFO_TYPE) 810 return (B_TRUE); 811 } 812 } 813 814 /* 815 * Look up the tnrhtp database and get the implicit label 816 * that is associated with the sending host and attach 817 * it to the packet. 818 */ 819 if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL) 820 return (B_FALSE); 821 822 /* 823 * If peer is label-aware, mark as "implicit" rather than 824 * "unlabeled" to cause appropriate mac-exempt processing 825 * to happen. 
826 */ 827 if (src_rhtp->tpc_tp.host_type == SUN_CIPSO) 828 label_flags = TSLF_IMPLICIT_IN; 829 else if (src_rhtp->tpc_tp.host_type == UNLABELED) 830 label_flags = TSLF_UNLABELED; 831 else { 832 DTRACE_PROBE2(tx__get__pkt__label, char *, 833 "template(1) has unknown hosttype", 834 tsol_tpc_t *, src_rhtp); 835 } 836 837 838 if (!tsol_find_unlabeled_label(src_rhtp, &sl, &doi)) { 839 TPC_RELE(src_rhtp); 840 return (B_FALSE); 841 } 842 TPC_RELE(src_rhtp); 843 break; 844 845 default: 846 return (B_FALSE); 847 } 848 849 /* Make sure no other thread is messing with this mblk */ 850 ASSERT(DB_REF(mp) == 1); 851 /* Preserve db_cpid */ 852 credp = msg_extractcred(mp, &cpid); 853 if (credp == NULL) { 854 credp = newcred_from_bslabel(&sl, doi, KM_NOSLEEP); 855 } else { 856 cred_t *newcr; 857 858 newcr = copycred_from_bslabel(credp, &sl, doi, 859 KM_NOSLEEP); 860 crfree(credp); 861 credp = newcr; 862 } 863 if (credp == NULL) 864 return (B_FALSE); 865 866 crgetlabel(credp)->tsl_flags |= label_flags; 867 868 mblk_setcred(mp, credp, cpid); 869 crfree(credp); /* mblk has ref on cred */ 870 871 return (B_TRUE); 872 } 873 874 /* 875 * This routine determines whether the given packet should be accepted locally. 876 * It does a range/set check on the packet's label by looking up the given 877 * address in the remote host database. 878 */ 879 boolean_t 880 tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version, 881 boolean_t shared_addr, const conn_t *connp) 882 { 883 const cred_t *credp; 884 ts_label_t *plabel, *conn_plabel; 885 tsol_tpc_t *tp; 886 boolean_t retv; 887 const bslabel_t *label, *conn_label; 888 889 /* 890 * The cases in which this can happen are: 891 * - IPv6 Router Alert, where ip_rput_data_v6 deliberately skips 892 * over the label attachment process. 893 * - MLD output looped-back to ourselves. 894 * - IPv4 Router Discovery, where tsol_get_pkt_label intentionally 895 * avoids the labeling process. 
896 * We trust that all valid paths in the code set the cred pointer when 897 * needed. 898 */ 899 if ((credp = msg_getcred(mp, NULL)) == NULL) 900 return (B_TRUE); 901 902 /* 903 * If this packet is from the inside (not a remote host) and has the 904 * same zoneid as the selected destination, then no checks are 905 * necessary. Membership in the zone is enough proof. This is 906 * intended to be a hot path through this function. 907 */ 908 if (!crisremote(credp) && 909 crgetzone(credp) == crgetzone(connp->conn_cred)) 910 return (B_TRUE); 911 912 plabel = crgetlabel(credp); 913 conn_plabel = crgetlabel(connp->conn_cred); 914 ASSERT(plabel != NULL && conn_plabel != NULL); 915 916 label = label2bslabel(plabel); 917 conn_label = label2bslabel(crgetlabel(connp->conn_cred)); 918 919 920 /* 921 * Implicitly labeled packets from label-aware sources 922 * go only to privileged receivers 923 */ 924 if ((plabel->tsl_flags & TSLF_IMPLICIT_IN) && 925 (connp->conn_mac_mode != CONN_MAC_IMPLICIT)) { 926 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac_impl, 927 char *, 928 "implicitly labeled packet mp(1) for conn(2) " 929 "which isn't in implicit mac mode", 930 mblk_t *, mp, conn_t *, connp); 931 932 return (B_FALSE); 933 } 934 935 936 /* 937 * MLPs are always validated using the range and set of the local 938 * address, even when the remote host is unlabeled. 939 */ 940 if (connp->conn_mlp_type == mlptBoth || 941 /* LINTED: no consequent */ 942 connp->conn_mlp_type == (shared_addr ? mlptShared : mlptPrivate)) { 943 ; 944 945 /* 946 * If this is a packet from an unlabeled sender, then we must apply 947 * different rules. If the label is equal to the zone's label, then 948 * it's allowed. If it's not equal, but the zone is either the global 949 * zone or the label is dominated by the zone's label, then allow it 950 * as long as it's in the range configured for the destination. 
951 */ 952 } else if (plabel->tsl_flags & TSLF_UNLABELED) { 953 if (plabel->tsl_doi == conn_plabel->tsl_doi && 954 blequal(label, conn_label)) 955 return (B_TRUE); 956 957 /* 958 * conn_zoneid is global for an exclusive stack, thus we use 959 * conn_cred to get the zoneid 960 */ 961 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) || 962 (crgetzoneid(connp->conn_cred) != GLOBAL_ZONEID && 963 (plabel->tsl_doi != conn_plabel->tsl_doi || 964 !bldominates(conn_label, label)))) { 965 DTRACE_PROBE3( 966 tx__ip__log__drop__receivelocal__mac_unl, 967 char *, 968 "unlabeled packet mp(1) fails mac for conn(2)", 969 mblk_t *, mp, conn_t *, connp); 970 return (B_FALSE); 971 } 972 973 /* 974 * If this is a packet from a labeled sender, verify the 975 * label on the packet matches the connection label. 976 */ 977 } else { 978 if (plabel->tsl_doi != conn_plabel->tsl_doi || 979 !blequal(label, conn_label)) { 980 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac__slp, 981 char *, 982 "packet mp(1) failed label match to SLP conn(2)", 983 mblk_t *, mp, conn_t *, connp); 984 return (B_FALSE); 985 } 986 /* 987 * No further checks will be needed if this is a zone- 988 * specific address because (1) The process for bringing up 989 * the interface ensures the zone's label is within the zone- 990 * specific address's valid label range; (2) For cases where 991 * the conn is bound to the unspecified addresses, ip fanout 992 * logic ensures conn's zoneid equals the dest addr's zoneid; 993 * (3) Mac-exempt and mlp logic above already handle all 994 * cases where the zone label may not be the same as the 995 * conn label. 
996 */ 997 if (!shared_addr) 998 return (B_TRUE); 999 } 1000 1001 tp = find_tpc(addr, version, B_FALSE); 1002 if (tp == NULL) { 1003 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__no__tnr, 1004 char *, "dropping mp(1), host(2) lacks entry", 1005 mblk_t *, mp, void *, addr); 1006 return (B_FALSE); 1007 } 1008 1009 /* 1010 * The local host address should not be unlabeled at this point. The 1011 * only way this can happen is that the destination isn't unicast. We 1012 * assume that the packet should not have had a label, and thus should 1013 * have been handled by the TSLF_UNLABELED logic above. 1014 */ 1015 if (tp->tpc_tp.host_type == UNLABELED) { 1016 retv = B_FALSE; 1017 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__flag, char *, 1018 "mp(1) unlabeled source, but tp is not unlabeled.", 1019 mblk_t *, mp, tsol_tpc_t *, tp); 1020 1021 } else if (tp->tpc_tp.host_type != SUN_CIPSO) { 1022 retv = B_FALSE; 1023 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__tptype, char *, 1024 "delivering mp(1), found unrecognized tpc(2) type.", 1025 mblk_t *, mp, tsol_tpc_t *, tp); 1026 1027 } else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) { 1028 retv = B_FALSE; 1029 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *, 1030 "mp(1) could not be delievered to tp(2), doi mismatch", 1031 mblk_t *, mp, tsol_tpc_t *, tp); 1032 1033 } else if (!_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) && 1034 !blinlset(label, tp->tpc_tp.tp_sl_set_cipso)) { 1035 retv = B_FALSE; 1036 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *, 1037 "mp(1) could not be delievered to tp(2), bad mac", 1038 mblk_t *, mp, tsol_tpc_t *, tp); 1039 } else { 1040 retv = B_TRUE; 1041 } 1042 1043 TPC_RELE(tp); 1044 1045 return (retv); 1046 } 1047 1048 boolean_t 1049 tsol_can_accept_raw(mblk_t *mp, boolean_t check_host) 1050 { 1051 ts_label_t *plabel = NULL; 1052 tsol_tpc_t *src_rhtp, *dst_rhtp; 1053 boolean_t retv; 1054 cred_t *credp; 1055 1056 credp = msg_getcred(mp, NULL); 1057 if (credp != NULL) 1058 
plabel = crgetlabel(credp); 1059 1060 /* We are bootstrapping or the internal template was never deleted */ 1061 if (plabel == NULL) 1062 return (B_TRUE); 1063 1064 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { 1065 ipha_t *ipha = (ipha_t *)mp->b_rptr; 1066 1067 src_rhtp = find_tpc(&ipha->ipha_src, IPV4_VERSION, 1068 B_FALSE); 1069 if (src_rhtp == NULL) 1070 return (B_FALSE); 1071 dst_rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, 1072 B_FALSE); 1073 } else { 1074 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1075 1076 src_rhtp = find_tpc(&ip6h->ip6_src, IPV6_VERSION, 1077 B_FALSE); 1078 if (src_rhtp == NULL) 1079 return (B_FALSE); 1080 dst_rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION, 1081 B_FALSE); 1082 } 1083 if (dst_rhtp == NULL) { 1084 TPC_RELE(src_rhtp); 1085 return (B_FALSE); 1086 } 1087 1088 if (label2doi(plabel) != src_rhtp->tpc_tp.tp_doi) { 1089 retv = B_FALSE; 1090 1091 /* 1092 * Check that the packet's label is in the correct range for labeled 1093 * sender, or is equal to the default label for unlabeled sender. 1094 */ 1095 } else if ((src_rhtp->tpc_tp.host_type != UNLABELED && 1096 !_blinrange(label2bslabel(plabel), 1097 &src_rhtp->tpc_tp.tp_sl_range_cipso) && 1098 !blinlset(label2bslabel(plabel), 1099 src_rhtp->tpc_tp.tp_sl_set_cipso)) || 1100 (src_rhtp->tpc_tp.host_type == UNLABELED && 1101 !blequal(&plabel->tsl_label, &src_rhtp->tpc_tp.tp_def_label))) { 1102 retv = B_FALSE; 1103 1104 } else if (check_host) { 1105 retv = B_TRUE; 1106 1107 /* 1108 * Until we have SL range in the Zone structure, pass it 1109 * when our own address lookup returned an internal entry. 
1110 */ 1111 } else switch (dst_rhtp->tpc_tp.host_type) { 1112 case UNLABELED: 1113 retv = B_TRUE; 1114 break; 1115 1116 case SUN_CIPSO: 1117 retv = _blinrange(label2bslabel(plabel), 1118 &dst_rhtp->tpc_tp.tp_sl_range_cipso) || 1119 blinlset(label2bslabel(plabel), 1120 dst_rhtp->tpc_tp.tp_sl_set_cipso); 1121 break; 1122 1123 default: 1124 retv = B_FALSE; 1125 } 1126 TPC_RELE(src_rhtp); 1127 TPC_RELE(dst_rhtp); 1128 return (retv); 1129 } 1130 1131 /* 1132 * This routine determines whether a response to a failed packet delivery or 1133 * connection should be sent back. By default, the policy is to allow such 1134 * messages to be sent at all times, as these messages reveal little useful 1135 * information and are healthy parts of TCP/IP networking. 1136 * 1137 * If tsol_strict_error is set, then we do strict tests: if the packet label is 1138 * within the label range/set of this host/zone, return B_TRUE; otherwise 1139 * return B_FALSE, which causes the packet to be dropped silently. 1140 * 1141 * Note that tsol_get_pkt_label will cause the packet to drop if the sender is 1142 * marked as labeled in the remote host database, but the packet lacks a label. 1143 * This means that we don't need to do a lookup on the source; the 1144 * TSLF_UNLABELED flag is sufficient. 1145 */ 1146 boolean_t 1147 tsol_can_reply_error(const mblk_t *mp) 1148 { 1149 ts_label_t *plabel = NULL; 1150 tsol_tpc_t *rhtp; 1151 const ipha_t *ipha; 1152 const ip6_t *ip6h; 1153 boolean_t retv; 1154 bslabel_t *pktbs; 1155 cred_t *credp; 1156 1157 /* Caller must pull up at least the IP header */ 1158 ASSERT(MBLKL(mp) >= (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION ? 
1159 sizeof (*ipha) : sizeof (*ip6h))); 1160 1161 if (!tsol_strict_error) 1162 return (B_TRUE); 1163 1164 credp = msg_getcred(mp, NULL); 1165 if (credp != NULL) 1166 plabel = crgetlabel(credp); 1167 1168 /* We are bootstrapping or the internal template was never deleted */ 1169 if (plabel == NULL) 1170 return (B_TRUE); 1171 1172 if (plabel->tsl_flags & TSLF_IMPLICIT_IN) { 1173 DTRACE_PROBE3(tx__ip__log__drop__replyerror__unresolved__label, 1174 char *, 1175 "cannot send error report for packet mp(1) with " 1176 "unresolved security label sl(2)", 1177 mblk_t *, mp, ts_label_t *, plabel); 1178 return (B_FALSE); 1179 } 1180 1181 1182 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { 1183 ipha = (const ipha_t *)mp->b_rptr; 1184 rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, B_FALSE); 1185 } else { 1186 ip6h = (const ip6_t *)mp->b_rptr; 1187 rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION, B_FALSE); 1188 } 1189 1190 if (rhtp == NULL || label2doi(plabel) != rhtp->tpc_tp.tp_doi) { 1191 retv = B_FALSE; 1192 } else { 1193 /* 1194 * If we're in the midst of forwarding, then the destination 1195 * address might not be labeled. In that case, allow unlabeled 1196 * packets through only if the default label is the same, and 1197 * labeled ones if they dominate. 1198 */ 1199 pktbs = label2bslabel(plabel); 1200 switch (rhtp->tpc_tp.host_type) { 1201 case UNLABELED: 1202 if (plabel->tsl_flags & TSLF_UNLABELED) { 1203 retv = blequal(pktbs, 1204 &rhtp->tpc_tp.tp_def_label); 1205 } else { 1206 retv = bldominates(pktbs, 1207 &rhtp->tpc_tp.tp_def_label); 1208 } 1209 break; 1210 1211 case SUN_CIPSO: 1212 retv = _blinrange(pktbs, 1213 &rhtp->tpc_tp.tp_sl_range_cipso) || 1214 blinlset(pktbs, rhtp->tpc_tp.tp_sl_set_cipso); 1215 break; 1216 1217 default: 1218 retv = B_FALSE; 1219 break; 1220 } 1221 } 1222 1223 if (rhtp != NULL) 1224 TPC_RELE(rhtp); 1225 1226 return (retv); 1227 } 1228 1229 /* 1230 * Finds the zone associated with the given packet. 
Returns GLOBAL_ZONEID if 1231 * the zone cannot be located. 1232 * 1233 * This is used by the classifier when the packet matches an ALL_ZONES IRE, and 1234 * there's no MLP defined. 1235 * 1236 * Note that we assume that this is only invoked in the ALL_ZONES case. 1237 * Handling other cases would require handle exclusive stack zones where either 1238 * this routine or the callers would have to map from 1239 * the zoneid (zone->zone_id) to what IP uses in conn_zoneid etc. 1240 */ 1241 zoneid_t 1242 tsol_packet_to_zoneid(const mblk_t *mp) 1243 { 1244 cred_t *cr = msg_getcred(mp, NULL); 1245 zone_t *zone; 1246 ts_label_t *label; 1247 1248 if (cr != NULL) { 1249 if ((label = crgetlabel(cr)) != NULL) { 1250 zone = zone_find_by_label(label); 1251 if (zone != NULL) { 1252 zoneid_t zoneid = zone->zone_id; 1253 1254 zone_rele(zone); 1255 return (zoneid); 1256 } 1257 } 1258 } 1259 return (GLOBAL_ZONEID); 1260 } 1261 1262 int 1263 tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl) 1264 { 1265 int error = 0; 1266 tsol_ire_gw_secattr_t *attrp = NULL; 1267 tsol_tnrhc_t *gw_rhc = NULL; 1268 tsol_gcgrp_t *gcgrp = NULL; 1269 tsol_gc_t *gc = NULL; 1270 in_addr_t ga_addr4; 1271 void *paddr = NULL; 1272 1273 /* Not in Trusted mode or IRE is local/loopback/broadcast/interface */ 1274 if (!is_system_labeled() || 1275 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST | 1276 IRE_INTERFACE))) 1277 goto done; 1278 1279 /* 1280 * If we don't have a label to compare with, or the IRE does not 1281 * contain any gateway security attributes, there's not much that 1282 * we can do. We let the former case pass, and the latter fail, 1283 * since the IRE doesn't qualify for a match due to the lack of 1284 * security attributes. 
1285 */ 1286 if (tsl == NULL || ire->ire_gw_secattr == NULL) { 1287 if (tsl != NULL) { 1288 DTRACE_PROBE3( 1289 tx__ip__log__drop__irematch__nogwsec, char *, 1290 "ire(1) lacks ire_gw_secattr when matching " 1291 "label(2)", ire_t *, ire, ts_label_t *, tsl); 1292 error = EACCES; 1293 } 1294 goto done; 1295 } 1296 1297 attrp = ire->ire_gw_secattr; 1298 1299 /* 1300 * The possible lock order scenarios related to the tsol gateway 1301 * attribute locks are documented at the beginning of ip.c in the 1302 * lock order scenario section. 1303 */ 1304 mutex_enter(&attrp->igsa_lock); 1305 1306 /* 1307 * Depending on the IRE type (prefix vs. cache), we seek the group 1308 * structure which contains all security credentials of the gateway. 1309 * A prefix IRE is associated with at most one gateway credential, 1310 * while a cache IRE is associated with every credentials that the 1311 * gateway has. 1312 */ 1313 if ((gc = attrp->igsa_gc) != NULL) { /* prefix */ 1314 gcgrp = gc->gc_grp; 1315 ASSERT(gcgrp != NULL); 1316 rw_enter(&gcgrp->gcgrp_rwlock, RW_READER); 1317 } else if ((gcgrp = attrp->igsa_gcgrp) != NULL) { /* cache */ 1318 rw_enter(&gcgrp->gcgrp_rwlock, RW_READER); 1319 gc = gcgrp->gcgrp_head; 1320 if (gc == NULL) { 1321 /* gc group is empty, so the drop lock now */ 1322 ASSERT(gcgrp->gcgrp_count == 0); 1323 rw_exit(&gcgrp->gcgrp_rwlock); 1324 gcgrp = NULL; 1325 } 1326 } 1327 1328 if (gcgrp != NULL) 1329 GCGRP_REFHOLD(gcgrp); 1330 1331 if ((gw_rhc = attrp->igsa_rhc) != NULL) { 1332 /* 1333 * If our cached entry has grown stale, then discard it so we 1334 * can get a new one. 
1335 */ 1336 if (gw_rhc->rhc_invalid || gw_rhc->rhc_tpc->tpc_invalid) { 1337 TNRHC_RELE(gw_rhc); 1338 attrp->igsa_rhc = gw_rhc = NULL; 1339 } else { 1340 TNRHC_HOLD(gw_rhc) 1341 } 1342 } 1343 1344 /* Last attempt at loading the template had failed; try again */ 1345 if (gw_rhc == NULL) { 1346 if (gcgrp != NULL) { 1347 tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr; 1348 1349 if (ire->ire_ipversion == IPV4_VERSION) { 1350 ASSERT(ga->ga_af == AF_INET); 1351 IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4); 1352 paddr = &ga_addr4; 1353 } else { 1354 ASSERT(ga->ga_af == AF_INET6); 1355 paddr = &ga->ga_addr; 1356 } 1357 } else if (ire->ire_ipversion == IPV6_VERSION && 1358 !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) { 1359 paddr = &ire->ire_gateway_addr_v6; 1360 } else if (ire->ire_ipversion == IPV4_VERSION && 1361 ire->ire_gateway_addr != INADDR_ANY) { 1362 paddr = &ire->ire_gateway_addr; 1363 } 1364 1365 /* We've found a gateway address to do the template lookup */ 1366 if (paddr != NULL) { 1367 ASSERT(gw_rhc == NULL); 1368 gw_rhc = find_rhc(paddr, ire->ire_ipversion, B_FALSE); 1369 if (gw_rhc != NULL) { 1370 /* 1371 * Note that if the lookup above returned an 1372 * internal template, we'll use it for the 1373 * time being, and do another lookup next 1374 * time around. 1375 */ 1376 /* Another thread has loaded the template? */ 1377 if (attrp->igsa_rhc != NULL) { 1378 TNRHC_RELE(gw_rhc) 1379 /* reload, it could be different */ 1380 gw_rhc = attrp->igsa_rhc; 1381 } else { 1382 attrp->igsa_rhc = gw_rhc; 1383 } 1384 /* 1385 * Hold an extra reference just like we did 1386 * above prior to dropping the igsa_lock. 1387 */ 1388 TNRHC_HOLD(gw_rhc) 1389 } 1390 } 1391 } 1392 1393 mutex_exit(&attrp->igsa_lock); 1394 /* Gateway template not found */ 1395 if (gw_rhc == NULL) { 1396 /* 1397 * If destination address is directly reachable through an 1398 * interface rather than through a learned route, pass it. 
1399 */ 1400 if (paddr != NULL) { 1401 DTRACE_PROBE3( 1402 tx__ip__log__drop__irematch__nogwtmpl, char *, 1403 "ire(1), label(2) off-link with no gw_rhc", 1404 ire_t *, ire, ts_label_t *, tsl); 1405 error = EINVAL; 1406 } 1407 goto done; 1408 } 1409 1410 if (gc != NULL) { 1411 tsol_gcdb_t *gcdb; 1412 /* 1413 * In the case of IRE_CACHE we've got one or more gateway 1414 * security credentials to compare against the passed in label. 1415 * Perform label range comparison against each security 1416 * credential of the gateway. In the case of a prefix ire 1417 * we need to match against the security attributes of 1418 * just the route itself, so the loop is executed only once. 1419 */ 1420 ASSERT(gcgrp != NULL); 1421 do { 1422 gcdb = gc->gc_db; 1423 if (tsl->tsl_doi == gcdb->gcdb_doi && 1424 _blinrange(&tsl->tsl_label, &gcdb->gcdb_slrange)) 1425 break; 1426 if (ire->ire_type == IRE_CACHE) 1427 gc = gc->gc_next; 1428 else 1429 gc = NULL; 1430 } while (gc != NULL); 1431 1432 if (gc == NULL) { 1433 DTRACE_PROBE3( 1434 tx__ip__log__drop__irematch__nogcmatched, 1435 char *, "ire(1), tsl(2): all gc failed match", 1436 ire_t *, ire, ts_label_t *, tsl); 1437 error = EACCES; 1438 } 1439 } else { 1440 /* 1441 * We didn't find any gateway credentials in the IRE 1442 * attributes; fall back to the gateway's template for 1443 * label range checks, if we are required to do so. 
1444 */ 1445 ASSERT(gw_rhc != NULL); 1446 switch (gw_rhc->rhc_tpc->tpc_tp.host_type) { 1447 case SUN_CIPSO: 1448 if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi || 1449 (!_blinrange(&tsl->tsl_label, 1450 &gw_rhc->rhc_tpc->tpc_tp.tp_sl_range_cipso) && 1451 !blinlset(&tsl->tsl_label, 1452 gw_rhc->rhc_tpc->tpc_tp.tp_sl_set_cipso))) { 1453 error = EACCES; 1454 DTRACE_PROBE4( 1455 tx__ip__log__drop__irematch__deftmpl, 1456 char *, "ire(1), tsl(2), gw_rhc(3) " 1457 "failed match (cipso gw)", 1458 ire_t *, ire, ts_label_t *, tsl, 1459 tsol_tnrhc_t *, gw_rhc); 1460 } 1461 break; 1462 1463 case UNLABELED: 1464 if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi || 1465 (!_blinrange(&tsl->tsl_label, 1466 &gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_range) && 1467 !blinlset(&tsl->tsl_label, 1468 gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_set))) { 1469 error = EACCES; 1470 DTRACE_PROBE4( 1471 tx__ip__log__drop__irematch__deftmpl, 1472 char *, "ire(1), tsl(2), gw_rhc(3) " 1473 "failed match (unlabeled gw)", 1474 ire_t *, ire, ts_label_t *, tsl, 1475 tsol_tnrhc_t *, gw_rhc); 1476 } 1477 break; 1478 } 1479 } 1480 1481 done: 1482 1483 if (gcgrp != NULL) { 1484 rw_exit(&gcgrp->gcgrp_rwlock); 1485 GCGRP_REFRELE(gcgrp); 1486 } 1487 1488 if (gw_rhc != NULL) 1489 TNRHC_RELE(gw_rhc) 1490 1491 return (error); 1492 } 1493 1494 /* 1495 * Performs label accreditation checks for packet forwarding. 1496 * 1497 * Returns a pointer to the modified mblk if allowed for forwarding, 1498 * or NULL if the packet must be dropped. 
1499 */ 1500 mblk_t * 1501 tsol_ip_forward(ire_t *ire, mblk_t *mp) 1502 { 1503 tsol_ire_gw_secattr_t *attrp = NULL; 1504 ipha_t *ipha; 1505 ip6_t *ip6h; 1506 const void *pdst; 1507 const void *psrc; 1508 boolean_t off_link; 1509 tsol_tpc_t *dst_rhtp, *gw_rhtp; 1510 tsol_ip_label_t label_type; 1511 uchar_t *opt_ptr = NULL; 1512 ts_label_t *tsl; 1513 uint8_t proto; 1514 int af, adjust; 1515 uint16_t iplen; 1516 boolean_t need_tpc_rele = B_FALSE; 1517 ipaddr_t *gw; 1518 ip_stack_t *ipst = ire->ire_ipst; 1519 cred_t *credp; 1520 pid_t pid; 1521 1522 ASSERT(ire != NULL && mp != NULL); 1523 ASSERT(ire->ire_stq != NULL); 1524 1525 af = (ire->ire_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6; 1526 1527 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) { 1528 ASSERT(ire->ire_ipversion == IPV4_VERSION); 1529 ipha = (ipha_t *)mp->b_rptr; 1530 psrc = &ipha->ipha_src; 1531 pdst = &ipha->ipha_dst; 1532 proto = ipha->ipha_protocol; 1533 1534 /* 1535 * off_link is TRUE if destination not directly reachable. 1536 * Surya note: we avoid creation of per-dst IRE_CACHE entries 1537 * for forwarded packets, so we set off_link to be TRUE 1538 * if the packet dst is different from the ire_addr of 1539 * the ire for the nexthop. 1540 */ 1541 off_link = ((ipha->ipha_dst != ire->ire_addr) || 1542 (ire->ire_gateway_addr != INADDR_ANY)); 1543 if (!tsol_get_option_v4(mp, &label_type, &opt_ptr)) 1544 return (NULL); 1545 } else { 1546 ASSERT(ire->ire_ipversion == IPV6_VERSION); 1547 ip6h = (ip6_t *)mp->b_rptr; 1548 psrc = &ip6h->ip6_src; 1549 pdst = &ip6h->ip6_dst; 1550 proto = ip6h->ip6_nxt; 1551 1552 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP && 1553 proto != IPPROTO_ICMPV6) { 1554 uint8_t *nexthdrp; 1555 uint16_t hdr_len; 1556 1557 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, 1558 &nexthdrp)) { 1559 /* malformed packet; drop it */ 1560 return (NULL); 1561 } 1562 proto = *nexthdrp; 1563 } 1564 1565 /* destination not directly reachable? 
*/ 1566 off_link = !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6); 1567 if (!tsol_get_option_v6(mp, &label_type, &opt_ptr)) 1568 return (NULL); 1569 } 1570 1571 if ((tsl = msg_getlabel(mp)) == NULL) 1572 return (mp); 1573 1574 if (tsl->tsl_flags & TSLF_IMPLICIT_IN) { 1575 DTRACE_PROBE3(tx__ip__log__drop__forward__unresolved__label, 1576 char *, 1577 "cannot forward packet mp(1) with unresolved " 1578 "security label sl(2)", 1579 mblk_t *, mp, ts_label_t *, tsl); 1580 1581 return (NULL); 1582 } 1583 1584 1585 ASSERT(psrc != NULL && pdst != NULL); 1586 dst_rhtp = find_tpc(pdst, ire->ire_ipversion, B_FALSE); 1587 1588 if (dst_rhtp == NULL) { 1589 /* 1590 * Without a template we do not know if forwarding 1591 * violates MAC 1592 */ 1593 DTRACE_PROBE3(tx__ip__log__drop__forward__nodst, char *, 1594 "mp(1) dropped, no template for destination ip4|6(2)", 1595 mblk_t *, mp, void *, pdst); 1596 return (NULL); 1597 } 1598 1599 /* 1600 * Gateway template must have existed for off-link destinations, 1601 * since tsol_ire_match_gwattr has ensured such condition. 1602 */ 1603 if (ire->ire_ipversion == IPV4_VERSION && off_link) { 1604 /* 1605 * Surya note: first check if we can get the gw_rhtp from 1606 * the ire_gw_secattr->igsa_rhc; if this is null, then 1607 * do a lookup based on the ire_addr (address of gw) 1608 */ 1609 if (ire->ire_gw_secattr != NULL && 1610 ire->ire_gw_secattr->igsa_rhc != NULL) { 1611 attrp = ire->ire_gw_secattr; 1612 gw_rhtp = attrp->igsa_rhc->rhc_tpc; 1613 } else { 1614 /* 1615 * use the ire_addr if this is the IRE_CACHE of nexthop 1616 */ 1617 gw = (ire->ire_gateway_addr == NULL? 
&ire->ire_addr : 1618 &ire->ire_gateway_addr); 1619 gw_rhtp = find_tpc(gw, ire->ire_ipversion, B_FALSE); 1620 need_tpc_rele = B_TRUE; 1621 } 1622 if (gw_rhtp == NULL) { 1623 DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *, 1624 "mp(1) dropped, no gateway in ire attributes(2)", 1625 mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp); 1626 mp = NULL; 1627 goto keep_label; 1628 } 1629 } 1630 if (ire->ire_ipversion == IPV6_VERSION && 1631 ((attrp = ire->ire_gw_secattr) == NULL || attrp->igsa_rhc == NULL || 1632 (gw_rhtp = attrp->igsa_rhc->rhc_tpc) == NULL) && off_link) { 1633 DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *, 1634 "mp(1) dropped, no gateway in ire attributes(2)", 1635 mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp); 1636 mp = NULL; 1637 goto keep_label; 1638 } 1639 1640 /* 1641 * Check that the label for the packet is acceptable 1642 * by destination host; otherwise, drop it. 1643 */ 1644 switch (dst_rhtp->tpc_tp.host_type) { 1645 case SUN_CIPSO: 1646 if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi || 1647 (!_blinrange(&tsl->tsl_label, 1648 &dst_rhtp->tpc_tp.tp_sl_range_cipso) && 1649 !blinlset(&tsl->tsl_label, 1650 dst_rhtp->tpc_tp.tp_sl_set_cipso))) { 1651 DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *, 1652 "labeled packet mp(1) dropped, label(2) fails " 1653 "destination(3) accredation check", 1654 mblk_t *, mp, ts_label_t *, tsl, 1655 tsol_tpc_t *, dst_rhtp); 1656 mp = NULL; 1657 goto keep_label; 1658 } 1659 break; 1660 1661 1662 case UNLABELED: 1663 if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi || 1664 !blequal(&dst_rhtp->tpc_tp.tp_def_label, 1665 &tsl->tsl_label)) { 1666 DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *, 1667 "unlabeled packet mp(1) dropped, label(2) fails " 1668 "destination(3) accredation check", 1669 mblk_t *, mp, ts_label_t *, tsl, 1670 tsol_tpc_t *, dst_rhtp); 1671 mp = NULL; 1672 goto keep_label; 1673 } 1674 break; 1675 } 1676 if (label_type == OPT_CIPSO) { 1677 /* 1678 * We keep the label on any of the following 
cases: 1679 * 1680 * 1. The destination is labeled (on/off-link). 1681 * 2. The unlabeled destination is off-link, 1682 * and the next hop gateway is labeled. 1683 */ 1684 if (dst_rhtp->tpc_tp.host_type != UNLABELED || 1685 (off_link && 1686 gw_rhtp->tpc_tp.host_type != UNLABELED)) 1687 goto keep_label; 1688 1689 /* 1690 * Strip off the CIPSO option from the packet because: the 1691 * unlabeled destination host is directly reachable through 1692 * an interface (on-link); or, the unlabeled destination host 1693 * is not directly reachable (off-link), and the next hop 1694 * gateway is unlabeled. 1695 */ 1696 adjust = (af == AF_INET) ? tsol_remove_secopt(ipha, MBLKL(mp)) : 1697 tsol_remove_secopt_v6(ip6h, MBLKL(mp)); 1698 1699 ASSERT(adjust <= 0); 1700 if (adjust != 0) { 1701 1702 /* adjust is negative */ 1703 ASSERT((mp->b_wptr + adjust) >= mp->b_rptr); 1704 mp->b_wptr += adjust; 1705 1706 if (af == AF_INET) { 1707 ipha = (ipha_t *)mp->b_rptr; 1708 iplen = ntohs(ipha->ipha_length) + adjust; 1709 ipha->ipha_length = htons(iplen); 1710 ipha->ipha_hdr_checksum = 0; 1711 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1712 } 1713 DTRACE_PROBE3(tx__ip__log__info__forward__adjust, 1714 char *, 1715 "mp(1) adjusted(2) for CIPSO option removal", 1716 mblk_t *, mp, int, adjust); 1717 } 1718 goto keep_label; 1719 } 1720 1721 ASSERT(label_type == OPT_NONE); 1722 ASSERT(dst_rhtp != NULL); 1723 1724 /* 1725 * We need to add CIPSO option if the destination or the next hop 1726 * gateway is labeled. Otherwise, pass the packet as is. 
1727 */ 1728 if (dst_rhtp->tpc_tp.host_type == UNLABELED && 1729 (!off_link || gw_rhtp->tpc_tp.host_type == UNLABELED)) 1730 goto keep_label; 1731 1732 1733 credp = msg_getcred(mp, &pid); 1734 if ((af == AF_INET && 1735 tsol_check_label(credp, &mp, CONN_MAC_DEFAULT, ipst, pid) != 0) || 1736 (af == AF_INET6 && 1737 tsol_check_label_v6(credp, &mp, CONN_MAC_DEFAULT, ipst, 1738 pid) != 0)) { 1739 mp = NULL; 1740 goto keep_label; 1741 } 1742 1743 if (af == AF_INET) { 1744 ipha = (ipha_t *)mp->b_rptr; 1745 ipha->ipha_hdr_checksum = 0; 1746 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1747 } 1748 1749 keep_label: 1750 TPC_RELE(dst_rhtp); 1751 if (need_tpc_rele && gw_rhtp != NULL) 1752 TPC_RELE(gw_rhtp); 1753 return (mp); 1754 } 1755 1756 /* 1757 * Name: tsol_pmtu_adjust() 1758 * 1759 * Returns the adjusted mtu after removing security option. 1760 * Removes/subtracts the option if the packet's cred indicates an unlabeled 1761 * sender or if pkt_diff indicates this system enlarged the packet. 1762 */ 1763 uint32_t 1764 tsol_pmtu_adjust(mblk_t *mp, uint32_t mtu, int pkt_diff, int af) 1765 { 1766 int label_adj = 0; 1767 uint32_t min_mtu = IP_MIN_MTU; 1768 tsol_tpc_t *src_rhtp; 1769 void *src; 1770 1771 /* 1772 * Note: label_adj is non-positive, indicating the number of 1773 * bytes removed by removing the security option from the 1774 * header. 
1775 */ 1776 if (af == AF_INET6) { 1777 ip6_t *ip6h; 1778 1779 min_mtu = IPV6_MIN_MTU; 1780 ip6h = (ip6_t *)mp->b_rptr; 1781 src = &ip6h->ip6_src; 1782 if ((src_rhtp = find_tpc(src, IPV6_VERSION, B_FALSE)) == NULL) 1783 return (mtu); 1784 if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED) { 1785 label_adj = tsol_remove_secopt_v6( 1786 (ip6_t *)mp->b_rptr, MBLKL(mp)); 1787 } 1788 } else { 1789 ipha_t *ipha; 1790 1791 ASSERT(af == AF_INET); 1792 ipha = (ipha_t *)mp->b_rptr; 1793 src = &ipha->ipha_src; 1794 if ((src_rhtp = find_tpc(src, IPV4_VERSION, B_FALSE)) == NULL) 1795 return (mtu); 1796 if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED) 1797 label_adj = tsol_remove_secopt( 1798 (ipha_t *)mp->b_rptr, MBLKL(mp)); 1799 } 1800 /* 1801 * Make pkt_diff non-negative and the larger of the bytes 1802 * previously added (if any) or just removed, since label 1803 * addition + subtraction may not be completely idempotent. 1804 */ 1805 if (pkt_diff < -label_adj) 1806 pkt_diff = -label_adj; 1807 if (pkt_diff > 0 && pkt_diff < mtu) 1808 mtu -= pkt_diff; 1809 1810 TPC_RELE(src_rhtp); 1811 return (MAX(mtu, min_mtu)); 1812 } 1813 1814 /* 1815 * Name: tsol_rtsa_init() 1816 * 1817 * Normal: Sanity checks on the route security attributes provided by 1818 * user. Convert it into a route security parameter list to 1819 * be returned to caller. 1820 * 1821 * Output: EINVAL if bad security attributes in the routing message 1822 * ENOMEM if unable to allocate data structures 1823 * 0 otherwise. 1824 * 1825 * Note: On input, cp must point to the end of any addresses in 1826 * the rt_msghdr_t structure. 1827 */ 1828 int 1829 tsol_rtsa_init(rt_msghdr_t *rtm, tsol_rtsecattr_t *sp, caddr_t cp) 1830 { 1831 uint_t sacnt; 1832 int err; 1833 caddr_t lim; 1834 tsol_rtsecattr_t *tp; 1835 1836 ASSERT((cp >= (caddr_t)&rtm[1]) && sp != NULL); 1837 1838 /* 1839 * In theory, we could accept as many security attributes configured 1840 * per route destination. 
However, the current design is limited 1841 * such that at most only one set security attributes is allowed to 1842 * be associated with a prefix IRE. We therefore assert for now. 1843 */ 1844 /* LINTED */ 1845 ASSERT(TSOL_RTSA_REQUEST_MAX == 1); 1846 1847 sp->rtsa_cnt = 0; 1848 lim = (caddr_t)rtm + rtm->rtm_msglen; 1849 ASSERT(cp <= lim); 1850 1851 if ((lim - cp) < sizeof (rtm_ext_t) || 1852 ((rtm_ext_t *)cp)->rtmex_type != RTMEX_GATEWAY_SECATTR) 1853 return (0); 1854 1855 if (((rtm_ext_t *)cp)->rtmex_len < sizeof (tsol_rtsecattr_t)) 1856 return (EINVAL); 1857 1858 cp += sizeof (rtm_ext_t); 1859 1860 if ((lim - cp) < sizeof (*tp) || 1861 (tp = (tsol_rtsecattr_t *)cp, (sacnt = tp->rtsa_cnt) == 0) || 1862 (lim - cp) < TSOL_RTSECATTR_SIZE(sacnt)) 1863 return (EINVAL); 1864 1865 /* 1866 * Trying to add route security attributes when system 1867 * labeling service is not available, or when user supllies 1868 * more than the maximum number of security attributes 1869 * allowed per request. 1870 */ 1871 if ((sacnt > 0 && !is_system_labeled()) || 1872 sacnt > TSOL_RTSA_REQUEST_MAX) 1873 return (EINVAL); 1874 1875 /* Ensure valid credentials */ 1876 if ((err = rtsa_validate(&((tsol_rtsecattr_t *)cp)-> 1877 rtsa_attr[0])) != 0) { 1878 cp += sizeof (*sp); 1879 return (err); 1880 } 1881 1882 bcopy(cp, sp, sizeof (*sp)); 1883 cp += sizeof (*sp); 1884 return (0); 1885 } 1886 1887 int 1888 tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc, 1889 tsol_gcgrp_t *gcgrp) 1890 { 1891 tsol_ire_gw_secattr_t *attrp; 1892 boolean_t exists = B_FALSE; 1893 in_addr_t ga_addr4; 1894 void *paddr = NULL; 1895 1896 ASSERT(ire != NULL); 1897 1898 /* 1899 * The only time that attrp can be NULL is when this routine is 1900 * called for the first time during the creation/initialization 1901 * of the corresponding IRE. It will only get cleared when the 1902 * IRE is deleted. 
1903 */ 1904 if ((attrp = ire->ire_gw_secattr) == NULL) { 1905 attrp = ire_gw_secattr_alloc(KM_NOSLEEP); 1906 if (attrp == NULL) 1907 return (ENOMEM); 1908 ire->ire_gw_secattr = attrp; 1909 } else { 1910 exists = B_TRUE; 1911 mutex_enter(&attrp->igsa_lock); 1912 1913 if (attrp->igsa_rhc != NULL) { 1914 TNRHC_RELE(attrp->igsa_rhc); 1915 attrp->igsa_rhc = NULL; 1916 } 1917 1918 if (attrp->igsa_gc != NULL) 1919 GC_REFRELE(attrp->igsa_gc); 1920 if (attrp->igsa_gcgrp != NULL) 1921 GCGRP_REFRELE(attrp->igsa_gcgrp); 1922 } 1923 ASSERT(!exists || MUTEX_HELD(&attrp->igsa_lock)); 1924 1925 /* 1926 * References already held by caller and we keep them; 1927 * note that both gc and gcgrp may be set to NULL to 1928 * clear out igsa_gc and igsa_gcgrp, respectively. 1929 */ 1930 attrp->igsa_gc = gc; 1931 attrp->igsa_gcgrp = gcgrp; 1932 1933 if (gcgrp == NULL && gc != NULL) { 1934 gcgrp = gc->gc_grp; 1935 ASSERT(gcgrp != NULL); 1936 } 1937 1938 /* 1939 * Intialize the template for gateway; we use the gateway's 1940 * address found in either the passed in gateway credential 1941 * or group pointer, or the ire_gateway_addr{_v6} field. 1942 */ 1943 if (gcgrp != NULL) { 1944 tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr; 1945 1946 /* 1947 * Caller is holding a reference, and that we don't 1948 * need to hold any lock to access the address. 1949 */ 1950 if (ipversion == IPV4_VERSION) { 1951 ASSERT(ga->ga_af == AF_INET); 1952 IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4); 1953 paddr = &ga_addr4; 1954 } else { 1955 ASSERT(ga->ga_af == AF_INET6); 1956 paddr = &ga->ga_addr; 1957 } 1958 } else if (ipversion == IPV6_VERSION && 1959 !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) { 1960 paddr = &ire->ire_gateway_addr_v6; 1961 } else if (ipversion == IPV4_VERSION && 1962 ire->ire_gateway_addr != INADDR_ANY) { 1963 paddr = &ire->ire_gateway_addr; 1964 } 1965 1966 /* 1967 * Lookup the gateway template; note that we could get an internal 1968 * template here, which we cache anyway. 
During IRE matching, we'll 1969 * try to update this gateway template cache and hopefully get a 1970 * real one. 1971 */ 1972 if (paddr != NULL) { 1973 attrp->igsa_rhc = find_rhc(paddr, ipversion, B_FALSE); 1974 } 1975 1976 if (exists) 1977 mutex_exit(&attrp->igsa_lock); 1978 1979 return (0); 1980 } 1981 1982 /* 1983 * This function figures the type of MLP that we'll be using based on the 1984 * address that the user is binding and the zone. If the address is 1985 * unspecified, then we're looking at both private and shared. If it's one 1986 * of the zone's private addresses, then it's private only. If it's one 1987 * of the global addresses, then it's shared only. Multicast addresses are 1988 * treated same as unspecified address. 1989 * 1990 * If we can't figure out what it is, then return mlptSingle. That's actually 1991 * an error case. 1992 * 1993 * The callers are assume to pass in zone->zone_id and not the zoneid that 1994 * is stored in a conn_t (since the latter will be GLOBAL_ZONEID in an 1995 * exclusive stack zone). 1996 */ 1997 mlp_type_t 1998 tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr, 1999 ip_stack_t *ipst) 2000 { 2001 in_addr_t in4; 2002 ire_t *ire; 2003 ipif_t *ipif; 2004 zoneid_t addrzone; 2005 zoneid_t ip_zoneid; 2006 2007 ASSERT(addr != NULL); 2008 2009 /* 2010 * For exclusive stacks we set the zoneid to zero 2011 * to operate as if in the global zone for IRE and conn_t comparisons. 
2012 */ 2013 if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) 2014 ip_zoneid = GLOBAL_ZONEID; 2015 else 2016 ip_zoneid = zoneid; 2017 2018 if (version == IPV6_VERSION && 2019 IN6_IS_ADDR_V4MAPPED((const in6_addr_t *)addr)) { 2020 IN6_V4MAPPED_TO_IPADDR((const in6_addr_t *)addr, in4); 2021 addr = &in4; 2022 version = IPV4_VERSION; 2023 } 2024 2025 if (version == IPV4_VERSION) { 2026 in4 = *(const in_addr_t *)addr; 2027 if ((in4 == INADDR_ANY) || CLASSD(in4)) { 2028 return (mlptBoth); 2029 } 2030 ire = ire_cache_lookup(in4, ip_zoneid, NULL, ipst); 2031 } else { 2032 if (IN6_IS_ADDR_UNSPECIFIED((const in6_addr_t *)addr) || 2033 IN6_IS_ADDR_MULTICAST((const in6_addr_t *)addr)) { 2034 return (mlptBoth); 2035 } 2036 ire = ire_cache_lookup_v6(addr, ip_zoneid, NULL, ipst); 2037 } 2038 /* 2039 * If we can't find the IRE, then we have to behave exactly like 2040 * ip_bind_laddr{,_v6}. That means looking up the IPIF so that users 2041 * can bind to addresses on "down" interfaces. 2042 * 2043 * If we can't find that either, then the bind is going to fail, so 2044 * just give up. Note that there's a miniscule chance that the address 2045 * is in transition, but we don't bother handling that. 2046 */ 2047 if (ire == NULL) { 2048 if (version == IPV4_VERSION) 2049 ipif = ipif_lookup_addr(*(const in_addr_t *)addr, NULL, 2050 ip_zoneid, NULL, NULL, NULL, NULL, ipst); 2051 else 2052 ipif = ipif_lookup_addr_v6((const in6_addr_t *)addr, 2053 NULL, ip_zoneid, NULL, NULL, NULL, NULL, ipst); 2054 if (ipif == NULL) { 2055 return (mlptSingle); 2056 } 2057 addrzone = ipif->ipif_zoneid; 2058 ipif_refrele(ipif); 2059 } else { 2060 addrzone = ire->ire_zoneid; 2061 ire_refrele(ire); 2062 } 2063 return (addrzone == ALL_ZONES ? 
mlptShared : mlptPrivate); 2064 } 2065 2066 /* 2067 * Since we are configuring local interfaces, and we know trusted 2068 * extension CDE requires local interfaces to be cipso host type in 2069 * order to function correctly, we'll associate a cipso template 2070 * to each local interface and let the interface come up. Configuring 2071 * a local interface to be "unlabeled" host type is a configuration error. 2072 * We'll override that error and make the interface host type to be cipso 2073 * here. 2074 * 2075 * The code is optimized for the usual "success" case and unwinds things on 2076 * error. We don't want to go to the trouble and expense of formatting the 2077 * interface name for the usual case where everything is configured correctly. 2078 */ 2079 boolean_t 2080 tsol_check_interface_address(const ipif_t *ipif) 2081 { 2082 tsol_tpc_t *tp; 2083 char addrbuf[INET6_ADDRSTRLEN]; 2084 int af; 2085 const void *addr; 2086 zone_t *zone; 2087 ts_label_t *plabel; 2088 const bslabel_t *label; 2089 char ifbuf[LIFNAMSIZ + 10]; 2090 const char *ifname; 2091 boolean_t retval; 2092 tsol_rhent_t rhent; 2093 netstack_t *ns = ipif->ipif_ill->ill_ipst->ips_netstack; 2094 2095 if (IN6_IS_ADDR_V4MAPPED(&ipif->ipif_v6lcl_addr)) { 2096 af = AF_INET; 2097 addr = &V4_PART_OF_V6(ipif->ipif_v6lcl_addr); 2098 } else { 2099 af = AF_INET6; 2100 addr = &ipif->ipif_v6lcl_addr; 2101 } 2102 2103 tp = find_tpc(&ipif->ipif_v6lcl_addr, IPV6_VERSION, B_FALSE); 2104 2105 /* assumes that ALL_ZONES implies that there is no exclusive stack */ 2106 if (ipif->ipif_zoneid == ALL_ZONES) { 2107 zone = NULL; 2108 } else if (ns->netstack_stackid == GLOBAL_NETSTACKID) { 2109 /* Shared stack case */ 2110 zone = zone_find_by_id(ipif->ipif_zoneid); 2111 } else { 2112 /* Exclusive stack case */ 2113 zone = zone_find_by_id(crgetzoneid(ipif->ipif_ill->ill_credp)); 2114 } 2115 if (zone != NULL) { 2116 plabel = zone->zone_slabel; 2117 ASSERT(plabel != NULL); 2118 label = label2bslabel(plabel); 2119 } 2120 2121 /* 2122 
* If it's CIPSO and an all-zones address, then we're done. 2123 * If it's a CIPSO zone specific address, the zone's label 2124 * must be in the range or set specified in the template. 2125 * When the remote host entry is missing or the template 2126 * type is incorrect for this interface, we create a 2127 * CIPSO host entry in kernel and allow the interface to be 2128 * brought up as CIPSO type. 2129 */ 2130 if (tp != NULL && ( 2131 /* The all-zones case */ 2132 (tp->tpc_tp.host_type == SUN_CIPSO && 2133 tp->tpc_tp.tp_doi == default_doi && 2134 ipif->ipif_zoneid == ALL_ZONES) || 2135 /* The local-zone case */ 2136 (zone != NULL && plabel->tsl_doi == tp->tpc_tp.tp_doi && 2137 ((tp->tpc_tp.host_type == SUN_CIPSO && 2138 (_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) || 2139 blinlset(label, tp->tpc_tp.tp_sl_set_cipso))))))) { 2140 if (zone != NULL) 2141 zone_rele(zone); 2142 TPC_RELE(tp); 2143 return (B_TRUE); 2144 } 2145 2146 ifname = ipif->ipif_ill->ill_name; 2147 if (ipif->ipif_id != 0) { 2148 (void) snprintf(ifbuf, sizeof (ifbuf), "%s:%u", ifname, 2149 ipif->ipif_id); 2150 ifname = ifbuf; 2151 } 2152 (void) inet_ntop(af, addr, addrbuf, sizeof (addrbuf)); 2153 2154 if (tp == NULL) { 2155 cmn_err(CE_NOTE, "template entry for %s missing. Default to " 2156 "CIPSO type for %s", ifname, addrbuf); 2157 retval = B_TRUE; 2158 } else if (tp->tpc_tp.host_type == UNLABELED) { 2159 cmn_err(CE_NOTE, "template type for %s incorrectly configured. " 2160 "Change to CIPSO type for %s", ifname, addrbuf); 2161 retval = B_TRUE; 2162 } else if (ipif->ipif_zoneid == ALL_ZONES) { 2163 if (tp->tpc_tp.host_type != SUN_CIPSO) { 2164 cmn_err(CE_NOTE, "%s failed: %s isn't set to CIPSO for " 2165 "all-zones. 
Converted to CIPSO.", ifname, addrbuf); 2166 retval = B_TRUE; 2167 } else { 2168 cmn_err(CE_NOTE, "%s failed: %s has wrong DOI %d " 2169 "instead of %d", ifname, addrbuf, 2170 tp->tpc_tp.tp_doi, default_doi); 2171 retval = B_FALSE; 2172 } 2173 } else if (zone == NULL) { 2174 cmn_err(CE_NOTE, "%s failed: zoneid %d unknown", 2175 ifname, ipif->ipif_zoneid); 2176 retval = B_FALSE; 2177 } else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) { 2178 cmn_err(CE_NOTE, "%s failed: zone %s has DOI %d but %s has " 2179 "DOI %d", ifname, zone->zone_name, plabel->tsl_doi, 2180 addrbuf, tp->tpc_tp.tp_doi); 2181 retval = B_FALSE; 2182 } else { 2183 cmn_err(CE_NOTE, "%s failed: zone %s label incompatible with " 2184 "%s", ifname, zone->zone_name, addrbuf); 2185 tsol_print_label(label, "zone label"); 2186 retval = B_FALSE; 2187 } 2188 2189 if (zone != NULL) 2190 zone_rele(zone); 2191 if (tp != NULL) 2192 TPC_RELE(tp); 2193 if (retval) { 2194 /* 2195 * we've corrected a config error and let the interface 2196 * come up as cipso. Need to insert an rhent. 2197 */ 2198 if ((rhent.rh_address.ta_family = af) == AF_INET) { 2199 rhent.rh_prefix = 32; 2200 rhent.rh_address.ta_addr_v4 = *(struct in_addr *)addr; 2201 } else { 2202 rhent.rh_prefix = 128; 2203 rhent.rh_address.ta_addr_v6 = *(in6_addr_t *)addr; 2204 } 2205 (void) strcpy(rhent.rh_template, "cipso"); 2206 if (tnrh_load(&rhent) != 0) { 2207 cmn_err(CE_NOTE, "%s failed: Cannot insert CIPSO " 2208 "template for local addr %s", ifname, addrbuf); 2209 retval = B_FALSE; 2210 } 2211 } 2212 return (retval); 2213 } 2214