1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/strsubr.h>
29 #include <sys/stropts.h>
30 #include <sys/sunddi.h>
31 #include <sys/cred.h>
32 #include <sys/debug.h>
33 #include <sys/kmem.h>
34 #include <sys/errno.h>
35 #include <sys/disp.h>
36 #include <netinet/in.h>
37 #include <netinet/in_systm.h>
38 #include <netinet/ip.h>
39 #include <netinet/ip_icmp.h>
40 #include <netinet/tcp.h>
41 #include <inet/common.h>
42 #include <inet/ipclassifier.h>
43 #include <inet/ip.h>
44 #include <inet/mib2.h>
45 #include <inet/nd.h>
46 #include <inet/tcp.h>
47 #include <inet/ip_rts.h>
48 #include <inet/ip_ire.h>
49 #include <inet/ip_if.h>
50 #include <sys/modhash.h>
51
52 #include <sys/tsol/label.h>
53 #include <sys/tsol/label_macro.h>
54 #include <sys/tsol/tnet.h>
55 #include <sys/tsol/tndb.h>
56 #include <sys/strsun.h>
57
/*
 * Tunable for strict error-reply behavior (TCP RST and ICMP Unreachable).
 * It has no explicit initializer, so it starts out as zero.
 */
int tsol_strict_error;
60
61 /*
62 * Some notes on the Trusted Solaris IRE gateway security attributes:
63 *
64 * When running in Trusted mode, the routing subsystem determines whether or
65 * not a packet can be delivered to an off-link host (not directly reachable
66 * through an interface) based on the accreditation checks of the packet's
67 * security attributes against those associated with the next-hop gateway.
68 *
69 * The next-hop gateway's security attributes can be derived from two sources
70 * (in order of preference): route-related and the host database. A Trusted
71 * system must be configured with at least the host database containing an
72 * entry for the next-hop gateway, or otherwise no accreditation checks can
73 * be performed, which may result in the inability to send packets to any
74 * off-link destination host.
75 *
76 * The major differences between the two sources are the number and type of
77 * security attributes used for accreditation checks. A host database entry
78 * can contain at most one set of security attributes, specific only to the
 * next-hop gateway. In contrast, route-related security attributes are made
80 * up of a collection of security attributes for the distant networks, and
81 * are grouped together per next-hop gateway used to reach those networks.
82 * This is the preferred method, and the routing subsystem will fallback to
83 * the host database entry only if there are no route-related attributes
84 * associated with the next-hop gateway.
85 *
86 * In Trusted mode, all of the IRE entries (except LOCAL/LOOPBACK/BROADCAST/
87 * INTERFACE type) are initialized to contain a placeholder to store this
88 * information. The ire_gw_secattr structure gets allocated, initialized
89 * and associated with the IRE during the time of the IRE creation. The
90 * initialization process also includes resolving the host database entry
91 * of the next-hop gateway for fallback purposes. It does not include any
92 * route-related attribute setup, as that process comes separately as part
93 * of the route requests (add/change) made to the routing subsystem.
94 *
95 * The underlying logic which involves associating IREs with the gateway
96 * security attributes are represented by the following data structures:
97 *
98 * tsol_gcdb_t, or "gcdb"
99 *
100 * - This is a system-wide collection of records containing the
101 * currently used route-related security attributes, which are fed
102 * through the routing socket interface, e.g. "route add/change".
103 *
104 * tsol_gc_t, or "gc"
105 *
106 * - This is the gateway credential structure, and it provides for the
107 * only mechanism to access the contents of gcdb. More than one gc
108 * entries may refer to the same gcdb record. gc's in the system are
109 * grouped according to the next-hop gateway address.
110 *
111 * tsol_gcgrp_t, or "gcgrp"
112 *
113 * - Group of gateway credentials, and is unique per next-hop gateway
114 * address. When the group is not empty, i.e. when gcgrp_count is
115 * greater than zero, it contains one or more gc's, each pointing to
116 * a gcdb record which indicates the gateway security attributes
117 * associated with the next-hop gateway.
118 *
119 * The fields of the tsol_ire_gw_secattr_t used from within the IRE are:
120 *
121 * igsa_lock
122 *
123 * - Lock that protects all fields within tsol_ire_gw_secattr_t.
124 *
125 * igsa_rhc
126 *
127 * - Remote host cache database entry of next-hop gateway. This is
128 * used in the case when there are no route-related attributes
129 * configured for the IRE.
130 *
131 * igsa_gc
132 *
133 * - A set of route-related attributes that only get set for prefix
134 * IREs. If this is non-NULL, the prefix IRE has been associated
135 * with a set of gateway security attributes by way of route add/
136 * change functionality.
137 */
138
/* kmem cache that backs tsol_ire_gw_secattr_t objects; set up in tnet_init() */
static kmem_cache_t *ire_gw_secattr_cache;

/* Default bucket counts used to initialize gcdb_hash_size/gcgrp_hash_size */
#define	GCDB_HASH_SIZE	101
#define	GCGRP_HASH_SIZE	101
143
/*
 * Drop one reference on gcdb record `p'.  The count is adjusted under
 * gcdb_lock; when it reaches zero the record is handed to gcdb_inactive(),
 * which is expected to return with gcdb_lock still held.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and is safe in an un-braced if/else.  `p' is evaluated
 * more than once, so it must not have side effects.
 */
#define	GCDB_REFRELE(p) do {				\
	mutex_enter(&gcdb_lock);			\
	ASSERT((p)->gcdb_refcnt > 0);			\
	if (--((p)->gcdb_refcnt) == 0)			\
		gcdb_inactive(p);			\
	ASSERT(MUTEX_HELD(&gcdb_lock));			\
	mutex_exit(&gcdb_lock);				\
} while (0)
152
/* Sizes handed to mod_hash_create_extended() when tnet_init() runs */
static int gcdb_hash_size = GCDB_HASH_SIZE;
static int gcgrp_hash_size = GCGRP_HASH_SIZE;
/* gcdb records, keyed by their security attributes (struct rtsa_s) */
static mod_hash_t *gcdb_hash;
/* gcgrp entries, keyed by tsol_gcgrp_addr_t; one table per address family */
static mod_hash_t *gcgrp4_hash;
static mod_hash_t *gcgrp6_hash;

/* Held across gcdb_hash find/insert/remove and gcdb_refcnt updates */
static kmutex_t gcdb_lock;
/* Held across gcgrp hash find/insert and the refcnt bump in gcgrp_lookup() */
kmutex_t gcgrp_lock;

/* gcdb hash callbacks and record management */
static uint_t gcdb_hash_by_secattr(void *, mod_hash_key_t);
static int gcdb_hash_cmp(mod_hash_key_t, mod_hash_key_t);
static tsol_gcdb_t *gcdb_lookup(struct rtsa_s *, boolean_t);
static void gcdb_inactive(tsol_gcdb_t *);

/* gcgrp hash callbacks */
static uint_t gcgrp_hash_by_addr(void *, mod_hash_key_t);
static int gcgrp_hash_cmp(mod_hash_key_t, mod_hash_key_t);

/* kmem cache constructor/destructor for tsol_ire_gw_secattr_t */
static int ire_gw_secattr_constructor(void *, void *, int);
static void ire_gw_secattr_destructor(void *, void *);
172
173 void
tnet_init(void)174 tnet_init(void)
175 {
176 ire_gw_secattr_cache = kmem_cache_create("ire_gw_secattr_cache",
177 sizeof (tsol_ire_gw_secattr_t), 64, ire_gw_secattr_constructor,
178 ire_gw_secattr_destructor, NULL, NULL, NULL, 0);
179
180 gcdb_hash = mod_hash_create_extended("gcdb_hash",
181 gcdb_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
182 gcdb_hash_by_secattr, NULL, gcdb_hash_cmp, KM_SLEEP);
183
184 gcgrp4_hash = mod_hash_create_extended("gcgrp4_hash",
185 gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
186 gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
187
188 gcgrp6_hash = mod_hash_create_extended("gcgrp6_hash",
189 gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
190 gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
191
192 mutex_init(&gcdb_lock, NULL, MUTEX_DEFAULT, NULL);
193 mutex_init(&gcgrp_lock, NULL, MUTEX_DEFAULT, NULL);
194 }
195
196 void
tnet_fini(void)197 tnet_fini(void)
198 {
199 kmem_cache_destroy(ire_gw_secattr_cache);
200 mod_hash_destroy_hash(gcdb_hash);
201 mod_hash_destroy_hash(gcgrp4_hash);
202 mod_hash_destroy_hash(gcgrp6_hash);
203 mutex_destroy(&gcdb_lock);
204 mutex_destroy(&gcgrp_lock);
205 }
206
207 /* ARGSUSED */
208 static int
ire_gw_secattr_constructor(void * buf,void * cdrarg,int kmflags)209 ire_gw_secattr_constructor(void *buf, void *cdrarg, int kmflags)
210 {
211 tsol_ire_gw_secattr_t *attrp = buf;
212
213 mutex_init(&attrp->igsa_lock, NULL, MUTEX_DEFAULT, NULL);
214
215 attrp->igsa_rhc = NULL;
216 attrp->igsa_gc = NULL;
217
218 return (0);
219 }
220
221 /* ARGSUSED */
222 static void
ire_gw_secattr_destructor(void * buf,void * cdrarg)223 ire_gw_secattr_destructor(void *buf, void *cdrarg)
224 {
225 tsol_ire_gw_secattr_t *attrp = (tsol_ire_gw_secattr_t *)buf;
226
227 mutex_destroy(&attrp->igsa_lock);
228 }
229
230 tsol_ire_gw_secattr_t *
ire_gw_secattr_alloc(int kmflags)231 ire_gw_secattr_alloc(int kmflags)
232 {
233 return (kmem_cache_alloc(ire_gw_secattr_cache, kmflags));
234 }
235
236 void
ire_gw_secattr_free(tsol_ire_gw_secattr_t * attrp)237 ire_gw_secattr_free(tsol_ire_gw_secattr_t *attrp)
238 {
239 ASSERT(MUTEX_NOT_HELD(&attrp->igsa_lock));
240
241 if (attrp->igsa_rhc != NULL) {
242 TNRHC_RELE(attrp->igsa_rhc);
243 attrp->igsa_rhc = NULL;
244 }
245
246 if (attrp->igsa_gc != NULL) {
247 GC_REFRELE(attrp->igsa_gc);
248 attrp->igsa_gc = NULL;
249 }
250
251 ASSERT(attrp->igsa_rhc == NULL);
252 ASSERT(attrp->igsa_gc == NULL);
253
254 kmem_cache_free(ire_gw_secattr_cache, attrp);
255 }
256
257 /* ARGSUSED */
258 static uint_t
gcdb_hash_by_secattr(void * hash_data,mod_hash_key_t key)259 gcdb_hash_by_secattr(void *hash_data, mod_hash_key_t key)
260 {
261 const struct rtsa_s *rp = (struct rtsa_s *)key;
262 const uint32_t *up, *ue;
263 uint_t hash;
264 int i;
265
266 ASSERT(rp != NULL);
267
268 /* See comments in hash_bylabel in zone.c for details */
269 hash = rp->rtsa_doi + (rp->rtsa_doi << 1);
270 up = (const uint32_t *)&rp->rtsa_slrange;
271 ue = up + sizeof (rp->rtsa_slrange) / sizeof (*up);
272 i = 1;
273 while (up < ue) {
274 /* using 2^n + 1, 1 <= n <= 16 as source of many primes */
275 hash += *up + (*up << ((i % 16) + 1));
276 up++;
277 i++;
278 }
279 return (hash);
280 }
281
282 static int
gcdb_hash_cmp(mod_hash_key_t key1,mod_hash_key_t key2)283 gcdb_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
284 {
285 struct rtsa_s *rp1 = (struct rtsa_s *)key1;
286 struct rtsa_s *rp2 = (struct rtsa_s *)key2;
287
288 ASSERT(rp1 != NULL && rp2 != NULL);
289
290 if (blequal(&rp1->rtsa_slrange.lower_bound,
291 &rp2->rtsa_slrange.lower_bound) &&
292 blequal(&rp1->rtsa_slrange.upper_bound,
293 &rp2->rtsa_slrange.upper_bound) &&
294 rp1->rtsa_doi == rp2->rtsa_doi)
295 return (0);
296
297 /* No match; not found */
298 return (-1);
299 }
300
301 /* ARGSUSED */
302 static uint_t
gcgrp_hash_by_addr(void * hash_data,mod_hash_key_t key)303 gcgrp_hash_by_addr(void *hash_data, mod_hash_key_t key)
304 {
305 tsol_gcgrp_addr_t *ga = (tsol_gcgrp_addr_t *)key;
306 uint_t idx = 0;
307 uint32_t *ap;
308
309 ASSERT(ga != NULL);
310 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
311
312 ap = (uint32_t *)&ga->ga_addr.s6_addr32[0];
313 idx ^= *ap++;
314 idx ^= *ap++;
315 idx ^= *ap++;
316 idx ^= *ap;
317
318 return (idx);
319 }
320
321 static int
gcgrp_hash_cmp(mod_hash_key_t key1,mod_hash_key_t key2)322 gcgrp_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
323 {
324 tsol_gcgrp_addr_t *ga1 = (tsol_gcgrp_addr_t *)key1;
325 tsol_gcgrp_addr_t *ga2 = (tsol_gcgrp_addr_t *)key2;
326
327 ASSERT(ga1 != NULL && ga2 != NULL);
328
329 /* Address family must match */
330 if (ga1->ga_af != ga2->ga_af)
331 return (-1);
332
333 if (ga1->ga_addr.s6_addr32[0] == ga2->ga_addr.s6_addr32[0] &&
334 ga1->ga_addr.s6_addr32[1] == ga2->ga_addr.s6_addr32[1] &&
335 ga1->ga_addr.s6_addr32[2] == ga2->ga_addr.s6_addr32[2] &&
336 ga1->ga_addr.s6_addr32[3] == ga2->ga_addr.s6_addr32[3])
337 return (0);
338
339 /* No match; not found */
340 return (-1);
341 }
342
/*
 * Bit names for an rtsa mask: bit 9 "cipso", bit 3 "doi", bit 2 "max_sl",
 * bit 1 "min_sl", with a leading \20 (16).
 * NOTE(review): this looks like a cmn_err(9F) "%b" bit-field format string
 * (hex output); it is not referenced in this part of the file -- confirm.
 */
#define	RTSAFLAGS	"\20\11cipso\3doi\2max_sl\1min_sl"
344
345 int
rtsa_validate(const struct rtsa_s * rp)346 rtsa_validate(const struct rtsa_s *rp)
347 {
348 uint32_t mask = rp->rtsa_mask;
349
350 /* RTSA_CIPSO must be set, and DOI must not be zero */
351 if ((mask & RTSA_CIPSO) == 0 || rp->rtsa_doi == 0) {
352 DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
353 "rtsa(1) lacks flag or has 0 doi.",
354 rtsa_s *, rp);
355 return (EINVAL);
356 }
357 /*
358 * SL range must be specified, and it must have its
359 * upper bound dominating its lower bound.
360 */
361 if ((mask & RTSA_SLRANGE) != RTSA_SLRANGE ||
362 !bldominates(&rp->rtsa_slrange.upper_bound,
363 &rp->rtsa_slrange.lower_bound)) {
364 DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
365 "rtsa(1) min_sl and max_sl not set or max_sl is "
366 "not dominating.", rtsa_s *, rp);
367 return (EINVAL);
368 }
369 return (0);
370 }
371
372 /*
373 * A brief explanation of the reference counting scheme:
374 *
 * Apart from dynamic references due to reference holds done
376 * actively by threads, we have the following references:
377 *
378 * gcdb_refcnt:
379 * - Every tsol_gc_t pointing to a tsol_gcdb_t contributes a reference
380 * to the gcdb_refcnt.
381 *
382 * gc_refcnt:
383 * - A prefix IRE that points to an igsa_gc contributes a reference
384 * to the gc_refcnt.
385 *
386 * gcgrp_refcnt:
387 * - Every tsol_gc_t in the chain headed by tsol_gcgrp_t contributes
388 * a reference to the gcgrp_refcnt.
389 */
390 static tsol_gcdb_t *
gcdb_lookup(struct rtsa_s * rp,boolean_t alloc)391 gcdb_lookup(struct rtsa_s *rp, boolean_t alloc)
392 {
393 tsol_gcdb_t *gcdb = NULL;
394
395 if (rtsa_validate(rp) != 0)
396 return (NULL);
397
398 mutex_enter(&gcdb_lock);
399 /* Find a copy in the cache; otherwise, create one and cache it */
400 if (mod_hash_find(gcdb_hash, (mod_hash_key_t)rp,
401 (mod_hash_val_t *)&gcdb) == 0) {
402 gcdb->gcdb_refcnt++;
403 ASSERT(gcdb->gcdb_refcnt != 0);
404
405 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__lookup, char *,
406 "gcdb(1) is in gcdb_hash(global)", tsol_gcdb_t *, gcdb);
407 } else if (alloc) {
408 gcdb = kmem_zalloc(sizeof (*gcdb), KM_NOSLEEP);
409 if (gcdb != NULL) {
410 gcdb->gcdb_refcnt = 1;
411 gcdb->gcdb_mask = rp->rtsa_mask;
412 gcdb->gcdb_doi = rp->rtsa_doi;
413 gcdb->gcdb_slrange = rp->rtsa_slrange;
414
415 if (mod_hash_insert(gcdb_hash,
416 (mod_hash_key_t)&gcdb->gcdb_attr,
417 (mod_hash_val_t)gcdb) != 0) {
418 mutex_exit(&gcdb_lock);
419 kmem_free(gcdb, sizeof (*gcdb));
420 return (NULL);
421 }
422
423 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__insert, char *,
424 "gcdb(1) inserted in gcdb_hash(global)",
425 tsol_gcdb_t *, gcdb);
426 }
427 }
428 mutex_exit(&gcdb_lock);
429 return (gcdb);
430 }
431
432 static void
gcdb_inactive(tsol_gcdb_t * gcdb)433 gcdb_inactive(tsol_gcdb_t *gcdb)
434 {
435 ASSERT(MUTEX_HELD(&gcdb_lock));
436 ASSERT(gcdb != NULL && gcdb->gcdb_refcnt == 0);
437
438 (void) mod_hash_remove(gcdb_hash, (mod_hash_key_t)&gcdb->gcdb_attr,
439 (mod_hash_val_t *)&gcdb);
440
441 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__remove, char *,
442 "gcdb(1) removed from gcdb_hash(global)",
443 tsol_gcdb_t *, gcdb);
444 kmem_free(gcdb, sizeof (*gcdb));
445 }
446
447 tsol_gc_t *
gc_create(struct rtsa_s * rp,tsol_gcgrp_t * gcgrp,boolean_t * gcgrp_xtrarefp)448 gc_create(struct rtsa_s *rp, tsol_gcgrp_t *gcgrp, boolean_t *gcgrp_xtrarefp)
449 {
450 tsol_gc_t *gc;
451 tsol_gcdb_t *gcdb;
452
453 *gcgrp_xtrarefp = B_TRUE;
454
455 rw_enter(&gcgrp->gcgrp_rwlock, RW_WRITER);
456 if ((gcdb = gcdb_lookup(rp, B_TRUE)) == NULL) {
457 rw_exit(&gcgrp->gcgrp_rwlock);
458 return (NULL);
459 }
460
461 for (gc = gcgrp->gcgrp_head; gc != NULL; gc = gc->gc_next) {
462 if (gc->gc_db == gcdb) {
463 ASSERT(gc->gc_grp == gcgrp);
464
465 gc->gc_refcnt++;
466 ASSERT(gc->gc_refcnt != 0);
467
468 GCDB_REFRELE(gcdb);
469
470 DTRACE_PROBE3(tx__gcdb__log__info__gc__create,
471 char *, "found gc(1) in gcgrp(2)",
472 tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
473 rw_exit(&gcgrp->gcgrp_rwlock);
474 return (gc);
475 }
476 }
477
478 gc = kmem_zalloc(sizeof (*gc), KM_NOSLEEP);
479 if (gc != NULL) {
480 if (gcgrp->gcgrp_head == NULL) {
481 gcgrp->gcgrp_head = gcgrp->gcgrp_tail = gc;
482 } else {
483 gcgrp->gcgrp_tail->gc_next = gc;
484 gc->gc_prev = gcgrp->gcgrp_tail;
485 gcgrp->gcgrp_tail = gc;
486 }
487 gcgrp->gcgrp_count++;
488 ASSERT(gcgrp->gcgrp_count != 0);
489
490 /* caller has incremented gcgrp reference for us */
491 gc->gc_grp = gcgrp;
492
493 gc->gc_db = gcdb;
494 gc->gc_refcnt = 1;
495
496 DTRACE_PROBE3(tx__gcdb__log__info__gc__create, char *,
497 "added gc(1) to gcgrp(2)", tsol_gc_t *, gc,
498 tsol_gcgrp_t *, gcgrp);
499
500 *gcgrp_xtrarefp = B_FALSE;
501 }
502 rw_exit(&gcgrp->gcgrp_rwlock);
503
504 return (gc);
505 }
506
507 void
gc_inactive(tsol_gc_t * gc)508 gc_inactive(tsol_gc_t *gc)
509 {
510 tsol_gcgrp_t *gcgrp = gc->gc_grp;
511
512 ASSERT(gcgrp != NULL);
513 ASSERT(RW_WRITE_HELD(&gcgrp->gcgrp_rwlock));
514 ASSERT(gc->gc_refcnt == 0);
515
516 if (gc->gc_prev != NULL)
517 gc->gc_prev->gc_next = gc->gc_next;
518 else
519 gcgrp->gcgrp_head = gc->gc_next;
520 if (gc->gc_next != NULL)
521 gc->gc_next->gc_prev = gc->gc_prev;
522 else
523 gcgrp->gcgrp_tail = gc->gc_prev;
524 ASSERT(gcgrp->gcgrp_count > 0);
525 gcgrp->gcgrp_count--;
526
527 /* drop lock before it's destroyed */
528 rw_exit(&gcgrp->gcgrp_rwlock);
529
530 DTRACE_PROBE3(tx__gcdb__log__info__gc__remove, char *,
531 "removed inactive gc(1) from gcgrp(2)",
532 tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
533
534 GCGRP_REFRELE(gcgrp);
535
536 gc->gc_grp = NULL;
537 gc->gc_prev = gc->gc_next = NULL;
538
539 if (gc->gc_db != NULL)
540 GCDB_REFRELE(gc->gc_db);
541
542 kmem_free(gc, sizeof (*gc));
543 }
544
545 tsol_gcgrp_t *
gcgrp_lookup(tsol_gcgrp_addr_t * ga,boolean_t alloc)546 gcgrp_lookup(tsol_gcgrp_addr_t *ga, boolean_t alloc)
547 {
548 tsol_gcgrp_t *gcgrp = NULL;
549 mod_hash_t *hashp;
550
551 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
552
553 hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
554
555 mutex_enter(&gcgrp_lock);
556 if (mod_hash_find(hashp, (mod_hash_key_t)ga,
557 (mod_hash_val_t *)&gcgrp) == 0) {
558 gcgrp->gcgrp_refcnt++;
559 ASSERT(gcgrp->gcgrp_refcnt != 0);
560
561 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__lookup, char *,
562 "found gcgrp(1) in hash(2)", tsol_gcgrp_t *, gcgrp,
563 mod_hash_t *, hashp);
564
565 } else if (alloc) {
566 gcgrp = kmem_zalloc(sizeof (*gcgrp), KM_NOSLEEP);
567 if (gcgrp != NULL) {
568 gcgrp->gcgrp_refcnt = 1;
569 rw_init(&gcgrp->gcgrp_rwlock, NULL, RW_DEFAULT, NULL);
570 bcopy(ga, &gcgrp->gcgrp_addr, sizeof (*ga));
571
572 if (mod_hash_insert(hashp,
573 (mod_hash_key_t)&gcgrp->gcgrp_addr,
574 (mod_hash_val_t)gcgrp) != 0) {
575 mutex_exit(&gcgrp_lock);
576 kmem_free(gcgrp, sizeof (*gcgrp));
577 return (NULL);
578 }
579
580 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__insert,
581 char *, "inserted gcgrp(1) in hash(2)",
582 tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
583 }
584 }
585 mutex_exit(&gcgrp_lock);
586 return (gcgrp);
587 }
588
589 void
gcgrp_inactive(tsol_gcgrp_t * gcgrp)590 gcgrp_inactive(tsol_gcgrp_t *gcgrp)
591 {
592 tsol_gcgrp_addr_t *ga;
593 mod_hash_t *hashp;
594
595 ASSERT(MUTEX_HELD(&gcgrp_lock));
596 ASSERT(gcgrp != NULL && gcgrp->gcgrp_refcnt == 0);
597 ASSERT(gcgrp->gcgrp_head == NULL && gcgrp->gcgrp_count == 0);
598
599 ga = &gcgrp->gcgrp_addr;
600 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
601
602 hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
603 (void) mod_hash_remove(hashp, (mod_hash_key_t)ga,
604 (mod_hash_val_t *)&gcgrp);
605 rw_destroy(&gcgrp->gcgrp_rwlock);
606
607 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__remove, char *,
608 "removed inactive gcgrp(1) from hash(2)",
609 tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
610
611 kmem_free(gcgrp, sizeof (*gcgrp));
612 }
613
614
615 /*
616 * Assign a sensitivity label to inbound traffic which arrived without
617 * an explicit on-the-wire label.
618 *
619 * In the case of CIPSO-type hosts, we assume packets arriving without
620 * a label are at the most sensitive label known for the host, most
621 * likely involving out-of-band key management traffic (such as IKE,
622 * etc.,)
623 */
624 static boolean_t
tsol_find_unlabeled_label(tsol_tpc_t * rhtp,bslabel_t * sl,uint32_t * doi)625 tsol_find_unlabeled_label(tsol_tpc_t *rhtp, bslabel_t *sl, uint32_t *doi)
626 {
627 *doi = rhtp->tpc_tp.tp_doi;
628 switch (rhtp->tpc_tp.host_type) {
629 case UNLABELED:
630 *sl = rhtp->tpc_tp.tp_def_label;
631 break;
632 case SUN_CIPSO:
633 *sl = rhtp->tpc_tp.tp_sl_range_cipso.upper_bound;
634 break;
635 default:
636 return (B_FALSE);
637 }
638 setbltype(sl, SUN_SL_ID);
639 return (B_TRUE);
640 }
641
642 /*
643 * Converts CIPSO option to sensitivity label.
644 * Validity checks based on restrictions defined in
645 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) (draft-ietf-cipso-ipsecurity)
646 */
647 static boolean_t
cipso_to_sl(const uchar_t * option,bslabel_t * sl)648 cipso_to_sl(const uchar_t *option, bslabel_t *sl)
649 {
650 const struct cipso_option *co = (const struct cipso_option *)option;
651 const struct cipso_tag_type_1 *tt1;
652
653 tt1 = (struct cipso_tag_type_1 *)&co->cipso_tag_type[0];
654 if (tt1->tag_type != 1 ||
655 tt1->tag_length < TSOL_TT1_MIN_LENGTH ||
656 tt1->tag_length > TSOL_TT1_MAX_LENGTH ||
657 tt1->tag_length + TSOL_CIPSO_TAG_OFFSET > co->cipso_length)
658 return (B_FALSE);
659
660 bsllow(sl); /* assumed: sets compartments to all zeroes */
661 LCLASS_SET((_bslabel_impl_t *)sl, tt1->tag_sl);
662 bcopy(tt1->tag_cat, &((_bslabel_impl_t *)sl)->compartments,
663 tt1->tag_length - TSOL_TT1_MIN_LENGTH);
664 return (B_TRUE);
665 }
666
/*
 * If present, parse the CIPSO label in the incoming packet and
 * construct a ts_label_t that reflects the CIPSO label and put it in
 * the ip_recv_attr_t. Later as the packet flows up through the stack any
 * code that needs to examine the packet label can inspect the label
 * from the ira_tsl. This function is
 * called right in ip_input for all packets, i.e. locally destined and
 * to be forwarded packets. The forwarding path needs to examine the label
 * to determine how to forward the packet.
 *
 * This routine pulls all message text up into the first mblk.
 * For IPv4, only the first 20 bytes of the IP header are guaranteed
 * to exist. For IPv6, only the IPv6 header is guaranteed to exist.
 *
 * Arguments:
 *	mp	- the packet; must be M_DATA
 *	version	- IPV4_VERSION, otherwise the packet is treated as IPv6
 *	ira	- receive attributes; ira_cred, ira_tsl and ira_free_flags
 *		  are updated when a label is assigned
 *
 * Returns B_TRUE when the packet may continue.  Either a label was
 * assigned (ira_cred/ira_tsl set), or the packet is one of the exempt
 * types handled under OPT_NONE, in which case `ira' is left untouched.
 * Returns B_FALSE when the pullup fails, the option cannot be parsed or
 * is malformed, no template exists for the source, the template has an
 * unsupported host type, or a cred cannot be allocated.
 */
boolean_t
tsol_get_pkt_label(mblk_t *mp, int version, ip_recv_attr_t *ira)
{
	tsol_tpc_t	*src_rhtp = NULL;
	uchar_t		*opt_ptr = NULL;
	const ipha_t	*ipha;
	bslabel_t	sl;
	uint32_t	doi;
	tsol_ip_label_t	label_type;
	uint32_t	label_flags = 0; /* flags to set in label */
	const cipso_option_t *co;
	const void	*src;
	const ip6_t	*ip6h;
	cred_t		*credp;
	int		proto;

	ASSERT(DB_TYPE(mp) == M_DATA);

	/* make the whole message contiguous before looking at it */
	if (mp->b_cont != NULL && !pullupmsg(mp, -1))
		return (B_FALSE);

	/* locate the source address and any label option, per IP version */
	if (version == IPV4_VERSION) {
		ASSERT(MBLKL(mp) >= IP_SIMPLE_HDR_LENGTH);
		ipha = (const ipha_t *)mp->b_rptr;
		src = &ipha->ipha_src;
		if (!tsol_get_option_v4(mp, &label_type, &opt_ptr))
			return (B_FALSE);
	} else {
		ASSERT(MBLKL(mp) >= IPV6_HDR_LEN);
		ip6h = (const ip6_t *)mp->b_rptr;
		src = &ip6h->ip6_src;
		if (!tsol_get_option_v6(mp, &label_type, &opt_ptr))
			return (B_FALSE);
	}

	switch (label_type) {
	case OPT_CIPSO:
		/*
		 * Convert the CIPSO label to the internal format
		 * and attach it to the dblk cred.
		 * Validity checks based on restrictions defined in
		 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2)
		 * (draft-ietf-cipso-ipsecurity)
		 */
		/* IPv6 CIPSO options are rejected while ip6opt_ls is zero */
		if (version == IPV6_VERSION && ip6opt_ls == 0)
			return (B_FALSE);
		co = (const struct cipso_option *)opt_ptr;
		/* the option must hold at least a minimal type 1 tag */
		if ((co->cipso_length <
		    TSOL_CIPSO_TAG_OFFSET + TSOL_TT1_MIN_LENGTH) ||
		    (co->cipso_length > IP_MAX_OPT_LENGTH))
			return (B_FALSE);
		/* bcopy, as the DOI may not be aligned within the option */
		bcopy(co->cipso_doi, &doi, sizeof (doi));
		doi = ntohl(doi);
		if (!cipso_to_sl(opt_ptr, &sl))
			return (B_FALSE);
		setbltype(&sl, SUN_SL_ID);

		/*
		 * If the source was unlabeled, then flag as such,
		 * (since CIPSO routers may add headers)
		 */

		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
			return (B_FALSE);

		if (src_rhtp->tpc_tp.host_type == UNLABELED)
			label_flags = TSLF_UNLABELED;

		TPC_RELE(src_rhtp);

		break;

	case OPT_NONE:
		/*
		 * Handle special cases that may not be labeled, even
		 * though the sending system may otherwise be configured as
		 * labeled.
		 *	- IGMP
		 *	- IPv4 ICMP Router Discovery
		 *	- IPv6 Neighbor Discovery
		 *	- IPsec ESP
		 */
		if (version == IPV4_VERSION) {
			proto = ipha->ipha_protocol;
			if (proto == IPPROTO_IGMP)
				return (B_TRUE);
			if (proto == IPPROTO_ICMP) {
				const struct icmp *icmp = (const struct icmp *)
				    (mp->b_rptr + IPH_HDR_LENGTH(ipha));

				/* don't read past the end of the data */
				if ((uchar_t *)icmp + ICMP_MINLEN > mp->b_wptr)
					return (B_FALSE);
				if (icmp->icmp_type == ICMP_ROUTERADVERT ||
				    icmp->icmp_type == ICMP_ROUTERSOLICIT)
					return (B_TRUE);
			}
		} else {
			proto = ip6h->ip6_nxt;
			if (proto == IPPROTO_ICMPV6) {
				const icmp6_t *icmp6 = (const icmp6_t *)
				    (mp->b_rptr + IPV6_HDR_LEN);

				/* don't read past the end of the data */
				if ((uchar_t *)icmp6 + ICMP6_MINLEN >
				    mp->b_wptr)
					return (B_FALSE);
				if (icmp6->icmp6_type >= MLD_LISTENER_QUERY &&
				    icmp6->icmp6_type <= ICMP6_MAX_INFO_TYPE)
					return (B_TRUE);
			}
		}

		/*
		 * Look up the tnrhtp database and get the implicit label
		 * that is associated with the sending host and attach
		 * it to the packet.
		 */
		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
			return (B_FALSE);

		/*
		 * If peer is label-aware, mark as "implicit" rather than
		 * "unlabeled" to cause appropriate mac-exempt processing
		 * to happen.
		 */
		if (src_rhtp->tpc_tp.host_type == SUN_CIPSO)
			label_flags = TSLF_IMPLICIT_IN;
		else if (src_rhtp->tpc_tp.host_type == UNLABELED)
			label_flags = TSLF_UNLABELED;
		else {
			DTRACE_PROBE2(tx__get__pkt__label, char *,
			    "template(1) has unknown hosttype",
			    tsol_tpc_t *, src_rhtp);
		}


		/*
		 * An unknown host type is rejected here, since
		 * tsol_find_unlabeled_label() only accepts UNLABELED and
		 * SUN_CIPSO templates.
		 */
		if (!tsol_find_unlabeled_label(src_rhtp, &sl, &doi)) {
			TPC_RELE(src_rhtp);
			return (B_FALSE);
		}
		TPC_RELE(src_rhtp);
		break;

	default:
		return (B_FALSE);
	}

	/*
	 * Build a cred carrying the label.  If `ira' already has a cred,
	 * the new one is derived from it, and the old one is freed only
	 * when IRA_FREE_CRED says `ira' owns it.
	 */
	if (ira->ira_cred == NULL) {
		credp = newcred_from_bslabel(&sl, doi, KM_NOSLEEP);
		if (credp == NULL)
			return (B_FALSE);
	} else {
		credp = copycred_from_bslabel(ira->ira_cred, &sl, doi,
		    KM_NOSLEEP);
		if (credp == NULL)
			return (B_FALSE);
		if (ira->ira_free_flags & IRA_FREE_CRED) {
			crfree(ira->ira_cred);
			ira->ira_free_flags &= ~IRA_FREE_CRED;
			ira->ira_cred = NULL;
		}
	}

	/*
	 * Put the label in ira_tsl for convenience, while keeping
	 * the cred in ira_cred for getpeerucred which is used to get
	 * labels with TX.
	 * Note: no explicit refcnt/free_flag for ira_tsl. The free_flag
	 * for IRA_FREE_CRED is sufficient for both.
	 */
	ira->ira_tsl = crgetlabel(credp);
	ira->ira_cred = credp;
	ira->ira_free_flags |= IRA_FREE_CRED;

	ira->ira_tsl->tsl_flags |= label_flags;
	return (B_TRUE);
}
857
/*
 * This routine determines whether the given packet should be accepted locally.
 * It does a range/set check on the packet's label by looking up the given
 * address in the remote host database.
 *
 * Arguments:
 *	mp	- the packet; only passed to the DTrace probes here
 *	addr	- address looked up with find_tpc() for the range/set check
 *	version	- IP version of `addr', passed through to find_tpc()
 *	ira	- receive attributes supplying ira_cred, ira_tsl, ira_flags
 *	connp	- the receiving connection
 *
 * Returns B_TRUE if the packet may be delivered to `connp', B_FALSE if it
 * must be dropped.  Each drop fires a DTrace probe naming the reason.
 */
boolean_t
tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version,
    ip_recv_attr_t *ira, const conn_t *connp)
{
	const cred_t *credp;
	ts_label_t *plabel, *conn_plabel;
	tsol_tpc_t *tp;
	boolean_t retv;
	const bslabel_t *label, *conn_label;
	boolean_t shared_addr = (ira->ira_flags & IRAF_TX_SHARED_ADDR);

	/*
	 * tsol_get_pkt_label intentionally avoids the labeling process for:
	 *	- IPv6 router and neighbor discovery as well as redirects.
	 *	- MLD packets. (Anything between ICMPv6 code 130 and 138.)
	 *	- IGMP packets.
	 *	- IPv4 router discovery.
	 * In those cases ira_cred is NULL.
	 */
	credp = ira->ira_cred;
	if (credp == NULL)
		return (B_TRUE);

	/*
	 * If this packet is from the inside (not a remote host) and has the
	 * same zoneid as the selected destination, then no checks are
	 * necessary.  Membership in the zone is enough proof.  This is
	 * intended to be a hot path through this function.
	 * Note: Using crgetzone here is ok since the peer is local.
	 */
	if (!crisremote(credp) &&
	    crgetzone(credp) == crgetzone(connp->conn_cred))
		return (B_TRUE);

	plabel = ira->ira_tsl;
	conn_plabel = crgetlabel(connp->conn_cred);
	ASSERT(plabel != NULL && conn_plabel != NULL);

	label = label2bslabel(plabel);
	conn_label = label2bslabel(conn_plabel);


	/*
	 * Implicitly labeled packets from label-aware sources
	 * go only to privileged receivers
	 */
	if ((plabel->tsl_flags & TSLF_IMPLICIT_IN) &&
	    (connp->conn_mac_mode != CONN_MAC_IMPLICIT)) {
		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac_impl,
		    char *,
		    "implicitly labeled packet mp(1) for conn(2) "
		    "which isn't in implicit mac mode",
		    mblk_t *, mp, conn_t *, connp);

		return (B_FALSE);
	}


	/*
	 * MLPs are always validated using the range and set of the local
	 * address, even when the remote host is unlabeled.
	 * (The empty statement below falls through to the find_tpc()
	 * range/set check at the bottom of this function.)
	 */
	if (connp->conn_mlp_type == mlptBoth ||
	    /* LINTED: no consequent */
	    connp->conn_mlp_type == (shared_addr ? mlptShared : mlptPrivate)) {
		;

	/*
	 * If this is a packet from an unlabeled sender, then we must apply
	 * different rules.  If the label is equal to the zone's label, then
	 * it's allowed.  If it's not equal, but the zone is either the global
	 * zone or the label is dominated by the zone's label, then allow it
	 * as long as it's in the range configured for the destination.
	 * Note that the code below also rejects every non-equal label when
	 * the conn is in CONN_MAC_DEFAULT mode.
	 */
	} else if (plabel->tsl_flags & TSLF_UNLABELED) {
		if (plabel->tsl_doi == conn_plabel->tsl_doi &&
		    blequal(label, conn_label))
			return (B_TRUE);

		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) ||
		    (!connp->conn_zone_is_global &&
		    (plabel->tsl_doi != conn_plabel->tsl_doi ||
		    !bldominates(conn_label, label)))) {
			DTRACE_PROBE3(
			    tx__ip__log__drop__receivelocal__mac_unl,
			    char *,
			    "unlabeled packet mp(1) fails mac for conn(2)",
			    mblk_t *, mp, conn_t *, connp);
			return (B_FALSE);
		}

	/*
	 * If this is a packet from a labeled sender, verify the
	 * label on the packet matches the connection label.
	 */
	} else {
		if (plabel->tsl_doi != conn_plabel->tsl_doi ||
		    !blequal(label, conn_label)) {
			DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac__slp,
			    char *,
			    "packet mp(1) failed label match to SLP conn(2)",
			    mblk_t *, mp, conn_t *, connp);
			return (B_FALSE);
		}
		/*
		 * No further checks will be needed if this is a zone-
		 * specific address because (1) The process for bringing up
		 * the interface ensures the zone's label is within the zone-
		 * specific address's valid label range; (2) For cases where
		 * the conn is bound to the unspecified addresses, ip fanout
		 * logic ensures conn's zoneid equals the dest addr's zoneid;
		 * (3) Mac-exempt and mlp logic above already handle all
		 * cases where the zone label may not be the same as the
		 * conn label.
		 */
		if (!shared_addr)
			return (B_TRUE);
	}

	/*
	 * Final check: the packet's DOI and label must be acceptable to the
	 * template found for `addr'.  The hold taken by find_tpc() is
	 * dropped with TPC_RELE() below.
	 */
	tp = find_tpc(addr, version, B_FALSE);
	if (tp == NULL) {
		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__no__tnr,
		    char *, "dropping mp(1), host(2) lacks entry",
		    mblk_t *, mp, void *, addr);
		return (B_FALSE);
	}

	/*
	 * The local host address should not be unlabeled at this point.  The
	 * only way this can happen is that the destination isn't unicast.  We
	 * assume that the packet should not have had a label, and thus should
	 * have been handled by the TSLF_UNLABELED logic above.
	 */
	if (tp->tpc_tp.host_type == UNLABELED) {
		retv = B_FALSE;
		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__flag, char *,
		    "mp(1) unlabeled source, but tp is not unlabeled.",
		    mblk_t *, mp, tsol_tpc_t *, tp);

	} else if (tp->tpc_tp.host_type != SUN_CIPSO) {
		retv = B_FALSE;
		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__tptype, char *,
		    "delivering mp(1), found unrecognized tpc(2) type.",
		    mblk_t *, mp, tsol_tpc_t *, tp);

	} else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) {
		retv = B_FALSE;
		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
		    "mp(1) could not be delievered to tp(2), doi mismatch",
		    mblk_t *, mp, tsol_tpc_t *, tp);

	/* the label must be in the template's range or in its label set */
	} else if (!_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) &&
	    !blinlset(label, tp->tpc_tp.tp_sl_set_cipso)) {
		retv = B_FALSE;
		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
		    "mp(1) could not be delievered to tp(2), bad mac",
		    mblk_t *, mp, tsol_tpc_t *, tp);
	} else {
		retv = B_TRUE;
	}

	TPC_RELE(tp);

	return (retv);
}
1028
1029 boolean_t
tsol_can_accept_raw(mblk_t * mp,ip_recv_attr_t * ira,boolean_t check_host)1030 tsol_can_accept_raw(mblk_t *mp, ip_recv_attr_t *ira, boolean_t check_host)
1031 {
1032 ts_label_t *plabel = NULL;
1033 tsol_tpc_t *src_rhtp, *dst_rhtp;
1034 boolean_t retv;
1035
1036 plabel = ira->ira_tsl;
1037
1038 /* We are bootstrapping or the internal template was never deleted */
1039 if (plabel == NULL)
1040 return (B_TRUE);
1041
1042 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
1043 ipha_t *ipha = (ipha_t *)mp->b_rptr;
1044
1045 src_rhtp = find_tpc(&ipha->ipha_src, IPV4_VERSION,
1046 B_FALSE);
1047 if (src_rhtp == NULL)
1048 return (B_FALSE);
1049 dst_rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION,
1050 B_FALSE);
1051 } else {
1052 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
1053
1054 src_rhtp = find_tpc(&ip6h->ip6_src, IPV6_VERSION,
1055 B_FALSE);
1056 if (src_rhtp == NULL)
1057 return (B_FALSE);
1058 dst_rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION,
1059 B_FALSE);
1060 }
1061 if (dst_rhtp == NULL) {
1062 TPC_RELE(src_rhtp);
1063 return (B_FALSE);
1064 }
1065
1066 if (label2doi(plabel) != src_rhtp->tpc_tp.tp_doi) {
1067 retv = B_FALSE;
1068
1069 /*
1070 * Check that the packet's label is in the correct range for labeled
1071 * sender, or is equal to the default label for unlabeled sender.
1072 */
1073 } else if ((src_rhtp->tpc_tp.host_type != UNLABELED &&
1074 !_blinrange(label2bslabel(plabel),
1075 &src_rhtp->tpc_tp.tp_sl_range_cipso) &&
1076 !blinlset(label2bslabel(plabel),
1077 src_rhtp->tpc_tp.tp_sl_set_cipso)) ||
1078 (src_rhtp->tpc_tp.host_type == UNLABELED &&
1079 !blequal(&plabel->tsl_label, &src_rhtp->tpc_tp.tp_def_label))) {
1080 retv = B_FALSE;
1081
1082 } else if (check_host) {
1083 retv = B_TRUE;
1084
1085 /*
1086 * Until we have SL range in the Zone structure, pass it
1087 * when our own address lookup returned an internal entry.
1088 */
1089 } else switch (dst_rhtp->tpc_tp.host_type) {
1090 case UNLABELED:
1091 retv = B_TRUE;
1092 break;
1093
1094 case SUN_CIPSO:
1095 retv = _blinrange(label2bslabel(plabel),
1096 &dst_rhtp->tpc_tp.tp_sl_range_cipso) ||
1097 blinlset(label2bslabel(plabel),
1098 dst_rhtp->tpc_tp.tp_sl_set_cipso);
1099 break;
1100
1101 default:
1102 retv = B_FALSE;
1103 }
1104 TPC_RELE(src_rhtp);
1105 TPC_RELE(dst_rhtp);
1106 return (retv);
1107 }
1108
1109 /*
1110 * This routine determines whether a response to a failed packet delivery or
1111 * connection should be sent back. By default, the policy is to allow such
1112 * messages to be sent at all times, as these messages reveal little useful
1113 * information and are healthy parts of TCP/IP networking.
1114 *
1115 * If tsol_strict_error is set, then we do strict tests: if the packet label is
1116 * within the label range/set of this host/zone, return B_TRUE; otherwise
1117 * return B_FALSE, which causes the packet to be dropped silently.
1118 *
1119 * Note that tsol_get_pkt_label will cause the packet to drop if the sender is
1120 * marked as labeled in the remote host database, but the packet lacks a label.
1121 * This means that we don't need to do a lookup on the source; the
1122 * TSLF_UNLABELED flag is sufficient.
1123 */
1124 boolean_t
tsol_can_reply_error(const mblk_t * mp,ip_recv_attr_t * ira)1125 tsol_can_reply_error(const mblk_t *mp, ip_recv_attr_t *ira)
1126 {
1127 ts_label_t *plabel = NULL;
1128 tsol_tpc_t *rhtp;
1129 const ipha_t *ipha;
1130 const ip6_t *ip6h;
1131 boolean_t retv;
1132 bslabel_t *pktbs;
1133
1134 /* Caller must pull up at least the IP header */
1135 ASSERT(MBLKL(mp) >= (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION ?
1136 sizeof (*ipha) : sizeof (*ip6h)));
1137
1138 if (!tsol_strict_error)
1139 return (B_TRUE);
1140
1141 plabel = ira->ira_tsl;
1142
1143 /* We are bootstrapping or the internal template was never deleted */
1144 if (plabel == NULL)
1145 return (B_TRUE);
1146
1147 if (plabel->tsl_flags & TSLF_IMPLICIT_IN) {
1148 DTRACE_PROBE3(tx__ip__log__drop__replyerror__unresolved__label,
1149 char *,
1150 "cannot send error report for packet mp(1) with "
1151 "unresolved security label sl(2)",
1152 mblk_t *, mp, ts_label_t *, plabel);
1153 return (B_FALSE);
1154 }
1155
1156
1157 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
1158 ipha = (const ipha_t *)mp->b_rptr;
1159 rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, B_FALSE);
1160 } else {
1161 ip6h = (const ip6_t *)mp->b_rptr;
1162 rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION, B_FALSE);
1163 }
1164
1165 if (rhtp == NULL || label2doi(plabel) != rhtp->tpc_tp.tp_doi) {
1166 retv = B_FALSE;
1167 } else {
1168 /*
1169 * If we're in the midst of forwarding, then the destination
1170 * address might not be labeled. In that case, allow unlabeled
1171 * packets through only if the default label is the same, and
1172 * labeled ones if they dominate.
1173 */
1174 pktbs = label2bslabel(plabel);
1175 switch (rhtp->tpc_tp.host_type) {
1176 case UNLABELED:
1177 if (plabel->tsl_flags & TSLF_UNLABELED) {
1178 retv = blequal(pktbs,
1179 &rhtp->tpc_tp.tp_def_label);
1180 } else {
1181 retv = bldominates(pktbs,
1182 &rhtp->tpc_tp.tp_def_label);
1183 }
1184 break;
1185
1186 case SUN_CIPSO:
1187 retv = _blinrange(pktbs,
1188 &rhtp->tpc_tp.tp_sl_range_cipso) ||
1189 blinlset(pktbs, rhtp->tpc_tp.tp_sl_set_cipso);
1190 break;
1191
1192 default:
1193 retv = B_FALSE;
1194 break;
1195 }
1196 }
1197
1198 if (rhtp != NULL)
1199 TPC_RELE(rhtp);
1200
1201 return (retv);
1202 }
1203
1204 /*
1205 * Finds the zone associated with the receive attributes. Returns GLOBAL_ZONEID
1206 * if the zone cannot be located.
1207 *
1208 * This is used by the classifier when the packet matches an ALL_ZONES IRE, and
1209 * there's no MLP defined.
1210 *
1211 * Note that we assume that this is only invoked in the ALL_ZONES case.
1212 * Handling other cases would require handling exclusive IP zones where either
1213 * this routine or the callers would have to map from
1214 * the zoneid (zone->zone_id) to what IP uses in conn_zoneid etc.
1215 */
1216 zoneid_t
tsol_attr_to_zoneid(const ip_recv_attr_t * ira)1217 tsol_attr_to_zoneid(const ip_recv_attr_t *ira)
1218 {
1219 zone_t *zone;
1220 ts_label_t *label;
1221
1222 if ((label = ira->ira_tsl) != NULL) {
1223 zone = zone_find_by_label(label);
1224 if (zone != NULL) {
1225 zoneid_t zoneid = zone->zone_id;
1226
1227 zone_rele(zone);
1228 return (zoneid);
1229 }
1230 }
1231 return (GLOBAL_ZONEID);
1232 }
1233
1234 int
tsol_ire_match_gwattr(ire_t * ire,const ts_label_t * tsl)1235 tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl)
1236 {
1237 int error = 0;
1238 tsol_ire_gw_secattr_t *attrp = NULL;
1239 tsol_tnrhc_t *gw_rhc = NULL;
1240 tsol_gcgrp_t *gcgrp = NULL;
1241 tsol_gc_t *gc = NULL;
1242 in_addr_t ga_addr4;
1243 void *paddr = NULL;
1244
1245 /* Not in Trusted mode or IRE is local/loopback/broadcast/interface */
1246 if (!is_system_labeled() ||
1247 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST |
1248 IRE_IF_ALL | IRE_MULTICAST | IRE_NOROUTE)))
1249 goto done;
1250
1251 /*
1252 * If we don't have a label to compare with, or the IRE does not
1253 * contain any gateway security attributes, there's not much that
1254 * we can do. We let the former case pass, and the latter fail,
1255 * since the IRE doesn't qualify for a match due to the lack of
1256 * security attributes.
1257 */
1258 if (tsl == NULL || ire->ire_gw_secattr == NULL) {
1259 if (tsl != NULL) {
1260 DTRACE_PROBE3(
1261 tx__ip__log__drop__irematch__nogwsec, char *,
1262 "ire(1) lacks ire_gw_secattr when matching "
1263 "label(2)", ire_t *, ire, ts_label_t *, tsl);
1264 error = EACCES;
1265 }
1266 goto done;
1267 }
1268
1269 attrp = ire->ire_gw_secattr;
1270
1271 /*
1272 * The possible lock order scenarios related to the tsol gateway
1273 * attribute locks are documented at the beginning of ip.c in the
1274 * lock order scenario section.
1275 */
1276 mutex_enter(&attrp->igsa_lock);
1277
1278 /*
1279 * We seek the group
1280 * structure which contains all security credentials of the gateway.
1281 * An offline IRE is associated with at most one gateway credential.
1282 */
1283 if ((gc = attrp->igsa_gc) != NULL) {
1284 gcgrp = gc->gc_grp;
1285 ASSERT(gcgrp != NULL);
1286 rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1287 GCGRP_REFHOLD(gcgrp);
1288 }
1289
1290 if ((gw_rhc = attrp->igsa_rhc) != NULL) {
1291 /*
1292 * If our cached entry has grown stale, then discard it so we
1293 * can get a new one.
1294 */
1295 if (gw_rhc->rhc_invalid || gw_rhc->rhc_tpc->tpc_invalid) {
1296 TNRHC_RELE(gw_rhc);
1297 attrp->igsa_rhc = gw_rhc = NULL;
1298 } else {
1299 TNRHC_HOLD(gw_rhc)
1300 }
1301 }
1302
1303 /* Last attempt at loading the template had failed; try again */
1304 if (gw_rhc == NULL) {
1305 if (gcgrp != NULL) {
1306 tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr;
1307
1308 if (ire->ire_ipversion == IPV4_VERSION) {
1309 ASSERT(ga->ga_af == AF_INET);
1310 IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4);
1311 paddr = &ga_addr4;
1312 } else {
1313 ASSERT(ga->ga_af == AF_INET6);
1314 paddr = &ga->ga_addr;
1315 }
1316 } else if (ire->ire_type & IRE_OFFLINK) {
1317 if (ire->ire_ipversion == IPV6_VERSION)
1318 paddr = &ire->ire_gateway_addr_v6;
1319 else if (ire->ire_ipversion == IPV4_VERSION)
1320 paddr = &ire->ire_gateway_addr;
1321 }
1322
1323 /* We've found a gateway address to do the template lookup */
1324 if (paddr != NULL) {
1325 ASSERT(gw_rhc == NULL);
1326 gw_rhc = find_rhc(paddr, ire->ire_ipversion, B_FALSE);
1327 if (gw_rhc != NULL) {
1328 /*
1329 * Note that if the lookup above returned an
1330 * internal template, we'll use it for the
1331 * time being, and do another lookup next
1332 * time around.
1333 */
1334 /* Another thread has loaded the template? */
1335 if (attrp->igsa_rhc != NULL) {
1336 TNRHC_RELE(gw_rhc)
1337 /* reload, it could be different */
1338 gw_rhc = attrp->igsa_rhc;
1339 } else {
1340 attrp->igsa_rhc = gw_rhc;
1341 }
1342 /*
1343 * Hold an extra reference just like we did
1344 * above prior to dropping the igsa_lock.
1345 */
1346 TNRHC_HOLD(gw_rhc)
1347 }
1348 }
1349 }
1350
1351 mutex_exit(&attrp->igsa_lock);
1352 /* Gateway template not found */
1353 if (gw_rhc == NULL) {
1354 /*
1355 * If destination address is directly reachable through an
1356 * interface rather than through a learned route, pass it.
1357 */
1358 if (paddr != NULL) {
1359 DTRACE_PROBE3(
1360 tx__ip__log__drop__irematch__nogwtmpl, char *,
1361 "ire(1), label(2) off-link with no gw_rhc",
1362 ire_t *, ire, ts_label_t *, tsl);
1363 error = EINVAL;
1364 }
1365 goto done;
1366 }
1367
1368 if (gc != NULL) {
1369
1370 tsol_gcdb_t *gcdb;
1371 /*
1372 * In the case of IRE_CACHE we've got one or more gateway
1373 * security credentials to compare against the passed in label.
1374 * Perform label range comparison against each security
1375 * credential of the gateway. In the case of a prefix ire
1376 * we need to match against the security attributes of
1377 * just the route itself, so the loop is executed only once.
1378 */
1379 ASSERT(gcgrp != NULL);
1380 gcdb = gc->gc_db;
1381 if (tsl->tsl_doi != gcdb->gcdb_doi ||
1382 !_blinrange(&tsl->tsl_label, &gcdb->gcdb_slrange)) {
1383 DTRACE_PROBE3(
1384 tx__ip__log__drop__irematch__nogcmatched,
1385 char *, "ire(1), tsl(2): all gc failed match",
1386 ire_t *, ire, ts_label_t *, tsl);
1387 error = EACCES;
1388 }
1389 } else {
1390 /*
1391 * We didn't find any gateway credentials in the IRE
1392 * attributes; fall back to the gateway's template for
1393 * label range checks, if we are required to do so.
1394 */
1395 ASSERT(gw_rhc != NULL);
1396 switch (gw_rhc->rhc_tpc->tpc_tp.host_type) {
1397 case SUN_CIPSO:
1398 if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
1399 (!_blinrange(&tsl->tsl_label,
1400 &gw_rhc->rhc_tpc->tpc_tp.tp_sl_range_cipso) &&
1401 !blinlset(&tsl->tsl_label,
1402 gw_rhc->rhc_tpc->tpc_tp.tp_sl_set_cipso))) {
1403 error = EACCES;
1404 DTRACE_PROBE4(
1405 tx__ip__log__drop__irematch__deftmpl,
1406 char *, "ire(1), tsl(2), gw_rhc(3) "
1407 "failed match (cipso gw)",
1408 ire_t *, ire, ts_label_t *, tsl,
1409 tsol_tnrhc_t *, gw_rhc);
1410 }
1411 break;
1412
1413 case UNLABELED:
1414 if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
1415 (!_blinrange(&tsl->tsl_label,
1416 &gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_range) &&
1417 !blinlset(&tsl->tsl_label,
1418 gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_set))) {
1419 error = EACCES;
1420 DTRACE_PROBE4(
1421 tx__ip__log__drop__irematch__deftmpl,
1422 char *, "ire(1), tsl(2), gw_rhc(3) "
1423 "failed match (unlabeled gw)",
1424 ire_t *, ire, ts_label_t *, tsl,
1425 tsol_tnrhc_t *, gw_rhc);
1426 }
1427 break;
1428 }
1429 }
1430
1431 done:
1432
1433 if (gcgrp != NULL) {
1434 rw_exit(&gcgrp->gcgrp_rwlock);
1435 GCGRP_REFRELE(gcgrp);
1436 }
1437
1438 if (gw_rhc != NULL)
1439 TNRHC_RELE(gw_rhc)
1440
1441 return (error);
1442 }
1443
1444 /*
1445 * Performs label accreditation checks for packet forwarding.
1446 * Add or remove a CIPSO option as needed.
1447 *
1448 * Returns a pointer to the modified mblk if allowed for forwarding,
1449 * or NULL if the packet must be dropped.
1450 */
1451 mblk_t *
tsol_ip_forward(ire_t * ire,mblk_t * mp,const ip_recv_attr_t * ira)1452 tsol_ip_forward(ire_t *ire, mblk_t *mp, const ip_recv_attr_t *ira)
1453 {
1454 tsol_ire_gw_secattr_t *attrp = NULL;
1455 ipha_t *ipha;
1456 ip6_t *ip6h;
1457 const void *pdst;
1458 const void *psrc;
1459 boolean_t off_link;
1460 tsol_tpc_t *dst_rhtp, *gw_rhtp;
1461 tsol_ip_label_t label_type;
1462 uchar_t *opt_ptr = NULL;
1463 ts_label_t *tsl;
1464 uint8_t proto;
1465 int af, adjust;
1466 uint16_t iplen;
1467 boolean_t need_tpc_rele = B_FALSE;
1468 ipaddr_t *gw;
1469 ip_stack_t *ipst = ire->ire_ipst;
1470 int err;
1471 ts_label_t *effective_tsl = NULL;
1472
1473 ASSERT(ire != NULL && mp != NULL);
1474 /*
1475 * Note that the ire is the first one found, i.e., an IRE_OFFLINK if
1476 * the destination is offlink.
1477 */
1478
1479 af = (ire->ire_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6;
1480 ipha = NULL;
1481 ip6h = NULL;
1482 gw_rhtp = NULL;
1483
1484 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
1485 ASSERT(ire->ire_ipversion == IPV4_VERSION);
1486 ipha = (ipha_t *)mp->b_rptr;
1487 psrc = &ipha->ipha_src;
1488 pdst = &ipha->ipha_dst;
1489 proto = ipha->ipha_protocol;
1490 if (!tsol_get_option_v4(mp, &label_type, &opt_ptr))
1491 return (NULL);
1492 } else {
1493 ASSERT(ire->ire_ipversion == IPV6_VERSION);
1494 ip6h = (ip6_t *)mp->b_rptr;
1495 psrc = &ip6h->ip6_src;
1496 pdst = &ip6h->ip6_dst;
1497 proto = ip6h->ip6_nxt;
1498
1499 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP &&
1500 proto != IPPROTO_ICMPV6) {
1501 uint8_t *nexthdrp;
1502 uint16_t hdr_len;
1503
1504 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len,
1505 &nexthdrp)) {
1506 /* malformed packet; drop it */
1507 return (NULL);
1508 }
1509 proto = *nexthdrp;
1510 }
1511 if (!tsol_get_option_v6(mp, &label_type, &opt_ptr))
1512 return (NULL);
1513 }
1514 /*
1515 * off_link is TRUE if destination not directly reachable.
1516 */
1517 off_link = (ire->ire_type & IRE_OFFLINK);
1518
1519 if ((tsl = ira->ira_tsl) == NULL)
1520 return (mp);
1521
1522 if (tsl->tsl_flags & TSLF_IMPLICIT_IN) {
1523 DTRACE_PROBE3(tx__ip__log__drop__forward__unresolved__label,
1524 char *,
1525 "cannot forward packet mp(1) with unresolved "
1526 "security label sl(2)",
1527 mblk_t *, mp, ts_label_t *, tsl);
1528
1529 return (NULL);
1530 }
1531
1532
1533 ASSERT(psrc != NULL && pdst != NULL);
1534 dst_rhtp = find_tpc(pdst, ire->ire_ipversion, B_FALSE);
1535
1536 if (dst_rhtp == NULL) {
1537 /*
1538 * Without a template we do not know if forwarding
1539 * violates MAC
1540 */
1541 DTRACE_PROBE3(tx__ip__log__drop__forward__nodst, char *,
1542 "mp(1) dropped, no template for destination ip4|6(2)",
1543 mblk_t *, mp, void *, pdst);
1544 return (NULL);
1545 }
1546
1547 /*
1548 * Gateway template must have existed for off-link destinations,
1549 * since tsol_ire_match_gwattr has ensured such condition.
1550 */
1551 if (ire->ire_ipversion == IPV4_VERSION && off_link) {
1552 /*
1553 * Surya note: first check if we can get the gw_rhtp from
1554 * the ire_gw_secattr->igsa_rhc; if this is null, then
1555 * do a lookup based on the ire_addr (address of gw)
1556 */
1557 if (ire->ire_gw_secattr != NULL &&
1558 ire->ire_gw_secattr->igsa_rhc != NULL) {
1559 attrp = ire->ire_gw_secattr;
1560 gw_rhtp = attrp->igsa_rhc->rhc_tpc;
1561 } else {
1562 gw = &ire->ire_gateway_addr;
1563 gw_rhtp = find_tpc(gw, ire->ire_ipversion, B_FALSE);
1564 need_tpc_rele = B_TRUE;
1565 }
1566 if (gw_rhtp == NULL) {
1567 DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *,
1568 "mp(1) dropped, no gateway in ire attributes(2)",
1569 mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp);
1570 mp = NULL;
1571 goto keep_label;
1572 }
1573 }
1574 if (ire->ire_ipversion == IPV6_VERSION &&
1575 ((attrp = ire->ire_gw_secattr) == NULL || attrp->igsa_rhc == NULL ||
1576 (gw_rhtp = attrp->igsa_rhc->rhc_tpc) == NULL) && off_link) {
1577 DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *,
1578 "mp(1) dropped, no gateway in ire attributes(2)",
1579 mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp);
1580 mp = NULL;
1581 goto keep_label;
1582 }
1583
1584 /*
1585 * Check that the label for the packet is acceptable
1586 * by destination host; otherwise, drop it.
1587 */
1588 switch (dst_rhtp->tpc_tp.host_type) {
1589 case SUN_CIPSO:
1590 if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi ||
1591 (!_blinrange(&tsl->tsl_label,
1592 &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
1593 !blinlset(&tsl->tsl_label,
1594 dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
1595 DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *,
1596 "labeled packet mp(1) dropped, label(2) fails "
1597 "destination(3) accredation check",
1598 mblk_t *, mp, ts_label_t *, tsl,
1599 tsol_tpc_t *, dst_rhtp);
1600 mp = NULL;
1601 goto keep_label;
1602 }
1603 break;
1604
1605
1606 case UNLABELED:
1607 if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi ||
1608 !blequal(&dst_rhtp->tpc_tp.tp_def_label,
1609 &tsl->tsl_label)) {
1610 DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *,
1611 "unlabeled packet mp(1) dropped, label(2) fails "
1612 "destination(3) accredation check",
1613 mblk_t *, mp, ts_label_t *, tsl,
1614 tsol_tpc_t *, dst_rhtp);
1615 mp = NULL;
1616 goto keep_label;
1617 }
1618 break;
1619 }
1620 if (label_type == OPT_CIPSO) {
1621 /*
1622 * We keep the label on any of the following cases:
1623 *
1624 * 1. The destination is labeled (on/off-link).
1625 * 2. The unlabeled destination is off-link,
1626 * and the next hop gateway is labeled.
1627 */
1628 if (dst_rhtp->tpc_tp.host_type != UNLABELED ||
1629 (off_link &&
1630 gw_rhtp->tpc_tp.host_type != UNLABELED))
1631 goto keep_label;
1632
1633 /*
1634 * Strip off the CIPSO option from the packet because: the
1635 * unlabeled destination host is directly reachable through
1636 * an interface (on-link); or, the unlabeled destination host
1637 * is not directly reachable (off-link), and the next hop
1638 * gateway is unlabeled.
1639 */
1640 adjust = (af == AF_INET) ? tsol_remove_secopt(ipha, MBLKL(mp)) :
1641 tsol_remove_secopt_v6(ip6h, MBLKL(mp));
1642
1643 ASSERT(adjust <= 0);
1644 if (adjust != 0) {
1645
1646 /* adjust is negative */
1647 ASSERT((mp->b_wptr + adjust) >= mp->b_rptr);
1648 mp->b_wptr += adjust;
1649 /*
1650 * Note that caller adjusts ira_pktlen and
1651 * ira_ip_hdr_length
1652 *
1653 * For AF_INET6 note that tsol_remove_secopt_v6
1654 * adjusted ip6_plen.
1655 */
1656 if (af == AF_INET) {
1657 ipha = (ipha_t *)mp->b_rptr;
1658 iplen = ntohs(ipha->ipha_length) + adjust;
1659 ipha->ipha_length = htons(iplen);
1660 ipha->ipha_hdr_checksum = 0;
1661 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1662 }
1663 DTRACE_PROBE3(tx__ip__log__info__forward__adjust,
1664 char *,
1665 "mp(1) adjusted(2) for CIPSO option removal",
1666 mblk_t *, mp, int, adjust);
1667 }
1668 goto keep_label;
1669 }
1670
1671 ASSERT(label_type == OPT_NONE);
1672 ASSERT(dst_rhtp != NULL);
1673
1674 /*
1675 * We need to add CIPSO option if the destination or the next hop
1676 * gateway is labeled. Otherwise, pass the packet as is.
1677 */
1678 if (dst_rhtp->tpc_tp.host_type == UNLABELED &&
1679 (!off_link || gw_rhtp->tpc_tp.host_type == UNLABELED))
1680 goto keep_label;
1681
1682 /*
1683 * Since we are forwarding packets we use GLOBAL_ZONEID for
1684 * the IRE lookup in tsol_check_label.
1685 * Since mac_exempt is false the zoneid isn't used for anything
1686 * but the IRE lookup, hence we set zone_is_global to false.
1687 */
1688 if (af == AF_INET) {
1689 err = tsol_check_label_v4(tsl, GLOBAL_ZONEID, &mp,
1690 CONN_MAC_DEFAULT, B_FALSE, ipst, &effective_tsl);
1691 } else {
1692 err = tsol_check_label_v6(tsl, GLOBAL_ZONEID, &mp,
1693 CONN_MAC_DEFAULT, B_FALSE, ipst, &effective_tsl);
1694 }
1695 if (err != 0) {
1696 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
1697 ip_drop_output("tsol_check_label", mp, NULL);
1698 freemsg(mp);
1699 mp = NULL;
1700 goto keep_label;
1701 }
1702
1703 /*
1704 * The effective_tsl must never affect the routing decision, hence
1705 * we ignore it here.
1706 */
1707 if (effective_tsl != NULL)
1708 label_rele(effective_tsl);
1709
1710 if (af == AF_INET) {
1711 ipha = (ipha_t *)mp->b_rptr;
1712 ipha->ipha_hdr_checksum = 0;
1713 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1714 }
1715
1716 keep_label:
1717 TPC_RELE(dst_rhtp);
1718 if (need_tpc_rele && gw_rhtp != NULL)
1719 TPC_RELE(gw_rhtp);
1720 return (mp);
1721 }
1722
1723 /*
1724 * Name: tsol_pmtu_adjust()
1725 *
1726 * Returns the adjusted mtu after removing security option.
1727 * Removes/subtracts the option if the packet's cred indicates an unlabeled
1728 * sender or if pkt_diff indicates this system enlarged the packet.
1729 */
1730 uint32_t
tsol_pmtu_adjust(mblk_t * mp,uint32_t mtu,int pkt_diff,int af)1731 tsol_pmtu_adjust(mblk_t *mp, uint32_t mtu, int pkt_diff, int af)
1732 {
1733 int label_adj = 0;
1734 uint32_t min_mtu = IP_MIN_MTU;
1735 tsol_tpc_t *src_rhtp;
1736 void *src;
1737
1738 /*
1739 * Note: label_adj is non-positive, indicating the number of
1740 * bytes removed by removing the security option from the
1741 * header.
1742 */
1743 if (af == AF_INET6) {
1744 ip6_t *ip6h;
1745
1746 min_mtu = IPV6_MIN_MTU;
1747 ip6h = (ip6_t *)mp->b_rptr;
1748 src = &ip6h->ip6_src;
1749 if ((src_rhtp = find_tpc(src, IPV6_VERSION, B_FALSE)) == NULL)
1750 return (mtu);
1751 if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED) {
1752 label_adj = tsol_remove_secopt_v6(
1753 (ip6_t *)mp->b_rptr, MBLKL(mp));
1754 }
1755 } else {
1756 ipha_t *ipha;
1757
1758 ASSERT(af == AF_INET);
1759 ipha = (ipha_t *)mp->b_rptr;
1760 src = &ipha->ipha_src;
1761 if ((src_rhtp = find_tpc(src, IPV4_VERSION, B_FALSE)) == NULL)
1762 return (mtu);
1763 if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED)
1764 label_adj = tsol_remove_secopt(
1765 (ipha_t *)mp->b_rptr, MBLKL(mp));
1766 }
1767 /*
1768 * Make pkt_diff non-negative and the larger of the bytes
1769 * previously added (if any) or just removed, since label
1770 * addition + subtraction may not be completely idempotent.
1771 */
1772 if (pkt_diff < -label_adj)
1773 pkt_diff = -label_adj;
1774 if (pkt_diff > 0 && pkt_diff < mtu)
1775 mtu -= pkt_diff;
1776
1777 TPC_RELE(src_rhtp);
1778 return (MAX(mtu, min_mtu));
1779 }
1780
1781 /*
1782 * Name: tsol_rtsa_init()
1783 *
1784 * Normal: Sanity checks on the route security attributes provided by
1785 * user. Convert it into a route security parameter list to
1786 * be returned to caller.
1787 *
1788 * Output: EINVAL if bad security attributes in the routing message
1789 * ENOMEM if unable to allocate data structures
1790 * 0 otherwise.
1791 *
1792 * Note: On input, cp must point to the end of any addresses in
1793 * the rt_msghdr_t structure.
1794 */
1795 int
tsol_rtsa_init(rt_msghdr_t * rtm,tsol_rtsecattr_t * sp,caddr_t cp)1796 tsol_rtsa_init(rt_msghdr_t *rtm, tsol_rtsecattr_t *sp, caddr_t cp)
1797 {
1798 uint_t sacnt;
1799 int err;
1800 caddr_t lim;
1801 tsol_rtsecattr_t *tp;
1802
1803 ASSERT((cp >= (caddr_t)&rtm[1]) && sp != NULL);
1804
1805 /*
1806 * In theory, we could accept as many security attributes configured
1807 * per route destination. However, the current design is limited
1808 * such that at most only one set security attributes is allowed to
1809 * be associated with a prefix IRE. We therefore assert for now.
1810 */
1811 /* LINTED */
1812 ASSERT(TSOL_RTSA_REQUEST_MAX == 1);
1813
1814 sp->rtsa_cnt = 0;
1815 lim = (caddr_t)rtm + rtm->rtm_msglen;
1816 ASSERT(cp <= lim);
1817
1818 if ((lim - cp) < sizeof (rtm_ext_t) ||
1819 ((rtm_ext_t *)cp)->rtmex_type != RTMEX_GATEWAY_SECATTR)
1820 return (0);
1821
1822 if (((rtm_ext_t *)cp)->rtmex_len < sizeof (tsol_rtsecattr_t))
1823 return (EINVAL);
1824
1825 cp += sizeof (rtm_ext_t);
1826
1827 if ((lim - cp) < sizeof (*tp) ||
1828 (tp = (tsol_rtsecattr_t *)cp, (sacnt = tp->rtsa_cnt) == 0) ||
1829 (lim - cp) < TSOL_RTSECATTR_SIZE(sacnt))
1830 return (EINVAL);
1831
1832 /*
1833 * Trying to add route security attributes when system
1834 * labeling service is not available, or when user supllies
1835 * more than the maximum number of security attributes
1836 * allowed per request.
1837 */
1838 if ((sacnt > 0 && !is_system_labeled()) ||
1839 sacnt > TSOL_RTSA_REQUEST_MAX)
1840 return (EINVAL);
1841
1842 /* Ensure valid credentials */
1843 if ((err = rtsa_validate(&((tsol_rtsecattr_t *)cp)->
1844 rtsa_attr[0])) != 0) {
1845 cp += sizeof (*sp);
1846 return (err);
1847 }
1848
1849 bcopy(cp, sp, sizeof (*sp));
1850 cp += sizeof (*sp);
1851 return (0);
1852 }
1853
1854 int
tsol_ire_init_gwattr(ire_t * ire,uchar_t ipversion,tsol_gc_t * gc)1855 tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc)
1856 {
1857 tsol_ire_gw_secattr_t *attrp;
1858 boolean_t exists = B_FALSE;
1859 in_addr_t ga_addr4;
1860 void *paddr = NULL;
1861 tsol_gcgrp_t *gcgrp = NULL;
1862
1863 ASSERT(ire != NULL);
1864
1865 /*
1866 * The only time that attrp can be NULL is when this routine is
1867 * called for the first time during the creation/initialization
1868 * of the corresponding IRE. It will only get cleared when the
1869 * IRE is deleted.
1870 */
1871 if ((attrp = ire->ire_gw_secattr) == NULL) {
1872 attrp = ire_gw_secattr_alloc(KM_NOSLEEP);
1873 if (attrp == NULL)
1874 return (ENOMEM);
1875 ire->ire_gw_secattr = attrp;
1876 } else {
1877 exists = B_TRUE;
1878 mutex_enter(&attrp->igsa_lock);
1879
1880 if (attrp->igsa_rhc != NULL) {
1881 TNRHC_RELE(attrp->igsa_rhc);
1882 attrp->igsa_rhc = NULL;
1883 }
1884
1885 if (attrp->igsa_gc != NULL)
1886 GC_REFRELE(attrp->igsa_gc);
1887 }
1888 ASSERT(!exists || MUTEX_HELD(&attrp->igsa_lock));
1889
1890 /*
1891 * References already held by caller and we keep them;
1892 * note that gc may be set to NULL to clear out igsa_gc.
1893 */
1894 attrp->igsa_gc = gc;
1895
1896 if (gc != NULL) {
1897 gcgrp = gc->gc_grp;
1898 ASSERT(gcgrp != NULL);
1899 }
1900
1901 /*
1902 * Intialize the template for gateway; we use the gateway's
1903 * address found in either the passed in gateway credential
1904 * or group pointer, or the ire_gateway_addr{_v6} field.
1905 */
1906 if (gcgrp != NULL) {
1907 tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr;
1908
1909 /*
1910 * Caller is holding a reference, and that we don't
1911 * need to hold any lock to access the address.
1912 */
1913 if (ipversion == IPV4_VERSION) {
1914 ASSERT(ga->ga_af == AF_INET);
1915 IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4);
1916 paddr = &ga_addr4;
1917 } else {
1918 ASSERT(ga->ga_af == AF_INET6);
1919 paddr = &ga->ga_addr;
1920 }
1921 } else if (ire->ire_type & IRE_OFFLINK) {
1922 if (ipversion == IPV6_VERSION)
1923 paddr = &ire->ire_gateway_addr_v6;
1924 else if (ipversion == IPV4_VERSION)
1925 paddr = &ire->ire_gateway_addr;
1926 }
1927
1928 /*
1929 * Lookup the gateway template; note that we could get an internal
1930 * template here, which we cache anyway. During IRE matching, we'll
1931 * try to update this gateway template cache and hopefully get a
1932 * real one.
1933 */
1934 if (paddr != NULL) {
1935 attrp->igsa_rhc = find_rhc(paddr, ipversion, B_FALSE);
1936 }
1937
1938 if (exists)
1939 mutex_exit(&attrp->igsa_lock);
1940
1941 return (0);
1942 }
1943
1944 /*
1945 * This function figures the type of MLP that we'll be using based on the
1946 * address that the user is binding and the zone. If the address is
1947 * unspecified, then we're looking at both private and shared. If it's one
1948 * of the zone's private addresses, then it's private only. If it's one
1949 * of the global addresses, then it's shared only. Multicast addresses are
1950 * treated same as unspecified address.
1951 *
1952 * If we can't figure out what it is, then return mlptSingle. That's actually
1953 * an error case.
1954 *
1955 * The callers are assumed to pass in zone->zone_id and not the zoneid that
1956 * is stored in a conn_t (since the latter will be GLOBAL_ZONEID in an
1957 * exclusive stack zone).
1958 */
1959 mlp_type_t
tsol_mlp_addr_type(zoneid_t zoneid,uchar_t version,const void * addr,ip_stack_t * ipst)1960 tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr,
1961 ip_stack_t *ipst)
1962 {
1963 in_addr_t in4;
1964 ire_t *ire;
1965 ipif_t *ipif;
1966 zoneid_t addrzone;
1967 zoneid_t ip_zoneid;
1968
1969 ASSERT(addr != NULL);
1970
1971 /*
1972 * For exclusive stacks we set the zoneid to zero
1973 * to operate as if in the global zone for IRE and conn_t comparisons.
1974 */
1975 if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
1976 ip_zoneid = GLOBAL_ZONEID;
1977 else
1978 ip_zoneid = zoneid;
1979
1980 if (version == IPV6_VERSION &&
1981 IN6_IS_ADDR_V4MAPPED((const in6_addr_t *)addr)) {
1982 IN6_V4MAPPED_TO_IPADDR((const in6_addr_t *)addr, in4);
1983 addr = &in4;
1984 version = IPV4_VERSION;
1985 }
1986
1987 /* Check whether the IRE_LOCAL (or ipif) is ALL_ZONES */
1988 if (version == IPV4_VERSION) {
1989 in4 = *(const in_addr_t *)addr;
1990 if ((in4 == INADDR_ANY) || CLASSD(in4)) {
1991 return (mlptBoth);
1992 }
1993 ire = ire_ftable_lookup_v4(in4, 0, 0, IRE_LOCAL|IRE_LOOPBACK,
1994 NULL, ip_zoneid, NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY,
1995 0, ipst, NULL);
1996 } else {
1997 if (IN6_IS_ADDR_UNSPECIFIED((const in6_addr_t *)addr) ||
1998 IN6_IS_ADDR_MULTICAST((const in6_addr_t *)addr)) {
1999 return (mlptBoth);
2000 }
2001 ire = ire_ftable_lookup_v6(addr, 0, 0, IRE_LOCAL|IRE_LOOPBACK,
2002 NULL, ip_zoneid, NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY,
2003 0, ipst, NULL);
2004 }
2005 /*
2006 * If we can't find the IRE, then we have to behave exactly like
2007 * ip_laddr_verify_{v4,v6}. That means looking up the IPIF so that
2008 * users can bind to addresses on "down" interfaces.
2009 *
2010 * If we can't find that either, then the bind is going to fail, so
2011 * just give up. Note that there's a miniscule chance that the address
2012 * is in transition, but we don't bother handling that.
2013 */
2014 if (ire == NULL) {
2015 if (version == IPV4_VERSION)
2016 ipif = ipif_lookup_addr(*(const in_addr_t *)addr, NULL,
2017 ip_zoneid, ipst);
2018 else
2019 ipif = ipif_lookup_addr_v6((const in6_addr_t *)addr,
2020 NULL, ip_zoneid, ipst);
2021 if (ipif == NULL) {
2022 return (mlptSingle);
2023 }
2024 addrzone = ipif->ipif_zoneid;
2025 ipif_refrele(ipif);
2026 } else {
2027 addrzone = ire->ire_zoneid;
2028 ire_refrele(ire);
2029 }
2030 return (addrzone == ALL_ZONES ? mlptShared : mlptPrivate);
2031 }
2032
2033 /*
2034 * Since we are configuring local interfaces, and we know trusted
2035 * extension CDE requires local interfaces to be cipso host type in
2036 * order to function correctly, we'll associate a cipso template
2037 * to each local interface and let the interface come up. Configuring
2038 * a local interface to be "unlabeled" host type is a configuration error.
2039 * We'll override that error and make the interface host type to be cipso
2040 * here.
2041 *
2042 * The code is optimized for the usual "success" case and unwinds things on
2043 * error. We don't want to go to the trouble and expense of formatting the
2044 * interface name for the usual case where everything is configured correctly.
2045 */
2046 boolean_t
tsol_check_interface_address(const ipif_t * ipif)2047 tsol_check_interface_address(const ipif_t *ipif)
2048 {
2049 tsol_tpc_t *tp;
2050 char addrbuf[INET6_ADDRSTRLEN];
2051 int af;
2052 const void *addr;
2053 zone_t *zone;
2054 ts_label_t *plabel;
2055 const bslabel_t *label;
2056 char ifname[LIFNAMSIZ];
2057 boolean_t retval;
2058 tsol_rhent_t rhent;
2059 netstack_t *ns = ipif->ipif_ill->ill_ipst->ips_netstack;
2060
2061 if (IN6_IS_ADDR_V4MAPPED(&ipif->ipif_v6lcl_addr)) {
2062 af = AF_INET;
2063 addr = &V4_PART_OF_V6(ipif->ipif_v6lcl_addr);
2064 } else {
2065 af = AF_INET6;
2066 addr = &ipif->ipif_v6lcl_addr;
2067 }
2068
2069 tp = find_tpc(&ipif->ipif_v6lcl_addr, IPV6_VERSION, B_FALSE);
2070
2071 /* assumes that ALL_ZONES implies that there is no exclusive stack */
2072 if (ipif->ipif_zoneid == ALL_ZONES) {
2073 zone = NULL;
2074 } else if (ns->netstack_stackid == GLOBAL_NETSTACKID) {
2075 /* Shared stack case */
2076 zone = zone_find_by_id(ipif->ipif_zoneid);
2077 } else {
2078 /* Exclusive stack case */
2079 zone = zone_find_by_id(crgetzoneid(ipif->ipif_ill->ill_credp));
2080 }
2081 if (zone != NULL) {
2082 plabel = zone->zone_slabel;
2083 ASSERT(plabel != NULL);
2084 label = label2bslabel(plabel);
2085 }
2086
2087 /*
2088 * If it's CIPSO and an all-zones address, then we're done.
2089 * If it's a CIPSO zone specific address, the zone's label
2090 * must be in the range or set specified in the template.
2091 * When the remote host entry is missing or the template
2092 * type is incorrect for this interface, we create a
2093 * CIPSO host entry in kernel and allow the interface to be
2094 * brought up as CIPSO type.
2095 */
2096 if (tp != NULL && (
2097 /* The all-zones case */
2098 (tp->tpc_tp.host_type == SUN_CIPSO &&
2099 tp->tpc_tp.tp_doi == default_doi &&
2100 ipif->ipif_zoneid == ALL_ZONES) ||
2101 /* The local-zone case */
2102 (zone != NULL && plabel->tsl_doi == tp->tpc_tp.tp_doi &&
2103 ((tp->tpc_tp.host_type == SUN_CIPSO &&
2104 (_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) ||
2105 blinlset(label, tp->tpc_tp.tp_sl_set_cipso))))))) {
2106 if (zone != NULL)
2107 zone_rele(zone);
2108 TPC_RELE(tp);
2109 return (B_TRUE);
2110 }
2111
2112 ipif_get_name(ipif, ifname, sizeof (ifname));
2113 (void) inet_ntop(af, addr, addrbuf, sizeof (addrbuf));
2114
2115 if (tp == NULL) {
2116 cmn_err(CE_NOTE, "template entry for %s missing. Default to "
2117 "CIPSO type for %s", ifname, addrbuf);
2118 retval = B_TRUE;
2119 } else if (tp->tpc_tp.host_type == UNLABELED) {
2120 cmn_err(CE_NOTE, "template type for %s incorrectly configured. "
2121 "Change to CIPSO type for %s", ifname, addrbuf);
2122 retval = B_TRUE;
2123 } else if (ipif->ipif_zoneid == ALL_ZONES) {
2124 if (tp->tpc_tp.host_type != SUN_CIPSO) {
2125 cmn_err(CE_NOTE, "%s failed: %s isn't set to CIPSO for "
2126 "all-zones. Converted to CIPSO.", ifname, addrbuf);
2127 retval = B_TRUE;
2128 } else {
2129 cmn_err(CE_NOTE, "%s failed: %s has wrong DOI %d "
2130 "instead of %d", ifname, addrbuf,
2131 tp->tpc_tp.tp_doi, default_doi);
2132 retval = B_FALSE;
2133 }
2134 } else if (zone == NULL) {
2135 cmn_err(CE_NOTE, "%s failed: zoneid %d unknown",
2136 ifname, ipif->ipif_zoneid);
2137 retval = B_FALSE;
2138 } else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) {
2139 cmn_err(CE_NOTE, "%s failed: zone %s has DOI %d but %s has "
2140 "DOI %d", ifname, zone->zone_name, plabel->tsl_doi,
2141 addrbuf, tp->tpc_tp.tp_doi);
2142 retval = B_FALSE;
2143 } else {
2144 cmn_err(CE_NOTE, "%s failed: zone %s label incompatible with "
2145 "%s", ifname, zone->zone_name, addrbuf);
2146 tsol_print_label(label, "zone label");
2147 retval = B_FALSE;
2148 }
2149
2150 if (zone != NULL)
2151 zone_rele(zone);
2152 if (tp != NULL)
2153 TPC_RELE(tp);
2154 if (retval) {
2155 /*
2156 * we've corrected a config error and let the interface
2157 * come up as cipso. Need to insert an rhent.
2158 */
2159 if ((rhent.rh_address.ta_family = af) == AF_INET) {
2160 rhent.rh_prefix = 32;
2161 rhent.rh_address.ta_addr_v4 = *(struct in_addr *)addr;
2162 } else {
2163 rhent.rh_prefix = 128;
2164 rhent.rh_address.ta_addr_v6 = *(in6_addr_t *)addr;
2165 }
2166 (void) strcpy(rhent.rh_template, "cipso");
2167 if (tnrh_load(&rhent) != 0) {
2168 cmn_err(CE_NOTE, "%s failed: Cannot insert CIPSO "
2169 "template for local addr %s", ifname, addrbuf);
2170 retval = B_FALSE;
2171 }
2172 }
2173 return (retval);
2174 }
2175