1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/strsubr.h>
29 #include <sys/stropts.h>
30 #include <sys/sunddi.h>
31 #include <sys/cred.h>
32 #include <sys/debug.h>
33 #include <sys/kmem.h>
34 #include <sys/errno.h>
35 #include <sys/disp.h>
36 #include <netinet/in.h>
37 #include <netinet/in_systm.h>
38 #include <netinet/ip.h>
39 #include <netinet/ip_icmp.h>
40 #include <netinet/tcp.h>
41 #include <inet/common.h>
42 #include <inet/ipclassifier.h>
43 #include <inet/ip.h>
44 #include <inet/mib2.h>
45 #include <inet/nd.h>
46 #include <inet/tcp.h>
47 #include <inet/ip_rts.h>
48 #include <inet/ip_ire.h>
49 #include <inet/ip_if.h>
50 #include <sys/modhash.h>
51
52 #include <sys/tsol/label.h>
53 #include <sys/tsol/label_macro.h>
54 #include <sys/tsol/tnet.h>
55 #include <sys/tsol/tndb.h>
56 #include <sys/strsun.h>
57
/*
 * Tunable for strict error-reply behavior (TCP RST and ICMP Unreachable).
 * Defined at file scope without an initializer, so it starts out as zero.
 */
int tsol_strict_error;
60
61 /*
62 * Some notes on the Trusted Solaris IRE gateway security attributes:
63 *
64 * When running in Trusted mode, the routing subsystem determines whether or
65 * not a packet can be delivered to an off-link host (not directly reachable
66 * through an interface) based on the accreditation checks of the packet's
67 * security attributes against those associated with the next-hop gateway.
68 *
69 * The next-hop gateway's security attributes can be derived from two sources
70 * (in order of preference): route-related and the host database. A Trusted
71 * system must be configured with at least the host database containing an
72 * entry for the next-hop gateway, or otherwise no accreditation checks can
73 * be performed, which may result in the inability to send packets to any
74 * off-link destination host.
75 *
76 * The major differences between the two sources are the number and type of
77 * security attributes used for accreditation checks. A host database entry
78 * can contain at most one set of security attributes, specific only to the
79 * next-hop gateway. In contrast, route-related security attributes are made
80 * up of a collection of security attributes for the distant networks, and
81 * are grouped together per next-hop gateway used to reach those networks.
82 * This is the preferred method, and the routing subsystem will fallback to
83 * the host database entry only if there are no route-related attributes
84 * associated with the next-hop gateway.
85 *
86 * In Trusted mode, all of the IRE entries (except LOCAL/LOOPBACK/BROADCAST/
87 * INTERFACE type) are initialized to contain a placeholder to store this
88 * information. The ire_gw_secattr structure gets allocated, initialized
89 * and associated with the IRE during the time of the IRE creation. The
90 * initialization process also includes resolving the host database entry
91 * of the next-hop gateway for fallback purposes. It does not include any
92 * route-related attribute setup, as that process comes separately as part
93 * of the route requests (add/change) made to the routing subsystem.
94 *
95 * The underlying logic which involves associating IREs with the gateway
96 * security attributes is represented by the following data structures:
97 *
98 * tsol_gcdb_t, or "gcdb"
99 *
100 * - This is a system-wide collection of records containing the
101 * currently used route-related security attributes, which are fed
102 * through the routing socket interface, e.g. "route add/change".
103 *
104 * tsol_gc_t, or "gc"
105 *
106 * - This is the gateway credential structure, and it provides the
107 * only mechanism to access the contents of gcdb. More than one gc
108 * entry may refer to the same gcdb record. gc's in the system are
109 * grouped according to the next-hop gateway address.
110 *
111 * tsol_gcgrp_t, or "gcgrp"
112 *
113 * - Group of gateway credentials, and is unique per next-hop gateway
114 * address. When the group is not empty, i.e. when gcgrp_count is
115 * greater than zero, it contains one or more gc's, each pointing to
116 * a gcdb record which indicates the gateway security attributes
117 * associated with the next-hop gateway.
118 *
119 * The fields of the tsol_ire_gw_secattr_t used from within the IRE are:
120 *
121 * igsa_lock
122 *
123 * - Lock that protects all fields within tsol_ire_gw_secattr_t.
124 *
125 * igsa_rhc
126 *
127 * - Remote host cache database entry of next-hop gateway. This is
128 * used in the case when there are no route-related attributes
129 * configured for the IRE.
130 *
131 * igsa_gc
132 *
133 * - A set of route-related attributes that only get set for prefix
134 * IREs. If this is non-NULL, the prefix IRE has been associated
135 * with a set of gateway security attributes by way of route add/
136 * change functionality.
137 */
138
139 static kmem_cache_t *ire_gw_secattr_cache;
140
141 #define GCDB_HASH_SIZE 101
142 #define GCGRP_HASH_SIZE 101
143
/*
 * Drop one reference on the gcdb record `p'.  The count is decremented
 * under gcdb_lock; when it reaches zero the record is handed to
 * gcdb_inactive(), which must return with gcdb_lock still held.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.  Callers
 * must terminate the invocation with a semicolon.  Note that `p' is
 * evaluated more than once, so it must be free of side effects.
 */
#define	GCDB_REFRELE(p) do {				\
	mutex_enter(&gcdb_lock);			\
	ASSERT((p)->gcdb_refcnt > 0);			\
	if (--((p)->gcdb_refcnt) == 0)			\
		gcdb_inactive(p);			\
	ASSERT(MUTEX_HELD(&gcdb_lock));			\
	mutex_exit(&gcdb_lock);				\
} while (0)
152
153 static int gcdb_hash_size = GCDB_HASH_SIZE;
154 static int gcgrp_hash_size = GCGRP_HASH_SIZE;
155 static mod_hash_t *gcdb_hash;
156 static mod_hash_t *gcgrp4_hash;
157 static mod_hash_t *gcgrp6_hash;
158
159 static kmutex_t gcdb_lock;
160 kmutex_t gcgrp_lock;
161
162 static uint_t gcdb_hash_by_secattr(void *, mod_hash_key_t);
163 static int gcdb_hash_cmp(mod_hash_key_t, mod_hash_key_t);
164 static tsol_gcdb_t *gcdb_lookup(struct rtsa_s *, boolean_t);
165 static void gcdb_inactive(tsol_gcdb_t *);
166
167 static uint_t gcgrp_hash_by_addr(void *, mod_hash_key_t);
168 static int gcgrp_hash_cmp(mod_hash_key_t, mod_hash_key_t);
169
170 static int ire_gw_secattr_constructor(void *, void *, int);
171 static void ire_gw_secattr_destructor(void *, void *);
172
173 void
tnet_init(void)174 tnet_init(void)
175 {
176 ire_gw_secattr_cache = kmem_cache_create("ire_gw_secattr_cache",
177 sizeof (tsol_ire_gw_secattr_t), 64, ire_gw_secattr_constructor,
178 ire_gw_secattr_destructor, NULL, NULL, NULL, 0);
179
180 gcdb_hash = mod_hash_create_extended("gcdb_hash",
181 gcdb_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
182 gcdb_hash_by_secattr, NULL, gcdb_hash_cmp, KM_SLEEP);
183
184 gcgrp4_hash = mod_hash_create_extended("gcgrp4_hash",
185 gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
186 gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
187
188 gcgrp6_hash = mod_hash_create_extended("gcgrp6_hash",
189 gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
190 gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
191
192 mutex_init(&gcdb_lock, NULL, MUTEX_DEFAULT, NULL);
193 mutex_init(&gcgrp_lock, NULL, MUTEX_DEFAULT, NULL);
194 }
195
196 void
tnet_fini(void)197 tnet_fini(void)
198 {
199 kmem_cache_destroy(ire_gw_secattr_cache);
200 mod_hash_destroy_hash(gcdb_hash);
201 mod_hash_destroy_hash(gcgrp4_hash);
202 mod_hash_destroy_hash(gcgrp6_hash);
203 mutex_destroy(&gcdb_lock);
204 mutex_destroy(&gcgrp_lock);
205 }
206
207 /* ARGSUSED */
208 static int
ire_gw_secattr_constructor(void * buf,void * cdrarg,int kmflags)209 ire_gw_secattr_constructor(void *buf, void *cdrarg, int kmflags)
210 {
211 tsol_ire_gw_secattr_t *attrp = buf;
212
213 mutex_init(&attrp->igsa_lock, NULL, MUTEX_DEFAULT, NULL);
214
215 attrp->igsa_rhc = NULL;
216 attrp->igsa_gc = NULL;
217
218 return (0);
219 }
220
221 /* ARGSUSED */
222 static void
ire_gw_secattr_destructor(void * buf,void * cdrarg)223 ire_gw_secattr_destructor(void *buf, void *cdrarg)
224 {
225 tsol_ire_gw_secattr_t *attrp = (tsol_ire_gw_secattr_t *)buf;
226
227 mutex_destroy(&attrp->igsa_lock);
228 }
229
230 tsol_ire_gw_secattr_t *
ire_gw_secattr_alloc(int kmflags)231 ire_gw_secattr_alloc(int kmflags)
232 {
233 return (kmem_cache_alloc(ire_gw_secattr_cache, kmflags));
234 }
235
236 void
ire_gw_secattr_free(tsol_ire_gw_secattr_t * attrp)237 ire_gw_secattr_free(tsol_ire_gw_secattr_t *attrp)
238 {
239 ASSERT(MUTEX_NOT_HELD(&attrp->igsa_lock));
240
241 if (attrp->igsa_rhc != NULL) {
242 TNRHC_RELE(attrp->igsa_rhc);
243 attrp->igsa_rhc = NULL;
244 }
245
246 if (attrp->igsa_gc != NULL) {
247 GC_REFRELE(attrp->igsa_gc);
248 attrp->igsa_gc = NULL;
249 }
250
251 ASSERT(attrp->igsa_rhc == NULL);
252 ASSERT(attrp->igsa_gc == NULL);
253
254 kmem_cache_free(ire_gw_secattr_cache, attrp);
255 }
256
257 /* ARGSUSED */
258 static uint_t
gcdb_hash_by_secattr(void * hash_data,mod_hash_key_t key)259 gcdb_hash_by_secattr(void *hash_data, mod_hash_key_t key)
260 {
261 const struct rtsa_s *rp = (struct rtsa_s *)key;
262 const uint32_t *up, *ue;
263 uint_t hash;
264 int i;
265
266 ASSERT(rp != NULL);
267
268 /* See comments in hash_bylabel in zone.c for details */
269 hash = rp->rtsa_doi + (rp->rtsa_doi << 1);
270 up = (const uint32_t *)&rp->rtsa_slrange;
271 ue = up + sizeof (rp->rtsa_slrange) / sizeof (*up);
272 i = 1;
273 while (up < ue) {
274 /* using 2^n + 1, 1 <= n <= 16 as source of many primes */
275 hash += *up + (*up << ((i % 16) + 1));
276 up++;
277 i++;
278 }
279 return (hash);
280 }
281
282 static int
gcdb_hash_cmp(mod_hash_key_t key1,mod_hash_key_t key2)283 gcdb_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
284 {
285 struct rtsa_s *rp1 = (struct rtsa_s *)key1;
286 struct rtsa_s *rp2 = (struct rtsa_s *)key2;
287
288 ASSERT(rp1 != NULL && rp2 != NULL);
289
290 if (blequal(&rp1->rtsa_slrange.lower_bound,
291 &rp2->rtsa_slrange.lower_bound) &&
292 blequal(&rp1->rtsa_slrange.upper_bound,
293 &rp2->rtsa_slrange.upper_bound) &&
294 rp1->rtsa_doi == rp2->rtsa_doi)
295 return (0);
296
297 /* No match; not found */
298 return (-1);
299 }
300
301 /* ARGSUSED */
302 static uint_t
gcgrp_hash_by_addr(void * hash_data,mod_hash_key_t key)303 gcgrp_hash_by_addr(void *hash_data, mod_hash_key_t key)
304 {
305 tsol_gcgrp_addr_t *ga = (tsol_gcgrp_addr_t *)key;
306 uint_t idx = 0;
307 uint32_t *ap;
308
309 ASSERT(ga != NULL);
310 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
311
312 ap = (uint32_t *)&ga->ga_addr.s6_addr32[0];
313 idx ^= *ap++;
314 idx ^= *ap++;
315 idx ^= *ap++;
316 idx ^= *ap;
317
318 return (idx);
319 }
320
321 static int
gcgrp_hash_cmp(mod_hash_key_t key1,mod_hash_key_t key2)322 gcgrp_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
323 {
324 tsol_gcgrp_addr_t *ga1 = (tsol_gcgrp_addr_t *)key1;
325 tsol_gcgrp_addr_t *ga2 = (tsol_gcgrp_addr_t *)key2;
326
327 ASSERT(ga1 != NULL && ga2 != NULL);
328
329 /* Address family must match */
330 if (ga1->ga_af != ga2->ga_af)
331 return (-1);
332
333 if (ga1->ga_addr.s6_addr32[0] == ga2->ga_addr.s6_addr32[0] &&
334 ga1->ga_addr.s6_addr32[1] == ga2->ga_addr.s6_addr32[1] &&
335 ga1->ga_addr.s6_addr32[2] == ga2->ga_addr.s6_addr32[2] &&
336 ga1->ga_addr.s6_addr32[3] == ga2->ga_addr.s6_addr32[3])
337 return (0);
338
339 /* No match; not found */
340 return (-1);
341 }
342
343 #define RTSAFLAGS "\20\11cipso\3doi\2max_sl\1min_sl"
344
345 int
rtsa_validate(const struct rtsa_s * rp)346 rtsa_validate(const struct rtsa_s *rp)
347 {
348 uint32_t mask = rp->rtsa_mask;
349
350 /* RTSA_CIPSO must be set, and DOI must not be zero */
351 if ((mask & RTSA_CIPSO) == 0 || rp->rtsa_doi == 0) {
352 DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
353 "rtsa(1) lacks flag or has 0 doi.",
354 rtsa_s *, rp);
355 return (EINVAL);
356 }
357 /*
358 * SL range must be specified, and it must have its
359 * upper bound dominating its lower bound.
360 */
361 if ((mask & RTSA_SLRANGE) != RTSA_SLRANGE ||
362 !bldominates(&rp->rtsa_slrange.upper_bound,
363 &rp->rtsa_slrange.lower_bound)) {
364 DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
365 "rtsa(1) min_sl and max_sl not set or max_sl is "
366 "not dominating.", rtsa_s *, rp);
367 return (EINVAL);
368 }
369 return (0);
370 }
371
372 /*
373 * A brief explanation of the reference counting scheme:
374 *
375 * Apart from dynamic references due to reference holds done
376 * actively by threads, we have the following references:
377 *
378 * gcdb_refcnt:
379 * - Every tsol_gc_t pointing to a tsol_gcdb_t contributes a reference
380 * to the gcdb_refcnt.
381 *
382 * gc_refcnt:
383 * - A prefix IRE that points to an igsa_gc contributes a reference
384 * to the gc_refcnt.
385 *
386 * gcgrp_refcnt:
387 * - Every tsol_gc_t in the chain headed by tsol_gcgrp_t contributes
388 * a reference to the gcgrp_refcnt.
389 */
390 static tsol_gcdb_t *
gcdb_lookup(struct rtsa_s * rp,boolean_t alloc)391 gcdb_lookup(struct rtsa_s *rp, boolean_t alloc)
392 {
393 tsol_gcdb_t *gcdb = NULL;
394
395 if (rtsa_validate(rp) != 0)
396 return (NULL);
397
398 mutex_enter(&gcdb_lock);
399 /* Find a copy in the cache; otherwise, create one and cache it */
400 if (mod_hash_find(gcdb_hash, (mod_hash_key_t)rp,
401 (mod_hash_val_t *)&gcdb) == 0) {
402 gcdb->gcdb_refcnt++;
403 ASSERT(gcdb->gcdb_refcnt != 0);
404
405 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__lookup, char *,
406 "gcdb(1) is in gcdb_hash(global)", tsol_gcdb_t *, gcdb);
407 } else if (alloc) {
408 gcdb = kmem_zalloc(sizeof (*gcdb), KM_NOSLEEP);
409 if (gcdb != NULL) {
410 gcdb->gcdb_refcnt = 1;
411 gcdb->gcdb_mask = rp->rtsa_mask;
412 gcdb->gcdb_doi = rp->rtsa_doi;
413 gcdb->gcdb_slrange = rp->rtsa_slrange;
414
415 if (mod_hash_insert(gcdb_hash,
416 (mod_hash_key_t)&gcdb->gcdb_attr,
417 (mod_hash_val_t)gcdb) != 0) {
418 mutex_exit(&gcdb_lock);
419 kmem_free(gcdb, sizeof (*gcdb));
420 return (NULL);
421 }
422
423 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__insert, char *,
424 "gcdb(1) inserted in gcdb_hash(global)",
425 tsol_gcdb_t *, gcdb);
426 }
427 }
428 mutex_exit(&gcdb_lock);
429 return (gcdb);
430 }
431
432 static void
gcdb_inactive(tsol_gcdb_t * gcdb)433 gcdb_inactive(tsol_gcdb_t *gcdb)
434 {
435 ASSERT(MUTEX_HELD(&gcdb_lock));
436 ASSERT(gcdb != NULL && gcdb->gcdb_refcnt == 0);
437
438 (void) mod_hash_remove(gcdb_hash, (mod_hash_key_t)&gcdb->gcdb_attr,
439 (mod_hash_val_t *)&gcdb);
440
441 DTRACE_PROBE2(tx__gcdb__log__info__gcdb__remove, char *,
442 "gcdb(1) removed from gcdb_hash(global)",
443 tsol_gcdb_t *, gcdb);
444 kmem_free(gcdb, sizeof (*gcdb));
445 }
446
447 tsol_gc_t *
gc_create(struct rtsa_s * rp,tsol_gcgrp_t * gcgrp,boolean_t * gcgrp_xtrarefp)448 gc_create(struct rtsa_s *rp, tsol_gcgrp_t *gcgrp, boolean_t *gcgrp_xtrarefp)
449 {
450 tsol_gc_t *gc;
451 tsol_gcdb_t *gcdb;
452
453 *gcgrp_xtrarefp = B_TRUE;
454
455 rw_enter(&gcgrp->gcgrp_rwlock, RW_WRITER);
456 if ((gcdb = gcdb_lookup(rp, B_TRUE)) == NULL) {
457 rw_exit(&gcgrp->gcgrp_rwlock);
458 return (NULL);
459 }
460
461 for (gc = gcgrp->gcgrp_head; gc != NULL; gc = gc->gc_next) {
462 if (gc->gc_db == gcdb) {
463 ASSERT(gc->gc_grp == gcgrp);
464
465 gc->gc_refcnt++;
466 ASSERT(gc->gc_refcnt != 0);
467
468 GCDB_REFRELE(gcdb);
469
470 DTRACE_PROBE3(tx__gcdb__log__info__gc__create,
471 char *, "found gc(1) in gcgrp(2)",
472 tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
473 rw_exit(&gcgrp->gcgrp_rwlock);
474 return (gc);
475 }
476 }
477
478 gc = kmem_zalloc(sizeof (*gc), KM_NOSLEEP);
479 if (gc != NULL) {
480 if (gcgrp->gcgrp_head == NULL) {
481 gcgrp->gcgrp_head = gcgrp->gcgrp_tail = gc;
482 } else {
483 gcgrp->gcgrp_tail->gc_next = gc;
484 gc->gc_prev = gcgrp->gcgrp_tail;
485 gcgrp->gcgrp_tail = gc;
486 }
487 gcgrp->gcgrp_count++;
488 ASSERT(gcgrp->gcgrp_count != 0);
489
490 /* caller has incremented gcgrp reference for us */
491 gc->gc_grp = gcgrp;
492
493 gc->gc_db = gcdb;
494 gc->gc_refcnt = 1;
495
496 DTRACE_PROBE3(tx__gcdb__log__info__gc__create, char *,
497 "added gc(1) to gcgrp(2)", tsol_gc_t *, gc,
498 tsol_gcgrp_t *, gcgrp);
499
500 *gcgrp_xtrarefp = B_FALSE;
501 }
502 rw_exit(&gcgrp->gcgrp_rwlock);
503
504 return (gc);
505 }
506
507 void
gc_inactive(tsol_gc_t * gc)508 gc_inactive(tsol_gc_t *gc)
509 {
510 tsol_gcgrp_t *gcgrp = gc->gc_grp;
511
512 ASSERT(gcgrp != NULL);
513 ASSERT(RW_WRITE_HELD(&gcgrp->gcgrp_rwlock));
514 ASSERT(gc->gc_refcnt == 0);
515
516 if (gc->gc_prev != NULL)
517 gc->gc_prev->gc_next = gc->gc_next;
518 else
519 gcgrp->gcgrp_head = gc->gc_next;
520 if (gc->gc_next != NULL)
521 gc->gc_next->gc_prev = gc->gc_prev;
522 else
523 gcgrp->gcgrp_tail = gc->gc_prev;
524 ASSERT(gcgrp->gcgrp_count > 0);
525 gcgrp->gcgrp_count--;
526
527 /* drop lock before it's destroyed */
528 rw_exit(&gcgrp->gcgrp_rwlock);
529
530 DTRACE_PROBE3(tx__gcdb__log__info__gc__remove, char *,
531 "removed inactive gc(1) from gcgrp(2)",
532 tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
533
534 GCGRP_REFRELE(gcgrp);
535
536 gc->gc_grp = NULL;
537 gc->gc_prev = gc->gc_next = NULL;
538
539 if (gc->gc_db != NULL)
540 GCDB_REFRELE(gc->gc_db);
541
542 kmem_free(gc, sizeof (*gc));
543 }
544
545 tsol_gcgrp_t *
gcgrp_lookup(tsol_gcgrp_addr_t * ga,boolean_t alloc)546 gcgrp_lookup(tsol_gcgrp_addr_t *ga, boolean_t alloc)
547 {
548 tsol_gcgrp_t *gcgrp = NULL;
549 mod_hash_t *hashp;
550
551 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
552
553 hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
554
555 mutex_enter(&gcgrp_lock);
556 if (mod_hash_find(hashp, (mod_hash_key_t)ga,
557 (mod_hash_val_t *)&gcgrp) == 0) {
558 gcgrp->gcgrp_refcnt++;
559 ASSERT(gcgrp->gcgrp_refcnt != 0);
560
561 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__lookup, char *,
562 "found gcgrp(1) in hash(2)", tsol_gcgrp_t *, gcgrp,
563 mod_hash_t *, hashp);
564
565 } else if (alloc) {
566 gcgrp = kmem_zalloc(sizeof (*gcgrp), KM_NOSLEEP);
567 if (gcgrp != NULL) {
568 gcgrp->gcgrp_refcnt = 1;
569 rw_init(&gcgrp->gcgrp_rwlock, NULL, RW_DEFAULT, NULL);
570 bcopy(ga, &gcgrp->gcgrp_addr, sizeof (*ga));
571
572 if (mod_hash_insert(hashp,
573 (mod_hash_key_t)&gcgrp->gcgrp_addr,
574 (mod_hash_val_t)gcgrp) != 0) {
575 mutex_exit(&gcgrp_lock);
576 kmem_free(gcgrp, sizeof (*gcgrp));
577 return (NULL);
578 }
579
580 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__insert,
581 char *, "inserted gcgrp(1) in hash(2)",
582 tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
583 }
584 }
585 mutex_exit(&gcgrp_lock);
586 return (gcgrp);
587 }
588
589 void
gcgrp_inactive(tsol_gcgrp_t * gcgrp)590 gcgrp_inactive(tsol_gcgrp_t *gcgrp)
591 {
592 tsol_gcgrp_addr_t *ga;
593 mod_hash_t *hashp;
594
595 ASSERT(MUTEX_HELD(&gcgrp_lock));
596 ASSERT(gcgrp != NULL && gcgrp->gcgrp_refcnt == 0);
597 ASSERT(gcgrp->gcgrp_head == NULL && gcgrp->gcgrp_count == 0);
598
599 ga = &gcgrp->gcgrp_addr;
600 ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
601
602 hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
603 (void) mod_hash_remove(hashp, (mod_hash_key_t)ga,
604 (mod_hash_val_t *)&gcgrp);
605 rw_destroy(&gcgrp->gcgrp_rwlock);
606
607 DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__remove, char *,
608 "removed inactive gcgrp(1) from hash(2)",
609 tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
610
611 kmem_free(gcgrp, sizeof (*gcgrp));
612 }
613
614
615 /*
616 * Assign a sensitivity label to inbound traffic which arrived without
617 * an explicit on-the-wire label.
618 *
619 * In the case of CIPSO-type hosts, we assume packets arriving without
620 * a label are at the most sensitive label known for the host, most
621 * likely involving out-of-band key management traffic (such as IKE,
622 * etc.,)
623 */
624 static boolean_t
tsol_find_unlabeled_label(tsol_tpc_t * rhtp,bslabel_t * sl,uint32_t * doi)625 tsol_find_unlabeled_label(tsol_tpc_t *rhtp, bslabel_t *sl, uint32_t *doi)
626 {
627 *doi = rhtp->tpc_tp.tp_doi;
628 switch (rhtp->tpc_tp.host_type) {
629 case UNLABELED:
630 *sl = rhtp->tpc_tp.tp_def_label;
631 break;
632 case SUN_CIPSO:
633 *sl = rhtp->tpc_tp.tp_sl_range_cipso.upper_bound;
634 break;
635 default:
636 return (B_FALSE);
637 }
638 setbltype(sl, SUN_SL_ID);
639 return (B_TRUE);
640 }
641
642 /*
643 * Converts CIPSO option to sensitivity label.
644 * Validity checks based on restrictions defined in
645 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) (draft-ietf-cipso-ipsecurity)
646 */
647 static boolean_t
cipso_to_sl(const uchar_t * option,bslabel_t * sl)648 cipso_to_sl(const uchar_t *option, bslabel_t *sl)
649 {
650 const struct cipso_option *co = (const struct cipso_option *)option;
651 const struct cipso_tag_type_1 *tt1;
652
653 tt1 = (struct cipso_tag_type_1 *)&co->cipso_tag_type[0];
654 if (tt1->tag_type != 1 ||
655 tt1->tag_length < TSOL_TT1_MIN_LENGTH ||
656 tt1->tag_length > TSOL_TT1_MAX_LENGTH ||
657 tt1->tag_length + TSOL_CIPSO_TAG_OFFSET > co->cipso_length)
658 return (B_FALSE);
659
660 bsllow(sl); /* assumed: sets compartments to all zeroes */
661 LCLASS_SET((_bslabel_impl_t *)sl, tt1->tag_sl);
662 bcopy(tt1->tag_cat, &((_bslabel_impl_t *)sl)->compartments,
663 tt1->tag_length - TSOL_TT1_MIN_LENGTH);
664 return (B_TRUE);
665 }
666
667 /*
668 * If present, parse the CIPSO label in the incoming packet and
669 * construct a ts_label_t that reflects the CIPSO label and put it in
670 * the ip_recv_attr_t. Later as the packet flows up through the stack any
671 * code that needs to examine the packet label can inspect the label
672 * from the ira_tsl. This function is
673 * called right in ip_input for all packets, i.e. locally destined and
674 * to be forwarded packets. The forwarding path needs to examine the label
675 * to determine how to forward the packet.
676 *
677 * This routine pulls all message text up into the first mblk.
678 * For IPv4, only the first 20 bytes of the IP header are guaranteed
679 * to exist. For IPv6, only the IPv6 header is guaranteed to exist.
680 */
681 boolean_t
tsol_get_pkt_label(mblk_t * mp,int version,ip_recv_attr_t * ira)682 tsol_get_pkt_label(mblk_t *mp, int version, ip_recv_attr_t *ira)
683 {
684 tsol_tpc_t *src_rhtp = NULL;
685 uchar_t *opt_ptr = NULL;
686 const ipha_t *ipha;
687 bslabel_t sl;
688 uint32_t doi;
689 tsol_ip_label_t label_type;
690 uint32_t label_flags = 0; /* flags to set in label */
691 const cipso_option_t *co;
692 const void *src;
693 const ip6_t *ip6h;
694 cred_t *credp;
695 int proto;
696
697 ASSERT(DB_TYPE(mp) == M_DATA);
698
699 if (mp->b_cont != NULL && !pullupmsg(mp, -1))
700 return (B_FALSE);
701
702 if (version == IPV4_VERSION) {
703 ASSERT(MBLKL(mp) >= IP_SIMPLE_HDR_LENGTH);
704 ipha = (const ipha_t *)mp->b_rptr;
705 src = &ipha->ipha_src;
706 if (!tsol_get_option_v4(mp, &label_type, &opt_ptr))
707 return (B_FALSE);
708 } else {
709 ASSERT(MBLKL(mp) >= IPV6_HDR_LEN);
710 ip6h = (const ip6_t *)mp->b_rptr;
711 src = &ip6h->ip6_src;
712 if (!tsol_get_option_v6(mp, &label_type, &opt_ptr))
713 return (B_FALSE);
714 }
715
716 switch (label_type) {
717 case OPT_CIPSO:
718 /*
719 * Convert the CIPSO label to the internal format
720 * and attach it to the dblk cred.
721 * Validity checks based on restrictions defined in
722 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2)
723 * (draft-ietf-cipso-ipsecurity)
724 */
725 if (version == IPV6_VERSION && ip6opt_ls == 0)
726 return (B_FALSE);
727 co = (const struct cipso_option *)opt_ptr;
728 if ((co->cipso_length <
729 TSOL_CIPSO_TAG_OFFSET + TSOL_TT1_MIN_LENGTH) ||
730 (co->cipso_length > IP_MAX_OPT_LENGTH))
731 return (B_FALSE);
732 bcopy(co->cipso_doi, &doi, sizeof (doi));
733 doi = ntohl(doi);
734 if (!cipso_to_sl(opt_ptr, &sl))
735 return (B_FALSE);
736 setbltype(&sl, SUN_SL_ID);
737
738 /*
739 * If the source was unlabeled, then flag as such,
740 * (since CIPSO routers may add headers)
741 */
742
743 if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
744 return (B_FALSE);
745
746 if (src_rhtp->tpc_tp.host_type == UNLABELED)
747 label_flags = TSLF_UNLABELED;
748
749 TPC_RELE(src_rhtp);
750
751 break;
752
753 case OPT_NONE:
754 /*
755 * Handle special cases that may not be labeled, even
756 * though the sending system may otherwise be configured as
757 * labeled.
758 * - IGMP
759 * - IPv4 ICMP Router Discovery
760 * - IPv6 Neighbor Discovery
761 * - IPsec ESP
762 */
763 if (version == IPV4_VERSION) {
764 proto = ipha->ipha_protocol;
765 if (proto == IPPROTO_IGMP)
766 return (B_TRUE);
767 if (proto == IPPROTO_ICMP) {
768 const struct icmp *icmp = (const struct icmp *)
769 (mp->b_rptr + IPH_HDR_LENGTH(ipha));
770
771 if ((uchar_t *)icmp + ICMP_MINLEN > mp->b_wptr)
772 return (B_FALSE);
773 if (icmp->icmp_type == ICMP_ROUTERADVERT ||
774 icmp->icmp_type == ICMP_ROUTERSOLICIT)
775 return (B_TRUE);
776 }
777 } else {
778 proto = ip6h->ip6_nxt;
779 if (proto == IPPROTO_ICMPV6) {
780 const icmp6_t *icmp6 = (const icmp6_t *)
781 (mp->b_rptr + IPV6_HDR_LEN);
782
783 if ((uchar_t *)icmp6 + ICMP6_MINLEN >
784 mp->b_wptr)
785 return (B_FALSE);
786 if (icmp6->icmp6_type >= MLD_LISTENER_QUERY &&
787 icmp6->icmp6_type <= ICMP6_MAX_INFO_TYPE)
788 return (B_TRUE);
789 }
790 }
791
792 /*
793 * Look up the tnrhtp database and get the implicit label
794 * that is associated with the sending host and attach
795 * it to the packet.
796 */
797 if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
798 return (B_FALSE);
799
800 /*
801 * If peer is label-aware, mark as "implicit" rather than
802 * "unlabeled" to cause appropriate mac-exempt processing
803 * to happen.
804 */
805 if (src_rhtp->tpc_tp.host_type == SUN_CIPSO)
806 label_flags = TSLF_IMPLICIT_IN;
807 else if (src_rhtp->tpc_tp.host_type == UNLABELED)
808 label_flags = TSLF_UNLABELED;
809 else {
810 DTRACE_PROBE2(tx__get__pkt__label, char *,
811 "template(1) has unknown hosttype",
812 tsol_tpc_t *, src_rhtp);
813 }
814
815
816 if (!tsol_find_unlabeled_label(src_rhtp, &sl, &doi)) {
817 TPC_RELE(src_rhtp);
818 return (B_FALSE);
819 }
820 TPC_RELE(src_rhtp);
821 break;
822
823 default:
824 return (B_FALSE);
825 }
826
827 if (ira->ira_cred == NULL) {
828 credp = newcred_from_bslabel(&sl, doi, KM_NOSLEEP);
829 if (credp == NULL)
830 return (B_FALSE);
831 } else {
832 credp = copycred_from_bslabel(ira->ira_cred, &sl, doi,
833 KM_NOSLEEP);
834 if (credp == NULL)
835 return (B_FALSE);
836 if (ira->ira_free_flags & IRA_FREE_CRED) {
837 crfree(ira->ira_cred);
838 ira->ira_free_flags &= ~IRA_FREE_CRED;
839 ira->ira_cred = NULL;
840 }
841 }
842
843 /*
844 * Put the label in ira_tsl for convinience, while keeping
845 * the cred in ira_cred for getpeerucred which is used to get
846 * labels with TX.
847 * Note: no explicit refcnt/free_flag for ira_tsl. The free_flag
848 * for IRA_FREE_CRED is sufficient for both.
849 */
850 ira->ira_tsl = crgetlabel(credp);
851 ira->ira_cred = credp;
852 ira->ira_free_flags |= IRA_FREE_CRED;
853
854 ira->ira_tsl->tsl_flags |= label_flags;
855 return (B_TRUE);
856 }
857
858 /*
859 * This routine determines whether the given packet should be accepted locally.
860 * It does a range/set check on the packet's label by looking up the given
861 * address in the remote host database.
862 */
863 boolean_t
tsol_receive_local(const mblk_t * mp,const void * addr,uchar_t version,ip_recv_attr_t * ira,const conn_t * connp)864 tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version,
865 ip_recv_attr_t *ira, const conn_t *connp)
866 {
867 const cred_t *credp;
868 ts_label_t *plabel, *conn_plabel;
869 tsol_tpc_t *tp;
870 boolean_t retv;
871 const bslabel_t *label, *conn_label;
872 boolean_t shared_addr = (ira->ira_flags & IRAF_TX_SHARED_ADDR);
873
874 /*
875 * tsol_get_pkt_label intentionally avoids the labeling process for:
876 * - IPv6 router and neighbor discovery as well as redirects.
877 * - MLD packets. (Anything between ICMPv6 code 130 and 138.)
878 * - IGMP packets.
879 * - IPv4 router discovery.
880 * In those cases ira_cred is NULL.
881 */
882 credp = ira->ira_cred;
883 if (credp == NULL)
884 return (B_TRUE);
885
886 /*
887 * If this packet is from the inside (not a remote host) and has the
888 * same zoneid as the selected destination, then no checks are
889 * necessary. Membership in the zone is enough proof. This is
890 * intended to be a hot path through this function.
891 * Note: Using crgetzone here is ok since the peer is local.
892 */
893 if (!crisremote(credp) &&
894 crgetzone(credp) == crgetzone(connp->conn_cred))
895 return (B_TRUE);
896
897 plabel = ira->ira_tsl;
898 conn_plabel = crgetlabel(connp->conn_cred);
899 ASSERT(plabel != NULL && conn_plabel != NULL);
900
901 label = label2bslabel(plabel);
902 conn_label = label2bslabel(conn_plabel);
903
904
905 /*
906 * Implicitly labeled packets from label-aware sources
907 * go only to privileged receivers
908 */
909 if ((plabel->tsl_flags & TSLF_IMPLICIT_IN) &&
910 (connp->conn_mac_mode != CONN_MAC_IMPLICIT)) {
911 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac_impl,
912 char *,
913 "implicitly labeled packet mp(1) for conn(2) "
914 "which isn't in implicit mac mode",
915 mblk_t *, mp, conn_t *, connp);
916
917 return (B_FALSE);
918 }
919
920
921 /*
922 * MLPs are always validated using the range and set of the local
923 * address, even when the remote host is unlabeled.
924 */
925 if (connp->conn_mlp_type == mlptBoth ||
926 /* LINTED: no consequent */
927 connp->conn_mlp_type == (shared_addr ? mlptShared : mlptPrivate)) {
928 ;
929
930 /*
931 * If this is a packet from an unlabeled sender, then we must apply
932 * different rules. If the label is equal to the zone's label, then
933 * it's allowed. If it's not equal, but the zone is either the global
934 * zone or the label is dominated by the zone's label, then allow it
935 * as long as it's in the range configured for the destination.
936 */
937 } else if (plabel->tsl_flags & TSLF_UNLABELED) {
938 if (plabel->tsl_doi == conn_plabel->tsl_doi &&
939 blequal(label, conn_label))
940 return (B_TRUE);
941
942 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) ||
943 (!connp->conn_zone_is_global &&
944 (plabel->tsl_doi != conn_plabel->tsl_doi ||
945 !bldominates(conn_label, label)))) {
946 DTRACE_PROBE3(
947 tx__ip__log__drop__receivelocal__mac_unl,
948 char *,
949 "unlabeled packet mp(1) fails mac for conn(2)",
950 mblk_t *, mp, conn_t *, connp);
951 return (B_FALSE);
952 }
953
954 /*
955 * If this is a packet from a labeled sender, verify the
956 * label on the packet matches the connection label.
957 */
958 } else {
959 if (plabel->tsl_doi != conn_plabel->tsl_doi ||
960 !blequal(label, conn_label)) {
961 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac__slp,
962 char *,
963 "packet mp(1) failed label match to SLP conn(2)",
964 mblk_t *, mp, conn_t *, connp);
965 return (B_FALSE);
966 }
967 /*
968 * No further checks will be needed if this is a zone-
969 * specific address because (1) The process for bringing up
970 * the interface ensures the zone's label is within the zone-
971 * specific address's valid label range; (2) For cases where
972 * the conn is bound to the unspecified addresses, ip fanout
973 * logic ensures conn's zoneid equals the dest addr's zoneid;
974 * (3) Mac-exempt and mlp logic above already handle all
975 * cases where the zone label may not be the same as the
976 * conn label.
977 */
978 if (!shared_addr)
979 return (B_TRUE);
980 }
981
982 tp = find_tpc(addr, version, B_FALSE);
983 if (tp == NULL) {
984 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__no__tnr,
985 char *, "dropping mp(1), host(2) lacks entry",
986 mblk_t *, mp, void *, addr);
987 return (B_FALSE);
988 }
989
990 /*
991 * The local host address should not be unlabeled at this point. The
992 * only way this can happen is that the destination isn't unicast. We
993 * assume that the packet should not have had a label, and thus should
994 * have been handled by the TSLF_UNLABELED logic above.
995 */
996 if (tp->tpc_tp.host_type == UNLABELED) {
997 retv = B_FALSE;
998 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__flag, char *,
999 "mp(1) unlabeled source, but tp is not unlabeled.",
1000 mblk_t *, mp, tsol_tpc_t *, tp);
1001
1002 } else if (tp->tpc_tp.host_type != SUN_CIPSO) {
1003 retv = B_FALSE;
1004 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__tptype, char *,
1005 "delivering mp(1), found unrecognized tpc(2) type.",
1006 mblk_t *, mp, tsol_tpc_t *, tp);
1007
1008 } else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) {
1009 retv = B_FALSE;
1010 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
1011 "mp(1) could not be delievered to tp(2), doi mismatch",
1012 mblk_t *, mp, tsol_tpc_t *, tp);
1013
1014 } else if (!_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) &&
1015 !blinlset(label, tp->tpc_tp.tp_sl_set_cipso)) {
1016 retv = B_FALSE;
1017 DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
1018 "mp(1) could not be delievered to tp(2), bad mac",
1019 mblk_t *, mp, tsol_tpc_t *, tp);
1020 } else {
1021 retv = B_TRUE;
1022 }
1023
1024 TPC_RELE(tp);
1025
1026 return (retv);
1027 }
1028
1029 boolean_t
tsol_can_accept_raw(mblk_t * mp,ip_recv_attr_t * ira,boolean_t check_host)1030 tsol_can_accept_raw(mblk_t *mp, ip_recv_attr_t *ira, boolean_t check_host)
1031 {
1032 ts_label_t *plabel = NULL;
1033 tsol_tpc_t *src_rhtp, *dst_rhtp;
1034 boolean_t retv;
1035
1036 plabel = ira->ira_tsl;
1037
1038 /* We are bootstrapping or the internal template was never deleted */
1039 if (plabel == NULL)
1040 return (B_TRUE);
1041
1042 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
1043 ipha_t *ipha = (ipha_t *)mp->b_rptr;
1044
1045 src_rhtp = find_tpc(&ipha->ipha_src, IPV4_VERSION,
1046 B_FALSE);
1047 if (src_rhtp == NULL)
1048 return (B_FALSE);
1049 dst_rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION,
1050 B_FALSE);
1051 } else {
1052 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
1053
1054 src_rhtp = find_tpc(&ip6h->ip6_src, IPV6_VERSION,
1055 B_FALSE);
1056 if (src_rhtp == NULL)
1057 return (B_FALSE);
1058 dst_rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION,
1059 B_FALSE);
1060 }
1061 if (dst_rhtp == NULL) {
1062 TPC_RELE(src_rhtp);
1063 return (B_FALSE);
1064 }
1065
1066 if (label2doi(plabel) != src_rhtp->tpc_tp.tp_doi) {
1067 retv = B_FALSE;
1068
1069 /*
1070 * Check that the packet's label is in the correct range for labeled
1071 * sender, or is equal to the default label for unlabeled sender.
1072 */
1073 } else if ((src_rhtp->tpc_tp.host_type != UNLABELED &&
1074 !_blinrange(label2bslabel(plabel),
1075 &src_rhtp->tpc_tp.tp_sl_range_cipso) &&
1076 !blinlset(label2bslabel(plabel),
1077 src_rhtp->tpc_tp.tp_sl_set_cipso)) ||
1078 (src_rhtp->tpc_tp.host_type == UNLABELED &&
1079 !blequal(&plabel->tsl_label, &src_rhtp->tpc_tp.tp_def_label))) {
1080 retv = B_FALSE;
1081
1082 } else if (check_host) {
1083 retv = B_TRUE;
1084
1085 /*
1086 * Until we have SL range in the Zone structure, pass it
1087 * when our own address lookup returned an internal entry.
1088 */
1089 } else switch (dst_rhtp->tpc_tp.host_type) {
1090 case UNLABELED:
1091 retv = B_TRUE;
1092 break;
1093
1094 case SUN_CIPSO:
1095 retv = _blinrange(label2bslabel(plabel),
1096 &dst_rhtp->tpc_tp.tp_sl_range_cipso) ||
1097 blinlset(label2bslabel(plabel),
1098 dst_rhtp->tpc_tp.tp_sl_set_cipso);
1099 break;
1100
1101 default:
1102 retv = B_FALSE;
1103 }
1104 TPC_RELE(src_rhtp);
1105 TPC_RELE(dst_rhtp);
1106 return (retv);
1107 }
1108
1109 /*
1110 * This routine determines whether a response to a failed packet delivery or
1111 * connection should be sent back. By default, the policy is to allow such
1112 * messages to be sent at all times, as these messages reveal little useful
1113 * information and are healthy parts of TCP/IP networking.
1114 *
1115 * If tsol_strict_error is set, then we do strict tests: if the packet label is
1116 * within the label range/set of this host/zone, return B_TRUE; otherwise
1117 * return B_FALSE, which causes the packet to be dropped silently.
1118 *
1119 * Note that tsol_get_pkt_label will cause the packet to drop if the sender is
1120 * marked as labeled in the remote host database, but the packet lacks a label.
1121 * This means that we don't need to do a lookup on the source; the
1122 * TSLF_UNLABELED flag is sufficient.
1123 */
1124 boolean_t
tsol_can_reply_error(const mblk_t * mp,ip_recv_attr_t * ira)1125 tsol_can_reply_error(const mblk_t *mp, ip_recv_attr_t *ira)
1126 {
1127 ts_label_t *plabel = NULL;
1128 tsol_tpc_t *rhtp;
1129 const ipha_t *ipha;
1130 const ip6_t *ip6h;
1131 boolean_t retv;
1132 bslabel_t *pktbs;
1133
1134 /* Caller must pull up at least the IP header */
1135 ASSERT(MBLKL(mp) >= (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION ?
1136 sizeof (*ipha) : sizeof (*ip6h)));
1137
1138 if (!tsol_strict_error)
1139 return (B_TRUE);
1140
1141 plabel = ira->ira_tsl;
1142
1143 /* We are bootstrapping or the internal template was never deleted */
1144 if (plabel == NULL)
1145 return (B_TRUE);
1146
1147 if (plabel->tsl_flags & TSLF_IMPLICIT_IN) {
1148 DTRACE_PROBE3(tx__ip__log__drop__replyerror__unresolved__label,
1149 char *,
1150 "cannot send error report for packet mp(1) with "
1151 "unresolved security label sl(2)",
1152 mblk_t *, mp, ts_label_t *, plabel);
1153 return (B_FALSE);
1154 }
1155
1156
1157 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
1158 ipha = (const ipha_t *)mp->b_rptr;
1159 rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, B_FALSE);
1160 } else {
1161 ip6h = (const ip6_t *)mp->b_rptr;
1162 rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION, B_FALSE);
1163 }
1164
1165 if (rhtp == NULL || label2doi(plabel) != rhtp->tpc_tp.tp_doi) {
1166 retv = B_FALSE;
1167 } else {
1168 /*
1169 * If we're in the midst of forwarding, then the destination
1170 * address might not be labeled. In that case, allow unlabeled
1171 * packets through only if the default label is the same, and
1172 * labeled ones if they dominate.
1173 */
1174 pktbs = label2bslabel(plabel);
1175 switch (rhtp->tpc_tp.host_type) {
1176 case UNLABELED:
1177 if (plabel->tsl_flags & TSLF_UNLABELED) {
1178 retv = blequal(pktbs,
1179 &rhtp->tpc_tp.tp_def_label);
1180 } else {
1181 retv = bldominates(pktbs,
1182 &rhtp->tpc_tp.tp_def_label);
1183 }
1184 break;
1185
1186 case SUN_CIPSO:
1187 retv = _blinrange(pktbs,
1188 &rhtp->tpc_tp.tp_sl_range_cipso) ||
1189 blinlset(pktbs, rhtp->tpc_tp.tp_sl_set_cipso);
1190 break;
1191
1192 default:
1193 retv = B_FALSE;
1194 break;
1195 }
1196 }
1197
1198 if (rhtp != NULL)
1199 TPC_RELE(rhtp);
1200
1201 return (retv);
1202 }
1203
1204 /*
1205 * Finds the zone associated with the receive attributes. Returns GLOBAL_ZONEID
1206 * if the zone cannot be located.
1207 *
1208 * This is used by the classifier when the packet matches an ALL_ZONES IRE, and
1209 * there's no MLP defined.
1210 *
1211 * Note that we assume that this is only invoked in the ALL_ZONES case.
1212 * Handling other cases would require handling exclusive IP zones where either
1213 * this routine or the callers would have to map from
1214 * the zoneid (zone->zone_id) to what IP uses in conn_zoneid etc.
1215 */
1216 zoneid_t
tsol_attr_to_zoneid(const ip_recv_attr_t * ira)1217 tsol_attr_to_zoneid(const ip_recv_attr_t *ira)
1218 {
1219 zone_t *zone;
1220 ts_label_t *label;
1221
1222 if ((label = ira->ira_tsl) != NULL) {
1223 zone = zone_find_by_label(label);
1224 if (zone != NULL) {
1225 zoneid_t zoneid = zone->zone_id;
1226
1227 zone_rele(zone);
1228 return (zoneid);
1229 }
1230 }
1231 return (GLOBAL_ZONEID);
1232 }
1233
1234 int
tsol_ire_match_gwattr(ire_t * ire,const ts_label_t * tsl)1235 tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl)
1236 {
1237 int error = 0;
1238 tsol_ire_gw_secattr_t *attrp = NULL;
1239 tsol_tnrhc_t *gw_rhc = NULL;
1240 tsol_gcgrp_t *gcgrp = NULL;
1241 tsol_gc_t *gc = NULL;
1242 in_addr_t ga_addr4;
1243 void *paddr = NULL;
1244
1245 /* Not in Trusted mode or IRE is local/loopback/broadcast/interface */
1246 if (!is_system_labeled() ||
1247 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST |
1248 IRE_IF_ALL | IRE_MULTICAST | IRE_NOROUTE)))
1249 goto done;
1250
1251 /*
1252 * If we don't have a label to compare with, or the IRE does not
1253 * contain any gateway security attributes, there's not much that
1254 * we can do. We let the former case pass, and the latter fail,
1255 * since the IRE doesn't qualify for a match due to the lack of
1256 * security attributes.
1257 */
1258 if (tsl == NULL || ire->ire_gw_secattr == NULL) {
1259 if (tsl != NULL) {
1260 DTRACE_PROBE3(
1261 tx__ip__log__drop__irematch__nogwsec, char *,
1262 "ire(1) lacks ire_gw_secattr when matching "
1263 "label(2)", ire_t *, ire, ts_label_t *, tsl);
1264 error = EACCES;
1265 }
1266 goto done;
1267 }
1268
1269 attrp = ire->ire_gw_secattr;
1270
1271 /*
1272 * The possible lock order scenarios related to the tsol gateway
1273 * attribute locks are documented at the beginning of ip.c in the
1274 * lock order scenario section.
1275 */
1276 mutex_enter(&attrp->igsa_lock);
1277
1278 /*
1279 * We seek the group
1280 * structure which contains all security credentials of the gateway.
1281 * An offline IRE is associated with at most one gateway credential.
1282 */
1283 if ((gc = attrp->igsa_gc) != NULL) {
1284 gcgrp = gc->gc_grp;
1285 ASSERT(gcgrp != NULL);
1286 rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1287 GCGRP_REFHOLD(gcgrp);
1288 }
1289
1290 if ((gw_rhc = attrp->igsa_rhc) != NULL) {
1291 /*
1292 * If our cached entry has grown stale, then discard it so we
1293 * can get a new one.
1294 */
1295 if (gw_rhc->rhc_invalid || gw_rhc->rhc_tpc->tpc_invalid) {
1296 TNRHC_RELE(gw_rhc);
1297 attrp->igsa_rhc = gw_rhc = NULL;
1298 } else {
1299 TNRHC_HOLD(gw_rhc)
1300 }
1301 }
1302
1303 /* Last attempt at loading the template had failed; try again */
1304 if (gw_rhc == NULL) {
1305 if (gcgrp != NULL) {
1306 tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr;
1307
1308 if (ire->ire_ipversion == IPV4_VERSION) {
1309 ASSERT(ga->ga_af == AF_INET);
1310 IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4);
1311 paddr = &ga_addr4;
1312 } else {
1313 ASSERT(ga->ga_af == AF_INET6);
1314 paddr = &ga->ga_addr;
1315 }
1316 } else if (ire->ire_type & IRE_OFFLINK) {
1317 if (ire->ire_ipversion == IPV6_VERSION)
1318 paddr = &ire->ire_gateway_addr_v6;
1319 else if (ire->ire_ipversion == IPV4_VERSION)
1320 paddr = &ire->ire_gateway_addr;
1321 }
1322
1323 /* We've found a gateway address to do the template lookup */
1324 if (paddr != NULL) {
1325 ASSERT(gw_rhc == NULL);
1326 gw_rhc = find_rhc(paddr, ire->ire_ipversion, B_FALSE);
1327 if (gw_rhc != NULL) {
1328 /*
1329 * Note that if the lookup above returned an
1330 * internal template, we'll use it for the
1331 * time being, and do another lookup next
1332 * time around.
1333 */
1334 /* Another thread has loaded the template? */
1335 if (attrp->igsa_rhc != NULL) {
1336 TNRHC_RELE(gw_rhc)
1337 /* reload, it could be different */
1338 gw_rhc = attrp->igsa_rhc;
1339 } else {
1340 attrp->igsa_rhc = gw_rhc;
1341 }
1342 /*
1343 * Hold an extra reference just like we did
1344 * above prior to dropping the igsa_lock.
1345 */
1346 TNRHC_HOLD(gw_rhc)
1347 }
1348 }
1349 }
1350
1351 mutex_exit(&attrp->igsa_lock);
1352 /* Gateway template not found */
1353 if (gw_rhc == NULL) {
1354 /*
1355 * If destination address is directly reachable through an
1356 * interface rather than through a learned route, pass it.
1357 */
1358 if (paddr != NULL) {
1359 DTRACE_PROBE3(
1360 tx__ip__log__drop__irematch__nogwtmpl, char *,
1361 "ire(1), label(2) off-link with no gw_rhc",
1362 ire_t *, ire, ts_label_t *, tsl);
1363 error = EINVAL;
1364 }
1365 goto done;
1366 }
1367
1368 if (gc != NULL) {
1369
1370 tsol_gcdb_t *gcdb;
1371 /*
1372 * In the case of IRE_CACHE we've got one or more gateway
1373 * security credentials to compare against the passed in label.
1374 * Perform label range comparison against each security
1375 * credential of the gateway. In the case of a prefix ire
1376 * we need to match against the security attributes of
1377 * just the route itself, so the loop is executed only once.
1378 */
1379 ASSERT(gcgrp != NULL);
1380 gcdb = gc->gc_db;
1381 if (tsl->tsl_doi != gcdb->gcdb_doi ||
1382 !_blinrange(&tsl->tsl_label, &gcdb->gcdb_slrange)) {
1383 DTRACE_PROBE3(
1384 tx__ip__log__drop__irematch__nogcmatched,
1385 char *, "ire(1), tsl(2): all gc failed match",
1386 ire_t *, ire, ts_label_t *, tsl);
1387 error = EACCES;
1388 }
1389 } else {
1390 /*
1391 * We didn't find any gateway credentials in the IRE
1392 * attributes; fall back to the gateway's template for
1393 * label range checks, if we are required to do so.
1394 */
1395 ASSERT(gw_rhc != NULL);
1396 switch (gw_rhc->rhc_tpc->tpc_tp.host_type) {
1397 case SUN_CIPSO:
1398 if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
1399 (!_blinrange(&tsl->tsl_label,
1400 &gw_rhc->rhc_tpc->tpc_tp.tp_sl_range_cipso) &&
1401 !blinlset(&tsl->tsl_label,
1402 gw_rhc->rhc_tpc->tpc_tp.tp_sl_set_cipso))) {
1403 error = EACCES;
1404 DTRACE_PROBE4(
1405 tx__ip__log__drop__irematch__deftmpl,
1406 char *, "ire(1), tsl(2), gw_rhc(3) "
1407 "failed match (cipso gw)",
1408 ire_t *, ire, ts_label_t *, tsl,
1409 tsol_tnrhc_t *, gw_rhc);
1410 }
1411 break;
1412
1413 case UNLABELED:
1414 if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
1415 (!_blinrange(&tsl->tsl_label,
1416 &gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_range) &&
1417 !blinlset(&tsl->tsl_label,
1418 gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_set))) {
1419 error = EACCES;
1420 DTRACE_PROBE4(
1421 tx__ip__log__drop__irematch__deftmpl,
1422 char *, "ire(1), tsl(2), gw_rhc(3) "
1423 "failed match (unlabeled gw)",
1424 ire_t *, ire, ts_label_t *, tsl,
1425 tsol_tnrhc_t *, gw_rhc);
1426 }
1427 break;
1428 }
1429 }
1430
1431 done:
1432
1433 if (gcgrp != NULL) {
1434 rw_exit(&gcgrp->gcgrp_rwlock);
1435 GCGRP_REFRELE(gcgrp);
1436 }
1437
1438 if (gw_rhc != NULL)
1439 TNRHC_RELE(gw_rhc)
1440
1441 return (error);
1442 }
1443
1444 /*
1445 * Performs label accreditation checks for packet forwarding.
1446 * Add or remove a CIPSO option as needed.
1447 *
1448 * Returns a pointer to the modified mblk if allowed for forwarding,
1449 * or NULL if the packet must be dropped.
1450 */
1451 mblk_t *
tsol_ip_forward(ire_t * ire,mblk_t * mp,const ip_recv_attr_t * ira)1452 tsol_ip_forward(ire_t *ire, mblk_t *mp, const ip_recv_attr_t *ira)
1453 {
1454 tsol_ire_gw_secattr_t *attrp = NULL;
1455 ipha_t *ipha;
1456 ip6_t *ip6h;
1457 const void *pdst;
1458 const void *psrc;
1459 boolean_t off_link;
1460 tsol_tpc_t *dst_rhtp, *gw_rhtp;
1461 tsol_ip_label_t label_type;
1462 uchar_t *opt_ptr = NULL;
1463 ts_label_t *tsl;
1464 uint8_t proto;
1465 int af, adjust;
1466 uint16_t iplen;
1467 boolean_t need_tpc_rele = B_FALSE;
1468 ipaddr_t *gw;
1469 ip_stack_t *ipst = ire->ire_ipst;
1470 int err;
1471 ts_label_t *effective_tsl = NULL;
1472
1473 ASSERT(ire != NULL && mp != NULL);
1474 /*
1475 * Note that the ire is the first one found, i.e., an IRE_OFFLINK if
1476 * the destination is offlink.
1477 */
1478
1479 af = (ire->ire_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6;
1480
1481 if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
1482 ASSERT(ire->ire_ipversion == IPV4_VERSION);
1483 ipha = (ipha_t *)mp->b_rptr;
1484 psrc = &ipha->ipha_src;
1485 pdst = &ipha->ipha_dst;
1486 proto = ipha->ipha_protocol;
1487 if (!tsol_get_option_v4(mp, &label_type, &opt_ptr))
1488 return (NULL);
1489 } else {
1490 ASSERT(ire->ire_ipversion == IPV6_VERSION);
1491 ip6h = (ip6_t *)mp->b_rptr;
1492 psrc = &ip6h->ip6_src;
1493 pdst = &ip6h->ip6_dst;
1494 proto = ip6h->ip6_nxt;
1495
1496 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP &&
1497 proto != IPPROTO_ICMPV6) {
1498 uint8_t *nexthdrp;
1499 uint16_t hdr_len;
1500
1501 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len,
1502 &nexthdrp)) {
1503 /* malformed packet; drop it */
1504 return (NULL);
1505 }
1506 proto = *nexthdrp;
1507 }
1508 if (!tsol_get_option_v6(mp, &label_type, &opt_ptr))
1509 return (NULL);
1510 }
1511 /*
1512 * off_link is TRUE if destination not directly reachable.
1513 */
1514 off_link = (ire->ire_type & IRE_OFFLINK);
1515
1516 if ((tsl = ira->ira_tsl) == NULL)
1517 return (mp);
1518
1519 if (tsl->tsl_flags & TSLF_IMPLICIT_IN) {
1520 DTRACE_PROBE3(tx__ip__log__drop__forward__unresolved__label,
1521 char *,
1522 "cannot forward packet mp(1) with unresolved "
1523 "security label sl(2)",
1524 mblk_t *, mp, ts_label_t *, tsl);
1525
1526 return (NULL);
1527 }
1528
1529
1530 ASSERT(psrc != NULL && pdst != NULL);
1531 dst_rhtp = find_tpc(pdst, ire->ire_ipversion, B_FALSE);
1532
1533 if (dst_rhtp == NULL) {
1534 /*
1535 * Without a template we do not know if forwarding
1536 * violates MAC
1537 */
1538 DTRACE_PROBE3(tx__ip__log__drop__forward__nodst, char *,
1539 "mp(1) dropped, no template for destination ip4|6(2)",
1540 mblk_t *, mp, void *, pdst);
1541 return (NULL);
1542 }
1543
1544 /*
1545 * Gateway template must have existed for off-link destinations,
1546 * since tsol_ire_match_gwattr has ensured such condition.
1547 */
1548 if (ire->ire_ipversion == IPV4_VERSION && off_link) {
1549 /*
1550 * Surya note: first check if we can get the gw_rhtp from
1551 * the ire_gw_secattr->igsa_rhc; if this is null, then
1552 * do a lookup based on the ire_addr (address of gw)
1553 */
1554 if (ire->ire_gw_secattr != NULL &&
1555 ire->ire_gw_secattr->igsa_rhc != NULL) {
1556 attrp = ire->ire_gw_secattr;
1557 gw_rhtp = attrp->igsa_rhc->rhc_tpc;
1558 } else {
1559 gw = &ire->ire_gateway_addr;
1560 gw_rhtp = find_tpc(gw, ire->ire_ipversion, B_FALSE);
1561 need_tpc_rele = B_TRUE;
1562 }
1563 if (gw_rhtp == NULL) {
1564 DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *,
1565 "mp(1) dropped, no gateway in ire attributes(2)",
1566 mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp);
1567 mp = NULL;
1568 goto keep_label;
1569 }
1570 }
1571 if (ire->ire_ipversion == IPV6_VERSION &&
1572 ((attrp = ire->ire_gw_secattr) == NULL || attrp->igsa_rhc == NULL ||
1573 (gw_rhtp = attrp->igsa_rhc->rhc_tpc) == NULL) && off_link) {
1574 DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *,
1575 "mp(1) dropped, no gateway in ire attributes(2)",
1576 mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp);
1577 mp = NULL;
1578 goto keep_label;
1579 }
1580
1581 /*
1582 * Check that the label for the packet is acceptable
1583 * by destination host; otherwise, drop it.
1584 */
1585 switch (dst_rhtp->tpc_tp.host_type) {
1586 case SUN_CIPSO:
1587 if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi ||
1588 (!_blinrange(&tsl->tsl_label,
1589 &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
1590 !blinlset(&tsl->tsl_label,
1591 dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
1592 DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *,
1593 "labeled packet mp(1) dropped, label(2) fails "
1594 "destination(3) accredation check",
1595 mblk_t *, mp, ts_label_t *, tsl,
1596 tsol_tpc_t *, dst_rhtp);
1597 mp = NULL;
1598 goto keep_label;
1599 }
1600 break;
1601
1602
1603 case UNLABELED:
1604 if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi ||
1605 !blequal(&dst_rhtp->tpc_tp.tp_def_label,
1606 &tsl->tsl_label)) {
1607 DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *,
1608 "unlabeled packet mp(1) dropped, label(2) fails "
1609 "destination(3) accredation check",
1610 mblk_t *, mp, ts_label_t *, tsl,
1611 tsol_tpc_t *, dst_rhtp);
1612 mp = NULL;
1613 goto keep_label;
1614 }
1615 break;
1616 }
1617 if (label_type == OPT_CIPSO) {
1618 /*
1619 * We keep the label on any of the following cases:
1620 *
1621 * 1. The destination is labeled (on/off-link).
1622 * 2. The unlabeled destination is off-link,
1623 * and the next hop gateway is labeled.
1624 */
1625 if (dst_rhtp->tpc_tp.host_type != UNLABELED ||
1626 (off_link &&
1627 gw_rhtp->tpc_tp.host_type != UNLABELED))
1628 goto keep_label;
1629
1630 /*
1631 * Strip off the CIPSO option from the packet because: the
1632 * unlabeled destination host is directly reachable through
1633 * an interface (on-link); or, the unlabeled destination host
1634 * is not directly reachable (off-link), and the next hop
1635 * gateway is unlabeled.
1636 */
1637 adjust = (af == AF_INET) ? tsol_remove_secopt(ipha, MBLKL(mp)) :
1638 tsol_remove_secopt_v6(ip6h, MBLKL(mp));
1639
1640 ASSERT(adjust <= 0);
1641 if (adjust != 0) {
1642
1643 /* adjust is negative */
1644 ASSERT((mp->b_wptr + adjust) >= mp->b_rptr);
1645 mp->b_wptr += adjust;
1646 /*
1647 * Note that caller adjusts ira_pktlen and
1648 * ira_ip_hdr_length
1649 *
1650 * For AF_INET6 note that tsol_remove_secopt_v6
1651 * adjusted ip6_plen.
1652 */
1653 if (af == AF_INET) {
1654 ipha = (ipha_t *)mp->b_rptr;
1655 iplen = ntohs(ipha->ipha_length) + adjust;
1656 ipha->ipha_length = htons(iplen);
1657 ipha->ipha_hdr_checksum = 0;
1658 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1659 }
1660 DTRACE_PROBE3(tx__ip__log__info__forward__adjust,
1661 char *,
1662 "mp(1) adjusted(2) for CIPSO option removal",
1663 mblk_t *, mp, int, adjust);
1664 }
1665 goto keep_label;
1666 }
1667
1668 ASSERT(label_type == OPT_NONE);
1669 ASSERT(dst_rhtp != NULL);
1670
1671 /*
1672 * We need to add CIPSO option if the destination or the next hop
1673 * gateway is labeled. Otherwise, pass the packet as is.
1674 */
1675 if (dst_rhtp->tpc_tp.host_type == UNLABELED &&
1676 (!off_link || gw_rhtp->tpc_tp.host_type == UNLABELED))
1677 goto keep_label;
1678
1679 /*
1680 * Since we are forwarding packets we use GLOBAL_ZONEID for
1681 * the IRE lookup in tsol_check_label.
1682 * Since mac_exempt is false the zoneid isn't used for anything
1683 * but the IRE lookup, hence we set zone_is_global to false.
1684 */
1685 if (af == AF_INET) {
1686 err = tsol_check_label_v4(tsl, GLOBAL_ZONEID, &mp,
1687 CONN_MAC_DEFAULT, B_FALSE, ipst, &effective_tsl);
1688 } else {
1689 err = tsol_check_label_v6(tsl, GLOBAL_ZONEID, &mp,
1690 CONN_MAC_DEFAULT, B_FALSE, ipst, &effective_tsl);
1691 }
1692 if (err != 0) {
1693 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
1694 ip_drop_output("tsol_check_label", mp, NULL);
1695 freemsg(mp);
1696 mp = NULL;
1697 goto keep_label;
1698 }
1699
1700 /*
1701 * The effective_tsl must never affect the routing decision, hence
1702 * we ignore it here.
1703 */
1704 if (effective_tsl != NULL)
1705 label_rele(effective_tsl);
1706
1707 if (af == AF_INET) {
1708 ipha = (ipha_t *)mp->b_rptr;
1709 ipha->ipha_hdr_checksum = 0;
1710 ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
1711 }
1712
1713 keep_label:
1714 TPC_RELE(dst_rhtp);
1715 if (need_tpc_rele && gw_rhtp != NULL)
1716 TPC_RELE(gw_rhtp);
1717 return (mp);
1718 }
1719
1720 /*
1721 * Name: tsol_pmtu_adjust()
1722 *
1723 * Returns the adjusted mtu after removing security option.
1724 * Removes/subtracts the option if the packet's cred indicates an unlabeled
1725 * sender or if pkt_diff indicates this system enlarged the packet.
1726 */
1727 uint32_t
tsol_pmtu_adjust(mblk_t * mp,uint32_t mtu,int pkt_diff,int af)1728 tsol_pmtu_adjust(mblk_t *mp, uint32_t mtu, int pkt_diff, int af)
1729 {
1730 int label_adj = 0;
1731 uint32_t min_mtu = IP_MIN_MTU;
1732 tsol_tpc_t *src_rhtp;
1733 void *src;
1734
1735 /*
1736 * Note: label_adj is non-positive, indicating the number of
1737 * bytes removed by removing the security option from the
1738 * header.
1739 */
1740 if (af == AF_INET6) {
1741 ip6_t *ip6h;
1742
1743 min_mtu = IPV6_MIN_MTU;
1744 ip6h = (ip6_t *)mp->b_rptr;
1745 src = &ip6h->ip6_src;
1746 if ((src_rhtp = find_tpc(src, IPV6_VERSION, B_FALSE)) == NULL)
1747 return (mtu);
1748 if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED) {
1749 label_adj = tsol_remove_secopt_v6(
1750 (ip6_t *)mp->b_rptr, MBLKL(mp));
1751 }
1752 } else {
1753 ipha_t *ipha;
1754
1755 ASSERT(af == AF_INET);
1756 ipha = (ipha_t *)mp->b_rptr;
1757 src = &ipha->ipha_src;
1758 if ((src_rhtp = find_tpc(src, IPV4_VERSION, B_FALSE)) == NULL)
1759 return (mtu);
1760 if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED)
1761 label_adj = tsol_remove_secopt(
1762 (ipha_t *)mp->b_rptr, MBLKL(mp));
1763 }
1764 /*
1765 * Make pkt_diff non-negative and the larger of the bytes
1766 * previously added (if any) or just removed, since label
1767 * addition + subtraction may not be completely idempotent.
1768 */
1769 if (pkt_diff < -label_adj)
1770 pkt_diff = -label_adj;
1771 if (pkt_diff > 0 && pkt_diff < mtu)
1772 mtu -= pkt_diff;
1773
1774 TPC_RELE(src_rhtp);
1775 return (MAX(mtu, min_mtu));
1776 }
1777
1778 /*
1779 * Name: tsol_rtsa_init()
1780 *
1781 * Normal: Sanity checks on the route security attributes provided by
1782 * user. Convert it into a route security parameter list to
1783 * be returned to caller.
1784 *
1785 * Output: EINVAL if bad security attributes in the routing message
1786 * ENOMEM if unable to allocate data structures
1787 * 0 otherwise.
1788 *
1789 * Note: On input, cp must point to the end of any addresses in
1790 * the rt_msghdr_t structure.
1791 */
1792 int
tsol_rtsa_init(rt_msghdr_t * rtm,tsol_rtsecattr_t * sp,caddr_t cp)1793 tsol_rtsa_init(rt_msghdr_t *rtm, tsol_rtsecattr_t *sp, caddr_t cp)
1794 {
1795 uint_t sacnt;
1796 int err;
1797 caddr_t lim;
1798 tsol_rtsecattr_t *tp;
1799
1800 ASSERT((cp >= (caddr_t)&rtm[1]) && sp != NULL);
1801
1802 /*
1803 * In theory, we could accept as many security attributes configured
1804 * per route destination. However, the current design is limited
1805 * such that at most only one set security attributes is allowed to
1806 * be associated with a prefix IRE. We therefore assert for now.
1807 */
1808 /* LINTED */
1809 ASSERT(TSOL_RTSA_REQUEST_MAX == 1);
1810
1811 sp->rtsa_cnt = 0;
1812 lim = (caddr_t)rtm + rtm->rtm_msglen;
1813 ASSERT(cp <= lim);
1814
1815 if ((lim - cp) < sizeof (rtm_ext_t) ||
1816 ((rtm_ext_t *)cp)->rtmex_type != RTMEX_GATEWAY_SECATTR)
1817 return (0);
1818
1819 if (((rtm_ext_t *)cp)->rtmex_len < sizeof (tsol_rtsecattr_t))
1820 return (EINVAL);
1821
1822 cp += sizeof (rtm_ext_t);
1823
1824 if ((lim - cp) < sizeof (*tp) ||
1825 (tp = (tsol_rtsecattr_t *)cp, (sacnt = tp->rtsa_cnt) == 0) ||
1826 (lim - cp) < TSOL_RTSECATTR_SIZE(sacnt))
1827 return (EINVAL);
1828
1829 /*
1830 * Trying to add route security attributes when system
1831 * labeling service is not available, or when user supllies
1832 * more than the maximum number of security attributes
1833 * allowed per request.
1834 */
1835 if ((sacnt > 0 && !is_system_labeled()) ||
1836 sacnt > TSOL_RTSA_REQUEST_MAX)
1837 return (EINVAL);
1838
1839 /* Ensure valid credentials */
1840 if ((err = rtsa_validate(&((tsol_rtsecattr_t *)cp)->
1841 rtsa_attr[0])) != 0) {
1842 cp += sizeof (*sp);
1843 return (err);
1844 }
1845
1846 bcopy(cp, sp, sizeof (*sp));
1847 cp += sizeof (*sp);
1848 return (0);
1849 }
1850
1851 int
tsol_ire_init_gwattr(ire_t * ire,uchar_t ipversion,tsol_gc_t * gc)1852 tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc)
1853 {
1854 tsol_ire_gw_secattr_t *attrp;
1855 boolean_t exists = B_FALSE;
1856 in_addr_t ga_addr4;
1857 void *paddr = NULL;
1858 tsol_gcgrp_t *gcgrp = NULL;
1859
1860 ASSERT(ire != NULL);
1861
1862 /*
1863 * The only time that attrp can be NULL is when this routine is
1864 * called for the first time during the creation/initialization
1865 * of the corresponding IRE. It will only get cleared when the
1866 * IRE is deleted.
1867 */
1868 if ((attrp = ire->ire_gw_secattr) == NULL) {
1869 attrp = ire_gw_secattr_alloc(KM_NOSLEEP);
1870 if (attrp == NULL)
1871 return (ENOMEM);
1872 ire->ire_gw_secattr = attrp;
1873 } else {
1874 exists = B_TRUE;
1875 mutex_enter(&attrp->igsa_lock);
1876
1877 if (attrp->igsa_rhc != NULL) {
1878 TNRHC_RELE(attrp->igsa_rhc);
1879 attrp->igsa_rhc = NULL;
1880 }
1881
1882 if (attrp->igsa_gc != NULL)
1883 GC_REFRELE(attrp->igsa_gc);
1884 }
1885 ASSERT(!exists || MUTEX_HELD(&attrp->igsa_lock));
1886
1887 /*
1888 * References already held by caller and we keep them;
1889 * note that gc may be set to NULL to clear out igsa_gc.
1890 */
1891 attrp->igsa_gc = gc;
1892
1893 if (gc != NULL) {
1894 gcgrp = gc->gc_grp;
1895 ASSERT(gcgrp != NULL);
1896 }
1897
1898 /*
1899 * Intialize the template for gateway; we use the gateway's
1900 * address found in either the passed in gateway credential
1901 * or group pointer, or the ire_gateway_addr{_v6} field.
1902 */
1903 if (gcgrp != NULL) {
1904 tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr;
1905
1906 /*
1907 * Caller is holding a reference, and that we don't
1908 * need to hold any lock to access the address.
1909 */
1910 if (ipversion == IPV4_VERSION) {
1911 ASSERT(ga->ga_af == AF_INET);
1912 IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4);
1913 paddr = &ga_addr4;
1914 } else {
1915 ASSERT(ga->ga_af == AF_INET6);
1916 paddr = &ga->ga_addr;
1917 }
1918 } else if (ire->ire_type & IRE_OFFLINK) {
1919 if (ipversion == IPV6_VERSION)
1920 paddr = &ire->ire_gateway_addr_v6;
1921 else if (ipversion == IPV4_VERSION)
1922 paddr = &ire->ire_gateway_addr;
1923 }
1924
1925 /*
1926 * Lookup the gateway template; note that we could get an internal
1927 * template here, which we cache anyway. During IRE matching, we'll
1928 * try to update this gateway template cache and hopefully get a
1929 * real one.
1930 */
1931 if (paddr != NULL) {
1932 attrp->igsa_rhc = find_rhc(paddr, ipversion, B_FALSE);
1933 }
1934
1935 if (exists)
1936 mutex_exit(&attrp->igsa_lock);
1937
1938 return (0);
1939 }
1940
1941 /*
1942 * This function figures the type of MLP that we'll be using based on the
1943 * address that the user is binding and the zone. If the address is
1944 * unspecified, then we're looking at both private and shared. If it's one
1945 * of the zone's private addresses, then it's private only. If it's one
1946 * of the global addresses, then it's shared only. Multicast addresses are
1947 * treated same as unspecified address.
1948 *
1949 * If we can't figure out what it is, then return mlptSingle. That's actually
1950 * an error case.
1951 *
1952 * The callers are assumed to pass in zone->zone_id and not the zoneid that
1953 * is stored in a conn_t (since the latter will be GLOBAL_ZONEID in an
1954 * exclusive stack zone).
1955 */
1956 mlp_type_t
tsol_mlp_addr_type(zoneid_t zoneid,uchar_t version,const void * addr,ip_stack_t * ipst)1957 tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr,
1958 ip_stack_t *ipst)
1959 {
1960 in_addr_t in4;
1961 ire_t *ire;
1962 ipif_t *ipif;
1963 zoneid_t addrzone;
1964 zoneid_t ip_zoneid;
1965
1966 ASSERT(addr != NULL);
1967
1968 /*
1969 * For exclusive stacks we set the zoneid to zero
1970 * to operate as if in the global zone for IRE and conn_t comparisons.
1971 */
1972 if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
1973 ip_zoneid = GLOBAL_ZONEID;
1974 else
1975 ip_zoneid = zoneid;
1976
1977 if (version == IPV6_VERSION &&
1978 IN6_IS_ADDR_V4MAPPED((const in6_addr_t *)addr)) {
1979 IN6_V4MAPPED_TO_IPADDR((const in6_addr_t *)addr, in4);
1980 addr = &in4;
1981 version = IPV4_VERSION;
1982 }
1983
1984 /* Check whether the IRE_LOCAL (or ipif) is ALL_ZONES */
1985 if (version == IPV4_VERSION) {
1986 in4 = *(const in_addr_t *)addr;
1987 if ((in4 == INADDR_ANY) || CLASSD(in4)) {
1988 return (mlptBoth);
1989 }
1990 ire = ire_ftable_lookup_v4(in4, 0, 0, IRE_LOCAL|IRE_LOOPBACK,
1991 NULL, ip_zoneid, NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY,
1992 0, ipst, NULL);
1993 } else {
1994 if (IN6_IS_ADDR_UNSPECIFIED((const in6_addr_t *)addr) ||
1995 IN6_IS_ADDR_MULTICAST((const in6_addr_t *)addr)) {
1996 return (mlptBoth);
1997 }
1998 ire = ire_ftable_lookup_v6(addr, 0, 0, IRE_LOCAL|IRE_LOOPBACK,
1999 NULL, ip_zoneid, NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY,
2000 0, ipst, NULL);
2001 }
2002 /*
2003 * If we can't find the IRE, then we have to behave exactly like
2004 * ip_laddr_verify_{v4,v6}. That means looking up the IPIF so that
2005 * users can bind to addresses on "down" interfaces.
2006 *
2007 * If we can't find that either, then the bind is going to fail, so
2008 * just give up. Note that there's a miniscule chance that the address
2009 * is in transition, but we don't bother handling that.
2010 */
2011 if (ire == NULL) {
2012 if (version == IPV4_VERSION)
2013 ipif = ipif_lookup_addr(*(const in_addr_t *)addr, NULL,
2014 ip_zoneid, ipst);
2015 else
2016 ipif = ipif_lookup_addr_v6((const in6_addr_t *)addr,
2017 NULL, ip_zoneid, ipst);
2018 if (ipif == NULL) {
2019 return (mlptSingle);
2020 }
2021 addrzone = ipif->ipif_zoneid;
2022 ipif_refrele(ipif);
2023 } else {
2024 addrzone = ire->ire_zoneid;
2025 ire_refrele(ire);
2026 }
2027 return (addrzone == ALL_ZONES ? mlptShared : mlptPrivate);
2028 }
2029
2030 /*
2031 * Since we are configuring local interfaces, and we know trusted
2032 * extension CDE requires local interfaces to be cipso host type in
2033 * order to function correctly, we'll associate a cipso template
2034 * to each local interface and let the interface come up. Configuring
2035 * a local interface to be "unlabeled" host type is a configuration error.
2036 * We'll override that error and make the interface host type to be cipso
2037 * here.
2038 *
2039 * The code is optimized for the usual "success" case and unwinds things on
2040 * error. We don't want to go to the trouble and expense of formatting the
2041 * interface name for the usual case where everything is configured correctly.
2042 */
2043 boolean_t
tsol_check_interface_address(const ipif_t * ipif)2044 tsol_check_interface_address(const ipif_t *ipif)
2045 {
2046 tsol_tpc_t *tp;
2047 char addrbuf[INET6_ADDRSTRLEN];
2048 int af;
2049 const void *addr;
2050 zone_t *zone;
2051 ts_label_t *plabel;
2052 const bslabel_t *label;
2053 char ifname[LIFNAMSIZ];
2054 boolean_t retval;
2055 tsol_rhent_t rhent;
2056 netstack_t *ns = ipif->ipif_ill->ill_ipst->ips_netstack;
2057
2058 if (IN6_IS_ADDR_V4MAPPED(&ipif->ipif_v6lcl_addr)) {
2059 af = AF_INET;
2060 addr = &V4_PART_OF_V6(ipif->ipif_v6lcl_addr);
2061 } else {
2062 af = AF_INET6;
2063 addr = &ipif->ipif_v6lcl_addr;
2064 }
2065
2066 tp = find_tpc(&ipif->ipif_v6lcl_addr, IPV6_VERSION, B_FALSE);
2067
2068 /* assumes that ALL_ZONES implies that there is no exclusive stack */
2069 if (ipif->ipif_zoneid == ALL_ZONES) {
2070 zone = NULL;
2071 } else if (ns->netstack_stackid == GLOBAL_NETSTACKID) {
2072 /* Shared stack case */
2073 zone = zone_find_by_id(ipif->ipif_zoneid);
2074 } else {
2075 /* Exclusive stack case */
2076 zone = zone_find_by_id(crgetzoneid(ipif->ipif_ill->ill_credp));
2077 }
2078 if (zone != NULL) {
2079 plabel = zone->zone_slabel;
2080 ASSERT(plabel != NULL);
2081 label = label2bslabel(plabel);
2082 }
2083
2084 /*
2085 * If it's CIPSO and an all-zones address, then we're done.
2086 * If it's a CIPSO zone specific address, the zone's label
2087 * must be in the range or set specified in the template.
2088 * When the remote host entry is missing or the template
2089 * type is incorrect for this interface, we create a
2090 * CIPSO host entry in kernel and allow the interface to be
2091 * brought up as CIPSO type.
2092 */
2093 if (tp != NULL && (
2094 /* The all-zones case */
2095 (tp->tpc_tp.host_type == SUN_CIPSO &&
2096 tp->tpc_tp.tp_doi == default_doi &&
2097 ipif->ipif_zoneid == ALL_ZONES) ||
2098 /* The local-zone case */
2099 (zone != NULL && plabel->tsl_doi == tp->tpc_tp.tp_doi &&
2100 ((tp->tpc_tp.host_type == SUN_CIPSO &&
2101 (_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) ||
2102 blinlset(label, tp->tpc_tp.tp_sl_set_cipso))))))) {
2103 if (zone != NULL)
2104 zone_rele(zone);
2105 TPC_RELE(tp);
2106 return (B_TRUE);
2107 }
2108
2109 ipif_get_name(ipif, ifname, sizeof (ifname));
2110 (void) inet_ntop(af, addr, addrbuf, sizeof (addrbuf));
2111
2112 if (tp == NULL) {
2113 cmn_err(CE_NOTE, "template entry for %s missing. Default to "
2114 "CIPSO type for %s", ifname, addrbuf);
2115 retval = B_TRUE;
2116 } else if (tp->tpc_tp.host_type == UNLABELED) {
2117 cmn_err(CE_NOTE, "template type for %s incorrectly configured. "
2118 "Change to CIPSO type for %s", ifname, addrbuf);
2119 retval = B_TRUE;
2120 } else if (ipif->ipif_zoneid == ALL_ZONES) {
2121 if (tp->tpc_tp.host_type != SUN_CIPSO) {
2122 cmn_err(CE_NOTE, "%s failed: %s isn't set to CIPSO for "
2123 "all-zones. Converted to CIPSO.", ifname, addrbuf);
2124 retval = B_TRUE;
2125 } else {
2126 cmn_err(CE_NOTE, "%s failed: %s has wrong DOI %d "
2127 "instead of %d", ifname, addrbuf,
2128 tp->tpc_tp.tp_doi, default_doi);
2129 retval = B_FALSE;
2130 }
2131 } else if (zone == NULL) {
2132 cmn_err(CE_NOTE, "%s failed: zoneid %d unknown",
2133 ifname, ipif->ipif_zoneid);
2134 retval = B_FALSE;
2135 } else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) {
2136 cmn_err(CE_NOTE, "%s failed: zone %s has DOI %d but %s has "
2137 "DOI %d", ifname, zone->zone_name, plabel->tsl_doi,
2138 addrbuf, tp->tpc_tp.tp_doi);
2139 retval = B_FALSE;
2140 } else {
2141 cmn_err(CE_NOTE, "%s failed: zone %s label incompatible with "
2142 "%s", ifname, zone->zone_name, addrbuf);
2143 tsol_print_label(label, "zone label");
2144 retval = B_FALSE;
2145 }
2146
2147 if (zone != NULL)
2148 zone_rele(zone);
2149 if (tp != NULL)
2150 TPC_RELE(tp);
2151 if (retval) {
2152 /*
2153 * we've corrected a config error and let the interface
2154 * come up as cipso. Need to insert an rhent.
2155 */
2156 if ((rhent.rh_address.ta_family = af) == AF_INET) {
2157 rhent.rh_prefix = 32;
2158 rhent.rh_address.ta_addr_v4 = *(struct in_addr *)addr;
2159 } else {
2160 rhent.rh_prefix = 128;
2161 rhent.rh_address.ta_addr_v6 = *(in6_addr_t *)addr;
2162 }
2163 (void) strcpy(rhent.rh_template, "cipso");
2164 if (tnrh_load(&rhent) != 0) {
2165 cmn_err(CE_NOTE, "%s failed: Cannot insert CIPSO "
2166 "template for local addr %s", ifname, addrbuf);
2167 retval = B_FALSE;
2168 }
2169 }
2170 return (retval);
2171 }
2172