xref: /freebsd/sys/security/mac_do/mac_do.c (revision 4e4cf18b85cc51f41bcae20114f9c0e7b69f76e0)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright(c) 2024 Baptiste Daroussin <bapt@FreeBSD.org>
5  * Copyright (c) 2024 The FreeBSD Foundation
6  * Copyright (c) 2025 Kushagra Srivastava <kushagra1403@gmail.com>
7  *
8  * Portions of this software were developed by Olivier Certner
9  * <olce.freebsd@certner.fr> at Kumacom SARL under sponsorship from the FreeBSD
10  * Foundation.
11  */
12 
13 #include <sys/param.h>
14 #include <sys/systm.h>
15 #include <sys/ctype.h>
16 #include <sys/jail.h>
17 #include <sys/kernel.h>
18 #include <sys/limits.h>
19 #include <sys/lock.h>
20 #include <sys/malloc.h>
21 #include <sys/module.h>
22 #include <sys/mount.h>
23 #include <sys/mutex.h>
24 #include <sys/priv.h>
25 #include <sys/proc.h>
26 #include <sys/refcount.h>
27 #include <sys/socket.h>
28 #include <sys/stdarg.h>
29 #include <sys/sx.h>
30 #include <sys/sysctl.h>
31 #include <sys/ucred.h>
32 #include <sys/vnode.h>
33 
34 #include <security/mac/mac_policy.h>
35 
/* Root of this policy's sysctl(8) knobs: "security.mac.do". */
static SYSCTL_NODE(_security_mac, OID_AUTO, do,
    CTLFLAG_RW|CTLFLAG_MPSAFE, 0, "mac_do policy controls");

/* "security.mac.do.enabled": read/write and loader-tunable, defaults to 1. */
static int	do_enabled = 1;
SYSCTL_INT(_security_mac_do, OID_AUTO, enabled, CTLFLAG_RWTUN,
    &do_enabled, 0, "Enforce do policy");

/*
 * "security.mac.do.print_parse_error": read/write and loader-tunable, defaults
 * to 1.
 */
static int	print_parse_error = 1;
SYSCTL_INT(_security_mac_do, OID_AUTO, print_parse_error, CTLFLAG_RWTUN,
    &print_parse_error, 0, "Print parse errors on setting rules "
    "(via sysctl(8)).");

/* malloc(9) type used for every allocation made by this file. */
static MALLOC_DEFINE(M_MAC_DO, "mac_do", "mac_do(4) security module");

/* Size in bytes of 'exec_paths_str' in 'struct exec_paths'. */
#define MAX_EXEC_PATHS_SIZE	2048
/* Number of entries of the 'exec_paths' array in 'struct exec_paths'. */
#define MAX_EXEC_PATHS		8

/*
 * Size in bytes of 'string' in 'struct rules'.  parse_rules() rejects
 * specifications whose length is greater than or equal to this value.
 */
#define MAX_RULE_STRING_SIZE	1024

/*
 * NOTE(review): presumably osd(9) slot numbers for per-jail and per-thread
 * data; their use is not visible in this part of the file -- confirm.
 */
static unsigned		osd_jail_slot;
static unsigned		osd_thread_slot;
57 
/*
 * ID types that can appear in rules.  IT_LAST must always alias the highest
 * valid value, as it bounds loops over types and the table below.
 */
#define IT_INVALID	0 /* Must stay 0. */
#define IT_UID		1
#define IT_GID		2
#define IT_ANY		3
#define IT_LAST		IT_ANY

/*
 * Canonical textual form of each IT_* value, indexed by type.  Both the
 * strings and the pointers to them are immutable.
 */
static const char *const id_type_to_str[] = {
	[IT_INVALID]	= "invalid",
	[IT_UID]	= "uid",
	[IT_GID]	= "gid",
	/* See also parse_id_type(). */
	[IT_ANY]	= "*",
};

/* Catch an IT_* value added without a matching string. */
_Static_assert(sizeof(id_type_to_str) / sizeof(id_type_to_str[0]) ==
    IT_LAST + 1, "id_type_to_str[] must have exactly one entry per IT_* value");
71 
/* Size in bytes of the message buffer of a 'struct parse_error'. */
#define PARSE_ERROR_SIZE	256

/*
 * All functions having a parse error parameter must return through it a parse
 * error object if and only if they return an error value (non-zero); else, NULL
 * must be returned through it.
 */
struct parse_error {
	/*
	 * Offset of the problem in the parsed string.  Inner parsers fill it
	 * relative to the start of their own token, and each outer parser adds
	 * the offset of that token while the error propagates up.
	 */
	size_t	pos;
	/* NUL-terminated message, formatted by make_parse_error(). */
	char	msg[PARSE_ERROR_SIZE];
};
83 
84 /*
85  * We assume that 'uid_t' and 'gid_t' are aliases to 'u_int' in conversions
86  * required for parsing rules specification strings.
87  */
88 _Static_assert(sizeof(uid_t) == sizeof(u_int) && (uid_t)-1 >= 0 &&
89     sizeof(gid_t) == sizeof(u_int) && (gid_t)-1 >= 0,
90     "mac_do(4) assumes that 'uid_t' and 'gid_t' are aliases to 'u_int'");
91 
92 /*
93  * Internal flags.
94  *
95  * They either apply as per-type (t) or per-ID (i) but are conflated because all
96  * per-ID flags are also valid as per-type ones to qualify the "current" (".")
97  * per-type flag.  Also, some of them are in fact exclusive, but we use one-hot
98  * encoding for simplicity.
99  *
100  * There is currently room for "only" 16 bits.  As these flags are purely
101  * internal, they can be renumbered and/or their type changed as needed.
102  *
103  * See also the check_*() functions below.
104  */
/* Storage for a set of MDF_* flags (per-ID or per-type, see above). */
typedef uint16_t	flags_t;

/* (i,gid) Specification concerns primary groups. */
#define MDF_PRIMARY	(1u << 0)
/* (i,gid) Specification concerns supplementary groups. */
#define MDF_SUPP_ALLOW	(1u << 1)
/* (i,gid) Group must appear as a supplementary group. */
#define MDF_SUPP_MUST	(1u << 2)
/* (i,gid) Group must not appear as a supplementary group. */
#define MDF_SUPP_DONT	(1u << 3)
/* All flags qualifying a GID as a supplementary group. */
#define MDF_SUPP_MASK	(MDF_SUPP_ALLOW | MDF_SUPP_MUST | MDF_SUPP_DONT)
/* All per-ID flags. */
#define MDF_ID_MASK	(MDF_PRIMARY | MDF_SUPP_MASK)

/*
 * (t) All IDs allowed.
 *
 * For GIDs, MDF_ANY only concerns primary groups.  The MDF_PRIMARY and
 * MDF_SUPP_* flags never apply to MDF_ANY, but can be present if MDF_CURRENT is
 * present also, as usual.
 */
#define MDF_ANY			(1u << 8)
/* (t) Current IDs allowed. */
#define MDF_CURRENT		(1u << 9)
/* Per-type flags meaningful for both UIDs and GIDs. */
#define MDF_TYPE_COMMON_MASK	(MDF_ANY | MDF_CURRENT)
/* (t,gid) All IDs allowed as supplementary groups. */
#define MDF_ANY_SUPP		(1u << 10)
/* (t,gid) Some ID or MDF_CURRENT has MDF_SUPP_MUST or MDF_SUPP_DONT. */
#define MDF_MAY_REJ_SUPP	(1u << 11)
/* (t,gid) Some explicit ID (not MDF_CURRENT) has MDF_SUPP_MUST. */
#define MDF_EXPLICIT_SUPP_MUST	(1u << 12)
/*
 * (t,gid) Whether any target clause is about primary groups.  Used during
 * parsing only.
 */
#define MDF_HAS_PRIMARY_CLAUSE	(1u << 13)
/*
 * (t,gid) Whether any target clause is about supplementary groups.  Used during
 * parsing only.
 */
#define MDF_HAS_SUPP_CLAUSE	(1u << 14)
/* Per-type flags that are meaningful for GIDs only. */
#define MDF_TYPE_GID_MASK	(MDF_ANY_SUPP | MDF_MAY_REJ_SUPP |	\
    MDF_EXPLICIT_SUPP_MUST | MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE)
/* All per-type flags. */
#define MDF_TYPE_MASK		(MDF_TYPE_COMMON_MASK | MDF_TYPE_GID_MASK)
148 
149 /*
150  * Persistent structures.
151  */
152 
/* A target ID (UID or GID, depending on the array it is in) and its flags. */
struct id_spec {
	u_int		 id;
	flags_t		 flags; /* See MDF_* above. */
};

/*
 * This limits the number of target clauses per type to 65535.  With the current
 * value of MAX_RULE_STRING_SIZE (1024), this is way more than enough anyway.
 */
typedef uint16_t	 id_nb_t;
/* We only have a few IT_* types. */
typedef uint16_t	 id_type_t;

/*
 * One parsed rule: a "from" ID and the targets it may transition to.
 *
 * The 'uids' and 'gids' arrays are only allocated when the corresponding count
 * is non-zero, and are sorted by ID by pour_list_into_rule().
 */
struct rule {
	STAILQ_ENTRY(rule) r_entries;
	/* IT_UID or IT_GID; parse_single_rule() rejects anything else. */
	id_type_t	 from_type;
	u_int		 from_id;
	flags_t		 uid_flags; /* See MDF_* above. */
	id_nb_t		 uids_nb;   /* Number of elements in 'uids'. */
	flags_t		 gid_flags; /* See MDF_* above. */
	id_nb_t		 gids_nb;   /* Number of elements in 'gids'. */
	struct id_spec	*uids;
	struct id_spec	*gids;
};
177 
/* Head type for a tail queue of 'struct rule', linked through 'r_entries'. */
STAILQ_HEAD(rulehead, rule);

/* A set of rules, both as specified (text) and as parsed (list). */
struct rules {
	/* Copy of the specification string handed to parse_rules(). */
	char		string[MAX_RULE_STRING_SIZE];
	/* Parsed rules, in order of appearance in the specification. */
	struct rulehead	head;
};

/*
 * NOTE(review): presumably the executable paths this policy applies to, both
 * in textual form ('exec_paths_str') and split into at most MAX_EXEC_PATHS
 * entries ('exec_paths', with 'exec_path_count' of them in use).  The code
 * filling this structure is not visible in this part of the file -- confirm.
 */
struct exec_paths {
	char exec_paths_str[MAX_EXEC_PATHS_SIZE];
	char exec_paths[MAX_EXEC_PATHS][PATH_MAX];
	int exec_path_count;
};

/* A complete configuration, as allocated by alloc_conf(). */
struct conf {
	struct rules rules;
	struct exec_paths exec_paths;
	/*
	 * Starts at 0 (see alloc_conf()).  NOTE(review): looks like a
	 * reference count kept on its own cache line -- confirm against the
	 * code manipulating it.
	 */
	volatile u_int	use_count __aligned(CACHE_LINE_SIZE);
};
196 
197 /*
198  * Temporary structures used to build a 'struct rule' above.
199  */
200 
/*
 * List element carrying one 'struct id_spec' while a rule's target clauses are
 * being parsed.  Elements are allocated by parse_target_clause() and freed by
 * pour_list_into_rule() (or by parse_single_rule() on error).
 */
struct id_elem {
	STAILQ_ENTRY(id_elem) ie_entries;
	struct id_spec spec;
};

/* Head type for a tail queue of 'struct id_elem'. */
STAILQ_HEAD(id_list, id_elem);
207 
208 #ifdef INVARIANTS
209 static void
210 check_type(const id_type_t type)
211 {
212 	if (type > IT_LAST)
213 		panic("Invalid type number %u", type);
214 }
215 
216 static void
217 panic_for_unexpected_flags(const id_type_t type, const flags_t flags,
218     const char *const str)
219 {
220 	panic("ID type %s: Unexpected flags %u (%s), ", id_type_to_str[type],
221 	    flags, str);
222 }
223 
224 static void
225 check_type_and_id_flags(const id_type_t type, const flags_t flags)
226 {
227 	const char *str;
228 
229 	check_type(type);
230 	switch (type) {
231 	case IT_UID:
232 		if (flags != 0) {
233 			str = "only 0 allowed";
234 			goto unexpected_flags;
235 		}
236 		break;
237 	case IT_GID:
238 		if ((flags & ~MDF_ID_MASK) != 0) {
239 			str = "only bits in MDF_ID_MASK allowed";
240 			goto unexpected_flags;
241 		}
242 		if (!powerof2(flags & MDF_SUPP_MASK)) {
243 			str = "only a single flag in MDF_SUPP_MASK allowed";
244 			goto unexpected_flags;
245 		}
246 		break;
247 	default:
248 		__assert_unreachable();
249 	}
250 	return;
251 
252 unexpected_flags:
253 	panic_for_unexpected_flags(type, flags, str);
254 }
255 
256 static void
257 check_type_and_id_spec(const id_type_t type, const struct id_spec *const is)
258 {
259 	check_type_and_id_flags(type, is->flags);
260 }
261 
262 static void
263 check_type_and_type_flags(const id_type_t type, const flags_t flags)
264 {
265 	const char *str;
266 
267 	check_type_and_id_flags(type, flags & MDF_ID_MASK);
268 	if ((flags & ~MDF_ID_MASK & ~MDF_TYPE_MASK) != 0) {
269 		str = "only MDF_ID_MASK | MDF_TYPE_MASK bits allowed";
270 		goto unexpected_flags;
271 	}
272 	if ((flags & MDF_ANY) != 0 && (flags & MDF_CURRENT) != 0 &&
273 	    (type == IT_UID || (flags & MDF_PRIMARY) != 0)) {
274 		str = "MDF_ANY and MDF_CURRENT are exclusive for UIDs "
275 		    "or primary group GIDs";
276 		goto unexpected_flags;
277 	}
278 	if ((flags & MDF_ANY_SUPP) != 0 && (flags & MDF_CURRENT) != 0 &&
279 	    (flags & MDF_SUPP_MASK) != 0) {
280 		str = "MDF_SUPP_ANY and MDF_CURRENT with supplementary "
281 		    "groups specification are exclusive";
282 		goto unexpected_flags;
283 	}
284 	if (type == IT_GID &&
285 	    ((flags & MDF_PRIMARY) != 0 || (flags & MDF_ANY) != 0) &&
286 	    (flags & MDF_HAS_PRIMARY_CLAUSE) == 0) {
287 		str = "Presence of folded primary clause not reflected "
288 		    "by presence of MDF_HAS_PRIMARY_CLAUSE";
289 		goto unexpected_flags;
290 	}
291 	if (((flags & MDF_SUPP_MASK) != 0 || (flags & MDF_ANY_SUPP) != 0) &&
292 	    (flags & MDF_HAS_SUPP_CLAUSE) == 0) {
293 		str = "Presence of folded supplementary clause not reflected "
294 		    "by presence of MDF_HAS_SUPP_CLAUSE";
295 		goto unexpected_flags;
296 	}
297 	return;
298 
299 unexpected_flags:
300 	panic_for_unexpected_flags(type, flags, str);
301 }
302 #else /* !INVARIANTS */
303 #define check_type_and_id_flags(...)
304 #define check_type_and_id_spec(...)
305 #define check_type_and_type_flags(...)
306 #endif /* INVARIANTS */
307 
308 /*
309  * Returns EALREADY if both flags have some overlap, or EINVAL if flags are
310  * incompatible, else 0 with flags successfully merged into 'dest'.
311  */
312 static int
313 coalesce_id_flags(const flags_t src, flags_t *const dest)
314 {
315 	flags_t res;
316 
317 	if ((src & *dest) != 0)
318 		return (EALREADY);
319 
320 	res = src | *dest;
321 
322 	/* Check for compatibility of supplementary flags, and coalesce. */
323 	if ((res & MDF_SUPP_MASK) != 0) {
324 		/* MDF_SUPP_DONT incompatible with the rest. */
325 		if ((res & MDF_SUPP_DONT) != 0 && (res & MDF_SUPP_MASK &
326 		    ~MDF_SUPP_DONT) != 0)
327 			return (EINVAL);
328 		/*
329 		 * Coalesce MDF_SUPP_ALLOW and MDF_SUPP_MUST into MDF_SUPP_MUST.
330 		 */
331 		if ((res & MDF_SUPP_ALLOW) != 0 && (res & MDF_SUPP_MUST) != 0)
332 			res &= ~MDF_SUPP_ALLOW;
333 	}
334 
335 	*dest = res;
336 	return (0);
337 }
338 
339 static void
340 toast_rules(struct rules *const rules)
341 {
342 	struct rulehead *const head = &rules->head;
343 	struct rule *rule, *rule_next;
344 
345 	STAILQ_FOREACH_SAFE(rule, head, r_entries, rule_next) {
346 		free(rule->uids, M_MAC_DO);
347 		free(rule->gids, M_MAC_DO);
348 		free(rule, M_MAC_DO);
349 	}
350 }
351 
352 /* Assumes storage has been zeroed. */
353 static void
354 init_rules(struct rules *const rules)
355 {
356 	_Static_assert(MAX_RULE_STRING_SIZE > 0, "MAX_RULE_STRING_SIZE <= 0!");
357 	STAILQ_INIT(&rules->head);
358 }
359 
360 static void
361 init_exec_paths(struct exec_paths *const exec_paths)
362 {
363 	_Static_assert(MAX_EXEC_PATHS_SIZE > 0, "MAX_EXEC_PATHS_SIZE <= 0!");
364 	bzero(exec_paths, sizeof(*exec_paths));
365 	exec_paths->exec_paths_str[0] = 0;
366 }
367 
368 static struct conf *
369 alloc_conf(void)
370 {
371 	struct conf *const conf = malloc(sizeof(*conf), M_MAC_DO, M_WAITOK |
372 	    M_ZERO);
373 
374 	init_rules(&conf->rules);
375 	init_exec_paths(&conf->exec_paths);
376 	conf->use_count = 0;
377 
378 	return (conf);
379 }
380 
/* Returns whether 's' is NULL or points to an empty string. */
static bool
is_null_or_empty(const char *s)
{
	if (s == NULL)
		return (true);
	return (*s == '\0');
}
386 
387 /*
388  * String to unsigned int.
389  *
390  * Contrary to the "standard" strtou*() family of functions, do not tolerate
391  * spaces at start nor an empty string, and returns a status code, the 'u_int'
392  * result being returned through a passed pointer (if no error).
393  *
394  * We detour through 'quad_t' because in-kernel strto*() functions cannot set
395  * 'errno' and thus can't distinguish a true maximum value from one returned
396  * because of overflow.  We use 'quad_t' instead of 'u_quad_t' to support
397  * negative specifications (e.g., such as "-1" for UINT_MAX).
398  */
399 static int
400 strtoui_strict(const char *const restrict s, const char **const restrict endptr,
401     int base, u_int *result)
402 {
403 	char *ep;
404 	quad_t q;
405 
406 	/* Rule out spaces and empty specifications. */
407 	if (s[0] == '\0' || isspace(s[0])) {
408 		if (endptr != NULL)
409 			*endptr = s;
410 		return (EINVAL);
411 	}
412 
413 	q = strtoq(s, &ep, base);
414 	if (endptr != NULL)
415 		*endptr = ep;
416 	if (q < 0) {
417 		/* We allow specifying a negative number. */
418 		if (q < -(quad_t)UINT_MAX - 1 || q == QUAD_MIN)
419 			return (EOVERFLOW);
420 	} else {
421 		if (q > UINT_MAX || q == UQUAD_MAX)
422 			return (EOVERFLOW);
423 	}
424 
425 	*result = (u_int)q;
426 	return (0);
427 }
428 
/*
 * strsep() variant trimming spaces and tabs around the returned token.
 *
 * Blanks are removed at the start of the string and just before the delimiter
 * that ends the token (or before the end of string if no delimiter is found).
 * Blanks in the middle of the token are left alone.
 *
 * As with strsep(), the string is modified in place, '*stringp' is advanced
 * past the delimiter (or set to NULL if there was none), and NULL is returned
 * if '*stringp' is NULL on entry.
 */
static char *
strsep_noblanks(char **const stringp, const char *delim)
{
	char *cursor, *token, *end;

	cursor = *stringp;
	if (cursor == NULL)
		return (NULL);

	/* Leading blanks. */
	cursor += strspn(cursor, " \t");

	token = strsep(&cursor, delim);

	/*
	 * Trailing blanks.  'end' starts on the token's terminating NUL, which
	 * is either where the delimiter was or the original end of string.
	 */
	end = (cursor != NULL) ? cursor - 1 : token + strlen(token);
	while (end != token && (end[-1] == ' ' || end[-1] == '\t'))
		--end;
	*end = '\0';

	*stringp = cursor;
	return (token);
}
470 
471 
472 static void
473 make_parse_error(struct parse_error **const parse_error, const size_t pos,
474     const char *const fmt, ...)
475 {
476 	struct parse_error *const err = malloc(sizeof(*err), M_MAC_DO,
477 	    M_WAITOK);
478 	va_list ap;
479 
480 	err->pos = pos;
481 	va_start(ap, fmt);
482 	vsnprintf(err->msg, PARSE_ERROR_SIZE, fmt, ap);
483 	va_end(ap);
484 
485 	MPASS(*parse_error == NULL);
486 	*parse_error = err;
487 }
488 
489 static void
490 free_parse_error(struct parse_error *const parse_error)
491 {
492 	free(parse_error, M_MAC_DO);
493 }
494 
495 static int
496 parse_id_type(const char *const string, id_type_t *const type,
497     struct parse_error **const parse_error)
498 {
499 	/*
500 	 * Special case for "any", as the canonical form for IT_ANY in
501 	 * id_type_to_str[] is "*".
502 	 */
503 	if (strcmp(string, "any") == 0) {
504 		*type = IT_ANY;
505 		return (0);
506 	}
507 
508 	/* Start at 1 to avoid parsing "invalid". */
509 	for (size_t i = 1; i <= IT_LAST; ++i) {
510 		if (strcmp(string, id_type_to_str[i]) == 0) {
511 			*type = i;
512 			return (0);
513 		}
514 	}
515 
516 	*type = IT_INVALID;
517 	make_parse_error(parse_error, 0, "No valid type found.");
518 	return (EINVAL);
519 }
520 
521 static size_t
522 parse_gid_flags(const char *const string, flags_t *const flags,
523     flags_t *const gid_flags)
524 {
525 	switch (string[0]) {
526 	case '+':
527 		*flags |= MDF_SUPP_ALLOW;
528 		goto has_supp_clause;
529 	case '!':
530 		*flags |= MDF_SUPP_MUST;
531 		*gid_flags |= MDF_MAY_REJ_SUPP;
532 		goto has_supp_clause;
533 	case '-':
534 		*flags |= MDF_SUPP_DONT;
535 		*gid_flags |= MDF_MAY_REJ_SUPP;
536 		goto has_supp_clause;
537 	has_supp_clause:
538 		*gid_flags |= MDF_HAS_SUPP_CLAUSE;
539 		return (1);
540 	}
541 
542 	return (0);
543 }
544 
/* Returns whether 'string' is exactly one of the "any ID" tokens. */
static bool
parse_any(const char *const string)
{
	if (strcmp(string, "*") == 0)
		return (true);
	return (strcmp(string, "any") == 0);
}
550 
551 static bool
552 has_clauses(const id_nb_t nb, const flags_t type_flags)
553 {
554 	return ((type_flags & MDF_TYPE_MASK) != 0 || nb != 0);
555 }
556 
/*
 * Parses a single target clause and records it for 'rule'.
 *
 * 'to' is the clause's text ('<flags><type>=<id>', or a lone "*" or "any") and
 * is modified in place.  Clauses with "*", "any" or "." as their ID only
 * update the per-type flags in 'rule' ('uid_flags'/'gid_flags').  Clauses with
 * a numerical ID are appended to 'uid_list' or 'gid_list' and counted in
 * 'rule' ('uids_nb'/'gids_nb'); duplicates among these are not detected here.
 *
 * Returns 0 on success.  Else, returns EINVAL on malformed or conflicting
 * clauses, or EOVERFLOW if the number of IDs cannot be represented, with a
 * parse error returned through 'parse_error' in both cases (its position is
 * relative to the start of 'to').
 */
static int
parse_target_clause(char *to, struct rule *const rule,
    struct id_list *const uid_list, struct id_list *const gid_list,
    struct parse_error **const parse_error)
{
	const char *const start = to;
	char *to_type, *to_id;
	const char *p;
	struct id_list *list;
	id_nb_t *nb;
	flags_t *tflags;
	struct id_elem *ie;
	struct id_spec is = {.flags = 0};
	flags_t gid_flags = 0;
	id_type_t type;
	int error;

	MPASS(*parse_error == NULL);
	MPASS(to != NULL);
	to_type = strsep_noblanks(&to, "=");
	MPASS(to_type != NULL);
	/* Skip over the flag character, if any, to get to the type. */
	to_type += parse_gid_flags(to_type, &is.flags, &gid_flags);
	error = parse_id_type(to_type, &type, parse_error);
	if (error != 0)
		goto einval;
	if (type != IT_GID && is.flags != 0) {
		make_parse_error(parse_error, to_type - start,
		    "Expected type 'gid' after flags, not '%s'.",
		    to_type);
		goto einval;
	}

	/*
	 * With no delimiters, this takes all the remainder as the ID (trimmed
	 * of blanks).  We get NULL if there was no '=' after the type.
	 */
	to_id = strsep_noblanks(&to, "");
	switch (type) {
	case IT_GID:
		if (to_id == NULL) {
			make_parse_error(parse_error, to_type - start,
			    "No '=' and ID specification after type '%s'.",
			    to_type);
			goto einval;
		}

		if (is.flags == 0) {
			/* No flags: Dealing with a primary group. */
			is.flags |= MDF_PRIMARY;
			gid_flags |= MDF_HAS_PRIMARY_CLAUSE;
		}

		list = gid_list;
		nb = &rule->gids_nb;
		tflags = &rule->gid_flags;

		/* "*" or "any"? */
		if (parse_any(to_id)) {
			/*
			 * We check that we have not seen any other clause of
			 * the same category (i.e., concerning primary or
			 * supplementary groups).
			 */
			if ((is.flags & MDF_PRIMARY) != 0) {
				if ((*tflags & MDF_HAS_PRIMARY_CLAUSE) != 0) {
					make_parse_error(parse_error,
					    to_id - start,
					    "'any' specified after another "
					    "(primary) GID.");
					goto einval;
				}
				*tflags |= gid_flags | MDF_ANY;
			} else {
				/*
				 * If a supplementary group flag was present, it
				 * must be MDF_SUPP_ALLOW ("+").
				 */
				if ((is.flags & MDF_SUPP_MASK) != MDF_SUPP_ALLOW) {
					make_parse_error(parse_error,
					    to_id - start,
					    "'any' specified with another "
					    "flag than '+'.");
					goto einval;
				}
				if ((*tflags & MDF_HAS_SUPP_CLAUSE) != 0) {
					make_parse_error(parse_error,
					    to_id - start,
					    "'any' with flag '+' specified after "
					    "another (supplementary) GID.");
					goto einval;
				}
				*tflags |= gid_flags | MDF_ANY_SUPP;
			}
			goto check_type_and_finish;
		} else {
			/*
			 * Check that we haven't already seen "any" for the same
			 * category.
			 */
			if ((is.flags & MDF_PRIMARY) != 0) {
				if ((*tflags & MDF_ANY) != 0) {
					make_parse_error(parse_error,
					    to_id - start,
					    "Some (primary) GID specified after "
					    "'any'.");
					goto einval;
				}
			} else if ((*tflags & MDF_ANY_SUPP) != 0 &&
			    (is.flags & MDF_SUPP_ALLOW) != 0) {
				make_parse_error(parse_error,
				    to_id - start,
				    "Some (supplementary) GID specified after "
				    "'any' with flag '+'.");
				goto einval;
			}
			/*
			 * Only now that the clause is accepted, record the
			 * category flags gathered while parsing it.
			 */
			*tflags |= gid_flags;
		}
		break;

	case IT_UID:
		if (to_id == NULL) {
			make_parse_error(parse_error, to_type - start,
			    "No '=' and ID specification after type '%s'.",
			    to_type);
			goto einval;
		}

		list = uid_list;
		nb = &rule->uids_nb;
		tflags = &rule->uid_flags;

		/* "*" or "any"? */
		if (parse_any(to_id)) {
			/* There must not be any other clause. */
			if (has_clauses(*nb, *tflags)) {
				make_parse_error(parse_error, to_id - start,
				    "'any' specified after another UID.");
				goto einval;
			}
			*tflags |= MDF_ANY;
			goto check_type_and_finish;
		} else {
			/*
			 * Check that we haven't already seen "any" for the same
			 * category.
			 */
			if ((*tflags & MDF_ANY) != 0) {
				make_parse_error(parse_error, to_id - start,
				    "Some UID specified after 'any'.");
				goto einval;
			}
		}
		break;

	case IT_ANY:
		/* No ID allowed. */
		if (to_id != NULL) {
			make_parse_error(parse_error, to_type - start,
			    "No '=' and ID allowed after type '%s'.", to_type);
			goto einval;
		}
		/*
		 * We can't have IT_ANY after any other IT_*, it must be the
		 * only one.
		 */
		if (has_clauses(rule->uids_nb, rule->uid_flags) ||
		    has_clauses(rule->gids_nb, rule->gid_flags)) {
			make_parse_error(parse_error, to_type - start,
			    "Target clause of type '%s' coming after another "
			    "clause (must be alone).", to_type);
			goto einval;
		}
		/* Equivalent to "uid=any,gid=any,+gid=any". */
		rule->uid_flags |= MDF_ANY;
		rule->gid_flags |= MDF_ANY | MDF_ANY_SUPP |
		    MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE;
		goto finish;

	default:
		/* parse_id_type() returns no other types currently. */
		__assert_unreachable();
	}

	/* Rule out cases that have been treated above. */
	MPASS((type == IT_UID || type == IT_GID) && !parse_any(to_id));

	/* "."? */
	if (strcmp(to_id, ".") == 0) {
		if ((*tflags & MDF_CURRENT) != 0) {
			/* Duplicate "." <id>.  Try to coalesce. */
			error = coalesce_id_flags(is.flags, tflags);
			if (error != 0) {
				make_parse_error(parse_error, to_id - start,
				    "Incompatible flags with prior clause "
				    "with same target.");
				goto einval;
			}
		} else
			/*
			 * First "." for this type: The per-ID flags are kept
			 * in the per-type flags, where they qualify
			 * MDF_CURRENT.
			 */
			*tflags |= MDF_CURRENT | is.flags;
		goto check_type_and_finish;
	}

	/* Parse an ID. */
	error = strtoui_strict(to_id, &p, 10, &is.id);
	if (error != 0 || *p != '\0') {
		make_parse_error(parse_error, to_id - start,
		    "Cannot parse a numerical ID (base 10).");
		goto einval;
	}

	/* Explicit ID flags. */
	if (type == IT_GID && (is.flags & MDF_SUPP_MUST) != 0)
		*tflags |= MDF_EXPLICIT_SUPP_MUST;

	/*
	 * We check for duplicate IDs and coalesce their 'struct id_spec' only
	 * at end of parse_single_rule() because it is much more performant then
	 * (using sorted arrays).
	 */
	++*nb;
	/* The counter is unsigned, so reaching 0 here means it wrapped. */
	if (*nb == 0) {
		make_parse_error(parse_error, 0,
		    "Too many target clauses of type '%s'.", to_type);
		return (EOVERFLOW);
	}
	ie = malloc(sizeof(*ie), M_MAC_DO, M_WAITOK);
	ie->spec = is;
	STAILQ_INSERT_TAIL(list, ie, ie_entries);
	check_type_and_id_spec(type, &is);
check_type_and_finish:
	check_type_and_type_flags(type, *tflags);
finish:
	MPASS(error == 0 && *parse_error == NULL);
	return (0);
einval:
	/* We must have built a parse error on error. */
	MPASS(*parse_error != NULL);
	return (EINVAL);
}
791 
/*
 * Three-way comparison of two unsigned integers: returns -1, 0 or 1 depending
 * on whether 'i1' is lower than, equal to or greater than 'i2'.
 */
static int
u_int_cmp(const u_int i1, const u_int i2)
{
	if (i1 < i2)
		return (-1);
	if (i1 > i2)
		return (1);
	return (0);
}
797 
798 static int
799 id_spec_cmp(const void *const p1, const void *const p2)
800 {
801 	const struct id_spec *const is1 = p1;
802 	const struct id_spec *const is2 = p2;
803 
804 	return (u_int_cmp(is1->id, is2->id));
805 }
806 
807 /*
808  * Transfer content of 'list' into 'array', freeing and emptying list.
809  *
810  * 'nb' must be 'list''s length and not be greater than 'array''s size.  The
811  * destination array is sorted by ID.  Structures 'struct id_spec' with same IDs
812  * are coalesced if that makes sense (not including duplicate clauses), else
813  * EINVAL is returned.  On success, 'nb' is updated (lowered) to account for
814  * coalesced specifications.  The parameter 'type' is only for testing purposes
815  * (INVARIANTS).
816  */
817 static int
818 pour_list_into_rule(const id_type_t type, struct id_list *const list,
819     struct id_spec *const array, id_nb_t *const nb,
820     struct parse_error **const parse_error)
821 {
822 	struct id_elem *ie, *ie_next;
823 	size_t idx = 0;
824 
825 	/* Fill the array. */
826 	STAILQ_FOREACH_SAFE(ie, list, ie_entries, ie_next) {
827 		MPASS(idx < *nb);
828 		array[idx] = ie->spec;
829 		free(ie, M_MAC_DO);
830 		++idx;
831 	}
832 	MPASS(idx == *nb);
833 	STAILQ_INIT(list);
834 
835 	/* Sort it (by ID). */
836 	qsort(array, *nb, sizeof(*array), id_spec_cmp);
837 
838 	/* Coalesce same IDs. */
839 	if (*nb != 0) {
840 		size_t ref_idx = 0;
841 
842 		for (idx = 1; idx < *nb; ++idx) {
843 			const u_int id = array[idx].id;
844 
845 			if (id != array[ref_idx].id) {
846 				++ref_idx;
847 				if (ref_idx != idx)
848 					array[ref_idx] = array[idx];
849 				continue;
850 			}
851 
852 			switch (type) {
853 				int error;
854 
855 			case IT_GID:
856 				error = coalesce_id_flags(array[idx].flags,
857 				    &array[ref_idx].flags);
858 				if (error != 0) {
859 					make_parse_error(parse_error, 0,
860 					    "Incompatible flags or duplicate "
861 					    "GID %u.", id);
862 					goto einval;
863 				}
864 				check_type_and_id_flags(type,
865 				    array[ref_idx].flags);
866 				break;
867 
868 			case IT_UID:
869 				/*
870 				 * No flags in this case.  Multiple appearances
871 				 * of the same UID is an exact redundancy, so
872 				 * error out.
873 				 */
874 				make_parse_error(parse_error, 0,
875 				    "Duplicate UID %u.", id);
876 				goto einval;
877 
878 			default:
879 				__assert_unreachable();
880 			}
881 		}
882 		*nb = ref_idx + 1;
883 	}
884 
885 	MPASS(*parse_error == NULL);
886 	return (0);
887 
888 einval:
889 	MPASS(*parse_error != NULL);
890 	return (EINVAL);
891 }
892 
893 /*
894  * See also the herald comment for parse_rules() below.
895  *
896  * The second part of a rule, called <target> (or <to>), is a comma-separated
897  * (',') list of '<flags><type>=<id>' clauses similar to that of the <from>
898  * part, with the extensions that <id> may also be "*" or "any" or ".", and that
899  * <flags> may contain at most one of the '+', '-' and '!' characters when
900  * <type> is "gid" (no flags are allowed for "uid").  No two clauses in a single
901  * <to> list may list the same <id>.  "*" and "any" both designate any ID for
902  * the <type>, and are aliases to each other.  In front of "any" (or "*"), only
903  * the '+' flag is allowed (in the "gid" case).  "." designates the process'
904  * current IDs for the <type>.  The precise meaning of flags and "." is
905  * explained in functions checking privileges below.
906  */
907 static int
908 parse_single_rule(char *rule, struct rules *const rules,
909     struct parse_error **const parse_error)
910 {
911 	const char *const start = rule;
912 	const char *from_type, *from_id, *p;
913 	char *to_list;
914 	struct id_list uid_list, gid_list;
915 	struct id_elem *ie, *ie_next;
916 	struct rule *new;
917 	int error;
918 
919 	MPASS(*parse_error == NULL);
920 	STAILQ_INIT(&uid_list);
921 	STAILQ_INIT(&gid_list);
922 
923 	/* Freed when the 'struct rules' container is freed. */
924 	new = malloc(sizeof(*new), M_MAC_DO, M_WAITOK | M_ZERO);
925 
926 	from_type = strsep_noblanks(&rule, "=");
927 	MPASS(from_type != NULL); /* Because 'rule' was not NULL. */
928 	error = parse_id_type(from_type, &new->from_type, parse_error);
929 	if (error != 0)
930 		goto einval;
931 	switch (new->from_type) {
932 	case IT_UID:
933 	case IT_GID:
934 		break;
935 	default:
936 		make_parse_error(parse_error, 0, "Type '%s' not allowed in "
937 		    "the \"from\" part of rules.");
938 		goto einval;
939 	}
940 
941 	from_id = strsep_noblanks(&rule, ":>");
942 	if (is_null_or_empty(from_id)) {
943 		make_parse_error(parse_error, 0, "No ID specified.");
944 		goto einval;
945 	}
946 
947 	error = strtoui_strict(from_id, &p, 10, &new->from_id);
948 	if (error != 0 || *p != '\0') {
949 		make_parse_error(parse_error, from_id - start,
950 		    "Cannot parse a numerical ID (base 10).");
951 		goto einval;
952 	}
953 
954 	/*
955 	 * We will now parse the "to" list.
956 	 *
957 	 * In order to ease parsing, we will begin by building lists of target
958 	 * UIDs and GIDs in local variables 'uid_list' and 'gid_list'.  The
959 	 * number of each type of IDs will be filled directly in 'new'.  At end
960 	 * of parse, we will allocate both arrays of IDs to be placed into the
961 	 * 'uids' and 'gids' members, sort them, and discard the tail queues
962 	 * used to build them.  This conversion to sorted arrays at end of parse
963 	 * allows to minimize memory allocations and enables searching IDs in
964 	 * O(log(n)) instead of linearly.
965 	 */
966 	to_list = strsep_noblanks(&rule, ",");
967 	if (to_list == NULL) {
968 		make_parse_error(parse_error, 0, "No target list.");
969 		goto einval;
970 	}
971 	do {
972 		error = parse_target_clause(to_list, new, &uid_list, &gid_list,
973 		    parse_error);
974 		if (error != 0) {
975 			(*parse_error)->pos += to_list - start;
976 			goto einval;
977 		}
978 
979 		to_list = strsep_noblanks(&rule, ",");
980 	} while (to_list != NULL);
981 
982 	if (new->uids_nb != 0) {
983 		new->uids = malloc(sizeof(*new->uids) * new->uids_nb, M_MAC_DO,
984 		    M_WAITOK);
985 		error = pour_list_into_rule(IT_UID, &uid_list, new->uids,
986 		    &new->uids_nb, parse_error);
987 		if (error != 0)
988 			goto einval;
989 	}
990 	MPASS(STAILQ_EMPTY(&uid_list));
991 	if (!has_clauses(new->uids_nb, new->uid_flags)) {
992 		/* No UID specified, default is "uid=.". */
993 		MPASS(new->uid_flags == 0);
994 		new->uid_flags = MDF_CURRENT;
995 		check_type_and_type_flags(IT_UID, new->uid_flags);
996 	}
997 
998 	if (new->gids_nb != 0) {
999 		new->gids = malloc(sizeof(*new->gids) * new->gids_nb, M_MAC_DO,
1000 		    M_WAITOK);
1001 		error = pour_list_into_rule(IT_GID, &gid_list, new->gids,
1002 		    &new->gids_nb, parse_error);
1003 		if (error != 0)
1004 			goto einval;
1005 	}
1006 	MPASS(STAILQ_EMPTY(&gid_list));
1007 	if (!has_clauses(new->gids_nb, new->gid_flags)) {
1008 		/* No GID specified, default is "gid=.,!gid=.". */
1009 		MPASS(new->gid_flags == 0);
1010 		new->gid_flags = MDF_CURRENT | MDF_PRIMARY | MDF_SUPP_MUST |
1011 		    MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE;
1012 		check_type_and_type_flags(IT_GID, new->gid_flags);
1013 	}
1014 
1015 	STAILQ_INSERT_TAIL(&rules->head, new, r_entries);
1016 	MPASS(error == 0 && *parse_error == NULL);
1017 	return (0);
1018 
1019 einval:
1020 	free(new->gids, M_MAC_DO);
1021 	free(new->uids, M_MAC_DO);
1022 	free(new, M_MAC_DO);
1023 	STAILQ_FOREACH_SAFE(ie, &gid_list, ie_entries, ie_next)
1024 	    free(ie, M_MAC_DO);
1025 	STAILQ_FOREACH_SAFE(ie, &uid_list, ie_entries, ie_next)
1026 	    free(ie, M_MAC_DO);
1027 	MPASS(*parse_error != NULL);
1028 	return (EINVAL);
1029 }
1030 
1031 /*
1032  * Parse rules specification and produce rule structures out of it.
1033  *
1034  * Must be called with '*parse_error' set to NULL.  Returns 0 on success, with
1035  * '*rulesp' made to point to a 'struct rule' representing the rules.  On error,
1036  * the returned value is non-zero and '*rulesp' is unchanged.  If 'string' has
1037  * length greater or equal to MAX_RULE_STRING_SIZE, ENAMETOOLONG is returned.  If
1038  * it is not in the expected format, EINVAL is returned.  If an error is
1039  * returned, '*parse_error' is set to point to a 'struct parse_error' giving an
1040  * error message for the problem.
1041  *
1042  * Expected format: A >-colon-separated list of rules of the form
1043  * "<from>><target>" (for backwards compatibility, a semi-colon ":" is accepted
1044  * in place of '>').  The <from> part is of the form "<type>=<id>" where <type>
1045  * is "uid" or "gid", <id> an UID or GID (depending on <type>) and <target> is
1046  * "*", "any" or a comma-separated list of '<flags><type>=<id>' clauses (see the
1047  * comment for parse_single_rule() for more details).  For convenience, empty
1048  * rules are allowed (and do nothing), and spaces and tabs are allowed (and
1049  * removed) around each token (tokens are natural ones, except that
1050  * '<flags><type>' as a whole is considered a single token, so no blanks are
1051  * allowed between '<flags>' and '<type>').
1052  *
1053  * Examples:
1054  * - "uid=1001>uid=1010,gid=1010;uid=1002>any"
1055  * - "gid=1010>gid=1011,gid=1012,gid=1013"
1056  */
1057 static int
1058 parse_rules(const char *const string, struct rules *const rules,
1059     struct parse_error **const parse_error)
1060 {
1061 	const size_t len = strlen(string);
1062 	char *copy, *p, *rule;
1063 	int error = 0;
1064 
1065 	if (len >= MAX_RULE_STRING_SIZE) {
1066 		make_parse_error(parse_error, 0,
1067 		    "Rule specification string is too long (%zu, max %zu)",
1068 		    len, MAX_RULE_STRING_SIZE - 1);
1069 		return (ENAMETOOLONG);
1070 	}
1071 
1072 	bcopy(string, rules->string, len + 1);
1073 	MPASS(rules->string[len] == '\0'); /* Catch some races. */
1074 
1075 	copy = malloc(len + 1, M_MAC_DO, M_WAITOK);
1076 	bcopy(string, copy, len + 1);
1077 	MPASS(copy[len] == '\0'); /* Catch some races. */
1078 
1079 	p = copy;
1080 	while ((rule = strsep_noblanks(&p, ";")) != NULL) {
1081 		if (rule[0] == '\0')
1082 			continue;
1083 		error = parse_single_rule(rule, rules, parse_error);
1084 		if (error != 0) {
1085 			(*parse_error)->pos += rule - copy;
1086 			toast_rules(rules);
1087 			goto error;
1088 		}
1089 	}
1090 
1091 	MPASS(error == 0 && *parse_error == NULL);
1092 out:
1093 	free(copy, M_MAC_DO);
1094 	return (error);
1095 error:
1096 	MPASS(error != 0 && *parse_error != NULL);
1097 	goto out;
1098 }
1099 
1100 /*
1101  * Similar constraints as parse_rules() (which see).
1102  */
1103 static int
1104 parse_exec_paths(const char *const string, struct exec_paths *const exec_paths,
1105     struct parse_error **const parse_error)
1106 {
1107 	const size_t len = strlen(string);
1108 	char *copy, *p, *path;
1109 	int error = 0;
1110 
1111 	if (len >= MAX_EXEC_PATHS_SIZE) {
1112 		make_parse_error(parse_error, 0,
1113 		    "Exec path specification string is too long (%zu, max %u)",
1114 		    len, MAX_EXEC_PATHS_SIZE - 1);
1115 		return (ENAMETOOLONG);
1116 	}
1117 
1118 	bcopy(string, exec_paths->exec_paths_str, len + 1);
1119 	MPASS(exec_paths->exec_paths_str[len] == '\0');
1120 
1121 	copy = malloc(len + 1, M_MAC_DO, M_WAITOK);
1122 	bcopy(string, copy, len + 1);
1123 	MPASS(copy[len] == '\0');
1124 
1125 	p = copy;
1126 	while ((path = strsep_noblanks(&p, ":")) != NULL) {
1127 		size_t path_len;
1128 
1129 		if (*path == '\0')
1130 			continue;
1131 
1132 		if (exec_paths->exec_path_count >= MAX_EXEC_PATHS) {
1133 			make_parse_error(parse_error, path - copy,
1134 			    "Too many exec paths specified (max %d)",
1135 			    MAX_EXEC_PATHS);
1136 			error = EINVAL;
1137 			goto error;
1138 		}
1139 
1140 		path_len = strlen(path);
1141 		if (path_len >= PATH_MAX) {
1142 			make_parse_error(parse_error, path - copy,
1143 			    "Exec paths too long (%zu, max %u)",
1144 			    path_len, PATH_MAX - 1);
1145 			error = ENAMETOOLONG;
1146 			goto error;
1147 		}
1148 
1149 		strlcpy(exec_paths->exec_paths[exec_paths->exec_path_count],
1150 		    path, PATH_MAX);
1151 		exec_paths->exec_path_count++;
1152 	}
1153 
1154 	MPASS(error == 0 && *parse_error == NULL);
1155 out:
1156 	free(copy, M_MAC_DO);
1157 	return (error);
1158 error:
1159 	MPASS(error != 0 && *parse_error != NULL);
1160 	goto out;
1161 }
1162 
1163 static void
1164 hold_conf(struct conf *const conf)
1165 {
1166 	refcount_acquire(&conf->use_count);
1167 }
1168 
1169 static void
1170 drop_conf(struct conf *const conf)
1171 {
1172 	if (refcount_release(&conf->use_count)) {
1173 		toast_rules(&conf->rules);
1174 		free(conf, M_MAC_DO);
1175 	}
1176 }
1177 
1178 /*
1179  * Find configuration applicable to the passed prison.
1180  *
1181  * Returns the applicable configuration (which always exists), with an
1182  * additional reference that must be freed by the caller.  'pr' must not be
1183  * locked.
1184  *
1185  * The applicable configuration is that of the closest ancestor prison
1186  * (including itself) of the passed prison that actually has a 'struct conf'
1187  * associated to it.
1188  *
1189  * If 'hpr' is not NULL, it is used to return a pointer to the (unlocked) prison
1190  * holding the applicable configuration.
1191  */
1192 static struct conf *
1193 find_conf(struct prison *const pr, struct prison **const hpr)
1194 {
1195 	struct prison *cpr, *ppr;
1196 	struct conf *conf;
1197 
1198 	cpr = pr;
1199 	for (;;) {
1200 		prison_lock(cpr);
1201 		conf = osd_jail_get(cpr, osd_jail_slot);
1202 		if (conf != NULL)
1203 			break;
1204 		prison_unlock(cpr);
1205 
1206 		ppr = cpr->pr_parent;
1207 		/*
1208 		 * 'prison0' normally always have a mac_do(4) configuration
1209 		 * because we installed one on module load/activation and
1210 		 * nothing can destroy it as 'prison0' is not a regular jail and
1211 		 * the 'mac.do' parameter cannot be set to 'inherit' on it,
1212 		 * which is the only way to clear an existing configuration.
1213 		 */
1214 		KASSERT(ppr != NULL,
1215 		    ("MAC/do: 'prison0' must always have a configuration."));
1216 		cpr = ppr;
1217 	}
1218 
1219 	hold_conf(conf);
1220 	prison_unlock(cpr);
1221 
1222 	if (hpr != NULL)
1223 		*hpr = cpr;
1224 	return (conf);
1225 }
1226 
#ifdef INVARIANTS
/* Panic if the use count of 'conf' is not exactly 'expected'. */
static void
check_conf_use_count(const struct conf *const conf, u_int expected)
{
	const u_int actual = refcount_load(&conf->use_count);

	if (actual == expected)
		return;

	panic("MAC/do: Configuration at %p: Use count is %u, "
	    "expected %u", conf, actual, expected);
}
#else
/* Expands to nothing when INVARIANTS is not defined. */
#define check_conf_use_count(...)
#endif /* INVARIANTS */
1240 
/*
 * OSD destructor for slot 'osd_jail_slot'.
 *
 * Called with 'value' not NULL.  We have arranged that it is only ever called
 * when the corresponding jail goes down or at module unload.
 */
static void
dealloc_jail_osd(void *const value)
{
	struct conf *jail_conf;

	jail_conf = value;

	/*
	 * We expect to be the only user of the configuration at this point.
	 *
	 * When the "holding" jail goes down, no threads of that jail (or its
	 * sub-jails) should currently be executing (in particular, currently
	 * executing setcred()), so nobody else should hold a reference.
	 *
	 * Module unload is more complex.  Although the MAC framework takes
	 * care that no hook is called while a module is unloading, the unload
	 * could happen between two calls to MAC hooks in the course of, e.g.,
	 * executing setcred(), where the reference count has been bumped to
	 * keep the rules alive even if those of the "holding" jail have been
	 * concurrently changed.  These other references are held in our thread
	 * OSD slot, so we ensure that all threads' slots are freed first in
	 * mac_do_destroy() to be able to check that only one reference
	 * remains.
	 */
	check_conf_use_count(jail_conf, 1);
	drop_conf(jail_conf);
}
1269 
1270 /*
1271  * Remove the rules specifically associated to a prison.
1272  *
1273  * In practice, this means that the rules become inherited (from the closest
1274  * ascendant that has some).
1275  *
1276  * Destroys the 'osd_jail_slot' slot of the passed jail.
1277  */
1278 static void
1279 remove_conf(struct prison *const pr)
1280 {
1281 	struct conf *old_conf;
1282 	int error __unused;
1283 
1284 	prison_lock(pr);
1285 	/*
1286 	 * We burden ourselves with extracting rules first instead of just
1287 	 * letting osd_jail_del() call dealloc_jail_osd() as we want to
1288 	 * decrement their use count, and possibly free them, outside of the
1289 	 * prison lock.
1290 	 */
1291 	old_conf = osd_jail_get(pr, osd_jail_slot);
1292 	error = osd_jail_set(pr, osd_jail_slot, NULL);
1293 	/* osd_set() never allocates memory when 'value' is NULL, nor fails. */
1294 	MPASS(error == 0);
1295 	/*
1296 	 * This completely frees the OSD slot, but doesn't call the destructor
1297 	 * since we've just put NULL in the slot.
1298 	 */
1299 	osd_jail_del(pr, osd_jail_slot);
1300 	prison_unlock(pr);
1301 
1302 	if (old_conf != NULL)
1303 		drop_conf(old_conf);
1304 }
1305 
1306 /*
1307  * Assign an already-built configuration to a jail.
1308  */
1309 static void
1310 set_conf(struct prison *const pr, struct conf *const conf)
1311 {
1312 	struct conf *old_conf;
1313 	void **rsv;
1314 
1315 	hold_conf(conf);
1316 	rsv = osd_reserve(osd_jail_slot);
1317 
1318 	prison_lock(pr);
1319 	old_conf = osd_jail_get(pr, osd_jail_slot);
1320 	osd_jail_set_reserved(pr, osd_jail_slot, rsv, conf);
1321 	prison_unlock(pr);
1322 	if (old_conf != NULL)
1323 		drop_conf(old_conf);
1324 }
1325 
1326 /*
1327  * Assigns the default configuration to a jail.
1328  */
1329 static void
1330 set_default_conf(struct prison *const pr)
1331 {
1332 	struct conf *const conf = alloc_conf();
1333 
1334 	strlcpy(conf->exec_paths.exec_paths_str, "/usr/bin/mdo",
1335 	    MAX_EXEC_PATHS_SIZE);
1336 	strlcpy(conf->exec_paths.exec_paths[0], "/usr/bin/mdo", PATH_MAX);
1337 	conf->exec_paths.exec_path_count = 1;
1338 
1339 	set_conf(pr, conf);
1340 }
1341 
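/*
 * Helpers for parse_and_set_conf() (further below), which parses a rules and/or
 * an executable paths specification and assigns the result to a jail.
 *
 * parse_and_set_conf() returns the same error codes as parse_rules() and
 * parse_exec_paths() (which see).
 */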
1347 
1348 static void
1349 clone_rules(struct rules *dst, struct rules *const src)
1350 {
1351 	struct rule *src_rule, *dst_rule;
1352 
1353 	bzero(dst, sizeof(*dst));
1354 	strlcpy(dst->string, src->string, sizeof(dst->string));
1355 	STAILQ_INIT(&dst->head);
1356 
1357 	STAILQ_FOREACH(src_rule, &src->head, r_entries) {
1358 		dst_rule = malloc(sizeof(*dst_rule), M_MAC_DO, M_WAITOK |
1359 		    M_ZERO);
1360 		bcopy(src_rule, dst_rule, sizeof(*dst_rule));
1361 
1362 		if (src_rule->uids_nb > 0) {
1363 			dst_rule->uids = malloc(sizeof(*dst_rule->uids) *
1364 			    src_rule->uids_nb, M_MAC_DO, M_WAITOK);
1365 			bcopy(src_rule->uids, dst_rule->uids,
1366 			    sizeof(*dst_rule->uids) * src_rule->uids_nb);
1367 		}
1368 
1369 		if (src_rule->gids_nb > 0) {
1370 			dst_rule->gids = malloc(sizeof(*dst_rule->gids) *
1371 			    src_rule->gids_nb, M_MAC_DO, M_WAITOK);
1372 			bcopy(src_rule->gids, dst_rule->gids,
1373 			    sizeof(*dst_rule->gids) * src_rule->gids_nb);
1374 		}
1375 
1376 		STAILQ_INSERT_TAIL(&dst->head, dst_rule, r_entries);
1377 	}
1378 }
1379 
1380 static void
1381 clone_exec_paths(struct exec_paths *dst, struct exec_paths *const src)
1382 {
1383 	bzero(dst, sizeof(*dst));
1384 	dst->exec_path_count = src->exec_path_count;
1385 	for (int i = 0; i < src->exec_path_count; i++)
1386 		strlcpy(dst->exec_paths[i], src->exec_paths[i],
1387 		    sizeof(dst->exec_paths[i]));
1388 
1389 	strlcpy(dst->exec_paths_str, src->exec_paths_str,
1390 	    sizeof(dst->exec_paths_str));
1391 }
1392 
1393 /* Must be called with '*parse_error' set to NULL. */
1394 static int
1395 parse_and_set_conf(struct prison *pr, const char *rules_string,
1396     const char *exec_paths_string, struct parse_error **parse_error)
1397 {
1398 	struct conf *applicable_conf = NULL;
1399 	struct conf *conf;
1400 	int error = 0;
1401 	bool need_applicable_conf;
1402 
1403 	need_applicable_conf = (rules_string == NULL || rules_string[0] == '\0' ||
1404 	    exec_paths_string == NULL || exec_paths_string[0] == '\0');
1405 
1406 	if (need_applicable_conf)
1407 		applicable_conf = find_conf(pr, NULL);
1408 
1409 	conf = alloc_conf();
1410 
1411 	if (rules_string != NULL && rules_string[0] != '\0') {
1412 		error = parse_rules(rules_string, &conf->rules, parse_error);
1413 		if (error != 0)
1414 			goto error;
1415 	}
1416 	else if (applicable_conf != NULL)
1417 		clone_rules(&conf->rules, &applicable_conf->rules);
1418 
1419 	if (exec_paths_string != NULL && exec_paths_string[0] != '\0') {
1420 		error = parse_exec_paths(exec_paths_string, &conf->exec_paths,
1421 		    parse_error);
1422 		if (error != 0)
1423 			goto error;
1424 	} else if (applicable_conf != NULL)
1425 		clone_exec_paths(&conf->exec_paths,
1426 		    &applicable_conf->exec_paths);
1427 
1428 	set_conf(pr, conf);
1429 
1430 	MPASS(error == 0 && *parse_error == NULL);
1431 out:
1432 	if (applicable_conf != NULL)
1433 		drop_conf(applicable_conf);
1434 	return (error);
1435 error:
1436 	MPASS(error != 0 && *parse_error != NULL);
1437 	drop_conf(conf);
1438 	goto out;
1439 }
1440 
1441 static int
1442 mac_do_sysctl_rules(SYSCTL_HANDLER_ARGS)
1443 {
1444 	char *const buf = malloc(MAX_RULE_STRING_SIZE, M_MAC_DO, M_WAITOK);
1445 	struct prison *const pr = req->td->td_ucred->cr_prison;
1446 	struct conf *conf;
1447 	struct parse_error *parse_error = NULL;
1448 	int error;
1449 
1450 	conf = find_conf(pr, NULL);
1451 	strlcpy(buf, conf->rules.string, MAX_RULE_STRING_SIZE);
1452 
1453 	error = sysctl_handle_string(oidp, buf, MAX_RULE_STRING_SIZE, req);
1454 	if (error != 0 || req->newptr == NULL)
1455 		goto out;
1456 
1457 	/* Set our prison's rules, not that of the jail we inherited from. */
1458 	error = parse_and_set_conf(pr, buf, NULL, &parse_error);
1459 	if (error != 0) {
1460 		if (print_parse_error)
1461 			printf("MAC/do: Parse error at index %zu: %s\n",
1462 			    parse_error->pos, parse_error->msg);
1463 		free_parse_error(parse_error);
1464 	}
1465 
1466 out:
1467 	drop_conf(conf);
1468 	free(buf, M_MAC_DO);
1469 	return (error);
1470 }
1471 
/*
 * String knob 'rules' under the '_security_mac_do' node, served by
 * mac_do_sysctl_rules(), which operates on the calling thread's prison.
 */
SYSCTL_PROC(_security_mac_do, OID_AUTO, rules,
    CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_PRISON|CTLFLAG_MPSAFE,
    0, 0, mac_do_sysctl_rules, "A",
    "Rules");


/*
 * Jail parameters declarations: The 'mac.do' subnode and its 'rules' string
 * parameter, bounded by MAX_RULE_STRING_SIZE.  These presumably correspond to
 * the "mac.do" and "mac.do.rules" options handled by the mac_do_jail_*()
 * methods below.
 */
SYSCTL_JAIL_PARAM_SYS_SUBNODE(mac, do, CTLFLAG_RW, "Jail MAC/do parameters");
SYSCTL_JAIL_PARAM_STRING(_mac_do, rules, CTLFLAG_RW, MAX_RULE_STRING_SIZE,
    "Jail MAC/do rules");
1481 
1482 static int
1483 mac_do_sysctl_exec_paths(SYSCTL_HANDLER_ARGS)
1484 {
1485 	char *const buf = malloc(MAX_EXEC_PATHS_SIZE, M_MAC_DO, M_WAITOK);
1486 	struct prison *const pr = req->td->td_ucred->cr_prison;
1487 	struct conf *conf;
1488 	struct parse_error *parse_error = NULL;
1489 	int error;
1490 
1491 	conf = find_conf(pr, NULL);
1492 	strlcpy(buf, conf->exec_paths.exec_paths_str, MAX_EXEC_PATHS_SIZE);
1493 
1494 	error = sysctl_handle_string(oidp, buf, MAX_EXEC_PATHS_SIZE, req);
1495 	if (error != 0 || req->newptr == NULL)
1496 		goto out;
1497 
1498 	error = parse_and_set_conf(pr, NULL, buf, &parse_error);
1499 	if (error != 0) {
1500 		if (print_parse_error)
1501 			printf("MAC/do: Parse error at index %zu: %s\n",
1502 			    parse_error->pos, parse_error->msg);
1503 		free_parse_error(parse_error);
1504 	}
1505 
1506 out:
1507 	drop_conf(conf);
1508 	free(buf, M_MAC_DO);
1509 	return (error);
1510 }
1511 
/*
 * String knob 'exec_paths' under the '_security_mac_do' node, served by
 * mac_do_sysctl_exec_paths(), which operates on the calling thread's prison.
 */
SYSCTL_PROC(_security_mac_do, OID_AUTO, exec_paths,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
    0, 0, mac_do_sysctl_exec_paths, "A",
    "Colon-separated list of allowed executables");

/*
 * Jail parameter declaration for the executable paths string, bounded by
 * MAX_EXEC_PATHS_SIZE.  Presumably corresponds to the "mac.do.exec_paths"
 * option handled by the mac_do_jail_*() methods below.
 */
SYSCTL_JAIL_PARAM_STRING(_mac_do, exec_paths, CTLFLAG_RW, MAX_EXEC_PATHS_SIZE,
    "Jail MAC/do executable paths");
1519 
/*
 * PR_METHOD_CREATE jail method: Install the default configuration on the jail
 * passed as 'obj'.  'data' is not used.  Always returns 0.
 */
static int
mac_do_jail_create(void *obj, void *data)
{
	struct prison *const new_pr = obj;

	set_default_conf(new_pr);
	return (0);
}
1529 
1530 static int
1531 mac_do_jail_get(void *obj, void *data)
1532 {
1533 	struct prison *const pr = obj;
1534 	struct vfsoptlist *const opts = data;
1535 	struct prison *hpr_out;
1536 	struct conf *const applicable_conf = find_conf(pr, &hpr_out);
1537 	const struct prison *const hpr = hpr_out;
1538 	const struct rules *const rules = &applicable_conf->rules;
1539 	const struct exec_paths *const exec_paths = &applicable_conf->exec_paths;
1540 	int jsys, error;
1541 
1542 	jsys = hpr == pr ?
1543 	    (STAILQ_EMPTY(&rules->head) ? JAIL_SYS_DISABLE : JAIL_SYS_NEW) :
1544 	    JAIL_SYS_INHERIT;
1545 
1546 	error = vfs_setopt(opts, "mac.do", &jsys, sizeof(jsys));
1547 	if (error != 0 && error != ENOENT)
1548 		goto done;
1549 
1550 	error = vfs_setopts(opts, "mac.do.rules", rules->string);
1551 	if (error != 0 && error != ENOENT)
1552 		goto done;
1553 
1554 	error = vfs_setopts(opts, "mac.do.exec_paths",
1555 	    exec_paths->exec_paths_str);
1556 	if (error != 0 && error != ENOENT)
1557 		goto done;
1558 
1559 	error = 0;
1560 done:
1561 	drop_conf(applicable_conf);
1562 	return (error);
1563 }
1564 
/*
 * -1 is used as a sentinel in mac_do_jail_check() and mac_do_jail_set() below,
 * where it stands for "the 'mac.do' option was not passed".  It must thus be
 * distinct from all the values that option can legitimately take.
 */
_Static_assert(-1 != JAIL_SYS_DISABLE && -1 != JAIL_SYS_NEW &&
    -1 != JAIL_SYS_INHERIT,
    "mac_do(4) uses -1 as a sentinel for uninitialized 'jsys'.");
1571 
1572 /*
1573  * We perform only cheap checks here, i.e., we do not really parse the rules
1574  * specification string, if any.
1575  */
1576 static int
1577 mac_do_jail_check(void *obj, void *data)
1578 {
1579 	struct vfsoptlist *opts = data;
1580 	char *rules_string, *exec_paths_string;
1581 	int error, jsys, rules_size = 0, exec_paths_size = 0;
1582 	bool has_rules, has_exec_paths;
1583 
1584 	error = vfs_copyopt(opts, "mac.do", &jsys, sizeof(jsys));
1585 	if (error == ENOENT)
1586 		/*
1587 		 * Mark unspecified.  Will fill it up below depending on the
1588 		 * other options.
1589 		 */
1590 		jsys = -1;
1591 	else {
1592 		if (error != 0)
1593 			return (error);
1594 		if (jsys != JAIL_SYS_DISABLE && jsys != JAIL_SYS_NEW &&
1595 		    jsys != JAIL_SYS_INHERIT)
1596 			return (EINVAL);
1597 	}
1598 
1599 	/*
1600 	 * We use vfs_getopt() below instead of vfs_getopts() to get the
1601 	 * string's buffer size.  We perform the additional checks done by the
1602 	 * latter here, even if jail_set() calls vfs_getopts() itself later
1603 	 * (they becoming inconsistent wouldn't cause any security problem).
1604 	 */
1605 
1606 	/* Rules. */
1607 	error = vfs_getopt(opts, "mac.do.rules", (void **)&rules_string,
1608 	    &rules_size);
1609 	if (error == ENOENT)
1610 		rules_string = NULL;
1611 	else {
1612 		if (error != 0)
1613 			return (error);
1614 		if (rules_size == 0 || rules_string[rules_size - 1] != '\0') {
1615 			vfs_opterror(opts,
1616 			    "'mac.do.rules' not a proper string");
1617 			return (EINVAL);
1618 		}
1619 		if (rules_size > MAX_RULE_STRING_SIZE) {
1620 			vfs_opterror(opts, "'mac.do.rules' too long");
1621 			return (ENAMETOOLONG);
1622 		}
1623 	}
1624 
1625 	/* Executable paths. */
1626 	error = vfs_getopt(opts, "mac.do.exec_paths",
1627 	    (void **)&exec_paths_string, &exec_paths_size);
1628 	if (error == ENOENT)
1629 		exec_paths_string = NULL;
1630 	else {
1631 		if (error != 0)
1632 			return (error);
1633 		if (exec_paths_size == 0 ||
1634 		    exec_paths_string[exec_paths_size - 1] != '\0') {
1635 			vfs_opterror(opts,
1636 			    "'mac.do.exec_paths' not a proper string");
1637 			return (EINVAL);
1638 		}
1639 		if (exec_paths_size > MAX_EXEC_PATHS_SIZE) {
1640 			vfs_opterror(opts, "'mac.do.exec_paths' too long");
1641 			return (ENAMETOOLONG);
1642 		}
1643 	}
1644 
1645 	/*
1646 	 * Be liberal, considering that an empty rule or execution paths
1647 	 * specification is equivalent to no specification.  This affects the
1648 	 * JAIL_SYS_DISABLE and JAIL_SYS_INHERIT sanity checks below.
1649 	 */
1650 	has_rules = rules_string != NULL && rules_string[0] != '\0';
1651 	has_exec_paths = exec_paths_string != NULL &&
1652 	    exec_paths_string[0] != '\0';
1653 
1654 	/* If not specified, infer 'jsys' from passed options. */
1655 	if (jsys == -1) {
1656 		/*
1657 		 * Default in absence of "mac.do.rules" and "mac.do.exec_paths"
1658 		 * is to disable (and, in particular, not inherit).
1659 		 */
1660 		if (has_rules || has_exec_paths)
1661 			jsys = JAIL_SYS_NEW;
1662 		else
1663 			jsys = JAIL_SYS_DISABLE;
1664 	}
1665 
1666 	/* Final checks based on resolved 'jsys'. */
1667 	switch (jsys) {
1668 	case JAIL_SYS_DISABLE:
1669 	case JAIL_SYS_INHERIT:
1670 		if (has_rules) {
1671 			vfs_opterror(opts,
1672 			    "'mac.do.rules' specified but should not be when "
1673 			    "'mac.do' is 'disabled' or 'inherited'");
1674 			return (EINVAL);
1675 		}
1676 		if (has_exec_paths) {
1677 			vfs_opterror(opts,
1678 			    "'mac.do.exec_paths' specified but should not be "
1679 			    "when 'mac.do' is 'disabled' or 'inherited'");
1680 			return (EINVAL);
1681 		}
1682 		break;
1683 
1684 	case JAIL_SYS_NEW:
1685 		if (!has_rules && !has_exec_paths) {
1686 			vfs_opterror(opts, "'mac.do' set to 'new' but neither "
1687 			    "rules nor executable paths specified");
1688 			return (EINVAL);
1689 		}
1690 		break;
1691 
1692 	default:
1693 		__assert_unreachable();
1694 	}
1695 
1696 	return (0);
1697 }
1698 
1699 static int
1700 mac_do_jail_set(void *obj, void *data)
1701 {
1702 	struct prison *pr = obj;
1703 	struct vfsoptlist *opts = data;
1704 	char *rules_string, *exec_paths_string;
1705 	struct parse_error *parse_error = NULL;
1706 	int error, jsys;
1707 	bool has_rules, has_exec_paths;
1708 
1709 	/*
1710 	 * The invariants checks used below correspond to what has already been
1711 	 * checked in jail_check() above.
1712 	 */
1713 
1714 	error = vfs_copyopt(opts, "mac.do", &jsys, sizeof(jsys));
1715 	MPASS(error == 0 || error == ENOENT);
1716 	if (error != 0)
1717 		jsys = -1; /* Mark unfilled. */
1718 
1719 	rules_string = vfs_getopts(opts, "mac.do.rules", &error);
1720 	MPASS(error == 0 || error == ENOENT);
1721 	exec_paths_string = vfs_getopts(opts, "mac.do.exec_paths", &error);
1722 	MPASS(error == 0 || error == ENOENT);
1723 
1724 	has_rules = rules_string != NULL && rules_string[0] != '\0';
1725 	has_exec_paths = exec_paths_string != NULL &&
1726 	    exec_paths_string[0] != '\0';
1727 
1728 	if (jsys == -1) {
1729 		if (has_rules || has_exec_paths)
1730 			jsys = JAIL_SYS_NEW;
1731 		else
1732 			jsys = JAIL_SYS_DISABLE;
1733 	}
1734 
1735 	switch (jsys) {
1736 	case JAIL_SYS_INHERIT:
1737 		remove_conf(pr);
1738 		return (0);
1739 
1740 	case JAIL_SYS_DISABLE:
1741 		rules_string = "";
1742 		has_rules = true;
1743 		/* FALLTHROUGH */
1744 
1745 	case JAIL_SYS_NEW:
1746 		error = parse_and_set_conf(pr,
1747 		    has_rules ? rules_string : NULL,
1748 		    has_exec_paths ? exec_paths_string : NULL,
1749 		    &parse_error);
1750 
1751 		if (error != 0) {
1752 			vfs_opterror(opts,
1753 			    "MAC/do: Parse error at index %zu: %s\n",
1754 			    parse_error->pos, parse_error->msg);
1755 			free_parse_error(parse_error);
1756 
1757 			return (error);
1758 		}
1759 
1760 		return (0);
1761 
1762 	default:
1763 		__assert_unreachable();
1764 	}
1765 }
1766 
/*
 * OSD jail methods.
 *
 * Table indexed by PR_METHOD_* constants; entries not listed here are left
 * NULL by the designated initializers.
 *
 * There is no PR_METHOD_REMOVE, as OSD storage is destroyed by the common jail
 * code (see prison_cleanup()), which triggers a run of our dealloc_jail_osd()
 * destructor.
 */
static const osd_method_t osd_methods[PR_MAXMETHOD] = {
	/* Installs the default configuration. */
	[PR_METHOD_CREATE] = mac_do_jail_create,
	/* Reports the "mac.do*" options. */
	[PR_METHOD_GET] = mac_do_jail_get,
	/* Cheap validation of the "mac.do*" options. */
	[PR_METHOD_CHECK] = mac_do_jail_check,
	/* Parses and applies the "mac.do*" options. */
	[PR_METHOD_SET] = mac_do_jail_set,
};
1780 
1781 
/*
 * Common header structure.
 *
 * Each structure that is used to pass information between some MAC check
 * function and priv_grant() must start with this header.
 *
 * Instances live in the current thread's 'osd_thread_slot' OSD slot (see
 * fetch_data() and alloc_data()).
 */
struct mac_do_data_header {
	/*
	 * Size of the allocated buffer holding the containing structure.  Set
	 * by alloc_data().
	 */
	size_t		 allocated_size;
	/*
	 * Full size of the containing structure.  Set by set_data_header(),
	 * and never greater than 'allocated_size'.
	 */
	size_t		 size;
	/*
	 * For convenience, we use privilege numbers as an identifier for the
	 * containing structure's type, since there is one distinct privilege
	 * for each privilege changing function we are supporting.  0 in 'priv'
	 * indicates this header is uninitialized.
	 */
	int		 priv;
	/*
	 * The configuration that applies.  Set by set_data_header(); a
	 * reference on it is dropped by clear_data().
	 */
	struct conf	*conf;
};
1803 
1804 /*
1805  * The case of unusable or absent per-thread data can actually happen as nothing
1806  * prevents, e.g., priv_check*() with privilege 'priv' to be called standalone,
1807  * as it is currently by, e.g., the Linux emulator for PRIV_CRED_SETUID.  We
1808  * interpret such calls to priv_check*() as full, unrestricted requests for
1809  * 'priv', contrary to what we're doing here for selected operations, and
1810  * consequently will not grant the requested privilege.
1811  *
1812  * Also, we protect ourselves from a concurrent change of 'do_enabled' while
1813  * a call to setcred() is in progress by storing the rules per-thread
1814  * which is then consulted by each successive hook so that they all have
1815  * a coherent view of the specifications, and we empty the slot (actually, mark
1816  * it as empty) when MAC/do is disabled.
1817  */
1818 static int
1819 check_data_usable(const void *const data, const size_t size, const int priv)
1820 {
1821 	const struct mac_do_data_header *const hdr = data;
1822 
1823 	if (hdr == NULL || hdr->priv == 0)
1824 		return (ENOENT);
1825 	/*
1826 	 * Impacting changes in the protocols we are based on...  Don't crash in
1827 	 * production.
1828 	 */
1829 	if (hdr->priv != priv) {
1830 		MPASS(hdr->priv == priv);
1831 		return (EBUSY);
1832 	}
1833 	MPASS(hdr->size == size);
1834 	MPASS(hdr->size <= hdr->allocated_size);
1835 	return (0);
1836 }
1837 
1838 static void
1839 clear_data(void *const data)
1840 {
1841 	struct mac_do_data_header *const hdr = data;
1842 
1843 	if (hdr != NULL) {
1844 		drop_conf(hdr->conf);
1845 		/* We don't deallocate so as to save time on next access. */
1846 		hdr->priv = 0;
1847 	}
1848 }
1849 
1850 static void *
1851 fetch_data(void)
1852 {
1853 	return (osd_thread_get_unlocked(curthread, osd_thread_slot));
1854 }
1855 
1856 static bool
1857 is_data_reusable(const void *const data, const size_t size)
1858 {
1859 	const struct mac_do_data_header *const hdr = data;
1860 
1861 	return (hdr != NULL && size <= hdr->allocated_size);
1862 }
1863 
1864 static void
1865 set_data_header(void *const data, const size_t size, const int priv,
1866     struct conf *const conf)
1867 {
1868 	struct mac_do_data_header *const hdr = data;
1869 
1870 	MPASS(hdr->priv == 0);
1871 	MPASS(priv != 0);
1872 	MPASS(size <= hdr->allocated_size);
1873 	hdr->size = size;
1874 	hdr->priv = priv;
1875 	hdr->conf = conf;
1876 }
1877 
1878 /* The proc lock (and any other non-sleepable lock) must not be held. */
1879 static void *
1880 alloc_data(void *const data, const size_t size)
1881 {
1882 	struct mac_do_data_header *const hdr = realloc(data, size, M_MAC_DO,
1883 	    M_WAITOK);
1884 
1885 	MPASS(size >= sizeof(struct mac_do_data_header));
1886 	hdr->allocated_size = size;
1887 	hdr->priv = 0;
1888 	if (hdr != data) {
1889 		/*
1890 		 * This call either reuses the existing memory allocated for the
1891 		 * slot or tries to allocate some without blocking.
1892 		 */
1893 		int error = osd_thread_set(curthread, osd_thread_slot, hdr);
1894 
1895 		if (error != 0) {
1896 			/* Going to make a M_WAITOK allocation. */
1897 			void **const rsv = osd_reserve(osd_thread_slot);
1898 
1899 			error = osd_thread_set_reserved(curthread,
1900 			    osd_thread_slot, rsv, hdr);
1901 			MPASS(error == 0);
1902 		}
1903 	}
1904 	return (hdr);
1905 }
1906 
1907 /* Destructor for 'osd_thread_slot'. */
1908 static void
1909 dealloc_thread_osd(void *const value)
1910 {
1911 	free(value, M_MAC_DO);
1912 }
1913 
1914 /*
1915  * Whether to grant access to some primary group according to flags.
1916  *
1917  * The passed 'flags' must be those of a rule's matching GID, or the IT_GID type
1918  * flags when MDF_CURRENT has been matched.
1919  *
1920  * Return values:
1921  * - 0:			Access granted.
1922  * - EJUSTRETURN:	Flags are agnostic.
1923  */
1924 static int
1925 grant_primary_group_from_flags(const flags_t flags)
1926 {
1927 	return ((flags & MDF_PRIMARY) != 0 ? 0 : EJUSTRETURN);
1928 }
1929 
1930 /*
1931  * Same as grant_primary_group_from_flags(), but for supplementary groups.
1932  *
1933  * Return values:
1934  * - 0:			Access granted.
1935  * - EJUSTRETURN:	Flags are agnostic.
1936  * - EPERM:		Access denied.
1937  */
1938 static int __unused
1939 grant_supplementary_group_from_flags(const flags_t flags)
1940 {
1941 	if ((flags & MDF_SUPP_MASK) != 0)
1942 		return ((flags & MDF_SUPP_DONT) != 0 ? EPERM : 0);
1943 
1944 	return (EJUSTRETURN);
1945 }
1946 
/*
 * Whether 'rule' accepts the supplementary groups of 'new_cred', given that the
 * current credentials are 'old_cred'.
 *
 * The groups of 'new_cred' are browsed in order, while advancing in lockstep
 * through those of 'old_cred' (only if the rule has supplementary flags
 * applying to current groups) and through the rule's explicit GIDs.  This
 * relies on all three arrays being sorted by increasing GID (NOTE(review):
 * for 'rule->gids', presumably established at parse time -- confirm).
 *
 * Return values:
 * - 0:		All groups of 'new_cred' are accepted and no mandatory group is
 *		missing.
 * - EPERM:	Some group is forbidden or not explicitly accepted, or some
 *		mandatory group is absent.
 */
static int
rule_grant_supplementary_groups(const struct rule *const rule,
    const struct ucred *const old_cred, const struct ucred *const new_cred)
{
	const gid_t *const old_groups = old_cred->cr_groups;
	const gid_t *const new_groups = new_cred->cr_groups;
	const int old_ngroups = old_cred->cr_ngroups;
	const int new_ngroups = new_cred->cr_ngroups;
	const flags_t gid_flags = rule->gid_flags;
	/* Whether the "current groups" clause says something on supp. groups. */
	const bool current_has_supp = (gid_flags & MDF_CURRENT) != 0 &&
	    (gid_flags & MDF_SUPP_MASK) != 0;
	/* Cursors into the rule's GIDs, the old groups and the new groups. */
	id_nb_t rule_idx = 0;
	int old_idx = 0, new_idx = 0;

	if ((gid_flags & MDF_ANY_SUPP) != 0 &&
	    (gid_flags & MDF_MAY_REJ_SUPP) == 0)
		/*
		 * Any set of supplementary groups is accepted, no need to loop
		 * over them.
		 */
		return (0);

	for (; new_idx < new_ngroups; ++new_idx) {
		const gid_t gid = new_groups[new_idx];
		/* Becomes true as soon as some clause accepts 'gid'. */
		bool may_accept = false;

		if ((gid_flags & MDF_ANY_SUPP) != 0)
			may_accept = true;

		/* Do we have to check for the current supplementary groups? */
		if (current_has_supp) {
			/*
			 * Linear search, as both supplementary groups arrays
			 * are sorted.  Advancing 'old_idx' with a binary search
			 * on absence of MDF_SUPP_MUST doesn't seem worth it in
			 * practice.
			 */
			for (; old_idx < old_ngroups; ++old_idx) {
				const gid_t old_gid = old_groups[old_idx];

				if (old_gid < gid) {
					/* Mandatory but absent. */
					if ((gid_flags & MDF_SUPP_MUST) != 0)
						return (EPERM);
				} else if (old_gid == gid) {
					switch (gid_flags & MDF_SUPP_MASK) {
					case MDF_SUPP_DONT:
						/* Present but forbidden. */
						return (EPERM);
					case MDF_SUPP_ALLOW:
					case MDF_SUPP_MUST:
						may_accept = true;
						break;
					default:
#ifdef INVARIANTS
						__assert_unreachable();
#else
						/* Better be safe than sorry. */
						return (EPERM);
#endif
					}
					/* Consumed, skip it for the next GID. */
					++old_idx;
					break;
				}
				else
					/* Greater, keep it for the next GID. */
					break;
			}
		}

		/*
		 * Search by GID for a corresponding 'struct id_spec'.
		 *
		 * Again, linear search, with same note on not using binary
		 * search optimization as above (the trigger would be absence of
		 * MDF_EXPLICIT_SUPP_MUST this time).
		 */
		for (; rule_idx < rule->gids_nb; ++rule_idx) {
			const struct id_spec is = rule->gids[rule_idx];

			if (is.id < gid) {
				/* Mandatory but absent. */
				if ((is.flags & MDF_SUPP_MUST) != 0)
					return (EPERM);
			} else if (is.id == gid) {
				switch (is.flags & MDF_SUPP_MASK) {
				case MDF_SUPP_DONT:
					/* Present but forbidden. */
					return (EPERM);
				case MDF_SUPP_ALLOW:
				case MDF_SUPP_MUST:
					may_accept = true;
					break;
				case 0:
					/* Primary group only. */
					break;
				default:
#ifdef INVARIANTS
					__assert_unreachable();
#else
					/* Better be safe than sorry. */
					return (EPERM);
#endif
				}
				/* Consumed, skip it for the next GID. */
				++rule_idx;
				break;
			}
			else
				/* Greater, keep it for the next GID. */
				break;
		}

		/* 'gid' wasn't explicitly accepted. */
		if (!may_accept)
			return (EPERM);
	}

	/*
	 * If we must have all current groups and we didn't browse all
	 * of them at this point (because the remaining ones have GIDs
	 * greater than the last requested group), we are simply missing
	 * them.
	 */
	if ((gid_flags & MDF_CURRENT) != 0 &&
	    (gid_flags & MDF_SUPP_MUST) != 0 &&
	    old_idx < old_ngroups)
		return (EPERM);
	/*
	 * Similarly, we have to finish browsing all GIDs from the rule
	 * in case some are marked mandatory.
	 */
	if ((gid_flags & MDF_EXPLICIT_SUPP_MUST) != 0) {
		for (; rule_idx < rule->gids_nb; ++rule_idx) {
			const struct id_spec is = rule->gids[rule_idx];

			if ((is.flags & MDF_SUPP_MUST) != 0)
				return (EPERM);
		}
	}

	return (0);
}
2087 
2088 static int
2089 rule_grant_primary_group(const struct rule *const rule,
2090     const struct ucred *const old_cred, const gid_t gid)
2091 {
2092 	struct id_spec gid_is = {.flags = 0};
2093 	const struct id_spec *found_is;
2094 	int error;
2095 
2096 	if ((rule->gid_flags & MDF_ANY) != 0)
2097 		return (0);
2098 
2099 	/* Was MDF_CURRENT specified, and is 'gid' a current GID? */
2100 	if ((rule->gid_flags & MDF_CURRENT) != 0 &&
2101 	    group_is_primary(gid, old_cred)) {
2102 		error = grant_primary_group_from_flags(rule->gid_flags);
2103 		if (error == 0)
2104 			return (0);
2105 	}
2106 
2107 	/* Search by GID for a corresponding 'struct id_spec'. */
2108 	gid_is.id = gid;
2109 	found_is = bsearch(&gid_is, rule->gids, rule->gids_nb,
2110 	    sizeof(*rule->gids), id_spec_cmp);
2111 
2112 	if (found_is != NULL) {
2113 		error = grant_primary_group_from_flags(found_is->flags);
2114 		if (error == 0)
2115 			return (0);
2116 	}
2117 
2118 	return (EPERM);
2119 }
2120 
2121 static int
2122 rule_grant_primary_groups(const struct rule *const rule,
2123     const struct ucred *const old_cred, const struct ucred *const new_cred)
2124 {
2125 	int error;
2126 
2127 	/* Shortcut. */
2128 	if ((rule->gid_flags & MDF_ANY) != 0)
2129 		return (0);
2130 
2131 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_gid);
2132 	if (error != 0)
2133 		return (error);
2134 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_rgid);
2135 	if (error != 0)
2136 		return (error);
2137 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_svgid);
2138 	if (error != 0)
2139 		return (error);
2140 	return (0);
2141 }
2142 
2143 static bool
2144 user_is_current(const uid_t uid, const struct ucred *const old_cred)
2145 {
2146 	return (uid == old_cred->cr_uid || uid == old_cred->cr_ruid ||
2147 	    uid == old_cred->cr_svuid);
2148 }
2149 
2150 static int
2151 rule_grant_user(const struct rule *const rule,
2152     const struct ucred *const old_cred, const uid_t uid)
2153 {
2154 	struct id_spec uid_is = {.flags = 0};
2155 	const struct id_spec *found_is;
2156 
2157 	if ((rule->uid_flags & MDF_ANY) != 0)
2158 		return (0);
2159 
2160 	/* Was MDF_CURRENT specified, and is 'uid' a current UID? */
2161 	if ((rule->uid_flags & MDF_CURRENT) != 0 &&
2162 	    user_is_current(uid, old_cred))
2163 		return (0);
2164 
2165 	/* Search by UID for a corresponding 'struct id_spec'. */
2166 	uid_is.id = uid;
2167 	found_is = bsearch(&uid_is, rule->uids, rule->uids_nb,
2168 	    sizeof(*rule->uids), id_spec_cmp);
2169 
2170 	if (found_is != NULL)
2171 		return (0);
2172 
2173 	return (EPERM);
2174 }
2175 
2176 static int
2177 rule_grant_users(const struct rule *const rule,
2178     const struct ucred *const old_cred, const struct ucred *const new_cred)
2179 {
2180 	int error;
2181 
2182 	/* Shortcut. */
2183 	if ((rule->uid_flags & MDF_ANY) != 0)
2184 		return (0);
2185 
2186 	error = rule_grant_user(rule, old_cred, new_cred->cr_uid);
2187 	if (error != 0)
2188 		return (error);
2189 	error = rule_grant_user(rule, old_cred, new_cred->cr_ruid);
2190 	if (error != 0)
2191 		return (error);
2192 	error = rule_grant_user(rule, old_cred, new_cred->cr_svuid);
2193 	if (error != 0)
2194 		return (error);
2195 
2196 	return (0);
2197 }
2198 
/*
 * Check the whole credentials transition from 'old_cred' to 'new_cred'
 * against a single rule: users first, then primary groups, then supplementary
 * groups.  Later checks are skipped as soon as one fails.
 *
 * Returns 0 if the rule grants the transition, else the first error met.
 */
static int
rule_grant_setcred(const struct rule *const rule,
    const struct ucred *const old_cred, const struct ucred *const new_cred)
{
	int error;

	error = rule_grant_users(rule, old_cred, new_cred);
	if (error == 0)
		error = rule_grant_primary_groups(rule, old_cred, new_cred);
	if (error == 0)
		error = rule_grant_supplementary_groups(rule, old_cred,
		    new_cred);

	return (error);
}
2217 
2218 static bool
2219 rule_applies(const struct rule *const rule, const struct ucred *const cred)
2220 {
2221 	if (rule->from_type == IT_UID && rule->from_id == cred->cr_ruid)
2222 		return (true);
2223 	if (rule->from_type == IT_GID && realgroupmember(rule->from_id, cred))
2224 		return (true);
2225 	return (false);
2226 }
2227 
/*
 * To pass data between check_setcred() and priv_grant() (on PRIV_CRED_SETCRED).
 *
 * This per-thread record is prepared by mac_do_setcred_enter(), filled in by
 * mac_do_check_setcred(), consumed by mac_do_priv_grant() and cleared by
 * mac_do_setcred_exit().
 */
struct mac_do_setcred_data {
	/* Common header, set up by set_data_header(); gives access to the conf. */
	struct mac_do_data_header hdr;
	/* Requested credentials; NULL until mac_do_check_setcred() has run. */
	const struct ucred *new_cred;
	/* Flags received by mac_do_check_setcred() (SETCREDF_*). */
	u_int setcred_flags;
};
2236 
2237 static int
2238 mac_do_priv_grant(struct ucred *cred, int priv)
2239 {
2240 	struct mac_do_setcred_data *const data = fetch_data();
2241 	struct rules *rules;
2242 	const struct ucred *new_cred;
2243 	const struct rule *rule;
2244 	u_int setcred_flags;
2245 	int error;
2246 
2247 	/* Bail out fast if we aren't concerned. */
2248 	if (priv != PRIV_CRED_SETCRED)
2249 		return (EPERM);
2250 
2251 	/*
2252 	 * Do we have to do something?
2253 	 */
2254 	if (check_data_usable(data, sizeof(*data), priv) != 0)
2255 		/* No. */
2256 		return (EPERM);
2257 
2258 	rules = &data->hdr.conf->rules;
2259 	new_cred = data->new_cred;
2260 	KASSERT(new_cred != NULL,
2261 	    ("priv_check*() called before mac_cred_check_setcred()"));
2262 	setcred_flags = data->setcred_flags;
2263 
2264 	/*
2265 	 * Explicitly check that only the flags we currently support are present
2266 	 * in order to avoid accepting transitions with other changes than those
2267 	 * we are actually going to check.  Currently, this rules out the
2268 	 * SETCREDF_MAC_LABEL flag.  This may be improved by adding code
2269 	 * actually checking whether the requested label and the current one
2270 	 * would differ.
2271 	 */
2272 	if ((setcred_flags & ~(SETCREDF_UID | SETCREDF_RUID | SETCREDF_SVUID |
2273 	    SETCREDF_GID | SETCREDF_RGID | SETCREDF_SVGID |
2274 	    SETCREDF_SUPP_GROUPS)) != 0)
2275 		return (EPERM);
2276 
2277 	/*
2278 	 * Browse rules, and for those that match the requestor, call specific
2279 	 * privilege granting functions interpreting the "to"/"target" part.
2280 	 */
2281 	error = EPERM;
2282 	STAILQ_FOREACH(rule, &rules->head, r_entries)
2283 	    if (rule_applies(rule, cred)) {
2284 		    error = rule_grant_setcred(rule, cred, new_cred);
2285 		    if (error != EPERM)
2286 			    break;
2287 	    }
2288 
2289 	return (error);
2290 }
2291 
2292 static int
2293 check_proc(void)
2294 {
2295 	struct prison *const pr = curproc->p_ucred->cr_prison;
2296 	char *path, *to_free;
2297 	struct conf *conf;
2298 	struct exec_paths *exec_paths;
2299 	int error;
2300 
2301 	/*
2302 	 * Only grant privileges if requested by the right executable.
2303 	 *
2304 	 * As MAC/do configuration is per-jail, in order to avoid confused
2305 	 * deputy situations in chroots (privileged or unprivileged), make sure
2306 	 * to check the path from the current jail's root.
2307 	 *
2308 	 * XXXOC: We may want to base this check on a tunable path and/or
2309 	 * a specific MAC label.  Going even further, e.g., envisioning to
2310 	 * completely replace the path check with the latter, we would need to
2311 	 * install FreeBSD on a FS with multilabel enabled by default, which in
2312 	 * practice entails adding an option to ZFS to set MNT_MULTILABEL
2313 	 * automatically on mounts, ensuring that root (and more if using
2314 	 * different partitions) ZFS or UFS filesystems are created with
2315 	 * multilabel turned on, and having the installation procedure support
2316 	 * setting a MAC label per file (perhaps via additions to mtree(1)).  So
2317 	 * this probably isn't going to happen overnight, if ever.
2318 	 */
2319 	if (vn_fullpath_jail(curproc->p_textvp, &path, &to_free) != 0)
2320 		return (EPERM);
2321 
2322 	error = EPERM;
2323 	conf = find_conf(pr, NULL);
2324 	exec_paths = &conf->exec_paths;
2325 
2326 	for (int i = 0; i < exec_paths->exec_path_count; i++)
2327 		if (strcmp(exec_paths->exec_paths[i], path) == 0) {
2328 			error = 0;
2329 			break;
2330 		}
2331 
2332 	drop_conf(conf);
2333 	free(to_free, M_TEMP);
2334 	return (error);
2335 }
2336 
2337 static void
2338 mac_do_setcred_enter(void)
2339 {
2340 	struct prison *const pr = curproc->p_ucred->cr_prison;
2341 	struct mac_do_setcred_data * data;
2342 	struct conf *conf;
2343 	int error;
2344 
2345 	/*
2346 	 * If not enabled, don't prepare data.  Other hooks will check for that
2347 	 * to know if they have to do something.
2348 	 */
2349 	if (do_enabled == 0)
2350 		return;
2351 
2352 	/*
2353 	 * MAC/do only applies to a process launched from a given executable.
2354 	 * For other processes, we just won't intervene (we don't deny requests,
2355 	 * nor do we grant privileges to them).
2356 	 */
2357 	error = check_proc();
2358 	if (error != 0)
2359 		return;
2360 
2361 	/*
2362 	 * Find the currently applicable rules.
2363 	 */
2364 	conf = find_conf(pr, NULL);
2365 
2366 	/*
2367 	 * Setup thread data to be used by other hooks.
2368 	 */
2369 	data = fetch_data();
2370 	if (!is_data_reusable(data, sizeof(*data)))
2371 		data = alloc_data(data, sizeof(*data));
2372 	set_data_header(data, sizeof(*data), PRIV_CRED_SETCRED, conf);
2373 	/* Not really necessary, but helps to catch programming errors. */
2374 	data->new_cred = NULL;
2375 	data->setcred_flags = 0;
2376 }
2377 
2378 static int
2379 mac_do_check_setcred(u_int flags, const struct ucred *const old_cred,
2380     struct ucred *const new_cred)
2381 {
2382 	struct mac_do_setcred_data *const data = fetch_data();
2383 
2384 	/*
2385 	 * Do we have to do something?
2386 	 */
2387 	if (check_data_usable(data, sizeof(*data), PRIV_CRED_SETCRED) != 0)
2388 		/* No. */
2389 		return (0);
2390 
2391 	/*
2392 	 * Keep track of the setcred() flags and the new credentials for
2393 	 * priv_check*().
2394 	 */
2395 	data->new_cred = new_cred;
2396 	data->setcred_flags = flags;
2397 
2398 	return (0);
2399 }
2400 
2401 static void
2402 mac_do_setcred_exit(void)
2403 {
2404 	struct mac_do_setcred_data *const data = fetch_data();
2405 
2406 	if (check_data_usable(data, sizeof(*data), PRIV_CRED_SETCRED) == 0)
2407 		/*
2408 		 * This doesn't deallocate the small per-thread data storage,
2409 		 * which can be reused on subsequent calls.  (That data is of
2410 		 * course deallocated as the current thread dies or this module
2411 		 * is unloaded.)
2412 		 */
2413 		clear_data(data);
2414 }
2415 
2416 static void
2417 mac_do_init(struct mac_policy_conf *mpc)
2418 {
2419 	struct prison *pr;
2420 
2421 	osd_jail_slot = osd_jail_register(dealloc_jail_osd, osd_methods);
2422 	set_default_conf(&prison0);
2423 	sx_slock(&allprison_lock);
2424 	TAILQ_FOREACH(pr, &allprison, pr_list)
2425 	    set_default_conf(pr);
2426 	sx_sunlock(&allprison_lock);
2427 
2428 	osd_thread_slot = osd_thread_register(dealloc_thread_osd);
2429 }
2430 
/*
 * Policy teardown: releases the two OSD slots registered by mac_do_init().
 */
static void
mac_do_destroy(struct mac_policy_conf *mpc)
{
	/*
	 * osd_thread_deregister() must be called before osd_jail_deregister(),
	 * for the reason explained in dealloc_jail_osd().
	 */
	osd_thread_deregister(osd_thread_slot);
	osd_jail_deregister(osd_jail_slot);
}
2441 
/*
 * MAC framework entry points implemented by this policy.
 */
static struct mac_policy_ops do_ops = {
	/* Module lifecycle. */
	.mpo_init = mac_do_init,
	.mpo_destroy = mac_do_destroy,
	/* setcred() hooks: set up, fill in and clear the per-thread data. */
	.mpo_cred_setcred_enter = mac_do_setcred_enter,
	.mpo_cred_check_setcred = mac_do_check_setcred,
	.mpo_cred_setcred_exit = mac_do_setcred_exit,
	/* Decides on PRIV_CRED_SETCRED using the per-thread data. */
	.mpo_priv_grant = mac_do_priv_grant,
};
2450 
/*
 * Declare the "MAC/do" policy with the operations table above.
 * MPC_LOADTIME_FLAG_UNLOADOK is passed so that the policy can be unloaded
 * (teardown is handled by mac_do_destroy()).
 */
MAC_POLICY_SET(&do_ops, mac_do, "MAC/do", MPC_LOADTIME_FLAG_UNLOADOK, NULL);
MODULE_VERSION(mac_do, 1);
2453