xref: /freebsd/sys/security/mac_do/mac_do.c (revision 01e2b0ce1820adf475e372ec72371dffca17a7af)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright(c) 2024 Baptiste Daroussin <bapt@FreeBSD.org>
5  * Copyright (c) 2024 The FreeBSD Foundation
6  * Copyright (c) 2025 Kushagra Srivastava <kushagra1403@gmail.com>
7  *
8  * Portions of this software were developed by Olivier Certner
9  * <olce.freebsd@certner.fr> at Kumacom SARL under sponsorship from the FreeBSD
10  * Foundation.
11  */
12 
13 #include <sys/param.h>
14 #include <sys/systm.h>
15 #include <sys/ctype.h>
16 #include <sys/jail.h>
17 #include <sys/kernel.h>
18 #include <sys/limits.h>
19 #include <sys/lock.h>
20 #include <sys/malloc.h>
21 #include <sys/module.h>
22 #include <sys/mount.h>
23 #include <sys/mutex.h>
24 #include <sys/priv.h>
25 #include <sys/proc.h>
26 #include <sys/refcount.h>
27 #include <sys/socket.h>
28 #include <sys/stdarg.h>
29 #include <sys/sx.h>
30 #include <sys/sysctl.h>
31 #include <sys/ucred.h>
32 #include <sys/vnode.h>
33 
34 #include <security/mac/mac_policy.h>
35 
36 
37 #ifdef INVARIANTS
/*
 * Tell whether the 'size' bytes starting at 'buf' are all zero (trivially true
 * when 'size' is 0).
 *
 * Should typically be moved to libkern (and perhaps libc) at some point, and be
 * optimized if to be used outside of INVARIANTS.
 */
static bool
is_zeroed(const void *const buf, const size_t size)
{
	const char *cur = buf;
	const char *const end = cur + size;

	while (cur != end) {
		if (*cur != 0)
			return (false);
		++cur;
	}
	return (true);
}
52 #endif
53 
/* Root node, under '_security_mac', of the knobs declared in this file. */
static SYSCTL_NODE(_security_mac, OID_AUTO, do,
    CTLFLAG_RW|CTLFLAG_MPSAFE, 0, "mac_do policy controls");

/* Knob "enabled" (CTLFLAG_RWTUN); initial value is 1. */
static int	do_enabled = 1;
SYSCTL_INT(_security_mac_do, OID_AUTO, enabled, CTLFLAG_RWTUN,
    &do_enabled, 0, "Enforce do policy");

/* Knob "print_parse_error" (CTLFLAG_RWTUN); initial value is 1. */
static int	print_parse_error = 1;
SYSCTL_INT(_security_mac_do, OID_AUTO, print_parse_error, CTLFLAG_RWTUN,
    &print_parse_error, 0, "Print parse errors on setting rules "
    "(via sysctl(8)).");
65 
/* malloc(9) type used by the allocations made in this file. */
static MALLOC_DEFINE(M_MAC_DO, "mac_do", "mac_do(4) security module");

/* Size of the buffer holding a rules string, NUL terminator included. */
#define MAX_RULE_STRING_SIZE	1024
_Static_assert(MAX_RULE_STRING_SIZE > 0,
    "MAX_RULE_STRING_SIZE: No space for the NUL terminator!");

/*
 * Dimensions of 'struct exec_paths' below: size of its string buffer and number
 * of entries of its paths array.
 */
#define MAX_EXEC_PATHS_SIZE	2048
#define MAX_EXEC_PATHS		8
_Static_assert(MAX_EXEC_PATHS_SIZE > 0,
    "MAX_EXEC_PATHS_SIZE: No space for the NUL terminator!");

/*
 * NOTE(review): presumably osd(9) slot numbers for per-jail and per-thread
 * data; their registration is not visible in this part of the file.
 */
static unsigned		osd_jail_slot;
static unsigned		osd_thread_slot;
79 
/*
 * ID types that can be named in rules.  Their values double as indices into
 * id_type_to_str[] below, and IT_LAST is the highest valid one.
 */
#define IT_INVALID	0 /* Must stay 0. */
#define IT_UID		1
#define IT_GID		2
#define IT_ANY		3
#define IT_LAST		IT_ANY

/* Canonical spelling of each ID type, indexed by IT_* value. */
static const char *id_type_to_str[] = {
	[IT_INVALID]	= "invalid",
	[IT_UID]	= "uid",
	[IT_GID]	= "gid",
	/* See also parse_id_type(). */
	[IT_ANY]	= "*",
};
93 
/* Capacity of a parse error's message buffer, NUL terminator included. */
#define PARSE_ERROR_SIZE	256

/*
 * All functions having a parse error parameter must return through it a parse
 * error object if and only if they return an error value (non-zero); else, NULL
 * must be returned through it.
 *
 * Objects are allocated by make_parse_error() and released with
 * free_parse_error().
 */
struct parse_error {
	/* Offset, in the string being parsed, that the error refers to. */
	size_t	pos;
	/* Human-readable, NUL-terminated message. */
	char	msg[PARSE_ERROR_SIZE];
};
105 
/*
 * We assume that 'uid_t' and 'gid_t' are aliases to 'u_int' in conversions
 * required for parsing rules specification strings.
 *
 * The assertion checks the sizes, and that both types are unsigned ('-1'
 * converted to an unsigned type is non-negative).
 */
_Static_assert(sizeof(uid_t) == sizeof(u_int) && (uid_t)-1 >= 0 &&
    sizeof(gid_t) == sizeof(u_int) && (gid_t)-1 >= 0,
    "mac_do(4) assumes that 'uid_t' and 'gid_t' are aliases to 'u_int'");
113 
/*
 * Internal flags.
 *
 * They either apply as per-type (t) or per-ID (i) but are conflated because all
 * per-ID flags are also valid as per-type ones to qualify the "current" (".")
 * per-type flag.  Also, some of them are in fact exclusive, but we use one-hot
 * encoding for simplicity.
 *
 * There is currently room for "only" 16 bits.  As these flags are purely
 * internal, they can be renumbered and/or their type changed as needed.
 *
 * Per-ID flags currently occupy bits 0 to 3, and per-type ones bits 8 to 14.
 *
 * See also the check_*() functions below.
 */
typedef uint16_t	flags_t;

/* (i,gid) Specification concerns primary groups. */
#define MDF_PRIMARY	(1u << 0)
/* (i,gid) Specification concerns supplementary groups ('+' flag). */
#define MDF_SUPP_ALLOW	(1u << 1)
/* (i,gid) Group must appear as a supplementary group ('!' flag). */
#define MDF_SUPP_MUST	(1u << 2)
/* (i,gid) Group must not appear as a supplementary group ('-' flag). */
#define MDF_SUPP_DONT	(1u << 3)
/* All per-ID flags about supplementary groups. */
#define MDF_SUPP_MASK	(MDF_SUPP_ALLOW | MDF_SUPP_MUST | MDF_SUPP_DONT)
/* All per-ID flags. */
#define MDF_ID_MASK	(MDF_PRIMARY | MDF_SUPP_MASK)

/*
 * (t) All IDs allowed.
 *
 * For GIDs, MDF_ANY only concerns primary groups.  The MDF_PRIMARY and
 * MDF_SUPP_* flags never apply to MDF_ANY, but can be present if MDF_CURRENT is
 * present also, as usual.
 */
#define MDF_ANY			(1u << 8)
/* (t) Current IDs allowed. */
#define MDF_CURRENT		(1u << 9)
/* Per-type flags meaningful for both UIDs and GIDs. */
#define MDF_TYPE_COMMON_MASK	(MDF_ANY | MDF_CURRENT)
/* (t,gid) All IDs allowed as supplementary groups. */
#define MDF_ANY_SUPP		(1u << 10)
/* (t,gid) Some ID or MDF_CURRENT has MDF_SUPP_MUST or MDF_SUPP_DONT. */
#define MDF_MAY_REJ_SUPP	(1u << 11)
/* (t,gid) Some explicit ID (not MDF_CURRENT) has MDF_SUPP_MUST. */
#define MDF_EXPLICIT_SUPP_MUST	(1u << 12)
/*
 * (t,gid) Whether any target clause is about primary groups.  Used during
 * parsing only.
 */
#define MDF_HAS_PRIMARY_CLAUSE	(1u << 13)
/*
 * (t,gid) Whether any target clause is about supplementary groups.  Used during
 * parsing only.
 */
#define MDF_HAS_SUPP_CLAUSE	(1u << 14)
/* Per-type flags meaningful for GIDs only. */
#define MDF_TYPE_GID_MASK	(MDF_ANY_SUPP | MDF_MAY_REJ_SUPP |	\
    MDF_EXPLICIT_SUPP_MUST | MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE)
/* All per-type flags. */
#define MDF_TYPE_MASK		(MDF_TYPE_COMMON_MASK | MDF_TYPE_GID_MASK)
170 
/*
 * Persistent structures.
 */

/* A single target ID along with its per-ID flags. */
struct id_spec {
	u_int		 id;
	flags_t		 flags; /* See MDF_* above. */
};

/*
 * This limits the number of target clauses per type to 65535.  With the current
 * value of MAX_RULE_STRING_SIZE (1024), this is way more than enough anyway.
 */
typedef uint16_t	 id_nb_t;
/* We only have a few IT_* types. */
typedef uint16_t	 id_type_t;

/*
 * A parsed rule, as built by parse_single_rule().
 *
 * 'from_type' (IT_UID or IT_GID) and 'from_id' describe the "from" part.
 * 'uids' and 'gids' are arrays of 'uids_nb' and 'gids_nb' explicit target IDs,
 * sorted by ID (see pour_list_into_rule()), and are NULL when the
 * corresponding count is 0.  'uid_flags' and 'gid_flags' hold the per-type
 * flags.
 */
struct rule {
	STAILQ_ENTRY(rule) r_entries;
	id_type_t	 from_type;
	u_int		 from_id;
	flags_t		 uid_flags; /* See MDF_* above. */
	id_nb_t		 uids_nb;
	flags_t		 gid_flags; /* See MDF_* above. */
	id_nb_t		 gids_nb;
	struct id_spec	*uids;
	struct id_spec	*gids;
};
199 
/* Head type of a singly-linked tail queue of 'struct rule'. */
STAILQ_HEAD(rulehead, rule);

/*
 * A set of rules: the specification string it was parsed from (empty if there
 * are no rules, see has_rules()) and the list of parsed rules.
 */
struct rules {
	char		string[MAX_RULE_STRING_SIZE];
	struct rulehead	head;
};
206 
/*
 * Executable paths configuration.
 *
 * 'exec_paths_str' is empty when no paths are configured (see
 * has_exec_paths()).  NOTE(review): presumably 'exec_paths' holds the
 * 'exec_path_count' individual paths parsed out of 'exec_paths_str'; the code
 * filling them is not visible in this part of the file.
 */
struct exec_paths {
	char exec_paths_str[MAX_EXEC_PATHS_SIZE];
	char exec_paths[MAX_EXEC_PATHS][PATH_MAX];
	int exec_path_count;
};
212 
/*
 * A complete configuration: rules and executable paths, with a reference count
 * ('use_count', initialized to 1 by new_conf()) kept on its own cache line.
 */
struct conf {
	struct rules rules;
	struct exec_paths exec_paths;
	volatile u_int	use_count __aligned(CACHE_LINE_SIZE);
};
218 
/*
 * Temporary structures used to build a 'struct rule' above.
 */

/*
 * List element wrapping one 'struct id_spec'.  Elements are allocated by
 * parse_target_clause() and freed once copied by pour_list_into_rule() (or on
 * error by parse_single_rule()).
 */
struct id_elem {
	STAILQ_ENTRY(id_elem) ie_entries;
	struct id_spec spec;
};

/* Head type of a singly-linked tail queue of 'struct id_elem'. */
STAILQ_HEAD(id_list, id_elem);
229 
230 
231 #ifdef INVARIANTS
232 static void
233 check_type(const id_type_t type)
234 {
235 	if (type > IT_LAST)
236 		panic("Invalid type number %u", type);
237 }
238 
/*
 * Panic, reporting that 'flags' are not acceptable for ID type 'type'.
 *
 * 'type' is used to index id_type_to_str[], so must have been validated
 * beforehand (see check_type()).  'str' is a short explanation of the violated
 * constraint.
 *
 * NOTE(review): the message ends with a dangling ", " and prints the flags in
 * decimal.
 */
static void
panic_for_unexpected_flags(const id_type_t type, const flags_t flags,
    const char *const str)
{
	panic("ID type %s: Unexpected flags %u (%s), ", id_type_to_str[type],
	    flags, str);
}
246 
247 static void
248 check_type_and_id_flags(const id_type_t type, const flags_t flags)
249 {
250 	const char *str;
251 
252 	check_type(type);
253 	switch (type) {
254 	case IT_UID:
255 		if (flags != 0) {
256 			str = "only 0 allowed";
257 			goto unexpected_flags;
258 		}
259 		break;
260 	case IT_GID:
261 		if ((flags & ~MDF_ID_MASK) != 0) {
262 			str = "only bits in MDF_ID_MASK allowed";
263 			goto unexpected_flags;
264 		}
265 		if (!powerof2(flags & MDF_SUPP_MASK)) {
266 			str = "only a single flag in MDF_SUPP_MASK allowed";
267 			goto unexpected_flags;
268 		}
269 		break;
270 	default:
271 		__assert_unreachable();
272 	}
273 	return;
274 
275 unexpected_flags:
276 	panic_for_unexpected_flags(type, flags, str);
277 }
278 
/*
 * Panic if the flags of ID specification 'is' are not valid for ID type 'type'
 * (see check_type_and_id_flags()).  The ID itself is not examined.
 */
static void
check_type_and_id_spec(const id_type_t type, const struct id_spec *const is)
{
	check_type_and_id_flags(type, is->flags);
}
284 
285 static void
286 check_type_and_type_flags(const id_type_t type, const flags_t flags)
287 {
288 	const char *str;
289 
290 	check_type_and_id_flags(type, flags & MDF_ID_MASK);
291 	if ((flags & ~MDF_ID_MASK & ~MDF_TYPE_MASK) != 0) {
292 		str = "only MDF_ID_MASK | MDF_TYPE_MASK bits allowed";
293 		goto unexpected_flags;
294 	}
295 	if ((flags & MDF_ANY) != 0 && (flags & MDF_CURRENT) != 0 &&
296 	    (type == IT_UID || (flags & MDF_PRIMARY) != 0)) {
297 		str = "MDF_ANY and MDF_CURRENT are exclusive for UIDs "
298 		    "or primary group GIDs";
299 		goto unexpected_flags;
300 	}
301 	if ((flags & MDF_ANY_SUPP) != 0 && (flags & MDF_CURRENT) != 0 &&
302 	    (flags & MDF_SUPP_MASK) != 0) {
303 		str = "MDF_SUPP_ANY and MDF_CURRENT with supplementary "
304 		    "groups specification are exclusive";
305 		goto unexpected_flags;
306 	}
307 	if (type == IT_GID &&
308 	    ((flags & MDF_PRIMARY) != 0 || (flags & MDF_ANY) != 0) &&
309 	    (flags & MDF_HAS_PRIMARY_CLAUSE) == 0) {
310 		str = "Presence of folded primary clause not reflected "
311 		    "by presence of MDF_HAS_PRIMARY_CLAUSE";
312 		goto unexpected_flags;
313 	}
314 	if (((flags & MDF_SUPP_MASK) != 0 || (flags & MDF_ANY_SUPP) != 0) &&
315 	    (flags & MDF_HAS_SUPP_CLAUSE) == 0) {
316 		str = "Presence of folded supplementary clause not reflected "
317 		    "by presence of MDF_HAS_SUPP_CLAUSE";
318 		goto unexpected_flags;
319 	}
320 	return;
321 
322 unexpected_flags:
323 	panic_for_unexpected_flags(type, flags, str);
324 }
325 #else /* !INVARIANTS */
/* Without INVARIANTS, the checks expand to nothing. */
#define check_type_and_id_flags(...)
#define check_type_and_id_spec(...)
#define check_type_and_type_flags(...)
329 #endif /* INVARIANTS */
330 
331 static bool
332 has_rules(const struct rules *const rules)
333 {
334 	return (rules->string[0] != '\0');
335 }
336 
337 static bool
338 has_exec_paths(const struct exec_paths *const exec_paths)
339 {
340 	return (exec_paths->exec_paths_str[0] != '\0');
341 }
342 
343 /*
344  * Returns EALREADY if both flags have some overlap, or EINVAL if flags are
345  * incompatible, else 0 with flags successfully merged into 'dest'.
346  */
347 static int
348 coalesce_id_flags(const flags_t src, flags_t *const dest)
349 {
350 	flags_t res;
351 
352 	if ((src & *dest) != 0)
353 		return (EALREADY);
354 
355 	res = src | *dest;
356 
357 	/* Check for compatibility of supplementary flags, and coalesce. */
358 	if ((res & MDF_SUPP_MASK) != 0) {
359 		/* MDF_SUPP_DONT incompatible with the rest. */
360 		if ((res & MDF_SUPP_DONT) != 0 && (res & MDF_SUPP_MASK &
361 		    ~MDF_SUPP_DONT) != 0)
362 			return (EINVAL);
363 		/*
364 		 * Coalesce MDF_SUPP_ALLOW and MDF_SUPP_MUST into MDF_SUPP_MUST.
365 		 */
366 		if ((res & MDF_SUPP_ALLOW) != 0 && (res & MDF_SUPP_MUST) != 0)
367 			res &= ~MDF_SUPP_ALLOW;
368 	}
369 
370 	*dest = res;
371 	return (0);
372 }
373 
374 static void
375 toast_rules(struct rules *const rules)
376 {
377 	struct rulehead *const head = &rules->head;
378 	struct rule *rule, *rule_next;
379 
380 	STAILQ_FOREACH_SAFE(rule, head, r_entries, rule_next) {
381 		free(rule->uids, M_MAC_DO);
382 		free(rule->gids, M_MAC_DO);
383 		free(rule, M_MAC_DO);
384 	}
385 }
386 
/*
 * Initialize 'rules', which must be zero-filled on entry (asserted), by making
 * its list of rules empty.
 */
static inline void
init_rules(struct rules *const rules)
{
	MPASS(is_zeroed(rules, sizeof(*rules)));
	STAILQ_INIT(&rules->head);
}
393 
/*
 * Initialize 'exec_paths'.  Nothing to do besides asserting that it is
 * zero-filled, as that is its initial state.
 */
static inline void
init_exec_paths(struct exec_paths *const exec_paths)
{
	MPASS(is_zeroed(exec_paths, sizeof(*exec_paths)));
}
399 
400 static struct conf *
401 new_conf(void)
402 {
403 	struct conf *const conf = malloc(sizeof(*conf), M_MAC_DO,
404 	    M_WAITOK | M_ZERO);
405 
406 	init_rules(&conf->rules);
407 	init_exec_paths(&conf->exec_paths);
408 	refcount_init(&conf->use_count, 1);
409 
410 	return (conf);
411 }
412 
/*
 * Whether 's' is NULL or points to an empty string.
 */
static bool
is_null_or_empty(const char *const s)
{
	if (s == NULL)
		return (true);
	return (*s == '\0');
}
418 
419 /*
420  * String to unsigned int.
421  *
422  * Contrary to the "standard" strtou*() family of functions, do not tolerate
423  * spaces at start nor an empty string, and returns a status code, the 'u_int'
424  * result being returned through a passed pointer (if no error).
425  *
426  * We detour through 'quad_t' because in-kernel strto*() functions cannot set
427  * 'errno' and thus can't distinguish a true maximum value from one returned
428  * because of overflow.  We use 'quad_t' instead of 'u_quad_t' to support
429  * negative specifications (e.g., such as "-1" for UINT_MAX).
430  */
431 static int
432 strtoui_strict(const char *const restrict s, const char **const restrict endptr,
433     int base, u_int *result)
434 {
435 	char *ep;
436 	quad_t q;
437 
438 	/* Rule out spaces and empty specifications. */
439 	if (s[0] == '\0' || isspace(s[0])) {
440 		if (endptr != NULL)
441 			*endptr = s;
442 		return (EINVAL);
443 	}
444 
445 	q = strtoq(s, &ep, base);
446 	if (endptr != NULL)
447 		*endptr = ep;
448 	if (q < 0) {
449 		/* We allow specifying a negative number. */
450 		if (q < -(quad_t)UINT_MAX - 1 || q == QUAD_MIN)
451 			return (EOVERFLOW);
452 	} else {
453 		if (q > UINT_MAX || q == UQUAD_MAX)
454 			return (EOVERFLOW);
455 	}
456 
457 	*result = (u_int)q;
458 	return (0);
459 }
460 
/*
 * strsep() variant trimming spaces and tabs around the returned token.
 *
 * Blanks are skipped at start of '*stringp' and removed from the end of the
 * token, i.e., just before the delimiter found (or the end of string if none).
 * Blanks in the middle of a token are preserved.  As with strsep(), NULL is
 * returned if '*stringp' is NULL, the string is modified in place and
 * '*stringp' is updated to point past the delimiter, or set to NULL if no
 * delimiter was found.
 */
static char *
strsep_noblanks(char **const stringp, const char *delim)
{
	char *rest = *stringp;
	char *token, *end;

	if (rest == NULL)
		return (NULL);

	/* Skip leading blanks. */
	rest += strspn(rest, " \t");

	token = strsep(&rest, delim);

	/*
	 * 'end' starts at the token's terminating NUL: the overwritten
	 * delimiter if one was found, else the end of the string.  Walk it
	 * back over trailing blanks.
	 */
	end = (rest != NULL) ? rest - 1 : token + strlen(token);
	while (end != token && (end[-1] == ' ' || end[-1] == '\t'))
		--end;
	*end = '\0';

	*stringp = rest;
	return (token);
}
502 
503 
504 static void
505 make_parse_error(struct parse_error **const parse_error, const size_t pos,
506     const char *const fmt, ...)
507 {
508 	struct parse_error *const err = malloc(sizeof(*err), M_MAC_DO,
509 	    M_WAITOK);
510 	va_list ap;
511 
512 	err->pos = pos;
513 	va_start(ap, fmt);
514 	vsnprintf(err->msg, PARSE_ERROR_SIZE, fmt, ap);
515 	va_end(ap);
516 
517 	MPASS(*parse_error == NULL);
518 	*parse_error = err;
519 }
520 
/*
 * Release a parse error object obtained from make_parse_error().
 */
static void
free_parse_error(struct parse_error *const parse_error)
{
	free(parse_error, M_MAC_DO);
}
526 
527 static int
528 parse_id_type(const char *const string, id_type_t *const type,
529     struct parse_error **const parse_error)
530 {
531 	/*
532 	 * Special case for "any", as the canonical form for IT_ANY in
533 	 * id_type_to_str[] is "*".
534 	 */
535 	if (strcmp(string, "any") == 0) {
536 		*type = IT_ANY;
537 		return (0);
538 	}
539 
540 	/* Start at 1 to avoid parsing "invalid". */
541 	for (size_t i = 1; i <= IT_LAST; ++i) {
542 		if (strcmp(string, id_type_to_str[i]) == 0) {
543 			*type = i;
544 			return (0);
545 		}
546 	}
547 
548 	*type = IT_INVALID;
549 	make_parse_error(parse_error, 0, "No valid type found.");
550 	return (EINVAL);
551 }
552 
553 static size_t
554 parse_gid_flags(const char *const string, flags_t *const flags,
555     flags_t *const gid_flags)
556 {
557 	switch (string[0]) {
558 	case '+':
559 		*flags |= MDF_SUPP_ALLOW;
560 		goto has_supp_clause;
561 	case '!':
562 		*flags |= MDF_SUPP_MUST;
563 		*gid_flags |= MDF_MAY_REJ_SUPP;
564 		goto has_supp_clause;
565 	case '-':
566 		*flags |= MDF_SUPP_DONT;
567 		*gid_flags |= MDF_MAY_REJ_SUPP;
568 		goto has_supp_clause;
569 	has_supp_clause:
570 		*gid_flags |= MDF_HAS_SUPP_CLAUSE;
571 		return (1);
572 	}
573 
574 	return (0);
575 }
576 
/*
 * Whether 'string' is exactly one of the two spellings designating any ID,
 * "*" or "any".
 */
static bool
parse_any(const char *const string)
{
	if (strcmp(string, "*") == 0)
		return (true);
	return (strcmp(string, "any") == 0);
}
582 
583 static bool
584 has_clauses(const id_nb_t nb, const flags_t type_flags)
585 {
586 	return ((type_flags & MDF_TYPE_MASK) != 0 || nb != 0);
587 }
588 
/*
 * Parse a single target clause, '<flags><type>=<id>' or a lone "*"/"any".
 *
 * 'to' is the clause's text and is modified in place.  Per-type flags are
 * accumulated directly into 'rule' ('uid_flags'/'gid_flags').  An explicit
 * numerical ID is appended as a new element to 'uid_list' or 'gid_list', and
 * the corresponding counter in 'rule' ('uids_nb'/'gids_nb') is incremented.
 * Duplicate explicit IDs are not detected here (see pour_list_into_rule()).
 *
 * '*parse_error' must be NULL on entry.  Returns 0 on success.  On error, a
 * parse error is returned through 'parse_error', and the returned value is
 * EOVERFLOW if there are too many clauses of the same type, else EINVAL.
 * Except for a type that is not recognized and for EOVERFLOW (both reported
 * at position 0), the error's position is relative to the start of 'to'.
 */
static int
parse_target_clause(char *to, struct rule *const rule,
    struct id_list *const uid_list, struct id_list *const gid_list,
    struct parse_error **const parse_error)
{
	const char *const start = to;
	char *to_type, *to_id;
	const char *p;
	struct id_list *list;
	id_nb_t *nb;
	flags_t *tflags;
	struct id_elem *ie;
	struct id_spec is = {.flags = 0};
	flags_t gid_flags = 0;
	id_type_t type;
	int error;

	MPASS(*parse_error == NULL);
	MPASS(to != NULL);
	to_type = strsep_noblanks(&to, "=");
	MPASS(to_type != NULL);
	/* Consume the optional flag character in front of the type. */
	to_type += parse_gid_flags(to_type, &is.flags, &gid_flags);
	error = parse_id_type(to_type, &type, parse_error);
	if (error != 0)
		goto einval;
	if (type != IT_GID && is.flags != 0) {
		make_parse_error(parse_error, to_type - start,
		    "Expected type 'gid' after flags, not '%s'.",
		    to_type);
		goto einval;
	}

	/* Rest of the clause, if any (NULL if there was no '='). */
	to_id = strsep_noblanks(&to, "");
	switch (type) {
	case IT_GID:
		if (to_id == NULL) {
			make_parse_error(parse_error, to_type - start,
			    "No '=' and ID specification after type '%s'.",
			    to_type);
			goto einval;
		}

		if (is.flags == 0) {
			/* No flags: Dealing with a primary group. */
			is.flags |= MDF_PRIMARY;
			gid_flags |= MDF_HAS_PRIMARY_CLAUSE;
		}

		list = gid_list;
		nb = &rule->gids_nb;
		tflags = &rule->gid_flags;

		/* "*" or "any"? */
		if (parse_any(to_id)) {
			/*
			 * We check that we have not seen any other clause of
			 * the same category (i.e., concerning primary or
			 * supplementary groups).
			 */
			if ((is.flags & MDF_PRIMARY) != 0) {
				if ((*tflags & MDF_HAS_PRIMARY_CLAUSE) != 0) {
					make_parse_error(parse_error,
					    to_id - start,
					    "'any' specified after another "
					    "(primary) GID.");
					goto einval;
				}
				*tflags |= gid_flags | MDF_ANY;
			} else {
				/*
				 * If a supplementary group flag was present, it
				 * must be MDF_SUPP_ALLOW ("+").
				 */
				if ((is.flags & MDF_SUPP_MASK) != MDF_SUPP_ALLOW) {
					make_parse_error(parse_error,
					    to_id - start,
					    "'any' specified with another "
					    "flag than '+'.");
					goto einval;
				}
				if ((*tflags & MDF_HAS_SUPP_CLAUSE) != 0) {
					make_parse_error(parse_error,
					    to_id - start,
					    "'any' with flag '+' specified after "
					    "another (supplementary) GID.");
					goto einval;
				}
				*tflags |= gid_flags | MDF_ANY_SUPP;
			}
			goto check_type_and_finish;
		} else {
			/*
			 * Check that we haven't already seen "any" for the same
			 * category.
			 */
			if ((is.flags & MDF_PRIMARY) != 0) {
				if ((*tflags & MDF_ANY) != 0) {
					make_parse_error(parse_error,
					    to_id - start,
					    "Some (primary) GID specified after "
					    "'any'.");
					goto einval;
				}
			} else if ((*tflags & MDF_ANY_SUPP) != 0 &&
			    (is.flags & MDF_SUPP_ALLOW) != 0) {
				make_parse_error(parse_error,
				    to_id - start,
				    "Some (supplementary) GID specified after "
				    "'any' with flag '+'.");
				goto einval;
			}
			*tflags |= gid_flags;
		}
		break;

	case IT_UID:
		if (to_id == NULL) {
			make_parse_error(parse_error, to_type - start,
			    "No '=' and ID specification after type '%s'.",
			    to_type);
			goto einval;
		}

		list = uid_list;
		nb = &rule->uids_nb;
		tflags = &rule->uid_flags;

		/* "*" or "any"? */
		if (parse_any(to_id)) {
			/* There must not be any other clause. */
			if (has_clauses(*nb, *tflags)) {
				make_parse_error(parse_error, to_id - start,
				    "'any' specified after another UID.");
				goto einval;
			}
			*tflags |= MDF_ANY;
			goto check_type_and_finish;
		} else {
			/*
			 * Check that we haven't already seen "any" for the same
			 * category.
			 */
			if ((*tflags & MDF_ANY) != 0) {
				make_parse_error(parse_error, to_id - start,
				    "Some UID specified after 'any'.");
				goto einval;
			}
		}
		break;

	case IT_ANY:
		/* No ID allowed. */
		if (to_id != NULL) {
			make_parse_error(parse_error, to_type - start,
			    "No '=' and ID allowed after type '%s'.", to_type);
			goto einval;
		}
		/*
		 * We can't have IT_ANY after any other IT_*, it must be the
		 * only one.
		 */
		if (has_clauses(rule->uids_nb, rule->uid_flags) ||
		    has_clauses(rule->gids_nb, rule->gid_flags)) {
			make_parse_error(parse_error, to_type - start,
			    "Target clause of type '%s' coming after another "
			    "clause (must be alone).", to_type);
			goto einval;
		}
		/* Any UID, any primary group and any supplementary groups. */
		rule->uid_flags |= MDF_ANY;
		rule->gid_flags |= MDF_ANY | MDF_ANY_SUPP |
		    MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE;
		goto finish;

	default:
		/* parse_id_type() returns no other types currently. */
		__assert_unreachable();
	}

	/* Rule out cases that have been treated above. */
	MPASS((type == IT_UID || type == IT_GID) && !parse_any(to_id));

	/* "."? */
	if (strcmp(to_id, ".") == 0) {
		if ((*tflags & MDF_CURRENT) != 0) {
			/* Duplicate "." <id>.  Try to coalesce. */
			error = coalesce_id_flags(is.flags, tflags);
			if (error != 0) {
				make_parse_error(parse_error, to_id - start,
				    "Incompatible flags with prior clause "
				    "with same target.");
				goto einval;
			}
		} else
			*tflags |= MDF_CURRENT | is.flags;
		goto check_type_and_finish;
	}

	/* Parse an ID. */
	error = strtoui_strict(to_id, &p, 10, &is.id);
	if (error != 0 || *p != '\0') {
		make_parse_error(parse_error, to_id - start,
		    "Cannot parse a numerical ID (base 10).");
		goto einval;
	}

	/* Explicit ID flags. */
	if (type == IT_GID && (is.flags & MDF_SUPP_MUST) != 0)
		*tflags |= MDF_EXPLICIT_SUPP_MUST;

	/*
	 * We check for duplicate IDs and coalesce their 'struct id_spec' only
	 * at end of parse_single_rule() because it is much more performant then
	 * (using sorted arrays).
	 */
	++*nb;
	/* The counter wrapping around to 0 means it overflowed. */
	if (*nb == 0) {
		make_parse_error(parse_error, 0,
		    "Too many target clauses of type '%s'.", to_type);
		return (EOVERFLOW);
	}
	ie = malloc(sizeof(*ie), M_MAC_DO, M_WAITOK);
	ie->spec = is;
	STAILQ_INSERT_TAIL(list, ie, ie_entries);
	check_type_and_id_spec(type, &is);
check_type_and_finish:
	check_type_and_type_flags(type, *tflags);
finish:
	MPASS(error == 0 && *parse_error == NULL);
	return (0);
einval:
	/* We must have built a parse error on error. */
	MPASS(*parse_error != NULL);
	return (EINVAL);
}
823 
824 static int
825 u_int_cmp(const u_int i1, const u_int i2)
826 {
827 	return ((i1 > i2) - (i1 < i2));
828 }
829 
/*
 * Comparison function ordering 'struct id_spec' objects by ID only (flags are
 * ignored), in the form expected by qsort().
 */
static int
id_spec_cmp(const void *const p1, const void *const p2)
{
	const struct id_spec *const is1 = p1;
	const struct id_spec *const is2 = p2;

	return (u_int_cmp(is1->id, is2->id));
}
838 
839 /*
840  * Transfer content of 'list' into 'array', freeing and emptying list.
841  *
842  * 'nb' must be 'list''s length and not be greater than 'array''s size.  The
843  * destination array is sorted by ID.  Structures 'struct id_spec' with same IDs
844  * are coalesced if that makes sense (not including duplicate clauses), else
845  * EINVAL is returned.  On success, 'nb' is updated (lowered) to account for
846  * coalesced specifications.  The parameter 'type' is only for testing purposes
847  * (INVARIANTS).
848  */
849 static int
850 pour_list_into_rule(const id_type_t type, struct id_list *const list,
851     struct id_spec *const array, id_nb_t *const nb,
852     struct parse_error **const parse_error)
853 {
854 	struct id_elem *ie, *ie_next;
855 	size_t idx = 0;
856 
857 	/* Fill the array. */
858 	STAILQ_FOREACH_SAFE(ie, list, ie_entries, ie_next) {
859 		MPASS(idx < *nb);
860 		array[idx] = ie->spec;
861 		free(ie, M_MAC_DO);
862 		++idx;
863 	}
864 	MPASS(idx == *nb);
865 	STAILQ_INIT(list);
866 
867 	/* Sort it (by ID). */
868 	qsort(array, *nb, sizeof(*array), id_spec_cmp);
869 
870 	/* Coalesce same IDs. */
871 	if (*nb != 0) {
872 		size_t ref_idx = 0;
873 
874 		for (idx = 1; idx < *nb; ++idx) {
875 			const u_int id = array[idx].id;
876 
877 			if (id != array[ref_idx].id) {
878 				++ref_idx;
879 				if (ref_idx != idx)
880 					array[ref_idx] = array[idx];
881 				continue;
882 			}
883 
884 			switch (type) {
885 				int error;
886 
887 			case IT_GID:
888 				error = coalesce_id_flags(array[idx].flags,
889 				    &array[ref_idx].flags);
890 				if (error != 0) {
891 					make_parse_error(parse_error, 0,
892 					    "Incompatible flags or duplicate "
893 					    "GID %u.", id);
894 					goto einval;
895 				}
896 				check_type_and_id_flags(type,
897 				    array[ref_idx].flags);
898 				break;
899 
900 			case IT_UID:
901 				/*
902 				 * No flags in this case.  Multiple appearances
903 				 * of the same UID is an exact redundancy, so
904 				 * error out.
905 				 */
906 				make_parse_error(parse_error, 0,
907 				    "Duplicate UID %u.", id);
908 				goto einval;
909 
910 			default:
911 				__assert_unreachable();
912 			}
913 		}
914 		*nb = ref_idx + 1;
915 	}
916 
917 	MPASS(*parse_error == NULL);
918 	return (0);
919 
920 einval:
921 	MPASS(*parse_error != NULL);
922 	return (EINVAL);
923 }
924 
925 /*
926  * See also the herald comment for parse_rules() below.
927  *
928  * The second part of a rule, called <target> (or <to>), is a comma-separated
929  * (',') list of '<flags><type>=<id>' clauses similar to that of the <from>
930  * part, with the extensions that <id> may also be "*" or "any" or ".", and that
931  * <flags> may contain at most one of the '+', '-' and '!' characters when
932  * <type> is "gid" (no flags are allowed for "uid").  No two clauses in a single
933  * <to> list may list the same <id>.  "*" and "any" both designate any ID for
934  * the <type>, and are aliases to each other.  In front of "any" (or "*"), only
935  * the '+' flag is allowed (in the "gid" case).  "." designates the process'
936  * current IDs for the <type>.  The precise meaning of flags and "." is
937  * explained in functions checking privileges below.
938  */
939 static int
940 parse_single_rule(char *rule, struct rules *const rules,
941     struct parse_error **const parse_error)
942 {
943 	const char *const start = rule;
944 	const char *from_type, *from_id, *p;
945 	char *to_list;
946 	struct id_list uid_list, gid_list;
947 	struct id_elem *ie, *ie_next;
948 	struct rule *new;
949 	int error;
950 
951 	MPASS(*parse_error == NULL);
952 	STAILQ_INIT(&uid_list);
953 	STAILQ_INIT(&gid_list);
954 
955 	/* Freed when the 'struct rules' container is freed. */
956 	new = malloc(sizeof(*new), M_MAC_DO, M_WAITOK | M_ZERO);
957 
958 	from_type = strsep_noblanks(&rule, "=");
959 	MPASS(from_type != NULL); /* Because 'rule' was not NULL. */
960 	error = parse_id_type(from_type, &new->from_type, parse_error);
961 	if (error != 0)
962 		goto einval;
963 	switch (new->from_type) {
964 	case IT_UID:
965 	case IT_GID:
966 		break;
967 	default:
968 		make_parse_error(parse_error, 0, "Type '%s' not allowed in "
969 		    "the \"from\" part of rules.");
970 		goto einval;
971 	}
972 
973 	from_id = strsep_noblanks(&rule, ":>");
974 	if (is_null_or_empty(from_id)) {
975 		make_parse_error(parse_error, 0, "No ID specified.");
976 		goto einval;
977 	}
978 
979 	error = strtoui_strict(from_id, &p, 10, &new->from_id);
980 	if (error != 0 || *p != '\0') {
981 		make_parse_error(parse_error, from_id - start,
982 		    "Cannot parse a numerical ID (base 10).");
983 		goto einval;
984 	}
985 
986 	/*
987 	 * We will now parse the "to" list.
988 	 *
989 	 * In order to ease parsing, we will begin by building lists of target
990 	 * UIDs and GIDs in local variables 'uid_list' and 'gid_list'.  The
991 	 * number of each type of IDs will be filled directly in 'new'.  At end
992 	 * of parse, we will allocate both arrays of IDs to be placed into the
993 	 * 'uids' and 'gids' members, sort them, and discard the tail queues
994 	 * used to build them.  This conversion to sorted arrays at end of parse
995 	 * allows to minimize memory allocations and enables searching IDs in
996 	 * O(log(n)) instead of linearly.
997 	 */
998 	to_list = strsep_noblanks(&rule, ",");
999 	if (to_list == NULL) {
1000 		make_parse_error(parse_error, 0, "No target list.");
1001 		goto einval;
1002 	}
1003 	do {
1004 		error = parse_target_clause(to_list, new, &uid_list, &gid_list,
1005 		    parse_error);
1006 		if (error != 0) {
1007 			(*parse_error)->pos += to_list - start;
1008 			goto einval;
1009 		}
1010 
1011 		to_list = strsep_noblanks(&rule, ",");
1012 	} while (to_list != NULL);
1013 
1014 	if (new->uids_nb != 0) {
1015 		new->uids = malloc(sizeof(*new->uids) * new->uids_nb, M_MAC_DO,
1016 		    M_WAITOK);
1017 		error = pour_list_into_rule(IT_UID, &uid_list, new->uids,
1018 		    &new->uids_nb, parse_error);
1019 		if (error != 0)
1020 			goto einval;
1021 	}
1022 	MPASS(STAILQ_EMPTY(&uid_list));
1023 	if (!has_clauses(new->uids_nb, new->uid_flags)) {
1024 		/* No UID specified, default is "uid=.". */
1025 		MPASS(new->uid_flags == 0);
1026 		new->uid_flags = MDF_CURRENT;
1027 		check_type_and_type_flags(IT_UID, new->uid_flags);
1028 	}
1029 
1030 	if (new->gids_nb != 0) {
1031 		new->gids = malloc(sizeof(*new->gids) * new->gids_nb, M_MAC_DO,
1032 		    M_WAITOK);
1033 		error = pour_list_into_rule(IT_GID, &gid_list, new->gids,
1034 		    &new->gids_nb, parse_error);
1035 		if (error != 0)
1036 			goto einval;
1037 	}
1038 	MPASS(STAILQ_EMPTY(&gid_list));
1039 	if (!has_clauses(new->gids_nb, new->gid_flags)) {
1040 		/* No GID specified, default is "gid=.,!gid=.". */
1041 		MPASS(new->gid_flags == 0);
1042 		new->gid_flags = MDF_CURRENT | MDF_PRIMARY | MDF_SUPP_MUST |
1043 		    MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE;
1044 		check_type_and_type_flags(IT_GID, new->gid_flags);
1045 	}
1046 
1047 	STAILQ_INSERT_TAIL(&rules->head, new, r_entries);
1048 	MPASS(error == 0 && *parse_error == NULL);
1049 	return (0);
1050 
1051 einval:
1052 	free(new->gids, M_MAC_DO);
1053 	free(new->uids, M_MAC_DO);
1054 	free(new, M_MAC_DO);
1055 	STAILQ_FOREACH_SAFE(ie, &gid_list, ie_entries, ie_next)
1056 	    free(ie, M_MAC_DO);
1057 	STAILQ_FOREACH_SAFE(ie, &uid_list, ie_entries, ie_next)
1058 	    free(ie, M_MAC_DO);
1059 	MPASS(*parse_error != NULL);
1060 	return (EINVAL);
1061 }
1062 
1063 /*
1064  * Parse rules specification and produce rule structures out of it.
1065  *
1066  * Must be called with '*parse_error' set to NULL.  Returns 0 on success, with
1067  * '*rulesp' made to point to a 'struct rule' representing the rules.  On error,
1068  * the returned value is non-zero and '*rulesp' is unchanged.  If 'string' has
1069  * length greater or equal to MAX_RULE_STRING_SIZE, ENAMETOOLONG is returned.  If
1070  * it is not in the expected format, EINVAL is returned.  If an error is
1071  * returned, '*parse_error' is set to point to a 'struct parse_error' giving an
1072  * error message for the problem.
1073  *
1074  * Expected format: A >-colon-separated list of rules of the form
1075  * "<from>><target>" (for backwards compatibility, a semi-colon ":" is accepted
1076  * in place of '>').  The <from> part is of the form "<type>=<id>" where <type>
1077  * is "uid" or "gid", <id> an UID or GID (depending on <type>) and <target> is
1078  * "*", "any" or a comma-separated list of '<flags><type>=<id>' clauses (see the
1079  * comment for parse_single_rule() for more details).  For convenience, empty
1080  * rules are allowed (and do nothing), and spaces and tabs are allowed (and
1081  * removed) around each token (tokens are natural ones, except that
1082  * '<flags><type>' as a whole is considered a single token, so no blanks are
1083  * allowed between '<flags>' and '<type>').
1084  *
1085  * Examples:
1086  * - "uid=1001>uid=1010,gid=1010;uid=1002>any"
1087  * - "gid=1010>gid=1011,gid=1012,gid=1013"
1088  */
1089 static int
1090 parse_rules(const char *const string, struct rules *const rules,
1091     struct parse_error **const parse_error)
1092 {
1093 	const size_t len = strlen(string);
1094 	char *copy, *p, *rule;
1095 	int error = 0;
1096 
1097 	if (len >= MAX_RULE_STRING_SIZE) {
1098 		make_parse_error(parse_error, 0,
1099 		    "Rule specification string is too long (%zu, max %zu)",
1100 		    len, MAX_RULE_STRING_SIZE - 1);
1101 		return (ENAMETOOLONG);
1102 	}
1103 
1104 	bcopy(string, rules->string, len + 1);
1105 	MPASS(rules->string[len] == '\0'); /* Catch some races. */
1106 
1107 	copy = malloc(len + 1, M_MAC_DO, M_WAITOK);
1108 	bcopy(string, copy, len + 1);
1109 	MPASS(copy[len] == '\0'); /* Catch some races. */
1110 
1111 	p = copy;
1112 	while ((rule = strsep_noblanks(&p, ";")) != NULL) {
1113 		if (rule[0] == '\0')
1114 			continue;
1115 		error = parse_single_rule(rule, rules, parse_error);
1116 		if (error != 0) {
1117 			(*parse_error)->pos += rule - copy;
1118 			toast_rules(rules);
1119 			goto error;
1120 		}
1121 	}
1122 
1123 	MPASS(error == 0 && *parse_error == NULL);
1124 out:
1125 	free(copy, M_MAC_DO);
1126 	return (error);
1127 error:
1128 	MPASS(error != 0 && *parse_error != NULL);
1129 	goto out;
1130 }
1131 
1132 /*
1133  * Similar constraints as parse_rules() (which see).
1134  */
1135 static int
1136 parse_exec_paths(const char *const string, struct exec_paths *const exec_paths,
1137     struct parse_error **const parse_error)
1138 {
1139 	const size_t len = strlen(string);
1140 	char *copy, *p, *path;
1141 	int error = 0;
1142 
1143 	if (len >= MAX_EXEC_PATHS_SIZE) {
1144 		make_parse_error(parse_error, 0,
1145 		    "Exec path specification string is too long (%zu, max %u)",
1146 		    len, MAX_EXEC_PATHS_SIZE - 1);
1147 		return (ENAMETOOLONG);
1148 	}
1149 
1150 	bcopy(string, exec_paths->exec_paths_str, len + 1);
1151 	MPASS(exec_paths->exec_paths_str[len] == '\0');
1152 
1153 	copy = malloc(len + 1, M_MAC_DO, M_WAITOK);
1154 	bcopy(string, copy, len + 1);
1155 	MPASS(copy[len] == '\0');
1156 
1157 	p = copy;
1158 	while ((path = strsep_noblanks(&p, ":")) != NULL) {
1159 		size_t path_len;
1160 
1161 		if (*path == '\0')
1162 			continue;
1163 
1164 		if (exec_paths->exec_path_count >= MAX_EXEC_PATHS) {
1165 			make_parse_error(parse_error, path - copy,
1166 			    "Too many exec paths specified (max %d)",
1167 			    MAX_EXEC_PATHS);
1168 			error = EINVAL;
1169 			goto error;
1170 		}
1171 
1172 		path_len = strlen(path);
1173 		if (path_len >= PATH_MAX) {
1174 			make_parse_error(parse_error, path - copy,
1175 			    "Exec paths too long (%zu, max %u)",
1176 			    path_len, PATH_MAX - 1);
1177 			error = ENAMETOOLONG;
1178 			goto error;
1179 		}
1180 
1181 		strlcpy(exec_paths->exec_paths[exec_paths->exec_path_count],
1182 		    path, PATH_MAX);
1183 		exec_paths->exec_path_count++;
1184 	}
1185 
1186 	MPASS(error == 0 && *parse_error == NULL);
1187 out:
1188 	free(copy, M_MAC_DO);
1189 	return (error);
1190 error:
1191 	MPASS(error != 0 && *parse_error != NULL);
1192 	goto out;
1193 }
1194 
1195 static void
1196 hold_conf(struct conf *const conf)
1197 {
1198 	int old_count __diagused = refcount_acquire(&conf->use_count);
1199 
1200 	KASSERT(old_count != 0,
1201 	    ("MAC/do: Trying to resurrect a destroyed configuration."));
1202 }
1203 
1204 static void
1205 drop_conf(struct conf *const conf)
1206 {
1207 	if (refcount_release(&conf->use_count)) {
1208 		toast_rules(&conf->rules);
1209 		free(conf, M_MAC_DO);
1210 	}
1211 }
1212 
1213 /*
1214  * Find configuration applicable to the passed prison.
1215  *
1216  * Returns the applicable configuration (which always exists), with an
1217  * additional reference that must be freed by the caller.  'pr' must not be
1218  * locked.
1219  *
1220  * The applicable configuration is that of the closest ancestor prison
1221  * (including itself) of the passed prison that actually has a 'struct conf'
1222  * associated to it.
1223  *
1224  * If 'hpr' is not NULL, it is used to return a pointer to the (unlocked) prison
1225  * holding the applicable configuration.
1226  */
1227 static struct conf *
1228 find_conf(struct prison *const pr, struct prison **const hpr)
1229 {
1230 	struct prison *cpr, *ppr;
1231 	struct conf *conf;
1232 
1233 	cpr = pr;
1234 	for (;;) {
1235 		prison_lock(cpr);
1236 		conf = osd_jail_get(cpr, osd_jail_slot);
1237 		if (conf != NULL)
1238 			break;
1239 		prison_unlock(cpr);
1240 
1241 		ppr = cpr->pr_parent;
1242 		/*
1243 		 * 'prison0' normally always have a mac_do(4) configuration
1244 		 * because we installed one on module load/activation and
1245 		 * nothing can destroy it as 'prison0' is not a regular jail and
1246 		 * the 'mac.do' parameter cannot be set to 'inherit' on it,
1247 		 * which is the only way to clear an existing configuration.
1248 		 */
1249 		KASSERT(ppr != NULL,
1250 		    ("MAC/do: 'prison0' must always have a configuration."));
1251 		cpr = ppr;
1252 	}
1253 
1254 	hold_conf(conf);
1255 	prison_unlock(cpr);
1256 
1257 	if (hpr != NULL)
1258 		*hpr = cpr;
1259 	return (conf);
1260 }
1261 
#ifdef INVARIANTS
/*
 * Panic if the use count of 'conf' differs from 'expected'.
 *
 * Compiled to nothing when INVARIANTS is not defined.
 */
static void
check_conf_use_count(const struct conf *const conf, u_int expected)
{
	const u_int actual = refcount_load(&conf->use_count);

	if (actual == expected)
		return;

	panic("MAC/do: Configuration at %p: Use count is %u, "
	    "expected %u", conf, actual, expected);
}
#else
#define check_conf_use_count(...)
#endif /* INVARIANTS */
1275 
/*
 * OSD destructor for slot 'osd_jail_slot'.
 *
 * Called with 'value' not NULL.  We have arranged that it is only ever called
 * when the corresponding jail goes down or at module unload.
 */
static void
dealloc_jail_osd(void *const value)
{
	struct conf *const jail_conf = value;

	/*
	 * We expect to hold the very last reference here.
	 *
	 * When the "holding" jail goes down, no thread of that jail (or of its
	 * sub-jails) should be executing anymore, and in particular none should
	 * be in the middle of setcred(), so nobody else should be using the
	 * rules.
	 *
	 * Module unload is more complex.  The MAC framework ensures that no
	 * hook is called while a module is unloading, but unload could happen
	 * between two calls to MAC hooks in the course of, e.g., executing
	 * setcred(), where the rules' reference count has been bumped to keep
	 * them alive even if the rules on the "holding" jail has been
	 * concurrently changed.  These other references are held in our thread
	 * OSD slot, so we ensure that all thread's slots are freed first in
	 * mac_do_destroy() to be able to check that only one reference remains.
	 */
	check_conf_use_count(jail_conf, 1);
	drop_conf(jail_conf);
}
1304 
1305 /*
1306  * Remove the rules specifically associated to a prison.
1307  *
1308  * In practice, this means that the rules become inherited (from the closest
1309  * ancestor that has some).
1310  *
1311  * Destroys the 'osd_jail_slot' slot of the passed jail.
1312  */
1313 static void
1314 remove_conf(struct prison *const pr)
1315 {
1316 	struct conf *old_conf;
1317 	int error __unused;
1318 
1319 	prison_lock(pr);
1320 	/*
1321 	 * We burden ourselves with extracting rules first instead of just
1322 	 * letting osd_jail_del() call dealloc_jail_osd() as we want to
1323 	 * decrement their use count, and possibly free them, outside of the
1324 	 * prison lock.
1325 	 */
1326 	old_conf = osd_jail_get(pr, osd_jail_slot);
1327 	error = osd_jail_set(pr, osd_jail_slot, NULL);
1328 	/* osd_set() never allocates memory when 'value' is NULL, nor fails. */
1329 	MPASS(error == 0);
1330 	/*
1331 	 * This completely frees the OSD slot, but doesn't call the destructor
1332 	 * since we've just put NULL in the slot.
1333 	 */
1334 	osd_jail_del(pr, osd_jail_slot);
1335 	prison_unlock(pr);
1336 
1337 	if (old_conf != NULL)
1338 		drop_conf(old_conf);
1339 }
1340 
1341 /*
1342  * Assign an already-built configuration to a jail.
1343  */
1344 static void
1345 set_conf(struct prison *const pr, struct conf *const conf)
1346 {
1347 	struct conf *old_conf;
1348 	void **rsv;
1349 
1350 	hold_conf(conf);
1351 	rsv = osd_reserve(osd_jail_slot);
1352 
1353 	prison_lock(pr);
1354 	old_conf = osd_jail_get(pr, osd_jail_slot);
1355 	osd_jail_set_reserved(pr, osd_jail_slot, rsv, conf);
1356 	prison_unlock(pr);
1357 	if (old_conf != NULL)
1358 		drop_conf(old_conf);
1359 }
1360 
1361 /*
1362  * Assigns the default configuration to a jail.
1363  */
1364 static void
1365 set_default_conf(struct prison *const pr)
1366 {
1367 	struct conf *const conf = new_conf();
1368 
1369 	strlcpy(conf->exec_paths.exec_paths_str, "/usr/bin/mdo",
1370 	    MAX_EXEC_PATHS_SIZE);
1371 	strlcpy(conf->exec_paths.exec_paths[0], "/usr/bin/mdo", PATH_MAX);
1372 	conf->exec_paths.exec_path_count = 1;
1373 
1374 	set_conf(pr, conf);
1375 	drop_conf(conf);
1376 }
1377 
1378 static void
1379 clone_rules(struct rules *const dst, const struct rules *const src)
1380 {
1381 	const struct rule *src_rule;
1382 
1383 	strlcpy(dst->string, src->string, sizeof(dst->string));
1384 
1385 	STAILQ_FOREACH(src_rule, &src->head, r_entries) {
1386 		struct rule *const dst_rule = malloc(sizeof(*dst_rule),
1387 		    M_MAC_DO, M_WAITOK);
1388 		bcopy(src_rule, dst_rule, sizeof(*dst_rule));
1389 
1390 		if (src_rule->uids_nb > 0) {
1391 			const size_t uids_size = sizeof(*dst_rule->uids) *
1392 			    src_rule->uids_nb;
1393 
1394 			dst_rule->uids = malloc(uids_size, M_MAC_DO, M_WAITOK);
1395 			bcopy(src_rule->uids, dst_rule->uids, uids_size);
1396 		}
1397 
1398 		if (src_rule->gids_nb > 0) {
1399 			const size_t gids_size = sizeof(*dst_rule->gids) *
1400 			    src_rule->gids_nb;
1401 
1402 			dst_rule->gids = malloc(gids_size, M_MAC_DO, M_WAITOK);
1403 			bcopy(src_rule->gids, dst_rule->gids, gids_size);
1404 		}
1405 
1406 		STAILQ_INSERT_TAIL(&dst->head, dst_rule, r_entries);
1407 	}
1408 }
1409 
1410 static void
1411 clone_exec_paths(struct exec_paths *const dst,
1412     const struct exec_paths *const src)
1413 {
1414 	MPASS(is_zeroed(dst, sizeof(*dst)));
1415 	dst->exec_path_count = src->exec_path_count;
1416 	for (int i = 0; i < src->exec_path_count; i++)
1417 		strlcpy(dst->exec_paths[i], src->exec_paths[i],
1418 		    sizeof(dst->exec_paths[i]));
1419 
1420 	strlcpy(dst->exec_paths_str, src->exec_paths_str,
1421 	    sizeof(dst->exec_paths_str));
1422 }
1423 
1424 /*
1425  * Sets/modifies the MAC/do configuration for a jail.
1426  *
1427  * Must be called with '*parse_error' set to NULL.
1428  *
1429  * Supports explicitly setting all parameters or only some of them.  An
1430  * unspecified parameter must be passed as NULL.  The values of unspecified
1431  * parameters are copied from those of the passed model configuration (which is
1432  * expected to be the currently applicable configuration, i.e., that of the
1433  * closest ancestor jail that has one).
1434  */
1435 static int
1436 parse_and_set_conf(struct prison *const pr, const char *const rules_string,
1437     const char *const exec_paths_string, const struct conf *const model_conf,
1438     struct parse_error **const parse_error)
1439 {
1440 	struct conf *const conf = new_conf();
1441 	int error = 0;
1442 
1443 	KASSERT(model_conf != NULL ||
1444 	    (rules_string != NULL && exec_paths_string != NULL),
1445 	    ("MAC/do: %s: Model configuration needed!", __func__));
1446 
1447 	if (rules_string != NULL) {
1448 		error = parse_rules(rules_string, &conf->rules, parse_error);
1449 		if (error != 0)
1450 			goto error;
1451 	}
1452 	else
1453 		clone_rules(&conf->rules, &model_conf->rules);
1454 
1455 	if (exec_paths_string != NULL) {
1456 		error = parse_exec_paths(exec_paths_string, &conf->exec_paths,
1457 		    parse_error);
1458 		if (error != 0)
1459 			goto error;
1460 	} else
1461 		clone_exec_paths(&conf->exec_paths,
1462 		    &model_conf->exec_paths);
1463 
1464 	set_conf(pr, conf);
1465 
1466 	MPASS(error == 0 && *parse_error == NULL);
1467 out:
1468 	drop_conf(conf);
1469 	return (error);
1470 error:
1471 	MPASS(error != 0 && *parse_error != NULL);
1472 	goto out;
1473 }
1474 
1475 static int
1476 mac_do_sysctl_rules(SYSCTL_HANDLER_ARGS)
1477 {
1478 	char *const buf = malloc(MAX_RULE_STRING_SIZE, M_MAC_DO, M_WAITOK);
1479 	struct prison *const pr = req->td->td_ucred->cr_prison;
1480 	struct conf *conf;
1481 	struct parse_error *parse_error = NULL;
1482 	int error;
1483 
1484 	conf = find_conf(pr, NULL);
1485 	strlcpy(buf, conf->rules.string, MAX_RULE_STRING_SIZE);
1486 
1487 	error = sysctl_handle_string(oidp, buf, MAX_RULE_STRING_SIZE, req);
1488 	if (error != 0 || req->newptr == NULL)
1489 		goto out;
1490 
1491 	error = parse_and_set_conf(pr, buf, NULL, conf, &parse_error);
1492 	if (error != 0) {
1493 		if (print_parse_error)
1494 			printf("MAC/do: Parse error at index %zu: %s\n",
1495 			    parse_error->pos, parse_error->msg);
1496 		free_parse_error(parse_error);
1497 	}
1498 
1499 out:
1500 	drop_conf(conf);
1501 	free(buf, M_MAC_DO);
1502 	return (error);
1503 }
1504 
1505 SYSCTL_PROC(_security_mac_do, OID_AUTO, rules,
1506     CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_PRISON|CTLFLAG_MPSAFE,
1507     0, 0, mac_do_sysctl_rules, "A",
1508     "Rules");
1509 
1510 
1511 SYSCTL_JAIL_PARAM_SYS_SUBNODE(mac, do, CTLFLAG_RW, "Jail MAC/do parameters");
1512 SYSCTL_JAIL_PARAM_STRING(_mac_do, rules, CTLFLAG_RW, MAX_RULE_STRING_SIZE,
1513     "Jail MAC/do rules");
1514 
1515 static int
1516 mac_do_sysctl_exec_paths(SYSCTL_HANDLER_ARGS)
1517 {
1518 	char *const buf = malloc(MAX_EXEC_PATHS_SIZE, M_MAC_DO, M_WAITOK);
1519 	struct prison *const pr = req->td->td_ucred->cr_prison;
1520 	struct conf *conf;
1521 	struct parse_error *parse_error = NULL;
1522 	int error;
1523 
1524 	conf = find_conf(pr, NULL);
1525 	strlcpy(buf, conf->exec_paths.exec_paths_str, MAX_EXEC_PATHS_SIZE);
1526 
1527 	error = sysctl_handle_string(oidp, buf, MAX_EXEC_PATHS_SIZE, req);
1528 	if (error != 0 || req->newptr == NULL)
1529 		goto out;
1530 
1531 	error = parse_and_set_conf(pr, NULL, buf, conf, &parse_error);
1532 	if (error != 0) {
1533 		if (print_parse_error)
1534 			printf("MAC/do: Parse error at index %zu: %s\n",
1535 			    parse_error->pos, parse_error->msg);
1536 		free_parse_error(parse_error);
1537 	}
1538 
1539 out:
1540 	drop_conf(conf);
1541 	free(buf, M_MAC_DO);
1542 	return (error);
1543 }
1544 
1545 SYSCTL_PROC(_security_mac_do, OID_AUTO, exec_paths,
1546     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
1547     0, 0, mac_do_sysctl_exec_paths, "A",
1548     "Colon-separated list of allowed executables");
1549 
1550 SYSCTL_JAIL_PARAM_STRING(_mac_do, exec_paths, CTLFLAG_RW, MAX_EXEC_PATHS_SIZE,
1551     "Jail MAC/do executable paths");
1552 
1553 static int
1554 mac_do_jail_get(void *obj, void *data)
1555 {
1556 	struct prison *const pr = obj;
1557 	struct vfsoptlist *const opts = data;
1558 	struct prison *hpr_out;
1559 	struct conf *const applicable_conf = find_conf(pr, &hpr_out);
1560 	const struct prison *const hpr = hpr_out;
1561 	const struct rules *const rules = &applicable_conf->rules;
1562 	const struct exec_paths *const exec_paths = &applicable_conf->exec_paths;
1563 	int jsys, error;
1564 
1565 	jsys = hpr == pr ? (has_rules(rules) && has_exec_paths(exec_paths) ?
1566 	    JAIL_SYS_NEW : JAIL_SYS_DISABLE) : JAIL_SYS_INHERIT;
1567 
1568 	error = vfs_setopt(opts, "mac.do", &jsys, sizeof(jsys));
1569 	if (error != 0 && error != ENOENT)
1570 		goto done;
1571 
1572 	error = vfs_setopts(opts, "mac.do.rules", rules->string);
1573 	if (error != 0 && error != ENOENT)
1574 		goto done;
1575 
1576 	error = vfs_setopts(opts, "mac.do.exec_paths",
1577 	    exec_paths->exec_paths_str);
1578 	if (error != 0 && error != ENOENT)
1579 		goto done;
1580 
1581 	error = 0;
1582 done:
1583 	drop_conf(applicable_conf);
1584 	return (error);
1585 }
1586 
1587 /*
1588  * -1 is used as a sentinel in mac_do_jail_check() and mac_do_jail_set() below.
1589  */
1590 _Static_assert(-1 != JAIL_SYS_DISABLE && -1 != JAIL_SYS_NEW &&
1591     -1 != JAIL_SYS_INHERIT,
1592     "mac_do(4) uses -1 as a sentinel for uninitialized 'jsys'.");
1593 
1594 /*
1595  * We perform only cheap checks here, i.e., we do not really parse the rules
1596  * specification string, if any.
1597  */
1598 static int
1599 mac_do_jail_check(void *obj, void *data)
1600 {
1601 	struct vfsoptlist *opts = data;
1602 	char *rules_string, *exec_paths_string;
1603 	int error, jsys, rules_size = 0, exec_paths_size = 0;
1604 	bool absent_or_empty_rules, absent_or_empty_exec_paths;
1605 
1606 	error = vfs_copyopt(opts, "mac.do", &jsys, sizeof(jsys));
1607 	if (error == ENOENT)
1608 		/*
1609 		 * Mark unspecified.  Will fill it up below depending on the
1610 		 * other options.
1611 		 */
1612 		jsys = -1;
1613 	else {
1614 		if (error != 0)
1615 			return (error);
1616 		if (jsys != JAIL_SYS_DISABLE && jsys != JAIL_SYS_NEW &&
1617 		    jsys != JAIL_SYS_INHERIT)
1618 			return (EINVAL);
1619 	}
1620 
1621 	/*
1622 	 * We use vfs_getopt() below instead of vfs_getopts() to get the
1623 	 * string's buffer size.  We perform the additional checks done by the
1624 	 * latter here, even if jail_set() calls vfs_getopts() itself later
1625 	 * (they becoming inconsistent wouldn't cause any security problem).
1626 	 */
1627 
1628 	/* Rules. */
1629 	error = vfs_getopt(opts, "mac.do.rules", (void **)&rules_string,
1630 	    &rules_size);
1631 	if (error == ENOENT)
1632 		rules_string = NULL;
1633 	else {
1634 		if (error != 0)
1635 			return (error);
1636 		if (rules_size == 0 || rules_string[rules_size - 1] != '\0') {
1637 			vfs_opterror(opts,
1638 			    "'mac.do.rules' not a proper string");
1639 			return (EINVAL);
1640 		}
1641 		if (rules_size > MAX_RULE_STRING_SIZE) {
1642 			vfs_opterror(opts, "'mac.do.rules' too long");
1643 			return (ENAMETOOLONG);
1644 		}
1645 	}
1646 
1647 	/* Executable paths. */
1648 	error = vfs_getopt(opts, "mac.do.exec_paths",
1649 	    (void **)&exec_paths_string, &exec_paths_size);
1650 	if (error == ENOENT)
1651 		exec_paths_string = NULL;
1652 	else {
1653 		if (error != 0)
1654 			return (error);
1655 		if (exec_paths_size == 0 ||
1656 		    exec_paths_string[exec_paths_size - 1] != '\0') {
1657 			vfs_opterror(opts,
1658 			    "'mac.do.exec_paths' not a proper string");
1659 			return (EINVAL);
1660 		}
1661 		if (exec_paths_size > MAX_EXEC_PATHS_SIZE) {
1662 			vfs_opterror(opts, "'mac.do.exec_paths' too long");
1663 			return (ENAMETOOLONG);
1664 		}
1665 	}
1666 
1667 	absent_or_empty_rules = is_null_or_empty(rules_string);
1668 	absent_or_empty_exec_paths = is_null_or_empty(exec_paths_string);
1669 
1670 	/* If not specified, infer 'jsys' from passed options. */
1671 	if (jsys == -1) {
1672 		/*
1673 		 * Default in absence of "mac.do.rules" and "mac.do.exec_paths"
1674 		 * is to disable.  We never implicitly inherit, as that changes
1675 		 * reasoning about configurations.
1676 		 */
1677 		if (!absent_or_empty_rules || !absent_or_empty_exec_paths)
1678 			jsys = JAIL_SYS_NEW;
1679 		else
1680 			jsys = JAIL_SYS_DISABLE;
1681 	}
1682 
1683 	/* Final checks based on resolved 'jsys'. */
1684 	switch (jsys) {
1685 	case JAIL_SYS_DISABLE:
1686 		/*
1687 		 * Tolerate specified but empty rules or execution paths
1688 		 * (instead of not being specified).  Also, tolerate that one of
1689 		 * them is not empty (but not both).  Indeed, as soon as one is
1690 		 * empty, mac_do(4) is effectively disabled.  This allows the
1691 		 * administrator to still specify a value for one of them, which
1692 		 * is then used for new sub-jails that do not inherit and for
1693 		 * which no value for the parameter is explicitly specified
1694 		 * (because then the value passed here is copied).
1695 		 */
1696 		if (!absent_or_empty_rules && !absent_or_empty_exec_paths) {
1697 			vfs_opterror(opts,
1698 			    "One of 'mac.do.rules' and 'mac_do.exec_paths' "
1699 			    "should not be specified or should be empty when "
1700 			    "'mac.do' is 'disabled'");
1701 			return (EINVAL);
1702 		}
1703 		break;
1704 
1705 	case JAIL_SYS_INHERIT:
1706 		/*
1707 		 * Canonically, no parameters should be specified in this case.
1708 		 * However, we tolerate empty ones, and also non-empty ones
1709 		 * provided they match the inherited values, so that we can
1710 		 * report the *resolved* value of current parameters via
1711 		 * mac_do_jail_get() and have them re-applicable to this jail in
1712 		 * a similar situation.  Testing that inherited values are the
1713 		 * same as passed ones is more expensive than a single test and
1714 		 * requires some atomicity, which is why we do not perform that
1715 		 * here but only in mac_do_jail_set().
1716 		 */
1717 		break;
1718 	}
1719 
1720 	return (0);
1721 }
1722 
1723 static int
1724 mac_do_jail_set(void *obj, void *data)
1725 {
1726 	struct prison *const pr = obj;
1727 	struct vfsoptlist *const opts = data;
1728 	char *rules_string, *exec_paths_string;
1729 	struct parse_error *parse_error = NULL;
1730 	struct conf *model_conf;
1731 	int error, jsys;
1732 	bool absent_or_empty_rules, absent_or_empty_exec_paths;
1733 
1734 	/*
1735 	 * The invariants checks used below correspond to what has already been
1736 	 * checked in jail_check() above.
1737 	 */
1738 
1739 	error = vfs_copyopt(opts, "mac.do", &jsys, sizeof(jsys));
1740 	MPASS(error == 0 || error == ENOENT);
1741 	if (error != 0)
1742 		jsys = -1; /* Mark unfilled. */
1743 
1744 	rules_string = vfs_getopts(opts, "mac.do.rules", &error);
1745 	MPASS(error == 0 || error == ENOENT);
1746 	exec_paths_string = vfs_getopts(opts, "mac.do.exec_paths", &error);
1747 	MPASS(error == 0 || error == ENOENT);
1748 
1749 	absent_or_empty_rules = is_null_or_empty(rules_string);
1750 	absent_or_empty_exec_paths = is_null_or_empty(exec_paths_string);
1751 
1752 	if (jsys == -1) {
1753 		if (!absent_or_empty_rules || !absent_or_empty_exec_paths)
1754 			jsys = JAIL_SYS_NEW;
1755 		else
1756 			jsys = JAIL_SYS_DISABLE;
1757 	}
1758 
1759 	if (jsys == JAIL_SYS_INHERIT) {
1760 		error = 0;
1761 
1762 		if (!absent_or_empty_rules || !absent_or_empty_exec_paths) {
1763 			/*
1764 			 * Some values specified.  Check that they match the
1765 			 * ones we are going to inherit.
1766 			 */
1767 			model_conf = find_conf(pr->pr_parent, NULL);
1768 			if (strcmp(model_conf->rules.string, rules_string)
1769 			    != 0) {
1770 				error = EINVAL;
1771 				vfs_opterror(opts,
1772 				    "'mac.do' is 'inherited' but 'mac.do.rules'"
1773 				    " was specified with a different value "
1774 				    "than the one to be inherited (\"%s\")",
1775 				    model_conf->rules.string);
1776 			}
1777 			if (strcmp(model_conf->exec_paths.exec_paths_str,
1778 			    exec_paths_string) != 0) {
1779 				error = EINVAL;
1780 				vfs_opterror(opts,
1781 				    "'mac.do' is 'inherited' but "
1782 				    "'mac.do.exec_paths' was specified with a "
1783 				    "different value than the one to be "
1784 				    "inherited (\"%s\")",
1785 				    model_conf->exec_paths.exec_paths_str);
1786 			}
1787 			drop_conf(model_conf);
1788 		}
1789 
1790 		if (error == 0)
1791 			/*
1792 			 * There's no TOCTOU problem here as the removal of the
1793 			 * current jail's configuration commutes with changing
1794 			 * the inherited configuration we checked against.
1795 			 */
1796 			remove_conf(pr);
1797 
1798 		return (error);
1799 	}
1800 
1801 	model_conf = NULL;
1802 
1803 	switch (jsys) {
1804 	case JAIL_SYS_DISABLE:
1805 		/*
1806 		 * mac_do(4) is disabled iff one of the parameter's string is
1807 		 * empty.  The parse_and_set_conf() call below treats passing
1808 		 * NULL for a parameter as a flag to copy its value from the
1809 		 * relevant ancestor jail's configuration, so we have to watch
1810 		 * for the final result having an empty parameter if no
1811 		 * parameter has been explicitly passed as empty.  Thanks to
1812 		 * mac_do_jail_check(), we know that at least one parameter is
1813 		 * absent or empty (see the comment for the corresponding case
1814 		 * there).
1815 		 */
1816 		MPASS(absent_or_empty_rules || absent_or_empty_exec_paths);
1817 		if (!absent_or_empty_rules)
1818 			exec_paths_string = "";
1819 		else if (!absent_or_empty_exec_paths)
1820 			rules_string = "";
1821 		else {
1822 			/*
1823 			 * Both are either empty or absent.  If at least one is
1824 			 * absent, we retrieve the applicable configuration as
1825 			 * it will serve as a template (provides default
1826 			 * values).
1827 			 */
1828 			if (rules_string == NULL || exec_paths_string == NULL)
1829 				model_conf = find_conf(pr, NULL);
1830 			/* If both are absent, we have to examine if, in the
1831 			 * currently applicable configuration, one of the
1832 			 * parameters, which we are going to copy, is
1833 			 * effectively empty.  If both of those are non-empty,
1834 			 * we keep the executable paths and empty the rules,
1835 			 * since we expect that this is more convenient to
1836 			 * administrators that may want to enable mac_do(4)
1837 			 * later by just setting new rules.
1838 			 */
1839 			if (rules_string == NULL && exec_paths_string == NULL &&
1840 			    has_rules(&model_conf->rules) &&
1841 			    has_exec_paths(&model_conf->exec_paths))
1842 				rules_string = "";
1843 		}
1844 		break;
1845 
1846 	case JAIL_SYS_NEW:
1847 		/* See the comment before the call to find_conf() above. */
1848 		if (rules_string == NULL || exec_paths_string == NULL)
1849 			model_conf = find_conf(pr, NULL);
1850 		break;
1851 
1852 	default:
1853 		__assert_unreachable();
1854 	}
1855 
1856 	error = parse_and_set_conf(pr, rules_string, exec_paths_string,
1857 	    model_conf, &parse_error);
1858 	if (model_conf != NULL)
1859 		drop_conf(model_conf);
1860 	if (error != 0) {
1861 		vfs_opterror(opts,
1862 		    "MAC/do: Parse error at index %zu: %s\n",
1863 		    parse_error->pos, parse_error->msg);
1864 		free_parse_error(parse_error);
1865 	}
1866 
1867 	return (error);
1868 }
1869 
1870 /*
1871  * OSD jail methods.
1872  *
1873  * There is no PR_METHOD_REMOVE method, as OSD storage is destroyed by the
1874  * common jail code (see prison_cleanup()), which triggers a run of our
1875  * dealloc_jail_osd() destructor.  There is neither a PR_METHOD_CREATE as
1876  * PR_METHOD_SET is called just after (or the created jail destroyed if some
1877  * PR_METHOD_CREATE fails), and our mac_do_jail_set() will ensure a jail is
1878  * properly configured.
1879  */
1880 static const osd_method_t osd_methods[PR_MAXMETHOD] = {
1881 	[PR_METHOD_GET] = mac_do_jail_get,
1882 	[PR_METHOD_CHECK] = mac_do_jail_check,
1883 	[PR_METHOD_SET] = mac_do_jail_set,
1884 };
1885 
1886 
/*
 * Common header structure.
 *
 * Any structure used to pass information between some MAC check function and
 * priv_grant() must have this header as its first member.
 */
struct mac_do_data_header {
	/* Number of bytes allocated for the buffer holding the structure. */
	size_t		 allocated_size;
	/* Number of bytes actually used by the containing structure. */
	size_t		 size;
	/*
	 * Type of the containing structure.  Privilege numbers conveniently
	 * serve as type identifiers, since there is one distinct privilege for
	 * each privilege changing function we are supporting.  The value 0
	 * means that this header is uninitialized.
	 */
	int		 priv;
	/* The configuration that applies. */
	struct conf	*conf;
};
1908 
1909 /*
1910  * The case of unusable or absent per-thread data can actually happen as nothing
1911  * prevents, e.g., priv_check*() with privilege 'priv' to be called standalone,
1912  * as it is currently by, e.g., the Linux emulator for PRIV_CRED_SETUID.  We
1913  * interpret such calls to priv_check*() as full, unrestricted requests for
1914  * 'priv', contrary to what we're doing here for selected operations, and
1915  * consequently will not grant the requested privilege.
1916  *
1917  * Also, we protect ourselves from a concurrent change of 'do_enabled' while
1918  * a call to setcred() is in progress by storing the rules per-thread
1919  * which is then consulted by each successive hook so that they all have
1920  * a coherent view of the specifications, and we empty the slot (actually, mark
1921  * it as empty) when MAC/do is disabled.
1922  */
1923 static int
1924 check_data_usable(const void *const data, const size_t size, const int priv)
1925 {
1926 	const struct mac_do_data_header *const hdr = data;
1927 
1928 	if (hdr == NULL || hdr->priv == 0)
1929 		return (ENOENT);
1930 	/*
1931 	 * Impacting changes in the protocols we are based on...  Don't crash in
1932 	 * production.
1933 	 */
1934 	if (hdr->priv != priv) {
1935 		MPASS(hdr->priv == priv);
1936 		return (EBUSY);
1937 	}
1938 	MPASS(hdr->size == size);
1939 	MPASS(hdr->size <= hdr->allocated_size);
1940 	return (0);
1941 }
1942 
1943 static void
1944 clear_data(void *const data)
1945 {
1946 	struct mac_do_data_header *const hdr = data;
1947 
1948 	if (hdr != NULL) {
1949 		drop_conf(hdr->conf);
1950 		/* We don't deallocate so as to save time on next access. */
1951 		hdr->priv = 0;
1952 	}
1953 }
1954 
1955 static void *
1956 fetch_data(void)
1957 {
1958 	return (osd_thread_get_unlocked(curthread, osd_thread_slot));
1959 }
1960 
1961 static bool
1962 is_data_reusable(const void *const data, const size_t size)
1963 {
1964 	const struct mac_do_data_header *const hdr = data;
1965 
1966 	return (hdr != NULL && size <= hdr->allocated_size);
1967 }
1968 
1969 static void
1970 set_data_header(void *const data, const size_t size, const int priv,
1971     struct conf *const conf)
1972 {
1973 	struct mac_do_data_header *const hdr = data;
1974 
1975 	MPASS(hdr->priv == 0);
1976 	MPASS(priv != 0);
1977 	MPASS(size <= hdr->allocated_size);
1978 	hdr->size = size;
1979 	hdr->priv = priv;
1980 	hdr->conf = conf;
1981 }
1982 
1983 /* The proc lock (and any other non-sleepable lock) must not be held. */
1984 static void *
1985 alloc_data(void *const data, const size_t size)
1986 {
1987 	struct mac_do_data_header *const hdr = realloc(data, size, M_MAC_DO,
1988 	    M_WAITOK);
1989 
1990 	MPASS(size >= sizeof(struct mac_do_data_header));
1991 	hdr->allocated_size = size;
1992 	hdr->priv = 0;
1993 	if (hdr != data) {
1994 		/*
1995 		 * This call either reuses the existing memory allocated for the
1996 		 * slot or tries to allocate some without blocking.
1997 		 */
1998 		int error = osd_thread_set(curthread, osd_thread_slot, hdr);
1999 
2000 		if (error != 0) {
2001 			/* Going to make a M_WAITOK allocation. */
2002 			void **const rsv = osd_reserve(osd_thread_slot);
2003 
2004 			error = osd_thread_set_reserved(curthread,
2005 			    osd_thread_slot, rsv, hdr);
2006 			MPASS(error == 0);
2007 		}
2008 	}
2009 	return (hdr);
2010 }
2011 
2012 /* Destructor for 'osd_thread_slot'. */
2013 static void
2014 dealloc_thread_osd(void *const value)
2015 {
2016 	free(value, M_MAC_DO);
2017 }
2018 
2019 /*
2020  * Whether to grant access to some primary group according to flags.
2021  *
2022  * The passed 'flags' must be those of a rule's matching GID, or the IT_GID type
2023  * flags when MDF_CURRENT has been matched.
2024  *
2025  * Return values:
2026  * - 0:			Access granted.
2027  * - EJUSTRETURN:	Flags are agnostic.
2028  */
2029 static int
2030 grant_primary_group_from_flags(const flags_t flags)
2031 {
2032 	return ((flags & MDF_PRIMARY) != 0 ? 0 : EJUSTRETURN);
2033 }
2034 
2035 /*
2036  * Same as grant_primary_group_from_flags(), but for supplementary groups.
2037  *
2038  * Return values:
2039  * - 0:			Access granted.
2040  * - EJUSTRETURN:	Flags are agnostic.
2041  * - EPERM:		Access denied.
2042  */
2043 static int __unused
2044 grant_supplementary_group_from_flags(const flags_t flags)
2045 {
2046 	if ((flags & MDF_SUPP_MASK) != 0)
2047 		return ((flags & MDF_SUPP_DONT) != 0 ? EPERM : 0);
2048 
2049 	return (EJUSTRETURN);
2050 }
2051 
2052 static int
2053 rule_grant_supplementary_groups(const struct rule *const rule,
2054     const struct ucred *const old_cred, const struct ucred *const new_cred)
2055 {
2056 	const gid_t *const old_groups = old_cred->cr_groups;
2057 	const gid_t *const new_groups = new_cred->cr_groups;
2058 	const int old_ngroups = old_cred->cr_ngroups;
2059 	const int new_ngroups = new_cred->cr_ngroups;
2060 	const flags_t gid_flags = rule->gid_flags;
2061 	const bool current_has_supp = (gid_flags & MDF_CURRENT) != 0 &&
2062 	    (gid_flags & MDF_SUPP_MASK) != 0;
2063 	id_nb_t rule_idx = 0;
2064 	int old_idx = 0, new_idx = 0;
2065 
2066 	if ((gid_flags & MDF_ANY_SUPP) != 0 &&
2067 	    (gid_flags & MDF_MAY_REJ_SUPP) == 0)
2068 		/*
2069 		 * Any set of supplementary groups is accepted, no need to loop
2070 		 * over them.
2071 		 */
2072 		return (0);
2073 
2074 	for (; new_idx < new_ngroups; ++new_idx) {
2075 		const gid_t gid = new_groups[new_idx];
2076 		bool may_accept = false;
2077 
2078 		if ((gid_flags & MDF_ANY_SUPP) != 0)
2079 			may_accept = true;
2080 
2081 		/* Do we have to check for the current supplementary groups? */
2082 		if (current_has_supp) {
2083 			/*
2084 			 * Linear search, as both supplementary groups arrays
2085 			 * are sorted.  Advancing 'old_idx' with a binary search
2086 			 * on absence of MDF_SUPP_MUST doesn't seem worth it in
2087 			 * practice.
2088 			 */
2089 			for (; old_idx < old_ngroups; ++old_idx) {
2090 				const gid_t old_gid = old_groups[old_idx];
2091 
2092 				if (old_gid < gid) {
2093 					/* Mandatory but absent. */
2094 					if ((gid_flags & MDF_SUPP_MUST) != 0)
2095 						return (EPERM);
2096 				} else if (old_gid == gid) {
2097 					switch (gid_flags & MDF_SUPP_MASK) {
2098 					case MDF_SUPP_DONT:
2099 						/* Present but forbidden. */
2100 						return (EPERM);
2101 					case MDF_SUPP_ALLOW:
2102 					case MDF_SUPP_MUST:
2103 						may_accept = true;
2104 						break;
2105 					default:
2106 #ifdef INVARIANTS
2107 						__assert_unreachable();
2108 #else
2109 						/* Better be safe than sorry. */
2110 						return (EPERM);
2111 #endif
2112 					}
2113 					++old_idx;
2114 					break;
2115 				}
2116 				else
2117 					break;
2118 			}
2119 		}
2120 
2121 		/*
2122 		 * Search by GID for a corresponding 'struct id_spec'.
2123 		 *
2124 		 * Again, linear search, with same note on not using binary
2125 		 * search optimization as above (the trigger would be absence of
2126 		 * MDF_EXPLICIT_SUPP_MUST this time).
2127 		 */
2128 		for (; rule_idx < rule->gids_nb; ++rule_idx) {
2129 			const struct id_spec is = rule->gids[rule_idx];
2130 
2131 			if (is.id < gid) {
2132 				/* Mandatory but absent. */
2133 				if ((is.flags & MDF_SUPP_MUST) != 0)
2134 					return (EPERM);
2135 			} else if (is.id == gid) {
2136 				switch (is.flags & MDF_SUPP_MASK) {
2137 				case MDF_SUPP_DONT:
2138 					/* Present but forbidden. */
2139 					return (EPERM);
2140 				case MDF_SUPP_ALLOW:
2141 				case MDF_SUPP_MUST:
2142 					may_accept = true;
2143 					break;
2144 				case 0:
2145 					/* Primary group only. */
2146 					break;
2147 				default:
2148 #ifdef INVARIANTS
2149 					__assert_unreachable();
2150 #else
2151 					/* Better be safe than sorry. */
2152 					return (EPERM);
2153 #endif
2154 				}
2155 				++rule_idx;
2156 				break;
2157 			}
2158 			else
2159 				break;
2160 		}
2161 
2162 		/* 'gid' wasn't explicitly accepted. */
2163 		if (!may_accept)
2164 			return (EPERM);
2165 	}
2166 
2167 	/*
2168 	 * If we must have all current groups and we didn't browse all
2169 	 * of them at this point (because the remaining ones have GIDs
2170 	 * greater than the last requested group), we are simply missing
2171 	 * them.
2172 	 */
2173 	if ((gid_flags & MDF_CURRENT) != 0 &&
2174 	    (gid_flags & MDF_SUPP_MUST) != 0 &&
2175 	    old_idx < old_ngroups)
2176 		return (EPERM);
2177 	/*
2178 	 * Similarly, we have to finish browsing all GIDs from the rule
2179 	 * in case some are marked mandatory.
2180 	 */
2181 	if ((gid_flags & MDF_EXPLICIT_SUPP_MUST) != 0) {
2182 		for (; rule_idx < rule->gids_nb; ++rule_idx) {
2183 			const struct id_spec is = rule->gids[rule_idx];
2184 
2185 			if ((is.flags & MDF_SUPP_MUST) != 0)
2186 				return (EPERM);
2187 		}
2188 	}
2189 
2190 	return (0);
2191 }
2192 
2193 static int
2194 rule_grant_primary_group(const struct rule *const rule,
2195     const struct ucred *const old_cred, const gid_t gid)
2196 {
2197 	struct id_spec gid_is = {.flags = 0};
2198 	const struct id_spec *found_is;
2199 	int error;
2200 
2201 	if ((rule->gid_flags & MDF_ANY) != 0)
2202 		return (0);
2203 
2204 	/* Was MDF_CURRENT specified, and is 'gid' a current GID? */
2205 	if ((rule->gid_flags & MDF_CURRENT) != 0 &&
2206 	    group_is_primary(gid, old_cred)) {
2207 		error = grant_primary_group_from_flags(rule->gid_flags);
2208 		if (error == 0)
2209 			return (0);
2210 	}
2211 
2212 	/* Search by GID for a corresponding 'struct id_spec'. */
2213 	gid_is.id = gid;
2214 	found_is = bsearch(&gid_is, rule->gids, rule->gids_nb,
2215 	    sizeof(*rule->gids), id_spec_cmp);
2216 
2217 	if (found_is != NULL) {
2218 		error = grant_primary_group_from_flags(found_is->flags);
2219 		if (error == 0)
2220 			return (0);
2221 	}
2222 
2223 	return (EPERM);
2224 }
2225 
2226 static int
2227 rule_grant_primary_groups(const struct rule *const rule,
2228     const struct ucred *const old_cred, const struct ucred *const new_cred)
2229 {
2230 	int error;
2231 
2232 	/* Shortcut. */
2233 	if ((rule->gid_flags & MDF_ANY) != 0)
2234 		return (0);
2235 
2236 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_gid);
2237 	if (error != 0)
2238 		return (error);
2239 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_rgid);
2240 	if (error != 0)
2241 		return (error);
2242 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_svgid);
2243 	if (error != 0)
2244 		return (error);
2245 	return (0);
2246 }
2247 
2248 static bool
2249 user_is_current(const uid_t uid, const struct ucred *const old_cred)
2250 {
2251 	return (uid == old_cred->cr_uid || uid == old_cred->cr_ruid ||
2252 	    uid == old_cred->cr_svuid);
2253 }
2254 
2255 static int
2256 rule_grant_user(const struct rule *const rule,
2257     const struct ucred *const old_cred, const uid_t uid)
2258 {
2259 	struct id_spec uid_is = {.flags = 0};
2260 	const struct id_spec *found_is;
2261 
2262 	if ((rule->uid_flags & MDF_ANY) != 0)
2263 		return (0);
2264 
2265 	/* Was MDF_CURRENT specified, and is 'uid' a current UID? */
2266 	if ((rule->uid_flags & MDF_CURRENT) != 0 &&
2267 	    user_is_current(uid, old_cred))
2268 		return (0);
2269 
2270 	/* Search by UID for a corresponding 'struct id_spec'. */
2271 	uid_is.id = uid;
2272 	found_is = bsearch(&uid_is, rule->uids, rule->uids_nb,
2273 	    sizeof(*rule->uids), id_spec_cmp);
2274 
2275 	if (found_is != NULL)
2276 		return (0);
2277 
2278 	return (EPERM);
2279 }
2280 
2281 static int
2282 rule_grant_users(const struct rule *const rule,
2283     const struct ucred *const old_cred, const struct ucred *const new_cred)
2284 {
2285 	int error;
2286 
2287 	/* Shortcut. */
2288 	if ((rule->uid_flags & MDF_ANY) != 0)
2289 		return (0);
2290 
2291 	error = rule_grant_user(rule, old_cred, new_cred->cr_uid);
2292 	if (error != 0)
2293 		return (error);
2294 	error = rule_grant_user(rule, old_cred, new_cred->cr_ruid);
2295 	if (error != 0)
2296 		return (error);
2297 	error = rule_grant_user(rule, old_cred, new_cred->cr_svuid);
2298 	if (error != 0)
2299 		return (error);
2300 
2301 	return (0);
2302 }
2303 
/*
 * Check the whole transition from 'old_cred' to 'new_cred' against 'rule':
 * users first, then primary groups, then supplementary groups.  Checking stops
 * at the first step that fails.
 *
 * Returns 0 if every step grants access, else the failing step's error.
 */
static int
rule_grant_setcred(const struct rule *const rule,
    const struct ucred *const old_cred, const struct ucred *const new_cred)
{
	int error;

	error = rule_grant_users(rule, old_cred, new_cred);
	if (error == 0)
		error = rule_grant_primary_groups(rule, old_cred, new_cred);
	if (error == 0)
		error = rule_grant_supplementary_groups(rule, old_cred,
		    new_cred);

	return (error);
}
2322 
2323 static bool
2324 rule_applies(const struct rule *const rule, const struct ucred *const cred)
2325 {
2326 	if (rule->from_type == IT_UID && rule->from_id == cred->cr_ruid)
2327 		return (true);
2328 	if (rule->from_type == IT_GID && realgroupmember(rule->from_id, cred))
2329 		return (true);
2330 	return (false);
2331 }
2332 
/*
 * To pass data between check_setcred() and priv_grant() (on PRIV_CRED_SETCRED).
 *
 * This is the per-thread data set up by mac_do_setcred_enter(), filled by
 * mac_do_check_setcred() and consumed by mac_do_priv_grant().
 */
struct mac_do_setcred_data {
	/*
	 * Generic header.  Must stay the first member, as the helpers handling
	 * the header access it by casting the data pointer.
	 */
	struct mac_do_data_header hdr;
	/* Requested credentials, recorded by mac_do_check_setcred(). */
	const struct ucred *new_cred;
	/* setcred() flags, recorded by mac_do_check_setcred(). */
	u_int setcred_flags;
};
2341 
2342 static int
2343 mac_do_priv_grant(struct ucred *cred, int priv)
2344 {
2345 	struct mac_do_setcred_data *const data = fetch_data();
2346 	struct rules *rules;
2347 	const struct ucred *new_cred;
2348 	const struct rule *rule;
2349 	u_int setcred_flags;
2350 	int error;
2351 
2352 	/* Bail out fast if we aren't concerned. */
2353 	if (priv != PRIV_CRED_SETCRED)
2354 		return (EPERM);
2355 
2356 	/*
2357 	 * Do we have to do something?
2358 	 */
2359 	if (check_data_usable(data, sizeof(*data), priv) != 0)
2360 		/* No. */
2361 		return (EPERM);
2362 
2363 	rules = &data->hdr.conf->rules;
2364 	new_cred = data->new_cred;
2365 	KASSERT(new_cred != NULL,
2366 	    ("priv_check*() called before mac_cred_check_setcred()"));
2367 	setcred_flags = data->setcred_flags;
2368 
2369 	/*
2370 	 * Explicitly check that only the flags we currently support are present
2371 	 * in order to avoid accepting transitions with other changes than those
2372 	 * we are actually going to check.  Currently, this rules out the
2373 	 * SETCREDF_MAC_LABEL flag.  This may be improved by adding code
2374 	 * actually checking whether the requested label and the current one
2375 	 * would differ.
2376 	 */
2377 	if ((setcred_flags & ~(SETCREDF_UID | SETCREDF_RUID | SETCREDF_SVUID |
2378 	    SETCREDF_GID | SETCREDF_RGID | SETCREDF_SVGID |
2379 	    SETCREDF_SUPP_GROUPS)) != 0)
2380 		return (EPERM);
2381 
2382 	/*
2383 	 * Browse rules, and for those that match the requestor, call specific
2384 	 * privilege granting functions interpreting the "to"/"target" part.
2385 	 */
2386 	error = EPERM;
2387 	STAILQ_FOREACH(rule, &rules->head, r_entries)
2388 	    if (rule_applies(rule, cred)) {
2389 		    error = rule_grant_setcred(rule, cred, new_cred);
2390 		    if (error != EPERM)
2391 			    break;
2392 	    }
2393 
2394 	return (error);
2395 }
2396 
2397 static int
2398 check_proc(void)
2399 {
2400 	struct prison *const pr = curproc->p_ucred->cr_prison;
2401 	char *path, *to_free;
2402 	struct conf *conf;
2403 	struct exec_paths *exec_paths;
2404 	int error;
2405 
2406 	/*
2407 	 * Only grant privileges if requested by the right executable.
2408 	 *
2409 	 * As MAC/do configuration is per-jail, in order to avoid confused
2410 	 * deputy situations in chroots (privileged or unprivileged), make sure
2411 	 * to check the path from the current jail's root.
2412 	 *
2413 	 * XXXOC: We may want to base this check on a tunable path and/or
2414 	 * a specific MAC label.  Going even further, e.g., envisioning to
2415 	 * completely replace the path check with the latter, we would need to
2416 	 * install FreeBSD on a FS with multilabel enabled by default, which in
2417 	 * practice entails adding an option to ZFS to set MNT_MULTILABEL
2418 	 * automatically on mounts, ensuring that root (and more if using
2419 	 * different partitions) ZFS or UFS filesystems are created with
2420 	 * multilabel turned on, and having the installation procedure support
2421 	 * setting a MAC label per file (perhaps via additions to mtree(1)).  So
2422 	 * this probably isn't going to happen overnight, if ever.
2423 	 */
2424 	if (vn_fullpath_jail(curproc->p_textvp, &path, &to_free) != 0)
2425 		return (EPERM);
2426 
2427 	error = EPERM;
2428 	conf = find_conf(pr, NULL);
2429 	exec_paths = &conf->exec_paths;
2430 
2431 	for (int i = 0; i < exec_paths->exec_path_count; i++)
2432 		if (strcmp(exec_paths->exec_paths[i], path) == 0) {
2433 			error = 0;
2434 			break;
2435 		}
2436 
2437 	drop_conf(conf);
2438 	free(to_free, M_TEMP);
2439 	return (error);
2440 }
2441 
2442 static void
2443 mac_do_setcred_enter(void)
2444 {
2445 	struct prison *const pr = curproc->p_ucred->cr_prison;
2446 	struct mac_do_setcred_data * data;
2447 	struct conf *conf;
2448 	int error;
2449 
2450 	/*
2451 	 * If not enabled, don't prepare data.  Other hooks will check for that
2452 	 * to know if they have to do something.
2453 	 */
2454 	if (do_enabled == 0)
2455 		return;
2456 
2457 	/*
2458 	 * MAC/do only applies to a process launched from a given executable.
2459 	 * For other processes, we just won't intervene (we don't deny requests,
2460 	 * nor do we grant privileges to them).
2461 	 */
2462 	error = check_proc();
2463 	if (error != 0)
2464 		return;
2465 
2466 	/*
2467 	 * Find the currently applicable rules.
2468 	 */
2469 	conf = find_conf(pr, NULL);
2470 
2471 	/*
2472 	 * Setup thread data to be used by other hooks.
2473 	 */
2474 	data = fetch_data();
2475 	if (!is_data_reusable(data, sizeof(*data)))
2476 		data = alloc_data(data, sizeof(*data));
2477 	set_data_header(data, sizeof(*data), PRIV_CRED_SETCRED, conf);
2478 	/* Not really necessary, but helps to catch programming errors. */
2479 	data->new_cred = NULL;
2480 	data->setcred_flags = 0;
2481 }
2482 
2483 static int
2484 mac_do_check_setcred(u_int flags, const struct ucred *const old_cred,
2485     struct ucred *const new_cred)
2486 {
2487 	struct mac_do_setcred_data *const data = fetch_data();
2488 
2489 	/*
2490 	 * Do we have to do something?
2491 	 */
2492 	if (check_data_usable(data, sizeof(*data), PRIV_CRED_SETCRED) != 0)
2493 		/* No. */
2494 		return (0);
2495 
2496 	/*
2497 	 * Keep track of the setcred() flags and the new credentials for
2498 	 * priv_check*().
2499 	 */
2500 	data->new_cred = new_cred;
2501 	data->setcred_flags = flags;
2502 
2503 	return (0);
2504 }
2505 
2506 static void
2507 mac_do_setcred_exit(void)
2508 {
2509 	struct mac_do_setcred_data *const data = fetch_data();
2510 
2511 	if (check_data_usable(data, sizeof(*data), PRIV_CRED_SETCRED) == 0)
2512 		/*
2513 		 * This doesn't deallocate the small per-thread data storage,
2514 		 * which can be reused on subsequent calls.  (That data is of
2515 		 * course deallocated as the current thread dies or this module
2516 		 * is unloaded.)
2517 		 */
2518 		clear_data(data);
2519 }
2520 
2521 static void
2522 mac_do_init(struct mac_policy_conf *mpc)
2523 {
2524 	struct prison *pr;
2525 
2526 	osd_jail_slot = osd_jail_register(dealloc_jail_osd, osd_methods);
2527 	set_default_conf(&prison0);
2528 	sx_slock(&allprison_lock);
2529 	TAILQ_FOREACH(pr, &allprison, pr_list)
2530 	    set_default_conf(pr);
2531 	sx_sunlock(&allprison_lock);
2532 
2533 	osd_thread_slot = osd_thread_register(dealloc_thread_osd);
2534 }
2535 
/*
 * Policy teardown: deregister the thread OSD slot, then the jail OSD slot.
 */
static void
mac_do_destroy(struct mac_policy_conf *mpc)
{
	/*
	 * osd_thread_deregister() must be called before osd_jail_deregister(),
	 * for the reason explained in dealloc_jail_osd().
	 */
	osd_thread_deregister(osd_thread_slot);
	osd_jail_deregister(osd_jail_slot);
}
2546 
2547 static struct mac_policy_ops do_ops = {
2548 	.mpo_init = mac_do_init,
2549 	.mpo_destroy = mac_do_destroy,
2550 	.mpo_cred_setcred_enter = mac_do_setcred_enter,
2551 	.mpo_cred_check_setcred = mac_do_check_setcred,
2552 	.mpo_cred_setcred_exit = mac_do_setcred_exit,
2553 	.mpo_priv_grant = mac_do_priv_grant,
2554 };
2555 
/*
 * Declare the "MAC/do" policy module, backed by the 'do_ops' entry points.
 * NOTE(review): MPC_LOADTIME_FLAG_UNLOADOK presumably allows the module to be
 * unloaded -- confirm against the MAC framework documentation.
 */
MAC_POLICY_SET(&do_ops, mac_do, "MAC/do", MPC_LOADTIME_FLAG_UNLOADOK, NULL);
MODULE_VERSION(mac_do, 1);
2558