xref: /freebsd/sys/security/mac_do/mac_do.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright(c) 2024 Baptiste Daroussin <bapt@FreeBSD.org>
5  * Copyright (c) 2024 The FreeBSD Foundation
6  *
7  * Portions of this software were developed by Olivier Certner
8  * <olce.freebsd@certner.fr> at Kumacom SARL under sponsorship from the FreeBSD
9  * Foundation.
10  */
11 
12 #include <sys/param.h>
13 #include <sys/systm.h>
14 #include <sys/ctype.h>
15 #include <sys/jail.h>
16 #include <sys/kernel.h>
17 #include <sys/limits.h>
18 #include <sys/lock.h>
19 #include <sys/malloc.h>
20 #include <sys/module.h>
21 #include <sys/mount.h>
22 #include <sys/mutex.h>
23 #include <sys/priv.h>
24 #include <sys/proc.h>
25 #include <sys/refcount.h>
26 #include <sys/socket.h>
27 #include <sys/sx.h>
28 #include <sys/sysctl.h>
29 #include <sys/ucred.h>
30 #include <sys/vnode.h>
31 
32 #include <machine/stdarg.h>
33 
34 #include <security/mac/mac_policy.h>
35 
36 static SYSCTL_NODE(_security_mac, OID_AUTO, do,
37     CTLFLAG_RW|CTLFLAG_MPSAFE, 0, "mac_do policy controls");
38 
39 static int	do_enabled = 1;
40 SYSCTL_INT(_security_mac_do, OID_AUTO, enabled, CTLFLAG_RWTUN,
41     &do_enabled, 0, "Enforce do policy");
42 
43 static int	print_parse_error = 1;
44 SYSCTL_INT(_security_mac_do, OID_AUTO, print_parse_error, CTLFLAG_RWTUN,
45     &print_parse_error, 0, "Print parse errors on setting rules "
46     "(via sysctl(8)).");
47 
48 static MALLOC_DEFINE(M_DO, "do_rule", "Rules for mac_do");
49 
50 #define MAC_RULE_STRING_LEN	1024
51 
52 static unsigned		osd_jail_slot;
53 static unsigned		osd_thread_slot;
54 
/*
 * Identifier types.  IT_INVALID must stay 0, and IT_LAST must designate the
 * highest valid type.
 */
#define IT_INVALID	0
#define IT_UID		1
#define IT_GID		2
#define IT_ANY		3
#define IT_LAST		IT_ANY

/* Canonical textual form of each IT_* type, indexed by type. */
static const char *id_type_to_str[] = {
	[IT_INVALID]	= "invalid",
	[IT_UID]	= "uid",
	[IT_GID]	= "gid",
	/* parse_id_type() additionally accepts "any" for this one. */
	[IT_ANY]	= "*",
};
68 
69 #define PARSE_ERROR_SIZE	256
70 
71 struct parse_error {
72 	size_t	pos;
73 	char	msg[PARSE_ERROR_SIZE];
74 };
75 
76 /*
77  * We assume that 'uid_t' and 'gid_t' are aliases to 'u_int' in conversions
78  * required for parsing rules specification strings.
79  */
80 _Static_assert(sizeof(uid_t) == sizeof(u_int) && (uid_t)-1 >= 0 &&
81     sizeof(gid_t) == sizeof(u_int) && (gid_t)-1 >= 0,
82     "mac_do(4) assumes that 'uid_t' and 'gid_t' are aliases to 'u_int'");
83 
84 /*
85  * Internal flags.
86  *
87  * They either apply as per-type (t) or per-ID (i) but are conflated because all
88  * per-ID flags are also valid as per-type ones to qualify the "current" (".")
89  * per-type flag.  Also, some of them are in fact exclusive, but we use one-hot
90  * encoding for simplicity.
91  *
92  * There is currently room for "only" 16 bits.  As these flags are purely
93  * internal, they can be renumbered and/or their type changed as needed.
94  *
95  * See also the check_*() functions below.
96  */
97 typedef uint16_t	flags_t;
98 
99 /* (i,gid) Specification concerns primary groups. */
100 #define MDF_PRIMARY	(1u << 0)
101 /* (i,gid) Specification concerns supplementary groups. */
102 #define MDF_SUPP_ALLOW	(1u << 1)
103 /* (i,gid) Group must appear as a supplementary group. */
104 #define MDF_SUPP_MUST	(1u << 2)
105 /* (i,gid) Group must not appear as a supplementary group. */
106 #define MDF_SUPP_DONT	(1u << 3)
107 #define MDF_SUPP_MASK	(MDF_SUPP_ALLOW | MDF_SUPP_MUST | MDF_SUPP_DONT)
108 #define MDF_ID_MASK	(MDF_PRIMARY | MDF_SUPP_MASK)
109 
110 /*
111  * (t) All IDs allowed.
112  *
113  * For GIDs, MDF_ANY only concerns primary groups.  The MDF_PRIMARY and
114  * MDF_SUPP_* flags never apply to MDF_ANY, but can be present if MDF_CURRENT is
115  * present also, as usual.
116  */
117 #define MDF_ANY			(1u << 8)
118 /* (t) Current IDs allowed. */
119 #define MDF_CURRENT		(1u << 9)
120 #define MDF_TYPE_COMMON_MASK	(MDF_ANY | MDF_CURRENT)
121 /* (t,gid) All IDs allowed as supplementary groups. */
122 #define MDF_ANY_SUPP		(1u << 10)
123 /* (t,gid) Some ID or MDF_CURRENT has MDF_SUPP_MUST or MDF_SUPP_DONT. */
124 #define MDF_MAY_REJ_SUPP	(1u << 11)
125 /* (t,gid) Some explicit ID (not MDF_CURRENT) has MDF_SUPP_MUST. */
126 #define MDF_EXPLICIT_SUPP_MUST	(1u << 12)
127 /* (t,gid) Whether any target clause is about primary groups.  Used during
128  * parsing only. */
129 #define MDF_HAS_PRIMARY_CLAUSE	(1u << 13)
130 /* (t,gid) Whether any target clause is about supplementary groups.  Used during
131  * parsing only. */
132 #define MDF_HAS_SUPP_CLAUSE	(1u << 14)
133 #define MDF_TYPE_GID_MASK	(MDF_ANY_SUPP | MDF_MAY_REJ_SUPP |	\
134     MDF_EXPLICIT_SUPP_MUST | MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE)
135 #define MDF_TYPE_MASK		(MDF_TYPE_COMMON_MASK | MDF_TYPE_GID_MASK)
136 
137 /*
138  * Persistent structures.
139  */
140 
141 struct id_spec {
142 	u_int		 id;
143 	flags_t		 flags; /* See MDF_* above. */
144 };
145 
146 /*
147  * This limits the number of target clauses per type to 65535.  With the current
148  * value of MAC_RULE_STRING_LEN (1024), this is way more than enough anyway.
149  */
150 typedef uint16_t	 id_nb_t;
151 /* We only have a few IT_* types. */
152 typedef uint16_t	 id_type_t;
153 
154 struct rule {
155 	STAILQ_ENTRY(rule) r_entries;
156 	id_type_t	 from_type;
157 	u_int		 from_id;
158 	flags_t		 uid_flags; /* See MDF_* above. */
159 	id_nb_t		 uids_nb;
160 	flags_t		 gid_flags; /* See MDF_* above. */
161 	id_nb_t		 gids_nb;
162 	struct id_spec	*uids;
163 	struct id_spec	*gids;
164 };
165 
166 STAILQ_HEAD(rulehead, rule);
167 
168 struct rules {
169 	char		string[MAC_RULE_STRING_LEN];
170 	struct rulehead	head;
171 	volatile u_int	use_count __aligned(CACHE_LINE_SIZE);
172 };
173 
174 /*
175  * Temporary structures used to build a 'struct rule' above.
176  */
177 
178 struct id_elem {
179 	STAILQ_ENTRY(id_elem) ie_entries;
180 	struct id_spec spec;
181 };
182 
183 STAILQ_HEAD(id_list, id_elem);
184 
185 #ifdef INVARIANTS
186 static void
187 check_type(const id_type_t type)
188 {
189 	if (type > IT_LAST)
190 		panic("Invalid type number %u", type);
191 }
192 
193 static void
194 panic_for_unexpected_flags(const id_type_t type, const flags_t flags,
195     const char *const str)
196 {
197 	panic("ID type %s: Unexpected flags %u (%s), ", id_type_to_str[type],
198 	    flags, str);
199 }
200 
201 static void
202 check_type_and_id_flags(const id_type_t type, const flags_t flags)
203 {
204 	const char *str;
205 
206 	check_type(type);
207 	switch (type) {
208 	case IT_UID:
209 		if (flags != 0) {
210 			str = "only 0 allowed";
211 			goto unexpected_flags;
212 		}
213 		break;
214 	case IT_GID:
215 		if ((flags & ~MDF_ID_MASK) != 0) {
216 			str = "only bits in MDF_ID_MASK allowed";
217 			goto unexpected_flags;
218 		}
219 		if (!powerof2(flags & MDF_SUPP_MASK)) {
220 			str = "only a single flag in MDF_SUPP_MASK allowed";
221 			goto unexpected_flags;
222 		}
223 		break;
224 	default:
225 	    __assert_unreachable();
226 	}
227 	return;
228 
229 unexpected_flags:
230 	panic_for_unexpected_flags(type, flags, str);
231 }
232 
233 static void
234 check_type_and_id_spec(const id_type_t type, const struct id_spec *const is)
235 {
236 	check_type_and_id_flags(type, is->flags);
237 }
238 
239 static void
240 check_type_and_type_flags(const id_type_t type, const flags_t flags)
241 {
242 	const char *str;
243 
244 	check_type_and_id_flags(type, flags & MDF_ID_MASK);
245 	if ((flags & ~MDF_ID_MASK & ~MDF_TYPE_MASK) != 0) {
246 		str = "only MDF_ID_MASK | MDF_TYPE_MASK bits allowed";
247 		goto unexpected_flags;
248 	}
249 	if ((flags & MDF_ANY) != 0 && (flags & MDF_CURRENT) != 0 &&
250 	    (type == IT_UID || (flags & MDF_PRIMARY) != 0)) {
251 		str = "MDF_ANY and MDF_CURRENT are exclusive for UIDs "
252 		    "or primary group GIDs";
253 		goto unexpected_flags;
254 	}
255 	if ((flags & MDF_ANY_SUPP) != 0 && (flags & MDF_CURRENT) != 0 &&
256 	    (flags & MDF_SUPP_MASK) != 0) {
257 		str = "MDF_SUPP_ANY and MDF_CURRENT with supplementary "
258 		    "groups specification are exclusive";
259 		goto unexpected_flags;
260 	}
261 	if (((flags & MDF_PRIMARY) != 0 || (flags & MDF_ANY) != 0) &&
262 	    (flags & MDF_HAS_PRIMARY_CLAUSE) == 0) {
263 		str = "Presence of folded primary clause not reflected "
264 		    "by presence of MDF_HAS_PRIMARY_CLAUSE";
265 		goto unexpected_flags;
266 	}
267 	if (((flags & MDF_SUPP_MASK) != 0 || (flags & MDF_ANY_SUPP) != 0) &&
268 	    (flags & MDF_HAS_SUPP_CLAUSE) == 0) {
269 		str = "Presence of folded supplementary clause not reflected "
270 		    "by presence of MDF_HAS_SUPP_CLAUSE";
271 		goto unexpected_flags;
272 	}
273 	return;
274 
275 unexpected_flags:
276 	panic_for_unexpected_flags(type, flags, str);
277 }
278 #else /* !INVARIANTS */
279 #define check_type_and_id_flags(...)
280 #define check_type_and_id_spec(...)
281 #define check_type_and_type_flags(...)
282 #endif /* INVARIANTS */
283 
284 /*
285  * Returns EALREADY if both flags have some overlap, or EINVAL if flags are
286  * incompatible, else 0 with flags successfully merged into 'dest'.
287  */
288 static int
289 coalesce_id_flags(const flags_t src, flags_t *const dest)
290 {
291 	flags_t res;
292 
293 	if ((src & *dest) != 0)
294 		return (EALREADY);
295 
296 	res = src | *dest;
297 
298 	/* Check for compatibility of supplementary flags, and coalesce. */
299 	if ((res & MDF_SUPP_MASK) != 0) {
300 		/* MDF_SUPP_DONT incompatible with the rest. */
301 		if ((res & MDF_SUPP_DONT) != 0 && (res & MDF_SUPP_MASK &
302 		    ~MDF_SUPP_DONT) != 0)
303 			return (EINVAL);
304 		/*
305 		 * Coalesce MDF_SUPP_ALLOW and MDF_SUPP_MUST into MDF_SUPP_MUST.
306 		 */
307 		if ((res & MDF_SUPP_ALLOW) != 0 && (res & MDF_SUPP_MUST) != 0)
308 			res &= ~MDF_SUPP_ALLOW;
309 	}
310 
311 	*dest = res;
312 	return (0);
313 }
314 
315 static void
316 toast_rules(struct rules *const rules)
317 {
318 	struct rulehead *const head = &rules->head;
319 	struct rule *rule, *rule_next;
320 
321 	STAILQ_FOREACH_SAFE(rule, head, r_entries, rule_next) {
322 		free(rule->uids, M_DO);
323 		free(rule->gids, M_DO);
324 		free(rule, M_DO);
325 	}
326 	free(rules, M_DO);
327 }
328 
329 static struct rules *
330 alloc_rules(void)
331 {
332 	struct rules *const rules = malloc(sizeof(*rules), M_DO, M_WAITOK);
333 
334 	_Static_assert(MAC_RULE_STRING_LEN > 0, "MAC_RULE_STRING_LEN <= 0!");
335 	rules->string[0] = 0;
336 	STAILQ_INIT(&rules->head);
337 	rules->use_count = 0;
338 	return (rules);
339 }
340 
/* Tells whether 's' is NULL or points to an empty string. */
static bool
is_null_or_empty(const char *s)
{
	if (s == NULL)
		return (true);
	return (*s == '\0');
}
346 
347 /*
348  * String to unsigned int.
349  *
350  * Contrary to the "standard" strtou*() family of functions, do not tolerate
351  * spaces at start nor an empty string, and returns a status code, the 'u_int'
352  * result being returned through a passed pointer (if no error).
353  *
354  * We detour through 'quad_t' because in-kernel strto*() functions cannot set
355  * 'errno' and thus can't distinguish a true maximum value from one returned
356  * because of overflow.  We use 'quad_t' instead of 'u_quad_t' to support
357  * negative specifications (e.g., such as "-1" for UINT_MAX).
358  */
359 static int
360 strtoui_strict(const char *const restrict s, const char **const restrict endptr,
361     int base, u_int *result)
362 {
363 	char *ep;
364 	quad_t q;
365 
366 	/* Rule out spaces and empty specifications. */
367 	if (s[0] == '\0' || isspace(s[0])) {
368 		if (endptr != NULL)
369 			*endptr = s;
370 		return (EINVAL);
371 	}
372 
373 	q = strtoq(s, &ep, base);
374 	if (endptr != NULL)
375 		*endptr = ep;
376 	if (q < 0) {
377 		/* We allow specifying a negative number. */
378 		if (q < -(quad_t)UINT_MAX - 1 || q == QUAD_MIN)
379 			return (EOVERFLOW);
380 	} else {
381 		if (q > UINT_MAX || q == UQUAD_MAX)
382 			return (EOVERFLOW);
383 	}
384 
385 	*result = (u_int)q;
386 	return (0);
387 }
388 
389 /*
390  * strsep() variant skipping spaces and tabs.
391  *
392  * Skips spaces and tabs at beginning and end of the token before one of the
393  * 'delim' characters, i.e., at start of string and just before one of the
394  * delimiter characters (so it doesn't prevent tokens containing spaces and tabs
395  * in the middle).
396  */
397 static char *
398 strsep_noblanks(char **const stringp, const char *delim)
399 {
400 	char *p = *stringp;
401 	char *ret, *wsp;
402 	size_t idx;
403 
404 	if (p == NULL)
405 		return (NULL);
406 
407 	idx = strspn(p, " \t");
408 	p += idx;
409 
410 	ret = strsep(&p, delim);
411 
412 	/* Rewind spaces/tabs at the end. */
413 	if (p == NULL)
414 		wsp = ret + strlen(ret);
415 	else
416 		wsp = p - 1;
417 	for (; wsp != ret; --wsp) {
418 		switch (wsp[-1]) {
419 		case ' ':
420 		case '\t':
421 			continue;
422 		}
423 		break;
424 	}
425 	*wsp = '\0';
426 
427 	*stringp = p;
428 	return (ret);
429 }
430 
431 
432 static void
433 make_parse_error(struct parse_error **const parse_error, const size_t pos,
434     const char *const fmt, ...)
435 {
436 	struct parse_error *const err = malloc(sizeof(*err), M_DO, M_WAITOK);
437 	va_list ap;
438 
439 	err->pos = pos;
440 	va_start(ap, fmt);
441 	vsnprintf(err->msg, PARSE_ERROR_SIZE, fmt, ap);
442 	va_end(ap);
443 
444 	MPASS(*parse_error == NULL);
445 	*parse_error = err;
446 }
447 
448 static void
449 free_parse_error(struct parse_error *const parse_error)
450 {
451 	free(parse_error, M_DO);
452 }
453 
454 static int
455 parse_id_type(const char *const string, id_type_t *const type,
456     struct parse_error **const parse_error)
457 {
458 	/*
459 	 * Special case for "any", as the canonical form for IT_ANY in
460 	 * id_type_to_str[] is "*".
461 	 */
462 	if (strcmp(string, "any") == 0) {
463 		*type = IT_ANY;
464 		return (0);
465 	}
466 
467 	/* Start at 1 to avoid parsing "invalid". */
468 	for (size_t i = 1; i <= IT_LAST; ++i) {
469 		if (strcmp(string, id_type_to_str[i]) == 0) {
470 			*type = i;
471 			return (0);
472 		}
473 	}
474 
475 	*type = IT_INVALID;
476 	make_parse_error(parse_error, 0, "No valid type found.");
477 	return (EINVAL);
478 }
479 
480 static size_t
481 parse_gid_flags(const char *const string, flags_t *const flags,
482     flags_t *const gid_flags)
483 {
484 	switch (string[0]) {
485 	case '+':
486 		*flags |= MDF_SUPP_ALLOW;
487 		goto has_supp_clause;
488 	case '!':
489 		*flags |= MDF_SUPP_MUST;
490 		*gid_flags |= MDF_MAY_REJ_SUPP;
491 		goto has_supp_clause;
492 	case '-':
493 		*flags |= MDF_SUPP_DONT;
494 		*gid_flags |= MDF_MAY_REJ_SUPP;
495 		goto has_supp_clause;
496 	has_supp_clause:
497 		*gid_flags |= MDF_HAS_SUPP_CLAUSE;
498 		return (1);
499 	}
500 
501 	return (0);
502 }
503 
/* Tells whether 'string' is one of the two spellings of "any ID". */
static bool
parse_any(const char *const string)
{
	if (strcmp(string, "any") == 0)
		return (true);
	return (strcmp(string, "*") == 0);
}
509 
510 static bool
511 has_clauses(const id_nb_t nb, const flags_t type_flags)
512 {
513 	return ((type_flags & MDF_TYPE_MASK) != 0 || nb != 0);
514 }
515 
516 static int
517 parse_target_clause(char *to, struct rule *const rule,
518     struct id_list *const uid_list, struct id_list *const gid_list,
519     struct parse_error **const parse_error)
520 {
521 	const char *const start = to;
522 	char *to_type, *to_id;
523 	const char *p;
524 	struct id_list *list;
525 	id_nb_t *nb;
526 	flags_t *tflags;
527 	struct id_elem *ie;
528 	struct id_spec is = {.flags = 0};
529 	flags_t gid_flags = 0;
530 	id_type_t type;
531 	int error;
532 
533 	MPASS(*parse_error == NULL);
534 	MPASS(to != NULL);
535 	to_type = strsep_noblanks(&to, "=");
536 	MPASS(to_type != NULL);
537 	to_type += parse_gid_flags(to_type, &is.flags, &gid_flags);
538 	error = parse_id_type(to_type, &type, parse_error);
539 	if (error != 0)
540 		goto einval;
541 	if (type != IT_GID && is.flags != 0) {
542 		make_parse_error(parse_error, to_type - start,
543 		    "Expected type 'gid' after flags, not '%s'.",
544 		    to_type);
545 		goto einval;
546 	}
547 
548 	to_id = strsep_noblanks(&to, "");
549 	switch (type) {
550 	case IT_GID:
551 		if (to_id == NULL) {
552 			make_parse_error(parse_error, to_type - start,
553 			    "No '=' and ID specification after type '%s'.",
554 			    to_type);
555 			goto einval;
556 		}
557 
558 		if (is.flags == 0) {
559 			/* No flags: Dealing with a primary group. */
560 			is.flags |= MDF_PRIMARY;
561 			gid_flags |= MDF_HAS_PRIMARY_CLAUSE;
562 		}
563 
564 		list = gid_list;
565 		nb = &rule->gids_nb;
566 		tflags = &rule->gid_flags;
567 
568 		/* "*" or "any"? */
569 		if (parse_any(to_id)) {
570 			/*
571 			 * We check that we have not seen any other clause of
572 			 * the same category (i.e., concerning primary or
573 			 * supplementary groups).
574 			 */
575 			if ((is.flags & MDF_PRIMARY) != 0) {
576 				if ((*tflags & MDF_HAS_PRIMARY_CLAUSE) != 0) {
577 					make_parse_error(parse_error,
578 					    to_id - start,
579 					    "'any' specified after another "
580 					    "(primary) GID.");
581 					goto einval;
582 				}
583 				*tflags |= gid_flags | MDF_ANY;
584 			} else {
585 				/*
586 				 * If a supplementary group flag was present, it
587 				 * must be MDF_SUPP_ALLOW ("+").
588 				 */
589 				if ((is.flags & MDF_SUPP_MASK) != MDF_SUPP_ALLOW) {
590 					make_parse_error(parse_error,
591 					    to_id - start,
592 					    "'any' specified with another "
593 					    "flag than '+'.");
594 					goto einval;
595 				}
596 				if ((*tflags & MDF_HAS_SUPP_CLAUSE) != 0) {
597 					make_parse_error(parse_error,
598 					    to_id - start,
599 					    "'any' with flag '+' specified after "
600 					    "another (supplementary) GID.");
601 					goto einval;
602 				}
603 				*tflags |= gid_flags | MDF_ANY_SUPP;
604 			}
605 			goto check_type_and_finish;
606 		} else {
607 			/*
608 			 * Check that we haven't already seen "any" for the same
609 			 * category.
610 			 */
611 			if ((is.flags & MDF_PRIMARY) != 0) {
612 				if ((*tflags & MDF_ANY) != 0) {
613 					make_parse_error(parse_error,
614 					    to_id - start,
615 					    "Some (primary) GID specified after "
616 					    "'any'.");
617 					goto einval;
618 				}
619 			} else if ((*tflags & MDF_ANY_SUPP) != 0 &&
620 			    (is.flags & MDF_SUPP_ALLOW) != 0) {
621 				make_parse_error(parse_error,
622 				    to_id - start,
623 				    "Some (supplementary) GID specified after "
624 				    "'any' with flag '+'.");
625 				goto einval;
626 			}
627 			*tflags |= gid_flags;
628 		}
629 		break;
630 
631 	case IT_UID:
632 		if (to_id == NULL) {
633 			make_parse_error(parse_error, to_type - start,
634 			    "No '=' and ID specification after type '%s'.",
635 			    to_type);
636 			goto einval;
637 		}
638 
639 		list = uid_list;
640 		nb = &rule->uids_nb;
641 		tflags = &rule->uid_flags;
642 
643 		/* "*" or "any"? */
644 		if (parse_any(to_id)) {
645 			/* There must not be any other clause. */
646 			if (has_clauses(*nb, *tflags)) {
647 				make_parse_error(parse_error, to_id - start,
648 				    "'any' specified after another UID.");
649 				goto einval;
650 			}
651 			*tflags |= MDF_ANY;
652 			goto check_type_and_finish;
653 		} else {
654 			/*
655 			 * Check that we haven't already seen "any" for the same
656 			 * category.
657 			 */
658 			if ((*tflags & MDF_ANY) != 0) {
659 				make_parse_error(parse_error, to_id - start,
660 				    "Some UID specified after 'any'.");
661 				goto einval;
662 			}
663 		}
664 		break;
665 
666 	case IT_ANY:
667 		/* No ID allowed. */
668 		if (to_id != NULL) {
669 			make_parse_error(parse_error, to_type - start,
670 			    "No '=' and ID allowed after type '%s'.", to_type);
671 			goto einval;
672 		}
673 		/*
674 		 * We can't have IT_ANY after any other IT_*, it must be the
675 		 * only one.
676 		 */
677 		if (has_clauses(rule->uids_nb, rule->uid_flags) ||
678 		    has_clauses(rule->gids_nb, rule->gid_flags)) {
679 			make_parse_error(parse_error, to_type - start,
680 			    "Target clause of type '%s' coming after another "
681 			    "clause (must be alone).", to_type);
682 			goto einval;
683 		}
684 		rule->uid_flags |= MDF_ANY;
685 		rule->gid_flags |= MDF_ANY | MDF_ANY_SUPP |
686 		    MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE;
687 		goto finish;
688 
689 	default:
690 		/* parse_id_type() returns no other types currently. */
691 		__assert_unreachable();
692 	}
693 
694 	/* Rule out cases that have been treated above. */
695 	MPASS((type == IT_UID || type == IT_GID) && !parse_any(to_id));
696 
697 	/* "."? */
698 	if (strcmp(to_id, ".") == 0) {
699 		if ((*tflags & MDF_CURRENT) != 0) {
700 			/* Duplicate "." <id>.  Try to coalesce. */
701 			error = coalesce_id_flags(is.flags, tflags);
702 			if (error != 0) {
703 				make_parse_error(parse_error, to_id - start,
704 				    "Incompatible flags with prior clause "
705 				    "with same target.");
706 				goto einval;
707 			}
708 		} else
709 			*tflags |= MDF_CURRENT | is.flags;
710 		goto check_type_and_finish;
711 	}
712 
713 	/* Parse an ID. */
714 	error = strtoui_strict(to_id, &p, 10, &is.id);
715 	if (error != 0 || *p != '\0') {
716 		make_parse_error(parse_error, to_id - start,
717 		    "Cannot parse a numerical ID (base 10).");
718 		goto einval;
719 	}
720 
721 	/* Explicit ID flags. */
722 	if (type == IT_GID && (is.flags & MDF_SUPP_MUST) != 0)
723 		*tflags |= MDF_EXPLICIT_SUPP_MUST;
724 
725 	/*
726 	 * We check for duplicate IDs and coalesce their 'struct id_spec' only
727 	 * at end of parse_single_rule() because it is much more performant then
728 	 * (using sorted arrays).
729 	 */
730 	++*nb;
731 	if (*nb == 0) {
732 		make_parse_error(parse_error, 0,
733 		    "Too many target clauses of type '%s'.", to_type);
734 		return (EOVERFLOW);
735 	}
736 	ie = malloc(sizeof(*ie), M_DO, M_WAITOK);
737 	ie->spec = is;
738 	STAILQ_INSERT_TAIL(list, ie, ie_entries);
739 	check_type_and_id_spec(type, &is);
740 check_type_and_finish:
741 	check_type_and_type_flags(type, *tflags);
742 finish:
743 	return (0);
744 einval:
745 	/* We must have built a parse error on error. */
746 	MPASS(*parse_error != NULL);
747 	return (EINVAL);
748 }
749 
750 static int
751 u_int_cmp(const u_int i1, const u_int i2)
752 {
753 	return ((i1 > i2) - (i1 < i2));
754 }
755 
756 static int
757 id_spec_cmp(const void *const p1, const void *const p2)
758 {
759 	const struct id_spec *const is1 = p1;
760 	const struct id_spec *const is2 = p2;
761 
762 	return (u_int_cmp(is1->id, is2->id));
763 }
764 
765 /*
766  * Transfer content of 'list' into 'array', freeing and emptying list.
767  *
768  * 'nb' must be 'list''s length and not be greater than 'array''s size.  The
769  * destination array is sorted by ID.  Structures 'struct id_spec' with same IDs
770  * are coalesced if that makes sense (not including duplicate clauses), else
771  * EINVAL is returned.  On success, 'nb' is updated (lowered) to account for
772  * coalesced specifications.  The parameter 'type' is only for testing purposes
773  * (INVARIANTS).
774  */
775 static int
776 pour_list_into_rule(const id_type_t type, struct id_list *const list,
777     struct id_spec *const array, id_nb_t *const nb,
778     struct parse_error **const parse_error)
779 {
780 	struct id_elem *ie, *ie_next;
781 	size_t idx = 0;
782 
783 	/* Fill the array. */
784 	STAILQ_FOREACH_SAFE(ie, list, ie_entries, ie_next) {
785 		MPASS(idx < *nb);
786 		array[idx] = ie->spec;
787 		free(ie, M_DO);
788 		++idx;
789 	}
790 	MPASS(idx == *nb);
791 	STAILQ_INIT(list);
792 
793 	/* Sort it (by ID). */
794 	qsort(array, *nb, sizeof(*array), id_spec_cmp);
795 
796 	/* Coalesce same IDs. */
797 	if (*nb != 0) {
798 		size_t ref_idx = 0;
799 
800 		for (idx = 1; idx < *nb; ++idx) {
801 			const u_int id = array[idx].id;
802 
803 			if (id != array[ref_idx].id) {
804 				++ref_idx;
805 				if (ref_idx != idx)
806 					array[ref_idx] = array[idx];
807 				continue;
808 			}
809 
810 			switch (type) {
811 				int error;
812 
813 			case IT_GID:
814 				error = coalesce_id_flags(array[idx].flags,
815 				    &array[ref_idx].flags);
816 				if (error != 0) {
817 					make_parse_error(parse_error, 0,
818 					    "Incompatible flags or duplicate "
819 					    "GID %u.", id);
820 					return (EINVAL);
821 				}
822 				check_type_and_id_flags(type,
823 				    array[ref_idx].flags);
824 				break;
825 
826 			case IT_UID:
827 				/*
828 				 * No flags in this case.  Multiple appearances
829 				 * of the same UID is an exact redundancy, so
830 				 * error out.
831 				 */
832 				make_parse_error(parse_error, 0,
833 				    "Duplicate UID %u.", id);
834 				return (EINVAL);
835 
836 			default:
837 				__assert_unreachable();
838 			}
839 		}
840 		*nb = ref_idx + 1;
841 	}
842 
843 	return (0);
844 }
845 
846 /*
847  * See also first comments for parse_rule() below.
848  *
849  * The second part of a rule, called <target> (or <to>), is a comma-separated
850  * (',') list of '<flags><type>=<id>' clauses similar to that of the <from>
851  * part, with the extensions that <id> may also be "*" or "any" or ".", and that
852  * <flags> may contain at most one of the '+', '-' and '!' characters when
853  * <type> is "gid" (no flags are allowed for "uid").  No two clauses in a single
854  * <to> list may list the same <id>.  "*" and "any" both designate any ID for
855  * the <type>, and are aliases to each other.  In front of "any" (or "*"), only
856  * the '+' flag is allowed (in the "gid" case).  "." designates the process'
857  * current IDs for the <type>.  The precise meaning of flags and "." is
858  * explained in functions checking privileges below.
859  */
860 static int
861 parse_single_rule(char *rule, struct rules *const rules,
862     struct parse_error **const parse_error)
863 {
864 	const char *const start = rule;
865 	const char *from_type, *from_id, *p;
866 	char *to_list;
867 	struct id_list uid_list, gid_list;
868 	struct id_elem *ie, *ie_next;
869 	struct rule *new;
870 	int error;
871 
872 	MPASS(*parse_error == NULL);
873 	STAILQ_INIT(&uid_list);
874 	STAILQ_INIT(&gid_list);
875 
876 	/* Freed when the 'struct rules' container is freed. */
877 	new = malloc(sizeof(*new), M_DO, M_WAITOK | M_ZERO);
878 
879 	from_type = strsep_noblanks(&rule, "=");
880 	MPASS(from_type != NULL); /* Because 'rule' was not NULL. */
881 	error = parse_id_type(from_type, &new->from_type, parse_error);
882 	if (error != 0)
883 		goto einval;
884 	switch (new->from_type) {
885 	case IT_UID:
886 	case IT_GID:
887 		break;
888 	default:
889 		make_parse_error(parse_error, 0, "Type '%s' not allowed in "
890 		    "the \"from\" part of rules.");
891 		goto einval;
892 	}
893 
894 	from_id = strsep_noblanks(&rule, ":");
895 	if (is_null_or_empty(from_id)) {
896 		make_parse_error(parse_error, 0, "No ID specified.");
897 		goto einval;
898 	}
899 
900 	error = strtoui_strict(from_id, &p, 10, &new->from_id);
901 	if (error != 0 || *p != '\0') {
902 		make_parse_error(parse_error, from_id - start,
903 		    "Cannot parse a numerical ID (base 10).");
904 		goto einval;
905 	}
906 
907 	/*
908 	 * We will now parse the "to" list.
909 	 *
910 	 * In order to ease parsing, we will begin by building lists of target
911 	 * UIDs and GIDs in local variables 'uid_list' and 'gid_list'.  The
912 	 * number of each type of IDs will be filled directly in 'new'.  At end
913 	 * of parse, we will allocate both arrays of IDs to be placed into the
914 	 * 'uids' and 'gids' members, sort them, and discard the tail queues
915 	 * used to build them.  This conversion to sorted arrays at end of parse
916 	 * allows to minimize memory allocations and enables searching IDs in
917 	 * O(log(n)) instead of linearly.
918 	 */
919 	to_list = strsep_noblanks(&rule, ",");
920 	if (to_list == NULL) {
921 		make_parse_error(parse_error, 0, "No target list.");
922 		goto einval;
923 	}
924 	do {
925 		error = parse_target_clause(to_list, new, &uid_list, &gid_list,
926 		    parse_error);
927 		if (error != 0) {
928 			(*parse_error)->pos += to_list - start;
929 			goto einval;
930 		}
931 
932 		to_list = strsep_noblanks(&rule, ",");
933 	} while (to_list != NULL);
934 
935 	if (new->uids_nb != 0) {
936 		new->uids = malloc(sizeof(*new->uids) * new->uids_nb, M_DO,
937 		    M_WAITOK);
938 		error = pour_list_into_rule(IT_UID, &uid_list, new->uids,
939 		    &new->uids_nb, parse_error);
940 		if (error != 0)
941 			goto einval;
942 	}
943 	MPASS(STAILQ_EMPTY(&uid_list));
944 	if (!has_clauses(new->uids_nb, new->uid_flags)) {
945 		/* No UID specified, default is "uid=.". */
946 		MPASS(new->uid_flags == 0);
947 		new->uid_flags = MDF_CURRENT;
948 		check_type_and_type_flags(IT_UID, new->uid_flags);
949 	}
950 
951 	if (new->gids_nb != 0) {
952 		new->gids = malloc(sizeof(*new->gids) * new->gids_nb, M_DO,
953 		    M_WAITOK);
954 		error = pour_list_into_rule(IT_GID, &gid_list, new->gids,
955 		    &new->gids_nb, parse_error);
956 		if (error != 0)
957 			goto einval;
958 	}
959 	MPASS(STAILQ_EMPTY(&gid_list));
960 	if (!has_clauses(new->gids_nb, new->gid_flags)) {
961 		/* No GID specified, default is "gid=.,!gid=.". */
962 		MPASS(new->gid_flags == 0);
963 		new->gid_flags = MDF_CURRENT | MDF_PRIMARY | MDF_SUPP_MUST |
964 		    MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE;
965 		check_type_and_type_flags(IT_GID, new->gid_flags);
966 	}
967 
968 	STAILQ_INSERT_TAIL(&rules->head, new, r_entries);
969 	return (0);
970 
971 einval:
972 	free(new->gids, M_DO);
973 	free(new->uids, M_DO);
974 	free(new, M_DO);
975 	STAILQ_FOREACH_SAFE(ie, &gid_list, ie_entries, ie_next)
976 	    free(ie, M_DO);
977 	STAILQ_FOREACH_SAFE(ie, &uid_list, ie_entries, ie_next)
978 	    free(ie, M_DO);
979 	MPASS(*parse_error != NULL);
980 	return (EINVAL);
981 }
982 
983 /*
984  * Parse rules specification and produce rule structures out of it.
985  *
986  * Returns 0 on success, with '*rulesp' made to point to a 'struct rule'
987  * representing the rules.  On error, the returned value is non-zero and
988  * '*rulesp' is unchanged.  If 'string' has length greater or equal to
989  * MAC_RULE_STRING_LEN, ENAMETOOLONG is returned.  If it is not in the expected
990  * format, EINVAL is returned.  If an error is returned, '*parse_error' is set
991  * to point to a 'struct parse_error' giving an error message for the problem,
992  * else '*parse_error' is set to NULL.
993  *
994  * Expected format: A semi-colon-separated list of rules of the form
995  * "<from>:<target>".  The <from> part is of the form "<type>=<id>" where <type>
996  * is "uid" or "gid", <id> an UID or GID (depending on <type>) and <target> is
997  * "*", "any" or a comma-separated list of '<flags><type>=<id>' clauses (see the
998  * comment for parse_single_rule() for more details).  For convenience, empty
999  * rules are allowed (and do nothing), and spaces and tabs are allowed (and
1000  * removed) around each token (tokens are natural ones, except that
1001  * '<flags><type>' as a whole is considered a single token, so no blanks are
1002  * allowed between '<flags>' and '<type>').
1003  *
1004  * Examples:
1005  * - "uid=1001:uid=1010,gid=1010;uid=1002:any"
1006  * - "gid=1010:gid=1011,gid=1012,gid=1013"
1007  */
1008 static int
1009 parse_rules(const char *const string, struct rules **const rulesp,
1010     struct parse_error **const parse_error)
1011 {
1012 	const size_t len = strlen(string);
1013 	char *copy, *p, *rule;
1014 	struct rules *rules;
1015 	int error = 0;
1016 
1017 	*parse_error = NULL;
1018 
1019 	if (len >= MAC_RULE_STRING_LEN) {
1020 		make_parse_error(parse_error, 0,
1021 		    "Rule specification string is too long (%zu, max %zu)",
1022 		    len, MAC_RULE_STRING_LEN - 1);
1023 		return (ENAMETOOLONG);
1024 	}
1025 
1026 	rules = alloc_rules();
1027 	bcopy(string, rules->string, len + 1);
1028 	MPASS(rules->string[len] == '\0'); /* Catch some races. */
1029 
1030 	copy = malloc(len + 1, M_DO, M_WAITOK);
1031 	bcopy(string, copy, len + 1);
1032 	MPASS(copy[len] == '\0'); /* Catch some races. */
1033 
1034 	p = copy;
1035 	while ((rule = strsep_noblanks(&p, ";")) != NULL) {
1036 		if (rule[0] == '\0')
1037 			continue;
1038 		error = parse_single_rule(rule, rules, parse_error);
1039 		if (error != 0) {
1040 			(*parse_error)->pos += rule - copy;
1041 			toast_rules(rules);
1042 			goto out;
1043 		}
1044 	}
1045 
1046 	*rulesp = rules;
1047 out:
1048 	free(copy, M_DO);
1049 	return (error);
1050 }
1051 
1052 /*
1053  * Find rules applicable to the passed prison.
1054  *
1055  * Returns the applicable rules (and never NULL).  'pr' must be unlocked.
1056  * 'aprp' is set to the (ancestor) prison holding these, and it must be unlocked
1057  * once the caller is done accessing the rules.  '*aprp' is equal to 'pr' if and
1058  * only if the current jail has its own set of rules.
1059  */
1060 static struct rules *
1061 find_rules(struct prison *const pr, struct prison **const aprp)
1062 {
1063 	struct prison *cpr, *ppr;
1064 	struct rules *rules;
1065 
1066 	cpr = pr;
1067 	for (;;) {
1068 		prison_lock(cpr);
1069 		rules = osd_jail_get(cpr, osd_jail_slot);
1070 		if (rules != NULL)
1071 			break;
1072 		prison_unlock(cpr);
1073 
1074 		ppr = cpr->pr_parent;
1075 		MPASS(ppr != NULL); /* prison0 always has rules. */
1076 		cpr = ppr;
1077 	}
1078 
1079 	*aprp = cpr;
1080 	return (rules);
1081 }
1082 
/*
 * Acquire one additional reference on 'rules' (see drop_rules() for the
 * release side).
 */
static void
hold_rules(struct rules *const rules)
{
	refcount_acquire(&rules->use_count);
}
1088 
1089 static void
1090 drop_rules(struct rules *const rules)
1091 {
1092 	if (refcount_release(&rules->use_count))
1093 		toast_rules(rules);
1094 }
1095 
#ifdef INVARIANTS
/*
 * Debugging aid: Panic if the reference count of 'rules' is not 'expected'.
 */
static void
check_rules_use_count(const struct rules *const rules, u_int expected)
{
	const u_int actual = refcount_load(&rules->use_count);

	if (actual == expected)
		return;

	panic("MAC/do: Rules at %p: Use count is %u, expected %u",
	    rules, actual, expected);
}
#else
/* Expands to nothing when INVARIANTS is not defined. */
#define check_rules_use_count(...)
#endif /* INVARIANTS */
1109 
/*
 * OSD destructor for slot 'osd_jail_slot'.
 *
 * Called with 'value' not NULL.  We have arranged that it is only ever called
 * when the corresponding jail goes down or at module unload.
 *
 * 'value' is the 'struct rules' that was stored in the jail's slot.  The rules
 * are destroyed directly with toast_rules() (not through drop_rules()), after
 * checking (with INVARIANTS only) that exactly one reference remains.
 */
static void
dealloc_jail_osd(void *const value)
{
	struct rules *const rules = value;

	/*
	 * If called because the "holding" jail goes down, no one should be
	 * using the rules but us at this point because no threads of that jail
	 * (or its sub-jails) should currently be executing (in particular,
	 * currently executing setcred()).  The case of module unload is more
	 * complex.  Although the MAC framework takes care that no hook is
	 * called while a module is unloading, the unload could happen between
	 * two calls to MAC hooks in the course of, e.g., executing setcred(),
	 * where the rules' reference count has been bumped to keep them alive
	 * even if the rules on the "holding" jail have been concurrently
	 * changed.  These other references are held in our thread OSD slot, so
	 * we ensure that all threads' slots are freed first in mac_do_destroy()
	 * to be able to check that only one reference remains.
	 */
	check_rules_use_count(rules, 1);
	toast_rules(rules);
}
1138 
1139 /*
1140  * Remove the rules specifically associated to a prison.
1141  *
1142  * In practice, this means that the rules become inherited (from the closest
1143  * ascendant that has some).
1144  *
1145  * Destroys the 'osd_jail_slot' slot of the passed jail.
1146  */
1147 static void
1148 remove_rules(struct prison *const pr)
1149 {
1150 	struct rules *old_rules;
1151 	int error __unused;
1152 
1153 	prison_lock(pr);
1154 	/*
1155 	 * We go to the burden of extracting rules first instead of just letting
1156 	 * osd_jail_del() calling dealloc_jail_osd() as we want to decrement
1157 	 * their use count, and possibly free them, outside of the prison lock.
1158 	 */
1159 	old_rules = osd_jail_get(pr, osd_jail_slot);
1160 	error = osd_jail_set(pr, osd_jail_slot, NULL);
1161 	/* osd_set() never fails nor allocate memory when 'value' is NULL. */
1162 	MPASS(error == 0);
1163 	/*
1164 	 * This completely frees the OSD slot, but doesn't call the destructor
1165 	 * since we've just put NULL in the slot.
1166 	 */
1167 	osd_jail_del(pr, osd_jail_slot);
1168 	prison_unlock(pr);
1169 
1170 	if (old_rules != NULL)
1171 		drop_rules(old_rules);
1172 }
1173 
1174 /*
1175  * Assign already built rules to a jail.
1176  */
1177 static void
1178 set_rules(struct prison *const pr, struct rules *const rules)
1179 {
1180 	struct rules *old_rules;
1181 	void **rsv;
1182 
1183 	check_rules_use_count(rules, 0);
1184 	hold_rules(rules);
1185 	rsv = osd_reserve(osd_jail_slot);
1186 
1187 	prison_lock(pr);
1188 	old_rules = osd_jail_get(pr, osd_jail_slot);
1189 	osd_jail_set_reserved(pr, osd_jail_slot, rsv, rules);
1190 	prison_unlock(pr);
1191 	if (old_rules != NULL)
1192 		drop_rules(old_rules);
1193 }
1194 
/*
 * Install empty rules (as freshly returned by alloc_rules()) on a jail.
 */
static void
set_empty_rules(struct prison *const pr)
{
	set_rules(pr, alloc_rules());
}
1205 
/*
 * Parse a rules specification and, on success only, install the resulting
 * rules on a jail.
 *
 * Returns the same error code as parse_rules() (which see), which is also in
 * charge of filling '*parse_error'.
 */
static int
parse_and_set_rules(struct prison *const pr, const char *rules_string,
    struct parse_error **const parse_error)
{
	struct rules *parsed;
	const int error = parse_rules(rules_string, &parsed, parse_error);

	if (error == 0)
		set_rules(pr, parsed);
	return (error);
}
1224 
1225 static int
1226 mac_do_sysctl_rules(SYSCTL_HANDLER_ARGS)
1227 {
1228 	char *const buf = malloc(MAC_RULE_STRING_LEN, M_DO, M_WAITOK);
1229 	struct prison *const td_pr = req->td->td_ucred->cr_prison;
1230 	struct prison *pr;
1231 	struct rules *rules;
1232 	struct parse_error *parse_error;
1233 	int error;
1234 
1235 	rules = find_rules(td_pr, &pr);
1236 	strlcpy(buf, rules->string, MAC_RULE_STRING_LEN);
1237 	prison_unlock(pr);
1238 
1239 	error = sysctl_handle_string(oidp, buf, MAC_RULE_STRING_LEN, req);
1240 	if (error != 0 || req->newptr == NULL)
1241 		goto out;
1242 
1243 	/* Set our prison's rules, not that of the jail we inherited from. */
1244 	error = parse_and_set_rules(td_pr, buf, &parse_error);
1245 	if (error != 0) {
1246 		if (print_parse_error)
1247 			printf("MAC/do: Parse error at index %zu: %s\n",
1248 			    parse_error->pos, parse_error->msg);
1249 		free_parse_error(parse_error);
1250 	}
1251 out:
1252 	free(buf, M_DO);
1253 	return (error);
1254 }
1255 
/*
 * "security.mac.do.rules" string sysctl, served by mac_do_sysctl_rules().
 */
SYSCTL_PROC(_security_mac_do, OID_AUTO, rules,
    CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_PRISON|CTLFLAG_MPSAFE,
    0, 0, mac_do_sysctl_rules, "A",
    "Rules");


/*
 * Declarations of the "mac.do" and "mac.do.rules" jail parameters, which are
 * processed by the mac_do_jail_*() functions.  The "mac.do.rules" string is
 * declared with a maximum size of MAC_RULE_STRING_LEN.
 */
SYSCTL_JAIL_PARAM_SYS_SUBNODE(mac, do, CTLFLAG_RW, "Jail MAC/do parameters");
SYSCTL_JAIL_PARAM_STRING(_mac_do, rules, CTLFLAG_RW, MAC_RULE_STRING_LEN,
    "Jail MAC/do rules");
1265 
1266 
1267 static int
1268 mac_do_jail_create(void *obj, void *data __unused)
1269 {
1270 	struct prison *const pr = obj;
1271 
1272 	set_empty_rules(pr);
1273 	return (0);
1274 }
1275 
1276 static int
1277 mac_do_jail_get(void *obj, void *data)
1278 {
1279 	struct prison *ppr, *const pr = obj;
1280 	struct vfsoptlist *const opts = data;
1281 	struct rules *rules;
1282 	int jsys, error;
1283 
1284 	rules = find_rules(pr, &ppr);
1285 
1286 	jsys = pr == ppr ?
1287 	    (STAILQ_EMPTY(&rules->head) ? JAIL_SYS_DISABLE : JAIL_SYS_NEW) :
1288 	    JAIL_SYS_INHERIT;
1289 	error = vfs_setopt(opts, "mac.do", &jsys, sizeof(jsys));
1290 	if (error != 0 && error != ENOENT)
1291 		goto done;
1292 
1293 	error = vfs_setopts(opts, "mac.do.rules", rules->string);
1294 	if (error != 0 && error != ENOENT)
1295 		goto done;
1296 
1297 	error = 0;
1298 done:
1299 	prison_unlock(ppr);
1300 	return (error);
1301 }
1302 
/*
 * -1 is used as a sentinel in mac_do_jail_check() and mac_do_jail_set() below,
 * meaning that the "mac.do" parameter was not passed.  Make sure at compile
 * time that it cannot be confused with one of the values we accept.
 */
_Static_assert(-1 != JAIL_SYS_DISABLE && -1 != JAIL_SYS_NEW &&
    -1 != JAIL_SYS_INHERIT,
    "mac_do(4) uses -1 as a sentinel for uninitialized 'jsys'.");
1309 
1310 /*
1311  * We perform only cheap checks here, i.e., we do not really parse the rules
1312  * specification string, if any.
1313  */
1314 static int
1315 mac_do_jail_check(void *obj, void *data)
1316 {
1317 	struct vfsoptlist *opts = data;
1318 	char *rules_string;
1319 	int error, jsys, size;
1320 
1321 	error = vfs_copyopt(opts, "mac.do", &jsys, sizeof(jsys));
1322 	if (error == ENOENT)
1323 		jsys = -1;
1324 	else {
1325 		if (error != 0)
1326 			return (error);
1327 		if (jsys != JAIL_SYS_DISABLE && jsys != JAIL_SYS_NEW &&
1328 		    jsys != JAIL_SYS_INHERIT)
1329 			return (EINVAL);
1330 	}
1331 
1332 	/*
1333 	 * We use vfs_getopt() here instead of vfs_getopts() to get the length.
1334 	 * We perform the additional checks done by the latter here, even if
1335 	 * jail_set() calls vfs_getopts() itself later (they becoming
1336 	 * inconsistent wouldn't cause any security problem).
1337 	 */
1338 	error = vfs_getopt(opts, "mac.do.rules", (void**)&rules_string, &size);
1339 	if (error == ENOENT) {
1340 		/*
1341 		 * Default (in absence of "mac.do.rules") is to disable (and, in
1342 		 * particular, not inherit).
1343 		 */
1344 		if (jsys == -1)
1345 			jsys = JAIL_SYS_DISABLE;
1346 
1347 		if (jsys == JAIL_SYS_NEW) {
1348 			vfs_opterror(opts, "'mac.do.rules' must be specified "
1349 			    "given 'mac.do''s value");
1350 			return (EINVAL);
1351 		}
1352 
1353 		/* Absence of "mac.do.rules" at this point is OK. */
1354 		error = 0;
1355 	} else {
1356 		if (error != 0)
1357 			return (error);
1358 
1359 		/* Not a proper string. */
1360 		if (size == 0 || rules_string[size - 1] != '\0') {
1361 			vfs_opterror(opts, "'mac.do.rules' not a proper string");
1362 			return (EINVAL);
1363 		}
1364 
1365 		if (size > MAC_RULE_STRING_LEN) {
1366 			vfs_opterror(opts, "'mdo.rules' too long");
1367 			return (ENAMETOOLONG);
1368 		}
1369 
1370 		if (jsys == -1)
1371 			/* Default (if "mac.do.rules" is present). */
1372 			jsys = rules_string[0] == '\0' ? JAIL_SYS_DISABLE :
1373 			    JAIL_SYS_NEW;
1374 
1375 		/*
1376 		 * Be liberal and accept JAIL_SYS_DISABLE and JAIL_SYS_INHERIT
1377 		 * with an explicit empty rules specification.
1378 		 */
1379 		switch (jsys) {
1380 		case JAIL_SYS_DISABLE:
1381 		case JAIL_SYS_INHERIT:
1382 			if (rules_string[0] != '\0') {
1383 				vfs_opterror(opts, "'mac.do.rules' specified "
1384 				    "but should not given 'mac.do''s value");
1385 				return (EINVAL);
1386 			}
1387 			break;
1388 		}
1389 	}
1390 
1391 	return (error);
1392 }
1393 
1394 static int
1395 mac_do_jail_set(void *obj, void *data)
1396 {
1397 	struct prison *pr = obj;
1398 	struct vfsoptlist *opts = data;
1399 	char *rules_string;
1400 	struct parse_error *parse_error;
1401 	int error, jsys;
1402 
1403 	/*
1404 	 * The invariants checks used below correspond to what has already been
1405 	 * checked in jail_check() above.
1406 	 */
1407 
1408 	error = vfs_copyopt(opts, "mac.do", &jsys, sizeof(jsys));
1409 	MPASS(error == 0 || error == ENOENT);
1410 	if (error != 0)
1411 		jsys = -1; /* Mark unfilled. */
1412 
1413 	rules_string = vfs_getopts(opts, "mac.do.rules", &error);
1414 	MPASS(error == 0 || error == ENOENT);
1415 	if (error == 0) {
1416 		MPASS(strlen(rules_string) < MAC_RULE_STRING_LEN);
1417 		if (jsys == -1)
1418 			/* Default (if "mac.do.rules" is present). */
1419 			jsys = rules_string[0] == '\0' ? JAIL_SYS_DISABLE :
1420 			    JAIL_SYS_NEW;
1421 		else
1422 			MPASS(jsys == JAIL_SYS_NEW ||
1423 			    ((jsys == JAIL_SYS_DISABLE ||
1424 			    jsys == JAIL_SYS_INHERIT) &&
1425 			    rules_string[0] == '\0'));
1426 	} else {
1427 		MPASS(jsys != JAIL_SYS_NEW);
1428 		if (jsys == -1)
1429 			/*
1430 			 * Default (in absence of "mac.do.rules") is to disable
1431 			 * (and, in particular, not inherit).
1432 			 */
1433 			jsys = JAIL_SYS_DISABLE;
1434 		/* If disabled, we'll store an empty rule specification. */
1435 		if (jsys == JAIL_SYS_DISABLE)
1436 			rules_string = "";
1437 	}
1438 
1439 	switch (jsys) {
1440 	case JAIL_SYS_INHERIT:
1441 		remove_rules(pr);
1442 		error = 0;
1443 		break;
1444 	case JAIL_SYS_DISABLE:
1445 	case JAIL_SYS_NEW:
1446 		error = parse_and_set_rules(pr, rules_string, &parse_error);
1447 		if (error != 0) {
1448 			vfs_opterror(opts,
1449 			    "MAC/do: Parse error at index %zu: %s\n",
1450 			    parse_error->pos, parse_error->msg);
1451 			free_parse_error(parse_error);
1452 		}
1453 		break;
1454 	default:
1455 		__assert_unreachable();
1456 	}
1457 	return (error);
1458 }
1459 
/*
 * OSD jail methods, registered along with the 'osd_jail_slot' slot in
 * mac_do_init().
 *
 * There is no PR_METHOD_REMOVE, as OSD storage is destroyed by the common jail
 * code (see prison_cleanup()), which triggers a run of our dealloc_jail_osd()
 * destructor.
 */
static const osd_method_t osd_methods[PR_MAXMETHOD] = {
	[PR_METHOD_CREATE] = mac_do_jail_create,
	[PR_METHOD_GET] = mac_do_jail_get,
	[PR_METHOD_CHECK] = mac_do_jail_check,
	[PR_METHOD_SET] = mac_do_jail_set,
};
1473 
1474 
/*
 * Common header structure.
 *
 * Each structure that is used to pass information between some MAC check
 * function and priv_grant() must start with this header.
 *
 * 'allocated_size' is set by alloc_data(), which also resets 'priv' to 0.
 * 'size', 'priv' and 'rules' are filled by set_data_header().  clear_data()
 * releases the reference on 'rules' and resets 'priv' to 0.
 */
struct mac_do_data_header {
	/* Size of the allocated buffer holding the containing structure. */
	size_t		 allocated_size;
	/* Full size of the containing structure. */
	size_t		 size;
	/*
	 * For convenience, we use privilege numbers as an identifier for the
	 * containing structure's type, since there is one distinct privilege
	 * for each privilege changing function we are supporting.  0 in 'priv'
	 * indicates this header is uninitialized.
	 */
	int		 priv;
	/* Rules to apply.  Meaningful only when 'priv' is not 0. */
	struct rules	*rules;
};
1496 
1497 /*
1498  * The case of unusable or absent per-thread data can actually happen as nothing
1499  * prevents, e.g., priv_check*() with privilege 'priv' to be called standalone,
1500  * as it is currently by, e.g., the Linux emulator for PRIV_CRED_SETUID.  We
1501  * interpret such calls to priv_check*() as full, unrestricted requests for
1502  * 'priv', contrary to what we're doing here for selected operations, and
1503  * consequently will not grant the requested privilege.
1504  *
1505  * Also, we protect ourselves from a concurrent change of 'do_enabled' while
1506  * a call to setcred() is in progress by storing the rules per-thread
1507  * which is then consulted by each successive hook so that they all have
1508  * a coherent view of the specifications, and we empty the slot (actually, mark
1509  * it as empty) when MAC/do is disabled.
1510  */
1511 static int
1512 check_data_usable(const void *const data, const size_t size, const int priv)
1513 {
1514 	const struct mac_do_data_header *const hdr = data;
1515 
1516 	if (hdr == NULL || hdr->priv == 0)
1517 		return (ENOENT);
1518 	/*
1519 	 * Impacting changes in the protocols we are based on...  Don't crash in
1520 	 * production.
1521 	 */
1522 	if (hdr->priv != priv) {
1523 		MPASS(hdr->priv == priv);
1524 		return (EBUSY);
1525 	}
1526 	MPASS(hdr->size == size);
1527 	MPASS(hdr->size <= hdr->allocated_size);
1528 	return (0);
1529 }
1530 
1531 static void
1532 clear_data(void *const data)
1533 {
1534 	struct mac_do_data_header *const hdr = data;
1535 
1536 	if (hdr != NULL) {
1537 		drop_rules(hdr->rules);
1538 		/* We don't deallocate so as to save time on next access. */
1539 		hdr->priv = 0;
1540 	}
1541 }
1542 
/*
 * Return the content of the current thread's 'osd_thread_slot' OSD slot, i.e.,
 * the per-thread data installed by alloc_data(), if any.
 */
static void *
fetch_data(void)
{
	return (osd_thread_get_unlocked(curthread, osd_thread_slot));
}
1548 
1549 static bool
1550 is_data_reusable(const void *const data, const size_t size)
1551 {
1552 	const struct mac_do_data_header *const hdr = data;
1553 
1554 	return (hdr != NULL && size <= hdr->allocated_size);
1555 }
1556 
1557 static void
1558 set_data_header(void *const data, const size_t size, const int priv,
1559     struct rules *const rules)
1560 {
1561 	struct mac_do_data_header *const hdr = data;
1562 
1563 	MPASS(hdr->priv == 0);
1564 	MPASS(priv != 0);
1565 	MPASS(size <= hdr->allocated_size);
1566 	hdr->size = size;
1567 	hdr->priv = priv;
1568 	hdr->rules = rules;
1569 }
1570 
1571 /* The proc lock (and any other non-sleepable lock) must not be held. */
1572 static void *
1573 alloc_data(void *const data, const size_t size)
1574 {
1575 	struct mac_do_data_header *const hdr = realloc(data, size, M_DO,
1576 	    M_WAITOK);
1577 
1578 	MPASS(size >= sizeof(struct mac_do_data_header));
1579 	hdr->allocated_size = size;
1580 	hdr->priv = 0;
1581 	if (hdr != data) {
1582 		/*
1583 		 * This call either reuses the existing memory allocated for the
1584 		 * slot or tries to allocate some without blocking.
1585 		 */
1586 		int error = osd_thread_set(curthread, osd_thread_slot, hdr);
1587 
1588 		if (error != 0) {
1589 			/* Going to make a M_WAITOK allocation. */
1590 			void **const rsv = osd_reserve(osd_thread_slot);
1591 
1592 			error = osd_thread_set_reserved(curthread,
1593 			    osd_thread_slot, rsv, hdr);
1594 			MPASS(error == 0);
1595 		}
1596 	}
1597 	return (hdr);
1598 }
1599 
1600 /* Destructor for 'osd_thread_slot'. */
1601 static void
1602 dealloc_thread_osd(void *const value)
1603 {
1604 	free(value, M_DO);
1605 }
1606 
1607 /*
1608  * Whether to grant access to some primary group according to flags.
1609  *
1610  * The passed 'flags' must be those of a rule's matching GID, or the IT_GID type
1611  * flags when MDF_CURRENT has been matched.
1612  *
1613  * Return values:
1614  * - 0:			Access granted.
1615  * - EJUSTRETURN:	Flags are agnostic.
1616  */
1617 static int
1618 grant_primary_group_from_flags(const flags_t flags)
1619 {
1620 	return ((flags & MDF_PRIMARY) != 0 ? 0 : EJUSTRETURN);
1621 }
1622 
1623 /*
1624  * Same as grant_primary_group_from_flags(), but for supplementary groups.
1625  *
1626  * Return values:
1627  * - 0:			Access granted.
1628  * - EJUSTRETURN:	Flags are agnostic.
1629  * - EPERM:		Access denied.
1630  */
1631 static int
1632 grant_supplementary_group_from_flags(const flags_t flags)
1633 {
1634 	if ((flags & MDF_SUPP_MASK) != 0)
1635 		return ((flags & MDF_SUPP_DONT) != 0 ? EPERM : 0);
1636 
1637 	return (EJUSTRETURN);
1638 }
1639 
/*
 * Whether 'rule' accepts the supplementary groups requested in 'new_cred'.
 *
 * Supplementary groups are taken to be the elements of 'cr_groups' starting at
 * index 1, for both the current ('old_cred') and requested ('new_cred')
 * credentials.  The requested groups are browsed once, in order, while the
 * current groups and the rule's GIDs are advanced in lockstep, which relies on
 * all three sequences being sorted by increasing GID.
 *
 * Return values:
 * - 0:		All requested supplementary groups are accepted, and no
 *		mandatory group is missing.
 * - EPERM:	Some requested group is forbidden or wasn't explicitly
 *		accepted, or some mandatory group is missing.
 */
static int
rule_grant_supplementary_groups(const struct rule *const rule,
    const struct ucred *const old_cred, const struct ucred *const new_cred)
{
	const gid_t *const old_groups = old_cred->cr_groups;
	const gid_t *const new_groups = new_cred->cr_groups;
	const int old_ngroups = old_cred->cr_ngroups;
	const int new_ngroups = new_cred->cr_ngroups;
	const flags_t gid_flags = rule->gid_flags;
	/* MDF_CURRENT was specified along with some supplementary flag. */
	const bool current_has_supp = (gid_flags & MDF_CURRENT) != 0 &&
	    (gid_flags & MDF_SUPP_MASK) != 0;
	/*
	 * Cursors into the rule's GIDs and the current and requested groups.
	 * They are only ever advanced, across iterations of the main loop.
	 */
	id_nb_t rule_idx = 0;
	int old_idx = 1, new_idx = 1;

	if ((gid_flags & MDF_ANY_SUPP) != 0 &&
	    (gid_flags & MDF_MAY_REJ_SUPP) == 0)
		/*
		 * Any set of supplementary groups is accepted, no need to loop
		 * over them.
		 */
		return (0);

	for (; new_idx < new_ngroups; ++new_idx) {
		const gid_t gid = new_groups[new_idx];
		/* Set as soon as something explicitly accepts 'gid'. */
		bool may_accept = false;

		if ((gid_flags & MDF_ANY_SUPP) != 0)
			may_accept = true;

		/* Do we have to check for the current supplementary groups? */
		if (current_has_supp) {
			/*
			 * Linear search, as both supplementary groups arrays
			 * are sorted.  Advancing 'old_idx' with a binary search
			 * on absence of MDF_SUPP_MUST doesn't seem worth it in
			 * practice.
			 */
			for (; old_idx < old_ngroups; ++old_idx) {
				const gid_t old_gid = old_groups[old_idx];

				if (old_gid < gid) {
					/* Mandatory but absent. */
					if ((gid_flags & MDF_SUPP_MUST) != 0)
						return (EPERM);
				} else if (old_gid == gid) {
					switch (gid_flags & MDF_SUPP_MASK) {
					case MDF_SUPP_DONT:
						/* Present but forbidden. */
						return (EPERM);
					case MDF_SUPP_ALLOW:
					case MDF_SUPP_MUST:
						may_accept = true;
						break;
					default:
#ifdef INVARIANTS
						__assert_unreachable();
#else
						/* Better be safe than sorry. */
						return (EPERM);
#endif
					}
					/* This current group has been consumed. */
					++old_idx;
					break;
				}
				else
					/* Greater GID: Keep it for later. */
					break;
			}
		}

		/*
		 * Search by GID for a corresponding 'struct id_spec'.
		 *
		 * Again, linear search, with same note on not using binary
		 * search optimization as above (the trigger would be absence of
		 * MDF_EXPLICIT_SUPP_MUST this time).
		 */
		for (; rule_idx < rule->gids_nb; ++rule_idx) {
			const struct id_spec is = rule->gids[rule_idx];

			if (is.id < gid) {
				/* Mandatory but absent. */
				if ((is.flags & MDF_SUPP_MUST) != 0)
					return (EPERM);
			} else if (is.id == gid) {
				switch (is.flags & MDF_SUPP_MASK) {
				case MDF_SUPP_DONT:
					/* Present but forbidden. */
					return (EPERM);
				case MDF_SUPP_ALLOW:
				case MDF_SUPP_MUST:
					may_accept = true;
					break;
				case 0:
					/* Primary group only. */
					break;
				default:
#ifdef INVARIANTS
					__assert_unreachable();
#else
					/* Better be safe than sorry. */
					return (EPERM);
#endif
				}
				/* This rule's GID has been consumed. */
				++rule_idx;
				break;
			}
			else
				/* Greater GID: Keep it for later. */
				break;
		}

		/* 'gid' wasn't explicitly accepted. */
		if (!may_accept)
			return (EPERM);
	}

	/*
	 * If we must have all current groups and we didn't browse all
	 * of them at this point (because the remaining ones have GIDs
	 * greater than the last requested group), we are simply missing
	 * them.
	 */
	if ((gid_flags & MDF_CURRENT) != 0 &&
	    (gid_flags & MDF_SUPP_MUST) != 0 &&
	    old_idx < old_ngroups)
		return (EPERM);
	/*
	 * Similarly, we have to finish browsing all GIDs from the rule
	 * in case some are marked mandatory.
	 */
	if ((gid_flags & MDF_EXPLICIT_SUPP_MUST) != 0) {
		for (; rule_idx < rule->gids_nb; ++rule_idx) {
			const struct id_spec is = rule->gids[rule_idx];

			if ((is.flags & MDF_SUPP_MUST) != 0)
				return (EPERM);
		}
	}

	return (0);
}
1780 
1781 static int
1782 rule_grant_primary_group(const struct rule *const rule,
1783     const struct ucred *const old_cred, const gid_t gid)
1784 {
1785 	struct id_spec gid_is = {.flags = 0};
1786 	const struct id_spec *found_is;
1787 	int error;
1788 
1789 	if ((rule->gid_flags & MDF_ANY) != 0)
1790 		return (0);
1791 
1792 	/* Was MDF_CURRENT specified, and is 'gid' a current GID? */
1793 	if ((rule->gid_flags & MDF_CURRENT) != 0 &&
1794 	    group_is_primary(gid, old_cred)) {
1795 		error = grant_primary_group_from_flags(rule->gid_flags);
1796 		if (error == 0)
1797 			return (0);
1798 	}
1799 
1800 	/* Search by GID for a corresponding 'struct id_spec'. */
1801 	gid_is.id = gid;
1802 	found_is = bsearch(&gid_is, rule->gids, rule->gids_nb,
1803 	    sizeof(*rule->gids), id_spec_cmp);
1804 
1805 	if (found_is != NULL) {
1806 		error = grant_primary_group_from_flags(found_is->flags);
1807 		if (error == 0)
1808 			return (0);
1809 	}
1810 
1811 	return (EPERM);
1812 }
1813 
1814 static int
1815 rule_grant_primary_groups(const struct rule *const rule,
1816     const struct ucred *const old_cred, const struct ucred *const new_cred)
1817 {
1818 	int error;
1819 
1820 	/* Shortcut. */
1821 	if ((rule->gid_flags & MDF_ANY) != 0)
1822 		return (0);
1823 
1824 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_gid);
1825 	if (error != 0)
1826 		return (error);
1827 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_rgid);
1828 	if (error != 0)
1829 		return (error);
1830 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_svgid);
1831 	if (error != 0)
1832 		return (error);
1833 	return (0);
1834 }
1835 
1836 static bool
1837 user_is_current(const uid_t uid, const struct ucred *const old_cred)
1838 {
1839 	return (uid == old_cred->cr_uid || uid == old_cred->cr_ruid ||
1840 	    uid == old_cred->cr_svuid);
1841 }
1842 
1843 static int
1844 rule_grant_user(const struct rule *const rule,
1845     const struct ucred *const old_cred, const uid_t uid)
1846 {
1847 	struct id_spec uid_is = {.flags = 0};
1848 	const struct id_spec *found_is;
1849 
1850 	if ((rule->uid_flags & MDF_ANY) != 0)
1851 		return (0);
1852 
1853 	/* Was MDF_CURRENT specified, and is 'uid' a current UID? */
1854 	if ((rule->uid_flags & MDF_CURRENT) != 0 &&
1855 	    user_is_current(uid, old_cred))
1856 		return (0);
1857 
1858 	/* Search by UID for a corresponding 'struct id_spec'. */
1859 	uid_is.id = uid;
1860 	found_is = bsearch(&uid_is, rule->uids, rule->uids_nb,
1861 	    sizeof(*rule->uids), id_spec_cmp);
1862 
1863 	if (found_is != NULL)
1864 		return (0);
1865 
1866 	return (EPERM);
1867 }
1868 
1869 static int
1870 rule_grant_users(const struct rule *const rule,
1871     const struct ucred *const old_cred, const struct ucred *const new_cred)
1872 {
1873 	int error;
1874 
1875 	/* Shortcut. */
1876 	if ((rule->uid_flags & MDF_ANY) != 0)
1877 		return (0);
1878 
1879 	error = rule_grant_user(rule, old_cred, new_cred->cr_uid);
1880 	if (error != 0)
1881 		return (error);
1882 	error = rule_grant_user(rule, old_cred, new_cred->cr_ruid);
1883 	if (error != 0)
1884 		return (error);
1885 	error = rule_grant_user(rule, old_cred, new_cred->cr_svuid);
1886 	if (error != 0)
1887 		return (error);
1888 
1889 	return (0);
1890 }
1891 
/*
 * Whether 'rule' accepts the transition from 'old_cred' to 'new_cred'.
 *
 * Users, primary groups and supplementary groups are checked in that order,
 * stopping at the first failing check, whose error is returned.  Returns 0 if
 * all checks pass.
 */
static int
rule_grant_setcred(const struct rule *const rule,
    const struct ucred *const old_cred, const struct ucred *const new_cred)
{
	int error;

	error = rule_grant_users(rule, old_cred, new_cred);
	if (error == 0)
		error = rule_grant_primary_groups(rule, old_cred, new_cred);
	if (error == 0)
		error = rule_grant_supplementary_groups(rule, old_cred,
		    new_cred);

	return (error);
}
1910 
1911 static bool
1912 rule_applies(const struct rule *const rule, const struct ucred *const cred)
1913 {
1914 	if (rule->from_type == IT_UID && rule->from_id == cred->cr_ruid)
1915 		return (true);
1916 	if (rule->from_type == IT_GID && realgroupmember(rule->from_id, cred))
1917 		return (true);
1918 	return (false);
1919 }
1920 
/*
 * To pass data between check_setcred() and priv_grant() (on PRIV_CRED_SETCRED).
 *
 * 'new_cred' and 'setcred_flags' are reset (to NULL and 0) by
 * mac_do_setcred_enter(), recorded by mac_do_check_setcred() and consumed by
 * mac_do_priv_grant().
 */
struct mac_do_setcred_data {
	/* Must stay first. */
	struct mac_do_data_header hdr;
	/* Credentials requested through setcred(). */
	const struct ucred *new_cred;
	/* Flags passed to setcred() (SETCREDF_*). */
	u_int setcred_flags;
};
1929 
1930 static int
1931 mac_do_priv_grant(struct ucred *cred, int priv)
1932 {
1933 	struct mac_do_setcred_data *const data = fetch_data();
1934 	const struct rules *rules;
1935 	const struct ucred *new_cred;
1936 	const struct rule *rule;
1937 	u_int setcred_flags;
1938 	int error;
1939 
1940 	/* Bail out fast if we aren't concerned. */
1941 	if (priv != PRIV_CRED_SETCRED)
1942 		return (EPERM);
1943 
1944 	/*
1945 	 * Do we have to do something?
1946 	 */
1947 	if (check_data_usable(data, sizeof(*data), priv) != 0)
1948 		/* No. */
1949 		return (EPERM);
1950 
1951 	rules = data->hdr.rules;
1952 	new_cred = data->new_cred;
1953 	KASSERT(new_cred != NULL,
1954 	    ("priv_check*() called before mac_cred_check_setcred()"));
1955 	setcred_flags = data->setcred_flags;
1956 
1957 	/*
1958 	 * Explicitly check that only the flags we currently support are present
1959 	 * in order to avoid accepting transitions with other changes than those
1960 	 * we are actually going to check.  Currently, this rules out the
1961 	 * SETCREDF_MAC_LABEL flag.  This may be improved by adding code
1962 	 * actually checking whether the requested label and the current one
1963 	 * would differ.
1964 	 */
1965 	if ((setcred_flags & ~(SETCREDF_UID | SETCREDF_RUID | SETCREDF_SVUID |
1966 	    SETCREDF_GID | SETCREDF_RGID | SETCREDF_SVGID |
1967 	    SETCREDF_SUPP_GROUPS)) != 0)
1968 		return (EPERM);
1969 
1970 	/*
1971 	 * Browse rules, and for those that match the requestor, call specific
1972 	 * privilege granting functions interpreting the "to"/"target" part.
1973 	 */
1974 	error = EPERM;
1975 	STAILQ_FOREACH(rule, &rules->head, r_entries)
1976 	    if (rule_applies(rule, cred)) {
1977 		    error = rule_grant_setcred(rule, cred, new_cred);
1978 		    if (error != EPERM)
1979 			    break;
1980 	    }
1981 
1982 	return (error);
1983 }
1984 
1985 static int
1986 check_proc(void)
1987 {
1988 	char *path, *to_free;
1989 	int error;
1990 
1991 	/*
1992 	 * Only grant privileges if requested by the right executable.
1993 	 *
1994 	 * XXXOC: We may want to base this check on a tunable path and/or
1995 	 * a specific MAC label.  Going even further, e.g., envisioning to
1996 	 * completely replace the path check with the latter, we would need to
1997 	 * install FreeBSD on a FS with multilabel enabled by default, which in
1998 	 * practice entails adding an option to ZFS to set MNT_MULTILABEL
1999 	 * automatically on mounts, ensuring that root (and more if using
2000 	 * different partitions) ZFS or UFS filesystems are created with
2001 	 * multilabel turned on, and having the installation procedure support
2002 	 * setting a MAC label per file (perhaps via additions to mtree(1)).  So
2003 	 * this probably isn't going to happen overnight, if ever.
2004 	 */
2005 	if (vn_fullpath(curproc->p_textvp, &path, &to_free) != 0)
2006 		return (EPERM);
2007 	error = strcmp(path, "/usr/bin/mdo") == 0 ? 0 : EPERM;
2008 	free(to_free, M_TEMP);
2009 	return (error);
2010 }
2011 
2012 static void
2013 mac_do_setcred_enter(void)
2014 {
2015 	struct rules *rules;
2016 	struct prison *pr;
2017 	struct mac_do_setcred_data * data;
2018 	int error;
2019 
2020 	/*
2021 	 * If not enabled, don't prepare data.  Other hooks will check for that
2022 	 * to know if they have to do something.
2023 	 */
2024 	if (do_enabled == 0)
2025 		return;
2026 
2027 	/*
2028 	 * MAC/do only applies to a process launched from a given executable.
2029 	 * For other processes, we just won't intervene (we don't deny requests,
2030 	 * nor do we grant privileges to them).
2031 	 */
2032 	error = check_proc();
2033 	if (error != 0)
2034 		return;
2035 
2036 	/*
2037 	 * Find the currently applicable rules.
2038 	 */
2039 	rules = find_rules(curproc->p_ucred->cr_prison, &pr);
2040 	hold_rules(rules);
2041 	prison_unlock(pr);
2042 
2043 	/*
2044 	 * Setup thread data to be used by other hooks.
2045 	 */
2046 	data = fetch_data();
2047 	if (!is_data_reusable(data, sizeof(*data)))
2048 		data = alloc_data(data, sizeof(*data));
2049 	set_data_header(data, sizeof(*data), PRIV_CRED_SETCRED, rules);
2050 	/* Not really necessary, but helps to catch programming errors. */
2051 	data->new_cred = NULL;
2052 	data->setcred_flags = 0;
2053 }
2054 
2055 static int
2056 mac_do_check_setcred(u_int flags, const struct ucred *const old_cred,
2057     struct ucred *const new_cred)
2058 {
2059 	struct mac_do_setcred_data *const data = fetch_data();
2060 
2061 	/*
2062 	 * Do we have to do something?
2063 	 */
2064 	if (check_data_usable(data, sizeof(*data), PRIV_CRED_SETCRED) != 0)
2065 		/* No. */
2066 		return (0);
2067 
2068 	/*
2069 	 * Keep track of the setcred() flags and the new credentials for
2070 	 * priv_check*().
2071 	 */
2072 	data->new_cred = new_cred;
2073 	data->setcred_flags = flags;
2074 
2075 	return (0);
2076 }
2077 
2078 static void
2079 mac_do_setcred_exit(void)
2080 {
2081 	struct mac_do_setcred_data *const data = fetch_data();
2082 
2083 	if (check_data_usable(data, sizeof(*data), PRIV_CRED_SETCRED) == 0)
2084 		/*
2085 		 * This doesn't deallocate the small per-thread data storage,
2086 		 * which can be reused on subsequent calls.  (That data is of
2087 		 * course deallocated as the current thread dies or this module
2088 		 * is unloaded.)
2089 		 */
2090 		clear_data(data);
2091 }
2092 
2093 static void
2094 mac_do_init(struct mac_policy_conf *mpc)
2095 {
2096 	struct prison *pr;
2097 
2098 	osd_jail_slot = osd_jail_register(dealloc_jail_osd, osd_methods);
2099 	set_empty_rules(&prison0);
2100 	sx_slock(&allprison_lock);
2101 	TAILQ_FOREACH(pr, &allprison, pr_list)
2102 	    set_empty_rules(pr);
2103 	sx_sunlock(&allprison_lock);
2104 
2105 	osd_thread_slot = osd_thread_register(dealloc_thread_osd);
2106 }
2107 
/*
 * Policy destruction: Deregisters both OSD slots registered by mac_do_init(),
 * in an order that matters (see below).
 */
static void
mac_do_destroy(struct mac_policy_conf *mpc)
{
	/*
	 * osd_thread_deregister() must be called before osd_jail_deregister(),
	 * for the reason explained in dealloc_jail_osd().
	 */
	osd_thread_deregister(osd_thread_slot);
	osd_jail_deregister(osd_jail_slot);
}
2118 
/*
 * MAC framework entry points implemented by this policy: Module
 * initialization and destruction, the three setcred() hooks (enter, check and
 * exit) and the privilege granting hook.
 */
static struct mac_policy_ops do_ops = {
	.mpo_init = mac_do_init,
	.mpo_destroy = mac_do_destroy,
	.mpo_cred_setcred_enter = mac_do_setcred_enter,
	.mpo_cred_check_setcred = mac_do_check_setcred,
	.mpo_cred_setcred_exit = mac_do_setcred_exit,
	.mpo_priv_grant = mac_do_priv_grant,
};
2127 
/*
 * Declare the "MAC/do" policy (module name 'mac_do') with the operations
 * above.  MPC_LOADTIME_FLAG_UNLOADOK is passed, and mac_do_destroy() handles
 * the unload case.
 */
MAC_POLICY_SET(&do_ops, mac_do, "MAC/do", MPC_LOADTIME_FLAG_UNLOADOK, NULL);
MODULE_VERSION(mac_do, 1);
2130