xref: /freebsd/sys/security/mac_do/mac_do.c (revision c27f7d6b9cf6d4ab01cb3d0972726c14e0aca146)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright(c) 2024 Baptiste Daroussin <bapt@FreeBSD.org>
5  * Copyright (c) 2024 The FreeBSD Foundation
6  *
7  * Portions of this software were developed by Olivier Certner
8  * <olce.freebsd@certner.fr> at Kumacom SARL under sponsorship from the FreeBSD
9  * Foundation.
10  */
11 
12 #include <sys/param.h>
13 #include <sys/systm.h>
14 #include <sys/ctype.h>
15 #include <sys/jail.h>
16 #include <sys/kernel.h>
17 #include <sys/limits.h>
18 #include <sys/lock.h>
19 #include <sys/malloc.h>
20 #include <sys/module.h>
21 #include <sys/mount.h>
22 #include <sys/mutex.h>
23 #include <sys/priv.h>
24 #include <sys/proc.h>
25 #include <sys/refcount.h>
26 #include <sys/socket.h>
27 #include <sys/sx.h>
28 #include <sys/sysctl.h>
29 #include <sys/ucred.h>
30 #include <sys/vnode.h>
31 
32 #include <machine/stdarg.h>
33 
34 #include <security/mac/mac_policy.h>
35 
/* Root of the "security.mac.do" sysctl subtree holding this policy's knobs. */
static SYSCTL_NODE(_security_mac, OID_AUTO, do,
    CTLFLAG_RW|CTLFLAG_MPSAFE, 0, "mac_do policy controls");

/*
 * "security.mac.do.enabled": read-write integer that can also be set as a
 * tunable (CTLFLAG_RWTUN).  Defaults to 1.
 */
static int	do_enabled = 1;
SYSCTL_INT(_security_mac_do, OID_AUTO, enabled, CTLFLAG_RWTUN,
    &do_enabled, 0, "Enforce do policy");

/*
 * "security.mac.do.print_parse_error": read-write integer that can also be set
 * as a tunable (CTLFLAG_RWTUN).  Defaults to 1.
 */
static int	print_parse_error = 1;
SYSCTL_INT(_security_mac_do, OID_AUTO, print_parse_error, CTLFLAG_RWTUN,
    &print_parse_error, 0, "Print parse errors on setting rules "
    "(via sysctl(8)).");
47 
/* malloc(9) type used by the allocations of the parsing code below. */
static MALLOC_DEFINE(M_DO, "do_rule", "Rules for mac_do");

/*
 * Size of the buffer holding a rules specification string, terminating NUL
 * included (see 'struct rules' and parse_rules()).
 */
#define MAC_RULE_STRING_LEN	1024

/* OSD slot used to retrieve a jail's rules (see find_rules()). */
static unsigned		osd_jail_slot;
/* NOTE(review): presumably a per-thread OSD slot; its users are not visible here. */
static unsigned		osd_thread_slot;
54 
/*
 * ID types, stored in variables of type 'id_type_t'.  IT_ANY is rejected in
 * the "from" part of a rule (see parse_single_rule()) and is thus only usable
 * in target clauses.
 */
#define IT_INVALID	0 /* Must stay 0. */
#define IT_UID		1
#define IT_GID		2
#define IT_ANY		3
#define IT_LAST		IT_ANY

/* Canonical textual form of each ID type, indexed by IT_* value. */
static const char *id_type_to_str[] = {
	[IT_INVALID]	= "invalid",
	[IT_UID]	= "uid",
	[IT_GID]	= "gid",
	/* See also parse_id_type(). */
	[IT_ANY]	= "*",
};
68 
/* Size of the message buffer in 'struct parse_error', NUL included. */
#define PARSE_ERROR_SIZE	256

/*
 * Description of a parse failure, allocated by make_parse_error() and released
 * by free_parse_error().
 *
 * 'pos' is an offset into the parsed string; callers of the nested parsing
 * functions add their own token's offset to it as the error propagates up.
 * 'msg' is the NUL-terminated, possibly truncated, formatted message.
 */
struct parse_error {
	size_t	pos;
	char	msg[PARSE_ERROR_SIZE];
};

/*
 * We assume that 'uid_t' and 'gid_t' are aliases to 'u_int' in conversions
 * required for parsing rules specification strings.
 */
_Static_assert(sizeof(uid_t) == sizeof(u_int) && (uid_t)-1 >= 0 &&
    sizeof(gid_t) == sizeof(u_int) && (gid_t)-1 >= 0,
    "mac_do(4) assumes that 'uid_t' and 'gid_t' are aliases to 'u_int'");
83 
84 /*
85  * Internal flags.
86  *
87  * They either apply as per-type (t) or per-ID (i) but are conflated because all
88  * per-ID flags are also valid as per-type ones to qualify the "current" (".")
89  * per-type flag.  Also, some of them are in fact exclusive, but we use one-hot
90  * encoding for simplicity.
91  *
92  * There is currently room for "only" 16 bits.  As these flags are purely
93  * internal, they can be renumbered and/or their type changed as needed.
94  *
95  * See also the check_*() functions below.
96  */
/* Holds a combination of the MDF_* bits below. */
typedef uint16_t	flags_t;

/* (i,gid) Specification concerns primary groups. */
#define MDF_PRIMARY	(1u << 0)
/* (i,gid) Group is allowed as a supplementary group ('+' flag). */
#define MDF_SUPP_ALLOW	(1u << 1)
/* (i,gid) Group must appear as a supplementary group ('!' flag). */
#define MDF_SUPP_MUST	(1u << 2)
/* (i,gid) Group must not appear as a supplementary group ('-' flag). */
#define MDF_SUPP_DONT	(1u << 3)
/* All per-ID flags about supplementary groups. */
#define MDF_SUPP_MASK	(MDF_SUPP_ALLOW | MDF_SUPP_MUST | MDF_SUPP_DONT)
/* All per-ID flags. */
#define MDF_ID_MASK	(MDF_PRIMARY | MDF_SUPP_MASK)

/*
 * (t) All IDs allowed.
 *
 * For GIDs, MDF_ANY only concerns primary groups.  The MDF_PRIMARY and
 * MDF_SUPP_* flags never apply to MDF_ANY, but can be present if MDF_CURRENT is
 * present also, as usual.
 */
#define MDF_ANY			(1u << 8)
/* (t) Current IDs allowed. */
#define MDF_CURRENT		(1u << 9)
/* Per-type flags meaningful for both UIDs and GIDs. */
#define MDF_TYPE_COMMON_MASK	(MDF_ANY | MDF_CURRENT)
/* (t,gid) All IDs allowed as supplementary groups. */
#define MDF_ANY_SUPP		(1u << 10)
/* (t,gid) Some ID or MDF_CURRENT has MDF_SUPP_MUST or MDF_SUPP_DONT. */
#define MDF_MAY_REJ_SUPP	(1u << 11)
/* (t,gid) Some explicit ID (not MDF_CURRENT) has MDF_SUPP_MUST. */
#define MDF_EXPLICIT_SUPP_MUST	(1u << 12)
/* (t,gid) Whether any target clause is about primary groups.  Used during
 * parsing only. */
#define MDF_HAS_PRIMARY_CLAUSE	(1u << 13)
/* (t,gid) Whether any target clause is about supplementary groups.  Used during
 * parsing only. */
#define MDF_HAS_SUPP_CLAUSE	(1u << 14)
/* Per-type flags meaningful for GIDs only. */
#define MDF_TYPE_GID_MASK	(MDF_ANY_SUPP | MDF_MAY_REJ_SUPP |	\
    MDF_EXPLICIT_SUPP_MUST | MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE)
/* All per-type flags. */
#define MDF_TYPE_MASK		(MDF_TYPE_COMMON_MASK | MDF_TYPE_GID_MASK)
136 
137 /*
138  * Persistent structures.
139  */
140 
/*
 * A single target ID with its per-ID flags.  Whether 'id' is a UID or a GID
 * depends on the array ('uids' or 'gids' of 'struct rule') holding it.
 */
struct id_spec {
	u_int		 id;
	flags_t		 flags; /* See MDF_* above. */
};
145 
146 /*
147  * This limits the number of target clauses per type to 65535.  With the current
148  * value of MAC_RULE_STRING_LEN (1024), this is way more than enough anyway.
149  */
/* Number of target IDs of a given type in a rule. */
typedef uint16_t	 id_nb_t;
/* We only have a few IT_* types. */
typedef uint16_t	 id_type_t;

/*
 * One parsed rule.
 *
 * 'from_type' and 'from_id' come from the "from" part (IT_UID or IT_GID only,
 * see parse_single_rule()).  'uid_flags' and 'gid_flags' hold the per-type
 * flags of the target part.  'uids' and 'gids' are arrays of 'uids_nb' and
 * 'gids_nb' elements, sorted by ID (see pour_list_into_rule()), and are left
 * NULL when the corresponding count is 0.
 */
struct rule {
	STAILQ_ENTRY(rule) r_entries;
	id_type_t	 from_type;
	u_int		 from_id;
	flags_t		 uid_flags; /* See MDF_* above. */
	id_nb_t		 uids_nb;
	flags_t		 gid_flags; /* See MDF_* above. */
	id_nb_t		 gids_nb;
	struct id_spec	*uids;
	struct id_spec	*gids;
};
165 
/* Tail queue of 'struct rule', linked through 'r_entries'. */
STAILQ_HEAD(rulehead, rule);

/*
 * A complete set of rules.
 *
 * 'string' keeps a copy of the specification string the rules were parsed
 * from (see parse_rules()), 'head' lists the parsed rules in specification
 * order, and 'use_count' is a reference count manipulated by hold_rules() and
 * drop_rules(), the structure being destroyed by the latter when the last
 * reference goes away.
 */
struct rules {
	char		string[MAC_RULE_STRING_LEN];
	struct rulehead	head;
	volatile u_int	use_count __aligned(CACHE_LINE_SIZE);
};
173 
174 /*
175  * Temporary structures used to build a 'struct rule' above.
176  */
177 
/*
 * List element wrapping a 'struct id_spec' while a rule's target clauses are
 * being parsed.  Elements are freed once copied into the rule's arrays (see
 * pour_list_into_rule()) or on parse error (see parse_single_rule()).
 */
struct id_elem {
	STAILQ_ENTRY(id_elem) ie_entries;
	struct id_spec spec;
};

/* Tail queue of 'struct id_elem', linked through 'ie_entries'. */
STAILQ_HEAD(id_list, id_elem);
184 
185 #ifdef INVARIANTS
186 static void
187 check_type(const id_type_t type)
188 {
189 	if (type > IT_LAST)
190 		panic("Invalid type number %u", type);
191 }
192 
193 static void
194 panic_for_unexpected_flags(const id_type_t type, const flags_t flags,
195     const char *const str)
196 {
197 	panic("ID type %s: Unexpected flags %u (%s), ", id_type_to_str[type],
198 	    flags, str);
199 }
200 
201 static void
202 check_type_and_id_flags(const id_type_t type, const flags_t flags)
203 {
204 	const char *str;
205 
206 	check_type(type);
207 	switch (type) {
208 	case IT_UID:
209 		if (flags != 0) {
210 			str = "only 0 allowed";
211 			goto unexpected_flags;
212 		}
213 		break;
214 	case IT_GID:
215 		if ((flags & ~MDF_ID_MASK) != 0) {
216 			str = "only bits in MDF_ID_MASK allowed";
217 			goto unexpected_flags;
218 		}
219 		if (!powerof2(flags & MDF_SUPP_MASK)) {
220 			str = "only a single flag in MDF_SUPP_MASK allowed";
221 			goto unexpected_flags;
222 		}
223 		break;
224 	default:
225 	    __assert_unreachable();
226 	}
227 	return;
228 
229 unexpected_flags:
230 	panic_for_unexpected_flags(type, flags, str);
231 }
232 
233 static void
234 check_type_and_id_spec(const id_type_t type, const struct id_spec *const is)
235 {
236 	check_type_and_id_flags(type, is->flags);
237 }
238 
239 static void
240 check_type_and_type_flags(const id_type_t type, const flags_t flags)
241 {
242 	const char *str;
243 
244 	check_type_and_id_flags(type, flags & MDF_ID_MASK);
245 	if ((flags & ~MDF_ID_MASK & ~MDF_TYPE_MASK) != 0) {
246 		str = "only MDF_ID_MASK | MDF_TYPE_MASK bits allowed";
247 		goto unexpected_flags;
248 	}
249 	if ((flags & MDF_ANY) != 0 && (flags & MDF_CURRENT) != 0 &&
250 	    (type == IT_UID || (flags & MDF_PRIMARY) != 0)) {
251 		str = "MDF_ANY and MDF_CURRENT are exclusive for UIDs "
252 		    "or primary group GIDs";
253 		goto unexpected_flags;
254 	}
255 	if ((flags & MDF_ANY_SUPP) != 0 && (flags & MDF_CURRENT) != 0 &&
256 	    (flags & MDF_SUPP_MASK) != 0) {
257 		str = "MDF_SUPP_ANY and MDF_CURRENT with supplementary "
258 		    "groups specification are exclusive";
259 		goto unexpected_flags;
260 	}
261 	if (type == IT_GID &&
262 	    ((flags & MDF_PRIMARY) != 0 || (flags & MDF_ANY) != 0) &&
263 	    (flags & MDF_HAS_PRIMARY_CLAUSE) == 0) {
264 		str = "Presence of folded primary clause not reflected "
265 		    "by presence of MDF_HAS_PRIMARY_CLAUSE";
266 		goto unexpected_flags;
267 	}
268 	if (((flags & MDF_SUPP_MASK) != 0 || (flags & MDF_ANY_SUPP) != 0) &&
269 	    (flags & MDF_HAS_SUPP_CLAUSE) == 0) {
270 		str = "Presence of folded supplementary clause not reflected "
271 		    "by presence of MDF_HAS_SUPP_CLAUSE";
272 		goto unexpected_flags;
273 	}
274 	return;
275 
276 unexpected_flags:
277 	panic_for_unexpected_flags(type, flags, str);
278 }
279 #else /* !INVARIANTS */
/*
 * Without INVARIANTS, the consistency checks expand to nothing (and their
 * arguments are not evaluated).
 */
#define check_type_and_id_flags(...)
#define check_type_and_id_spec(...)
#define check_type_and_type_flags(...)
283 #endif /* INVARIANTS */
284 
285 /*
286  * Returns EALREADY if both flags have some overlap, or EINVAL if flags are
287  * incompatible, else 0 with flags successfully merged into 'dest'.
288  */
289 static int
290 coalesce_id_flags(const flags_t src, flags_t *const dest)
291 {
292 	flags_t res;
293 
294 	if ((src & *dest) != 0)
295 		return (EALREADY);
296 
297 	res = src | *dest;
298 
299 	/* Check for compatibility of supplementary flags, and coalesce. */
300 	if ((res & MDF_SUPP_MASK) != 0) {
301 		/* MDF_SUPP_DONT incompatible with the rest. */
302 		if ((res & MDF_SUPP_DONT) != 0 && (res & MDF_SUPP_MASK &
303 		    ~MDF_SUPP_DONT) != 0)
304 			return (EINVAL);
305 		/*
306 		 * Coalesce MDF_SUPP_ALLOW and MDF_SUPP_MUST into MDF_SUPP_MUST.
307 		 */
308 		if ((res & MDF_SUPP_ALLOW) != 0 && (res & MDF_SUPP_MUST) != 0)
309 			res &= ~MDF_SUPP_ALLOW;
310 	}
311 
312 	*dest = res;
313 	return (0);
314 }
315 
316 static void
317 toast_rules(struct rules *const rules)
318 {
319 	struct rulehead *const head = &rules->head;
320 	struct rule *rule, *rule_next;
321 
322 	STAILQ_FOREACH_SAFE(rule, head, r_entries, rule_next) {
323 		free(rule->uids, M_DO);
324 		free(rule->gids, M_DO);
325 		free(rule, M_DO);
326 	}
327 	free(rules, M_DO);
328 }
329 
330 static struct rules *
331 alloc_rules(void)
332 {
333 	struct rules *const rules = malloc(sizeof(*rules), M_DO, M_WAITOK);
334 
335 	_Static_assert(MAC_RULE_STRING_LEN > 0, "MAC_RULE_STRING_LEN <= 0!");
336 	rules->string[0] = 0;
337 	STAILQ_INIT(&rules->head);
338 	rules->use_count = 0;
339 	return (rules);
340 }
341 
/* Tell whether 's' is a NULL pointer or points to an empty string. */
static bool
is_null_or_empty(const char *s)
{
	if (s == NULL)
		return (true);
	return (*s == '\0');
}
347 
348 /*
349  * String to unsigned int.
350  *
351  * Contrary to the "standard" strtou*() family of functions, do not tolerate
352  * spaces at start nor an empty string, and returns a status code, the 'u_int'
353  * result being returned through a passed pointer (if no error).
354  *
355  * We detour through 'quad_t' because in-kernel strto*() functions cannot set
356  * 'errno' and thus can't distinguish a true maximum value from one returned
357  * because of overflow.  We use 'quad_t' instead of 'u_quad_t' to support
358  * negative specifications (e.g., such as "-1" for UINT_MAX).
359  */
360 static int
361 strtoui_strict(const char *const restrict s, const char **const restrict endptr,
362     int base, u_int *result)
363 {
364 	char *ep;
365 	quad_t q;
366 
367 	/* Rule out spaces and empty specifications. */
368 	if (s[0] == '\0' || isspace(s[0])) {
369 		if (endptr != NULL)
370 			*endptr = s;
371 		return (EINVAL);
372 	}
373 
374 	q = strtoq(s, &ep, base);
375 	if (endptr != NULL)
376 		*endptr = ep;
377 	if (q < 0) {
378 		/* We allow specifying a negative number. */
379 		if (q < -(quad_t)UINT_MAX - 1 || q == QUAD_MIN)
380 			return (EOVERFLOW);
381 	} else {
382 		if (q > UINT_MAX || q == UQUAD_MAX)
383 			return (EOVERFLOW);
384 	}
385 
386 	*result = (u_int)q;
387 	return (0);
388 }
389 
/*
 * strsep() variant trimming spaces and tabs around the returned token.
 *
 * Blanks are removed at the start of the string and just before the delimiter
 * (or the end of the string) that terminates the token.  Blanks in the middle
 * of a token are kept.  As with strsep(), '*stringp' is updated to point after
 * the delimiter, or set to NULL if the end of the string was reached, and NULL
 * is returned if '*stringp' was NULL on entry.
 */
static char *
strsep_noblanks(char **const stringp, const char *delim)
{
	char *cursor, *token, *end;

	cursor = *stringp;
	if (cursor == NULL)
		return (NULL);

	/* Drop leading blanks. */
	cursor += strspn(cursor, " \t");

	token = strsep(&cursor, delim);

	/*
	 * Locate the token's terminator: The delimiter that strsep() just
	 * overwrote, or the string's final NUL when no delimiter was found.
	 */
	end = (cursor != NULL) ? cursor - 1 : token + strlen(token);

	/* Move the terminator back over trailing blanks. */
	while (end != token && (end[-1] == ' ' || end[-1] == '\t'))
		--end;
	*end = '\0';

	*stringp = cursor;
	return (token);
}
431 
432 
433 static void
434 make_parse_error(struct parse_error **const parse_error, const size_t pos,
435     const char *const fmt, ...)
436 {
437 	struct parse_error *const err = malloc(sizeof(*err), M_DO, M_WAITOK);
438 	va_list ap;
439 
440 	err->pos = pos;
441 	va_start(ap, fmt);
442 	vsnprintf(err->msg, PARSE_ERROR_SIZE, fmt, ap);
443 	va_end(ap);
444 
445 	MPASS(*parse_error == NULL);
446 	*parse_error = err;
447 }
448 
449 static void
450 free_parse_error(struct parse_error *const parse_error)
451 {
452 	free(parse_error, M_DO);
453 }
454 
455 static int
456 parse_id_type(const char *const string, id_type_t *const type,
457     struct parse_error **const parse_error)
458 {
459 	/*
460 	 * Special case for "any", as the canonical form for IT_ANY in
461 	 * id_type_to_str[] is "*".
462 	 */
463 	if (strcmp(string, "any") == 0) {
464 		*type = IT_ANY;
465 		return (0);
466 	}
467 
468 	/* Start at 1 to avoid parsing "invalid". */
469 	for (size_t i = 1; i <= IT_LAST; ++i) {
470 		if (strcmp(string, id_type_to_str[i]) == 0) {
471 			*type = i;
472 			return (0);
473 		}
474 	}
475 
476 	*type = IT_INVALID;
477 	make_parse_error(parse_error, 0, "No valid type found.");
478 	return (EINVAL);
479 }
480 
481 static size_t
482 parse_gid_flags(const char *const string, flags_t *const flags,
483     flags_t *const gid_flags)
484 {
485 	switch (string[0]) {
486 	case '+':
487 		*flags |= MDF_SUPP_ALLOW;
488 		goto has_supp_clause;
489 	case '!':
490 		*flags |= MDF_SUPP_MUST;
491 		*gid_flags |= MDF_MAY_REJ_SUPP;
492 		goto has_supp_clause;
493 	case '-':
494 		*flags |= MDF_SUPP_DONT;
495 		*gid_flags |= MDF_MAY_REJ_SUPP;
496 		goto has_supp_clause;
497 	has_supp_clause:
498 		*gid_flags |= MDF_HAS_SUPP_CLAUSE;
499 		return (1);
500 	}
501 
502 	return (0);
503 }
504 
/* Tell whether 'string' is exactly one of the "any ID" tokens "*" or "any". */
static bool
parse_any(const char *const string)
{
	if (strcmp(string, "any") == 0)
		return (true);
	return (strcmp(string, "*") == 0);
}
510 
511 static bool
512 has_clauses(const id_nb_t nb, const flags_t type_flags)
513 {
514 	return ((type_flags & MDF_TYPE_MASK) != 0 || nb != 0);
515 }
516 
/*
 * Parse a single target clause ('<flags><type>=<id>', or a lone "any"/"*"
 * type) and record it into 'rule' and the temporary ID lists.
 *
 * 'to' is the clause's text and is modified in place while being tokenized.
 * Per-type flags and ID counts are updated directly in 'rule' ('uid_flags',
 * 'uids_nb', 'gid_flags', 'gids_nb'), whereas explicit numerical IDs are
 * appended as newly allocated elements to 'uid_list' or 'gid_list' (duplicates
 * are not detected here).  '*parse_error' must be NULL on entry.
 *
 * Returns 0 on success, else EINVAL or EOVERFLOW with '*parse_error' set, its
 * position being relative to the start of 'to'.
 */
static int
parse_target_clause(char *to, struct rule *const rule,
    struct id_list *const uid_list, struct id_list *const gid_list,
    struct parse_error **const parse_error)
{
	const char *const start = to;
	char *to_type, *to_id;
	const char *p;
	struct id_list *list;
	id_nb_t *nb;
	flags_t *tflags;
	struct id_elem *ie;
	struct id_spec is = {.flags = 0};
	flags_t gid_flags = 0;
	id_type_t type;
	int error;

	MPASS(*parse_error == NULL);
	MPASS(to != NULL);
	to_type = strsep_noblanks(&to, "=");
	MPASS(to_type != NULL);
	/* Skip the optional flag character, recording it in 'is.flags'. */
	to_type += parse_gid_flags(to_type, &is.flags, &gid_flags);
	error = parse_id_type(to_type, &type, parse_error);
	if (error != 0)
		goto einval;
	if (type != IT_GID && is.flags != 0) {
		make_parse_error(parse_error, to_type - start,
		    "Expected type 'gid' after flags, not '%s'.",
		    to_type);
		goto einval;
	}

	/* Everything after '=' (NULL if there was no '='). */
	to_id = strsep_noblanks(&to, "");
	switch (type) {
	case IT_GID:
		if (to_id == NULL) {
			make_parse_error(parse_error, to_type - start,
			    "No '=' and ID specification after type '%s'.",
			    to_type);
			goto einval;
		}

		if (is.flags == 0) {
			/* No flags: Dealing with a primary group. */
			is.flags |= MDF_PRIMARY;
			gid_flags |= MDF_HAS_PRIMARY_CLAUSE;
		}

		list = gid_list;
		nb = &rule->gids_nb;
		tflags = &rule->gid_flags;

		/* "*" or "any"? */
		if (parse_any(to_id)) {
			/*
			 * We check that we have not seen any other clause of
			 * the same category (i.e., concerning primary or
			 * supplementary groups).
			 */
			if ((is.flags & MDF_PRIMARY) != 0) {
				if ((*tflags & MDF_HAS_PRIMARY_CLAUSE) != 0) {
					make_parse_error(parse_error,
					    to_id - start,
					    "'any' specified after another "
					    "(primary) GID.");
					goto einval;
				}
				*tflags |= gid_flags | MDF_ANY;
			} else {
				/*
				 * If a supplementary group flag was present, it
				 * must be MDF_SUPP_ALLOW ("+").
				 */
				if ((is.flags & MDF_SUPP_MASK) != MDF_SUPP_ALLOW) {
					make_parse_error(parse_error,
					    to_id - start,
					    "'any' specified with another "
					    "flag than '+'.");
					goto einval;
				}
				if ((*tflags & MDF_HAS_SUPP_CLAUSE) != 0) {
					make_parse_error(parse_error,
					    to_id - start,
					    "'any' with flag '+' specified after "
					    "another (supplementary) GID.");
					goto einval;
				}
				*tflags |= gid_flags | MDF_ANY_SUPP;
			}
			goto check_type_and_finish;
		} else {
			/*
			 * Check that we haven't already seen "any" for the same
			 * category.
			 */
			if ((is.flags & MDF_PRIMARY) != 0) {
				if ((*tflags & MDF_ANY) != 0) {
					make_parse_error(parse_error,
					    to_id - start,
					    "Some (primary) GID specified after "
					    "'any'.");
					goto einval;
				}
			} else if ((*tflags & MDF_ANY_SUPP) != 0 &&
			    (is.flags & MDF_SUPP_ALLOW) != 0) {
				make_parse_error(parse_error,
				    to_id - start,
				    "Some (supplementary) GID specified after "
				    "'any' with flag '+'.");
				goto einval;
			}
			*tflags |= gid_flags;
		}
		break;

	case IT_UID:
		if (to_id == NULL) {
			make_parse_error(parse_error, to_type - start,
			    "No '=' and ID specification after type '%s'.",
			    to_type);
			goto einval;
		}

		list = uid_list;
		nb = &rule->uids_nb;
		tflags = &rule->uid_flags;

		/* "*" or "any"? */
		if (parse_any(to_id)) {
			/* There must not be any other clause. */
			if (has_clauses(*nb, *tflags)) {
				make_parse_error(parse_error, to_id - start,
				    "'any' specified after another UID.");
				goto einval;
			}
			*tflags |= MDF_ANY;
			goto check_type_and_finish;
		} else {
			/*
			 * Check that we haven't already seen "any" for the same
			 * category.
			 */
			if ((*tflags & MDF_ANY) != 0) {
				make_parse_error(parse_error, to_id - start,
				    "Some UID specified after 'any'.");
				goto einval;
			}
		}
		break;

	case IT_ANY:
		/* No ID allowed. */
		if (to_id != NULL) {
			make_parse_error(parse_error, to_type - start,
			    "No '=' and ID allowed after type '%s'.", to_type);
			goto einval;
		}
		/*
		 * We can't have IT_ANY after any other IT_*, it must be the
		 * only one.
		 */
		if (has_clauses(rule->uids_nb, rule->uid_flags) ||
		    has_clauses(rule->gids_nb, rule->gid_flags)) {
			make_parse_error(parse_error, to_type - start,
			    "Target clause of type '%s' coming after another "
			    "clause (must be alone).", to_type);
			goto einval;
		}
		/* Any UID, any primary group and any supplementary groups. */
		rule->uid_flags |= MDF_ANY;
		rule->gid_flags |= MDF_ANY | MDF_ANY_SUPP |
		    MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE;
		goto finish;

	default:
		/* parse_id_type() returns no other types currently. */
		__assert_unreachable();
	}

	/* Rule out cases that have been treated above. */
	MPASS((type == IT_UID || type == IT_GID) && !parse_any(to_id));

	/* "."? */
	if (strcmp(to_id, ".") == 0) {
		if ((*tflags & MDF_CURRENT) != 0) {
			/* Duplicate "." <id>.  Try to coalesce. */
			error = coalesce_id_flags(is.flags, tflags);
			if (error != 0) {
				make_parse_error(parse_error, to_id - start,
				    "Incompatible flags with prior clause "
				    "with same target.");
				goto einval;
			}
		} else
			*tflags |= MDF_CURRENT | is.flags;
		goto check_type_and_finish;
	}

	/* Parse an ID. */
	error = strtoui_strict(to_id, &p, 10, &is.id);
	if (error != 0 || *p != '\0') {
		make_parse_error(parse_error, to_id - start,
		    "Cannot parse a numerical ID (base 10).");
		goto einval;
	}

	/* Explicit ID flags. */
	if (type == IT_GID && (is.flags & MDF_SUPP_MUST) != 0)
		*tflags |= MDF_EXPLICIT_SUPP_MUST;

	/*
	 * We check for duplicate IDs and coalesce their 'struct id_spec' only
	 * at end of parse_single_rule() because it is much more performant then
	 * (using sorted arrays).
	 */
	++*nb;
	/* The counter wrapped around to 0: Too many IDs for 'id_nb_t'. */
	if (*nb == 0) {
		make_parse_error(parse_error, 0,
		    "Too many target clauses of type '%s'.", to_type);
		return (EOVERFLOW);
	}
	ie = malloc(sizeof(*ie), M_DO, M_WAITOK);
	ie->spec = is;
	STAILQ_INSERT_TAIL(list, ie, ie_entries);
	check_type_and_id_spec(type, &is);
check_type_and_finish:
	check_type_and_type_flags(type, *tflags);
finish:
	return (0);
einval:
	/* We must have built a parse error on error. */
	MPASS(*parse_error != NULL);
	return (EINVAL);
}
750 
751 static int
752 u_int_cmp(const u_int i1, const u_int i2)
753 {
754 	return ((i1 > i2) - (i1 < i2));
755 }
756 
757 static int
758 id_spec_cmp(const void *const p1, const void *const p2)
759 {
760 	const struct id_spec *const is1 = p1;
761 	const struct id_spec *const is2 = p2;
762 
763 	return (u_int_cmp(is1->id, is2->id));
764 }
765 
766 /*
767  * Transfer content of 'list' into 'array', freeing and emptying list.
768  *
769  * 'nb' must be 'list''s length and not be greater than 'array''s size.  The
770  * destination array is sorted by ID.  Structures 'struct id_spec' with same IDs
771  * are coalesced if that makes sense (not including duplicate clauses), else
772  * EINVAL is returned.  On success, 'nb' is updated (lowered) to account for
773  * coalesced specifications.  The parameter 'type' is only for testing purposes
774  * (INVARIANTS).
775  */
776 static int
777 pour_list_into_rule(const id_type_t type, struct id_list *const list,
778     struct id_spec *const array, id_nb_t *const nb,
779     struct parse_error **const parse_error)
780 {
781 	struct id_elem *ie, *ie_next;
782 	size_t idx = 0;
783 
784 	/* Fill the array. */
785 	STAILQ_FOREACH_SAFE(ie, list, ie_entries, ie_next) {
786 		MPASS(idx < *nb);
787 		array[idx] = ie->spec;
788 		free(ie, M_DO);
789 		++idx;
790 	}
791 	MPASS(idx == *nb);
792 	STAILQ_INIT(list);
793 
794 	/* Sort it (by ID). */
795 	qsort(array, *nb, sizeof(*array), id_spec_cmp);
796 
797 	/* Coalesce same IDs. */
798 	if (*nb != 0) {
799 		size_t ref_idx = 0;
800 
801 		for (idx = 1; idx < *nb; ++idx) {
802 			const u_int id = array[idx].id;
803 
804 			if (id != array[ref_idx].id) {
805 				++ref_idx;
806 				if (ref_idx != idx)
807 					array[ref_idx] = array[idx];
808 				continue;
809 			}
810 
811 			switch (type) {
812 				int error;
813 
814 			case IT_GID:
815 				error = coalesce_id_flags(array[idx].flags,
816 				    &array[ref_idx].flags);
817 				if (error != 0) {
818 					make_parse_error(parse_error, 0,
819 					    "Incompatible flags or duplicate "
820 					    "GID %u.", id);
821 					return (EINVAL);
822 				}
823 				check_type_and_id_flags(type,
824 				    array[ref_idx].flags);
825 				break;
826 
827 			case IT_UID:
828 				/*
829 				 * No flags in this case.  Multiple appearances
830 				 * of the same UID is an exact redundancy, so
831 				 * error out.
832 				 */
833 				make_parse_error(parse_error, 0,
834 				    "Duplicate UID %u.", id);
835 				return (EINVAL);
836 
837 			default:
838 				__assert_unreachable();
839 			}
840 		}
841 		*nb = ref_idx + 1;
842 	}
843 
844 	return (0);
845 }
846 
847 /*
848  * See also the herald comment for parse_rules() below.
849  *
850  * The second part of a rule, called <target> (or <to>), is a comma-separated
851  * (',') list of '<flags><type>=<id>' clauses similar to that of the <from>
852  * part, with the extensions that <id> may also be "*" or "any" or ".", and that
853  * <flags> may contain at most one of the '+', '-' and '!' characters when
854  * <type> is "gid" (no flags are allowed for "uid").  No two clauses in a single
855  * <to> list may list the same <id>.  "*" and "any" both designate any ID for
856  * the <type>, and are aliases to each other.  In front of "any" (or "*"), only
857  * the '+' flag is allowed (in the "gid" case).  "." designates the process'
858  * current IDs for the <type>.  The precise meaning of flags and "." is
859  * explained in functions checking privileges below.
860  */
861 static int
862 parse_single_rule(char *rule, struct rules *const rules,
863     struct parse_error **const parse_error)
864 {
865 	const char *const start = rule;
866 	const char *from_type, *from_id, *p;
867 	char *to_list;
868 	struct id_list uid_list, gid_list;
869 	struct id_elem *ie, *ie_next;
870 	struct rule *new;
871 	int error;
872 
873 	MPASS(*parse_error == NULL);
874 	STAILQ_INIT(&uid_list);
875 	STAILQ_INIT(&gid_list);
876 
877 	/* Freed when the 'struct rules' container is freed. */
878 	new = malloc(sizeof(*new), M_DO, M_WAITOK | M_ZERO);
879 
880 	from_type = strsep_noblanks(&rule, "=");
881 	MPASS(from_type != NULL); /* Because 'rule' was not NULL. */
882 	error = parse_id_type(from_type, &new->from_type, parse_error);
883 	if (error != 0)
884 		goto einval;
885 	switch (new->from_type) {
886 	case IT_UID:
887 	case IT_GID:
888 		break;
889 	default:
890 		make_parse_error(parse_error, 0, "Type '%s' not allowed in "
891 		    "the \"from\" part of rules.");
892 		goto einval;
893 	}
894 
895 	from_id = strsep_noblanks(&rule, ":>");
896 	if (is_null_or_empty(from_id)) {
897 		make_parse_error(parse_error, 0, "No ID specified.");
898 		goto einval;
899 	}
900 
901 	error = strtoui_strict(from_id, &p, 10, &new->from_id);
902 	if (error != 0 || *p != '\0') {
903 		make_parse_error(parse_error, from_id - start,
904 		    "Cannot parse a numerical ID (base 10).");
905 		goto einval;
906 	}
907 
908 	/*
909 	 * We will now parse the "to" list.
910 	 *
911 	 * In order to ease parsing, we will begin by building lists of target
912 	 * UIDs and GIDs in local variables 'uid_list' and 'gid_list'.  The
913 	 * number of each type of IDs will be filled directly in 'new'.  At end
914 	 * of parse, we will allocate both arrays of IDs to be placed into the
915 	 * 'uids' and 'gids' members, sort them, and discard the tail queues
916 	 * used to build them.  This conversion to sorted arrays at end of parse
917 	 * allows to minimize memory allocations and enables searching IDs in
918 	 * O(log(n)) instead of linearly.
919 	 */
920 	to_list = strsep_noblanks(&rule, ",");
921 	if (to_list == NULL) {
922 		make_parse_error(parse_error, 0, "No target list.");
923 		goto einval;
924 	}
925 	do {
926 		error = parse_target_clause(to_list, new, &uid_list, &gid_list,
927 		    parse_error);
928 		if (error != 0) {
929 			(*parse_error)->pos += to_list - start;
930 			goto einval;
931 		}
932 
933 		to_list = strsep_noblanks(&rule, ",");
934 	} while (to_list != NULL);
935 
936 	if (new->uids_nb != 0) {
937 		new->uids = malloc(sizeof(*new->uids) * new->uids_nb, M_DO,
938 		    M_WAITOK);
939 		error = pour_list_into_rule(IT_UID, &uid_list, new->uids,
940 		    &new->uids_nb, parse_error);
941 		if (error != 0)
942 			goto einval;
943 	}
944 	MPASS(STAILQ_EMPTY(&uid_list));
945 	if (!has_clauses(new->uids_nb, new->uid_flags)) {
946 		/* No UID specified, default is "uid=.". */
947 		MPASS(new->uid_flags == 0);
948 		new->uid_flags = MDF_CURRENT;
949 		check_type_and_type_flags(IT_UID, new->uid_flags);
950 	}
951 
952 	if (new->gids_nb != 0) {
953 		new->gids = malloc(sizeof(*new->gids) * new->gids_nb, M_DO,
954 		    M_WAITOK);
955 		error = pour_list_into_rule(IT_GID, &gid_list, new->gids,
956 		    &new->gids_nb, parse_error);
957 		if (error != 0)
958 			goto einval;
959 	}
960 	MPASS(STAILQ_EMPTY(&gid_list));
961 	if (!has_clauses(new->gids_nb, new->gid_flags)) {
962 		/* No GID specified, default is "gid=.,!gid=.". */
963 		MPASS(new->gid_flags == 0);
964 		new->gid_flags = MDF_CURRENT | MDF_PRIMARY | MDF_SUPP_MUST |
965 		    MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE;
966 		check_type_and_type_flags(IT_GID, new->gid_flags);
967 	}
968 
969 	STAILQ_INSERT_TAIL(&rules->head, new, r_entries);
970 	return (0);
971 
972 einval:
973 	free(new->gids, M_DO);
974 	free(new->uids, M_DO);
975 	free(new, M_DO);
976 	STAILQ_FOREACH_SAFE(ie, &gid_list, ie_entries, ie_next)
977 	    free(ie, M_DO);
978 	STAILQ_FOREACH_SAFE(ie, &uid_list, ie_entries, ie_next)
979 	    free(ie, M_DO);
980 	MPASS(*parse_error != NULL);
981 	return (EINVAL);
982 }
983 
984 /*
985  * Parse rules specification and produce rule structures out of it.
986  *
987  * Returns 0 on success, with '*rulesp' made to point to a 'struct rule'
988  * representing the rules.  On error, the returned value is non-zero and
989  * '*rulesp' is unchanged.  If 'string' has length greater or equal to
990  * MAC_RULE_STRING_LEN, ENAMETOOLONG is returned.  If it is not in the expected
991  * format, EINVAL is returned.  If an error is returned, '*parse_error' is set
992  * to point to a 'struct parse_error' giving an error message for the problem,
993  * else '*parse_error' is set to NULL.
994  *
995  * Expected format: A >-colon-separated list of rules of the form
996  * "<from>><target>" (for backwards compatibility, a semi-colon ":" is accepted
997  * in place of '>').  The <from> part is of the form "<type>=<id>" where <type>
998  * is "uid" or "gid", <id> an UID or GID (depending on <type>) and <target> is
999  * "*", "any" or a comma-separated list of '<flags><type>=<id>' clauses (see the
1000  * comment for parse_single_rule() for more details).  For convenience, empty
1001  * rules are allowed (and do nothing), and spaces and tabs are allowed (and
1002  * removed) around each token (tokens are natural ones, except that
1003  * '<flags><type>' as a whole is considered a single token, so no blanks are
1004  * allowed between '<flags>' and '<type>').
1005  *
1006  * Examples:
1007  * - "uid=1001>uid=1010,gid=1010;uid=1002>any"
1008  * - "gid=1010>gid=1011,gid=1012,gid=1013"
1009  */
1010 static int
1011 parse_rules(const char *const string, struct rules **const rulesp,
1012     struct parse_error **const parse_error)
1013 {
1014 	const size_t len = strlen(string);
1015 	char *copy, *p, *rule;
1016 	struct rules *rules;
1017 	int error = 0;
1018 
1019 	*parse_error = NULL;
1020 
1021 	if (len >= MAC_RULE_STRING_LEN) {
1022 		make_parse_error(parse_error, 0,
1023 		    "Rule specification string is too long (%zu, max %zu)",
1024 		    len, MAC_RULE_STRING_LEN - 1);
1025 		return (ENAMETOOLONG);
1026 	}
1027 
1028 	rules = alloc_rules();
1029 	bcopy(string, rules->string, len + 1);
1030 	MPASS(rules->string[len] == '\0'); /* Catch some races. */
1031 
1032 	copy = malloc(len + 1, M_DO, M_WAITOK);
1033 	bcopy(string, copy, len + 1);
1034 	MPASS(copy[len] == '\0'); /* Catch some races. */
1035 
1036 	p = copy;
1037 	while ((rule = strsep_noblanks(&p, ";")) != NULL) {
1038 		if (rule[0] == '\0')
1039 			continue;
1040 		error = parse_single_rule(rule, rules, parse_error);
1041 		if (error != 0) {
1042 			(*parse_error)->pos += rule - copy;
1043 			toast_rules(rules);
1044 			goto out;
1045 		}
1046 	}
1047 
1048 	*rulesp = rules;
1049 out:
1050 	free(copy, M_DO);
1051 	return (error);
1052 }
1053 
1054 /*
1055  * Find rules applicable to the passed prison.
1056  *
1057  * Returns the applicable rules (and never NULL).  'pr' must be unlocked.
1058  * 'aprp' is set to the (ancestor) prison holding these, and it must be unlocked
1059  * once the caller is done accessing the rules.  '*aprp' is equal to 'pr' if and
1060  * only if the current jail has its own set of rules.
1061  */
1062 static struct rules *
1063 find_rules(struct prison *const pr, struct prison **const aprp)
1064 {
1065 	struct prison *cpr, *ppr;
1066 	struct rules *rules;
1067 
1068 	cpr = pr;
1069 	for (;;) {
1070 		prison_lock(cpr);
1071 		rules = osd_jail_get(cpr, osd_jail_slot);
1072 		if (rules != NULL)
1073 			break;
1074 		prison_unlock(cpr);
1075 
1076 		ppr = cpr->pr_parent;
1077 		MPASS(ppr != NULL); /* prison0 always has rules. */
1078 		cpr = ppr;
1079 	}
1080 
1081 	*aprp = cpr;
1082 	return (rules);
1083 }
1084 
1085 static void
1086 hold_rules(struct rules *const rules)
1087 {
1088 	refcount_acquire(&rules->use_count);
1089 }
1090 
1091 static void
1092 drop_rules(struct rules *const rules)
1093 {
1094 	if (refcount_release(&rules->use_count))
1095 		toast_rules(rules);
1096 }
1097 
#ifdef INVARIANTS
/*
 * Debugging aid: Panic if the use count of 'rules' is not 'expected'.
 * Compiles to nothing without INVARIANTS.
 */
static void
check_rules_use_count(const struct rules *const rules, u_int expected)
{
	const u_int actual = refcount_load(&rules->use_count);

	if (actual == expected)
		return;
	panic("MAC/do: Rules at %p: Use count is %u, expected %u",
	    rules, actual, expected);
}
#else
#define check_rules_use_count(...)
#endif /* INVARIANTS */
1111 
/*
 * Destructor for the 'osd_jail_slot' OSD slot.
 *
 * 'value' is never NULL.  Things are arranged so that this is called only when
 * the jail owning the slot goes away or when the module is unloaded.
 */
static void
dealloc_jail_osd(void *const value)
{
	struct rules *const jail_rules = value;

	/*
	 * We expect to hold the only remaining reference:
	 * - When the "holding" jail goes down, no thread of that jail (or of
	 *   its sub-jails) should be executing anymore, so in particular none
	 *   can be in the middle of setcred().
	 * - Module unload is more subtle.  The MAC framework ensures no hook
	 *   runs while a module unloads, but unload may happen between two
	 *   hook calls of a same operation (e.g., setcred()), during which the
	 *   rules' reference count has been bumped so that they stay alive
	 *   even if the rules of the "holding" jail are changed concurrently.
	 *   These extra references are kept in our thread OSD slot, which is
	 *   why mac_do_destroy() frees all threads' slots before jails' ones.
	 */
	check_rules_use_count(jail_rules, 1);
	toast_rules(jail_rules);
}
1140 
1141 /*
1142  * Remove the rules specifically associated to a prison.
1143  *
1144  * In practice, this means that the rules become inherited (from the closest
1145  * ascendant that has some).
1146  *
1147  * Destroys the 'osd_jail_slot' slot of the passed jail.
1148  */
1149 static void
1150 remove_rules(struct prison *const pr)
1151 {
1152 	struct rules *old_rules;
1153 	int error __unused;
1154 
1155 	prison_lock(pr);
1156 	/*
1157 	 * We go to the burden of extracting rules first instead of just letting
1158 	 * osd_jail_del() calling dealloc_jail_osd() as we want to decrement
1159 	 * their use count, and possibly free them, outside of the prison lock.
1160 	 */
1161 	old_rules = osd_jail_get(pr, osd_jail_slot);
1162 	error = osd_jail_set(pr, osd_jail_slot, NULL);
1163 	/* osd_set() never fails nor allocate memory when 'value' is NULL. */
1164 	MPASS(error == 0);
1165 	/*
1166 	 * This completely frees the OSD slot, but doesn't call the destructor
1167 	 * since we've just put NULL in the slot.
1168 	 */
1169 	osd_jail_del(pr, osd_jail_slot);
1170 	prison_unlock(pr);
1171 
1172 	if (old_rules != NULL)
1173 		drop_rules(old_rules);
1174 }
1175 
1176 /*
1177  * Assign already built rules to a jail.
1178  */
1179 static void
1180 set_rules(struct prison *const pr, struct rules *const rules)
1181 {
1182 	struct rules *old_rules;
1183 	void **rsv;
1184 
1185 	check_rules_use_count(rules, 0);
1186 	hold_rules(rules);
1187 	rsv = osd_reserve(osd_jail_slot);
1188 
1189 	prison_lock(pr);
1190 	old_rules = osd_jail_get(pr, osd_jail_slot);
1191 	osd_jail_set_reserved(pr, osd_jail_slot, rsv, rules);
1192 	prison_unlock(pr);
1193 	if (old_rules != NULL)
1194 		drop_rules(old_rules);
1195 }
1196 
/*
 * Attach a freshly allocated, empty set of rules to a jail.
 */
static void
set_empty_rules(struct prison *const pr)
{
	set_rules(pr, alloc_rules());
}
1207 
/*
 * Build rules from a specification string and, on success, attach them to
 * a jail.
 *
 * Error codes and '*parse_error' are those of parse_rules() (which see).  On
 * error, the jail's rules are left untouched.
 */
static int
parse_and_set_rules(struct prison *const pr, const char *rules_string,
    struct parse_error **const parse_error)
{
	struct rules *parsed;
	const int error = parse_rules(rules_string, &parsed, parse_error);

	if (error == 0)
		set_rules(pr, parsed);
	return (error);
}
1226 
1227 static int
1228 mac_do_sysctl_rules(SYSCTL_HANDLER_ARGS)
1229 {
1230 	char *const buf = malloc(MAC_RULE_STRING_LEN, M_DO, M_WAITOK);
1231 	struct prison *const td_pr = req->td->td_ucred->cr_prison;
1232 	struct prison *pr;
1233 	struct rules *rules;
1234 	struct parse_error *parse_error;
1235 	int error;
1236 
1237 	rules = find_rules(td_pr, &pr);
1238 	strlcpy(buf, rules->string, MAC_RULE_STRING_LEN);
1239 	prison_unlock(pr);
1240 
1241 	error = sysctl_handle_string(oidp, buf, MAC_RULE_STRING_LEN, req);
1242 	if (error != 0 || req->newptr == NULL)
1243 		goto out;
1244 
1245 	/* Set our prison's rules, not that of the jail we inherited from. */
1246 	error = parse_and_set_rules(td_pr, buf, &parse_error);
1247 	if (error != 0) {
1248 		if (print_parse_error)
1249 			printf("MAC/do: Parse error at index %zu: %s\n",
1250 			    parse_error->pos, parse_error->msg);
1251 		free_parse_error(parse_error);
1252 	}
1253 out:
1254 	free(buf, M_DO);
1255 	return (error);
1256 }
1257 
1258 SYSCTL_PROC(_security_mac_do, OID_AUTO, rules,
1259     CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_PRISON|CTLFLAG_MPSAFE,
1260     0, 0, mac_do_sysctl_rules, "A",
1261     "Rules");
1262 
1263 
1264 SYSCTL_JAIL_PARAM_SYS_SUBNODE(mac, do, CTLFLAG_RW, "Jail MAC/do parameters");
1265 SYSCTL_JAIL_PARAM_STRING(_mac_do, rules, CTLFLAG_RW, MAC_RULE_STRING_LEN,
1266     "Jail MAC/do rules");
1267 
1268 
1269 static int
1270 mac_do_jail_create(void *obj, void *data __unused)
1271 {
1272 	struct prison *const pr = obj;
1273 
1274 	set_empty_rules(pr);
1275 	return (0);
1276 }
1277 
1278 static int
1279 mac_do_jail_get(void *obj, void *data)
1280 {
1281 	struct prison *ppr, *const pr = obj;
1282 	struct vfsoptlist *const opts = data;
1283 	struct rules *rules;
1284 	int jsys, error;
1285 
1286 	rules = find_rules(pr, &ppr);
1287 
1288 	jsys = pr == ppr ?
1289 	    (STAILQ_EMPTY(&rules->head) ? JAIL_SYS_DISABLE : JAIL_SYS_NEW) :
1290 	    JAIL_SYS_INHERIT;
1291 	error = vfs_setopt(opts, "mac.do", &jsys, sizeof(jsys));
1292 	if (error != 0 && error != ENOENT)
1293 		goto done;
1294 
1295 	error = vfs_setopts(opts, "mac.do.rules", rules->string);
1296 	if (error != 0 && error != ENOENT)
1297 		goto done;
1298 
1299 	error = 0;
1300 done:
1301 	prison_unlock(ppr);
1302 	return (error);
1303 }
1304 
1305 /*
1306  * -1 is used as a sentinel in mac_do_jail_check() and mac_do_jail_set() below.
1307  */
1308 _Static_assert(-1 != JAIL_SYS_DISABLE && -1 != JAIL_SYS_NEW &&
1309     -1 != JAIL_SYS_INHERIT,
1310     "mac_do(4) uses -1 as a sentinel for uninitialized 'jsys'.");
1311 
1312 /*
1313  * We perform only cheap checks here, i.e., we do not really parse the rules
1314  * specification string, if any.
1315  */
1316 static int
1317 mac_do_jail_check(void *obj, void *data)
1318 {
1319 	struct vfsoptlist *opts = data;
1320 	char *rules_string;
1321 	int error, jsys, size;
1322 
1323 	error = vfs_copyopt(opts, "mac.do", &jsys, sizeof(jsys));
1324 	if (error == ENOENT)
1325 		jsys = -1;
1326 	else {
1327 		if (error != 0)
1328 			return (error);
1329 		if (jsys != JAIL_SYS_DISABLE && jsys != JAIL_SYS_NEW &&
1330 		    jsys != JAIL_SYS_INHERIT)
1331 			return (EINVAL);
1332 	}
1333 
1334 	/*
1335 	 * We use vfs_getopt() here instead of vfs_getopts() to get the length.
1336 	 * We perform the additional checks done by the latter here, even if
1337 	 * jail_set() calls vfs_getopts() itself later (they becoming
1338 	 * inconsistent wouldn't cause any security problem).
1339 	 */
1340 	error = vfs_getopt(opts, "mac.do.rules", (void**)&rules_string, &size);
1341 	if (error == ENOENT) {
1342 		/*
1343 		 * Default (in absence of "mac.do.rules") is to disable (and, in
1344 		 * particular, not inherit).
1345 		 */
1346 		if (jsys == -1)
1347 			jsys = JAIL_SYS_DISABLE;
1348 
1349 		if (jsys == JAIL_SYS_NEW) {
1350 			vfs_opterror(opts, "'mac.do.rules' must be specified "
1351 			    "given 'mac.do''s value");
1352 			return (EINVAL);
1353 		}
1354 
1355 		/* Absence of "mac.do.rules" at this point is OK. */
1356 		error = 0;
1357 	} else {
1358 		if (error != 0)
1359 			return (error);
1360 
1361 		/* Not a proper string. */
1362 		if (size == 0 || rules_string[size - 1] != '\0') {
1363 			vfs_opterror(opts, "'mac.do.rules' not a proper string");
1364 			return (EINVAL);
1365 		}
1366 
1367 		if (size > MAC_RULE_STRING_LEN) {
1368 			vfs_opterror(opts, "'mdo.rules' too long");
1369 			return (ENAMETOOLONG);
1370 		}
1371 
1372 		if (jsys == -1)
1373 			/* Default (if "mac.do.rules" is present). */
1374 			jsys = rules_string[0] == '\0' ? JAIL_SYS_DISABLE :
1375 			    JAIL_SYS_NEW;
1376 
1377 		/*
1378 		 * Be liberal and accept JAIL_SYS_DISABLE and JAIL_SYS_INHERIT
1379 		 * with an explicit empty rules specification.
1380 		 */
1381 		switch (jsys) {
1382 		case JAIL_SYS_DISABLE:
1383 		case JAIL_SYS_INHERIT:
1384 			if (rules_string[0] != '\0') {
1385 				vfs_opterror(opts, "'mac.do.rules' specified "
1386 				    "but should not given 'mac.do''s value");
1387 				return (EINVAL);
1388 			}
1389 			break;
1390 		}
1391 	}
1392 
1393 	return (error);
1394 }
1395 
1396 static int
1397 mac_do_jail_set(void *obj, void *data)
1398 {
1399 	struct prison *pr = obj;
1400 	struct vfsoptlist *opts = data;
1401 	char *rules_string;
1402 	struct parse_error *parse_error;
1403 	int error, jsys;
1404 
1405 	/*
1406 	 * The invariants checks used below correspond to what has already been
1407 	 * checked in jail_check() above.
1408 	 */
1409 
1410 	error = vfs_copyopt(opts, "mac.do", &jsys, sizeof(jsys));
1411 	MPASS(error == 0 || error == ENOENT);
1412 	if (error != 0)
1413 		jsys = -1; /* Mark unfilled. */
1414 
1415 	rules_string = vfs_getopts(opts, "mac.do.rules", &error);
1416 	MPASS(error == 0 || error == ENOENT);
1417 	if (error == 0) {
1418 		MPASS(strlen(rules_string) < MAC_RULE_STRING_LEN);
1419 		if (jsys == -1)
1420 			/* Default (if "mac.do.rules" is present). */
1421 			jsys = rules_string[0] == '\0' ? JAIL_SYS_DISABLE :
1422 			    JAIL_SYS_NEW;
1423 		else
1424 			MPASS(jsys == JAIL_SYS_NEW ||
1425 			    ((jsys == JAIL_SYS_DISABLE ||
1426 			    jsys == JAIL_SYS_INHERIT) &&
1427 			    rules_string[0] == '\0'));
1428 	} else {
1429 		MPASS(jsys != JAIL_SYS_NEW);
1430 		if (jsys == -1)
1431 			/*
1432 			 * Default (in absence of "mac.do.rules") is to disable
1433 			 * (and, in particular, not inherit).
1434 			 */
1435 			jsys = JAIL_SYS_DISABLE;
1436 		/* If disabled, we'll store an empty rule specification. */
1437 		if (jsys == JAIL_SYS_DISABLE)
1438 			rules_string = "";
1439 	}
1440 
1441 	switch (jsys) {
1442 	case JAIL_SYS_INHERIT:
1443 		remove_rules(pr);
1444 		error = 0;
1445 		break;
1446 	case JAIL_SYS_DISABLE:
1447 	case JAIL_SYS_NEW:
1448 		error = parse_and_set_rules(pr, rules_string, &parse_error);
1449 		if (error != 0) {
1450 			vfs_opterror(opts,
1451 			    "MAC/do: Parse error at index %zu: %s\n",
1452 			    parse_error->pos, parse_error->msg);
1453 			free_parse_error(parse_error);
1454 		}
1455 		break;
1456 	default:
1457 		__assert_unreachable();
1458 	}
1459 	return (error);
1460 }
1461 
1462 /*
1463  * OSD jail methods.
1464  *
1465  * There is no PR_METHOD_REMOVE, as OSD storage is destroyed by the common jail
1466  * code (see prison_cleanup()), which triggers a run of our dealloc_jail_osd()
1467  * destructor.
1468  */
1469 static const osd_method_t osd_methods[PR_MAXMETHOD] = {
1470 	[PR_METHOD_CREATE] = mac_do_jail_create,
1471 	[PR_METHOD_GET] = mac_do_jail_get,
1472 	[PR_METHOD_CHECK] = mac_do_jail_check,
1473 	[PR_METHOD_SET] = mac_do_jail_set,
1474 };
1475 
1476 
/*
 * Header common to all per-thread data structures.
 *
 * Any structure used to hand information over from a MAC check function to
 * priv_grant() must have this header as its first member.
 */
struct mac_do_data_header {
	/* Number of bytes allocated for the buffer holding the structure. */
	size_t		 allocated_size;
	/* Number of bytes actually used by the containing structure. */
	size_t		 size;
	/*
	 * Type tag for the containing structure.  Privilege numbers are used
	 * for this purpose since each privilege changing function we support
	 * has its own, distinct privilege.  A value of 0 means that the header
	 * is uninitialized.
	 */
	int		 priv;
	/* Rules to apply. */
	struct rules	*rules;
};
1498 
1499 /*
1500  * The case of unusable or absent per-thread data can actually happen as nothing
1501  * prevents, e.g., priv_check*() with privilege 'priv' to be called standalone,
1502  * as it is currently by, e.g., the Linux emulator for PRIV_CRED_SETUID.  We
1503  * interpret such calls to priv_check*() as full, unrestricted requests for
1504  * 'priv', contrary to what we're doing here for selected operations, and
1505  * consequently will not grant the requested privilege.
1506  *
1507  * Also, we protect ourselves from a concurrent change of 'do_enabled' while
1508  * a call to setcred() is in progress by storing the rules per-thread
1509  * which is then consulted by each successive hook so that they all have
1510  * a coherent view of the specifications, and we empty the slot (actually, mark
1511  * it as empty) when MAC/do is disabled.
1512  */
1513 static int
1514 check_data_usable(const void *const data, const size_t size, const int priv)
1515 {
1516 	const struct mac_do_data_header *const hdr = data;
1517 
1518 	if (hdr == NULL || hdr->priv == 0)
1519 		return (ENOENT);
1520 	/*
1521 	 * Impacting changes in the protocols we are based on...  Don't crash in
1522 	 * production.
1523 	 */
1524 	if (hdr->priv != priv) {
1525 		MPASS(hdr->priv == priv);
1526 		return (EBUSY);
1527 	}
1528 	MPASS(hdr->size == size);
1529 	MPASS(hdr->size <= hdr->allocated_size);
1530 	return (0);
1531 }
1532 
1533 static void
1534 clear_data(void *const data)
1535 {
1536 	struct mac_do_data_header *const hdr = data;
1537 
1538 	if (hdr != NULL) {
1539 		drop_rules(hdr->rules);
1540 		/* We don't deallocate so as to save time on next access. */
1541 		hdr->priv = 0;
1542 	}
1543 }
1544 
1545 static void *
1546 fetch_data(void)
1547 {
1548 	return (osd_thread_get_unlocked(curthread, osd_thread_slot));
1549 }
1550 
1551 static bool
1552 is_data_reusable(const void *const data, const size_t size)
1553 {
1554 	const struct mac_do_data_header *const hdr = data;
1555 
1556 	return (hdr != NULL && size <= hdr->allocated_size);
1557 }
1558 
1559 static void
1560 set_data_header(void *const data, const size_t size, const int priv,
1561     struct rules *const rules)
1562 {
1563 	struct mac_do_data_header *const hdr = data;
1564 
1565 	MPASS(hdr->priv == 0);
1566 	MPASS(priv != 0);
1567 	MPASS(size <= hdr->allocated_size);
1568 	hdr->size = size;
1569 	hdr->priv = priv;
1570 	hdr->rules = rules;
1571 }
1572 
1573 /* The proc lock (and any other non-sleepable lock) must not be held. */
1574 static void *
1575 alloc_data(void *const data, const size_t size)
1576 {
1577 	struct mac_do_data_header *const hdr = realloc(data, size, M_DO,
1578 	    M_WAITOK);
1579 
1580 	MPASS(size >= sizeof(struct mac_do_data_header));
1581 	hdr->allocated_size = size;
1582 	hdr->priv = 0;
1583 	if (hdr != data) {
1584 		/*
1585 		 * This call either reuses the existing memory allocated for the
1586 		 * slot or tries to allocate some without blocking.
1587 		 */
1588 		int error = osd_thread_set(curthread, osd_thread_slot, hdr);
1589 
1590 		if (error != 0) {
1591 			/* Going to make a M_WAITOK allocation. */
1592 			void **const rsv = osd_reserve(osd_thread_slot);
1593 
1594 			error = osd_thread_set_reserved(curthread,
1595 			    osd_thread_slot, rsv, hdr);
1596 			MPASS(error == 0);
1597 		}
1598 	}
1599 	return (hdr);
1600 }
1601 
1602 /* Destructor for 'osd_thread_slot'. */
1603 static void
1604 dealloc_thread_osd(void *const value)
1605 {
1606 	free(value, M_DO);
1607 }
1608 
1609 /*
1610  * Whether to grant access to some primary group according to flags.
1611  *
1612  * The passed 'flags' must be those of a rule's matching GID, or the IT_GID type
1613  * flags when MDF_CURRENT has been matched.
1614  *
1615  * Return values:
1616  * - 0:			Access granted.
1617  * - EJUSTRETURN:	Flags are agnostic.
1618  */
1619 static int
1620 grant_primary_group_from_flags(const flags_t flags)
1621 {
1622 	return ((flags & MDF_PRIMARY) != 0 ? 0 : EJUSTRETURN);
1623 }
1624 
1625 /*
1626  * Same as grant_primary_group_from_flags(), but for supplementary groups.
1627  *
1628  * Return values:
1629  * - 0:			Access granted.
1630  * - EJUSTRETURN:	Flags are agnostic.
1631  * - EPERM:		Access denied.
1632  */
1633 static int __unused
1634 grant_supplementary_group_from_flags(const flags_t flags)
1635 {
1636 	if ((flags & MDF_SUPP_MASK) != 0)
1637 		return ((flags & MDF_SUPP_DONT) != 0 ? EPERM : 0);
1638 
1639 	return (EJUSTRETURN);
1640 }
1641 
1642 static int
1643 rule_grant_supplementary_groups(const struct rule *const rule,
1644     const struct ucred *const old_cred, const struct ucred *const new_cred)
1645 {
1646 	const gid_t *const old_groups = old_cred->cr_groups;
1647 	const gid_t *const new_groups = new_cred->cr_groups;
1648 	const int old_ngroups = old_cred->cr_ngroups;
1649 	const int new_ngroups = new_cred->cr_ngroups;
1650 	const flags_t gid_flags = rule->gid_flags;
1651 	const bool current_has_supp = (gid_flags & MDF_CURRENT) != 0 &&
1652 	    (gid_flags & MDF_SUPP_MASK) != 0;
1653 	id_nb_t rule_idx = 0;
1654 	int old_idx = 1, new_idx = 1;
1655 
1656 	if ((gid_flags & MDF_ANY_SUPP) != 0 &&
1657 	    (gid_flags & MDF_MAY_REJ_SUPP) == 0)
1658 		/*
1659 		 * Any set of supplementary groups is accepted, no need to loop
1660 		 * over them.
1661 		 */
1662 		return (0);
1663 
1664 	for (; new_idx < new_ngroups; ++new_idx) {
1665 		const gid_t gid = new_groups[new_idx];
1666 		bool may_accept = false;
1667 
1668 		if ((gid_flags & MDF_ANY_SUPP) != 0)
1669 			may_accept = true;
1670 
1671 		/* Do we have to check for the current supplementary groups? */
1672 		if (current_has_supp) {
1673 			/*
1674 			 * Linear search, as both supplementary groups arrays
1675 			 * are sorted.  Advancing 'old_idx' with a binary search
1676 			 * on absence of MDF_SUPP_MUST doesn't seem worth it in
1677 			 * practice.
1678 			 */
1679 			for (; old_idx < old_ngroups; ++old_idx) {
1680 				const gid_t old_gid = old_groups[old_idx];
1681 
1682 				if (old_gid < gid) {
1683 					/* Mandatory but absent. */
1684 					if ((gid_flags & MDF_SUPP_MUST) != 0)
1685 						return (EPERM);
1686 				} else if (old_gid == gid) {
1687 					switch (gid_flags & MDF_SUPP_MASK) {
1688 					case MDF_SUPP_DONT:
1689 						/* Present but forbidden. */
1690 						return (EPERM);
1691 					case MDF_SUPP_ALLOW:
1692 					case MDF_SUPP_MUST:
1693 						may_accept = true;
1694 						break;
1695 					default:
1696 #ifdef INVARIANTS
1697 						__assert_unreachable();
1698 #else
1699 						/* Better be safe than sorry. */
1700 						return (EPERM);
1701 #endif
1702 					}
1703 					++old_idx;
1704 					break;
1705 				}
1706 				else
1707 					break;
1708 			}
1709 		}
1710 
1711 		/*
1712 		 * Search by GID for a corresponding 'struct id_spec'.
1713 		 *
1714 		 * Again, linear search, with same note on not using binary
1715 		 * search optimization as above (the trigger would be absence of
1716 		 * MDF_EXPLICIT_SUPP_MUST this time).
1717 		 */
1718 		for (; rule_idx < rule->gids_nb; ++rule_idx) {
1719 			const struct id_spec is = rule->gids[rule_idx];
1720 
1721 			if (is.id < gid) {
1722 				/* Mandatory but absent. */
1723 				if ((is.flags & MDF_SUPP_MUST) != 0)
1724 					return (EPERM);
1725 			} else if (is.id == gid) {
1726 				switch (is.flags & MDF_SUPP_MASK) {
1727 				case MDF_SUPP_DONT:
1728 					/* Present but forbidden. */
1729 					return (EPERM);
1730 				case MDF_SUPP_ALLOW:
1731 				case MDF_SUPP_MUST:
1732 					may_accept = true;
1733 					break;
1734 				case 0:
1735 					/* Primary group only. */
1736 					break;
1737 				default:
1738 #ifdef INVARIANTS
1739 					__assert_unreachable();
1740 #else
1741 					/* Better be safe than sorry. */
1742 					return (EPERM);
1743 #endif
1744 				}
1745 				++rule_idx;
1746 				break;
1747 			}
1748 			else
1749 				break;
1750 		}
1751 
1752 		/* 'gid' wasn't explicitly accepted. */
1753 		if (!may_accept)
1754 			return (EPERM);
1755 	}
1756 
1757 	/*
1758 	 * If we must have all current groups and we didn't browse all
1759 	 * of them at this point (because the remaining ones have GIDs
1760 	 * greater than the last requested group), we are simply missing
1761 	 * them.
1762 	 */
1763 	if ((gid_flags & MDF_CURRENT) != 0 &&
1764 	    (gid_flags & MDF_SUPP_MUST) != 0 &&
1765 	    old_idx < old_ngroups)
1766 		return (EPERM);
1767 	/*
1768 	 * Similarly, we have to finish browsing all GIDs from the rule
1769 	 * in case some are marked mandatory.
1770 	 */
1771 	if ((gid_flags & MDF_EXPLICIT_SUPP_MUST) != 0) {
1772 		for (; rule_idx < rule->gids_nb; ++rule_idx) {
1773 			const struct id_spec is = rule->gids[rule_idx];
1774 
1775 			if ((is.flags & MDF_SUPP_MUST) != 0)
1776 				return (EPERM);
1777 		}
1778 	}
1779 
1780 	return (0);
1781 }
1782 
1783 static int
1784 rule_grant_primary_group(const struct rule *const rule,
1785     const struct ucred *const old_cred, const gid_t gid)
1786 {
1787 	struct id_spec gid_is = {.flags = 0};
1788 	const struct id_spec *found_is;
1789 	int error;
1790 
1791 	if ((rule->gid_flags & MDF_ANY) != 0)
1792 		return (0);
1793 
1794 	/* Was MDF_CURRENT specified, and is 'gid' a current GID? */
1795 	if ((rule->gid_flags & MDF_CURRENT) != 0 &&
1796 	    group_is_primary(gid, old_cred)) {
1797 		error = grant_primary_group_from_flags(rule->gid_flags);
1798 		if (error == 0)
1799 			return (0);
1800 	}
1801 
1802 	/* Search by GID for a corresponding 'struct id_spec'. */
1803 	gid_is.id = gid;
1804 	found_is = bsearch(&gid_is, rule->gids, rule->gids_nb,
1805 	    sizeof(*rule->gids), id_spec_cmp);
1806 
1807 	if (found_is != NULL) {
1808 		error = grant_primary_group_from_flags(found_is->flags);
1809 		if (error == 0)
1810 			return (0);
1811 	}
1812 
1813 	return (EPERM);
1814 }
1815 
1816 static int
1817 rule_grant_primary_groups(const struct rule *const rule,
1818     const struct ucred *const old_cred, const struct ucred *const new_cred)
1819 {
1820 	int error;
1821 
1822 	/* Shortcut. */
1823 	if ((rule->gid_flags & MDF_ANY) != 0)
1824 		return (0);
1825 
1826 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_gid);
1827 	if (error != 0)
1828 		return (error);
1829 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_rgid);
1830 	if (error != 0)
1831 		return (error);
1832 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_svgid);
1833 	if (error != 0)
1834 		return (error);
1835 	return (0);
1836 }
1837 
1838 static bool
1839 user_is_current(const uid_t uid, const struct ucred *const old_cred)
1840 {
1841 	return (uid == old_cred->cr_uid || uid == old_cred->cr_ruid ||
1842 	    uid == old_cred->cr_svuid);
1843 }
1844 
1845 static int
1846 rule_grant_user(const struct rule *const rule,
1847     const struct ucred *const old_cred, const uid_t uid)
1848 {
1849 	struct id_spec uid_is = {.flags = 0};
1850 	const struct id_spec *found_is;
1851 
1852 	if ((rule->uid_flags & MDF_ANY) != 0)
1853 		return (0);
1854 
1855 	/* Was MDF_CURRENT specified, and is 'uid' a current UID? */
1856 	if ((rule->uid_flags & MDF_CURRENT) != 0 &&
1857 	    user_is_current(uid, old_cred))
1858 		return (0);
1859 
1860 	/* Search by UID for a corresponding 'struct id_spec'. */
1861 	uid_is.id = uid;
1862 	found_is = bsearch(&uid_is, rule->uids, rule->uids_nb,
1863 	    sizeof(*rule->uids), id_spec_cmp);
1864 
1865 	if (found_is != NULL)
1866 		return (0);
1867 
1868 	return (EPERM);
1869 }
1870 
1871 static int
1872 rule_grant_users(const struct rule *const rule,
1873     const struct ucred *const old_cred, const struct ucred *const new_cred)
1874 {
1875 	int error;
1876 
1877 	/* Shortcut. */
1878 	if ((rule->uid_flags & MDF_ANY) != 0)
1879 		return (0);
1880 
1881 	error = rule_grant_user(rule, old_cred, new_cred->cr_uid);
1882 	if (error != 0)
1883 		return (error);
1884 	error = rule_grant_user(rule, old_cred, new_cred->cr_ruid);
1885 	if (error != 0)
1886 		return (error);
1887 	error = rule_grant_user(rule, old_cred, new_cred->cr_svuid);
1888 	if (error != 0)
1889 		return (error);
1890 
1891 	return (0);
1892 }
1893 
/*
 * Check whether a rule allows the transition from 'old_cred' to 'new_cred'.
 *
 * Users, primary groups and supplementary groups are checked in this order,
 * stopping at the first failure.  Returns 0 if all checks pass, else the error
 * from the failing check.
 */
static int
rule_grant_setcred(const struct rule *const rule,
    const struct ucred *const old_cred, const struct ucred *const new_cred)
{
	int error;

	error = rule_grant_users(rule, old_cred, new_cred);
	if (error == 0)
		error = rule_grant_primary_groups(rule, old_cred, new_cred);
	if (error == 0)
		error = rule_grant_supplementary_groups(rule, old_cred,
		    new_cred);

	return (error);
}
1912 
1913 static bool
1914 rule_applies(const struct rule *const rule, const struct ucred *const cred)
1915 {
1916 	if (rule->from_type == IT_UID && rule->from_id == cred->cr_ruid)
1917 		return (true);
1918 	if (rule->from_type == IT_GID && realgroupmember(rule->from_id, cred))
1919 		return (true);
1920 	return (false);
1921 }
1922 
1923 /*
1924  * To pass data between check_setcred() and priv_grant() (on PRIV_CRED_SETCRED).
1925  */
1926 struct mac_do_setcred_data {
1927 	struct mac_do_data_header hdr;
1928 	const struct ucred *new_cred;
1929 	u_int setcred_flags;
1930 };
1931 
1932 static int
1933 mac_do_priv_grant(struct ucred *cred, int priv)
1934 {
1935 	struct mac_do_setcred_data *const data = fetch_data();
1936 	const struct rules *rules;
1937 	const struct ucred *new_cred;
1938 	const struct rule *rule;
1939 	u_int setcred_flags;
1940 	int error;
1941 
1942 	/* Bail out fast if we aren't concerned. */
1943 	if (priv != PRIV_CRED_SETCRED)
1944 		return (EPERM);
1945 
1946 	/*
1947 	 * Do we have to do something?
1948 	 */
1949 	if (check_data_usable(data, sizeof(*data), priv) != 0)
1950 		/* No. */
1951 		return (EPERM);
1952 
1953 	rules = data->hdr.rules;
1954 	new_cred = data->new_cred;
1955 	KASSERT(new_cred != NULL,
1956 	    ("priv_check*() called before mac_cred_check_setcred()"));
1957 	setcred_flags = data->setcred_flags;
1958 
1959 	/*
1960 	 * Explicitly check that only the flags we currently support are present
1961 	 * in order to avoid accepting transitions with other changes than those
1962 	 * we are actually going to check.  Currently, this rules out the
1963 	 * SETCREDF_MAC_LABEL flag.  This may be improved by adding code
1964 	 * actually checking whether the requested label and the current one
1965 	 * would differ.
1966 	 */
1967 	if ((setcred_flags & ~(SETCREDF_UID | SETCREDF_RUID | SETCREDF_SVUID |
1968 	    SETCREDF_GID | SETCREDF_RGID | SETCREDF_SVGID |
1969 	    SETCREDF_SUPP_GROUPS)) != 0)
1970 		return (EPERM);
1971 
1972 	/*
1973 	 * Browse rules, and for those that match the requestor, call specific
1974 	 * privilege granting functions interpreting the "to"/"target" part.
1975 	 */
1976 	error = EPERM;
1977 	STAILQ_FOREACH(rule, &rules->head, r_entries)
1978 	    if (rule_applies(rule, cred)) {
1979 		    error = rule_grant_setcred(rule, cred, new_cred);
1980 		    if (error != EPERM)
1981 			    break;
1982 	    }
1983 
1984 	return (error);
1985 }
1986 
1987 static int
1988 check_proc(void)
1989 {
1990 	char *path, *to_free;
1991 	int error;
1992 
1993 	/*
1994 	 * Only grant privileges if requested by the right executable.
1995 	 *
1996 	 * XXXOC: We may want to base this check on a tunable path and/or
1997 	 * a specific MAC label.  Going even further, e.g., envisioning to
1998 	 * completely replace the path check with the latter, we would need to
1999 	 * install FreeBSD on a FS with multilabel enabled by default, which in
2000 	 * practice entails adding an option to ZFS to set MNT_MULTILABEL
2001 	 * automatically on mounts, ensuring that root (and more if using
2002 	 * different partitions) ZFS or UFS filesystems are created with
2003 	 * multilabel turned on, and having the installation procedure support
2004 	 * setting a MAC label per file (perhaps via additions to mtree(1)).  So
2005 	 * this probably isn't going to happen overnight, if ever.
2006 	 */
2007 	if (vn_fullpath(curproc->p_textvp, &path, &to_free) != 0)
2008 		return (EPERM);
2009 	error = strcmp(path, "/usr/bin/mdo") == 0 ? 0 : EPERM;
2010 	free(to_free, M_TEMP);
2011 	return (error);
2012 }
2013 
2014 static void
2015 mac_do_setcred_enter(void)
2016 {
2017 	struct rules *rules;
2018 	struct prison *pr;
2019 	struct mac_do_setcred_data * data;
2020 	int error;
2021 
2022 	/*
2023 	 * If not enabled, don't prepare data.  Other hooks will check for that
2024 	 * to know if they have to do something.
2025 	 */
2026 	if (do_enabled == 0)
2027 		return;
2028 
2029 	/*
2030 	 * MAC/do only applies to a process launched from a given executable.
2031 	 * For other processes, we just won't intervene (we don't deny requests,
2032 	 * nor do we grant privileges to them).
2033 	 */
2034 	error = check_proc();
2035 	if (error != 0)
2036 		return;
2037 
2038 	/*
2039 	 * Find the currently applicable rules.
2040 	 */
2041 	rules = find_rules(curproc->p_ucred->cr_prison, &pr);
2042 	hold_rules(rules);
2043 	prison_unlock(pr);
2044 
2045 	/*
2046 	 * Setup thread data to be used by other hooks.
2047 	 */
2048 	data = fetch_data();
2049 	if (!is_data_reusable(data, sizeof(*data)))
2050 		data = alloc_data(data, sizeof(*data));
2051 	set_data_header(data, sizeof(*data), PRIV_CRED_SETCRED, rules);
2052 	/* Not really necessary, but helps to catch programming errors. */
2053 	data->new_cred = NULL;
2054 	data->setcred_flags = 0;
2055 }
2056 
2057 static int
2058 mac_do_check_setcred(u_int flags, const struct ucred *const old_cred,
2059     struct ucred *const new_cred)
2060 {
2061 	struct mac_do_setcred_data *const data = fetch_data();
2062 
2063 	/*
2064 	 * Do we have to do something?
2065 	 */
2066 	if (check_data_usable(data, sizeof(*data), PRIV_CRED_SETCRED) != 0)
2067 		/* No. */
2068 		return (0);
2069 
2070 	/*
2071 	 * Keep track of the setcred() flags and the new credentials for
2072 	 * priv_check*().
2073 	 */
2074 	data->new_cred = new_cred;
2075 	data->setcred_flags = flags;
2076 
2077 	return (0);
2078 }
2079 
2080 static void
2081 mac_do_setcred_exit(void)
2082 {
2083 	struct mac_do_setcred_data *const data = fetch_data();
2084 
2085 	if (check_data_usable(data, sizeof(*data), PRIV_CRED_SETCRED) == 0)
2086 		/*
2087 		 * This doesn't deallocate the small per-thread data storage,
2088 		 * which can be reused on subsequent calls.  (That data is of
2089 		 * course deallocated as the current thread dies or this module
2090 		 * is unloaded.)
2091 		 */
2092 		clear_data(data);
2093 }
2094 
2095 static void
2096 mac_do_init(struct mac_policy_conf *mpc)
2097 {
2098 	struct prison *pr;
2099 
2100 	osd_jail_slot = osd_jail_register(dealloc_jail_osd, osd_methods);
2101 	set_empty_rules(&prison0);
2102 	sx_slock(&allprison_lock);
2103 	TAILQ_FOREACH(pr, &allprison, pr_list)
2104 	    set_empty_rules(pr);
2105 	sx_sunlock(&allprison_lock);
2106 
2107 	osd_thread_slot = osd_thread_register(dealloc_thread_osd);
2108 }
2109 
2110 static void
2111 mac_do_destroy(struct mac_policy_conf *mpc)
2112 {
2113 	/*
2114 	 * osd_thread_deregister() must be called before osd_jail_deregister(),
2115 	 * for the reason explained in dealloc_jail_osd().
2116 	 */
2117 	osd_thread_deregister(osd_thread_slot);
2118 	osd_jail_deregister(osd_jail_slot);
2119 }
2120 
2121 static struct mac_policy_ops do_ops = {
2122 	.mpo_init = mac_do_init,
2123 	.mpo_destroy = mac_do_destroy,
2124 	.mpo_cred_setcred_enter = mac_do_setcred_enter,
2125 	.mpo_cred_check_setcred = mac_do_check_setcred,
2126 	.mpo_cred_setcred_exit = mac_do_setcred_exit,
2127 	.mpo_priv_grant = mac_do_priv_grant,
2128 };
2129 
2130 MAC_POLICY_SET(&do_ops, mac_do, "MAC/do", MPC_LOADTIME_FLAG_UNLOADOK, NULL);
2131 MODULE_VERSION(mac_do, 1);
2132