xref: /freebsd/sys/security/mac_do/mac_do.c (revision 4a03b64517b3151064c52e213ebbc068ab1430d1)
1 /*-
2  * Copyright(c) 2024 Baptiste Daroussin <bapt@FreeBSD.org>
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  */
6 
7 #include <sys/param.h>
8 #include <sys/systm.h>
9 #include <sys/ctype.h>
10 #include <sys/jail.h>
11 #include <sys/kernel.h>
12 #include <sys/limits.h>
13 #include <sys/lock.h>
14 #include <sys/malloc.h>
15 #include <sys/module.h>
16 #include <sys/mount.h>
17 #include <sys/mutex.h>
18 #include <sys/priv.h>
19 #include <sys/proc.h>
20 #include <sys/refcount.h>
21 #include <sys/socket.h>
22 #include <sys/sx.h>
23 #include <sys/sysctl.h>
24 #include <sys/ucred.h>
25 #include <sys/vnode.h>
26 
27 #include <machine/stdarg.h>
28 
29 #include <security/mac/mac_policy.h>
30 
/* Root node for this policy's sysctl knobs (child "do" of _security_mac). */
31 static SYSCTL_NODE(_security_mac, OID_AUTO, do,
32     CTLFLAG_RW|CTLFLAG_MPSAFE, 0, "mac_do policy controls");
33 
/* Backing variable of the "enabled" knob ("Enforce do policy"); defaults to 1. */
34 static int	do_enabled = 1;
35 SYSCTL_INT(_security_mac_do, OID_AUTO, enabled, CTLFLAG_RWTUN,
36     &do_enabled, 0, "Enforce do policy");
37 
/* Backing variable of the "print_parse_error" knob; defaults to 1. */
38 static int	print_parse_error = 1;
39 SYSCTL_INT(_security_mac_do, OID_AUTO, print_parse_error, CTLFLAG_RWTUN,
40     &print_parse_error, 0, "Print parse errors on setting rules "
41     "(via sysctl(8)).");
42 
/* Malloc type used for all allocations made by this file. */
43 static MALLOC_DEFINE(M_DO, "do_rule", "Rules for mac_do");
44 
/*
 * Size of the buffer holding a rules specification string (see 'struct
 * rules'), terminating NUL included.  parse_rules() rejects strings whose
 * length is greater than or equal to this value.
 */
45 #define MAC_RULE_STRING_LEN	1024
46 
/* OSD slot under which find_rules() looks up a jail's rules. */
47 static unsigned		osd_jail_slot;
/* NOTE(review): presumably a per-thread OSD slot; not used in this part of the file. */
48 static unsigned		osd_thread_slot;
49 
/*
 * ID types that rules can refer to.  These values index id_type_to_str[]
 * below, and IT_LAST is the upper bound used when iterating over or validating
 * types (see parse_id_type() and check_type()).
 */
50 #define IT_INVALID	0 /* Must stay 0. */
51 #define IT_UID		1
52 #define IT_GID		2
53 #define IT_ANY		3
54 #define IT_LAST		IT_ANY
55 
/* Canonical textual form of each IT_* type, indexed by type value. */
56 static const char *id_type_to_str[] = {
57 	[IT_INVALID]	= "invalid",
58 	[IT_UID]	= "uid",
59 	[IT_GID]	= "gid",
60 	/* See also parse_id_type(). */
61 	[IT_ANY]	= "*",
62 };
63 
/* Size of the message buffer of a 'struct parse_error'. */
64 #define PARSE_ERROR_SIZE	256
65 
/*
 * Description of a parse failure, allocated by make_parse_error().
 *
 * 'pos' is an offset into the parsed string; callers add the offset of the
 * sub-token they were parsing as the error propagates up.  'msg' is the
 * formatted (possibly truncated) error message.
 */
66 struct parse_error {
67 	size_t	pos;
68 	char	msg[PARSE_ERROR_SIZE];
69 };
70 
71 /*
72  * We assume that 'uid_t' and 'gid_t' are aliases to 'u_int' in conversions
73  * required for parsing rules specification strings.
74  */
/* The assertion checks same size as 'u_int' and unsignedness for both types. */
75 _Static_assert(sizeof(uid_t) == sizeof(u_int) && (uid_t)-1 >= 0 &&
76     sizeof(gid_t) == sizeof(u_int) && (gid_t)-1 >= 0,
77     "mac_do(4) assumes that 'uid_t' and 'gid_t' are aliases to 'u_int'");
78 
79 /*
80  * Internal flags.
81  *
82  * They either apply as per-type (t) or per-ID (i) but are conflated because all
83  * per-ID flags are also valid as per-type ones to qualify the "current" (".")
84  * per-type flag.  Also, some of them are in fact exclusive, but we use one-hot
85  * encoding for simplicity.
86  *
87  * There is currently room for "only" 16 bits.  As these flags are purely
88  * internal, they can be renumbered and/or their type changed as needed.
89  *
90  * See also the check_*() functions below.
91  */
/* Storage type for the MDF_* flags below (both per-ID and per-type ones). */
92 typedef uint16_t	flags_t;
93 
94 /* (i,gid) Specification concerns primary groups. */
95 #define MDF_PRIMARY	(1u << 0)
96 /* (i,gid) Specification concerns supplementary groups. */
97 #define MDF_SUPP_ALLOW	(1u << 1)
98 /* (i,gid) Group must appear as a supplementary group. */
99 #define MDF_SUPP_MUST	(1u << 2)
100 /* (i,gid) Group must not appear as a supplementary group. */
101 #define MDF_SUPP_DONT	(1u << 3)
/* All per-ID flags about supplementary groups. */
102 #define MDF_SUPP_MASK	(MDF_SUPP_ALLOW | MDF_SUPP_MUST | MDF_SUPP_DONT)
/* All per-ID flags. */
103 #define MDF_ID_MASK	(MDF_PRIMARY | MDF_SUPP_MASK)
104 
105 /*
106  * (t) All IDs allowed.
107  *
108  * For GIDs, MDF_ANY only concerns primary groups.  The MDF_PRIMARY and
109  * MDF_SUPP_* flags never apply to MDF_ANY, but can be present if MDF_CURRENT is
110  * present also, as usual.
111  */
112 #define MDF_ANY			(1u << 8)
113 /* (t) Current IDs allowed. */
114 #define MDF_CURRENT		(1u << 9)
/* Per-type flags that are meaningful for both UIDs and GIDs. */
115 #define MDF_TYPE_COMMON_MASK	(MDF_ANY | MDF_CURRENT)
116 /* (t,gid) All IDs allowed as supplementary groups. */
117 #define MDF_ANY_SUPP		(1u << 10)
118 /* (t,gid) Some ID or MDF_CURRENT has MDF_SUPP_MUST or MDF_SUPP_DONT. */
119 #define MDF_MAY_REJ_SUPP	(1u << 11)
120 /* (t,gid) Some explicit ID (not MDF_CURRENT) has MDF_SUPP_MUST. */
121 #define MDF_EXPLICIT_SUPP_MUST	(1u << 12)
122 /* (t,gid) Whether any target clause is about primary groups.  Used during
123  * parsing only. */
124 #define MDF_HAS_PRIMARY_CLAUSE	(1u << 13)
125 /* (t,gid) Whether any target clause is about supplementary groups.  Used during
126  * parsing only. */
127 #define MDF_HAS_SUPP_CLAUSE	(1u << 14)
/* Per-type flags that are specific to GIDs. */
128 #define MDF_TYPE_GID_MASK	(MDF_ANY_SUPP | MDF_MAY_REJ_SUPP |	\
129     MDF_EXPLICIT_SUPP_MUST | MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE)
/* All per-type flags. */
130 #define MDF_TYPE_MASK		(MDF_TYPE_COMMON_MASK | MDF_TYPE_GID_MASK)
131 
132 /*
133  * Persistent structures.
134  */
135 
/* A target ID (UID or GID, depending on context) with its per-ID flags. */
136 struct id_spec {
137 	u_int		 id;
138 	flags_t		 flags; /* See MDF_* above. */
139 };
140 
141 /*
142  * This limits the number of target clauses per type to 65535.  With the current
143  * value of MAC_RULE_STRING_LEN (1024), this is way more than enough anyway.
144  */
145 typedef uint16_t	 id_nb_t;
146 /* We only have a few IT_* types. */
147 typedef uint16_t	 id_type_t;
148 
/*
 * A single parsed rule.
 *
 * 'from_type' and 'from_id' come from the "from" part of the rule.  'uids' and
 * 'gids' are arrays of 'uids_nb' and 'gids_nb' explicit target IDs, sorted by
 * ID (see pour_list_into_rule()); either may be NULL when the corresponding
 * count is 0.  'uid_flags' and 'gid_flags' are the per-type flags.
 */
149 struct rule {
150 	TAILQ_ENTRY(rule) r_entries;
151 	id_type_t	 from_type;
152 	u_int		 from_id;
153 	flags_t		 uid_flags; /* See MDF_* above. */
154 	id_nb_t		 uids_nb;
155 	flags_t		 gid_flags; /* See MDF_* above. */
156 	id_nb_t		 gids_nb;
157 	struct id_spec	*uids;
158 	struct id_spec	*gids;
159 };
160 
161 TAILQ_HEAD(rulehead, rule);
162 
/*
 * A set of rules as produced by parse_rules().
 *
 * 'string' holds a copy of the specification string the rules were parsed
 * from, 'head' the list of parsed rules, and 'use_count' is a reference count
 * manipulated through hold_rules() and drop_rules().
 */
163 struct rules {
164 	char		string[MAC_RULE_STRING_LEN];
165 	struct rulehead	head;
166 	volatile u_int	use_count __aligned(CACHE_LINE_SIZE);
167 };
168 
169 /*
170  * Temporary structures used to build a 'struct rule' above.
171  */
172 
/* List node wrapping an 'id_spec' while a rule's targets are being parsed. */
173 struct id_elem {
174 	TAILQ_ENTRY(id_elem) ie_entries;
175 	struct id_spec spec;
176 };
177 
178 TAILQ_HEAD(id_list, id_elem);
179 
180 #ifdef INVARIANTS
181 static void
182 check_type(const id_type_t type)
183 {
184 	if (type > IT_LAST)
185 		panic("Invalid type number %u", type);
186 }
187 
188 static void
189 panic_for_unexpected_flags(const id_type_t type, const flags_t flags,
190     const char *const str)
191 {
192 	panic("ID type %s: Unexpected flags %u (%s), ", id_type_to_str[type],
193 	    flags, str);
194 }
195 
196 static void
197 check_type_and_id_flags(const id_type_t type, const flags_t flags)
198 {
199 	const char *str;
200 
201 	check_type(type);
202 	switch (type) {
203 	case IT_UID:
204 		if (flags != 0) {
205 			str = "only 0 allowed";
206 			goto unexpected_flags;
207 		}
208 		break;
209 	case IT_GID:
210 		if ((flags & ~MDF_ID_MASK) != 0) {
211 			str = "only bits in MDF_ID_MASK allowed";
212 			goto unexpected_flags;
213 		}
214 		if (!powerof2(flags & MDF_SUPP_MASK)) {
215 			str = "only a single flag in MDF_SUPP_MASK allowed";
216 			goto unexpected_flags;
217 		}
218 		break;
219 	default:
220 	    __assert_unreachable();
221 	}
222 	return;
223 
224 unexpected_flags:
225 	panic_for_unexpected_flags(type, flags, str);
226 }
227 
228 static void
229 check_type_and_id_spec(const id_type_t type, const struct id_spec *const is)
230 {
231 	check_type_and_id_flags(type, is->flags);
232 }
233 
234 static void
235 check_type_and_type_flags(const id_type_t type, const flags_t flags)
236 {
237 	const char *str;
238 
239 	check_type_and_id_flags(type, flags & MDF_ID_MASK);
240 	if ((flags & ~MDF_ID_MASK & ~MDF_TYPE_MASK) != 0) {
241 		str = "only MDF_ID_MASK | MDF_TYPE_MASK bits allowed";
242 		goto unexpected_flags;
243 	}
244 	if ((flags & MDF_ANY) != 0 && (flags & MDF_CURRENT) != 0 &&
245 	    (type == IT_UID || (flags & MDF_PRIMARY) != 0)) {
246 		str = "MDF_ANY and MDF_CURRENT are exclusive for UIDs "
247 		    "or primary group GIDs";
248 		goto unexpected_flags;
249 	}
250 	if ((flags & MDF_ANY_SUPP) != 0 && (flags & MDF_CURRENT) != 0 &&
251 	    (flags & MDF_SUPP_MASK) != 0) {
252 		str = "MDF_SUPP_ANY and MDF_CURRENT with supplementary "
253 		    "groups specification are exclusive";
254 		goto unexpected_flags;
255 	}
256 	if (((flags & MDF_PRIMARY) != 0 || (flags & MDF_ANY) != 0) &&
257 	    (flags & MDF_HAS_PRIMARY_CLAUSE) == 0) {
258 		str = "Presence of folded primary clause not reflected "
259 		    "by presence of MDF_HAS_PRIMARY_CLAUSE";
260 		goto unexpected_flags;
261 	}
262 	if (((flags & MDF_SUPP_MASK) != 0 || (flags & MDF_ANY_SUPP) != 0) &&
263 	    (flags & MDF_HAS_SUPP_CLAUSE) == 0) {
264 		str = "Presence of folded supplementary clause not reflected "
265 		    "by presence of MDF_HAS_SUPP_CLAUSE";
266 		goto unexpected_flags;
267 	}
268 	return;
269 
270 unexpected_flags:
271 	panic_for_unexpected_flags(type, flags, str);
272 }
273 #else /* !INVARIANTS */
/* Without INVARIANTS, the checks used outside this section expand to nothing. */
274 #define check_type_and_id_flags(...)
275 #define check_type_and_id_spec(...)
276 #define check_type_and_type_flags(...)
277 #endif /* INVARIANTS */
278 
279 /*
280  * Returns EALREADY if both flags have some overlap, or EINVAL if flags are
281  * incompatible, else 0 with flags successfully merged into 'dest'.
282  */
283 static int
284 coalesce_id_flags(const flags_t src, flags_t *const dest)
285 {
286 	flags_t res;
287 
288 	if ((src & *dest) != 0)
289 		return (EALREADY);
290 
291 	res = src | *dest;
292 
293 	/* Check for compatibility of supplementary flags, and coalesce. */
294 	if ((res & MDF_SUPP_MASK) != 0) {
295 		/* MDF_SUPP_DONT incompatible with the rest. */
296 		if ((res & MDF_SUPP_DONT) != 0 && (res & MDF_SUPP_MASK &
297 		    ~MDF_SUPP_DONT) != 0)
298 			return (EINVAL);
299 		/*
300 		 * Coalesce MDF_SUPP_ALLOW and MDF_SUPP_MUST into MDF_SUPP_MUST.
301 		 */
302 		if ((res & MDF_SUPP_ALLOW) != 0 && (res & MDF_SUPP_MUST) != 0)
303 			res &= ~MDF_SUPP_ALLOW;
304 	}
305 
306 	*dest = res;
307 	return (0);
308 }
309 
310 static void
311 toast_rules(struct rules *const rules)
312 {
313 	struct rulehead *const head = &rules->head;
314 	struct rule *rule, *rule_next;
315 
316 	TAILQ_FOREACH_SAFE(rule, head, r_entries, rule_next) {
317 		free(rule->uids, M_DO);
318 		free(rule->gids, M_DO);
319 		free(rule, M_DO);
320 	}
321 	free(rules, M_DO);
322 }
323 
324 static struct rules *
325 alloc_rules(void)
326 {
327 	struct rules *const rules = malloc(sizeof(*rules), M_DO, M_WAITOK);
328 
329 	_Static_assert(MAC_RULE_STRING_LEN > 0, "MAC_RULE_STRING_LEN <= 0!");
330 	rules->string[0] = 0;
331 	TAILQ_INIT(&rules->head);
332 	rules->use_count = 0;
333 	return (rules);
334 }
335 
/* Tells whether 's' is a NULL pointer or points to an empty string. */
static bool
is_null_or_empty(const char *s)
{
	if (s == NULL)
		return (true);
	return (*s == '\0');
}
341 
342 /*
343  * String to unsigned int.
344  *
345  * Contrary to the "standard" strtou*() family of functions, do not tolerate
346  * spaces at start nor an empty string, and returns a status code, the 'u_int'
347  * result being returned through a passed pointer (if no error).
348  *
349  * We detour through 'quad_t' because in-kernel strto*() functions cannot set
350  * 'errno' and thus can't distinguish a true maximum value from one returned
351  * because of overflow.  We use 'quad_t' instead of 'u_quad_t' to support
352  * negative specifications (e.g., such as "-1" for UINT_MAX).
353  */
354 static int
355 strtoui_strict(const char *const restrict s, const char **const restrict endptr,
356     int base, u_int *result)
357 {
358 	char *ep;
359 	quad_t q;
360 
361 	/* Rule out spaces and empty specifications. */
362 	if (s[0] == '\0' || isspace(s[0])) {
363 		if (endptr != NULL)
364 			*endptr = s;
365 		return (EINVAL);
366 	}
367 
368 	q = strtoq(s, &ep, base);
369 	if (endptr != NULL)
370 		*endptr = ep;
371 	if (q < 0) {
372 		/* We allow specifying a negative number. */
373 		if (q < -(quad_t)UINT_MAX - 1 || q == QUAD_MIN)
374 			return (EOVERFLOW);
375 	} else {
376 		if (q > UINT_MAX || q == UQUAD_MAX)
377 			return (EOVERFLOW);
378 	}
379 
380 	*result = (u_int)q;
381 	return (0);
382 }
383 
384 /*
385  * strsep() variant skipping spaces and tabs.
386  *
387  * Skips spaces and tabs at beginning and end of the token before one of the
388  * 'delim' characters, i.e., at start of string and just before one of the
389  * delimiter characters (so it doesn't prevent tokens containing spaces and tabs
390  * in the middle).
391  */
392 static char *
393 strsep_noblanks(char **const stringp, const char *delim)
394 {
395 	char *p = *stringp;
396 	char *ret, *wsp;
397 	size_t idx;
398 
399 	if (p == NULL)
400 		return (NULL);
401 
402 	idx = strspn(p, " \t");
403 	p += idx;
404 
405 	ret = strsep(&p, delim);
406 
407 	/* Rewind spaces/tabs at the end. */
408 	if (p == NULL)
409 		wsp = ret + strlen(ret);
410 	else
411 		wsp = p - 1;
412 	for (; wsp != ret; --wsp) {
413 		switch (wsp[-1]) {
414 		case ' ':
415 		case '\t':
416 			continue;
417 		}
418 		break;
419 	}
420 	*wsp = '\0';
421 
422 	*stringp = p;
423 	return (ret);
424 }
425 
426 
427 static void
428 make_parse_error(struct parse_error **const parse_error, const size_t pos,
429     const char *const fmt, ...)
430 {
431 	struct parse_error *const err = malloc(sizeof(*err), M_DO, M_WAITOK);
432 	va_list ap;
433 
434 	err->pos = pos;
435 	va_start(ap, fmt);
436 	vsnprintf(err->msg, PARSE_ERROR_SIZE, fmt, ap);
437 	va_end(ap);
438 
439 	MPASS(*parse_error == NULL);
440 	*parse_error = err;
441 }
442 
443 static void
444 free_parse_error(struct parse_error *const parse_error)
445 {
446 	free(parse_error, M_DO);
447 }
448 
449 static int
450 parse_id_type(const char *const string, id_type_t *const type,
451     struct parse_error **const parse_error)
452 {
453 	/*
454 	 * Special case for "any", as the canonical form for IT_ANY in
455 	 * id_type_to_str[] is "*".
456 	 */
457 	if (strcmp(string, "any") == 0) {
458 		*type = IT_ANY;
459 		return (0);
460 	}
461 
462 	/* Start at 1 to avoid parsing "invalid". */
463 	for (size_t i = 1; i <= IT_LAST; ++i) {
464 		if (strcmp(string, id_type_to_str[i]) == 0) {
465 			*type = i;
466 			return (0);
467 		}
468 	}
469 
470 	*type = IT_INVALID;
471 	make_parse_error(parse_error, 0, "No valid type found.");
472 	return (EINVAL);
473 }
474 
475 static size_t
476 parse_gid_flags(const char *const string, flags_t *const flags,
477     flags_t *const gid_flags)
478 {
479 	switch (string[0]) {
480 	case '+':
481 		*flags |= MDF_SUPP_ALLOW;
482 		goto has_supp_clause;
483 	case '!':
484 		*flags |= MDF_SUPP_MUST;
485 		*gid_flags |= MDF_MAY_REJ_SUPP;
486 		goto has_supp_clause;
487 	case '-':
488 		*flags |= MDF_SUPP_DONT;
489 		*gid_flags |= MDF_MAY_REJ_SUPP;
490 		goto has_supp_clause;
491 	has_supp_clause:
492 		*gid_flags |= MDF_HAS_SUPP_CLAUSE;
493 		return (1);
494 	}
495 
496 	return (0);
497 }
498 
/* Tells whether 'string' is exactly one of the "any ID" tokens "*" or "any". */
static bool
parse_any(const char *const string)
{
	if (strcmp(string, "*") == 0)
		return (true);
	return (strcmp(string, "any") == 0);
}
504 
505 static bool
506 has_clauses(const id_nb_t nb, const flags_t type_flags)
507 {
508 	return ((type_flags & MDF_TYPE_MASK) != 0 || nb != 0);
509 }
510 
/*
 * Parses a single target clause, of the form '<flags><type>=<id>' or a lone
 * "any"/"*" type.
 *
 * 'to' is the clause's text, which is modified in place while tokenizing.
 * Per-type flags and counts of explicit IDs are updated directly in 'rule',
 * and each explicit numerical ID is appended as a new element to 'uid_list' or
 * 'gid_list' (depending on the clause's type).  Clauses with "any"/"*" or "."
 * as their ID only change per-type flags.
 *
 * Returns 0 on success.  On failure, a parse error is stored into
 * '*parse_error' (which must be NULL on entry), with a position relative to
 * the start of 'to' (or 0), and EINVAL is returned, except if the number of
 * explicit IDs for the type wrapped around, in which case EOVERFLOW is.
 */
511 static int
512 parse_target_clause(char *to, struct rule *const rule,
513     struct id_list *const uid_list, struct id_list *const gid_list,
514     struct parse_error **const parse_error)
515 {
516 	const char *const start = to;
517 	char *to_type, *to_id;
518 	const char *p;
519 	struct id_list *list;
520 	id_nb_t *nb;
521 	flags_t *tflags;
522 	struct id_elem *ie;
523 	struct id_spec is = {.flags = 0};
524 	flags_t gid_flags = 0;
525 	id_type_t type;
526 	int error;
527 
528 	MPASS(*parse_error == NULL);
529 	MPASS(to != NULL);
530 	to_type = strsep_noblanks(&to, "=");
531 	MPASS(to_type != NULL);
	/* Skip the flag character, if any (0 or 1 character is consumed). */
532 	to_type += parse_gid_flags(to_type, &is.flags, &gid_flags);
533 	error = parse_id_type(to_type, &type, parse_error);
534 	if (error != 0)
535 		goto einval;
536 	if (type != IT_GID && is.flags != 0) {
537 		make_parse_error(parse_error, to_type - start,
538 		    "Expected type 'gid' after flags, not '%s'.",
539 		    to_type);
540 		goto einval;
541 	}
542 
	/*
	 * With an empty set of delimiters, the whole remainder (with blanks
	 * trimmed) is taken as the ID.  'to_id' is NULL if 'to' was left NULL
	 * by the previous call (reported below as a missing '=').
	 */
543 	to_id = strsep_noblanks(&to, "");
544 	switch (type) {
545 	case IT_GID:
546 		if (to_id == NULL) {
547 			make_parse_error(parse_error, to_type - start,
548 			    "No '=' and ID specification after type '%s'.",
549 			    to_type);
550 			goto einval;
551 		}
552 
553 		if (is.flags == 0) {
554 			/* No flags: Dealing with a primary group. */
555 			is.flags |= MDF_PRIMARY;
556 			gid_flags |= MDF_HAS_PRIMARY_CLAUSE;
557 		}
558 
559 		list = gid_list;
560 		nb = &rule->gids_nb;
561 		tflags = &rule->gid_flags;
562 
563 		/* "*" or "any"? */
564 		if (parse_any(to_id)) {
565 			/*
566 			 * We check that we have not seen any other clause of
567 			 * the same category (i.e., concerning primary or
568 			 * supplementary groups).
569 			 */
570 			if ((is.flags & MDF_PRIMARY) != 0) {
571 				if ((*tflags & MDF_HAS_PRIMARY_CLAUSE) != 0) {
572 					make_parse_error(parse_error,
573 					    to_id - start,
574 					    "'any' specified after another "
575 					    "(primary) GID.");
576 					goto einval;
577 				}
578 				*tflags |= gid_flags | MDF_ANY;
579 			} else {
580 				/*
581 				 * If a supplementary group flag was present, it
582 				 * must be MDF_SUPP_ALLOW ("+").
583 				 */
584 				if ((is.flags & MDF_SUPP_MASK) != MDF_SUPP_ALLOW) {
585 					make_parse_error(parse_error,
586 					    to_id - start,
587 					    "'any' specified with another "
588 					    "flag than '+'.");
589 					goto einval;
590 				}
591 				if ((*tflags & MDF_HAS_SUPP_CLAUSE) != 0) {
592 					make_parse_error(parse_error,
593 					    to_id - start,
594 					    "'any' with flag '+' specified after "
595 					    "another (supplementary) GID.");
596 					goto einval;
597 				}
598 				*tflags |= gid_flags | MDF_ANY_SUPP;
599 			}
600 			goto check_type_and_finish;
601 		} else {
602 			/*
603 			 * Check that we haven't already seen "any" for the same
604 			 * category.
605 			 */
606 			if ((is.flags & MDF_PRIMARY) != 0) {
607 				if ((*tflags & MDF_ANY) != 0) {
608 					make_parse_error(parse_error,
609 					    to_id - start,
610 					    "Some (primary) GID specified after "
611 					    "'any'.");
612 					goto einval;
613 				}
614 			} else if ((*tflags & MDF_ANY_SUPP) != 0 &&
615 			    (is.flags & MDF_SUPP_ALLOW) != 0) {
616 				make_parse_error(parse_error,
617 				    to_id - start,
618 				    "Some (supplementary) GID specified after "
619 				    "'any' with flag '+'.");
620 				goto einval;
621 			}
			/* Record which categories of clauses have been seen. */
622 			*tflags |= gid_flags;
623 		}
624 		break;
625 
626 	case IT_UID:
627 		if (to_id == NULL) {
628 			make_parse_error(parse_error, to_type - start,
629 			    "No '=' and ID specification after type '%s'.",
630 			    to_type);
631 			goto einval;
632 		}
633 
634 		list = uid_list;
635 		nb = &rule->uids_nb;
636 		tflags = &rule->uid_flags;
637 
638 		/* "*" or "any"? */
639 		if (parse_any(to_id)) {
640 			/* There must not be any other clause. */
641 			if (has_clauses(*nb, *tflags)) {
642 				make_parse_error(parse_error, to_id - start,
643 				    "'any' specified after another UID.");
644 				goto einval;
645 			}
646 			*tflags |= MDF_ANY;
647 			goto check_type_and_finish;
648 		} else {
649 			/*
650 			 * Check that we haven't already seen "any" for the same
651 			 * category.
652 			 */
653 			if ((*tflags & MDF_ANY) != 0) {
654 				make_parse_error(parse_error, to_id - start,
655 				    "Some UID specified after 'any'.");
656 				goto einval;
657 			}
658 		}
659 		break;
660 
661 	case IT_ANY:
662 		/* No ID allowed. */
663 		if (to_id != NULL) {
664 			make_parse_error(parse_error, to_type - start,
665 			    "No '=' and ID allowed after type '%s'.", to_type);
666 			goto einval;
667 		}
668 		/*
669 		 * We can't have IT_ANY after any other IT_*, it must be the
670 		 * only one.
671 		 */
672 		if (has_clauses(rule->uids_nb, rule->uid_flags) ||
673 		    has_clauses(rule->gids_nb, rule->gid_flags)) {
674 			make_parse_error(parse_error, to_type - start,
675 			    "Target clause of type '%s' coming after another "
676 			    "clause (must be alone).", to_type);
677 			goto einval;
678 		}
		/* Any UID, any primary group and any supplementary groups. */
679 		rule->uid_flags |= MDF_ANY;
680 		rule->gid_flags |= MDF_ANY | MDF_ANY_SUPP |
681 		    MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE;
682 		goto finish;
683 
684 	default:
685 		/* parse_id_type() returns no other types currently. */
686 		__assert_unreachable();
687 	}
688 
689 	/* Rule out cases that have been treated above. */
690 	MPASS((type == IT_UID || type == IT_GID) && !parse_any(to_id));
691 
692 	/* "."? */
693 	if (strcmp(to_id, ".") == 0) {
		/*
		 * The per-ID flags of "." are folded into the per-type flags
		 * rather than stored in a list element.
		 */
694 		if ((*tflags & MDF_CURRENT) != 0) {
695 			/* Duplicate "." <id>.  Try to coalesce. */
696 			error = coalesce_id_flags(is.flags, tflags);
697 			if (error != 0) {
698 				make_parse_error(parse_error, to_id - start,
699 				    "Incompatible flags with prior clause "
700 				    "with same target.");
701 				goto einval;
702 			}
703 		} else
704 			*tflags |= MDF_CURRENT | is.flags;
705 		goto check_type_and_finish;
706 	}
707 
708 	/* Parse an ID. */
709 	error = strtoui_strict(to_id, &p, 10, &is.id);
710 	if (error != 0 || *p != '\0') {
711 		make_parse_error(parse_error, to_id - start,
712 		    "Cannot parse a numerical ID (base 10).");
713 		goto einval;
714 	}
715 
716 	/* Explicit ID flags. */
717 	if (type == IT_GID && (is.flags & MDF_SUPP_MUST) != 0)
718 		*tflags |= MDF_EXPLICIT_SUPP_MUST;
719 
720 	/*
721 	 * We check for duplicate IDs and coalesce their 'struct id_spec' only
722 	 * at end of parse_single_rule() because it is much more performant then
723 	 * (using sorted arrays).
724 	 */
	/* 'id_nb_t' is unsigned, so a count of 0 after increment means it wrapped. */
725 	++*nb;
726 	if (*nb == 0) {
727 		make_parse_error(parse_error, 0,
728 		    "Too many target clauses of type '%s'.", to_type);
729 		return (EOVERFLOW);
730 	}
731 	ie = malloc(sizeof(*ie), M_DO, M_WAITOK);
732 	ie->spec = is;
733 	TAILQ_INSERT_TAIL(list, ie, ie_entries);
734 	check_type_and_id_spec(type, &is);
735 check_type_and_finish:
736 	check_type_and_type_flags(type, *tflags);
737 finish:
738 	return (0);
739 einval:
740 	/* We must have built a parse error on error. */
741 	MPASS(*parse_error != NULL);
742 	return (EINVAL);
743 }
744 
745 static int
746 u_int_cmp(const u_int i1, const u_int i2)
747 {
748 	return ((i1 > i2) - (i1 < i2));
749 }
750 
751 static int
752 id_spec_cmp(const void *const p1, const void *const p2)
753 {
754 	const struct id_spec *const is1 = p1;
755 	const struct id_spec *const is2 = p2;
756 
757 	return (u_int_cmp(is1->id, is2->id));
758 }
759 
760 /*
761  * Transfer content of 'list' into 'array', freeing and emptying list.
762  *
763  * 'nb' must be 'list''s length and not be greater than 'array''s size.  The
764  * destination array is sorted by ID.  Structures 'struct id_spec' with same IDs
765  * are coalesced if that makes sense (not including duplicate clauses), else
766  * EINVAL is returned.  On success, 'nb' is updated (lowered) to account for
767  * coalesced specifications.  The parameter 'type' is only for testing purposes
768  * (INVARIANTS).
769  */
770 static int
771 pour_list_into_rule(const id_type_t type, struct id_list *const list,
772     struct id_spec *const array, id_nb_t *const nb,
773     struct parse_error **const parse_error)
774 {
775 	struct id_elem *ie, *ie_next;
776 	size_t idx = 0;
777 
778 	/* Fill the array. */
779 	TAILQ_FOREACH_SAFE(ie, list, ie_entries, ie_next) {
780 		MPASS(idx < *nb);
781 		array[idx] = ie->spec;
782 		free(ie, M_DO);
783 		++idx;
784 	}
785 	MPASS(idx == *nb);
786 	TAILQ_INIT(list);
787 
788 	/* Sort it (by ID). */
789 	qsort(array, *nb, sizeof(*array), id_spec_cmp);
790 
791 	/* Coalesce same IDs. */
792 	if (*nb != 0) {
793 		size_t ref_idx = 0;
794 
795 		for (idx = 1; idx < *nb; ++idx) {
796 			const u_int id = array[idx].id;
797 
798 			if (id != array[ref_idx].id) {
799 				++ref_idx;
800 				if (ref_idx != idx)
801 					array[ref_idx] = array[idx];
802 				continue;
803 			}
804 
805 			switch (type) {
806 				int error;
807 
808 			case IT_GID:
809 				error = coalesce_id_flags(array[idx].flags,
810 				    &array[ref_idx].flags);
811 				if (error != 0) {
812 					make_parse_error(parse_error, 0,
813 					    "Incompatible flags or duplicate "
814 					    "GID %u.", id);
815 					return (EINVAL);
816 				}
817 				check_type_and_id_flags(type,
818 				    array[ref_idx].flags);
819 				break;
820 
821 			case IT_UID:
822 				/*
823 				 * No flags in this case.  Multiple appearances
824 				 * of the same UID is an exact redundancy, so
825 				 * error out.
826 				 */
827 				make_parse_error(parse_error, 0,
828 				    "Duplicate UID %u.", id);
829 				return (EINVAL);
830 
831 			default:
832 				__assert_unreachable();
833 			}
834 		}
835 		*nb = ref_idx + 1;
836 	}
837 
838 	return (0);
839 }
840 
841 /*
842  * See also first comments for parse_rule() below.
843  *
844  * The second part of a rule, called <target> (or <to>), is a comma-separated
845  * (',') list of '<flags><type>=<id>' clauses similar to that of the <from>
846  * part, with the extensions that <id> may also be "*" or "any" or ".", and that
847  * <flags> may contain at most one of the '+', '-' and '!' characters when
848  * <type> is "gid" (no flags are allowed for "uid").  No two clauses in a single
849  * <to> list may list the same <id>.  "*" and "any" both designate any ID for
850  * the <type>, and are aliases to each other.  In front of "any" (or "*"), only
851  * the '+' flag is allowed (in the "gid" case).  "." designates the process'
852  * current IDs for the <type>.  The precise meaning of flags and "." is
853  * explained in functions checking privileges below.
854  */
855 static int
856 parse_single_rule(char *rule, struct rules *const rules,
857     struct parse_error **const parse_error)
858 {
859 	const char *const start = rule;
860 	const char *from_type, *from_id, *p;
861 	char *to_list;
862 	struct id_list uid_list, gid_list;
863 	struct id_elem *ie, *ie_next;
864 	struct rule *new;
865 	int error;
866 
867 	MPASS(*parse_error == NULL);
868 	TAILQ_INIT(&uid_list);
869 	TAILQ_INIT(&gid_list);
870 
871 	/* Freed when the 'struct rules' container is freed. */
872 	new = malloc(sizeof(*new), M_DO, M_WAITOK | M_ZERO);
873 
874 	from_type = strsep_noblanks(&rule, "=");
875 	MPASS(from_type != NULL); /* Because 'rule' was not NULL. */
876 	error = parse_id_type(from_type, &new->from_type, parse_error);
877 	if (error != 0)
878 		goto einval;
879 	switch (new->from_type) {
880 	case IT_UID:
881 	case IT_GID:
882 		break;
883 	default:
884 		make_parse_error(parse_error, 0, "Type '%s' not allowed in "
885 		    "the \"from\" part of rules.");
886 		goto einval;
887 	}
888 
889 	from_id = strsep_noblanks(&rule, ":");
890 	if (is_null_or_empty(from_id)) {
891 		make_parse_error(parse_error, 0, "No ID specified.");
892 		goto einval;
893 	}
894 
895 	error = strtoui_strict(from_id, &p, 10, &new->from_id);
896 	if (error != 0 || *p != '\0') {
897 		make_parse_error(parse_error, from_id - start,
898 		    "Cannot parse a numerical ID (base 10).");
899 		goto einval;
900 	}
901 
902 	/*
903 	 * We will now parse the "to" list.
904 	 *
905 	 * In order to ease parsing, we will begin by building lists of target
906 	 * UIDs and GIDs in local variables 'uid_list' and 'gid_list'.  The
907 	 * number of each type of IDs will be filled directly in 'new'.  At end
908 	 * of parse, we will allocate both arrays of IDs to be placed into the
909 	 * 'uids' and 'gids' members, sort them, and discard the tail queues
910 	 * used to build them.  This conversion to sorted arrays at end of parse
911 	 * allows to minimize memory allocations and enables searching IDs in
912 	 * O(log(n)) instead of linearly.
913 	 */
914 	to_list = strsep_noblanks(&rule, ",");
915 	if (to_list == NULL) {
916 		make_parse_error(parse_error, 0, "No target list.");
917 		goto einval;
918 	}
919 	do {
920 		error = parse_target_clause(to_list, new, &uid_list, &gid_list,
921 		    parse_error);
922 		if (error != 0) {
923 			(*parse_error)->pos += to_list - start;
924 			goto einval;
925 		}
926 
927 		to_list = strsep_noblanks(&rule, ",");
928 	} while (to_list != NULL);
929 
930 	if (new->uids_nb != 0) {
931 		new->uids = malloc(sizeof(*new->uids) * new->uids_nb, M_DO,
932 		    M_WAITOK);
933 		error = pour_list_into_rule(IT_UID, &uid_list, new->uids,
934 		    &new->uids_nb, parse_error);
935 		if (error != 0)
936 			goto einval;
937 	}
938 	MPASS(TAILQ_EMPTY(&uid_list));
939 	if (!has_clauses(new->uids_nb, new->uid_flags)) {
940 		/* No UID specified, default is "uid=.". */
941 		MPASS(new->uid_flags == 0);
942 		new->uid_flags = MDF_CURRENT;
943 		check_type_and_type_flags(IT_UID, new->uid_flags);
944 	}
945 
946 	if (new->gids_nb != 0) {
947 		new->gids = malloc(sizeof(*new->gids) * new->gids_nb, M_DO,
948 		    M_WAITOK);
949 		error = pour_list_into_rule(IT_GID, &gid_list, new->gids,
950 		    &new->gids_nb, parse_error);
951 		if (error != 0)
952 			goto einval;
953 	}
954 	MPASS(TAILQ_EMPTY(&gid_list));
955 	if (!has_clauses(new->gids_nb, new->gid_flags)) {
956 		/* No GID specified, default is "gid=.,!gid=.". */
957 		MPASS(new->gid_flags == 0);
958 		new->gid_flags = MDF_CURRENT | MDF_PRIMARY | MDF_SUPP_MUST |
959 		    MDF_HAS_PRIMARY_CLAUSE | MDF_HAS_SUPP_CLAUSE;
960 		check_type_and_type_flags(IT_GID, new->gid_flags);
961 	}
962 
963 	TAILQ_INSERT_TAIL(&rules->head, new, r_entries);
964 	return (0);
965 
966 einval:
967 	free(new->gids, M_DO);
968 	free(new->uids, M_DO);
969 	free(new, M_DO);
970 	TAILQ_FOREACH_SAFE(ie, &gid_list, ie_entries, ie_next)
971 	    free(ie, M_DO);
972 	TAILQ_FOREACH_SAFE(ie, &uid_list, ie_entries, ie_next)
973 	    free(ie, M_DO);
974 	MPASS(*parse_error != NULL);
975 	return (EINVAL);
976 }
977 
978 /*
979  * Parse rules specification and produce rule structures out of it.
980  *
981  * Returns 0 on success, with '*rulesp' made to point to a 'struct rule'
982  * representing the rules.  On error, the returned value is non-zero and
983  * '*rulesp' is unchanged.  If 'string' has length greater or equal to
984  * MAC_RULE_STRING_LEN, ENAMETOOLONG is returned.  If it is not in the expected
985  * format, EINVAL is returned.  If an error is returned, '*parse_error' is set
986  * to point to a 'struct parse_error' giving an error message for the problem,
987  * else '*parse_error' is set to NULL.
988  *
989  * Expected format: A semi-colon-separated list of rules of the form
990  * "<from>:<target>".  The <from> part is of the form "<type>=<id>" where <type>
991  * is "uid" or "gid", <id> an UID or GID (depending on <type>) and <target> is
992  * "*", "any" or a comma-separated list of '<flags><type>=<id>' clauses (see the
993  * comment for parse_single_rule() for more details).  For convenience, empty
994  * rules are allowed (and do nothing), and spaces and tabs are allowed (and
995  * removed) around each token (tokens are natural ones, except that
996  * '<flags><type>' as a whole is considered a single token, so no blanks are
997  * allowed between '<flags>' and '<type>').
998  *
999  * Examples:
1000  * - "uid=1001:uid=1010,gid=1010;uid=1002:any"
1001  * - "gid=1010:gid=1011,gid=1012,gid=1013"
1002  */
1003 static int
1004 parse_rules(const char *const string, struct rules **const rulesp,
1005     struct parse_error **const parse_error)
1006 {
1007 	const size_t len = strlen(string);
1008 	char *copy, *p, *rule;
1009 	struct rules *rules;
1010 	int error = 0;
1011 
1012 	*parse_error = NULL;
1013 
1014 	if (len >= MAC_RULE_STRING_LEN) {
1015 		make_parse_error(parse_error, 0,
1016 		    "Rule specification string is too long (%zu, max %zu)",
1017 		    len, MAC_RULE_STRING_LEN - 1);
1018 		return (ENAMETOOLONG);
1019 	}
1020 
1021 	rules = alloc_rules();
1022 	bcopy(string, rules->string, len + 1);
1023 	MPASS(rules->string[len] == '\0'); /* Catch some races. */
1024 
1025 	copy = malloc(len + 1, M_DO, M_WAITOK);
1026 	bcopy(string, copy, len + 1);
1027 	MPASS(copy[len] == '\0'); /* Catch some races. */
1028 
1029 	p = copy;
1030 	while ((rule = strsep_noblanks(&p, ";")) != NULL) {
1031 		if (rule[0] == '\0')
1032 			continue;
1033 		error = parse_single_rule(rule, rules, parse_error);
1034 		if (error != 0) {
1035 			(*parse_error)->pos += rule - copy;
1036 			toast_rules(rules);
1037 			goto out;
1038 		}
1039 	}
1040 
1041 	*rulesp = rules;
1042 out:
1043 	free(copy, M_DO);
1044 	return (error);
1045 }
1046 
1047 /*
1048  * Find rules applicable to the passed prison.
1049  *
1050  * Returns the applicable rules (and never NULL).  'pr' must be unlocked.
1051  * 'aprp' is set to the (ancestor) prison holding these, and it must be unlocked
1052  * once the caller is done accessing the rules.  '*aprp' is equal to 'pr' if and
1053  * only if the current jail has its own set of rules.
1054  */
1055 static struct rules *
1056 find_rules(struct prison *const pr, struct prison **const aprp)
1057 {
1058 	struct prison *cpr, *ppr;
1059 	struct rules *rules;
1060 
1061 	cpr = pr;
1062 	for (;;) {
1063 		prison_lock(cpr);
1064 		rules = osd_jail_get(cpr, osd_jail_slot);
1065 		if (rules != NULL)
1066 			break;
1067 		prison_unlock(cpr);
1068 
1069 		ppr = cpr->pr_parent;
1070 		MPASS(ppr != NULL); /* prison0 always has rules. */
1071 		cpr = ppr;
1072 	}
1073 
1074 	*aprp = cpr;
1075 	return (rules);
1076 }
1077 
1078 static void
1079 hold_rules(struct rules *const rules)
1080 {
1081 	refcount_acquire(&rules->use_count);
1082 }
1083 
1084 static void
1085 drop_rules(struct rules *const rules)
1086 {
1087 	if (refcount_release(&rules->use_count))
1088 		toast_rules(rules);
1089 }
1090 
#ifdef INVARIANTS
/*
 * Debugging aid: Panic unless the use count of 'rules' is exactly 'expected'.
 * Expands to nothing when INVARIANTS is not defined.
 */
static void
check_rules_use_count(const struct rules *const rules, u_int expected)
{
	const u_int actual = refcount_load(&rules->use_count);

	if (actual == expected)
		return;

	panic("MAC/do: Rules at %p: Use count is %u, expected %u",
	    rules, actual, expected);
}
#else
#define check_rules_use_count(...)
#endif /* INVARIANTS */
1104 
/*
 * OSD destructor for slot 'osd_jail_slot'.
 *
 * Called with 'value' not NULL.  We have arranged that it is only ever called
 * when the corresponding jail goes down or at module unload.
 *
 * In both cases, we expect to hold the very last reference on the rules:
 * - If the "holding" jail goes down, no threads of that jail (or its
 *   sub-jails) should currently be executing, and in particular none should be
 *   in the middle of setcred(), so no one but us should be using the rules.
 * - Module unload is more complex.  Although the MAC framework takes care that
 *   no hook is called while a module is unloading, the unload could happen
 *   between two calls to MAC hooks in the course of, e.g., executing
 *   setcred(), where the rules' reference count has been bumped to keep them
 *   alive even if the rules on the "holding" jail have been concurrently
 *   changed.  These other references are held in our thread OSD slot, so
 *   mac_do_destroy() makes sure that all threads' slots are freed first.
 */
static void
dealloc_jail_osd(void *const value)
{
	struct rules *const jail_rules = value;

	/* Only the jail's own reference must remain at this point. */
	check_rules_use_count(jail_rules, 1);
	toast_rules(jail_rules);
}
1133 
1134 /*
1135  * Remove the rules specifically associated to a prison.
1136  *
1137  * In practice, this means that the rules become inherited (from the closest
1138  * ascendant that has some).
1139  *
1140  * Destroys the 'osd_jail_slot' slot of the passed jail.
1141  */
1142 static void
1143 remove_rules(struct prison *const pr)
1144 {
1145 	struct rules *old_rules;
1146 	int error __unused;
1147 
1148 	prison_lock(pr);
1149 	/*
1150 	 * We go to the burden of extracting rules first instead of just letting
1151 	 * osd_jail_del() calling dealloc_jail_osd() as we want to decrement
1152 	 * their use count, and possibly free them, outside of the prison lock.
1153 	 */
1154 	old_rules = osd_jail_get(pr, osd_jail_slot);
1155 	error = osd_jail_set(pr, osd_jail_slot, NULL);
1156 	/* osd_set() never fails nor allocate memory when 'value' is NULL. */
1157 	MPASS(error == 0);
1158 	/*
1159 	 * This completely frees the OSD slot, but doesn't call the destructor
1160 	 * since we've just put NULL in the slot.
1161 	 */
1162 	osd_jail_del(pr, osd_jail_slot);
1163 	prison_unlock(pr);
1164 
1165 	if (old_rules != NULL)
1166 		drop_rules(old_rules);
1167 }
1168 
1169 /*
1170  * Assign already built rules to a jail.
1171  */
1172 static void
1173 set_rules(struct prison *const pr, struct rules *const rules)
1174 {
1175 	struct rules *old_rules;
1176 	void **rsv;
1177 
1178 	check_rules_use_count(rules, 0);
1179 	hold_rules(rules);
1180 	rsv = osd_reserve(osd_jail_slot);
1181 
1182 	prison_lock(pr);
1183 	old_rules = osd_jail_get(pr, osd_jail_slot);
1184 	osd_jail_set_reserved(pr, osd_jail_slot, rsv, rules);
1185 	prison_unlock(pr);
1186 	if (old_rules != NULL)
1187 		drop_rules(old_rules);
1188 }
1189 
/*
 * Attach freshly allocated rules (as returned by alloc_rules(), with nothing
 * parsed into them) to a jail.
 */
static void
set_empty_rules(struct prison *const pr)
{

	set_rules(pr, alloc_rules());
}
1200 
/*
 * Parse a rules specification string and, on success only, attach the
 * resulting rules to a jail.
 *
 * Returns the error code of parse_rules() (which see), with '*parse_error' set
 * by it.  On error, the jail's rules are left untouched.
 */
static int
parse_and_set_rules(struct prison *const pr, const char *rules_string,
    struct parse_error **const parse_error)
{
	struct rules *parsed;
	const int error = parse_rules(rules_string, &parsed, parse_error);

	if (error == 0)
		set_rules(pr, parsed);
	return (error);
}
1219 
1220 static int
1221 mac_do_sysctl_rules(SYSCTL_HANDLER_ARGS)
1222 {
1223 	char *const buf = malloc(MAC_RULE_STRING_LEN, M_DO, M_WAITOK);
1224 	struct prison *const td_pr = req->td->td_ucred->cr_prison;
1225 	struct prison *pr;
1226 	struct rules *rules;
1227 	struct parse_error *parse_error;
1228 	int error;
1229 
1230 	rules = find_rules(td_pr, &pr);
1231 	strlcpy(buf, rules->string, MAC_RULE_STRING_LEN);
1232 	prison_unlock(pr);
1233 
1234 	error = sysctl_handle_string(oidp, buf, MAC_RULE_STRING_LEN, req);
1235 	if (error != 0 || req->newptr == NULL)
1236 		goto out;
1237 
1238 	/* Set our prison's rules, not that of the jail we inherited from. */
1239 	error = parse_and_set_rules(td_pr, buf, &parse_error);
1240 	if (error != 0) {
1241 		if (print_parse_error)
1242 			printf("MAC/do: Parse error at index %zu: %s\n",
1243 			    parse_error->pos, parse_error->msg);
1244 		free_parse_error(parse_error);
1245 	}
1246 out:
1247 	free(buf, M_DO);
1248 	return (error);
1249 }
1250 
1251 SYSCTL_PROC(_security_mac_do, OID_AUTO, rules,
1252     CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_PRISON|CTLFLAG_MPSAFE,
1253     0, 0, mac_do_sysctl_rules, "A",
1254     "Rules");
1255 
1256 
1257 SYSCTL_JAIL_PARAM_SYS_SUBNODE(mac, do, CTLFLAG_RW, "Jail MAC/do parameters");
1258 SYSCTL_JAIL_PARAM_STRING(_mac_do, rules, CTLFLAG_RW, MAC_RULE_STRING_LEN,
1259     "Jail MAC/do rules");
1260 
1261 
1262 static int
1263 mac_do_jail_create(void *obj, void *data __unused)
1264 {
1265 	struct prison *const pr = obj;
1266 
1267 	set_empty_rules(pr);
1268 	return (0);
1269 }
1270 
1271 static int
1272 mac_do_jail_get(void *obj, void *data)
1273 {
1274 	struct prison *ppr, *const pr = obj;
1275 	struct vfsoptlist *const opts = data;
1276 	struct rules *rules;
1277 	int jsys, error;
1278 
1279 	rules = find_rules(pr, &ppr);
1280 
1281 	jsys = pr == ppr ?
1282 	    (TAILQ_EMPTY(&rules->head) ? JAIL_SYS_DISABLE : JAIL_SYS_NEW) :
1283 	    JAIL_SYS_INHERIT;
1284 	error = vfs_setopt(opts, "mac.do", &jsys, sizeof(jsys));
1285 	if (error != 0 && error != ENOENT)
1286 		goto done;
1287 
1288 	error = vfs_setopts(opts, "mac.do.rules", rules->string);
1289 	if (error != 0 && error != ENOENT)
1290 		goto done;
1291 
1292 	error = 0;
1293 done:
1294 	prison_unlock(ppr);
1295 	return (error);
1296 }
1297 
1298 /*
1299  * -1 is used as a sentinel in mac_do_jail_check() and mac_do_jail_set() below.
1300  */
1301 _Static_assert(-1 != JAIL_SYS_DISABLE && -1 != JAIL_SYS_NEW &&
1302     -1 != JAIL_SYS_INHERIT,
1303     "mac_do(4) uses -1 as a sentinel for uninitialized 'jsys'.");
1304 
1305 /*
1306  * We perform only cheap checks here, i.e., we do not really parse the rules
1307  * specification string, if any.
1308  */
1309 static int
1310 mac_do_jail_check(void *obj, void *data)
1311 {
1312 	struct vfsoptlist *opts = data;
1313 	char *rules_string;
1314 	int error, jsys, size;
1315 
1316 	error = vfs_copyopt(opts, "mac.do", &jsys, sizeof(jsys));
1317 	if (error == ENOENT)
1318 		jsys = -1;
1319 	else {
1320 		if (error != 0)
1321 			return (error);
1322 		if (jsys != JAIL_SYS_DISABLE && jsys != JAIL_SYS_NEW &&
1323 		    jsys != JAIL_SYS_INHERIT)
1324 			return (EINVAL);
1325 	}
1326 
1327 	/*
1328 	 * We use vfs_getopt() here instead of vfs_getopts() to get the length.
1329 	 * We perform the additional checks done by the latter here, even if
1330 	 * jail_set() calls vfs_getopts() itself later (they becoming
1331 	 * inconsistent wouldn't cause any security problem).
1332 	 */
1333 	error = vfs_getopt(opts, "mac.do.rules", (void**)&rules_string, &size);
1334 	if (error == ENOENT) {
1335 		/*
1336 		 * Default (in absence of "mac.do.rules") is to disable (and, in
1337 		 * particular, not inherit).
1338 		 */
1339 		if (jsys == -1)
1340 			jsys = JAIL_SYS_DISABLE;
1341 
1342 		if (jsys == JAIL_SYS_NEW) {
1343 			vfs_opterror(opts, "'mac.do.rules' must be specified "
1344 			    "given 'mac.do''s value");
1345 			return (EINVAL);
1346 		}
1347 
1348 		/* Absence of "mac.do.rules" at this point is OK. */
1349 		error = 0;
1350 	} else {
1351 		if (error != 0)
1352 			return (error);
1353 
1354 		/* Not a proper string. */
1355 		if (size == 0 || rules_string[size - 1] != '\0') {
1356 			vfs_opterror(opts, "'mac.do.rules' not a proper string");
1357 			return (EINVAL);
1358 		}
1359 
1360 		if (size > MAC_RULE_STRING_LEN) {
1361 			vfs_opterror(opts, "'mdo.rules' too long");
1362 			return (ENAMETOOLONG);
1363 		}
1364 
1365 		if (jsys == -1)
1366 			/* Default (if "mac.do.rules" is present). */
1367 			jsys = rules_string[0] == '\0' ? JAIL_SYS_DISABLE :
1368 			    JAIL_SYS_NEW;
1369 
1370 		/*
1371 		 * Be liberal and accept JAIL_SYS_DISABLE and JAIL_SYS_INHERIT
1372 		 * with an explicit empty rules specification.
1373 		 */
1374 		switch (jsys) {
1375 		case JAIL_SYS_DISABLE:
1376 		case JAIL_SYS_INHERIT:
1377 			if (rules_string[0] != '\0') {
1378 				vfs_opterror(opts, "'mac.do.rules' specified "
1379 				    "but should not given 'mac.do''s value");
1380 				return (EINVAL);
1381 			}
1382 			break;
1383 		}
1384 	}
1385 
1386 	return (error);
1387 }
1388 
1389 static int
1390 mac_do_jail_set(void *obj, void *data)
1391 {
1392 	struct prison *pr = obj;
1393 	struct vfsoptlist *opts = data;
1394 	char *rules_string;
1395 	struct parse_error *parse_error;
1396 	int error, jsys;
1397 
1398 	/*
1399 	 * The invariants checks used below correspond to what has already been
1400 	 * checked in jail_check() above.
1401 	 */
1402 
1403 	error = vfs_copyopt(opts, "mac.do", &jsys, sizeof(jsys));
1404 	MPASS(error == 0 || error == ENOENT);
1405 	if (error != 0)
1406 		jsys = -1; /* Mark unfilled. */
1407 
1408 	rules_string = vfs_getopts(opts, "mac.do.rules", &error);
1409 	MPASS(error == 0 || error == ENOENT);
1410 	if (error == 0) {
1411 		MPASS(strlen(rules_string) < MAC_RULE_STRING_LEN);
1412 		if (jsys == -1)
1413 			/* Default (if "mac.do.rules" is present). */
1414 			jsys = rules_string[0] == '\0' ? JAIL_SYS_DISABLE :
1415 			    JAIL_SYS_NEW;
1416 		else
1417 			MPASS(jsys == JAIL_SYS_NEW ||
1418 			    ((jsys == JAIL_SYS_DISABLE ||
1419 			    jsys == JAIL_SYS_INHERIT) &&
1420 			    rules_string[0] == '\0'));
1421 	} else {
1422 		MPASS(jsys != JAIL_SYS_NEW);
1423 		if (jsys == -1)
1424 			/*
1425 			 * Default (in absence of "mac.do.rules") is to disable
1426 			 * (and, in particular, not inherit).
1427 			 */
1428 			jsys = JAIL_SYS_DISABLE;
1429 		/* If disabled, we'll store an empty rule specification. */
1430 		if (jsys == JAIL_SYS_DISABLE)
1431 			rules_string = "";
1432 	}
1433 
1434 	switch (jsys) {
1435 	case JAIL_SYS_INHERIT:
1436 		remove_rules(pr);
1437 		error = 0;
1438 		break;
1439 	case JAIL_SYS_DISABLE:
1440 	case JAIL_SYS_NEW:
1441 		error = parse_and_set_rules(pr, rules_string, &parse_error);
1442 		if (error != 0) {
1443 			vfs_opterror(opts,
1444 			    "MAC/do: Parse error at index %zu: %s\n",
1445 			    parse_error->pos, parse_error->msg);
1446 			free_parse_error(parse_error);
1447 		}
1448 		break;
1449 	default:
1450 		__assert_unreachable();
1451 	}
1452 	return (error);
1453 }
1454 
1455 /*
1456  * OSD jail methods.
1457  *
1458  * There is no PR_METHOD_REMOVE, as OSD storage is destroyed by the common jail
1459  * code (see prison_cleanup()), which triggers a run of our dealloc_jail_osd()
1460  * destructor.
1461  */
1462 static const osd_method_t osd_methods[PR_MAXMETHOD] = {
1463 	[PR_METHOD_CREATE] = mac_do_jail_create,
1464 	[PR_METHOD_GET] = mac_do_jail_get,
1465 	[PR_METHOD_CHECK] = mac_do_jail_check,
1466 	[PR_METHOD_SET] = mac_do_jail_set,
1467 };
1468 
1469 
1470 /*
1471  * Common header structure.
1472  *
1473  * Each structure that is used to pass information between some MAC check
1474  * function and priv_grant() must start with this header.
1475  */
1476 struct mac_do_data_header {
1477 	/* Size of the allocated buffer holding the containing structure. */
1478 	size_t		 allocated_size;
1479 	/* Full size of the containing structure. */
1480 	size_t		 size;
1481 	/*
1482 	 * For convenience, we use privilege numbers as an identifier for the
1483 	 * containing structure's type, since there is one distinct privilege
1484 	 * for each privilege changing function we are supporting.  0 in 'priv'
1485 	 * indicates this header is uninitialized.
1486 	 */
1487 	int		 priv;
1488 	/* Rules to apply. */
1489 	struct rules	*rules;
1490 };
1491 
1492 /*
1493  * The case of unusable or absent per-thread data can actually happen as nothing
1494  * prevents, e.g., priv_check*() with privilege 'priv' to be called standalone,
1495  * as it is currently by, e.g., the Linux emulator for PRIV_CRED_SETUID.  We
1496  * interpret such calls to priv_check*() as full, unrestricted requests for
1497  * 'priv', contrary to what we're doing here for selected operations, and
1498  * consequently will not grant the requested privilege.
1499  *
1500  * Also, we protect ourselves from a concurrent change of 'do_enabled' while
1501  * a call to setcred() is in progress by storing the rules per-thread
1502  * which is then consulted by each successive hook so that they all have
1503  * a coherent view of the specifications, and we empty the slot (actually, mark
1504  * it as empty) when MAC/do is disabled.
1505  */
1506 static int
1507 check_data_usable(const void *const data, const size_t size, const int priv)
1508 {
1509 	const struct mac_do_data_header *const hdr = data;
1510 
1511 	if (hdr == NULL || hdr->priv == 0)
1512 		return (ENOENT);
1513 	/*
1514 	 * Impacting changes in the protocols we are based on...  Don't crash in
1515 	 * production.
1516 	 */
1517 	if (hdr->priv != priv) {
1518 		MPASS(hdr->priv == priv);
1519 		return (EBUSY);
1520 	}
1521 	MPASS(hdr->size == size);
1522 	MPASS(hdr->size <= hdr->allocated_size);
1523 	return (0);
1524 }
1525 
1526 static void
1527 clear_data(void *const data)
1528 {
1529 	struct mac_do_data_header *const hdr = data;
1530 
1531 	if (hdr != NULL) {
1532 		drop_rules(hdr->rules);
1533 		/* We don't deallocate so as to save time on next access. */
1534 		hdr->priv = 0;
1535 	}
1536 }
1537 
1538 static void *
1539 fetch_data(void)
1540 {
1541 	return (osd_thread_get_unlocked(curthread, osd_thread_slot));
1542 }
1543 
1544 static bool
1545 is_data_reusable(const void *const data, const size_t size)
1546 {
1547 	const struct mac_do_data_header *const hdr = data;
1548 
1549 	return (hdr != NULL && size <= hdr->allocated_size);
1550 }
1551 
1552 static void
1553 set_data_header(void *const data, const size_t size, const int priv,
1554     struct rules *const rules)
1555 {
1556 	struct mac_do_data_header *const hdr = data;
1557 
1558 	MPASS(hdr->priv == 0);
1559 	MPASS(priv != 0);
1560 	MPASS(size <= hdr->allocated_size);
1561 	hdr->size = size;
1562 	hdr->priv = priv;
1563 	hdr->rules = rules;
1564 }
1565 
1566 /* The proc lock (and any other non-sleepable lock) must not be held. */
1567 static void *
1568 alloc_data(void *const data, const size_t size)
1569 {
1570 	struct mac_do_data_header *const hdr = realloc(data, size, M_DO,
1571 	    M_WAITOK);
1572 
1573 	MPASS(size >= sizeof(struct mac_do_data_header));
1574 	hdr->allocated_size = size;
1575 	hdr->priv = 0;
1576 	if (hdr != data) {
1577 		/*
1578 		 * This call either reuses the existing memory allocated for the
1579 		 * slot or tries to allocate some without blocking.
1580 		 */
1581 		int error = osd_thread_set(curthread, osd_thread_slot, hdr);
1582 
1583 		if (error != 0) {
1584 			/* Going to make a M_WAITOK allocation. */
1585 			void **const rsv = osd_reserve(osd_thread_slot);
1586 
1587 			error = osd_thread_set_reserved(curthread,
1588 			    osd_thread_slot, rsv, hdr);
1589 			MPASS(error == 0);
1590 		}
1591 	}
1592 	return (hdr);
1593 }
1594 
1595 /* Destructor for 'osd_thread_slot'. */
1596 static void
1597 dealloc_thread_osd(void *const value)
1598 {
1599 	free(value, M_DO);
1600 }
1601 
1602 /*
1603  * Whether to grant access to some primary group according to flags.
1604  *
1605  * The passed 'flags' must be those of a rule's matching GID, or the IT_GID type
1606  * flags when MDF_CURRENT has been matched.
1607  *
1608  * Return values:
1609  * - 0:			Access granted.
1610  * - EJUSTRETURN:	Flags are agnostic.
1611  */
1612 static int
1613 grant_primary_group_from_flags(const flags_t flags)
1614 {
1615 	return ((flags & MDF_PRIMARY) != 0 ? 0 : EJUSTRETURN);
1616 }
1617 
1618 /*
1619  * Same as grant_primary_group_from_flags(), but for supplementary groups.
1620  *
1621  * Return values:
1622  * - 0:			Access granted.
1623  * - EJUSTRETURN:	Flags are agnostic.
1624  * - EPERM:		Access denied.
1625  */
1626 static int
1627 grant_supplementary_group_from_flags(const flags_t flags)
1628 {
1629 	if ((flags & MDF_SUPP_MASK) != 0)
1630 		return ((flags & MDF_SUPP_DONT) != 0 ? EPERM : 0);
1631 
1632 	return (EJUSTRETURN);
1633 }
1634 
1635 static int
1636 rule_grant_supplementary_groups(const struct rule *const rule,
1637     const struct ucred *const old_cred, const struct ucred *const new_cred)
1638 {
1639 	const gid_t *const old_groups = old_cred->cr_groups;
1640 	const gid_t *const new_groups = new_cred->cr_groups;
1641 	const int old_ngroups = old_cred->cr_ngroups;
1642 	const int new_ngroups = new_cred->cr_ngroups;
1643 	const flags_t gid_flags = rule->gid_flags;
1644 	const bool current_has_supp = (gid_flags & MDF_CURRENT) != 0 &&
1645 	    (gid_flags & MDF_SUPP_MASK) != 0;
1646 	id_nb_t rule_idx = 0;
1647 	int old_idx = 1, new_idx = 1;
1648 
1649 	if ((gid_flags & MDF_ANY_SUPP) != 0 &&
1650 	    (gid_flags & MDF_MAY_REJ_SUPP) == 0)
1651 		/*
1652 		 * Any set of supplementary groups is accepted, no need to loop
1653 		 * over them.
1654 		 */
1655 		return (0);
1656 
1657 	for (; new_idx < new_ngroups; ++new_idx) {
1658 		const gid_t gid = new_groups[new_idx];
1659 		bool may_accept = false;
1660 
1661 		if ((gid_flags & MDF_ANY_SUPP) != 0)
1662 			may_accept = true;
1663 
1664 		/* Do we have to check for the current supplementary groups? */
1665 		if (current_has_supp) {
1666 			/*
1667 			 * Linear search, as both supplementary groups arrays
1668 			 * are sorted.  Advancing 'old_idx' with a binary search
1669 			 * on absence of MDF_SUPP_MUST doesn't seem worth it in
1670 			 * practice.
1671 			 */
1672 			for (; old_idx < old_ngroups; ++old_idx) {
1673 				const gid_t old_gid = old_groups[old_idx];
1674 
1675 				if (old_gid < gid) {
1676 					/* Mandatory but absent. */
1677 					if ((gid_flags & MDF_SUPP_MUST) != 0)
1678 						return (EPERM);
1679 				} else if (old_gid == gid) {
1680 					switch (gid_flags & MDF_SUPP_MASK) {
1681 					case MDF_SUPP_DONT:
1682 						/* Present but forbidden. */
1683 						return (EPERM);
1684 					case MDF_SUPP_ALLOW:
1685 					case MDF_SUPP_MUST:
1686 						may_accept = true;
1687 						break;
1688 					default:
1689 #ifdef INVARIANTS
1690 						__assert_unreachable();
1691 #else
1692 						/* Better be safe than sorry. */
1693 						return (EPERM);
1694 #endif
1695 					}
1696 					++old_idx;
1697 					break;
1698 				}
1699 				else
1700 					break;
1701 			}
1702 		}
1703 
1704 		/*
1705 		 * Search by GID for a corresponding 'struct id_spec'.
1706 		 *
1707 		 * Again, linear search, with same note on not using binary
1708 		 * search optimization as above (the trigger would be absence of
1709 		 * MDF_EXPLICIT_SUPP_MUST this time).
1710 		 */
1711 		for (; rule_idx < rule->gids_nb; ++rule_idx) {
1712 			const struct id_spec is = rule->gids[rule_idx];
1713 
1714 			if (is.id < gid) {
1715 				/* Mandatory but absent. */
1716 				if ((is.flags & MDF_SUPP_MUST) != 0)
1717 					return (EPERM);
1718 			} else if (is.id == gid) {
1719 				switch (is.flags & MDF_SUPP_MASK) {
1720 				case MDF_SUPP_DONT:
1721 					/* Present but forbidden. */
1722 					return (EPERM);
1723 				case MDF_SUPP_ALLOW:
1724 				case MDF_SUPP_MUST:
1725 					may_accept = true;
1726 					break;
1727 				case 0:
1728 					/* Primary group only. */
1729 					break;
1730 				default:
1731 #ifdef INVARIANTS
1732 					__assert_unreachable();
1733 #else
1734 					/* Better be safe than sorry. */
1735 					return (EPERM);
1736 #endif
1737 				}
1738 				++rule_idx;
1739 				break;
1740 			}
1741 			else
1742 				break;
1743 		}
1744 
1745 		/* 'gid' wasn't explicitly accepted. */
1746 		if (!may_accept)
1747 			return (EPERM);
1748 	}
1749 
1750 	/*
1751 	 * If we must have all current groups and we didn't browse all
1752 	 * of them at this point (because the remaining ones have GIDs
1753 	 * greater than the last requested group), we are simply missing
1754 	 * them.
1755 	 */
1756 	if ((gid_flags & MDF_CURRENT) != 0 &&
1757 	    (gid_flags & MDF_SUPP_MUST) != 0 &&
1758 	    old_idx < old_ngroups)
1759 		return (EPERM);
1760 	/*
1761 	 * Similarly, we have to finish browsing all GIDs from the rule
1762 	 * in case some are marked mandatory.
1763 	 */
1764 	if ((gid_flags & MDF_EXPLICIT_SUPP_MUST) != 0) {
1765 		for (; rule_idx < rule->gids_nb; ++rule_idx) {
1766 			const struct id_spec is = rule->gids[rule_idx];
1767 
1768 			if ((is.flags & MDF_SUPP_MUST) != 0)
1769 				return (EPERM);
1770 		}
1771 	}
1772 
1773 	return (0);
1774 }
1775 
1776 static int
1777 rule_grant_primary_group(const struct rule *const rule,
1778     const struct ucred *const old_cred, const gid_t gid)
1779 {
1780 	struct id_spec gid_is = {.flags = 0};
1781 	const struct id_spec *found_is;
1782 	int error;
1783 
1784 	if ((rule->gid_flags & MDF_ANY) != 0)
1785 		return (0);
1786 
1787 	/* Was MDF_CURRENT specified, and is 'gid' a current GID? */
1788 	if ((rule->gid_flags & MDF_CURRENT) != 0 &&
1789 	    group_is_primary(gid, old_cred)) {
1790 		error = grant_primary_group_from_flags(rule->gid_flags);
1791 		if (error == 0)
1792 			return (0);
1793 	}
1794 
1795 	/* Search by GID for a corresponding 'struct id_spec'. */
1796 	gid_is.id = gid;
1797 	found_is = bsearch(&gid_is, rule->gids, rule->gids_nb,
1798 	    sizeof(*rule->gids), id_spec_cmp);
1799 
1800 	if (found_is != NULL) {
1801 		error = grant_primary_group_from_flags(found_is->flags);
1802 		if (error == 0)
1803 			return (0);
1804 	}
1805 
1806 	return (EPERM);
1807 }
1808 
1809 static int
1810 rule_grant_primary_groups(const struct rule *const rule,
1811     const struct ucred *const old_cred, const struct ucred *const new_cred)
1812 {
1813 	int error;
1814 
1815 	/* Shortcut. */
1816 	if ((rule->gid_flags & MDF_ANY) != 0)
1817 		return (0);
1818 
1819 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_gid);
1820 	if (error != 0)
1821 		return (error);
1822 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_rgid);
1823 	if (error != 0)
1824 		return (error);
1825 	error = rule_grant_primary_group(rule, old_cred, new_cred->cr_svgid);
1826 	if (error != 0)
1827 		return (error);
1828 	return (0);
1829 }
1830 
1831 static bool
1832 user_is_current(const uid_t uid, const struct ucred *const old_cred)
1833 {
1834 	return (uid == old_cred->cr_uid || uid == old_cred->cr_ruid ||
1835 	    uid == old_cred->cr_svuid);
1836 }
1837 
1838 static int
1839 rule_grant_user(const struct rule *const rule,
1840     const struct ucred *const old_cred, const uid_t uid)
1841 {
1842 	struct id_spec uid_is = {.flags = 0};
1843 	const struct id_spec *found_is;
1844 
1845 	if ((rule->uid_flags & MDF_ANY) != 0)
1846 		return (0);
1847 
1848 	/* Was MDF_CURRENT specified, and is 'uid' a current UID? */
1849 	if ((rule->uid_flags & MDF_CURRENT) != 0 &&
1850 	    user_is_current(uid, old_cred))
1851 		return (0);
1852 
1853 	/* Search by UID for a corresponding 'struct id_spec'. */
1854 	uid_is.id = uid;
1855 	found_is = bsearch(&uid_is, rule->uids, rule->uids_nb,
1856 	    sizeof(*rule->uids), id_spec_cmp);
1857 
1858 	if (found_is != NULL)
1859 		return (0);
1860 
1861 	return (EPERM);
1862 }
1863 
1864 static int
1865 rule_grant_users(const struct rule *const rule,
1866     const struct ucred *const old_cred, const struct ucred *const new_cred)
1867 {
1868 	int error;
1869 
1870 	/* Shortcut. */
1871 	if ((rule->uid_flags & MDF_ANY) != 0)
1872 		return (0);
1873 
1874 	error = rule_grant_user(rule, old_cred, new_cred->cr_uid);
1875 	if (error != 0)
1876 		return (error);
1877 	error = rule_grant_user(rule, old_cred, new_cred->cr_ruid);
1878 	if (error != 0)
1879 		return (error);
1880 	error = rule_grant_user(rule, old_cred, new_cred->cr_svuid);
1881 	if (error != 0)
1882 		return (error);
1883 
1884 	return (0);
1885 }
1886 
/*
 * Whether 'rule' accepts the transition from 'old_cred' to 'new_cred'.
 *
 * Users, primary groups and supplementary groups are checked in this order,
 * stopping at the first refusal, whose error is returned.  Returns 0 if all
 * checks pass.
 */
static int
rule_grant_setcred(const struct rule *const rule,
    const struct ucred *const old_cred, const struct ucred *const new_cred)
{
	int error;

	error = rule_grant_users(rule, old_cred, new_cred);
	if (error == 0)
		error = rule_grant_primary_groups(rule, old_cred, new_cred);
	if (error == 0)
		error = rule_grant_supplementary_groups(rule, old_cred,
		    new_cred);

	return (error);
}
1905 
1906 static bool
1907 rule_applies(const struct rule *const rule, const struct ucred *const cred)
1908 {
1909 	if (rule->from_type == IT_UID && rule->from_id == cred->cr_uid)
1910 		return (true);
1911 	if (rule->from_type == IT_GID && groupmember(rule->from_id, cred))
1912 		return (true);
1913 	return (false);
1914 }
1915 
1916 /*
1917  * To pass data between check_setcred() and priv_grant() (on PRIV_CRED_SETCRED).
1918  */
1919 struct mac_do_setcred_data {
1920 	struct mac_do_data_header hdr;
1921 	const struct ucred *new_cred;
1922 	u_int setcred_flags;
1923 };
1924 
1925 static int
1926 mac_do_priv_grant(struct ucred *cred, int priv)
1927 {
1928 	struct mac_do_setcred_data *const data = fetch_data();
1929 	const struct rules *rules;
1930 	const struct ucred *new_cred;
1931 	const struct rule *rule;
1932 	u_int setcred_flags;
1933 	int error;
1934 
1935 	/* Bail out fast if we aren't concerned. */
1936 	if (priv != PRIV_CRED_SETCRED)
1937 		return (EPERM);
1938 
1939 	/*
1940 	 * Do we have to do something?
1941 	 */
1942 	if (check_data_usable(data, sizeof(*data), priv) != 0)
1943 		/* No. */
1944 		return (EPERM);
1945 
1946 	rules = data->hdr.rules;
1947 	new_cred = data->new_cred;
1948 	KASSERT(new_cred != NULL,
1949 	    ("priv_check*() called before mac_cred_check_setcred()"));
1950 	setcred_flags = data->setcred_flags;
1951 
1952 	/*
1953 	 * Explicitly check that only the flags we currently support are present
1954 	 * in order to avoid accepting transitions with other changes than those
1955 	 * we are actually going to check.  Currently, this rules out the
1956 	 * SETCREDF_MAC_LABEL flag.  This may be improved by adding code
1957 	 * actually checking whether the requested label and the current one
1958 	 * would differ.
1959 	 */
1960 	if ((setcred_flags & ~(SETCREDF_UID | SETCREDF_RUID | SETCREDF_SVUID |
1961 	    SETCREDF_GID | SETCREDF_RGID | SETCREDF_SVGID |
1962 	    SETCREDF_SUPP_GROUPS)) != 0)
1963 		return (EPERM);
1964 
1965 	/*
1966 	 * Browse rules, and for those that match the requestor, call specific
1967 	 * privilege granting functions interpreting the "to"/"target" part.
1968 	 */
1969 	error = EPERM;
1970 	TAILQ_FOREACH(rule, &rules->head, r_entries)
1971 	    if (rule_applies(rule, cred)) {
1972 		    error = rule_grant_setcred(rule, cred, new_cred);
1973 		    if (error != EPERM)
1974 			    break;
1975 	    }
1976 
1977 	return (error);
1978 }
1979 
1980 static int
1981 check_proc(void)
1982 {
1983 	char *path, *to_free;
1984 	int error;
1985 
1986 	/*
1987 	 * Only grant privileges if requested by the right executable.
1988 	 *
1989 	 * XXXOC: We may want to base this check on a tunable path and/or
1990 	 * a specific MAC label.  Going even further, e.g., envisioning to
1991 	 * completely replace the path check with the latter, we would need to
1992 	 * install FreeBSD on a FS with multilabel enabled by default, which in
1993 	 * practice entails adding an option to ZFS to set MNT_MULTILABEL
1994 	 * automatically on mounts, ensuring that root (and more if using
1995 	 * different partitions) ZFS or UFS filesystems are created with
1996 	 * multilabel turned on, and having the installation procedure support
1997 	 * setting a MAC label per file (perhaps via additions to mtree(1)).  So
1998 	 * this probably isn't going to happen overnight, if ever.
1999 	 */
2000 	if (vn_fullpath(curproc->p_textvp, &path, &to_free) != 0)
2001 		return (EPERM);
2002 	error = strcmp(path, "/usr/bin/mdo") == 0 ? 0 : EPERM;
2003 	free(to_free, M_TEMP);
2004 	return (error);
2005 }
2006 
2007 static void
2008 mac_do_setcred_enter(void)
2009 {
2010 	struct rules *rules;
2011 	struct prison *pr;
2012 	struct mac_do_setcred_data * data;
2013 	int error;
2014 
2015 	/*
2016 	 * If not enabled, don't prepare data.  Other hooks will check for that
2017 	 * to know if they have to do something.
2018 	 */
2019 	if (do_enabled == 0)
2020 		return;
2021 
2022 	/*
2023 	 * MAC/do only applies to a process launched from a given executable.
2024 	 * For other processes, we just won't intervene (we don't deny requests,
2025 	 * nor do we grant privileges to them).
2026 	 */
2027 	error = check_proc();
2028 	if (error != 0)
2029 		return;
2030 
2031 	/*
2032 	 * Find the currently applicable rules.
2033 	 */
2034 	rules = find_rules(curproc->p_ucred->cr_prison, &pr);
2035 	hold_rules(rules);
2036 	prison_unlock(pr);
2037 
2038 	/*
2039 	 * Setup thread data to be used by other hooks.
2040 	 */
2041 	data = fetch_data();
2042 	if (!is_data_reusable(data, sizeof(*data)))
2043 		data = alloc_data(data, sizeof(*data));
2044 	set_data_header(data, sizeof(*data), PRIV_CRED_SETCRED, rules);
2045 	/* Not really necessary, but helps to catch programming errors. */
2046 	data->new_cred = NULL;
2047 	data->setcred_flags = 0;
2048 }
2049 
2050 static int
2051 mac_do_check_setcred(u_int flags, const struct ucred *const old_cred,
2052     struct ucred *const new_cred)
2053 {
2054 	struct mac_do_setcred_data *const data = fetch_data();
2055 
2056 	/*
2057 	 * Do we have to do something?
2058 	 */
2059 	if (check_data_usable(data, sizeof(*data), PRIV_CRED_SETCRED) != 0)
2060 		/* No. */
2061 		return (0);
2062 
2063 	/*
2064 	 * Keep track of the setcred() flags and the new credentials for
2065 	 * priv_check*().
2066 	 */
2067 	data->new_cred = new_cred;
2068 	data->setcred_flags = flags;
2069 
2070 	return (0);
2071 }
2072 
2073 static void
2074 mac_do_setcred_exit(void)
2075 {
2076 	struct mac_do_setcred_data *const data = fetch_data();
2077 
2078 	if (check_data_usable(data, sizeof(*data), PRIV_CRED_SETCRED) == 0)
2079 		/*
2080 		 * This doesn't deallocate the small per-thread data storage,
2081 		 * which can be reused on subsequent calls.  (That data is of
2082 		 * course deallocated as the current thread dies or this module
2083 		 * is unloaded.)
2084 		 */
2085 		clear_data(data);
2086 }
2087 
2088 static void
2089 mac_do_init(struct mac_policy_conf *mpc)
2090 {
2091 	struct prison *pr;
2092 
2093 	osd_jail_slot = osd_jail_register(dealloc_jail_osd, osd_methods);
2094 	set_empty_rules(&prison0);
2095 	sx_slock(&allprison_lock);
2096 	TAILQ_FOREACH(pr, &allprison, pr_list)
2097 	    set_empty_rules(pr);
2098 	sx_sunlock(&allprison_lock);
2099 
2100 	osd_thread_slot = osd_thread_register(dealloc_thread_osd);
2101 }
2102 
2103 static void
2104 mac_do_destroy(struct mac_policy_conf *mpc)
2105 {
2106 	/*
2107 	 * osd_thread_deregister() must be called before osd_jail_deregister(),
2108 	 * for the reason explained in dealloc_jail_osd().
2109 	 */
2110 	osd_thread_deregister(osd_thread_slot);
2111 	osd_jail_deregister(osd_jail_slot);
2112 }
2113 
2114 static struct mac_policy_ops do_ops = {
2115 	.mpo_init = mac_do_init,
2116 	.mpo_destroy = mac_do_destroy,
2117 	.mpo_cred_setcred_enter = mac_do_setcred_enter,
2118 	.mpo_cred_check_setcred = mac_do_check_setcred,
2119 	.mpo_cred_setcred_exit = mac_do_setcred_exit,
2120 	.mpo_priv_grant = mac_do_priv_grant,
2121 };
2122 
2123 MAC_POLICY_SET(&do_ops, mac_do, "MAC/do", MPC_LOADTIME_FLAG_UNLOADOK, NULL);
2124 MODULE_VERSION(mac_do, 1);
2125