xref: /freebsd/usr.sbin/jail/command.c (revision 63f537551380d2dab29fa402ad1269feae17e594)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 James Gritton
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/types.h>
31 #include <sys/cpuset.h>
32 #include <sys/event.h>
33 #include <sys/mount.h>
34 #include <sys/stat.h>
35 #include <sys/sysctl.h>
36 #include <sys/user.h>
37 #include <sys/wait.h>
38 
39 #include <err.h>
40 #include <errno.h>
41 #include <fcntl.h>
42 #include <kvm.h>
43 #include <login_cap.h>
44 #include <paths.h>
45 #include <pwd.h>
46 #include <signal.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <vis.h>
52 
53 #include "jailp.h"
54 
/*
 * Timeout, in seconds, that term_procs() applies when int_param() yields no
 * value for IP_STOP_TIMEOUT.
 */
#define DEFAULT_STOP_TIMEOUT	10
/* Number of buckets in the process hash; entries are filed by pid modulo this. */
#define PHASH_SIZE		256

LIST_HEAD(phhead, phash);

/* One watched process, tied to the jail that is waiting for it to exit. */
struct phash {
	LIST_ENTRY(phash)	le;	/* bucket linkage */
	struct cfjail		*j;	/* jail the process is tracked for */
	pid_t			pid;	/* watched process ID */
};

/*
 * Counter of commands that may still be started: run_command() decrements it
 * for every process it starts tracking, finish_command() increments it again,
 * and next_command() defers a jail to the runnable queue while it is exactly 0.
 * NOTE(review): the initial -1 never reaches 0 through those decrements, so it
 * presumably means "no limit" -- confirm against the code that sets it.
 */
int paralimit = -1;

extern char **environ;

static int run_command(struct cfjail *j);
static int add_proc(struct cfjail *j, pid_t pid);
static void clear_procs(struct cfjail *j);
static struct cfjail *find_proc(pid_t pid);
static int term_procs(struct cfjail *j);
static int get_user_info(struct cfjail *j, const char *username,
    const struct passwd **pwdp, login_cap_t **lcapp);
static int check_path(struct cfjail *j, const char *pname, const char *path,
    int isfile, const char *umount_type);

/* Jails waiting on watched processes; timed entries are kept in expiry order. */
static struct cfjails sleeping = TAILQ_HEAD_INITIALIZER(sleeping);
/* Jails deferred by next_command() because paralimit was 0. */
static struct cfjails runnable = TAILQ_HEAD_INITIALIZER(runnable);
/*
 * Placeholder command string for parameters that have no string of their own;
 * its non-zero len keeps next_command() from skipping it as empty.
 */
static struct cfstring dummystring = { .len = 1 };
/* Hash of watched processes, maintained by add_proc/find_proc/clear_procs. */
static struct phhead phash[PHASH_SIZE];
/* kqueue descriptor used for process-exit events; created lazily in add_proc(). */
static int kq;
85 
/*
 * Return the ID of the root cpuset of the current process, or CPUSET_INVALID
 * if it could not be determined.  The lookup result is cached, and a failed
 * lookup is remembered so that it is not retried.
 */
static cpusetid_t
root_cpuset_id(void)
{
	static cpusetid_t cached = CPUSET_INVALID;
	static int lookup_err;

	/* A previous attempt failed; don't ask again. */
	if (lookup_err != 0)
		return (CPUSET_INVALID);

	if (cached == CPUSET_INVALID) {
		lookup_err = cpuset_getid(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
		    &cached);
		if (lookup_err != 0)
			return (CPUSET_INVALID);
	}
	return (cached);
}
99 
100 /*
101  * Run the next command associated with a jail.
102  */
103 int
104 next_command(struct cfjail *j)
105 {
106 	enum intparam comparam;
107 	int create_failed, stopping;
108 
109 	if (paralimit == 0) {
110 		if (j->flags & JF_FROM_RUNQ)
111 			requeue_head(j, &runnable);
112 		else
113 			requeue(j, &runnable);
114 		return 1;
115 	}
116 	j->flags &= ~JF_FROM_RUNQ;
117 	create_failed = (j->flags & (JF_STOP | JF_FAILED)) == JF_FAILED;
118 	stopping = (j->flags & JF_STOP) != 0;
119 	comparam = *j->comparam;
120 	for (;;) {
121 		if (j->comstring == NULL) {
122 			j->comparam += create_failed ? -1 : 1;
123 			switch ((comparam = *j->comparam)) {
124 			case IP__NULL:
125 				return 0;
126 			case IP_MOUNT_DEVFS:
127 				if (!bool_param(j->intparams[IP_MOUNT_DEVFS]))
128 					continue;
129 				j->comstring = &dummystring;
130 				break;
131 			case IP_MOUNT_FDESCFS:
132 				if (!bool_param(j->intparams[IP_MOUNT_FDESCFS]))
133 					continue;
134 				j->comstring = &dummystring;
135 				break;
136 			case IP_MOUNT_PROCFS:
137 				if (!bool_param(j->intparams[IP_MOUNT_PROCFS]))
138 					continue;
139 				j->comstring = &dummystring;
140 				break;
141 			case IP__OP:
142 			case IP_STOP_TIMEOUT:
143 				j->comstring = &dummystring;
144 				break;
145 			default:
146 				if (j->intparams[comparam] == NULL)
147 					continue;
148 				j->comstring = create_failed || (stopping &&
149 				    (j->intparams[comparam]->flags & PF_REV))
150 				    ? TAILQ_LAST(&j->intparams[comparam]->val,
151 					cfstrings)
152 				    : TAILQ_FIRST(&j->intparams[comparam]->val);
153 			}
154 		} else {
155 			j->comstring = j->comstring == &dummystring ? NULL :
156 			    create_failed || (stopping &&
157 			    (j->intparams[comparam]->flags & PF_REV))
158 			    ? TAILQ_PREV(j->comstring, cfstrings, tq)
159 			    : TAILQ_NEXT(j->comstring, tq);
160 		}
161 		if (j->comstring == NULL || j->comstring->len == 0 ||
162 		    (create_failed && (comparam == IP_EXEC_PRESTART ||
163 		    comparam == IP_EXEC_CREATED || comparam == IP_EXEC_START ||
164 		    comparam == IP_COMMAND || comparam == IP_EXEC_POSTSTART ||
165 		    comparam == IP_EXEC_PREPARE)))
166 			continue;
167 		switch (run_command(j)) {
168 		case -1:
169 			failed(j);
170 			/* FALLTHROUGH */
171 		case 1:
172 			return 1;
173 		}
174 	}
175 }
176 
177 /*
178  * Check command exit status
179  */
180 int
181 finish_command(struct cfjail *j)
182 {
183 	struct cfjail *rj;
184 	int error;
185 
186 	if (!(j->flags & JF_SLEEPQ))
187 		return 0;
188 	j->flags &= ~JF_SLEEPQ;
189 	if (*j->comparam == IP_STOP_TIMEOUT) {
190 		j->flags &= ~JF_TIMEOUT;
191 		j->pstatus = 0;
192 		return 0;
193 	}
194 	paralimit++;
195 	if (!TAILQ_EMPTY(&runnable)) {
196 		rj = TAILQ_FIRST(&runnable);
197 		rj->flags |= JF_FROM_RUNQ;
198 		requeue(rj, &ready);
199 	}
200 	error = 0;
201 	if (j->flags & JF_TIMEOUT) {
202 		j->flags &= ~JF_TIMEOUT;
203 		if (*j->comparam != IP_STOP_TIMEOUT) {
204 			jail_warnx(j, "%s: timed out", j->comline);
205 			failed(j);
206 			error = -1;
207 		} else if (verbose > 0)
208 			jail_note(j, "timed out\n");
209 	} else if (j->pstatus != 0) {
210 		if (WIFSIGNALED(j->pstatus))
211 			jail_warnx(j, "%s: exited on signal %d",
212 			    j->comline, WTERMSIG(j->pstatus));
213 		else
214 			jail_warnx(j, "%s: failed", j->comline);
215 		j->pstatus = 0;
216 		failed(j);
217 		error = -1;
218 	}
219 	free(j->comline);
220 	j->comline = NULL;
221 	return error;
222 }
223 
/*
 * Check for finished processes or timeouts.
 *
 * If any jail is on the sleeping queue, wait on the kqueue for a process
 * exit, bounded by the timeout of the jail at the head of that queue (or not
 * waiting at all when nonblock is set).
 *
 * Returns the jail whose last watched process exited (with pstatus set from
 * the event), or the head jail with JF_TIMEOUT set and its processes
 * forgotten when its timeout expired.  Returns NULL when nothing is sleeping
 * or, in nonblock mode, when no event was ready.
 */
struct cfjail *
next_proc(int nonblock)
{
	struct kevent ev;
	struct timespec remain;
	struct timespec *waitp;
	struct cfjail *j;
	int nev;

	if (TAILQ_EMPTY(&sleeping))
		return NULL;

	for (;;) {
		waitp = NULL;
		j = TAILQ_FIRST(&sleeping);
		if (j != NULL && j->timeout.tv_sec != 0) {
			/* Work out how long until the head jail times out. */
			clock_gettime(CLOCK_REALTIME, &remain);
			remain.tv_sec = j->timeout.tv_sec - remain.tv_sec;
			remain.tv_nsec = j->timeout.tv_nsec - remain.tv_nsec;
			if (remain.tv_nsec < 0) {
				remain.tv_sec--;
				remain.tv_nsec += 1000000000;
			}
			if (remain.tv_sec < 0 ||
			    (remain.tv_sec == 0 && remain.tv_nsec == 0)) {
				/* Already past the deadline. */
				j->flags |= JF_TIMEOUT;
				clear_procs(j);
				return j;
			}
			waitp = &remain;
		}
		if (nonblock) {
			remain.tv_sec = 0;
			remain.tv_nsec = 0;
			waitp = &remain;
		}

		nev = kevent(kq, NULL, 0, &ev, 1, waitp);
		if (nev == -1) {
			if (errno != EINTR)
				err(1, "kevent");
			continue;
		}
		if (nev == 0) {
			if (nonblock)
				return NULL;
			/* The wait ran out: the head jail has timed out. */
			j = TAILQ_FIRST(&sleeping);
			j->flags |= JF_TIMEOUT;
			clear_procs(j);
			return j;
		}
		if (nev == 1) {
			/* Reap the process if it happens to be our child. */
			(void)waitpid(ev.ident, NULL, WNOHANG);
			j = find_proc(ev.ident);
			if (j != NULL) {
				j->pstatus = ev.data;
				return j;
			}
			continue;
		}
		return NULL;
	}
}
283 
/*
 * Run a single command for a jail, possibly inside the jail.
 *
 * The parameter currently being processed (*j->comparam) selects the action:
 *  - IP_STOP_TIMEOUT and IP__OP are performed directly (terminating the
 *    jail's processes, or creating/removing the jail) without forking.
 *  - The address, vnet and mount parameters build an ifconfig, mount or
 *    umount argument list from j->comstring.
 *  - Anything else is taken as a command line: it is passed to the shell
 *    when it contains shell metacharacters, and split on whitespace
 *    otherwise.  A lone trailing '&' runs the command in the background.
 *
 * The command is run in a forked child, which first attaches to the jail
 * for the in-jail parameters and applies the requested user, environment
 * and console log settings.
 *
 * Returns -1 on error, 0 when there is nothing to wait for (an internal
 * operation finished, the command was empty or backgrounded, or the child
 * was already gone), and 1 when the jail is now waiting on processes.
 */
static int
run_command(struct cfjail *j)
{
	const struct passwd *pwd;
	const struct cfstring *comstring, *s;
	login_cap_t *lcap;
	const char **argv;
	char *acs, *cs, *comcs, *devpath;
	const char *jidstr, *conslog, *path, *ruleset, *term, *username;
	enum intparam comparam;
	size_t comlen;
	pid_t pid;
	cpusetid_t setid;
	int argc, bg, clean, consfd, down, fib, i, injail, sjuser, timeout;
#if defined(INET) || defined(INET6)
	char *addr, *extrap, *p, *val;
#endif

	static char *cleanenv;

	/* Perform some operations that aren't actually commands */
	comparam = *j->comparam;
	down = j->flags & (JF_STOP | JF_FAILED);
	switch (comparam) {
	case IP_STOP_TIMEOUT:
		return term_procs(j);

	case IP__OP:
		if (down) {
			if (jail_remove(j->jid) < 0 && errno == EPERM) {
				jail_warnx(j, "jail_remove: %s",
				    strerror(errno));
				return -1;
			}
			if (verbose > 0 || (verbose == 0 && (j->flags & JF_STOP
			    ? note_remove : j->name != NULL)))
				jail_note(j, "removed\n");
			j->jid = -1;
			if (j->flags & JF_STOP)
				dep_done(j, DF_LIGHT);
			else
				j->flags &= ~JF_PERSIST;
		} else {
			if (create_jail(j) < 0)
				return -1;
			if (iflag)
				printf("%d\n", j->jid);
			if (verbose >= 0 && (j->name || verbose > 0))
				jail_note(j, "created\n");
			dep_done(j, DF_LIGHT);
		}
		return 0;

	default: ;
	}
	/*
	 * Collect exec arguments.  Internal commands for network and
	 * mounting build their own argument lists.
	 */
	comstring = j->comstring;
	bg = 0;
	switch (comparam) {
#ifdef INET
	case IP__IP4_IFADDR:
		/*
		 * The value is "[ifname|]addr[/mask] [extra ifconfig args]".
		 * Count the extra words and cut the string at the first space.
		 */
		argc = 0;
		val = alloca(strlen(comstring->s) + 1);
		strcpy(val, comstring->s);
		cs = val;
		extrap = NULL;
		while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
			if (extrap == NULL) {
				*p = '\0';
				extrap = p + 1;
			}
			cs = p + 1;
			argc++;
		}

		argv = alloca((8 + argc) * sizeof(char *));
		argv[0] = _PATH_IFCONFIG;
		if ((cs = strchr(val, '|'))) {
			argv[1] = acs = alloca(cs - val + 1);
			strlcpy(acs, val, cs - val + 1);
			addr = cs + 1;
		} else {
			argv[1] = string_param(j->intparams[IP_INTERFACE]);
			addr = val;
		}
		argv[2] = "inet";
		if (!(cs = strchr(addr, '/'))) {
			/* No mask given: use a host netmask. */
			argv[3] = addr;
			argv[4] = "netmask";
			argv[5] = "255.255.255.255";
			argc = 6;
		} else if (strchr(cs + 1, '.')) {
			/* Dotted mask after the slash: pass it as a netmask. */
			argv[3] = acs = alloca(cs - addr + 1);
			strlcpy(acs, addr, cs - addr + 1);
			argv[4] = "netmask";
			argv[5] = cs + 1;
			argc = 6;
		} else {
			/* Prefix length: ifconfig takes addr/len as is. */
			argv[3] = addr;
			argc = 4;
		}

		/* Extra arguments only apply when adding the address. */
		if (!down && extrap != NULL) {
			for (cs = strtok(extrap, " "); cs;
			     cs = strtok(NULL, " ")) {
				size_t len = strlen(cs) + 1;
				argv[argc++] = acs = alloca(len);
				strlcpy(acs, cs, len);
			}
		}

		argv[argc] = down ? "-alias" : "alias";
		argv[argc + 1] = NULL;
		break;
#endif

#ifdef INET6
	case IP__IP6_IFADDR:
		/* Same layout as the IPv4 case above. */
		argc = 0;
		val = alloca(strlen(comstring->s) + 1);
		strcpy(val, comstring->s);
		cs = val;
		extrap = NULL;
		while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
			if (extrap == NULL) {
				*p = '\0';
				extrap = p + 1;
			}
			cs = p + 1;
			argc++;
		}

		argv = alloca((8 + argc) * sizeof(char *));
		argv[0] = _PATH_IFCONFIG;
		if ((cs = strchr(val, '|'))) {
			argv[1] = acs = alloca(cs - val + 1);
			strlcpy(acs, val, cs - val + 1);
			addr = cs + 1;
		} else {
			argv[1] = string_param(j->intparams[IP_INTERFACE]);
			addr = val;
		}
		argv[2] = "inet6";
		argv[3] = addr;
		if (!(cs = strchr(addr, '/'))) {
			argv[4] = "prefixlen";
			argv[5] = "128";
			argc = 6;
		} else
			argc = 4;

		if (!down && extrap != NULL) {
			for (cs = strtok(extrap, " "); cs;
			     cs = strtok(NULL, " ")) {
				size_t len = strlen(cs) + 1;
				argv[argc++] = acs = alloca(len);
				strlcpy(acs, cs, len);
			}
		}

		argv[argc] = down ? "-alias" : "alias";
		argv[argc + 1] = NULL;
		break;
#endif

	case IP_VNET_INTERFACE:
		argv = alloca(5 * sizeof(char *));
		argv[0] = _PATH_IFCONFIG;
		argv[1] = comstring->s;
		argv[2] = down ? "-vnet" : "vnet";
		jidstr = string_param(j->intparams[KP_JID]);
		argv[3] = jidstr ? jidstr : string_param(j->intparams[KP_NAME]);
		argv[4] = NULL;
		break;

	case IP_MOUNT:
	case IP__MOUNT_FROM_FSTAB:
		/*
		 * Split an fstab-style line into at most four fields
		 * (device, mount point, type, options), then rearrange
		 * them in place into a mount or umount command line.
		 */
		argv = alloca(8 * sizeof(char *));
		comcs = alloca(comstring->len + 1);
		strcpy(comcs, comstring->s);
		argc = 0;
		for (cs = strtok(comcs, " \t\f\v\r\n"); cs && argc < 4;
		     cs = strtok(NULL, " \t\f\v\r\n")) {
			/* Only the first two fields are vis(3)-decoded. */
			if (argc <= 1 && strunvis(cs, cs) < 0) {
				jail_warnx(j, "%s: %s: fstab parse error",
				    j->intparams[comparam]->name, comstring->s);
				return -1;
			}
			argv[argc++] = cs;
		}
		if (argc == 0)
			return 0;
		if (argc < 3) {
			jail_warnx(j, "%s: %s: missing information",
			    j->intparams[comparam]->name, comstring->s);
			return -1;
		}
		if (check_path(j, j->intparams[comparam]->name, argv[1], 0,
		    down ? argv[2] : NULL) < 0)
			return -1;
		if (down) {
			/* umount -t type mountpoint */
			argv[4] = NULL;
			argv[3] = argv[1];
			argv[0] = "/sbin/umount";
		} else {
			if (argc == 4) {
				/* mount -t type -o options device mountpoint */
				argv[7] = NULL;
				argv[6] = argv[1];
				argv[5] = argv[0];
				argv[4] = argv[3];
				argv[3] = "-o";
			} else {
				/* mount -t type device mountpoint */
				argv[5] = NULL;
				argv[4] = argv[1];
				argv[3] = argv[0];
			}
			argv[0] = _PATH_MOUNT;
		}
		argv[1] = "-t";
		break;

	case IP_MOUNT_DEVFS:
		argv = alloca(7 * sizeof(char *));
		path = string_param(j->intparams[KP_PATH]);
		if (path == NULL) {
			jail_warnx(j, "mount.devfs: no jail root path defined");
			return -1;
		}
		devpath = alloca(strlen(path) + 5);
		sprintf(devpath, "%s/dev", path);
		if (check_path(j, "mount.devfs", devpath, 0,
		    down ? "devfs" : NULL) < 0)
			return -1;
		if (down) {
			argv[0] = "/sbin/umount";
			argv[1] = devpath;
			argv[2] = NULL;
		} else {
			argv[0] = _PATH_MOUNT;
			argv[1] = "-t";
			argv[2] = "devfs";
			ruleset = string_param(j->intparams[KP_DEVFS_RULESET]);
			if (!ruleset)
				ruleset = "4";	/* devfsrules_jail */
			argv[3] = acs = alloca(11 + strlen(ruleset));
			sprintf(acs, "-oruleset=%s", ruleset);
			argv[4] = ".";
			argv[5] = devpath;
			argv[6] = NULL;
		}
		break;

	case IP_MOUNT_FDESCFS:
		argv = alloca(7 * sizeof(char *));
		path = string_param(j->intparams[KP_PATH]);
		if (path == NULL) {
			jail_warnx(j, "mount.fdescfs: no jail root path defined");
			return -1;
		}
		devpath = alloca(strlen(path) + 8);
		sprintf(devpath, "%s/dev/fd", path);
		if (check_path(j, "mount.fdescfs", devpath, 0,
		    down ? "fdescfs" : NULL) < 0)
			return -1;
		if (down) {
			argv[0] = "/sbin/umount";
			argv[1] = devpath;
			argv[2] = NULL;
		} else {
			argv[0] = _PATH_MOUNT;
			argv[1] = "-t";
			argv[2] = "fdescfs";
			argv[3] = ".";
			argv[4] = devpath;
			argv[5] = NULL;
		}
		break;

	case IP_MOUNT_PROCFS:
		argv = alloca(7 * sizeof(char *));
		path = string_param(j->intparams[KP_PATH]);
		if (path == NULL) {
			jail_warnx(j, "mount.procfs: no jail root path defined");
			return -1;
		}
		devpath = alloca(strlen(path) + 6);
		sprintf(devpath, "%s/proc", path);
		if (check_path(j, "mount.procfs", devpath, 0,
		    down ? "procfs" : NULL) < 0)
			return -1;
		if (down) {
			argv[0] = "/sbin/umount";
			argv[1] = devpath;
			argv[2] = NULL;
		} else {
			argv[0] = _PATH_MOUNT;
			argv[1] = "-t";
			argv[2] = "procfs";
			argv[3] = ".";
			argv[4] = devpath;
			argv[5] = NULL;
		}
		break;

	case IP_COMMAND:
		if (j->name != NULL)
			goto default_command;
		/*
		 * For an unnamed jail the whole string list is one command's
		 * argument vector; the placeholder marks the list as used up.
		 */
		argc = 0;
		TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
			argc++;
		argv = alloca((argc + 1) * sizeof(char *));
		argc = 0;
		TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
			argv[argc++] = s->s;
		argv[argc] = NULL;
		j->comstring = &dummystring;
		break;

	default:
	default_command:
		if ((cs = strpbrk(comstring->s, "!\"$&'()*;<>?[\\]`{|}~")) &&
		    !(cs[0] == '&' && cs[1] == '\0')) {
			/* Shell metacharacters present: let sh parse it. */
			argv = alloca(4 * sizeof(char *));
			argv[0] = _PATH_BSHELL;
			argv[1] = "-c";
			argv[2] = comstring->s;
			argv[3] = NULL;
		} else {
			if (cs) {
				/*
				 * The only special character is a final '&'.
				 * It is cut off the stored string in place.
				 */
				*cs = 0;
				bg = 1;
			}
			/* First pass counts the words, second stores them. */
			comcs = alloca(comstring->len + 1);
			strcpy(comcs, comstring->s);
			argc = 0;
			for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
			     cs = strtok(NULL, " \t\f\v\r\n"))
				argc++;
			argv = alloca((argc + 1) * sizeof(char *));
			strcpy(comcs, comstring->s);
			argc = 0;
			for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
			     cs = strtok(NULL, " \t\f\v\r\n"))
				argv[argc++] = cs;
			argv[argc] = NULL;
		}
	}
	if (argv[0] == NULL)
		return 0;

	/* Record the absolute deadline; tv_sec == 0 means no timeout. */
	if (int_param(j->intparams[IP_EXEC_TIMEOUT], &timeout) &&
	    timeout != 0) {
		clock_gettime(CLOCK_REALTIME, &j->timeout);
		j->timeout.tv_sec += timeout;
	} else
		j->timeout.tv_sec = 0;

	injail = comparam == IP_EXEC_START || comparam == IP_COMMAND ||
	    comparam == IP_EXEC_STOP;
	if (injail)
		setid = root_cpuset_id();
	else
		setid = CPUSET_INVALID;
	clean = bool_param(j->intparams[IP_EXEC_CLEAN]);
	username = string_param(j->intparams[injail
	    ? IP_EXEC_JAIL_USER : IP_EXEC_SYSTEM_USER]);
	sjuser = bool_param(j->intparams[IP_EXEC_SYSTEM_JAIL_USER]);

	/* consfd == 0 means that no console log is in use. */
	consfd = 0;
	if (injail &&
	    (conslog = string_param(j->intparams[IP_EXEC_CONSOLELOG]))) {
		if (check_path(j, "exec.consolelog", conslog, 1, NULL) < 0)
			return -1;
		consfd =
		    open(conslog, O_WRONLY | O_CREAT | O_APPEND, DEFFILEMODE);
		if (consfd < 0) {
			jail_warnx(j, "open %s: %s", conslog, strerror(errno));
			return -1;
		}
	}

	/* Join the arguments with spaces for use in messages. */
	comlen = 0;
	for (i = 0; argv[i]; i++)
		comlen += strlen(argv[i]) + 1;
	j->comline = cs = emalloc(comlen);
	for (i = 0; argv[i]; i++) {
		strcpy(cs, argv[i]);
		if (argv[i + 1]) {
			cs += strlen(argv[i]) + 1;
			cs[-1] = ' ';
		}
	}
	if (verbose > 0)
		jail_note(j, "run command%s%s%s: %s\n",
		    injail ? " in jail" : "", username ? " as " : "",
		    username ? username : "", j->comline);

	pid = fork();
	if (pid < 0)
		err(1, "fork");
	if (pid > 0) {
		/*
		 * Only the child uses the console log; close the parent's
		 * copy so that it isn't leaked on every in-jail command.
		 */
		if (consfd != 0)
			(void)close(consfd);
		if (bg || !add_proc(j, pid)) {
			/* Nothing to wait for. */
			free(j->comline);
			j->comline = NULL;
			return 0;
		} else {
			paralimit--;
			return 1;
		}
	}
	/* Everything below runs in the child, which never returns. */
	if (bg)
		setsid();

	/* Set up the environment and run the command */
	pwd = NULL;
	lcap = NULL;
	/*
	 * With exec.system_jail_user the user is looked up before attaching;
	 * otherwise the lookup happens after the attach below.
	 */
	if ((clean || username) && injail && sjuser &&
	    get_user_info(j, username, &pwd, &lcap) < 0)
		exit(1);
	if (injail) {
		/* jail_attach won't chdir along with its chroot. */
		path = string_param(j->intparams[KP_PATH]);
		if (path && chdir(path) < 0) {
			jail_warnx(j, "chdir %s: %s", path, strerror(errno));
			exit(1);
		}
		if (int_param(j->intparams[IP_EXEC_FIB], &fib) &&
		    setfib(fib) < 0) {
			jail_warnx(j, "setfib: %s", strerror(errno));
			exit(1);
		}

		/*
		 * We wouldn't have specialized our affinity, so just setid to
		 * root.  We do this prior to attaching to avoid the kernel
		 * having to create a transient cpuset that we'll promptly
		 * free up with a reset to the jail's cpuset.
		 *
		 * This is just a best-effort to use as wide of mask as
		 * possible.
		 */
		if (setid != CPUSET_INVALID)
			(void)cpuset_setid(CPU_WHICH_PID, -1, setid);

		if (jail_attach(j->jid) < 0) {
			jail_warnx(j, "jail_attach: %s", strerror(errno));
			exit(1);
		}
	}
	if (clean || username) {
		if (!(injail && sjuser) &&
		    get_user_info(j, username, &pwd, &lcap) < 0)
			exit(1);
		if (clean) {
			/* Start from an empty environment, keeping only TERM. */
			term = getenv("TERM");
			environ = &cleanenv;
			setenv("PATH", "/bin:/usr/bin", 0);
			if (term != NULL)
				setenv("TERM", term, 1);
		}
		if (setgid(pwd->pw_gid) < 0) {
			jail_warnx(j, "setgid %d: %s", pwd->pw_gid,
			    strerror(errno));
			exit(1);
		}
		if (setusercontext(lcap, pwd, pwd->pw_uid, username
		    ? LOGIN_SETALL & ~LOGIN_SETGROUP & ~LOGIN_SETLOGIN
		    : LOGIN_SETPATH | LOGIN_SETENV) < 0) {
			jail_warnx(j, "setusercontext %s: %s", pwd->pw_name,
			    strerror(errno));
			exit(1);
		}
		login_close(lcap);
		setenv("USER", pwd->pw_name, 1);
		setenv("HOME", pwd->pw_dir, 1);
		setenv("SHELL",
		    *pwd->pw_shell ? pwd->pw_shell : _PATH_BSHELL, 1);
		if (clean && chdir(pwd->pw_dir) < 0) {
			jail_warnx(j, "chdir %s: %s",
			    pwd->pw_dir, strerror(errno));
			exit(1);
		}
		endpwent();
	}

	/* Send stdout and stderr to the console log, if there is one. */
	if (consfd != 0 && (dup2(consfd, 1) < 0 || dup2(consfd, 2) < 0)) {
		jail_warnx(j, "exec.consolelog: %s", strerror(errno));
		exit(1);
	}
	closefrom(3);
	execvp(argv[0], __DECONST(char *const*, argv));
	jail_warnx(j, "exec %s: %s", argv[0], strerror(errno));
	exit(1);
}
784 
/*
 * Add a process to the hash, tied to a jail.
 *
 * Registers an exit watch for the process on the kqueue (creating the
 * kqueue on first use), records the pid in the hash, and moves the jail to
 * the sleeping queue.  Jails with a timeout are filed ahead of the first
 * entry that has no timeout or a later-or-equal one; jails without a timeout
 * are handed to requeue().
 *
 * Returns 1 if the process is now being watched, or 0 if it no longer
 * exists (ESRCH).  Any other kqueue/kevent failure is fatal.
 */
static int
add_proc(struct cfjail *j, pid_t pid)
{
	struct kevent ev;
	struct cfjail *other;
	struct phash *entry;

	if (kq == 0) {
		kq = kqueue();
		if (kq < 0)
			err(1, "kqueue");
	}
	EV_SET(&ev, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) < 0) {
		if (errno != ESRCH)
			err(1, "kevent");
		return 0;
	}

	entry = emalloc(sizeof(*entry));
	entry->j = j;
	entry->pid = pid;
	LIST_INSERT_HEAD(&phash[pid % PHASH_SIZE], entry, le);
	j->nprocs++;
	j->flags |= JF_SLEEPQ;

	if (j->timeout.tv_sec == 0) {
		requeue(j, &sleeping);
		return 1;
	}

	/* File the jail in the sleep queue according to its timeout. */
	TAILQ_REMOVE(j->queue, j, tq);
	TAILQ_FOREACH(other, &sleeping, tq) {
		if (other->timeout.tv_sec == 0)
			break;
		if (j->timeout.tv_sec < other->timeout.tv_sec)
			break;
		if (j->timeout.tv_sec == other->timeout.tv_sec &&
		    j->timeout.tv_nsec <= other->timeout.tv_nsec)
			break;
	}
	if (other != NULL)
		TAILQ_INSERT_BEFORE(other, j, tq);
	else
		TAILQ_INSERT_TAIL(&sleeping, j, tq);
	j->queue = &sleeping;
	return 1;
}
829 
/*
 * Remove any processes from the hash that correspond to a jail.
 *
 * Every bucket is scanned; each matching entry has its exit watch deleted
 * from the kqueue (errors ignored) and is then unlinked and freed.  The
 * jail's process count is reset to zero.
 */
static void
clear_procs(struct cfjail *j)
{
	struct kevent ev;
	struct phhead *bucket;
	struct phash *entry, *following;

	j->nprocs = 0;
	for (bucket = &phash[0]; bucket != &phash[PHASH_SIZE]; bucket++) {
		LIST_FOREACH_SAFE(entry, bucket, le, following) {
			if (entry->j != j)
				continue;
			EV_SET(&ev, entry->pid, EVFILT_PROC, EV_DELETE,
			    NOTE_EXIT, 0, NULL);
			(void)kevent(kq, &ev, 1, NULL, 0, NULL);
			LIST_REMOVE(entry, le);
			free(entry);
		}
	}
}
851 
852 /*
853  * Find the jail that corresponds to an exited process.
854  */
855 static struct cfjail *
856 find_proc(pid_t pid)
857 {
858 	struct cfjail *j;
859 	struct phash *ph;
860 
861 	LIST_FOREACH(ph, &phash[pid % PHASH_SIZE], le)
862 		if (ph->pid == pid) {
863 			j = ph->j;
864 			LIST_REMOVE(ph, le);
865 			free(ph);
866 			return --j->nprocs ? NULL : j;
867 		}
868 	return NULL;
869 }
870 
/*
 * Send SIGTERM to all processes in a jail and wait for them to die.
 *
 * The wait is bounded by the IP_STOP_TIMEOUT parameter, or by
 * DEFAULT_STOP_TIMEOUT when that parameter yields no value; an explicit
 * timeout of 0 skips the whole step.  Each process that was signalled
 * successfully is handed to add_proc() so that its exit can be awaited.
 *
 * Returns 1 if the jail now has processes to wait for (its timeout is then
 * set), or 0 if there is nothing to wait for or the process list could not
 * be read.
 */
static int
term_procs(struct cfjail *j)
{
	static kvm_t *kd;

	struct kinfo_proc *procs, *kp;
	int idx, nfound, printed, timeout;

	if (int_param(j->intparams[IP_STOP_TIMEOUT], &timeout)) {
		if (timeout == 0)
			return 0;
	} else
		timeout = DEFAULT_STOP_TIMEOUT;

	/* The kvm handle is opened once and kept for later calls. */
	if (kd == NULL) {
		kd = kvm_open(NULL, NULL, NULL, O_RDONLY, NULL);
		if (kd == NULL)
			return 0;
	}

	procs = kvm_getprocs(kd, KERN_PROC_PROC, 0, &nfound);
	if (procs == NULL)
		return 0;

	printed = 0;
	for (idx = 0; idx < nfound; idx++) {
		kp = &procs[idx];
		if (kp->ki_jid != j->jid)
			continue;
		if (kill(kp->ki_pid, SIGTERM) != 0)
			continue;
		(void)add_proc(j, kp->ki_pid);
		if (verbose <= 0)
			continue;
		/* The pids are listed on one line after a single heading. */
		if (!printed) {
			printed = 1;
			jail_note(j, "sent SIGTERM to:");
		}
		printf(" %d", kp->ki_pid);
	}
	if (printed)
		printf("\n");

	if (j->nprocs > 0) {
		clock_gettime(CLOCK_REALTIME, &j->timeout);
		j->timeout.tv_sec += timeout;
		return 1;
	}
	return 0;
}
918 
/*
 * Look up a user in the passwd and login.conf files.
 *
 * With a username the lookup is by name; with NULL it is by the caller's
 * real uid.  On success *pwdp and *lcapp receive the passwd entry and login
 * class, and the process's group list is initialized for that user.
 *
 * Returns 0 on success, or -1 after printing a warning if the user, the
 * login class, or the group list could not be set up.
 */
static int
get_user_info(struct cfjail *j, const char *username,
    const struct passwd **pwdp, login_cap_t **lcapp)
{
	const struct passwd *entry;

	/* Lets "no such user" be told apart from a lookup error. */
	errno = 0;
	if (username)
		entry = getpwnam(username);
	else
		entry = getpwuid(getuid());
	*pwdp = entry;
	if (entry == NULL) {
		if (errno) {
			jail_warnx(j, "getpwnam%s%s: %s", username ? " " : "",
			    username ? username : "", strerror(errno));
		} else if (username) {
			jail_warnx(j, "%s: no such user", username);
		} else {
			jail_warnx(j, "unknown uid %d", getuid());
		}
		return -1;
	}

	*lcapp = login_getpwclass(entry);
	if (*lcapp == NULL) {
		jail_warnx(j, "getpwclass %s: %s", entry->pw_name,
		    strerror(errno));
		return -1;
	}

	/* Set the groups while the group file is still available */
	if (initgroups(entry->pw_name, entry->pw_gid) < 0) {
		jail_warnx(j, "initgroups %s: %s", entry->pw_name,
		    strerror(errno));
		return -1;
	}
	return 0;
}
954 
955 /*
956  * Make sure a mount or consolelog path is a valid absolute pathname
957  * with no symlinks.
958  */
959 static int
960 check_path(struct cfjail *j, const char *pname, const char *path, int isfile,
961     const char *umount_type)
962 {
963 	struct stat st, mpst;
964 	struct statfs stfs;
965 	char *tpath, *p;
966 	const char *jailpath;
967 	size_t jplen;
968 
969 	if (path[0] != '/') {
970 		jail_warnx(j, "%s: %s: not an absolute pathname",
971 		    pname, path);
972 		return -1;
973 	}
974 	/*
975 	 * Only check for symlinks in components below the jail's path,
976 	 * since that's where the security risk lies.
977 	 */
978 	jailpath = string_param(j->intparams[KP_PATH]);
979 	if (jailpath == NULL)
980 		jailpath = "";
981 	jplen = strlen(jailpath);
982 	if (!strncmp(path, jailpath, jplen) && path[jplen] == '/') {
983 		tpath = alloca(strlen(path) + 1);
984 		strcpy(tpath, path);
985 		for (p = tpath + jplen; p != NULL; ) {
986 			p = strchr(p + 1, '/');
987 			if (p)
988 				*p = '\0';
989 			if (lstat(tpath, &st) < 0) {
990 				if (errno == ENOENT && isfile && !p)
991 					break;
992 				jail_warnx(j, "%s: %s: %s", pname, tpath,
993 				    strerror(errno));
994 				return -1;
995 			}
996 			if (S_ISLNK(st.st_mode)) {
997 				jail_warnx(j, "%s: %s is a symbolic link",
998 				    pname, tpath);
999 				return -1;
1000 			}
1001 			if (p)
1002 				*p = '/';
1003 		}
1004 	}
1005 	if (umount_type != NULL) {
1006 		if (stat(path, &st) < 0 || statfs(path, &stfs) < 0) {
1007 			jail_warnx(j, "%s: %s: %s", pname, path,
1008 			    strerror(errno));
1009 			return -1;
1010 		}
1011 		if (stat(stfs.f_mntonname, &mpst) < 0) {
1012 			jail_warnx(j, "%s: %s: %s", pname, stfs.f_mntonname,
1013 			    strerror(errno));
1014 			return -1;
1015 		}
1016 		if (st.st_ino != mpst.st_ino) {
1017 			jail_warnx(j, "%s: %s: not a mount point",
1018 			    pname, path);
1019 			return -1;
1020 		}
1021 		if (strcmp(stfs.f_fstypename, umount_type)) {
1022 			jail_warnx(j, "%s: %s: not a %s mount",
1023 			    pname, path, umount_type);
1024 			return -1;
1025 		}
1026 	}
1027 	return 0;
1028 }
1029