xref: /freebsd/usr.sbin/jail/command.c (revision 3c4ba5f55438f7afd4f4b0b56f88f2bb505fd6a6)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 James Gritton
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/types.h>
33 #include <sys/cpuset.h>
34 #include <sys/event.h>
35 #include <sys/mount.h>
36 #include <sys/stat.h>
37 #include <sys/sysctl.h>
38 #include <sys/user.h>
39 #include <sys/wait.h>
40 
41 #include <err.h>
42 #include <errno.h>
43 #include <fcntl.h>
44 #include <kvm.h>
45 #include <login_cap.h>
46 #include <paths.h>
47 #include <pwd.h>
48 #include <signal.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <unistd.h>
53 #include <vis.h>
54 
55 #include "jailp.h"
56 
/* Seconds term_procs() waits after SIGTERM when stop.timeout is not set. */
#define DEFAULT_STOP_TIMEOUT	10
/* Number of buckets in the pid hash table (indexed by pid % PHASH_SIZE). */
#define PHASH_SIZE		256

/* Head type for one bucket of the pid hash table. */
LIST_HEAD(phhead, phash);

/*
 * One process being waited for, linked into a pid hash bucket and tied
 * to the jail on whose behalf it is tracked.
 */
struct phash {
	LIST_ENTRY(phash)	le;	/* bucket linkage */
	struct cfjail		*j;	/* jail that is waiting on this pid */
	pid_t			pid;	/* the tracked process */
};
67 
/*
 * Remaining number of commands that may be started.  run_command()
 * decrements it when it queues a child, finish_command() increments it
 * again, and next_command() defers a jail to the runnable queue while it
 * is exactly zero.  The initial -1 never reaches zero this way.
 */
int paralimit = -1;

/* Process environment; replaced with an empty one for exec.clean. */
extern char **environ;

/* Helpers private to this file; see the definitions below. */
static int run_command(struct cfjail *j);
static int add_proc(struct cfjail *j, pid_t pid);
static void clear_procs(struct cfjail *j);
static struct cfjail *find_proc(pid_t pid);
static int term_procs(struct cfjail *j);
static int get_user_info(struct cfjail *j, const char *username,
    const struct passwd **pwdp, login_cap_t **lcapp);
static int check_path(struct cfjail *j, const char *pname, const char *path,
    int isfile, const char *umount_type);
81 
/* Jails waiting on child processes; add_proc() keeps timed jails first. */
static struct cfjails sleeping = TAILQ_HEAD_INITIALIZER(sleeping);
/* Jails deferred by next_command() because paralimit was zero. */
static struct cfjails runnable = TAILQ_HEAD_INITIALIZER(runnable);
/*
 * Placeholder command string for operations that have no string of their
 * own; its non-zero len lets it pass the empty-string test in
 * next_command().
 */
static struct cfstring dummystring = { .len = 1 };
/* Hash table of tracked processes, keyed by pid. */
static struct phhead phash[PHASH_SIZE];
/* kqueue descriptor used for process-exit events; created in add_proc(). */
static int kq;
87 
/*
 * Return the id of the root cpuset of the current process, or
 * CPUSET_INVALID if it cannot be determined.  The lookup result is
 * cached in function-local statics, and a failed lookup is never retried.
 */
static cpusetid_t
root_cpuset_id(void)
{
	static cpusetid_t cached_id = CPUSET_INVALID;
	static int lookup_error;

	/* A previous failure is sticky. */
	if (lookup_error != 0)
		return (CPUSET_INVALID);
	/* Already resolved on an earlier call. */
	if (cached_id != CPUSET_INVALID)
		return (cached_id);

	lookup_error = cpuset_getid(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
	    &cached_id);
	return (lookup_error != 0 ? CPUSET_INVALID : cached_id);
}
101 
102 /*
103  * Run the next command associated with a jail.
104  */
105 int
106 next_command(struct cfjail *j)
107 {
108 	enum intparam comparam;
109 	int create_failed, stopping;
110 
111 	if (paralimit == 0) {
112 		if (j->flags & JF_FROM_RUNQ)
113 			requeue_head(j, &runnable);
114 		else
115 			requeue(j, &runnable);
116 		return 1;
117 	}
118 	j->flags &= ~JF_FROM_RUNQ;
119 	create_failed = (j->flags & (JF_STOP | JF_FAILED)) == JF_FAILED;
120 	stopping = (j->flags & JF_STOP) != 0;
121 	comparam = *j->comparam;
122 	for (;;) {
123 		if (j->comstring == NULL) {
124 			j->comparam += create_failed ? -1 : 1;
125 			switch ((comparam = *j->comparam)) {
126 			case IP__NULL:
127 				return 0;
128 			case IP_MOUNT_DEVFS:
129 				if (!bool_param(j->intparams[IP_MOUNT_DEVFS]))
130 					continue;
131 				j->comstring = &dummystring;
132 				break;
133 			case IP_MOUNT_FDESCFS:
134 				if (!bool_param(j->intparams[IP_MOUNT_FDESCFS]))
135 					continue;
136 				j->comstring = &dummystring;
137 				break;
138 			case IP_MOUNT_PROCFS:
139 				if (!bool_param(j->intparams[IP_MOUNT_PROCFS]))
140 					continue;
141 				j->comstring = &dummystring;
142 				break;
143 			case IP__OP:
144 			case IP_STOP_TIMEOUT:
145 				j->comstring = &dummystring;
146 				break;
147 			default:
148 				if (j->intparams[comparam] == NULL)
149 					continue;
150 				j->comstring = create_failed || (stopping &&
151 				    (j->intparams[comparam]->flags & PF_REV))
152 				    ? TAILQ_LAST(&j->intparams[comparam]->val,
153 					cfstrings)
154 				    : TAILQ_FIRST(&j->intparams[comparam]->val);
155 			}
156 		} else {
157 			j->comstring = j->comstring == &dummystring ? NULL :
158 			    create_failed || (stopping &&
159 			    (j->intparams[comparam]->flags & PF_REV))
160 			    ? TAILQ_PREV(j->comstring, cfstrings, tq)
161 			    : TAILQ_NEXT(j->comstring, tq);
162 		}
163 		if (j->comstring == NULL || j->comstring->len == 0 ||
164 		    (create_failed && (comparam == IP_EXEC_PRESTART ||
165 		    comparam == IP_EXEC_CREATED || comparam == IP_EXEC_START ||
166 		    comparam == IP_COMMAND || comparam == IP_EXEC_POSTSTART ||
167 		    comparam == IP_EXEC_PREPARE)))
168 			continue;
169 		switch (run_command(j)) {
170 		case -1:
171 			failed(j);
172 			/* FALLTHROUGH */
173 		case 1:
174 			return 1;
175 		}
176 	}
177 }
178 
179 /*
180  * Check command exit status
181  */
182 int
183 finish_command(struct cfjail *j)
184 {
185 	struct cfjail *rj;
186 	int error;
187 
188 	if (!(j->flags & JF_SLEEPQ))
189 		return 0;
190 	j->flags &= ~JF_SLEEPQ;
191 	if (*j->comparam == IP_STOP_TIMEOUT) {
192 		j->flags &= ~JF_TIMEOUT;
193 		j->pstatus = 0;
194 		return 0;
195 	}
196 	paralimit++;
197 	if (!TAILQ_EMPTY(&runnable)) {
198 		rj = TAILQ_FIRST(&runnable);
199 		rj->flags |= JF_FROM_RUNQ;
200 		requeue(rj, &ready);
201 	}
202 	error = 0;
203 	if (j->flags & JF_TIMEOUT) {
204 		j->flags &= ~JF_TIMEOUT;
205 		if (*j->comparam != IP_STOP_TIMEOUT) {
206 			jail_warnx(j, "%s: timed out", j->comline);
207 			failed(j);
208 			error = -1;
209 		} else if (verbose > 0)
210 			jail_note(j, "timed out\n");
211 	} else if (j->pstatus != 0) {
212 		if (WIFSIGNALED(j->pstatus))
213 			jail_warnx(j, "%s: exited on signal %d",
214 			    j->comline, WTERMSIG(j->pstatus));
215 		else
216 			jail_warnx(j, "%s: failed", j->comline);
217 		j->pstatus = 0;
218 		failed(j);
219 		error = -1;
220 	}
221 	free(j->comline);
222 	j->comline = NULL;
223 	return error;
224 }
225 
/*
 * Check for finished processes or timeouts.
 *
 * Returns a jail whose last tracked process has exited (with its exit
 * status stored in j->pstatus) or whose deadline has passed (JF_TIMEOUT
 * set and its tracked processes dropped).  Returns NULL when no jail is
 * sleeping, or when nonblock is set and no event is pending.
 */
struct cfjail *
next_proc(int nonblock)
{
	struct kevent ke;
	struct timespec ts;
	struct timespec *tsp;
	struct cfjail *j;

	if (TAILQ_EMPTY(&sleeping))
		return NULL;

	for (;;) {
		/* Only the head of the queue is examined for a deadline. */
		tsp = NULL;
		j = TAILQ_FIRST(&sleeping);
		if (j != NULL && j->timeout.tv_sec) {
			/* Time remaining until the deadline. */
			clock_gettime(CLOCK_REALTIME, &ts);
			ts.tv_sec = j->timeout.tv_sec - ts.tv_sec;
			ts.tv_nsec = j->timeout.tv_nsec - ts.tv_nsec;
			if (ts.tv_nsec < 0) {
				ts.tv_sec--;
				ts.tv_nsec += 1000000000;
			}
			if (ts.tv_sec < 0 ||
			    (ts.tv_sec == 0 && ts.tv_nsec == 0)) {
				/* Already expired. */
				j->flags |= JF_TIMEOUT;
				clear_procs(j);
				return j;
			}
			tsp = &ts;
		}
		if (nonblock) {
			/* Poll: a zero timeout overrides any deadline. */
			ts.tv_sec = 0;
			ts.tv_nsec = 0;
			tsp = &ts;
		}

		switch (kevent(kq, NULL, 0, &ke, 1, tsp)) {
		case -1:
			if (errno != EINTR)
				err(1, "kevent");
			continue;
		case 0:
			if (nonblock)
				return NULL;
			/* The wait ran out: the head jail timed out. */
			j = TAILQ_FIRST(&sleeping);
			j->flags |= JF_TIMEOUT;
			clear_procs(j);
			return j;
		case 1:
			/* Reap the child in case it is ours. */
			(void)waitpid(ke.ident, NULL, WNOHANG);
			j = find_proc(ke.ident);
			if (j != NULL) {
				j->pstatus = ke.data;
				return j;
			}
			/* Its jail still has other processes pending. */
			continue;
		default:
			return NULL;
		}
	}
}
285 
/*
 * Run a single command for a jail, possibly inside the jail.
 *
 * The command is selected by *j->comparam and j->comstring.  Some
 * parameters are internal operations (jail create/remove, the
 * stop.timeout wait) and are handled without forking; the network and
 * mount parameters build their own ifconfig/mount/umount argument lists;
 * everything else is run as given, through the shell when it contains
 * shell metacharacters.
 *
 * Returns 1 when a child was started and queued with add_proc() (or
 * term_procs() is waiting on processes), 0 when there is nothing to wait
 * for, and -1 on error.  In the forked child this function does not
 * return.
 */
static int
run_command(struct cfjail *j)
{
	const struct passwd *pwd;
	const struct cfstring *comstring, *s;
	login_cap_t *lcap;
	const char **argv;
	char *acs, *cs, *comcs, *devpath;
	const char *jidstr, *conslog, *path, *ruleset, *term, *username;
	enum intparam comparam;
	size_t comlen;
	pid_t pid;
	cpusetid_t setid;
	int argc, bg, clean, consfd, down, fib, i, injail, sjuser, timeout;
#if defined(INET) || defined(INET6)
	char *addr, *extrap, *p, *val;
#endif

	/* Empty environment installed for exec.clean. */
	static char *cleanenv;

	/* Perform some operations that aren't actually commands */
	comparam = *j->comparam;
	/* Non-zero when stopping or when undoing a failed creation. */
	down = j->flags & (JF_STOP | JF_FAILED);
	switch (comparam) {
	case IP_STOP_TIMEOUT:
		return term_procs(j);

	case IP__OP:
		if (down) {
			/* Only EPERM from jail_remove is treated as an error. */
			if (jail_remove(j->jid) < 0 && errno == EPERM) {
				jail_warnx(j, "jail_remove: %s",
					   strerror(errno));
				return -1;
			}
			if (verbose > 0 || (verbose == 0 && (j->flags & JF_STOP
			    ? note_remove : j->name != NULL)))
			    jail_note(j, "removed\n");
			j->jid = -1;
			if (j->flags & JF_STOP)
				dep_done(j, DF_LIGHT);
			else
				j->flags &= ~JF_PERSIST;
		} else {
			if (create_jail(j) < 0)
				return -1;
			if (iflag)
				printf("%d\n", j->jid);
			if (verbose >= 0 && (j->name || verbose > 0))
				jail_note(j, "created\n");
			dep_done(j, DF_LIGHT);
		}
		return 0;

	default: ;
	}
	/*
	 * Collect exec arguments.  Internal commands for network and
	 * mounting build their own argument lists.
	 */
	comstring = j->comstring;
	bg = 0;
	switch (comparam) {
#ifdef INET
	case IP__IP4_IFADDR:
		/*
		 * The value is "[interface|]address[/mask] [extra ...]".
		 * Count the extra words and cut the string after the
		 * address so the extras can be appended later.
		 */
		argc = 0;
		val = alloca(strlen(comstring->s) + 1);
		strcpy(val, comstring->s);
		cs = val;
		extrap = NULL;
		while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
			if (extrap == NULL) {
				*p = '\0';
				extrap = p + 1;
			}
			cs = p + 1;
			argc++;
		}

		argv = alloca((8 + argc) * sizeof(char *));
		argv[0] = _PATH_IFCONFIG;
		if ((cs = strchr(val, '|'))) {
			argv[1] = acs = alloca(cs - val + 1);
			strlcpy(acs, val, cs - val + 1);
			addr = cs + 1;
		} else {
			argv[1] = string_param(j->intparams[IP_INTERFACE]);
			addr = val;
		}
		argv[2] = "inet";
		if (!(cs = strchr(addr, '/'))) {
			/* No mask given: use a host mask. */
			argv[3] = addr;
			argv[4] = "netmask";
			argv[5] = "255.255.255.255";
			argc = 6;
		} else if (strchr(cs + 1, '.')) {
			/* Dotted mask after the slash: pass it as a netmask. */
			argv[3] = acs = alloca(cs - addr + 1);
			strlcpy(acs, addr, cs - addr + 1);
			argv[4] = "netmask";
			argv[5] = cs + 1;
			argc = 6;
		} else {
			/* Anything else after the slash is passed as-is. */
			argv[3] = addr;
			argc = 4;
		}

		if (!down && extrap != NULL) {
			for (cs = strtok(extrap, " "); cs;
			     cs = strtok(NULL, " ")) {
				size_t len = strlen(cs) + 1;
				argv[argc++] = acs = alloca(len);
				strlcpy(acs, cs, len);
			}
		}

		argv[argc] = down ? "-alias" : "alias";
		argv[argc + 1] = NULL;
		break;
#endif

#ifdef INET6
	case IP__IP6_IFADDR:
		/* Same layout as the IPv4 case above. */
		argc = 0;
		val = alloca(strlen(comstring->s) + 1);
		strcpy(val, comstring->s);
		cs = val;
		extrap = NULL;
		while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
			if (extrap == NULL) {
				*p = '\0';
				extrap = p + 1;
			}
			cs = p + 1;
			argc++;
		}

		argv = alloca((8 + argc) * sizeof(char *));
		argv[0] = _PATH_IFCONFIG;
		if ((cs = strchr(val, '|'))) {
			argv[1] = acs = alloca(cs - val + 1);
			strlcpy(acs, val, cs - val + 1);
			addr = cs + 1;
		} else {
			argv[1] = string_param(j->intparams[IP_INTERFACE]);
			addr = val;
		}
		argv[2] = "inet6";
		argv[3] = addr;
		if (!(cs = strchr(addr, '/'))) {
			/* No prefix length given: use a host prefix. */
			argv[4] = "prefixlen";
			argv[5] = "128";
			argc = 6;
		} else
			argc = 4;

		if (!down && extrap != NULL) {
			for (cs = strtok(extrap, " "); cs;
			     cs = strtok(NULL, " ")) {
				size_t len = strlen(cs) + 1;
				argv[argc++] = acs = alloca(len);
				strlcpy(acs, cs, len);
			}
		}

		argv[argc] = down ? "-alias" : "alias";
		argv[argc + 1] = NULL;
		break;
#endif

	case IP_VNET_INTERFACE:
		argv = alloca(5 * sizeof(char *));
		argv[0] = _PATH_IFCONFIG;
		argv[1] = comstring->s;
		argv[2] = down ? "-vnet" : "vnet";
		jidstr = string_param(j->intparams[KP_JID]);
		argv[3] = jidstr ? jidstr : string_param(j->intparams[KP_NAME]);
		argv[4] = NULL;
		break;

	case IP_MOUNT:
	case IP__MOUNT_FROM_FSTAB:
		/*
		 * Split an fstab-style line into up to four fields (device,
		 * mount point, type, options); the first two are un-vis'ed
		 * in place.
		 */
		argv = alloca(8 * sizeof(char *));
		comcs = alloca(comstring->len + 1);
		strcpy(comcs, comstring->s);
		argc = 0;
		for (cs = strtok(comcs, " \t\f\v\r\n"); cs && argc < 4;
		     cs = strtok(NULL, " \t\f\v\r\n")) {
			if (argc <= 1 && strunvis(cs, cs) < 0) {
				jail_warnx(j, "%s: %s: fstab parse error",
				    j->intparams[comparam]->name, comstring->s);
				return -1;
			}
			argv[argc++] = cs;
		}
		if (argc == 0)
			return 0;
		if (argc < 3) {
			jail_warnx(j, "%s: %s: missing information",
			    j->intparams[comparam]->name, comstring->s);
			return -1;
		}
		if (check_path(j, j->intparams[comparam]->name, argv[1], 0,
		    down ? argv[2] : NULL) < 0)
			return -1;
		/*
		 * Rearrange the fields in place into the final command line:
		 * "umount -t type mountpoint", or
		 * "mount -t type [-o options] device mountpoint".
		 */
		if (down) {
			argv[4] = NULL;
			argv[3] = argv[1];
			argv[0] = "/sbin/umount";
		} else {
			if (argc == 4) {
				argv[7] = NULL;
				argv[6] = argv[1];
				argv[5] = argv[0];
				argv[4] = argv[3];
				argv[3] = "-o";
			} else {
				argv[5] = NULL;
				argv[4] = argv[1];
				argv[3] = argv[0];
			}
			argv[0] = _PATH_MOUNT;
		}
		argv[1] = "-t";
		break;

	case IP_MOUNT_DEVFS:
		argv = alloca(7 * sizeof(char *));
		path = string_param(j->intparams[KP_PATH]);
		if (path == NULL) {
			jail_warnx(j, "mount.devfs: no jail root path defined");
			return -1;
		}
		/* Room for path + "/dev" + NUL. */
		devpath = alloca(strlen(path) + 5);
		sprintf(devpath, "%s/dev", path);
		if (check_path(j, "mount.devfs", devpath, 0,
		    down ? "devfs" : NULL) < 0)
			return -1;
		if (down) {
			argv[0] = "/sbin/umount";
			argv[1] = devpath;
			argv[2] = NULL;
		} else {
			argv[0] = _PATH_MOUNT;
			argv[1] = "-t";
			argv[2] = "devfs";
			ruleset = string_param(j->intparams[KP_DEVFS_RULESET]);
			if (!ruleset)
			    ruleset = "4";	/* devfsrules_jail */
			/* Room for "-oruleset=" + ruleset + NUL. */
			argv[3] = acs = alloca(11 + strlen(ruleset));
			sprintf(acs, "-oruleset=%s", ruleset);
			argv[4] = ".";
			argv[5] = devpath;
			argv[6] = NULL;
		}
		break;

	case IP_MOUNT_FDESCFS:
		argv = alloca(7 * sizeof(char *));
		path = string_param(j->intparams[KP_PATH]);
		if (path == NULL) {
			jail_warnx(j, "mount.fdescfs: no jail root path defined");
			return -1;
		}
		/* Room for path + "/dev/fd" + NUL. */
		devpath = alloca(strlen(path) + 8);
		sprintf(devpath, "%s/dev/fd", path);
		if (check_path(j, "mount.fdescfs", devpath, 0,
		    down ? "fdescfs" : NULL) < 0)
			return -1;
		if (down) {
			argv[0] = "/sbin/umount";
			argv[1] = devpath;
			argv[2] = NULL;
		} else {
			argv[0] = _PATH_MOUNT;
			argv[1] = "-t";
			argv[2] = "fdescfs";
			argv[3] = ".";
			argv[4] = devpath;
			argv[5] = NULL;
		}
		break;

	case IP_MOUNT_PROCFS:
		argv = alloca(7 * sizeof(char *));
		path = string_param(j->intparams[KP_PATH]);
		if (path == NULL) {
			jail_warnx(j, "mount.procfs: no jail root path defined");
			return -1;
		}
		/* Room for path + "/proc" + NUL. */
		devpath = alloca(strlen(path) + 6);
		sprintf(devpath, "%s/proc", path);
		if (check_path(j, "mount.procfs", devpath, 0,
		    down ? "procfs" : NULL) < 0)
			return -1;
		if (down) {
			argv[0] = "/sbin/umount";
			argv[1] = devpath;
			argv[2] = NULL;
		} else {
			argv[0] = _PATH_MOUNT;
			argv[1] = "-t";
			argv[2] = "procfs";
			argv[3] = ".";
			argv[4] = devpath;
			argv[5] = NULL;
		}
		break;

	case IP_COMMAND:
		if (j->name != NULL)
			goto default_command;
		/*
		 * For an unnamed jail the whole value list is one argument
		 * vector.  Marking comstring as the dummy makes
		 * next_command() move on to the next parameter afterwards.
		 */
		argc = 0;
		TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
			argc++;
		argv = alloca((argc + 1) * sizeof(char *));
		argc = 0;
		TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
			argv[argc++] = s->s;
		argv[argc] = NULL;
		j->comstring = &dummystring;
		break;

	default:
	default_command:
		/*
		 * Hand the string to the shell if it contains any shell
		 * metacharacter, except when the first one found is a lone
		 * trailing '&': that just means "run in the background".
		 */
		if ((cs = strpbrk(comstring->s, "!\"$&'()*;<>?[\\]`{|}~")) &&
		    !(cs[0] == '&' && cs[1] == '\0')) {
			argv = alloca(4 * sizeof(char *));
			argv[0] = _PATH_BSHELL;
			argv[1] = "-c";
			argv[2] = comstring->s;
			argv[3] = NULL;
		} else {
			if (cs) {
				/* Strip the trailing '&' from the string. */
				*cs = 0;
				bg = 1;
			}
			/*
			 * strtok() modifies its input, so tokenize twice:
			 * once to count the words and once to collect them.
			 */
			comcs = alloca(comstring->len + 1);
			strcpy(comcs, comstring->s);
			argc = 0;
			for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
			     cs = strtok(NULL, " \t\f\v\r\n"))
				argc++;
			argv = alloca((argc + 1) * sizeof(char *));
			strcpy(comcs, comstring->s);
			argc = 0;
			for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
			     cs = strtok(NULL, " \t\f\v\r\n"))
				argv[argc++] = cs;
			argv[argc] = NULL;
		}
	}
	if (argv[0] == NULL)
		return 0;

	/* A zero tv_sec means "no deadline" to add_proc() and next_proc(). */
	if (int_param(j->intparams[IP_EXEC_TIMEOUT], &timeout) &&
	    timeout != 0) {
		clock_gettime(CLOCK_REALTIME, &j->timeout);
		j->timeout.tv_sec += timeout;
	} else
		j->timeout.tv_sec = 0;

	injail = comparam == IP_EXEC_START || comparam == IP_COMMAND ||
	    comparam == IP_EXEC_STOP;
	if (injail)
		setid = root_cpuset_id();
	else
		setid = CPUSET_INVALID;
	clean = bool_param(j->intparams[IP_EXEC_CLEAN]);
	username = string_param(j->intparams[injail
	    ? IP_EXEC_JAIL_USER : IP_EXEC_SYSTEM_USER]);
	sjuser = bool_param(j->intparams[IP_EXEC_SYSTEM_JAIL_USER]);

	/* -1 means no console log; any valid descriptor is >= 0. */
	consfd = -1;
	if (injail &&
	    (conslog = string_param(j->intparams[IP_EXEC_CONSOLELOG]))) {
		if (check_path(j, "exec.consolelog", conslog, 1, NULL) < 0)
			return -1;
		consfd =
		    open(conslog, O_WRONLY | O_CREAT | O_APPEND, DEFFILEMODE);
		if (consfd < 0) {
			jail_warnx(j, "open %s: %s", conslog, strerror(errno));
			return -1;
		}
	}

	/* Join the arguments with spaces for use in messages. */
	comlen = 0;
	for (i = 0; argv[i]; i++)
		comlen += strlen(argv[i]) + 1;
	j->comline = cs = emalloc(comlen);
	for (i = 0; argv[i]; i++) {
		strcpy(cs, argv[i]);
		if (argv[i + 1]) {
			cs += strlen(argv[i]) + 1;
			cs[-1] = ' ';
		}
	}
	if (verbose > 0)
		jail_note(j, "run command%s%s%s: %s\n",
		    injail ? " in jail" : "", username ? " as " : "",
		    username ? username : "", j->comline);

	pid = fork();
	if (pid < 0)
		err(1, "fork");
	if (pid > 0) {
		/*
		 * Parent.  The console log descriptor was opened only for
		 * the child; close our copy so it is not leaked on every
		 * in-jail command.
		 */
		if (consfd >= 0)
			(void)close(consfd);
		if (bg || !add_proc(j, pid)) {
			/* Nothing to wait for. */
			free(j->comline);
			j->comline = NULL;
			return 0;
		} else {
			/* The running child occupies one paralimit slot. */
			paralimit--;
			return 1;
		}
	}
	/* Child from here on; it either execs or exits. */
	if (bg)
		setsid();

	/* Set up the environment and run the command */
	pwd = NULL;
	lcap = NULL;
	/* With exec.system_jail_user, look the user up before attaching. */
	if ((clean || username) && injail && sjuser &&
	    get_user_info(j, username, &pwd, &lcap) < 0)
		exit(1);
	if (injail) {
		/* jail_attach won't chdir along with its chroot. */
		path = string_param(j->intparams[KP_PATH]);
		if (path && chdir(path) < 0) {
			jail_warnx(j, "chdir %s: %s", path, strerror(errno));
			exit(1);
		}
		if (int_param(j->intparams[IP_EXEC_FIB], &fib) &&
		    setfib(fib) < 0) {
			jail_warnx(j, "setfib: %s", strerror(errno));
			exit(1);
		}

		/*
		 * We wouldn't have specialized our affinity, so just setid to
		 * root.  We do this prior to attaching to avoid the kernel
		 * having to create a transient cpuset that we'll promptly
		 * free up with a reset to the jail's cpuset.
		 *
		 * This is just a best-effort to use as wide of mask as
		 * possible.
		 */
		if (setid != CPUSET_INVALID)
			(void)cpuset_setid(CPU_WHICH_PID, -1, setid);

		if (jail_attach(j->jid) < 0) {
			jail_warnx(j, "jail_attach: %s", strerror(errno));
			exit(1);
		}
	}
	if (clean || username) {
		/* Otherwise the lookup happens here, after any attach. */
		if (!(injail && sjuser) &&
		    get_user_info(j, username, &pwd, &lcap) < 0)
			exit(1);
		if (clean) {
			/* Start from an empty environment, keeping TERM. */
			term = getenv("TERM");
			environ = &cleanenv;
			setenv("PATH", "/bin:/usr/bin", 0);
			if (term != NULL)
				setenv("TERM", term, 1);
		}
		if (setgid(pwd->pw_gid) < 0) {
			jail_warnx(j, "setgid %d: %s", pwd->pw_gid,
			    strerror(errno));
			exit(1);
		}
		if (setusercontext(lcap, pwd, pwd->pw_uid, username
		    ? LOGIN_SETALL & ~LOGIN_SETGROUP & ~LOGIN_SETLOGIN
		    : LOGIN_SETPATH | LOGIN_SETENV) < 0) {
			jail_warnx(j, "setusercontext %s: %s", pwd->pw_name,
			    strerror(errno));
			exit(1);
		}
		login_close(lcap);
		setenv("USER", pwd->pw_name, 1);
		setenv("HOME", pwd->pw_dir, 1);
		setenv("SHELL",
		    *pwd->pw_shell ? pwd->pw_shell : _PATH_BSHELL, 1);
		if (clean && chdir(pwd->pw_dir) < 0) {
			jail_warnx(j, "chdir %s: %s",
			    pwd->pw_dir, strerror(errno));
			exit(1);
		}
		endpwent();
	}

	/* Send stdout and stderr to the console log, if one was opened. */
	if (consfd >= 0 && (dup2(consfd, 1) < 0 || dup2(consfd, 2) < 0)) {
		jail_warnx(j, "exec.consolelog: %s", strerror(errno));
		exit(1);
	}
	closefrom(3);
	execvp(argv[0], __DECONST(char *const*, argv));
	jail_warnx(j, "exec %s: %s", argv[0], strerror(errno));
	exit(1);
}
786 
/*
 * Add a process to the hash, tied to a jail.
 *
 * Registers an exit notification for pid, records it in the pid hash,
 * and moves the jail onto the sleep queue.  Returns 1 on success, or 0
 * if the process no longer exists (ESRCH).  Any other kqueue/kevent
 * failure is fatal.
 */
static int
add_proc(struct cfjail *j, pid_t pid)
{
	struct kevent kev;
	struct cfjail *other;
	struct phash *entry;

	/* Create the kqueue on first use. */
	if (kq == 0) {
		kq = kqueue();
		if (kq < 0)
			err(1, "kqueue");
	}
	EV_SET(&kev, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0) {
		if (errno != ESRCH)
			err(1, "kevent");
		return 0;
	}

	entry = emalloc(sizeof(struct phash));
	entry->j = j;
	entry->pid = pid;
	LIST_INSERT_HEAD(&phash[pid % PHASH_SIZE], entry, le);
	j->nprocs++;
	j->flags |= JF_SLEEPQ;

	/* Jails without a deadline simply go through requeue(). */
	if (j->timeout.tv_sec == 0) {
		requeue(j, &sleeping);
		return 1;
	}

	/* File the jail in the sleep queue according to its timeout. */
	TAILQ_REMOVE(j->queue, j, tq);
	TAILQ_FOREACH(other, &sleeping, tq) {
		/* Stop at the first jail with no deadline or a later one. */
		if (other->timeout.tv_sec == 0)
			break;
		if (j->timeout.tv_sec < other->timeout.tv_sec)
			break;
		if (j->timeout.tv_sec == other->timeout.tv_sec &&
		    j->timeout.tv_nsec <= other->timeout.tv_nsec)
			break;
	}
	if (other != NULL)
		TAILQ_INSERT_BEFORE(other, j, tq);
	else
		TAILQ_INSERT_TAIL(&sleeping, j, tq);
	j->queue = &sleeping;
	return 1;
}
831 
/*
 * Remove any processes from the hash that correspond to a jail.
 *
 * Every bucket is scanned; for each entry owned by j the exit
 * notification is deleted (best effort) and the entry is freed.  The
 * jail's process count is reset to zero.
 */
static void
clear_procs(struct cfjail *j)
{
	struct kevent kev;
	struct phhead *bucket;
	struct phash *entry, *next;

	j->nprocs = 0;
	for (bucket = &phash[0]; bucket < &phash[PHASH_SIZE]; bucket++) {
		LIST_FOREACH_SAFE(entry, bucket, le, next) {
			if (entry->j != j)
				continue;
			EV_SET(&kev, entry->pid, EVFILT_PROC, EV_DELETE,
			    NOTE_EXIT, 0, NULL);
			(void)kevent(kq, &kev, 1, NULL, 0, NULL);
			LIST_REMOVE(entry, le);
			free(entry);
		}
	}
}
853 
854 /*
855  * Find the jail that corresponds to an exited process.
856  */
857 static struct cfjail *
858 find_proc(pid_t pid)
859 {
860 	struct cfjail *j;
861 	struct phash *ph;
862 
863 	LIST_FOREACH(ph, &phash[pid % PHASH_SIZE], le)
864 		if (ph->pid == pid) {
865 			j = ph->j;
866 			LIST_REMOVE(ph, le);
867 			free(ph);
868 			return --j->nprocs ? NULL : j;
869 		}
870 	return NULL;
871 }
872 
/*
 * Send SIGTERM to all processes in a jail and wait for them to die.
 *
 * The wait lasts stop.timeout seconds, or DEFAULT_STOP_TIMEOUT if that
 * parameter is not set; a stop.timeout of zero skips the step.
 *
 * Returns 1 if at least one process is now being waited for, or 0 if
 * there is nothing to wait for (including when the process list cannot
 * be read).
 */
static int
term_procs(struct cfjail *j)
{
	struct kinfo_proc *ki;
	int i, noted, pcnt, timeout;

	/* Opened once and kept for later calls. */
	static kvm_t *kd;

	if (!int_param(j->intparams[IP_STOP_TIMEOUT], &timeout))
		timeout = DEFAULT_STOP_TIMEOUT;
	else if (timeout == 0)
		return 0;

	if (kd == NULL) {
		kd = kvm_open(NULL, NULL, NULL, O_RDONLY, NULL);
		if (kd == NULL)
			return 0;
	}

	ki = kvm_getprocs(kd, KERN_PROC_PROC, 0, &pcnt);
	if (ki == NULL)
		return 0;

	/*
	 * Set the deadline before the first add_proc() call: add_proc()
	 * files the jail in the sleep queue according to j->timeout, so it
	 * must not see a value left over from an earlier command.
	 */
	clock_gettime(CLOCK_REALTIME, &j->timeout);
	j->timeout.tv_sec += timeout;

	noted = 0;
	for (i = 0; i < pcnt; i++)
		if (ki[i].ki_jid == j->jid &&
		    kill(ki[i].ki_pid, SIGTERM) == 0) {
			(void)add_proc(j, ki[i].ki_pid);
			if (verbose > 0) {
				/* Print the heading once, then each pid. */
				if (!noted) {
					noted = 1;
					jail_note(j, "sent SIGTERM to:");
				}
				printf(" %d", ki[i].ki_pid);
			}
		}
	if (noted)
		printf("\n");
	return j->nprocs > 0 ? 1 : 0;
}
920 
/*
 * Look up a user in the passwd and login.conf files.
 *
 * With a NULL username the current real uid is looked up instead.  On
 * success the passwd entry and login class are stored through pwdp and
 * lcapp, the supplementary groups are set with initgroups(), and 0 is
 * returned.  On any failure a warning is printed and -1 is returned.
 */
static int
get_user_info(struct cfjail *j, const char *username,
    const struct passwd **pwdp, login_cap_t **lcapp)
{
	const struct passwd *entry;

	/* Clear errno so "no such user" can be told apart from an error. */
	errno = 0;
	if (username != NULL)
		entry = getpwnam(username);
	else
		entry = getpwuid(getuid());
	*pwdp = entry;

	if (entry == NULL) {
		if (errno != 0)
			jail_warnx(j, "getpwnam%s%s: %s", username ? " " : "",
			    username ? username : "", strerror(errno));
		else if (username != NULL)
			jail_warnx(j, "%s: no such user", username);
		else
			jail_warnx(j, "unknown uid %d", getuid());
		return -1;
	}

	*lcapp = login_getpwclass(entry);
	if (*lcapp == NULL) {
		jail_warnx(j, "getpwclass %s: %s", entry->pw_name,
		    strerror(errno));
		return -1;
	}

	/* Set the groups while the group file is still available */
	if (initgroups(entry->pw_name, entry->pw_gid) < 0) {
		jail_warnx(j, "initgroups %s: %s", entry->pw_name,
		    strerror(errno));
		return -1;
	}
	return 0;
}
956 
957 /*
958  * Make sure a mount or consolelog path is a valid absolute pathname
959  * with no symlinks.
960  */
961 static int
962 check_path(struct cfjail *j, const char *pname, const char *path, int isfile,
963     const char *umount_type)
964 {
965 	struct stat st, mpst;
966 	struct statfs stfs;
967 	char *tpath, *p;
968 	const char *jailpath;
969 	size_t jplen;
970 
971 	if (path[0] != '/') {
972 		jail_warnx(j, "%s: %s: not an absolute pathname",
973 		    pname, path);
974 		return -1;
975 	}
976 	/*
977 	 * Only check for symlinks in components below the jail's path,
978 	 * since that's where the security risk lies.
979 	 */
980 	jailpath = string_param(j->intparams[KP_PATH]);
981 	if (jailpath == NULL)
982 		jailpath = "";
983 	jplen = strlen(jailpath);
984 	if (!strncmp(path, jailpath, jplen) && path[jplen] == '/') {
985 		tpath = alloca(strlen(path) + 1);
986 		strcpy(tpath, path);
987 		for (p = tpath + jplen; p != NULL; ) {
988 			p = strchr(p + 1, '/');
989 			if (p)
990 				*p = '\0';
991 			if (lstat(tpath, &st) < 0) {
992 				if (errno == ENOENT && isfile && !p)
993 					break;
994 				jail_warnx(j, "%s: %s: %s", pname, tpath,
995 				    strerror(errno));
996 				return -1;
997 			}
998 			if (S_ISLNK(st.st_mode)) {
999 				jail_warnx(j, "%s: %s is a symbolic link",
1000 				    pname, tpath);
1001 				return -1;
1002 			}
1003 			if (p)
1004 				*p = '/';
1005 		}
1006 	}
1007 	if (umount_type != NULL) {
1008 		if (stat(path, &st) < 0 || statfs(path, &stfs) < 0) {
1009 			jail_warnx(j, "%s: %s: %s", pname, path,
1010 			    strerror(errno));
1011 			return -1;
1012 		}
1013 		if (stat(stfs.f_mntonname, &mpst) < 0) {
1014 			jail_warnx(j, "%s: %s: %s", pname, stfs.f_mntonname,
1015 			    strerror(errno));
1016 			return -1;
1017 		}
1018 		if (st.st_ino != mpst.st_ino) {
1019 			jail_warnx(j, "%s: %s: not a mount point",
1020 			    pname, path);
1021 			return -1;
1022 		}
1023 		if (strcmp(stfs.f_fstypename, umount_type)) {
1024 			jail_warnx(j, "%s: %s: not a %s mount",
1025 			    pname, path, umount_type);
1026 			return -1;
1027 		}
1028 	}
1029 	return 0;
1030 }
1031