xref: /freebsd/usr.sbin/jail/command.c (revision f5f40dd63bc7acbb5312b26ac1ea1103c12352a6)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 James Gritton
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/types.h>
30 #include <sys/cpuset.h>
31 #include <sys/event.h>
32 #include <sys/mount.h>
33 #include <sys/stat.h>
34 #include <sys/sysctl.h>
35 #include <sys/user.h>
36 #include <sys/wait.h>
37 
38 #include <err.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <kvm.h>
42 #include <login_cap.h>
43 #include <paths.h>
44 #include <pwd.h>
45 #include <signal.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <vis.h>
51 
52 #include "jailp.h"
53 
/*
 * Seconds term_procs() waits for signalled processes when the
 * IP_STOP_TIMEOUT parameter yields no value.
 */
#define DEFAULT_STOP_TIMEOUT	10
/* Number of buckets in the pid hash table below. */
#define PHASH_SIZE		256

LIST_HEAD(phhead, phash);

/* One process being waited on, tied to the jail that is waiting for it. */
struct phash {
	LIST_ENTRY(phash)	le;	/* hash bucket linkage */
	struct cfjail		*j;	/* jail waiting on the process */
	pid_t			pid;	/* process ID being watched */
};

/*
 * Count of further commands that may be started: run_command() decrements
 * it for each process it waits on, finish_command() increments it again,
 * and next_command() defers jails to the runnable queue while it is zero.
 */
int paralimit = -1;

extern char **environ;

static int run_command(struct cfjail *j);
static int add_proc(struct cfjail *j, pid_t pid);
static void clear_procs(struct cfjail *j);
static struct cfjail *find_proc(pid_t pid);
static int term_procs(struct cfjail *j);
static int get_user_info(struct cfjail *j, const char *username,
    const struct passwd **pwdp, login_cap_t **lcapp);
static int check_path(struct cfjail *j, const char *pname, const char *path,
    int isfile, const char *umount_type);

/* Jails waiting on processes; add_proc() files timed entries by timeout. */
static struct cfjails sleeping = TAILQ_HEAD_INITIALIZER(sleeping);
/* Jails deferred by next_command() because paralimit reached zero. */
static struct cfjails runnable = TAILQ_HEAD_INITIALIZER(runnable);
/*
 * Placeholder with a non-zero length, used by next_command() as the
 * "command string" for steps that have no string value of their own.
 */
static struct cfstring dummystring = { .len = 1 };
/* Watched processes, bucketed by pid % PHASH_SIZE. */
static struct phhead phash[PHASH_SIZE];
/* kqueue descriptor, created on first use by add_proc(). */
static int kq;
84 
/*
 * Return the ID of the root cpuset for this process, or CPUSET_INVALID
 * if it cannot be determined.  The lookup result is cached, and a failed
 * lookup is never retried.
 */
static cpusetid_t
root_cpuset_id(void)
{
	static cpusetid_t cached = CPUSET_INVALID;
	static int rc;

	/* A previous attempt failed; don't ask again. */
	if (rc != 0)
		return (CPUSET_INVALID);

	if (cached == CPUSET_INVALID) {
		rc = cpuset_getid(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1, &cached);
		if (rc != 0)
			return (CPUSET_INVALID);
	}
	return (cached);
}
98 
99 /*
100  * Run the next command associated with a jail.
101  */
102 int
103 next_command(struct cfjail *j)
104 {
105 	enum intparam comparam;
106 	int create_failed, stopping;
107 
108 	if (paralimit == 0) {
109 		if (j->flags & JF_FROM_RUNQ)
110 			requeue_head(j, &runnable);
111 		else
112 			requeue(j, &runnable);
113 		return 1;
114 	}
115 	j->flags &= ~JF_FROM_RUNQ;
116 	create_failed = (j->flags & (JF_STOP | JF_FAILED)) == JF_FAILED;
117 	stopping = (j->flags & JF_STOP) != 0;
118 	comparam = *j->comparam;
119 	for (;;) {
120 		if (j->comstring == NULL) {
121 			j->comparam += create_failed ? -1 : 1;
122 			switch ((comparam = *j->comparam)) {
123 			case IP__NULL:
124 				return 0;
125 			case IP_MOUNT_DEVFS:
126 				if (!bool_param(j->intparams[IP_MOUNT_DEVFS]))
127 					continue;
128 				j->comstring = &dummystring;
129 				break;
130 			case IP_MOUNT_FDESCFS:
131 				if (!bool_param(j->intparams[IP_MOUNT_FDESCFS]))
132 					continue;
133 				j->comstring = &dummystring;
134 				break;
135 			case IP_MOUNT_PROCFS:
136 				if (!bool_param(j->intparams[IP_MOUNT_PROCFS]))
137 					continue;
138 				j->comstring = &dummystring;
139 				break;
140 			case IP__OP:
141 			case IP_STOP_TIMEOUT:
142 				j->comstring = &dummystring;
143 				break;
144 			default:
145 				if (j->intparams[comparam] == NULL)
146 					continue;
147 				j->comstring = create_failed || (stopping &&
148 				    (j->intparams[comparam]->flags & PF_REV))
149 				    ? TAILQ_LAST(&j->intparams[comparam]->val,
150 					cfstrings)
151 				    : TAILQ_FIRST(&j->intparams[comparam]->val);
152 			}
153 		} else {
154 			j->comstring = j->comstring == &dummystring ? NULL :
155 			    create_failed || (stopping &&
156 			    (j->intparams[comparam]->flags & PF_REV))
157 			    ? TAILQ_PREV(j->comstring, cfstrings, tq)
158 			    : TAILQ_NEXT(j->comstring, tq);
159 		}
160 		if (j->comstring == NULL || j->comstring->len == 0 ||
161 		    (create_failed && (comparam == IP_EXEC_PRESTART ||
162 		    comparam == IP_EXEC_CREATED || comparam == IP_EXEC_START ||
163 		    comparam == IP_COMMAND || comparam == IP_EXEC_POSTSTART ||
164 		    comparam == IP_EXEC_PREPARE)))
165 			continue;
166 		switch (run_command(j)) {
167 		case -1:
168 			failed(j);
169 			/* FALLTHROUGH */
170 		case 1:
171 			return 1;
172 		}
173 	}
174 }
175 
176 /*
177  * Check command exit status
178  */
179 int
180 finish_command(struct cfjail *j)
181 {
182 	struct cfjail *rj;
183 	int error;
184 
185 	if (!(j->flags & JF_SLEEPQ))
186 		return 0;
187 	j->flags &= ~JF_SLEEPQ;
188 	if (*j->comparam == IP_STOP_TIMEOUT) {
189 		j->flags &= ~JF_TIMEOUT;
190 		j->pstatus = 0;
191 		return 0;
192 	}
193 	paralimit++;
194 	if (!TAILQ_EMPTY(&runnable)) {
195 		rj = TAILQ_FIRST(&runnable);
196 		rj->flags |= JF_FROM_RUNQ;
197 		requeue(rj, &ready);
198 	}
199 	error = 0;
200 	if (j->flags & JF_TIMEOUT) {
201 		j->flags &= ~JF_TIMEOUT;
202 		if (*j->comparam != IP_STOP_TIMEOUT) {
203 			jail_warnx(j, "%s: timed out", j->comline);
204 			failed(j);
205 			error = -1;
206 		} else if (verbose > 0)
207 			jail_note(j, "timed out\n");
208 	} else if (j->pstatus != 0) {
209 		if (WIFSIGNALED(j->pstatus))
210 			jail_warnx(j, "%s: exited on signal %d",
211 			    j->comline, WTERMSIG(j->pstatus));
212 		else
213 			jail_warnx(j, "%s: failed", j->comline);
214 		j->pstatus = 0;
215 		failed(j);
216 		error = -1;
217 	}
218 	free(j->comline);
219 	j->comline = NULL;
220 	return error;
221 }
222 
/*
 * Check for finished processes or timeouts.
 *
 * Returns a jail whose watched processes have all exited (with pstatus
 * set from the exit event) or whose timeout has expired (with JF_TIMEOUT
 * set and its processes dropped from the hash).  Returns NULL when no
 * jail is sleeping, or when nonblock is set and nothing is ready.
 */
struct cfjail *
next_proc(int nonblock)
{
	struct kevent ev;
	struct timespec remain;
	struct timespec *wait;
	struct cfjail *j;
	int nev;

	if (TAILQ_EMPTY(&sleeping))
		return NULL;

	for (;;) {
		wait = NULL;
		j = TAILQ_FIRST(&sleeping);
		if (j != NULL && j->timeout.tv_sec) {
			/* Work out how long until the first jail times out. */
			clock_gettime(CLOCK_REALTIME, &remain);
			remain.tv_sec = j->timeout.tv_sec - remain.tv_sec;
			remain.tv_nsec = j->timeout.tv_nsec - remain.tv_nsec;
			if (remain.tv_nsec < 0) {
				remain.tv_sec--;
				remain.tv_nsec += 1000000000;
			}
			if (remain.tv_sec < 0 ||
			    (remain.tv_sec == 0 && remain.tv_nsec == 0)) {
				/* Already expired. */
				j->flags |= JF_TIMEOUT;
				clear_procs(j);
				return j;
			}
			wait = &remain;
		}
		if (nonblock) {
			/* Poll only. */
			remain.tv_sec = 0;
			remain.tv_nsec = 0;
			wait = &remain;
		}

		nev = kevent(kq, NULL, 0, &ev, 1, wait);
		if (nev == -1) {
			if (errno != EINTR)
				err(1, "kevent");
			continue;
		}
		if (nev == 0) {
			if (nonblock)
				return NULL;
			/* The wait ran out: the first jail has timed out. */
			j = TAILQ_FIRST(&sleeping);
			j->flags |= JF_TIMEOUT;
			clear_procs(j);
			return j;
		}
		if (nev != 1)
			return NULL;

		/* A watched process exited; reap it if it is our child. */
		(void)waitpid(ev.ident, NULL, WNOHANG);
		j = find_proc(ev.ident);
		if (j != NULL) {
			j->pstatus = ev.data;
			return j;
		}
	}
}
282 
/*
 * Run a single command for a jail, possibly inside the jail.
 *
 * The command is selected by the jail's current command parameter and
 * command string.  Jail creation/removal and the stop timeout are handled
 * directly; everything else is turned into an argument vector and run in
 * a forked child.
 *
 * Returns -1 on error, 1 if the jail is now sleeping on a process (or on
 * signalled processes for the stop timeout), and 0 if there is nothing
 * to wait for (no command, a backgrounded command, or a child that could
 * not be watched).
 */
static int
run_command(struct cfjail *j)
{
	const struct passwd *pwd;
	const struct cfstring *comstring, *s;
	login_cap_t *lcap;
	const char **argv;
	char *acs, *cs, *comcs, *devpath;
	const char *jidstr, *conslog, *fmt, *path, *ruleset, *term, *username;
	enum intparam comparam;
	size_t comlen, ret;
	pid_t pid;
	cpusetid_t setid;
	int argc, bg, clean, consfd, down, fib, i, injail, sjuser, timeout;
#if defined(INET) || defined(INET6)
	char *addr, *extrap, *p, *val;
#endif

	/* Empty environment installed when exec.clean is set. */
	static char *cleanenv;

	/* Perform some operations that aren't actually commands */
	comparam = *j->comparam;
	/* Non-zero when the jail is being stopped or a creation undone. */
	down = j->flags & (JF_STOP | JF_FAILED);
	switch (comparam) {
	case IP_STOP_TIMEOUT:
		return term_procs(j);

	case IP__OP:
		if (down) {
			if (jail_remove(j->jid) < 0 && errno == EPERM) {
				jail_warnx(j, "jail_remove: %s",
					   strerror(errno));
				return -1;
			}
			if (verbose > 0 || (verbose == 0 && (j->flags & JF_STOP
			    ? note_remove : j->name != NULL)))
				jail_note(j, "removed\n");
			j->jid = -1;
			if (j->flags & JF_STOP)
				dep_done(j, DF_LIGHT);
			else
				j->flags &= ~JF_PERSIST;
		} else {
			if (create_jail(j) < 0)
				return -1;
			if (iflag)
				printf("%d\n", j->jid);
			if (verbose >= 0 && (j->name || verbose > 0))
				jail_note(j, "created\n");
			dep_done(j, DF_LIGHT);
		}
		return 0;

	default: ;
	}
	/*
	 * Collect exec arguments.  Internal commands for network and
	 * mounting build their own argument lists.
	 */
	comstring = j->comstring;
	bg = 0;
	switch (comparam) {
#ifdef INET
	case IP__IP4_IFADDR:
		/*
		 * The value is "[iface|]addr[/mask] [extra ifconfig args]".
		 * Split off the extra arguments and count them so argv can
		 * be sized.
		 */
		argc = 0;
		val = alloca(strlen(comstring->s) + 1);
		strcpy(val, comstring->s);
		cs = val;
		extrap = NULL;
		while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
			if (extrap == NULL) {
				*p = '\0';
				extrap = p + 1;
			}
			cs = p + 1;
			argc++;
		}

		argv = alloca((8 + argc) * sizeof(char *));
		argv[0] = _PATH_IFCONFIG;
		if ((cs = strchr(val, '|'))) {
			argv[1] = acs = alloca(cs - val + 1);
			strlcpy(acs, val, cs - val + 1);
			addr = cs + 1;
		} else {
			argv[1] = string_param(j->intparams[IP_INTERFACE]);
			addr = val;
		}
		argv[2] = "inet";
		if (!(cs = strchr(addr, '/'))) {
			/* No mask given: use a host mask. */
			argv[3] = addr;
			argv[4] = "netmask";
			argv[5] = "255.255.255.255";
			argc = 6;
		} else if (strchr(cs + 1, '.')) {
			/* Dotted mask after the slash: pass it as a netmask. */
			argv[3] = acs = alloca(cs - addr + 1);
			strlcpy(acs, addr, cs - addr + 1);
			argv[4] = "netmask";
			argv[5] = cs + 1;
			argc = 6;
		} else {
			/* addr/prefixlen is passed through unchanged. */
			argv[3] = addr;
			argc = 4;
		}

		/* Extra arguments only apply when adding the address. */
		if (!down && extrap != NULL) {
			for (cs = strtok(extrap, " "); cs;
			     cs = strtok(NULL, " ")) {
				size_t len = strlen(cs) + 1;
				argv[argc++] = acs = alloca(len);
				strlcpy(acs, cs, len);
			}
		}

		argv[argc] = down ? "-alias" : "alias";
		argv[argc + 1] = NULL;
		break;
#endif

#ifdef INET6
	case IP__IP6_IFADDR:
		/* Same value layout and splitting as the IPv4 case. */
		argc = 0;
		val = alloca(strlen(comstring->s) + 1);
		strcpy(val, comstring->s);
		cs = val;
		extrap = NULL;
		while ((p = strchr(cs, ' ')) != NULL && strlen(p) > 1) {
			if (extrap == NULL) {
				*p = '\0';
				extrap = p + 1;
			}
			cs = p + 1;
			argc++;
		}

		argv = alloca((8 + argc) * sizeof(char *));
		argv[0] = _PATH_IFCONFIG;
		if ((cs = strchr(val, '|'))) {
			argv[1] = acs = alloca(cs - val + 1);
			strlcpy(acs, val, cs - val + 1);
			addr = cs + 1;
		} else {
			argv[1] = string_param(j->intparams[IP_INTERFACE]);
			addr = val;
		}
		argv[2] = "inet6";
		argv[3] = addr;
		if (!(cs = strchr(addr, '/'))) {
			/* No prefix length given: use a host prefix. */
			argv[4] = "prefixlen";
			argv[5] = "128";
			argc = 6;
		} else
			argc = 4;

		/* Extra arguments only apply when adding the address. */
		if (!down && extrap != NULL) {
			for (cs = strtok(extrap, " "); cs;
			     cs = strtok(NULL, " ")) {
				size_t len = strlen(cs) + 1;
				argv[argc++] = acs = alloca(len);
				strlcpy(acs, cs, len);
			}
		}

		argv[argc] = down ? "-alias" : "alias";
		argv[argc + 1] = NULL;
		break;
#endif

	case IP_VNET_INTERFACE:
		argv = alloca(5 * sizeof(char *));
		argv[0] = _PATH_IFCONFIG;
		argv[1] = comstring->s;
		argv[2] = down ? "-vnet" : "vnet";
		jidstr = string_param(j->intparams[KP_JID]);
		argv[3] = jidstr ? jidstr : string_param(j->intparams[KP_NAME]);
		argv[4] = NULL;
		break;

	case IP_MOUNT:
	case IP__MOUNT_FROM_FSTAB:
		/*
		 * The value is an fstab-style line; only the first four
		 * fields (device, mount point, type, options) are used.
		 */
		argv = alloca(8 * sizeof(char *));
		comcs = alloca(comstring->len + 1);
		strcpy(comcs, comstring->s);
		argc = 0;
		for (cs = strtok(comcs, " \t\f\v\r\n"); cs && argc < 4;
		     cs = strtok(NULL, " \t\f\v\r\n")) {
			/* The device and mount point are decoded in place. */
			if (argc <= 1 && strunvis(cs, cs) < 0) {
				jail_warnx(j, "%s: %s: fstab parse error",
				    j->intparams[comparam]->name, comstring->s);
				return -1;
			}
			argv[argc++] = cs;
		}
		if (argc == 0)
			return 0;
		if (argc < 3) {
			jail_warnx(j, "%s: %s: missing information",
			    j->intparams[comparam]->name, comstring->s);
			return -1;
		}
		if (check_path(j, j->intparams[comparam]->name, argv[1], 0,
		    down ? argv[2] : NULL) < 0)
			return -1;
		/* Rearrange the fields in place; argv[2] stays the type. */
		if (down) {
			argv[4] = NULL;
			argv[3] = argv[1];
			argv[0] = "/sbin/umount";
		} else {
			if (argc == 4) {
				argv[7] = NULL;
				argv[6] = argv[1];
				argv[5] = argv[0];
				argv[4] = argv[3];
				argv[3] = "-o";
			} else {
				argv[5] = NULL;
				argv[4] = argv[1];
				argv[3] = argv[0];
			}
			argv[0] = _PATH_MOUNT;
		}
		argv[1] = "-t";
		break;

	case IP_MOUNT_DEVFS:
		argv = alloca(7 * sizeof(char *));
		path = string_param(j->intparams[KP_PATH]);
		if (path == NULL) {
			jail_warnx(j, "mount.devfs: no jail root path defined");
			return -1;
		}
		devpath = alloca(strlen(path) + 5);
		sprintf(devpath, "%s/dev", path);
		if (check_path(j, "mount.devfs", devpath, 0,
		    down ? "devfs" : NULL) < 0)
			return -1;
		if (down) {
			argv[0] = "/sbin/umount";
			argv[1] = devpath;
			argv[2] = NULL;
		} else {
			argv[0] = _PATH_MOUNT;
			argv[1] = "-t";
			argv[2] = "devfs";
			ruleset = string_param(j->intparams[KP_DEVFS_RULESET]);
			if (!ruleset)
				ruleset = "4";	/* devfsrules_jail */
			argv[3] = acs = alloca(11 + strlen(ruleset));
			sprintf(acs, "-oruleset=%s", ruleset);
			argv[4] = ".";
			argv[5] = devpath;
			argv[6] = NULL;
		}
		break;

	case IP_MOUNT_FDESCFS:
		argv = alloca(7 * sizeof(char *));
		path = string_param(j->intparams[KP_PATH]);
		if (path == NULL) {
			jail_warnx(j, "mount.fdescfs: no jail root path defined");
			return -1;
		}
		devpath = alloca(strlen(path) + 8);
		sprintf(devpath, "%s/dev/fd", path);
		if (check_path(j, "mount.fdescfs", devpath, 0,
		    down ? "fdescfs" : NULL) < 0)
			return -1;
		if (down) {
			argv[0] = "/sbin/umount";
			argv[1] = devpath;
			argv[2] = NULL;
		} else {
			argv[0] = _PATH_MOUNT;
			argv[1] = "-t";
			argv[2] = "fdescfs";
			argv[3] = ".";
			argv[4] = devpath;
			argv[5] = NULL;
		}
		break;

	case IP_MOUNT_PROCFS:
		argv = alloca(7 * sizeof(char *));
		path = string_param(j->intparams[KP_PATH]);
		if (path == NULL) {
			jail_warnx(j, "mount.procfs: no jail root path defined");
			return -1;
		}
		devpath = alloca(strlen(path) + 6);
		sprintf(devpath, "%s/proc", path);
		if (check_path(j, "mount.procfs", devpath, 0,
		    down ? "procfs" : NULL) < 0)
			return -1;
		if (down) {
			argv[0] = "/sbin/umount";
			argv[1] = devpath;
			argv[2] = NULL;
		} else {
			argv[0] = _PATH_MOUNT;
			argv[1] = "-t";
			argv[2] = "procfs";
			argv[3] = ".";
			argv[4] = devpath;
			argv[5] = NULL;
		}
		break;

	case IP_ZFS_DATASET:
		argv = alloca(4 * sizeof(char *));
		jidstr = string_param(j->intparams[KP_JID]) ?
		    string_param(j->intparams[KP_JID]) :
		    string_param(j->intparams[KP_NAME]);
		fmt = "if [ $(/sbin/zfs get -H -o value jailed %s) = on ]; then /sbin/zfs jail %s %s || echo error, attaching %s to jail %s failed; else echo error, you need to set jailed=on for dataset %s; fi";
		/*
		 * Exact size of the expanded format: the dataset name is
		 * substituted four times and the jail identifier twice.
		 */
		comlen = strlen(fmt)
		    + 2 * strlen(jidstr)
		    + 4 * comstring->len
		    - 6 * 2	/* 6 * "%s" */
		    + 1;
		comcs = alloca(comlen);
		ret = snprintf(comcs, comlen, fmt, comstring->s,
		    jidstr, comstring->s, comstring->s, jidstr,
		    comstring->s);
		if (ret >= comlen) {
			jail_warnx(j, "internal error in ZFS dataset handling");
			exit(1);
		}
		argv[0] = _PATH_BSHELL;
		argv[1] = "-c";
		argv[2] = comcs;
		argv[3] = NULL;
		break;

	case IP_COMMAND:
		if (j->name != NULL)
			goto default_command;
		/*
		 * For an unnamed jail, the parameter's values together
		 * form one argument vector.
		 */
		argc = 0;
		TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
			argc++;
		argv = alloca((argc + 1) * sizeof(char *));
		argc = 0;
		TAILQ_FOREACH(s, &j->intparams[IP_COMMAND]->val, tq)
			argv[argc++] = s->s;
		argv[argc] = NULL;
		/* All values are consumed at once; stop the iteration. */
		j->comstring = &dummystring;
		break;

	default:
	default_command:
		/*
		 * Hand anything with shell metacharacters to the shell,
		 * except a lone trailing '&', which just means "run in
		 * the background".
		 */
		if ((cs = strpbrk(comstring->s, "!\"$&'()*;<>?[\\]`{|}~")) &&
		    !(cs[0] == '&' && cs[1] == '\0')) {
			argv = alloca(4 * sizeof(char *));
			argv[0] = _PATH_BSHELL;
			argv[1] = "-c";
			argv[2] = comstring->s;
			argv[3] = NULL;
		} else {
			if (cs) {
				/* Strip the trailing '&' from the string. */
				*cs = 0;
				bg = 1;
			}
			/* Tokenize twice: once to count, once to fill argv. */
			comcs = alloca(comstring->len + 1);
			strcpy(comcs, comstring->s);
			argc = 0;
			for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
			     cs = strtok(NULL, " \t\f\v\r\n"))
				argc++;
			argv = alloca((argc + 1) * sizeof(char *));
			strcpy(comcs, comstring->s);
			argc = 0;
			for (cs = strtok(comcs, " \t\f\v\r\n"); cs;
			     cs = strtok(NULL, " \t\f\v\r\n"))
				argv[argc++] = cs;
			argv[argc] = NULL;
		}
	}
	if (argv[0] == NULL)
		return 0;

	/* A zero tv_sec means "no timeout" to add_proc() and next_proc(). */
	if (int_param(j->intparams[IP_EXEC_TIMEOUT], &timeout) &&
	    timeout != 0) {
		clock_gettime(CLOCK_REALTIME, &j->timeout);
		j->timeout.tv_sec += timeout;
	} else
		j->timeout.tv_sec = 0;

	injail = comparam == IP_EXEC_START || comparam == IP_COMMAND ||
	    comparam == IP_EXEC_STOP;
	if (injail)
		setid = root_cpuset_id();
	else
		setid = CPUSET_INVALID;
	clean = bool_param(j->intparams[IP_EXEC_CLEAN]);
	username = string_param(j->intparams[injail
	    ? IP_EXEC_JAIL_USER : IP_EXEC_SYSTEM_USER]);
	sjuser = bool_param(j->intparams[IP_EXEC_SYSTEM_JAIL_USER]);

	/* consfd stays 0 when no console log is in use. */
	consfd = 0;
	if (injail &&
	    (conslog = string_param(j->intparams[IP_EXEC_CONSOLELOG]))) {
		if (check_path(j, "exec.consolelog", conslog, 1, NULL) < 0)
			return -1;
		consfd =
		    open(conslog, O_WRONLY | O_CREAT | O_APPEND, DEFFILEMODE);
		if (consfd < 0) {
			jail_warnx(j, "open %s: %s", conslog, strerror(errno));
			return -1;
		}
	}

	/* Join the arguments with spaces for use in messages. */
	comlen = 0;
	for (i = 0; argv[i]; i++)
		comlen += strlen(argv[i]) + 1;
	j->comline = cs = emalloc(comlen);
	for (i = 0; argv[i]; i++) {
		strcpy(cs, argv[i]);
		if (argv[i + 1]) {
			cs += strlen(argv[i]) + 1;
			cs[-1] = ' ';
		}
	}
	if (verbose > 0)
		jail_note(j, "run command%s%s%s: %s\n",
		    injail ? " in jail" : "", username ? " as " : "",
		    username ? username : "", j->comline);

	pid = fork();
	if (pid < 0)
		err(1, "fork");
	if (pid > 0) {
		/*
		 * Parent.  The child holds its own copy of the console
		 * log descriptor, so drop ours rather than leaking one
		 * descriptor per command.
		 */
		if (consfd != 0)
			(void)close(consfd);
		if (bg || !add_proc(j, pid)) {
			free(j->comline);
			j->comline = NULL;
			return 0;
		} else {
			paralimit--;
			return 1;
		}
	}
	/* Child from here on; it either execs or exits. */
	if (bg)
		setsid();

	/* Set up the environment and run the command */
	pwd = NULL;
	lcap = NULL;
	/* With exec.system_jail_user, look the user up before attaching. */
	if ((clean || username) && injail && sjuser &&
	    get_user_info(j, username, &pwd, &lcap) < 0)
		exit(1);
	if (injail) {
		/* jail_attach won't chdir along with its chroot. */
		path = string_param(j->intparams[KP_PATH]);
		if (path && chdir(path) < 0) {
			jail_warnx(j, "chdir %s: %s", path, strerror(errno));
			exit(1);
		}
		if (int_param(j->intparams[IP_EXEC_FIB], &fib) &&
		    setfib(fib) < 0) {
			jail_warnx(j, "setfib: %s", strerror(errno));
			exit(1);
		}

		/*
		 * We wouldn't have specialized our affinity, so just setid to
		 * root.  We do this prior to attaching to avoid the kernel
		 * having to create a transient cpuset that we'll promptly
		 * free up with a reset to the jail's cpuset.
		 *
		 * This is just a best-effort to use as wide of mask as
		 * possible.
		 */
		if (setid != CPUSET_INVALID)
			(void)cpuset_setid(CPU_WHICH_PID, -1, setid);

		if (jail_attach(j->jid) < 0) {
			jail_warnx(j, "jail_attach: %s", strerror(errno));
			exit(1);
		}
	}
	if (clean || username) {
		/* Otherwise the lookup happens here, after any attach. */
		if (!(injail && sjuser) &&
		    get_user_info(j, username, &pwd, &lcap) < 0)
			exit(1);
		if (clean) {
			/* Start from an empty environment, keeping TERM. */
			term = getenv("TERM");
			environ = &cleanenv;
			setenv("PATH", "/bin:/usr/bin", 0);
			if (term != NULL)
				setenv("TERM", term, 1);
		}
		if (setgid(pwd->pw_gid) < 0) {
			jail_warnx(j, "setgid %d: %s", pwd->pw_gid,
			    strerror(errno));
			exit(1);
		}
		if (setusercontext(lcap, pwd, pwd->pw_uid, username
		    ? LOGIN_SETALL & ~LOGIN_SETGROUP & ~LOGIN_SETLOGIN
		    : LOGIN_SETPATH | LOGIN_SETENV) < 0) {
			jail_warnx(j, "setusercontext %s: %s", pwd->pw_name,
			    strerror(errno));
			exit(1);
		}
		login_close(lcap);
		setenv("USER", pwd->pw_name, 1);
		setenv("HOME", pwd->pw_dir, 1);
		setenv("SHELL",
		    *pwd->pw_shell ? pwd->pw_shell : _PATH_BSHELL, 1);
		if (clean && chdir(pwd->pw_dir) < 0) {
			jail_warnx(j, "chdir %s: %s",
			    pwd->pw_dir, strerror(errno));
			exit(1);
		}
		endpwent();
	}

	/* Send stdout and stderr to the console log, if there is one. */
	if (consfd != 0 && (dup2(consfd, 1) < 0 || dup2(consfd, 2) < 0)) {
		jail_warnx(j, "exec.consolelog: %s", strerror(errno));
		exit(1);
	}
	closefrom(3);
	execvp(argv[0], __DECONST(char *const*, argv));
	jail_warnx(j, "exec %s: %s", argv[0], strerror(errno));
	exit(1);
}
808 
/*
 * Add a process to the hash, tied to a jail.
 *
 * Registers an exit event for the process, records it in the pid hash
 * and moves the jail onto the sleeping queue.  Returns 1 on success, or
 * 0 if the process no longer exists.
 */
static int
add_proc(struct cfjail *j, pid_t pid)
{
	struct kevent kev;
	struct cfjail *other;
	struct phash *entry;

	/* Create the kqueue on first use. */
	if (kq == 0) {
		kq = kqueue();
		if (kq < 0)
			err(1, "kqueue");
	}

	EV_SET(&kev, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0) {
		if (errno != ESRCH)
			err(1, "kevent");
		/* The process is already gone; nothing to wait for. */
		return 0;
	}

	entry = emalloc(sizeof(*entry));
	entry->j = j;
	entry->pid = pid;
	LIST_INSERT_HEAD(&phash[pid % PHASH_SIZE], entry, le);
	j->nprocs++;
	j->flags |= JF_SLEEPQ;

	if (j->timeout.tv_sec == 0) {
		requeue(j, &sleeping);
		return 1;
	}

	/*
	 * File the jail in the sleep queue according to its timeout:
	 * ahead of the first entry that has no timeout or does not
	 * expire before this one.
	 */
	TAILQ_REMOVE(j->queue, j, tq);
	for (other = TAILQ_FIRST(&sleeping); other != NULL;
	    other = TAILQ_NEXT(other, tq)) {
		if (!other->timeout.tv_sec ||
		    j->timeout.tv_sec < other->timeout.tv_sec ||
		    (j->timeout.tv_sec == other->timeout.tv_sec &&
		    j->timeout.tv_nsec <= other->timeout.tv_nsec))
			break;
	}
	if (other != NULL)
		TAILQ_INSERT_BEFORE(other, j, tq);
	else
		TAILQ_INSERT_TAIL(&sleeping, j, tq);
	j->queue = &sleeping;
	return 1;
}
853 
/*
 * Remove any processes from the hash that correspond to a jail.
 *
 * The exit event registered for each removed process is deleted as well,
 * and the jail's process count is reset to zero.
 */
static void
clear_procs(struct cfjail *j)
{
	struct kevent kev;
	struct phhead *bucket;
	struct phash *cur, *next;

	j->nprocs = 0;
	for (bucket = phash; bucket < phash + PHASH_SIZE; bucket++) {
		LIST_FOREACH_SAFE(cur, bucket, le, next) {
			if (cur->j != j)
				continue;
			/* Best effort: the event may already be gone. */
			EV_SET(&kev, cur->pid, EVFILT_PROC, EV_DELETE,
			    NOTE_EXIT, 0, NULL);
			(void)kevent(kq, &kev, 1, NULL, 0, NULL);
			LIST_REMOVE(cur, le);
			free(cur);
		}
	}
}
875 
876 /*
877  * Find the jail that corresponds to an exited process.
878  */
879 static struct cfjail *
880 find_proc(pid_t pid)
881 {
882 	struct cfjail *j;
883 	struct phash *ph;
884 
885 	LIST_FOREACH(ph, &phash[pid % PHASH_SIZE], le)
886 		if (ph->pid == pid) {
887 			j = ph->j;
888 			LIST_REMOVE(ph, le);
889 			free(ph);
890 			return --j->nprocs ? NULL : j;
891 		}
892 	return NULL;
893 }
894 
/*
 * Send SIGTERM to all processes in a jail and wait for them to die.
 *
 * Returns 1 if at least one signalled process is now being waited on
 * (with the jail's timeout set), and 0 if there is nothing to wait for:
 * the stop timeout is zero, the process list is unavailable, or no
 * process could be signalled and watched.
 */
static int
term_procs(struct cfjail *j)
{
	static kvm_t *kd;

	struct kinfo_proc *procs, *kp;
	int announced, idx, nprocs, timeout;

	if (int_param(j->intparams[IP_STOP_TIMEOUT], &timeout)) {
		/* An explicit zero timeout disables the wait entirely. */
		if (timeout == 0)
			return 0;
	} else
		timeout = DEFAULT_STOP_TIMEOUT;

	/* The kvm handle is opened once and kept for later calls. */
	if (kd == NULL) {
		kd = kvm_open(NULL, NULL, NULL, O_RDONLY, NULL);
		if (kd == NULL)
			return 0;
	}

	procs = kvm_getprocs(kd, KERN_PROC_PROC, 0, &nprocs);
	if (procs == NULL)
		return 0;

	announced = 0;
	for (idx = 0; idx < nprocs; idx++) {
		kp = &procs[idx];
		if (kp->ki_jid != j->jid)
			continue;
		if (kill(kp->ki_pid, SIGTERM) != 0)
			continue;
		(void)add_proc(j, kp->ki_pid);
		if (verbose <= 0)
			continue;
		/* List the signalled pids on a single line. */
		if (!announced) {
			announced = 1;
			jail_note(j, "sent SIGTERM to:");
		}
		printf(" %d", kp->ki_pid);
	}
	if (announced)
		printf("\n");

	if (j->nprocs <= 0)
		return 0;
	clock_gettime(CLOCK_REALTIME, &j->timeout);
	j->timeout.tv_sec += timeout;
	return 1;
}
942 
/*
 * Look up a user in the passwd and login.conf files.
 *
 * With a NULL username the current real uid is looked up instead.  On
 * success the passwd entry and login class are stored through pwdp and
 * lcapp, the process's group list is initialized for that user, and 0
 * is returned.  On failure a warning is printed and -1 is returned.
 */
static int
get_user_info(struct cfjail *j, const char *username,
    const struct passwd **pwdp, login_cap_t **lcapp)
{
	const struct passwd *pw;

	/* Clear errno so a lookup error can be told from "not found". */
	errno = 0;
	if (username != NULL)
		pw = getpwnam(username);
	else
		pw = getpwuid(getuid());
	*pwdp = pw;
	if (pw == NULL) {
		if (errno != 0)
			jail_warnx(j, "getpwnam%s%s: %s", username ? " " : "",
			    username ? username : "", strerror(errno));
		else if (username != NULL)
			jail_warnx(j, "%s: no such user", username);
		else
			jail_warnx(j, "unknown uid %d", getuid());
		return -1;
	}

	*lcapp = login_getpwclass(pw);
	if (*lcapp == NULL) {
		jail_warnx(j, "getpwclass %s: %s", pw->pw_name,
		    strerror(errno));
		return -1;
	}

	/* Set the groups while the group file is still available */
	if (initgroups(pw->pw_name, pw->pw_gid) < 0) {
		jail_warnx(j, "initgroups %s: %s", pw->pw_name,
		    strerror(errno));
		return -1;
	}
	return 0;
}
978 
979 /*
980  * Make sure a mount or consolelog path is a valid absolute pathname
981  * with no symlinks.
982  */
983 static int
984 check_path(struct cfjail *j, const char *pname, const char *path, int isfile,
985     const char *umount_type)
986 {
987 	struct stat st, mpst;
988 	struct statfs stfs;
989 	char *tpath, *p;
990 	const char *jailpath;
991 	size_t jplen;
992 
993 	if (path[0] != '/') {
994 		jail_warnx(j, "%s: %s: not an absolute pathname",
995 		    pname, path);
996 		return -1;
997 	}
998 	/*
999 	 * Only check for symlinks in components below the jail's path,
1000 	 * since that's where the security risk lies.
1001 	 */
1002 	jailpath = string_param(j->intparams[KP_PATH]);
1003 	if (jailpath == NULL)
1004 		jailpath = "";
1005 	jplen = strlen(jailpath);
1006 	if (!strncmp(path, jailpath, jplen) && path[jplen] == '/') {
1007 		tpath = alloca(strlen(path) + 1);
1008 		strcpy(tpath, path);
1009 		for (p = tpath + jplen; p != NULL; ) {
1010 			p = strchr(p + 1, '/');
1011 			if (p)
1012 				*p = '\0';
1013 			if (lstat(tpath, &st) < 0) {
1014 				if (errno == ENOENT && isfile && !p)
1015 					break;
1016 				jail_warnx(j, "%s: %s: %s", pname, tpath,
1017 				    strerror(errno));
1018 				return -1;
1019 			}
1020 			if (S_ISLNK(st.st_mode)) {
1021 				jail_warnx(j, "%s: %s is a symbolic link",
1022 				    pname, tpath);
1023 				return -1;
1024 			}
1025 			if (p)
1026 				*p = '/';
1027 		}
1028 	}
1029 	if (umount_type != NULL) {
1030 		if (stat(path, &st) < 0 || statfs(path, &stfs) < 0) {
1031 			jail_warnx(j, "%s: %s: %s", pname, path,
1032 			    strerror(errno));
1033 			return -1;
1034 		}
1035 		if (stat(stfs.f_mntonname, &mpst) < 0) {
1036 			jail_warnx(j, "%s: %s: %s", pname, stfs.f_mntonname,
1037 			    strerror(errno));
1038 			return -1;
1039 		}
1040 		if (st.st_ino != mpst.st_ino) {
1041 			jail_warnx(j, "%s: %s: not a mount point",
1042 			    pname, path);
1043 			return -1;
1044 		}
1045 		if (strcmp(stfs.f_fstypename, umount_type)) {
1046 			jail_warnx(j, "%s: %s: not a %s mount",
1047 			    pname, path, umount_type);
1048 			return -1;
1049 		}
1050 	}
1051 	return 0;
1052 }
1053