xref: /titanic_50/usr/src/cmd/zoneadmd/zoneadmd.c (revision e2738c5e21a9e5d9a6525e48af4738deda3df455)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * zoneadmd manages zones; one zoneadmd process is launched for each
31  * non-global zone on the system.  This daemon juggles four jobs:
32  *
33  * - Implement setup and teardown of the zone "virtual platform": mount and
34  *   unmount filesystems; create and destroy network interfaces; communicate
35  *   with devfsadmd to lay out devices for the zone; instantiate the zone
36  *   console device; configure process runtime attributes such as resource
37  *   controls, pool bindings, fine-grained privileges.
38  *
39  * - Launch the zone's init(1M) process.
40  *
41  * - Implement a door server; clients (like zoneadm) connect to the door
42  *   server and request zone state changes.  The kernel is also a client of
43  *   this door server.  A request to halt or reboot the zone which originates
44  *   *inside* the zone results in a door upcall from the kernel into zoneadmd.
45  *
46  *   One minor problem is that messages emitted by zoneadmd need to be passed
47  *   back to the zoneadm process making the request.  These messages need to
48  *   be rendered in the client's locale; so, this is passed in as part of the
49  *   request.  The exception is the kernel upcall to zoneadmd, in which case
50  *   messages are syslog'd.
51  *
52  *   To make all of this work, the Makefile adds -a to xgettext to extract *all*
53  *   strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
54  *   strings which do not need to be translated.
55  *
56  * - Act as a console server for zlogin -C processes; see comments in zcons.c
57  *   for more information about the zone console architecture.
58  *
59  * DESIGN NOTES
60  *
61  * Restart:
62  *   A chief design constraint of zoneadmd is that it should be restartable in
63  *   the case that the administrator kills it off, or it suffers a fatal error,
64  *   without the running zone being impacted; this is akin to being able to
65  *   reboot the service processor of a server without affecting the OS instance.
66  */
67 
68 #include <sys/param.h>
69 #include <sys/mman.h>
70 #include <sys/types.h>
71 #include <sys/stat.h>
72 #include <sys/sysmacros.h>
73 
74 #include <bsm/adt.h>
75 #include <bsm/adt_event.h>
76 
77 #include <alloca.h>
78 #include <assert.h>
79 #include <errno.h>
80 #include <door.h>
81 #include <fcntl.h>
82 #include <locale.h>
83 #include <signal.h>
84 #include <stdarg.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <strings.h>
89 #include <synch.h>
90 #include <syslog.h>
91 #include <thread.h>
92 #include <unistd.h>
93 #include <wait.h>
94 #include <limits.h>
95 #include <zone.h>
96 #include <libcontract.h>
97 #include <libcontract_priv.h>
98 #include <sys/contract/process.h>
99 #include <sys/ctfs.h>
100 #include <sys/objfs.h>
101 
102 #include <libzonecfg.h>
103 #include "zoneadmd.h"
104 
105 static char *progname;
106 char *zone_name;	/* zone which we are managing */
107 static zoneid_t zone_id;
108 
109 zlog_t logsys;
110 
111 mutex_t	lock = DEFAULTMUTEX;	/* to serialize stuff */
112 mutex_t	msglock = DEFAULTMUTEX;	/* for calling setlocale() */
113 
114 static sema_t scratch_sem;	/* for scratch zones */
115 
116 static char	zone_door_path[MAXPATHLEN];
117 static int	zone_door = -1;
118 
119 boolean_t in_death_throes = B_FALSE;	/* daemon is dying */
120 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
121 
122 #if !defined(TEXT_DOMAIN)		/* should be defined by cc -D */
123 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it wasn't */
124 #endif
125 
126 #define	PATH_TO_INIT	"/sbin/init"
127 
128 #define	DEFAULT_LOCALE	"C"
129 
130 static const char *
131 z_cmd_name(zone_cmd_t zcmd)
132 {
133 	/* This list needs to match the enum in sys/zone.h */
134 	static const char *zcmdstr[] = {
135 		"ready", "boot", "reboot", "halt", "note_uninstalling",
136 		"mount", "unmount"
137 	};
138 
139 	if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
140 		return ("unknown");
141 	else
142 		return (zcmdstr[(int)zcmd]);
143 }
144 
/*
 * Return a pointer to the last path component of execfullname.
 *
 * Trailing '/' characters are removed by overwriting them with NUL, so the
 * caller's string is modified in place when it ends in a slash.  The result
 * always points into execfullname; nothing is allocated.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	while ((slash = strrchr(execfullname, '/')) != NULL) {
		/* a component follows the slash: that is the basename */
		if (slash[1] != '\0')
			return (slash + 1);

		/* guard against '/' at end of command invocation */
		*slash = '\0';
	}

	/* no slash left, so the whole string is the basename */
	return (execfullname);
}
167 
168 static void
169 usage(void)
170 {
171 	(void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
172 	(void) fprintf(stderr,
173 	    gettext("\tNote: %s should not be run directly.\n"), progname);
174 	exit(2);
175 }
176 
/*
 * Signal handler that deliberately does nothing with the signal it is given.
 * NOTE(review): presumably installed for SIGCHLD elsewhere in this file;
 * the registration is not visible here.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
	/* intentionally empty */
}
182 
183 char *
184 localize_msg(char *locale, const char *msg)
185 {
186 	char *out;
187 
188 	(void) mutex_lock(&msglock);
189 	(void) setlocale(LC_MESSAGES, locale);
190 	out = gettext(msg);
191 	(void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
192 	(void) mutex_unlock(&msglock);
193 	return (out);
194 }
195 
196 /* PRINTFLIKE3 */
197 void
198 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
199 {
200 	va_list alist;
201 	char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
202 	char *bp;
203 	int saved_errno = errno;
204 
205 	if (zlogp == NULL)
206 		return;
207 	if (zlogp == &logsys)
208 		(void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
209 		    zone_name);
210 	else
211 		buf[0] = '\0';
212 	bp = &(buf[strlen(buf)]);
213 
214 	/*
215 	 * In theory, the locale pointer should be set to either "C" or a
216 	 * char array, so it should never be NULL
217 	 */
218 	assert(zlogp->locale != NULL);
219 	/* Locale is per process, but we are multi-threaded... */
220 	fmt = localize_msg(zlogp->locale, fmt);
221 
222 	va_start(alist, fmt);
223 	(void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
224 	va_end(alist);
225 	bp = &(buf[strlen(buf)]);
226 	if (use_strerror)
227 		(void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
228 		    strerror(saved_errno));
229 	if (zlogp == &logsys) {
230 		(void) syslog(LOG_ERR, "%s", buf);
231 	} else if (zlogp->logfile != NULL) {
232 		(void) fprintf(zlogp->logfile, "%s\n", buf);
233 	} else {
234 		size_t buflen;
235 		size_t copylen;
236 
237 		buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
238 		copylen = MIN(buflen, zlogp->loglen);
239 		zlogp->log += copylen;
240 		zlogp->loglen -= copylen;
241 	}
242 }
243 
244 /*
245  * Emit a warning for any boot arguments which are unrecognized.  Since
246  * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
247  * put the arguments into an argv style array, use getopt to process them,
248  * and put the resultant argument string back into outargs.
249  *
250  * During the filtering, we pull out any arguments which are truly "boot"
251  * arguments, leaving only those which are to be passed intact to the
252  * progenitor process.  The one we support at the moment is -i, which
253  * indicates to the kernel which program should be launched as 'init'.
254  *
255  * A return of Z_INVAL indicates specifically that the arguments are
256  * not valid; this is a non-fatal error.  Except for Z_OK, all other return
257  * values are treated as fatal.
258  */
259 static int
260 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
261     char *init_file, char *badarg)
262 {
263 	int argc = 0, argc_save;
264 	int i;
265 	int err;
266 	char *arg, *lasts, **argv = NULL, **argv_save;
267 	char zonecfg_args[BOOTARGS_MAX];
268 	char scratchargs[BOOTARGS_MAX], *sargs;
269 	char c;
270 
271 	bzero(outargs, BOOTARGS_MAX);
272 	bzero(badarg, BOOTARGS_MAX);
273 
274 	(void) strlcpy(init_file, PATH_TO_INIT, MAXPATHLEN);
275 
276 	/*
277 	 * If the user didn't specify transient boot arguments, check
278 	 * to see if there were any specified in the zone configuration,
279 	 * and use them if applicable.
280 	 */
281 	if (inargs == NULL || inargs[0] == '\0')  {
282 		zone_dochandle_t handle;
283 		if ((handle = zonecfg_init_handle()) == NULL) {
284 			zerror(zlogp, B_TRUE,
285 			    "getting zone configuration handle");
286 			return (Z_BAD_HANDLE);
287 		}
288 		err = zonecfg_get_snapshot_handle(zone_name, handle);
289 		if (err != Z_OK) {
290 			zerror(zlogp, B_FALSE,
291 			    "invalid configuration snapshot");
292 			zonecfg_fini_handle(handle);
293 			return (Z_BAD_HANDLE);
294 		}
295 
296 		bzero(zonecfg_args, sizeof (zonecfg_args));
297 		(void) zonecfg_get_bootargs(handle, zonecfg_args,
298 		    sizeof (zonecfg_args));
299 		inargs = zonecfg_args;
300 		zonecfg_fini_handle(handle);
301 	}
302 
303 	if (strlen(inargs) >= BOOTARGS_MAX) {
304 		zerror(zlogp, B_FALSE, "boot argument string too long");
305 		return (Z_INVAL);
306 	}
307 
308 	(void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
309 	sargs = scratchargs;
310 	while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
311 		sargs = NULL;
312 		argc++;
313 	}
314 
315 	if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
316 		zerror(zlogp, B_FALSE, "memory allocation failed");
317 		return (Z_NOMEM);
318 	}
319 
320 	argv_save = argv;
321 	argc_save = argc;
322 
323 	(void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
324 	sargs = scratchargs;
325 	i = 0;
326 	while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
327 		sargs = NULL;
328 		if ((argv[i] = strdup(arg)) == NULL) {
329 			err = Z_NOMEM;
330 			zerror(zlogp, B_FALSE, "memory allocation failed");
331 			goto done;
332 		}
333 		i++;
334 	}
335 
336 	/*
337 	 * We preserve compatibility with the Solaris system boot behavior,
338 	 * which allows:
339 	 *
340 	 * 	# reboot kernel/unix -s -m verbose
341 	 *
342 	 * In this example, kernel/unix tells the booter what file to
343 	 * boot.  We don't want reboot in a zone to be gratuitously different,
344 	 * so we silently ignore the boot file, if necessary.
345 	 */
346 	if (argv[0] == NULL)
347 		goto done;
348 
349 	assert(argv[0][0] != ' ');
350 	assert(argv[0][0] != '\t');
351 
352 	if (argv[0][0] != '-' && argv[0][0] != '\0') {
353 		argv = &argv[1];
354 		argc--;
355 	}
356 
357 	optind = 0;
358 	opterr = 0;
359 	err = Z_OK;
360 	while ((c = getopt(argc, argv, "i:m:s")) != -1) {
361 		switch (c) {
362 		case 'i':
363 			/*
364 			 * -i is handled by the runtime and is not passed
365 			 * along to userland
366 			 */
367 			(void) strlcpy(init_file, optarg, MAXPATHLEN);
368 			break;
369 		case 'm':
370 		case 's':
371 			/* These pass through unmolested */
372 			(void) snprintf(outargs, BOOTARGS_MAX,
373 			    "%s -%c %s ", outargs, c, optarg ? optarg : "");
374 			break;
375 		case '?':
376 			/*
377 			 * We warn about unknown arguments but pass them
378 			 * along anyway-- if someone wants to develop their
379 			 * own init replacement, they can pass it whatever
380 			 * args they want.
381 			 */
382 			err = Z_INVAL;
383 			(void) snprintf(outargs, BOOTARGS_MAX,
384 			    "%s -%c", outargs, optopt);
385 			(void) snprintf(badarg, BOOTARGS_MAX,
386 			    "%s -%c", badarg, optopt);
387 			break;
388 		}
389 	}
390 
391 	/*
392 	 * For Solaris Zones we warn about and discard non-option arguments.
393 	 * Hence 'boot foo bar baz gub' --> 'boot'.  However, to be similar
394 	 * to the kernel, we concat up all the other remaining boot args.
395 	 * and warn on them as a group.
396 	 */
397 	if (optind < argc) {
398 		err = Z_INVAL;
399 		while (optind < argc) {
400 			(void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s",
401 			    badarg, strlen(badarg) > 0 ? " " : "",
402 			    argv[optind]);
403 			optind++;
404 		}
405 		zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot "
406 		    "arguments `%s'.", badarg);
407 	}
408 
409 done:
410 	for (i = 0; i < argc_save; i++) {
411 		if (argv_save[i] != NULL)
412 			free(argv_save[i]);
413 	}
414 	free(argv_save);
415 	return (err);
416 }
417 
418 
419 static int
420 mkzonedir(zlog_t *zlogp)
421 {
422 	struct stat st;
423 	/*
424 	 * We must create and lock everyone but root out of ZONES_TMPDIR
425 	 * since anyone can open any UNIX domain socket, regardless of
426 	 * its file system permissions.  Sigh...
427 	 */
428 	if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
429 		zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
430 		return (-1);
431 	}
432 	/* paranoia */
433 	if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
434 		zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
435 		return (-1);
436 	}
437 	(void) chmod(ZONES_TMPDIR, S_IRWXU);
438 	return (0);
439 }
440 
441 /*
442  * Bring a zone up to the pre-boot "ready" stage.  The mount_cmd argument is
443  * 'true' if this is being invoked as part of the processing for the "mount"
444  * subcommand.
445  */
446 static int
447 zone_ready(zlog_t *zlogp, boolean_t mount_cmd)
448 {
449 	int err;
450 
451 	if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
452 		zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
453 		    zonecfg_strerror(err));
454 		return (-1);
455 	}
456 
457 	if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
458 		if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
459 			zerror(zlogp, B_FALSE, "destroying snapshot: %s",
460 			    zonecfg_strerror(err));
461 		return (-1);
462 	}
463 	if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
464 		bringup_failure_recovery = B_TRUE;
465 		(void) vplat_teardown(NULL, mount_cmd);
466 		if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
467 			zerror(zlogp, B_FALSE, "destroying snapshot: %s",
468 			    zonecfg_strerror(err));
469 		return (-1);
470 	}
471 
472 	return (0);
473 }
474 
475 int
476 init_template(void)
477 {
478 	int fd;
479 	int err = 0;
480 
481 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
482 	if (fd == -1)
483 		return (-1);
484 
485 	/*
486 	 * For now, zoneadmd doesn't do anything with the contract.
487 	 * Deliver no events, don't inherit, and allow it to be orphaned.
488 	 */
489 	err |= ct_tmpl_set_critical(fd, 0);
490 	err |= ct_tmpl_set_informative(fd, 0);
491 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
492 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
493 	if (err || ct_tmpl_activate(fd)) {
494 		(void) close(fd);
495 		return (-1);
496 	}
497 
498 	return (fd);
499 }
500 
501 static int
502 mount_early_fs(zlog_t *zlogp, zoneid_t zoneid, const char *spec,
503     const char *dir, char *fstype)
504 {
505 	pid_t child;
506 	int child_status;
507 	int tmpl_fd;
508 	ctid_t ct;
509 
510 	if ((tmpl_fd = init_template()) == -1) {
511 		zerror(zlogp, B_TRUE, "failed to create contract");
512 		return (-1);
513 	}
514 
515 	if ((child = fork()) == -1) {
516 		(void) ct_tmpl_clear(tmpl_fd);
517 		(void) close(tmpl_fd);
518 		zerror(zlogp, B_TRUE, "failed to fork");
519 		return (-1);
520 
521 	} else if (child == 0) {	/* child */
522 		(void) ct_tmpl_clear(tmpl_fd);
523 		/*
524 		 * Even though there are no procs running in the zone, we
525 		 * do this for paranoia's sake.
526 		 */
527 		(void) closefrom(0);
528 
529 		if (zone_enter(zoneid) == -1) {
530 			_exit(errno);
531 		}
532 		if (mount(spec, dir, MS_DATA, fstype, NULL, 0, NULL, 0) != 0)
533 			_exit(errno);
534 		_exit(0);
535 	}
536 
537 	/* parent */
538 	if (contract_latest(&ct) == -1)
539 		ct = -1;
540 	(void) ct_tmpl_clear(tmpl_fd);
541 	(void) close(tmpl_fd);
542 	if (waitpid(child, &child_status, 0) != child) {
543 		/* unexpected: we must have been signalled */
544 		(void) contract_abandon_id(ct);
545 		return (-1);
546 	}
547 	(void) contract_abandon_id(ct);
548 	if (WEXITSTATUS(child_status) != 0) {
549 		errno = WEXITSTATUS(child_status);
550 		zerror(zlogp, B_TRUE, "mount of %s failed", dir);
551 		return (-1);
552 	}
553 
554 	return (0);
555 }
556 
557 static int
558 zone_mount_early(zlog_t *zlogp, zoneid_t zoneid)
559 {
560 	if (mount_early_fs(zlogp, zoneid, "/proc", "/proc", "proc") != 0)
561 		return (-1);
562 
563 	if (mount_early_fs(zlogp, zoneid, "ctfs", CTFS_ROOT, "ctfs") != 0)
564 		return (-1);
565 
566 	if (mount_early_fs(zlogp, zoneid, "objfs", OBJFS_ROOT, "objfs") != 0)
567 		return (-1);
568 
569 	if (mount_early_fs(zlogp, zoneid, "swap", "/etc/svc/volatile",
570 	    "tmpfs") != 0)
571 		return (-1);
572 
573 	if (mount_early_fs(zlogp, zoneid, "mnttab", "/etc/mnttab",
574 	    "mntfs") != 0)
575 		return (-1);
576 
577 	return (0);
578 }
579 
580 static int
581 zone_bootup(zlog_t *zlogp, const char *bootargs)
582 {
583 	zoneid_t zoneid;
584 	struct stat st;
585 	char zroot[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
586 	char nbootargs[BOOTARGS_MAX];
587 	int err;
588 
589 	if (init_console_slave(zlogp) != 0)
590 		return (-1);
591 	reset_slave_terminal(zlogp);
592 
593 	if ((zoneid = getzoneidbyname(zone_name)) == -1) {
594 		zerror(zlogp, B_TRUE, "unable to get zoneid");
595 		return (-1);
596 	}
597 
598 	if (zone_mount_early(zlogp, zoneid) != 0)
599 		return (-1);
600 
601 	err = filter_bootargs(zlogp, bootargs, nbootargs, init_file,
602 	    bad_boot_arg);
603 	if (err == Z_INVAL)
604 		eventstream_write(Z_EVT_ZONE_BADARGS);
605 	else if (err != Z_OK)
606 		return (-1);
607 
608 	assert(init_file[0] != '\0');
609 
610 	/*
611 	 * Try to anticipate possible problems: Make sure whatever binary
612 	 * is supposed to be init is executable.
613 	 */
614 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
615 		zerror(zlogp, B_FALSE, "unable to determine zone root");
616 		return (-1);
617 	}
618 	(void) snprintf(initpath, sizeof (initpath), "%s%s", zroot, init_file);
619 
620 	if (stat(initpath, &st) == -1) {
621 		zerror(zlogp, B_TRUE, "could not stat %s", initpath);
622 		return (-1);
623 	}
624 
625 	if ((st.st_mode & S_IXUSR) == 0) {
626 		zerror(zlogp, B_FALSE, "%s is not executable", initpath);
627 		return (-1);
628 	}
629 
630 	if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
631 		zerror(zlogp, B_TRUE, "could not set zone boot file");
632 		return (-1);
633 	}
634 
635 	if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
636 		zerror(zlogp, B_TRUE, "could not set zone boot arguments");
637 		return (-1);
638 	}
639 
640 	if (zone_boot(zoneid) == -1) {
641 		zerror(zlogp, B_TRUE, "unable to boot zone");
642 		return (-1);
643 	}
644 
645 	return (0);
646 }
647 
648 static int
649 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd)
650 {
651 	int err;
652 
653 	if (vplat_teardown(zlogp, unmount_cmd) != 0) {
654 		if (!bringup_failure_recovery)
655 			zerror(zlogp, B_FALSE, "unable to destroy zone");
656 		return (-1);
657 	}
658 
659 	if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
660 		zerror(zlogp, B_FALSE, "destroying snapshot: %s",
661 		    zonecfg_strerror(err));
662 
663 	return (0);
664 }
665 
666 /*
667  * Generate AUE_zone_state for a command that boots a zone.
668  */
669 static void
670 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
671     char *new_state)
672 {
673 	adt_session_data_t	*ah;
674 	adt_event_data_t	*event;
675 	int			pass_fail, fail_reason;
676 
677 	if (!adt_audit_enabled())
678 		return;
679 
680 	if (return_val == 0) {
681 		pass_fail = ADT_SUCCESS;
682 		fail_reason = ADT_SUCCESS;
683 	} else {
684 		pass_fail = ADT_FAILURE;
685 		fail_reason = ADT_FAIL_VALUE_PROGRAM;
686 	}
687 
688 	if (adt_start_session(&ah, NULL, 0)) {
689 		zerror(zlogp, B_TRUE, gettext("audit failure."));
690 		return;
691 	}
692 	if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
693 		zerror(zlogp, B_TRUE, gettext("audit failure."));
694 		(void) adt_end_session(ah);
695 		return;
696 	}
697 
698 	event = adt_alloc_event(ah, ADT_zone_state);
699 	if (event == NULL) {
700 		zerror(zlogp, B_TRUE, gettext("audit failure."));
701 		(void) adt_end_session(ah);
702 		return;
703 	}
704 	event->adt_zone_state.zonename = zone_name;
705 	event->adt_zone_state.new_state = new_state;
706 
707 	if (adt_put_event(event, pass_fail, fail_reason))
708 		zerror(zlogp, B_TRUE, gettext("audit failure."));
709 
710 	adt_free_event(event);
711 
712 	(void) adt_end_session(ah);
713 }
714 
715 /*
716  * The main routine for the door server that deals with zone state transitions.
717  */
718 /* ARGSUSED */
719 static void
720 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
721     uint_t n_desc)
722 {
723 	ucred_t *uc = NULL;
724 	const priv_set_t *eset;
725 
726 	zone_state_t zstate;
727 	zone_cmd_t cmd;
728 	zone_cmd_arg_t *zargp;
729 
730 	boolean_t kernelcall;
731 
732 	int rval = -1;
733 	uint64_t uniqid;
734 	zoneid_t zoneid = -1;
735 	zlog_t zlog;
736 	zlog_t *zlogp;
737 	zone_cmd_rval_t *rvalp;
738 	size_t rlen = getpagesize(); /* conservative */
739 
740 	/* LINTED E_BAD_PTR_CAST_ALIGN */
741 	zargp = (zone_cmd_arg_t *)args;
742 
743 	/*
744 	 * When we get the door unref message, we've fdetach'd the door, and
745 	 * it is time for us to shut down zoneadmd.
746 	 */
747 	if (zargp == DOOR_UNREF_DATA) {
748 		/*
749 		 * See comment at end of main() for info on the last rites.
750 		 */
751 		exit(0);
752 	}
753 
754 	if (zargp == NULL) {
755 		(void) door_return(NULL, 0, 0, 0);
756 	}
757 
758 	rvalp = alloca(rlen);
759 	bzero(rvalp, rlen);
760 	zlog.logfile = NULL;
761 	zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
762 	zlog.buf = rvalp->errbuf;
763 	zlog.log = zlog.buf;
764 	/* defer initialization of zlog.locale until after credential check */
765 	zlogp = &zlog;
766 
767 	if (alen != sizeof (zone_cmd_arg_t)) {
768 		/*
769 		 * This really shouldn't be happening.
770 		 */
771 		zerror(&logsys, B_FALSE, "argument size (%d bytes) "
772 		    "unexpected (expected %d bytes)", alen,
773 		    sizeof (zone_cmd_arg_t));
774 		goto out;
775 	}
776 	cmd = zargp->cmd;
777 
778 	if (door_ucred(&uc) != 0) {
779 		zerror(&logsys, B_TRUE, "door_ucred");
780 		goto out;
781 	}
782 	eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
783 	if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
784 	    (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
785 	    ucred_geteuid(uc) != 0)) {
786 		zerror(&logsys, B_FALSE, "insufficient privileges");
787 		goto out;
788 	}
789 
790 	kernelcall = ucred_getpid(uc) == 0;
791 
792 	/*
793 	 * This is safe because we only use a zlog_t throughout the
794 	 * duration of a door call; i.e., by the time the pointer
795 	 * might become invalid, the door call would be over.
796 	 */
797 	zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
798 
799 	(void) mutex_lock(&lock);
800 
801 	/*
802 	 * Once we start to really die off, we don't want more connections.
803 	 */
804 	if (in_death_throes) {
805 		(void) mutex_unlock(&lock);
806 		ucred_free(uc);
807 		(void) door_return(NULL, 0, 0, 0);
808 		thr_exit(NULL);
809 	}
810 
811 	/*
812 	 * Check for validity of command.
813 	 */
814 	if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_REBOOT &&
815 	    cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
816 	    cmd != Z_UNMOUNT) {
817 		zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
818 		goto out;
819 	}
820 
821 	if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
822 		/*
823 		 * Can't happen
824 		 */
825 		zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
826 		    cmd);
827 		goto out;
828 	}
829 	/*
830 	 * We ignore the possibility of someone calling zone_create(2)
831 	 * explicitly; all requests must come through zoneadmd.
832 	 */
833 	if (zone_get_state(zone_name, &zstate) != Z_OK) {
834 		/*
835 		 * Something terribly wrong happened
836 		 */
837 		zerror(&logsys, B_FALSE, "unable to determine state of zone");
838 		goto out;
839 	}
840 
841 	if (kernelcall) {
842 		/*
843 		 * Kernel-initiated requests may lose their validity if the
844 		 * zone_t the kernel was referring to has gone away.
845 		 */
846 		if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
847 		    zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
848 		    sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
849 			/*
850 			 * We're not talking about the same zone. The request
851 			 * must have arrived too late.  Return error.
852 			 */
853 			rval = -1;
854 			goto out;
855 		}
856 		zlogp = &logsys;	/* Log errors to syslog */
857 	}
858 
859 	switch (zstate) {
860 	case ZONE_STATE_CONFIGURED:
861 	case ZONE_STATE_INCOMPLETE:
862 		/*
863 		 * Not our area of expertise; we just print a nice message
864 		 * and die off.
865 		 */
866 		zerror(zlogp, B_FALSE,
867 		    "%s operation is invalid for zones in state '%s'",
868 		    z_cmd_name(cmd), zone_state_str(zstate));
869 		break;
870 
871 	case ZONE_STATE_INSTALLED:
872 		switch (cmd) {
873 		case Z_READY:
874 			rval = zone_ready(zlogp, B_FALSE);
875 			if (rval == 0)
876 				eventstream_write(Z_EVT_ZONE_READIED);
877 			break;
878 		case Z_BOOT:
879 			eventstream_write(Z_EVT_ZONE_BOOTING);
880 			if ((rval = zone_ready(zlogp, B_FALSE)) == 0)
881 				rval = zone_bootup(zlogp, zargp->bootbuf);
882 			audit_put_record(zlogp, uc, rval, "boot");
883 			if (rval != 0) {
884 				bringup_failure_recovery = B_TRUE;
885 				(void) zone_halt(zlogp, B_FALSE);
886 				eventstream_write(Z_EVT_ZONE_BOOTFAILED);
887 			}
888 			break;
889 		case Z_HALT:
890 			if (kernelcall)	/* Invalid; can't happen */
891 				abort();
892 			/*
893 			 * We could have two clients racing to halt this
894 			 * zone; the second client loses, but his request
895 			 * doesn't fail, since the zone is now in the desired
896 			 * state.
897 			 */
898 			zerror(zlogp, B_FALSE, "zone is already halted");
899 			rval = 0;
900 			break;
901 		case Z_REBOOT:
902 			if (kernelcall)	/* Invalid; can't happen */
903 				abort();
904 			zerror(zlogp, B_FALSE, "%s operation is invalid "
905 			    "for zones in state '%s'", z_cmd_name(cmd),
906 			    zone_state_str(zstate));
907 			rval = -1;
908 			break;
909 		case Z_NOTE_UNINSTALLING:
910 			if (kernelcall)	/* Invalid; can't happen */
911 				abort();
912 			/*
913 			 * Tell the console to print out a message about this.
914 			 * Once it does, we will be in_death_throes.
915 			 */
916 			eventstream_write(Z_EVT_ZONE_UNINSTALLING);
917 			break;
918 		case Z_MOUNT:
919 			if (kernelcall)	/* Invalid; can't happen */
920 				abort();
921 			rval = zone_ready(zlogp, B_TRUE);
922 			if (rval == 0) {
923 				eventstream_write(Z_EVT_ZONE_READIED);
924 				rval = zone_mount_early(zlogp, zone_id);
925 			}
926 
927 			/*
928 			 * Ordinarily, /dev/fd would be mounted inside the zone
929 			 * by svc:/system/filesystem/usr:default, but since
930 			 * we're not booting the zone, we need to do this
931 			 * manually.
932 			 */
933 			if (rval == 0)
934 				rval = mount_early_fs(zlogp, zone_id, "fd",
935 				    "/dev/fd", "fd");
936 			break;
937 		case Z_UNMOUNT:
938 			if (kernelcall)	/* Invalid; can't happen */
939 				abort();
940 			zerror(zlogp, B_FALSE, "zone is already unmounted");
941 			rval = 0;
942 			break;
943 		}
944 		break;
945 
946 	case ZONE_STATE_READY:
947 		switch (cmd) {
948 		case Z_READY:
949 			/*
950 			 * We could have two clients racing to ready this
951 			 * zone; the second client loses, but his request
952 			 * doesn't fail, since the zone is now in the desired
953 			 * state.
954 			 */
955 			zerror(zlogp, B_FALSE, "zone is already ready");
956 			rval = 0;
957 			break;
958 		case Z_BOOT:
959 			(void) strlcpy(boot_args, zargp->bootbuf,
960 			    sizeof (boot_args));
961 			eventstream_write(Z_EVT_ZONE_BOOTING);
962 			rval = zone_bootup(zlogp, zargp->bootbuf);
963 			audit_put_record(zlogp, uc, rval, "boot");
964 			if (rval != 0) {
965 				bringup_failure_recovery = B_TRUE;
966 				(void) zone_halt(zlogp, B_FALSE);
967 				eventstream_write(Z_EVT_ZONE_BOOTFAILED);
968 			}
969 			boot_args[0] = '\0';
970 			break;
971 		case Z_HALT:
972 			if (kernelcall)	/* Invalid; can't happen */
973 				abort();
974 			if ((rval = zone_halt(zlogp, B_FALSE)) != 0)
975 				break;
976 			eventstream_write(Z_EVT_ZONE_HALTED);
977 			break;
978 		case Z_REBOOT:
979 		case Z_NOTE_UNINSTALLING:
980 		case Z_MOUNT:
981 		case Z_UNMOUNT:
982 			if (kernelcall)	/* Invalid; can't happen */
983 				abort();
984 			zerror(zlogp, B_FALSE, "%s operation is invalid "
985 			    "for zones in state '%s'", z_cmd_name(cmd),
986 			    zone_state_str(zstate));
987 			rval = -1;
988 			break;
989 		}
990 		break;
991 
992 	case ZONE_STATE_MOUNTED:
993 		switch (cmd) {
994 		case Z_UNMOUNT:
995 			if (kernelcall)	/* Invalid; can't happen */
996 				abort();
997 			rval = zone_halt(zlogp, B_TRUE);
998 			if (rval == 0) {
999 				eventstream_write(Z_EVT_ZONE_HALTED);
1000 				(void) sema_post(&scratch_sem);
1001 			}
1002 			break;
1003 		default:
1004 			if (kernelcall)	/* Invalid; can't happen */
1005 				abort();
1006 			zerror(zlogp, B_FALSE, "%s operation is invalid "
1007 			    "for zones in state '%s'", z_cmd_name(cmd),
1008 			    zone_state_str(zstate));
1009 			rval = -1;
1010 			break;
1011 		}
1012 		break;
1013 
1014 	case ZONE_STATE_RUNNING:
1015 	case ZONE_STATE_SHUTTING_DOWN:
1016 	case ZONE_STATE_DOWN:
1017 		switch (cmd) {
1018 		case Z_READY:
1019 			if ((rval = zone_halt(zlogp, B_FALSE)) != 0)
1020 				break;
1021 			if ((rval = zone_ready(zlogp, B_FALSE)) == 0)
1022 				eventstream_write(Z_EVT_ZONE_READIED);
1023 			else
1024 				eventstream_write(Z_EVT_ZONE_HALTED);
1025 			break;
1026 		case Z_BOOT:
1027 			/*
1028 			 * We could have two clients racing to boot this
1029 			 * zone; the second client loses, but his request
1030 			 * doesn't fail, since the zone is now in the desired
1031 			 * state.
1032 			 */
1033 			zerror(zlogp, B_FALSE, "zone is already booted");
1034 			rval = 0;
1035 			break;
1036 		case Z_HALT:
1037 			if ((rval = zone_halt(zlogp, B_FALSE)) != 0)
1038 				break;
1039 			eventstream_write(Z_EVT_ZONE_HALTED);
1040 			break;
1041 		case Z_REBOOT:
1042 			(void) strlcpy(boot_args, zargp->bootbuf,
1043 			    sizeof (boot_args));
1044 			eventstream_write(Z_EVT_ZONE_REBOOTING);
1045 			if ((rval = zone_halt(zlogp, B_FALSE)) != 0) {
1046 				eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1047 				boot_args[0] = '\0';
1048 				break;
1049 			}
1050 			if ((rval = zone_ready(zlogp, B_FALSE)) != 0) {
1051 				eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1052 				boot_args[0] = '\0';
1053 				break;
1054 			}
1055 			rval = zone_bootup(zlogp, zargp->bootbuf);
1056 			audit_put_record(zlogp, uc, rval, "reboot");
1057 			if (rval != 0) {
1058 				(void) zone_halt(zlogp, B_FALSE);
1059 				eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1060 			}
1061 			boot_args[0] = '\0';
1062 			break;
1063 		case Z_NOTE_UNINSTALLING:
1064 		case Z_MOUNT:
1065 		case Z_UNMOUNT:
1066 			zerror(zlogp, B_FALSE, "%s operation is invalid "
1067 			    "for zones in state '%s'", z_cmd_name(cmd),
1068 			    zone_state_str(zstate));
1069 			rval = -1;
1070 			break;
1071 		}
1072 		break;
1073 	default:
1074 		abort();
1075 	}
1076 
1077 	/*
1078 	 * Because the state of the zone may have changed, we make sure
1079 	 * to wake the console poller, which is in charge of initiating
1080 	 * the shutdown procedure as necessary.
1081 	 */
1082 	eventstream_write(Z_EVT_NULL);
1083 
1084 out:
1085 	(void) mutex_unlock(&lock);
1086 	if (kernelcall) {
1087 		rvalp = NULL;
1088 		rlen = 0;
1089 	} else {
1090 		rvalp->rval = rval;
1091 	}
1092 	if (uc != NULL)
1093 		ucred_free(uc);
1094 	(void) door_return((char *)rvalp, rlen, NULL, 0);
1095 	thr_exit(NULL);
1096 }
1097 
1098 static int
1099 setup_door(zlog_t *zlogp)
1100 {
1101 	if ((zone_door = door_create(server, NULL,
1102 	    DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
1103 		zerror(zlogp, B_TRUE, "%s failed", "door_create");
1104 		return (-1);
1105 	}
1106 	(void) fdetach(zone_door_path);
1107 
1108 	if (fattach(zone_door, zone_door_path) != 0) {
1109 		zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
1110 		(void) door_revoke(zone_door);
1111 		(void) fdetach(zone_door_path);
1112 		zone_door = -1;
1113 		return (-1);
1114 	}
1115 	return (0);
1116 }
1117 
1118 /*
1119  * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1120  * is where zoneadmd itself will check to see that another instance of
1121  * zoneadmd isn't already controlling this zone.
1122  *
1123  * The idea here is that we want to open the path to which we will
1124  * attach our door, lock it, and then make sure that no-one has beat us
1125  * to fattach(3c)ing onto it.
1126  *
1127  * fattach(3c) is really a mount, so there are actually two possible
1128  * vnodes we could be dealing with.  Our strategy is as follows:
1129  *
1130  * - If the file we opened is a regular file (common case):
1131  * 	There is no fattach(3c)ed door, so we have a chance of becoming
1132  * 	the managing zoneadmd. We attempt to lock the file: if it is
1133  * 	already locked, that means someone else raced us here, so we
1134  * 	lose and give up.  zoneadm(1m) will try to contact the zoneadmd
1135  * 	that beat us to it.
1136  *
1137  * - If the file we opened is a namefs file:
1138  * 	This means there is already an established door fattach(3c)'ed
1139  * 	to the rendezvous path.  We've lost the race, so we give up.
1140  * 	Note that in this case we also try to grab the file lock, and
1141  * 	will succeed in acquiring it since the vnode locked by the
1142  * 	"winning" zoneadmd was a regular one, and the one we locked was
1143  * 	the fattach(3c)'ed door node.  At any rate, no harm is done, and
1144  * 	we just return to zoneadm(1m) which knows to retry.
1145  */
1146 static int
1147 make_daemon_exclusive(zlog_t *zlogp)
1148 {
1149 	int doorfd = -1;
1150 	int err, ret = -1;
1151 	struct stat st;
1152 	struct flock flock;
1153 	zone_state_t zstate;
1154 
1155 top:
1156 	if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1157 		zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1158 		    zonecfg_strerror(err));
1159 		goto out;
1160 	}
1161 	if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
1162 	    S_IREAD|S_IWRITE)) < 0) {
1163 		zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
1164 		goto out;
1165 	}
1166 	if (fstat(doorfd, &st) < 0) {
1167 		zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
1168 		goto out;
1169 	}
1170 	/*
1171 	 * Lock the file to synchronize with other zoneadmd
1172 	 */
1173 	flock.l_type = F_WRLCK;
1174 	flock.l_whence = SEEK_SET;
1175 	flock.l_start = (off_t)0;
1176 	flock.l_len = (off_t)0;
1177 	if (fcntl(doorfd, F_SETLK, &flock) < 0) {
1178 		/*
1179 		 * Someone else raced us here and grabbed the lock file
1180 		 * first.  A warning here is inappropriate since nothing
1181 		 * went wrong.
1182 		 */
1183 		goto out;
1184 	}
1185 
1186 	if (strcmp(st.st_fstype, "namefs") == 0) {
1187 		struct door_info info;
1188 
1189 		/*
1190 		 * There is already something fattach()'ed to this file.
1191 		 * Lets see what the door is up to.
1192 		 */
1193 		if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
1194 			/*
1195 			 * Another zoneadmd process seems to be in
1196 			 * control of the situation and we don't need to
1197 			 * be here.  A warning here is inappropriate
1198 			 * since nothing went wrong.
1199 			 *
1200 			 * If the door has been revoked, the zoneadmd
1201 			 * process currently managing the zone is going
1202 			 * away.  We'll return control to zoneadm(1m)
1203 			 * which will try again (by which time zoneadmd
1204 			 * will hopefully have exited).
1205 			 */
1206 			goto out;
1207 		}
1208 
1209 		/*
1210 		 * If we got this far, there's a fattach(3c)'ed door
1211 		 * that belongs to a process that has exited, which can
1212 		 * happen if the previous zoneadmd died unexpectedly.
1213 		 *
1214 		 * Let user know that something is amiss, but that we can
1215 		 * recover; if the zone is in the installed state, then don't
1216 		 * message, since having a running zoneadmd isn't really
1217 		 * expected/needed.  We want to keep occurences of this message
1218 		 * limited to times when zoneadmd is picking back up from a
1219 		 * zoneadmd that died while the zone was in some non-trivial
1220 		 * state.
1221 		 */
1222 		if (zstate > ZONE_STATE_INSTALLED) {
1223 			zerror(zlogp, B_FALSE,
1224 			    "zone '%s': WARNING: zone is in state '%s', but "
1225 			    "zoneadmd does not appear to be available; "
1226 			    "restarted zoneadmd to recover.",
1227 			    zone_name, zone_state_str(zstate));
1228 		}
1229 
1230 		(void) fdetach(zone_door_path);
1231 		(void) close(doorfd);
1232 		goto top;
1233 	}
1234 	ret = 0;
1235 out:
1236 	(void) close(doorfd);
1237 	return (ret);
1238 }
1239 
1240 int
1241 main(int argc, char *argv[])
1242 {
1243 	int opt;
1244 	zoneid_t zid;
1245 	priv_set_t *privset;
1246 	zone_state_t zstate;
1247 	char parents_locale[MAXPATHLEN];
1248 	int err;
1249 
1250 	pid_t pid;
1251 	sigset_t blockset;
1252 	sigset_t block_cld;
1253 
1254 	struct {
1255 		sema_t sem;
1256 		int status;
1257 		zlog_t log;
1258 	} *shstate;
1259 	size_t shstatelen = getpagesize();
1260 
1261 	zlog_t errlog;
1262 	zlog_t *zlogp;
1263 
1264 	int ctfd;
1265 
1266 	progname = get_execbasename(argv[0]);
1267 
1268 	/*
1269 	 * Make sure stderr is unbuffered
1270 	 */
1271 	(void) setbuffer(stderr, NULL, 0);
1272 
1273 	/*
1274 	 * Get out of the way of mounted filesystems, since we will daemonize
1275 	 * soon.
1276 	 */
1277 	(void) chdir("/");
1278 
1279 	/*
1280 	 * Use the default system umask per PSARC 1998/110 rather than
1281 	 * anything that may have been set by the caller.
1282 	 */
1283 	(void) umask(CMASK);
1284 
1285 	/*
1286 	 * Initially we want to use our parent's locale.
1287 	 */
1288 	(void) setlocale(LC_ALL, "");
1289 	(void) textdomain(TEXT_DOMAIN);
1290 	(void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
1291 	    sizeof (parents_locale));
1292 
1293 	/*
1294 	 * This zlog_t is used for writing to stderr
1295 	 */
1296 	errlog.logfile = stderr;
1297 	errlog.buflen = errlog.loglen = 0;
1298 	errlog.buf = errlog.log = NULL;
1299 	errlog.locale = parents_locale;
1300 
1301 	/*
1302 	 * We start off writing to stderr until we're ready to daemonize.
1303 	 */
1304 	zlogp = &errlog;
1305 
1306 	/*
1307 	 * Process options.
1308 	 */
1309 	while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
1310 		switch (opt) {
1311 		case 'R':
1312 			zonecfg_set_root(optarg);
1313 			break;
1314 		case 'z':
1315 			zone_name = optarg;
1316 			break;
1317 		default:
1318 			usage();
1319 		}
1320 	}
1321 
1322 	if (zone_name == NULL)
1323 		usage();
1324 
1325 	/*
1326 	 * Because usage() prints directly to stderr, it has gettext()
1327 	 * wrapping, which depends on the locale.  But since zerror() calls
1328 	 * localize() which tweaks the locale, it is not safe to call zerror()
1329 	 * until after the last call to usage().  Fortunately, the last call
1330 	 * to usage() is just above and the first call to zerror() is just
1331 	 * below.  Don't mess this up.
1332 	 */
1333 	if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
1334 		zerror(zlogp, B_FALSE, "cannot manage the %s zone",
1335 		    GLOBAL_ZONENAME);
1336 		return (1);
1337 	}
1338 
1339 	if (zone_get_id(zone_name, &zid) != 0) {
1340 		zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
1341 		    zonecfg_strerror(Z_NO_ZONE));
1342 		return (1);
1343 	}
1344 
1345 	if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1346 		zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1347 		    zonecfg_strerror(err));
1348 		return (1);
1349 	}
1350 	if (zstate < ZONE_STATE_INSTALLED) {
1351 		zerror(zlogp, B_FALSE,
1352 		    "cannot manage a zone which is in state '%s'",
1353 		    zone_state_str(zstate));
1354 		return (1);
1355 	}
1356 
1357 	/*
1358 	 * Check that we have all privileges.  It would be nice to pare
1359 	 * this down, but this is at least a first cut.
1360 	 */
1361 	if ((privset = priv_allocset()) == NULL) {
1362 		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
1363 		return (1);
1364 	}
1365 
1366 	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1367 		zerror(zlogp, B_TRUE, "%s failed", "getppriv");
1368 		priv_freeset(privset);
1369 		return (1);
1370 	}
1371 
1372 	if (priv_isfullset(privset) == B_FALSE) {
1373 		zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
1374 		    "run this command (all privs required)");
1375 		priv_freeset(privset);
1376 		return (1);
1377 	}
1378 	priv_freeset(privset);
1379 
1380 	if (mkzonedir(zlogp) != 0)
1381 		return (1);
1382 
1383 	/*
1384 	 * Pre-fork: setup shared state
1385 	 */
1386 	if ((shstate = (void *)mmap(NULL, shstatelen,
1387 	    PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
1388 	    MAP_FAILED) {
1389 		zerror(zlogp, B_TRUE, "%s failed", "mmap");
1390 		return (1);
1391 	}
1392 	if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
1393 		zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
1394 		(void) munmap((char *)shstate, shstatelen);
1395 		return (1);
1396 	}
1397 	shstate->log.logfile = NULL;
1398 	shstate->log.buflen = shstatelen - sizeof (*shstate);
1399 	shstate->log.loglen = shstate->log.buflen;
1400 	shstate->log.buf = (char *)shstate + sizeof (*shstate);
1401 	shstate->log.log = shstate->log.buf;
1402 	shstate->log.locale = parents_locale;
1403 	shstate->status = -1;
1404 
1405 	/*
1406 	 * We need a SIGCHLD handler so the sema_wait() below will wake
1407 	 * up if the child dies without doing a sema_post().
1408 	 */
1409 	(void) sigset(SIGCHLD, sigchld);
1410 	/*
1411 	 * We must mask SIGCHLD until after we've coped with the fork
1412 	 * sufficiently to deal with it; otherwise we can race and
1413 	 * receive the signal before pid has been initialized
1414 	 * (yes, this really happens).
1415 	 */
1416 	(void) sigemptyset(&block_cld);
1417 	(void) sigaddset(&block_cld, SIGCHLD);
1418 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1419 
1420 	if ((ctfd = init_template()) == -1) {
1421 		zerror(zlogp, B_TRUE, "failed to create contract");
1422 		return (1);
1423 	}
1424 
1425 	/*
1426 	 * Do not let another thread localize a message while we are forking.
1427 	 */
1428 	(void) mutex_lock(&msglock);
1429 	pid = fork();
1430 	(void) mutex_unlock(&msglock);
1431 
1432 	/*
1433 	 * In all cases (parent, child, and in the event of an error) we
1434 	 * don't want to cause creation of contracts on subsequent fork()s.
1435 	 */
1436 	(void) ct_tmpl_clear(ctfd);
1437 	(void) close(ctfd);
1438 
1439 	if (pid == -1) {
1440 		zerror(zlogp, B_TRUE, "could not fork");
1441 		return (1);
1442 
1443 	} else if (pid > 0) { /* parent */
1444 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1445 		/*
1446 		 * This marks a window of vulnerability in which we receive
1447 		 * the SIGCLD before falling into sema_wait (normally we would
1448 		 * get woken up from sema_wait with EINTR upon receipt of
1449 		 * SIGCLD).  So we may need to use some other scheme like
1450 		 * sema_posting in the sigcld handler.
1451 		 * blech
1452 		 */
1453 		(void) sema_wait(&shstate->sem);
1454 		(void) sema_destroy(&shstate->sem);
1455 		if (shstate->status != 0)
1456 			(void) waitpid(pid, NULL, WNOHANG);
1457 		/*
1458 		 * It's ok if we die with SIGPIPE.  It's not like we could have
1459 		 * done anything about it.
1460 		 */
1461 		(void) fprintf(stderr, "%s", shstate->log.buf);
1462 		_exit(shstate->status == 0 ? 0 : 1);
1463 	}
1464 
1465 	/*
1466 	 * The child charges on.
1467 	 */
1468 	(void) sigset(SIGCHLD, SIG_DFL);
1469 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1470 
1471 	/*
1472 	 * SIGPIPE can be delivered if we write to a socket for which the
1473 	 * peer endpoint is gone.  That can lead to too-early termination
1474 	 * of zoneadmd, and that's not good eats.
1475 	 */
1476 	(void) sigset(SIGPIPE, SIG_IGN);
1477 	/*
1478 	 * Stop using stderr
1479 	 */
1480 	zlogp = &shstate->log;
1481 
1482 	/*
1483 	 * We don't need stdout/stderr from now on.
1484 	 */
1485 	closefrom(0);
1486 
1487 	/*
1488 	 * Initialize the syslog zlog_t.  This needs to be done after
1489 	 * the call to closefrom().
1490 	 */
1491 	logsys.buf = logsys.log = NULL;
1492 	logsys.buflen = logsys.loglen = 0;
1493 	logsys.logfile = NULL;
1494 	logsys.locale = DEFAULT_LOCALE;
1495 
1496 	openlog("zoneadmd", LOG_PID, LOG_DAEMON);
1497 
1498 	/*
1499 	 * The eventstream is used to publish state changes in the zone
1500 	 * from the door threads to the console I/O poller.
1501 	 */
1502 	if (eventstream_init() == -1) {
1503 		zerror(zlogp, B_TRUE, "unable to create eventstream");
1504 		goto child_out;
1505 	}
1506 
1507 	(void) snprintf(zone_door_path, sizeof (zone_door_path),
1508 	    "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
1509 
1510 	/*
1511 	 * See if another zoneadmd is running for this zone.  If not, then we
1512 	 * can now modify system state.
1513 	 */
1514 	if (make_daemon_exclusive(zlogp) == -1)
1515 		goto child_out;
1516 
1517 
1518 	/*
1519 	 * Create/join a new session; we need to be careful of what we do with
1520 	 * the console from now on so we don't end up being the session leader
1521 	 * for the terminal we're going to be handing out.
1522 	 */
1523 	(void) setsid();
1524 
1525 	/*
1526 	 * This thread shouldn't be receiving any signals; in particular,
1527 	 * SIGCHLD should be received by the thread doing the fork().
1528 	 */
1529 	(void) sigfillset(&blockset);
1530 	(void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
1531 
1532 	/*
1533 	 * Setup the console device and get ready to serve the console;
1534 	 * once this has completed, we're ready to let console clients
1535 	 * make an attempt to connect (they will block until
1536 	 * serve_console_sock() below gets called, and any pending
1537 	 * connection is accept()ed).
1538 	 */
1539 	if (!zonecfg_in_alt_root() && init_console(zlogp) == -1)
1540 		goto child_out;
1541 
1542 	/*
1543 	 * Take the lock now, so that when the door server gets going, we
1544 	 * are guaranteed that it won't take a request until we are sure
1545 	 * that everything is completely set up.  See the child_out: label
1546 	 * below to see why this matters.
1547 	 */
1548 	(void) mutex_lock(&lock);
1549 
1550 	/* Init semaphore for scratch zones. */
1551 	if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
1552 		zerror(zlogp, B_TRUE,
1553 		    "failed to initialize semaphore for scratch zone");
1554 		goto child_out;
1555 	}
1556 
1557 	/*
1558 	 * Note: door setup must occur *after* the console is setup.
1559 	 * This is so that as zlogin tests the door to see if zoneadmd
1560 	 * is ready yet, we know that the console will get serviced
1561 	 * once door_info() indicates that the door is "up".
1562 	 */
1563 	if (setup_door(zlogp) == -1)
1564 		goto child_out;
1565 
1566 	/*
1567 	 * Things seem OK so far; tell the parent process that we're done
1568 	 * with setup tasks.  This will cause the parent to exit, signalling
1569 	 * to zoneadm, zlogin, or whatever forked it that we are ready to
1570 	 * service requests.
1571 	 */
1572 	shstate->status = 0;
1573 	(void) sema_post(&shstate->sem);
1574 	(void) munmap((char *)shstate, shstatelen);
1575 	shstate = NULL;
1576 
1577 	(void) mutex_unlock(&lock);
1578 
1579 	/*
1580 	 * zlogp is now invalid, so reset it to the syslog logger.
1581 	 */
1582 	zlogp = &logsys;
1583 
1584 	/*
1585 	 * Now that we are free of any parents, switch to the default locale.
1586 	 */
1587 	(void) setlocale(LC_ALL, DEFAULT_LOCALE);
1588 
1589 	/*
1590 	 * At this point the setup portion of main() is basically done, so
1591 	 * we reuse this thread to manage the zone console.  When
1592 	 * serve_console() has returned, we are past the point of no return
1593 	 * in the life of this zoneadmd.
1594 	 */
1595 	if (zonecfg_in_alt_root()) {
1596 		/*
1597 		 * This is just awful, but mounted scratch zones don't (and
1598 		 * can't) have consoles.  We just wait for unmount instead.
1599 		 */
1600 		while (sema_wait(&scratch_sem) == EINTR)
1601 			;
1602 	} else {
1603 		serve_console(zlogp);
1604 		assert(in_death_throes);
1605 	}
1606 
1607 	/*
1608 	 * This is the next-to-last part of the exit interlock.  Upon calling
1609 	 * fdetach(), the door will go unreferenced; once any
1610 	 * outstanding requests (like the door thread doing Z_HALT) are
1611 	 * done, the door will get an UNREF notification; when it handles
1612 	 * the UNREF, the door server will cause the exit.
1613 	 */
1614 	assert(!MUTEX_HELD(&lock));
1615 	(void) fdetach(zone_door_path);
1616 	for (;;)
1617 		(void) pause();
1618 
1619 child_out:
1620 	assert(pid == 0);
1621 	if (shstate != NULL) {
1622 		shstate->status = -1;
1623 		(void) sema_post(&shstate->sem);
1624 		(void) munmap((char *)shstate, shstatelen);
1625 	}
1626 
1627 	/*
1628 	 * This might trigger an unref notification, but if so,
1629 	 * we are still holding the lock, so our call to exit will
1630 	 * ultimately win the race and will publish the right exit
1631 	 * code.
1632 	 */
1633 	if (zone_door != -1) {
1634 		assert(MUTEX_HELD(&lock));
1635 		(void) door_revoke(zone_door);
1636 		(void) fdetach(zone_door_path);
1637 	}
1638 	return (1); /* return from main() forcibly exits an MT process */
1639 }
1640