1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
25 */
26
27 /*
28 * zoneadmd manages zones; one zoneadmd process is launched for each
29 * non-global zone on the system. This daemon juggles four jobs:
30 *
31 * - Implement setup and teardown of the zone "virtual platform": mount and
32 * unmount filesystems; create and destroy network interfaces; communicate
33 * with devfsadmd to lay out devices for the zone; instantiate the zone
34 * console device; configure process runtime attributes such as resource
35 * controls, pool bindings, fine-grained privileges.
36 *
37 * - Launch the zone's init(1M) process.
38 *
39 * - Implement a door server; clients (like zoneadm) connect to the door
40 * server and request zone state changes. The kernel is also a client of
41 * this door server. A request to halt or reboot the zone which originates
42 * *inside* the zone results in a door upcall from the kernel into zoneadmd.
43 *
44 * One minor problem is that messages emitted by zoneadmd need to be passed
45 * back to the zoneadm process making the request. These messages need to
46 * be rendered in the client's locale; so, this is passed in as part of the
47 * request. The exception is the kernel upcall to zoneadmd, in which case
48 * messages are syslog'd.
49 *
50 * To make all of this work, the Makefile adds -a to xgettext to extract *all*
51 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
52 * strings which do not need to be translated.
53 *
54 * - Act as a console server for zlogin -C processes; see comments in zcons.c
55 * for more information about the zone console architecture.
56 *
57 * DESIGN NOTES
58 *
59 * Restart:
60 * A chief design constraint of zoneadmd is that it should be restartable in
61 * the case that the administrator kills it off, or it suffers a fatal error,
62 * without the running zone being impacted; this is akin to being able to
63 * reboot the service processor of a server without affecting the OS instance.
64 */
65
66 #include <sys/param.h>
67 #include <sys/mman.h>
68 #include <sys/types.h>
69 #include <sys/stat.h>
70 #include <sys/sysmacros.h>
71
72 #include <bsm/adt.h>
73 #include <bsm/adt_event.h>
74
75 #include <alloca.h>
76 #include <assert.h>
77 #include <errno.h>
78 #include <door.h>
79 #include <fcntl.h>
80 #include <locale.h>
81 #include <signal.h>
82 #include <stdarg.h>
83 #include <stdio.h>
84 #include <stdlib.h>
85 #include <string.h>
86 #include <strings.h>
87 #include <synch.h>
88 #include <syslog.h>
89 #include <thread.h>
90 #include <unistd.h>
91 #include <wait.h>
92 #include <limits.h>
93 #include <zone.h>
94 #include <libbrand.h>
95 #include <sys/brand.h>
96 #include <libcontract.h>
97 #include <libcontract_priv.h>
98 #include <sys/brand.h>
99 #include <sys/contract/process.h>
100 #include <sys/ctfs.h>
101 #include <libdladm.h>
102 #include <sys/dls_mgmt.h>
103 #include <libscf.h>
104
105 #include <libzonecfg.h>
106 #include <zonestat_impl.h>
107 #include "zoneadmd.h"
108
/*
 * File-scope state.  Objects declared without "static" have external
 * linkage.
 */
static char *progname;	/* program name printed by usage() */
char *zone_name; /* zone which we are managing */
char pool_name[MAXNAMELEN];
char default_brand[MAXNAMELEN];
char brand_name[MAXNAMELEN];	/* brand name handed to brand_open() */
boolean_t zone_isnative;
boolean_t zone_iscluster;
boolean_t zone_islabeled;
/* set by zone_graceful_shutdown(), cleared by zone_wait_shutdown() */
boolean_t shutdown_in_progress;
/* zone ID returned by vplat_create() during zone_ready() */
static zoneid_t zone_id;
/* handle passed to dladm_zone_boot() and dladm_zone_halt() */
dladm_handle_t dld_handle = NULL;

/* command run by brand_prestatechg(); an empty string means "no hook" */
static char pre_statechg_hook[2 * MAXPATHLEN];
/* command run by brand_poststatechg(); an empty string means "no hook" */
static char post_statechg_hook[2 * MAXPATHLEN];
char query_hook[2 * MAXPATHLEN];

/* zerror() sends messages logged through &logsys to syslog */
zlog_t logsys;

mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */

static sema_t scratch_sem; /* for scratch zones */

static char zone_door_path[MAXPATHLEN];
static int zone_door = -1;

boolean_t in_death_throes = B_FALSE; /* daemon is dying */
boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */

#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
#endif

/* locale that localize_msg() restores after each message lookup */
#define DEFAULT_LOCALE "C"
143
144 static const char *
z_cmd_name(zone_cmd_t zcmd)145 z_cmd_name(zone_cmd_t zcmd)
146 {
147 /* This list needs to match the enum in sys/zone.h */
148 static const char *zcmdstr[] = {
149 "ready", "boot", "forceboot", "reboot", "halt",
150 "note_uninstalling", "mount", "forcemount", "unmount",
151 "shutdown"
152 };
153
154 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr))
155 return ("unknown");
156 else
157 return (zcmdstr[(int)zcmd]);
158 }
159
/*
 * Return a pointer to the final path component of execfullname.
 *
 * Trailing '/' characters are removed by overwriting them with '\0', so
 * the caller's buffer is modified in that case.  The returned pointer
 * always refers to storage inside execfullname.
 */
static char *
get_execbasename(char *execfullname)
{
	char *slash;

	while ((slash = strrchr(execfullname, '/')) != NULL) {
		if (slash[1] != '\0')
			return (slash + 1);

		/* the name ended in '/': chop it off and look again */
		*slash = '\0';
	}

	/* no separator left, the whole string is the base name */
	return (execfullname);
}
182
183 static void
usage(void)184 usage(void)
185 {
186 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname);
187 (void) fprintf(stderr,
188 gettext("\tNote: %s should not be run directly.\n"), progname);
189 exit(2);
190 }
191
/*
 * SIGCHLD handler that deliberately does nothing; the signal number is
 * ignored.
 */
/* ARGSUSED */
static void
sigchld(int sig)
{
}
197
198 char *
localize_msg(char * locale,const char * msg)199 localize_msg(char *locale, const char *msg)
200 {
201 char *out;
202
203 (void) mutex_lock(&msglock);
204 (void) setlocale(LC_MESSAGES, locale);
205 out = gettext(msg);
206 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE);
207 (void) mutex_unlock(&msglock);
208 return (out);
209 }
210
211 /* PRINTFLIKE3 */
212 void
zerror(zlog_t * zlogp,boolean_t use_strerror,const char * fmt,...)213 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
214 {
215 va_list alist;
216 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
217 char *bp;
218 int saved_errno = errno;
219
220 if (zlogp == NULL)
221 return;
222 if (zlogp == &logsys)
223 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
224 zone_name);
225 else
226 buf[0] = '\0';
227 bp = &(buf[strlen(buf)]);
228
229 /*
230 * In theory, the locale pointer should be set to either "C" or a
231 * char array, so it should never be NULL
232 */
233 assert(zlogp->locale != NULL);
234 /* Locale is per process, but we are multi-threaded... */
235 fmt = localize_msg(zlogp->locale, fmt);
236
237 va_start(alist, fmt);
238 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
239 va_end(alist);
240 bp = &(buf[strlen(buf)]);
241 if (use_strerror)
242 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
243 strerror(saved_errno));
244 if (zlogp == &logsys) {
245 (void) syslog(LOG_ERR, "%s", buf);
246 } else if (zlogp->logfile != NULL) {
247 (void) fprintf(zlogp->logfile, "%s\n", buf);
248 } else {
249 size_t buflen;
250 size_t copylen;
251
252 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
253 copylen = MIN(buflen, zlogp->loglen);
254 zlogp->log += copylen;
255 zlogp->loglen -= copylen;
256 }
257 }
258
259 /*
260 * Emit a warning for any boot arguments which are unrecognized. Since
261 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
262 * put the arguments into an argv style array, use getopt to process them,
263 * and put the resultant argument string back into outargs.
264 *
265 * During the filtering, we pull out any arguments which are truly "boot"
266 * arguments, leaving only those which are to be passed intact to the
267 * progenitor process. The one we support at the moment is -i, which
268 * indicates to the kernel which program should be launched as 'init'.
269 *
270 * A return of Z_INVAL indicates specifically that the arguments are
271 * not valid; this is a non-fatal error. Except for Z_OK, all other return
272 * values are treated as fatal.
273 */
274 static int
filter_bootargs(zlog_t * zlogp,const char * inargs,char * outargs,char * init_file,char * badarg)275 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
276 char *init_file, char *badarg)
277 {
278 int argc = 0, argc_save;
279 int i;
280 int err;
281 char *arg, *lasts, **argv = NULL, **argv_save;
282 char zonecfg_args[BOOTARGS_MAX];
283 char scratchargs[BOOTARGS_MAX], *sargs;
284 char c;
285
286 bzero(outargs, BOOTARGS_MAX);
287 bzero(badarg, BOOTARGS_MAX);
288
289 /*
290 * If the user didn't specify transient boot arguments, check
291 * to see if there were any specified in the zone configuration,
292 * and use them if applicable.
293 */
294 if (inargs == NULL || inargs[0] == '\0') {
295 zone_dochandle_t handle;
296 if ((handle = zonecfg_init_handle()) == NULL) {
297 zerror(zlogp, B_TRUE,
298 "getting zone configuration handle");
299 return (Z_BAD_HANDLE);
300 }
301 err = zonecfg_get_snapshot_handle(zone_name, handle);
302 if (err != Z_OK) {
303 zerror(zlogp, B_FALSE,
304 "invalid configuration snapshot");
305 zonecfg_fini_handle(handle);
306 return (Z_BAD_HANDLE);
307 }
308
309 bzero(zonecfg_args, sizeof (zonecfg_args));
310 (void) zonecfg_get_bootargs(handle, zonecfg_args,
311 sizeof (zonecfg_args));
312 inargs = zonecfg_args;
313 zonecfg_fini_handle(handle);
314 }
315
316 if (strlen(inargs) >= BOOTARGS_MAX) {
317 zerror(zlogp, B_FALSE, "boot argument string too long");
318 return (Z_INVAL);
319 }
320
321 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
322 sargs = scratchargs;
323 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
324 sargs = NULL;
325 argc++;
326 }
327
328 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) {
329 zerror(zlogp, B_FALSE, "memory allocation failed");
330 return (Z_NOMEM);
331 }
332
333 argv_save = argv;
334 argc_save = argc;
335
336 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs));
337 sargs = scratchargs;
338 i = 0;
339 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) {
340 sargs = NULL;
341 if ((argv[i] = strdup(arg)) == NULL) {
342 err = Z_NOMEM;
343 zerror(zlogp, B_FALSE, "memory allocation failed");
344 goto done;
345 }
346 i++;
347 }
348
349 /*
350 * We preserve compatibility with the Solaris system boot behavior,
351 * which allows:
352 *
353 * # reboot kernel/unix -s -m verbose
354 *
355 * In this example, kernel/unix tells the booter what file to
356 * boot. We don't want reboot in a zone to be gratuitously different,
357 * so we silently ignore the boot file, if necessary.
358 */
359 if (argv[0] == NULL)
360 goto done;
361
362 assert(argv[0][0] != ' ');
363 assert(argv[0][0] != '\t');
364
365 if (argv[0][0] != '-' && argv[0][0] != '\0') {
366 argv = &argv[1];
367 argc--;
368 }
369
370 optind = 0;
371 opterr = 0;
372 err = Z_OK;
373 while ((c = getopt(argc, argv, "fi:m:s")) != -1) {
374 switch (c) {
375 case 'i':
376 /*
377 * -i is handled by the runtime and is not passed
378 * along to userland
379 */
380 (void) strlcpy(init_file, optarg, MAXPATHLEN);
381 break;
382 case 'f':
383 /* This has already been processed by zoneadm */
384 break;
385 case 'm':
386 case 's':
387 /* These pass through unmolested */
388 (void) snprintf(outargs, BOOTARGS_MAX,
389 "%s -%c %s ", outargs, c, optarg ? optarg : "");
390 break;
391 case '?':
392 /*
393 * We warn about unknown arguments but pass them
394 * along anyway-- if someone wants to develop their
395 * own init replacement, they can pass it whatever
396 * args they want.
397 */
398 err = Z_INVAL;
399 (void) snprintf(outargs, BOOTARGS_MAX,
400 "%s -%c", outargs, optopt);
401 (void) snprintf(badarg, BOOTARGS_MAX,
402 "%s -%c", badarg, optopt);
403 break;
404 }
405 }
406
407 /*
408 * For Solaris Zones we warn about and discard non-option arguments.
409 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar
410 * to the kernel, we concat up all the other remaining boot args.
411 * and warn on them as a group.
412 */
413 if (optind < argc) {
414 err = Z_INVAL;
415 while (optind < argc) {
416 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s",
417 badarg, strlen(badarg) > 0 ? " " : "",
418 argv[optind]);
419 optind++;
420 }
421 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot "
422 "arguments `%s'.", badarg);
423 }
424
425 done:
426 for (i = 0; i < argc_save; i++) {
427 if (argv_save[i] != NULL)
428 free(argv_save[i]);
429 }
430 free(argv_save);
431 return (err);
432 }
433
434
435 static int
mkzonedir(zlog_t * zlogp)436 mkzonedir(zlog_t *zlogp)
437 {
438 struct stat st;
439 /*
440 * We must create and lock everyone but root out of ZONES_TMPDIR
441 * since anyone can open any UNIX domain socket, regardless of
442 * its file system permissions. Sigh...
443 */
444 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
445 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR);
446 return (-1);
447 }
448 /* paranoia */
449 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) {
450 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR);
451 return (-1);
452 }
453 (void) chmod(ZONES_TMPDIR, S_IRWXU);
454 return (0);
455 }
456
457 /*
458 * Run the brand's pre-state change callback, if it exists.
459 */
460 static int
brand_prestatechg(zlog_t * zlogp,int state,int cmd)461 brand_prestatechg(zlog_t *zlogp, int state, int cmd)
462 {
463 char cmdbuf[2 * MAXPATHLEN];
464 const char *altroot;
465
466 if (pre_statechg_hook[0] == '\0')
467 return (0);
468
469 altroot = zonecfg_get_root();
470 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook,
471 state, cmd, altroot) > sizeof (cmdbuf))
472 return (-1);
473
474 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
475 return (-1);
476
477 return (0);
478 }
479
480 /*
481 * Run the brand's post-state change callback, if it exists.
482 */
483 static int
brand_poststatechg(zlog_t * zlogp,int state,int cmd)484 brand_poststatechg(zlog_t *zlogp, int state, int cmd)
485 {
486 char cmdbuf[2 * MAXPATHLEN];
487 const char *altroot;
488
489 if (post_statechg_hook[0] == '\0')
490 return (0);
491
492 altroot = zonecfg_get_root();
493 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
494 state, cmd, altroot) > sizeof (cmdbuf))
495 return (-1);
496
497 if (do_subproc(zlogp, cmdbuf, NULL) != 0)
498 return (-1);
499
500 return (0);
501 }
502
503 /*
504 * Notify zonestatd of the new zone. If zonestatd is not running, this
505 * will do nothing.
506 */
507 static void
notify_zonestatd(zoneid_t zoneid)508 notify_zonestatd(zoneid_t zoneid)
509 {
510 int cmd[2];
511 int fd;
512 door_arg_t params;
513
514 fd = open(ZS_DOOR_PATH, O_RDONLY);
515 if (fd < 0)
516 return;
517
518 cmd[0] = ZSD_CMD_NEW_ZONE;
519 cmd[1] = zoneid;
520 params.data_ptr = (char *)&cmd;
521 params.data_size = sizeof (cmd);
522 params.desc_ptr = NULL;
523 params.desc_num = 0;
524 params.rbuf = NULL;
525 params.rsize = NULL;
526 (void) door_call(fd, ¶ms);
527 (void) close(fd);
528 }
529
530 /*
531 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
532 * 'true' if this is being invoked as part of the processing for the "mount"
533 * subcommand.
534 */
535 static int
zone_ready(zlog_t * zlogp,zone_mnt_t mount_cmd,int zstate)536 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
537 {
538 int err;
539
540 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
541 return (-1);
542
543 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
544 zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
545 zonecfg_strerror(err));
546 goto bad;
547 }
548
549 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
550 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
551 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
552 zonecfg_strerror(err));
553 goto bad;
554 }
555 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
556 bringup_failure_recovery = B_TRUE;
557 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
558 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
559 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
560 zonecfg_strerror(err));
561 goto bad;
562 }
563
564 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
565 goto bad;
566
567 return (0);
568
569 bad:
570 /*
571 * If something goes wrong, we up the zones's state to the target
572 * state, READY, and then invoke the hook as if we're halting.
573 */
574 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
575 return (-1);
576 }
577
578 int
init_template(void)579 init_template(void)
580 {
581 int fd;
582 int err = 0;
583
584 fd = open64(CTFS_ROOT "/process/template", O_RDWR);
585 if (fd == -1)
586 return (-1);
587
588 /*
589 * For now, zoneadmd doesn't do anything with the contract.
590 * Deliver no events, don't inherit, and allow it to be orphaned.
591 */
592 err |= ct_tmpl_set_critical(fd, 0);
593 err |= ct_tmpl_set_informative(fd, 0);
594 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
595 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
596 if (err || ct_tmpl_activate(fd)) {
597 (void) close(fd);
598 return (-1);
599 }
600
601 return (fd);
602 }
603
/*
 * Context handed to mount_early_fs() through its opaque 'data' argument.
 */
typedef struct fs_callback {
	zlog_t *zlogp;		/* where mount_early_fs() logs errors */
	zoneid_t zoneid;	/* zone the mounting child enters */
	boolean_t mount_cmd;	/* if set, mount under "<zonepath>/lu" */
} fs_callback_t;
609
/*
 * Mount one filesystem from inside the zone.  zone_bootup() passes this
 * function to brand_platform_iter_mounts() as its callback.
 *
 *   data    pointer to an fs_callback_t (log handle, zone id, mount_cmd).
 *   spec    passed to mount() as its first argument.
 *   dir     mount point; validated against the zone's root path here and
 *           passed to mount() by the child after it has entered the zone.
 *   fstype  filesystem type.
 *   opt     mount option string, or NULL for none.
 *
 * The mount point is validated (and created when missing), then a child
 * process is forked under a fresh contract template, enters the zone, and
 * performs the mount.  The child reports the outcome through its exit
 * status: 0 on success, otherwise the errno of the failing call.
 *
 * Returns 0 on success and -1 on any failure.
 */
static int
mount_early_fs(void *data, const char *spec, const char *dir,
    const char *fstype, const char *opt)
{
	zlog_t *zlogp = ((fs_callback_t *)data)->zlogp;
	zoneid_t zoneid = ((fs_callback_t *)data)->zoneid;
	boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd;
	char rootpath[MAXPATHLEN];
	pid_t child;
	int child_status;
	int tmpl_fd;
	int rv;
	ctid_t ct;

	/* determine the zone rootpath */
	if (mount_cmd) {
		char zonepath[MAXPATHLEN];
		char luroot[MAXPATHLEN];

		if (zone_get_zonepath(zone_name,
		    zonepath, sizeof (zonepath)) != Z_OK) {
			zerror(zlogp, B_FALSE, "unable to determine zone path");
			return (-1);
		}

		/* for the mount command the root is "<zonepath>/lu" */
		(void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
		resolve_lofs(zlogp, luroot, sizeof (luroot));
		(void) strlcpy(rootpath, luroot, sizeof (rootpath));
	} else {
		if (zone_get_rootpath(zone_name,
		    rootpath, sizeof (rootpath)) != Z_OK) {
			zerror(zlogp, B_FALSE, "unable to determine zone root");
			return (-1);
		}
	}

	/* rv < 0: invalid mount point; rv > 0: path has to be created */
	if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) {
		zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
		    rootpath, dir);
		return (-1);
	} else if (rv > 0) {
		/* The mount point path doesn't exist, create it now. */
		if (make_one_dir(zlogp, rootpath, dir,
		    DEFAULT_DIR_MODE, DEFAULT_DIR_USER,
		    DEFAULT_DIR_GROUP) != 0) {
			zerror(zlogp, B_FALSE, "failed to create mount point");
			return (-1);
		}

		/*
		 * Now this might seem weird, but we need to invoke
		 * valid_mount_path() again. Why? Because it checks
		 * to make sure that the mount point path is canonical,
		 * which it can only do if the path exists, so now that
		 * we've created the path we have to verify it again.
		 */
		if ((rv = valid_mount_path(zlogp, rootpath, spec, dir,
		    fstype)) < 0) {
			zerror(zlogp, B_FALSE,
			    "%s%s is not a valid mount point", rootpath, dir);
			return (-1);
		}
	}

	/* activate a contract template before forking the child */
	if ((tmpl_fd = init_template()) == -1) {
		zerror(zlogp, B_TRUE, "failed to create contract");
		return (-1);
	}

	if ((child = fork()) == -1) {
		(void) ct_tmpl_clear(tmpl_fd);
		(void) close(tmpl_fd);
		zerror(zlogp, B_TRUE, "failed to fork");
		return (-1);

	} else if (child == 0) { /* child */
		char opt_buf[MAX_MNTOPT_STR];
		int optlen = 0;
		int mflag = MS_DATA;

		(void) ct_tmpl_clear(tmpl_fd);
		/*
		 * Even though there are no procs running in the zone, we
		 * do this for paranoia's sake.
		 */
		(void) closefrom(0);

		/* failures are reported to the parent as the exit status */
		if (zone_enter(zoneid) == -1) {
			_exit(errno);
		}
		if (opt != NULL) {
			/*
			 * The mount() system call is incredibly annoying.
			 * If options are specified, we need to copy them
			 * into a temporary buffer since the mount() system
			 * call will overwrite the options string. It will
			 * also fail if the new option string it wants to
			 * write is bigger than the one we passed in, so
			 * you must pass in a buffer of the maximum possible
			 * option string length. sigh.
			 */
			(void) strlcpy(opt_buf, opt, sizeof (opt_buf));
			opt = opt_buf;
			optlen = MAX_MNTOPT_STR;
			mflag = MS_OPTIONSTR;
		}
		if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
			_exit(errno);
		_exit(0);
	}

	/* parent */
	/* remember the latest contract id so it can be abandoned below */
	if (contract_latest(&ct) == -1)
		ct = -1;
	(void) ct_tmpl_clear(tmpl_fd);
	(void) close(tmpl_fd);
	if (waitpid(child, &child_status, 0) != child) {
		/* unexpected: we must have been signalled */
		(void) contract_abandon_id(ct);
		return (-1);
	}
	(void) contract_abandon_id(ct);
	/*
	 * NOTE(review): only the exit status is examined; WIFEXITED() is
	 * not checked, so how a signal-terminated child is treated depends
	 * on what WEXITSTATUS() yields for it -- confirm this is intended.
	 */
	if (WEXITSTATUS(child_status) != 0) {
		/* the child exited with its errno; restore it for zerror() */
		errno = WEXITSTATUS(child_status);
		zerror(zlogp, B_TRUE, "mount of %s failed", dir);
		return (-1);
	}

	return (0);
}
740
741 /*
742 * If retstr is not NULL, the output of the subproc is returned in the str,
743 * otherwise it is output using zerror(). Any memory allocated for retstr
744 * should be freed by the caller.
745 */
746 int
do_subproc(zlog_t * zlogp,char * cmdbuf,char ** retstr)747 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
748 {
749 char buf[1024]; /* arbitrary large amount */
750 char *inbuf;
751 FILE *file;
752 int status;
753 int rd_cnt;
754
755 if (retstr != NULL) {
756 if ((*retstr = malloc(1024)) == NULL) {
757 zerror(zlogp, B_FALSE, "out of memory");
758 return (-1);
759 }
760 inbuf = *retstr;
761 rd_cnt = 0;
762 } else {
763 inbuf = buf;
764 }
765
766 file = popen(cmdbuf, "r");
767 if (file == NULL) {
768 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
769 return (-1);
770 }
771
772 while (fgets(inbuf, 1024, file) != NULL) {
773 if (retstr == NULL) {
774 if (zlogp != &logsys)
775 zerror(zlogp, B_FALSE, "%s", inbuf);
776 } else {
777 char *p;
778
779 rd_cnt += 1024 - 1;
780 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
781 zerror(zlogp, B_FALSE, "out of memory");
782 (void) pclose(file);
783 return (-1);
784 }
785
786 *retstr = p;
787 inbuf = *retstr + rd_cnt;
788 }
789 }
790 status = pclose(file);
791
792 if (WIFSIGNALED(status)) {
793 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
794 "signal %d", cmdbuf, WTERMSIG(status));
795 return (-1);
796 }
797 assert(WIFEXITED(status));
798 if (WEXITSTATUS(status) == ZEXIT_EXEC) {
799 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf);
800 return (-1);
801 }
802 return (WEXITSTATUS(status));
803 }
804
805 static int
zone_bootup(zlog_t * zlogp,const char * bootargs,int zstate)806 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
807 {
808 zoneid_t zoneid;
809 struct stat st;
810 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
811 char nbootargs[BOOTARGS_MAX];
812 char cmdbuf[MAXPATHLEN];
813 fs_callback_t cb;
814 brand_handle_t bh;
815 zone_iptype_t iptype;
816 boolean_t links_loaded = B_FALSE;
817 dladm_status_t status;
818 char errmsg[DLADM_STRSIZE];
819 int err;
820 boolean_t restart_init;
821
822 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
823 return (-1);
824
825 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
826 zerror(zlogp, B_TRUE, "unable to get zoneid");
827 goto bad;
828 }
829
830 cb.zlogp = zlogp;
831 cb.zoneid = zoneid;
832 cb.mount_cmd = B_FALSE;
833
834 /* Get a handle to the brand info for this zone */
835 if ((bh = brand_open(brand_name)) == NULL) {
836 zerror(zlogp, B_FALSE, "unable to determine zone brand");
837 goto bad;
838 }
839
840 /*
841 * Get the list of filesystems to mount from the brand
842 * configuration. These mounts are done via a thread that will
843 * enter the zone, so they are done from within the context of the
844 * zone.
845 */
846 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) {
847 zerror(zlogp, B_FALSE, "unable to mount filesystems");
848 brand_close(bh);
849 goto bad;
850 }
851
852 /*
853 * Get the brand's boot callback if it exists.
854 */
855 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
856 zerror(zlogp, B_FALSE, "unable to determine zone path");
857 brand_close(bh);
858 goto bad;
859 }
860 (void) strcpy(cmdbuf, EXEC_PREFIX);
861 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
862 sizeof (cmdbuf) - EXEC_LEN) != 0) {
863 zerror(zlogp, B_FALSE,
864 "unable to determine branded zone's boot callback");
865 brand_close(bh);
866 goto bad;
867 }
868
869 /* Get the path for this zone's init(1M) (or equivalent) process. */
870 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
871 zerror(zlogp, B_FALSE,
872 "unable to determine zone's init(1M) location");
873 brand_close(bh);
874 goto bad;
875 }
876
877 /* See if this zone's brand should restart init if it dies. */
878 restart_init = brand_restartinit(bh);
879
880 brand_close(bh);
881
882 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file,
883 bad_boot_arg);
884 if (err == Z_INVAL)
885 eventstream_write(Z_EVT_ZONE_BADARGS);
886 else if (err != Z_OK)
887 goto bad;
888
889 assert(init_file[0] != '\0');
890
891 /* Try to anticipate possible problems: Make sure init is executable. */
892 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
893 zerror(zlogp, B_FALSE, "unable to determine zone root");
894 goto bad;
895 }
896
897 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file);
898
899 if (stat(initpath, &st) == -1) {
900 zerror(zlogp, B_TRUE, "could not stat %s", initpath);
901 goto bad;
902 }
903
904 if ((st.st_mode & S_IXUSR) == 0) {
905 zerror(zlogp, B_FALSE, "%s is not executable", initpath);
906 goto bad;
907 }
908
909 /*
910 * Exclusive stack zones interact with the dlmgmtd running in the
911 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is
912 * booting, and loads its datalinks from the zone's datalink
913 * configuration file.
914 */
915 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) {
916 status = dladm_zone_boot(dld_handle, zoneid);
917 if (status != DLADM_STATUS_OK) {
918 zerror(zlogp, B_FALSE, "unable to load zone datalinks: "
919 " %s", dladm_status2str(status, errmsg));
920 goto bad;
921 }
922 links_loaded = B_TRUE;
923 }
924
925 /*
926 * If there is a brand 'boot' callback, execute it now to give the
927 * brand one last chance to do any additional setup before the zone
928 * is booted.
929 */
930 if ((strlen(cmdbuf) > EXEC_LEN) &&
931 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
932 zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
933 goto bad;
934 }
935
936 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) {
937 zerror(zlogp, B_TRUE, "could not set zone boot file");
938 goto bad;
939 }
940
941 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) {
942 zerror(zlogp, B_TRUE, "could not set zone boot arguments");
943 goto bad;
944 }
945
946 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART,
947 NULL, 0) == -1) {
948 zerror(zlogp, B_TRUE, "could not set zone init-no-restart");
949 goto bad;
950 }
951
952 /*
953 * Inform zonestatd of a new zone so that it can install a door for
954 * the zone to contact it.
955 */
956 notify_zonestatd(zone_id);
957
958 if (zone_boot(zoneid) == -1) {
959 zerror(zlogp, B_TRUE, "unable to boot zone");
960 goto bad;
961 }
962
963 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
964 goto bad;
965
966 return (0);
967
968 bad:
969 /*
970 * If something goes wrong, we up the zones's state to the target
971 * state, RUNNING, and then invoke the hook as if we're halting.
972 */
973 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
974 if (links_loaded)
975 (void) dladm_zone_halt(dld_handle, zoneid);
976 return (-1);
977 }
978
979 static int
zone_halt(zlog_t * zlogp,boolean_t unmount_cmd,boolean_t rebooting,int zstate)980 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
981 {
982 int err;
983
984 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
985 return (-1);
986
987 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
988 if (!bringup_failure_recovery)
989 zerror(zlogp, B_FALSE, "unable to destroy zone");
990 return (-1);
991 }
992
993 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
994 zerror(zlogp, B_FALSE, "destroying snapshot: %s",
995 zonecfg_strerror(err));
996
997 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
998 return (-1);
999
1000 return (0);
1001 }
1002
1003 static int
zone_graceful_shutdown(zlog_t * zlogp)1004 zone_graceful_shutdown(zlog_t *zlogp)
1005 {
1006 zoneid_t zoneid;
1007 pid_t child;
1008 char cmdbuf[MAXPATHLEN];
1009 brand_handle_t bh = NULL;
1010 char zpath[MAXPATHLEN];
1011 ctid_t ct;
1012 int tmpl_fd;
1013 int child_status;
1014
1015 if (shutdown_in_progress) {
1016 zerror(zlogp, B_FALSE, "shutdown already in progress");
1017 return (-1);
1018 }
1019
1020 if ((zoneid = getzoneidbyname(zone_name)) == -1) {
1021 zerror(zlogp, B_TRUE, "unable to get zoneid");
1022 return (-1);
1023 }
1024
1025 /* Get a handle to the brand info for this zone */
1026 if ((bh = brand_open(brand_name)) == NULL) {
1027 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1028 return (-1);
1029 }
1030
1031 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
1032 zerror(zlogp, B_FALSE, "unable to determine zone path");
1033 brand_close(bh);
1034 return (-1);
1035 }
1036
1037 /*
1038 * If there is a brand 'shutdown' callback, execute it now to give the
1039 * brand a chance to cleanup any custom configuration.
1040 */
1041 (void) strcpy(cmdbuf, EXEC_PREFIX);
1042 if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
1043 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
1044 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
1045 }
1046 brand_close(bh);
1047
1048 if ((tmpl_fd = init_template()) == -1) {
1049 zerror(zlogp, B_TRUE, "failed to create contract");
1050 return (-1);
1051 }
1052
1053 if ((child = fork()) == -1) {
1054 (void) ct_tmpl_clear(tmpl_fd);
1055 (void) close(tmpl_fd);
1056 zerror(zlogp, B_TRUE, "failed to fork");
1057 return (-1);
1058 } else if (child == 0) {
1059 (void) ct_tmpl_clear(tmpl_fd);
1060 if (zone_enter(zoneid) == -1) {
1061 _exit(errno);
1062 }
1063 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL));
1064 }
1065
1066 if (contract_latest(&ct) == -1)
1067 ct = -1;
1068 (void) ct_tmpl_clear(tmpl_fd);
1069 (void) close(tmpl_fd);
1070
1071 if (waitpid(child, &child_status, 0) != child) {
1072 /* unexpected: we must have been signalled */
1073 (void) contract_abandon_id(ct);
1074 return (-1);
1075 }
1076
1077 (void) contract_abandon_id(ct);
1078 if (WEXITSTATUS(child_status) != 0) {
1079 errno = WEXITSTATUS(child_status);
1080 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1081 return (-1);
1082 }
1083
1084 shutdown_in_progress = B_TRUE;
1085
1086 return (0);
1087 }
1088
1089 static int
zone_wait_shutdown(zlog_t * zlogp)1090 zone_wait_shutdown(zlog_t *zlogp)
1091 {
1092 zone_state_t zstate;
1093 uint64_t *tm = NULL;
1094 scf_simple_prop_t *prop = NULL;
1095 int timeout;
1096 int tries;
1097 int rc = -1;
1098
1099 /* Get default stop timeout from SMF framework */
1100 timeout = SHUTDOWN_WAIT;
1101 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop",
1102 SCF_PROPERTY_TIMEOUT)) != NULL) {
1103 if ((tm = scf_simple_prop_next_count(prop)) != NULL) {
1104 if (tm != 0)
1105 timeout = *tm;
1106 }
1107 scf_simple_prop_free(prop);
1108 }
1109
1110 /* allow time for zone to shutdown cleanly */
1111 for (tries = 0; tries < timeout; tries ++) {
1112 (void) sleep(1);
1113 if (zone_get_state(zone_name, &zstate) == Z_OK &&
1114 zstate == ZONE_STATE_INSTALLED) {
1115 rc = 0;
1116 break;
1117 }
1118 }
1119
1120 if (rc != 0)
1121 zerror(zlogp, B_FALSE, "unable to shutdown zone");
1122
1123 shutdown_in_progress = B_FALSE;
1124
1125 return (rc);
1126 }
1127
1128
1129
1130 /*
1131 * Generate AUE_zone_state for a command that boots a zone.
1132 */
1133 static void
audit_put_record(zlog_t * zlogp,ucred_t * uc,int return_val,char * new_state)1134 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
1135 char *new_state)
1136 {
1137 adt_session_data_t *ah;
1138 adt_event_data_t *event;
1139 int pass_fail, fail_reason;
1140
1141 if (!adt_audit_enabled())
1142 return;
1143
1144 if (return_val == 0) {
1145 pass_fail = ADT_SUCCESS;
1146 fail_reason = ADT_SUCCESS;
1147 } else {
1148 pass_fail = ADT_FAILURE;
1149 fail_reason = ADT_FAIL_VALUE_PROGRAM;
1150 }
1151
1152 if (adt_start_session(&ah, NULL, 0)) {
1153 zerror(zlogp, B_TRUE, gettext("audit failure."));
1154 return;
1155 }
1156 if (adt_set_from_ucred(ah, uc, ADT_NEW)) {
1157 zerror(zlogp, B_TRUE, gettext("audit failure."));
1158 (void) adt_end_session(ah);
1159 return;
1160 }
1161
1162 event = adt_alloc_event(ah, ADT_zone_state);
1163 if (event == NULL) {
1164 zerror(zlogp, B_TRUE, gettext("audit failure."));
1165 (void) adt_end_session(ah);
1166 return;
1167 }
1168 event->adt_zone_state.zonename = zone_name;
1169 event->adt_zone_state.new_state = new_state;
1170
1171 if (adt_put_event(event, pass_fail, fail_reason))
1172 zerror(zlogp, B_TRUE, gettext("audit failure."));
1173
1174 adt_free_event(event);
1175
1176 (void) adt_end_session(ah);
1177 }
1178
1179 /*
1180 * The main routine for the door server that deals with zone state transitions.
1181 */
1182 /* ARGSUSED */
1183 static void
server(void * cookie,char * args,size_t alen,door_desc_t * dp,uint_t n_desc)1184 server(void *cookie, char *args, size_t alen, door_desc_t *dp,
1185 uint_t n_desc)
1186 {
1187 ucred_t *uc = NULL;
1188 const priv_set_t *eset;
1189
1190 zone_state_t zstate;
1191 zone_cmd_t cmd;
1192 zone_cmd_arg_t *zargp;
1193
1194 boolean_t kernelcall;
1195
1196 int rval = -1;
1197 uint64_t uniqid;
1198 zoneid_t zoneid = -1;
1199 zlog_t zlog;
1200 zlog_t *zlogp;
1201 zone_cmd_rval_t *rvalp;
1202 size_t rlen = getpagesize(); /* conservative */
1203 fs_callback_t cb;
1204 brand_handle_t bh;
1205 boolean_t wait_shut = B_FALSE;
1206
1207 /* LINTED E_BAD_PTR_CAST_ALIGN */
1208 zargp = (zone_cmd_arg_t *)args;
1209
1210 /*
1211 * When we get the door unref message, we've fdetach'd the door, and
1212 * it is time for us to shut down zoneadmd.
1213 */
1214 if (zargp == DOOR_UNREF_DATA) {
1215 /*
1216 * See comment at end of main() for info on the last rites.
1217 */
1218 exit(0);
1219 }
1220
1221 if (zargp == NULL) {
1222 (void) door_return(NULL, 0, 0, 0);
1223 }
1224
1225 rvalp = alloca(rlen);
1226 bzero(rvalp, rlen);
1227 zlog.logfile = NULL;
1228 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1;
1229 zlog.buf = rvalp->errbuf;
1230 zlog.log = zlog.buf;
1231 /* defer initialization of zlog.locale until after credential check */
1232 zlogp = &zlog;
1233
1234 if (alen != sizeof (zone_cmd_arg_t)) {
1235 /*
1236 * This really shouldn't be happening.
1237 */
1238 zerror(&logsys, B_FALSE, "argument size (%d bytes) "
1239 "unexpected (expected %d bytes)", alen,
1240 sizeof (zone_cmd_arg_t));
1241 goto out;
1242 }
1243 cmd = zargp->cmd;
1244
1245 if (door_ucred(&uc) != 0) {
1246 zerror(&logsys, B_TRUE, "door_ucred");
1247 goto out;
1248 }
1249 eset = ucred_getprivset(uc, PRIV_EFFECTIVE);
1250 if (ucred_getzoneid(uc) != GLOBAL_ZONEID ||
1251 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) :
1252 ucred_geteuid(uc) != 0)) {
1253 zerror(&logsys, B_FALSE, "insufficient privileges");
1254 goto out;
1255 }
1256
1257 kernelcall = ucred_getpid(uc) == 0;
1258
1259 /*
1260 * This is safe because we only use a zlog_t throughout the
1261 * duration of a door call; i.e., by the time the pointer
1262 * might become invalid, the door call would be over.
1263 */
1264 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale;
1265
1266 (void) mutex_lock(&lock);
1267
1268 /*
1269 * Once we start to really die off, we don't want more connections.
1270 */
1271 if (in_death_throes) {
1272 (void) mutex_unlock(&lock);
1273 ucred_free(uc);
1274 (void) door_return(NULL, 0, 0, 0);
1275 thr_exit(NULL);
1276 }
1277
1278 /*
1279 * Check for validity of command.
1280 */
1281 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT &&
1282 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT &&
1283 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT &&
1284 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) {
1285 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd);
1286 goto out;
1287 }
1288
1289 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) {
1290 /*
1291 * Can't happen
1292 */
1293 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d",
1294 cmd);
1295 goto out;
1296 }
1297 /*
1298 * We ignore the possibility of someone calling zone_create(2)
1299 * explicitly; all requests must come through zoneadmd.
1300 */
1301 if (zone_get_state(zone_name, &zstate) != Z_OK) {
1302 /*
1303 * Something terribly wrong happened
1304 */
1305 zerror(&logsys, B_FALSE, "unable to determine state of zone");
1306 goto out;
1307 }
1308
1309 if (kernelcall) {
1310 /*
1311 * Kernel-initiated requests may lose their validity if the
1312 * zone_t the kernel was referring to has gone away.
1313 */
1314 if ((zoneid = getzoneidbyname(zone_name)) == -1 ||
1315 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid,
1316 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) {
1317 /*
1318 * We're not talking about the same zone. The request
1319 * must have arrived too late. Return error.
1320 */
1321 rval = -1;
1322 goto out;
1323 }
1324 zlogp = &logsys; /* Log errors to syslog */
1325 }
1326
1327 /*
1328 * If we are being asked to forcibly mount or boot a zone, we
1329 * pretend that an INCOMPLETE zone is actually INSTALLED.
1330 */
1331 if (zstate == ZONE_STATE_INCOMPLETE &&
1332 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT))
1333 zstate = ZONE_STATE_INSTALLED;
1334
1335 switch (zstate) {
1336 case ZONE_STATE_CONFIGURED:
1337 case ZONE_STATE_INCOMPLETE:
1338 /*
1339 * Not our area of expertise; we just print a nice message
1340 * and die off.
1341 */
1342 zerror(zlogp, B_FALSE,
1343 "%s operation is invalid for zones in state '%s'",
1344 z_cmd_name(cmd), zone_state_str(zstate));
1345 break;
1346
1347 case ZONE_STATE_INSTALLED:
1348 switch (cmd) {
1349 case Z_READY:
1350 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
1351 if (rval == 0)
1352 eventstream_write(Z_EVT_ZONE_READIED);
1353 break;
1354 case Z_BOOT:
1355 case Z_FORCEBOOT:
1356 eventstream_write(Z_EVT_ZONE_BOOTING);
1357 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1358 == 0) {
1359 rval = zone_bootup(zlogp, zargp->bootbuf,
1360 zstate);
1361 }
1362 audit_put_record(zlogp, uc, rval, "boot");
1363 if (rval != 0) {
1364 bringup_failure_recovery = B_TRUE;
1365 (void) zone_halt(zlogp, B_FALSE, B_FALSE,
1366 zstate);
1367 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1368 }
1369 break;
1370 case Z_SHUTDOWN:
1371 case Z_HALT:
1372 if (kernelcall) /* Invalid; can't happen */
1373 abort();
1374 /*
1375 * We could have two clients racing to halt this
1376 * zone; the second client loses, but his request
1377 * doesn't fail, since the zone is now in the desired
1378 * state.
1379 */
1380 zerror(zlogp, B_FALSE, "zone is already halted");
1381 rval = 0;
1382 break;
1383 case Z_REBOOT:
1384 if (kernelcall) /* Invalid; can't happen */
1385 abort();
1386 zerror(zlogp, B_FALSE, "%s operation is invalid "
1387 "for zones in state '%s'", z_cmd_name(cmd),
1388 zone_state_str(zstate));
1389 rval = -1;
1390 break;
1391 case Z_NOTE_UNINSTALLING:
1392 if (kernelcall) /* Invalid; can't happen */
1393 abort();
1394 /*
1395 * Tell the console to print out a message about this.
1396 * Once it does, we will be in_death_throes.
1397 */
1398 eventstream_write(Z_EVT_ZONE_UNINSTALLING);
1399 break;
1400 case Z_MOUNT:
1401 case Z_FORCEMOUNT:
1402 if (kernelcall) /* Invalid; can't happen */
1403 abort();
1404 if (!zone_isnative && !zone_iscluster &&
1405 !zone_islabeled) {
1406 /*
1407 * -U mounts the zone without lofs mounting
1408 * zone file systems back into the scratch
1409 * zone. This is required when mounting
1410 * non-native branded zones.
1411 */
1412 (void) strlcpy(zargp->bootbuf, "-U",
1413 BOOTARGS_MAX);
1414 }
1415
1416 rval = zone_ready(zlogp,
1417 strcmp(zargp->bootbuf, "-U") == 0 ?
1418 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
1419 if (rval != 0)
1420 break;
1421
1422 eventstream_write(Z_EVT_ZONE_READIED);
1423
1424 /*
1425 * Get a handle to the default brand info.
1426 * We must always use the default brand file system
1427 * list when mounting the zone.
1428 */
1429 if ((bh = brand_open(default_brand)) == NULL) {
1430 rval = -1;
1431 break;
1432 }
1433
1434 /*
1435 * Get the list of filesystems to mount from
1436 * the brand configuration. These mounts are done
1437 * via a thread that will enter the zone, so they
1438 * are done from within the context of the zone.
1439 */
1440 cb.zlogp = zlogp;
1441 cb.zoneid = zone_id;
1442 cb.mount_cmd = B_TRUE;
1443 rval = brand_platform_iter_mounts(bh,
1444 mount_early_fs, &cb);
1445
1446 brand_close(bh);
1447
1448 /*
1449 * Ordinarily, /dev/fd would be mounted inside the zone
1450 * by svc:/system/filesystem/usr:default, but since
1451 * we're not booting the zone, we need to do this
1452 * manually.
1453 */
1454 if (rval == 0)
1455 rval = mount_early_fs(&cb,
1456 "fd", "/dev/fd", "fd", NULL);
1457 break;
1458 case Z_UNMOUNT:
1459 if (kernelcall) /* Invalid; can't happen */
1460 abort();
1461 zerror(zlogp, B_FALSE, "zone is already unmounted");
1462 rval = 0;
1463 break;
1464 }
1465 break;
1466
1467 case ZONE_STATE_READY:
1468 switch (cmd) {
1469 case Z_READY:
1470 /*
1471 * We could have two clients racing to ready this
1472 * zone; the second client loses, but his request
1473 * doesn't fail, since the zone is now in the desired
1474 * state.
1475 */
1476 zerror(zlogp, B_FALSE, "zone is already ready");
1477 rval = 0;
1478 break;
1479 case Z_BOOT:
1480 (void) strlcpy(boot_args, zargp->bootbuf,
1481 sizeof (boot_args));
1482 eventstream_write(Z_EVT_ZONE_BOOTING);
1483 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1484 audit_put_record(zlogp, uc, rval, "boot");
1485 if (rval != 0) {
1486 bringup_failure_recovery = B_TRUE;
1487 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1488 zstate);
1489 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1490 }
1491 boot_args[0] = '\0';
1492 break;
1493 case Z_HALT:
1494 if (kernelcall) /* Invalid; can't happen */
1495 abort();
1496 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1497 != 0)
1498 break;
1499 eventstream_write(Z_EVT_ZONE_HALTED);
1500 break;
1501 case Z_SHUTDOWN:
1502 case Z_REBOOT:
1503 case Z_NOTE_UNINSTALLING:
1504 case Z_MOUNT:
1505 case Z_UNMOUNT:
1506 if (kernelcall) /* Invalid; can't happen */
1507 abort();
1508 zerror(zlogp, B_FALSE, "%s operation is invalid "
1509 "for zones in state '%s'", z_cmd_name(cmd),
1510 zone_state_str(zstate));
1511 rval = -1;
1512 break;
1513 }
1514 break;
1515
1516 case ZONE_STATE_MOUNTED:
1517 switch (cmd) {
1518 case Z_UNMOUNT:
1519 if (kernelcall) /* Invalid; can't happen */
1520 abort();
1521 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
1522 if (rval == 0) {
1523 eventstream_write(Z_EVT_ZONE_HALTED);
1524 (void) sema_post(&scratch_sem);
1525 }
1526 break;
1527 default:
1528 if (kernelcall) /* Invalid; can't happen */
1529 abort();
1530 zerror(zlogp, B_FALSE, "%s operation is invalid "
1531 "for zones in state '%s'", z_cmd_name(cmd),
1532 zone_state_str(zstate));
1533 rval = -1;
1534 break;
1535 }
1536 break;
1537
1538 case ZONE_STATE_RUNNING:
1539 case ZONE_STATE_SHUTTING_DOWN:
1540 case ZONE_STATE_DOWN:
1541 switch (cmd) {
1542 case Z_READY:
1543 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1544 != 0)
1545 break;
1546 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
1547 eventstream_write(Z_EVT_ZONE_READIED);
1548 else
1549 eventstream_write(Z_EVT_ZONE_HALTED);
1550 break;
1551 case Z_BOOT:
1552 /*
1553 * We could have two clients racing to boot this
1554 * zone; the second client loses, but his request
1555 * doesn't fail, since the zone is now in the desired
1556 * state.
1557 */
1558 zerror(zlogp, B_FALSE, "zone is already booted");
1559 rval = 0;
1560 break;
1561 case Z_HALT:
1562 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
1563 != 0)
1564 break;
1565 eventstream_write(Z_EVT_ZONE_HALTED);
1566 break;
1567 case Z_REBOOT:
1568 (void) strlcpy(boot_args, zargp->bootbuf,
1569 sizeof (boot_args));
1570 eventstream_write(Z_EVT_ZONE_REBOOTING);
1571 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
1572 != 0) {
1573 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1574 boot_args[0] = '\0';
1575 break;
1576 }
1577 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
1578 != 0) {
1579 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1580 boot_args[0] = '\0';
1581 break;
1582 }
1583 rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
1584 audit_put_record(zlogp, uc, rval, "reboot");
1585 if (rval != 0) {
1586 (void) zone_halt(zlogp, B_FALSE, B_TRUE,
1587 zstate);
1588 eventstream_write(Z_EVT_ZONE_BOOTFAILED);
1589 }
1590 boot_args[0] = '\0';
1591 break;
1592 case Z_SHUTDOWN:
1593 if ((rval = zone_graceful_shutdown(zlogp)) == 0) {
1594 wait_shut = B_TRUE;
1595 }
1596 break;
1597 case Z_NOTE_UNINSTALLING:
1598 case Z_MOUNT:
1599 case Z_UNMOUNT:
1600 zerror(zlogp, B_FALSE, "%s operation is invalid "
1601 "for zones in state '%s'", z_cmd_name(cmd),
1602 zone_state_str(zstate));
1603 rval = -1;
1604 break;
1605 }
1606 break;
1607 default:
1608 abort();
1609 }
1610
1611 /*
1612 * Because the state of the zone may have changed, we make sure
1613 * to wake the console poller, which is in charge of initiating
1614 * the shutdown procedure as necessary.
1615 */
1616 eventstream_write(Z_EVT_NULL);
1617
1618 out:
1619 (void) mutex_unlock(&lock);
1620
1621 /* Wait for the Z_SHUTDOWN commands to complete */
1622 if (wait_shut)
1623 rval = zone_wait_shutdown(zlogp);
1624
1625 if (kernelcall) {
1626 rvalp = NULL;
1627 rlen = 0;
1628 } else {
1629 rvalp->rval = rval;
1630 }
1631 if (uc != NULL)
1632 ucred_free(uc);
1633 (void) door_return((char *)rvalp, rlen, NULL, 0);
1634 thr_exit(NULL);
1635 }
1636
1637 static int
setup_door(zlog_t * zlogp)1638 setup_door(zlog_t *zlogp)
1639 {
1640 if ((zone_door = door_create(server, NULL,
1641 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) {
1642 zerror(zlogp, B_TRUE, "%s failed", "door_create");
1643 return (-1);
1644 }
1645 (void) fdetach(zone_door_path);
1646
1647 if (fattach(zone_door, zone_door_path) != 0) {
1648 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
1649 (void) door_revoke(zone_door);
1650 (void) fdetach(zone_door_path);
1651 zone_door = -1;
1652 return (-1);
1653 }
1654 return (0);
1655 }
1656
1657 /*
1658 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
1659 * is where zoneadmd itself will check to see that another instance of
1660 * zoneadmd isn't already controlling this zone.
1661 *
1662 * The idea here is that we want to open the path to which we will
1663 * attach our door, lock it, and then make sure that no-one has beat us
1664 * to fattach(3c)ing onto it.
1665 *
1666 * fattach(3c) is really a mount, so there are actually two possible
1667 * vnodes we could be dealing with. Our strategy is as follows:
1668 *
1669 * - If the file we opened is a regular file (common case):
1670 * There is no fattach(3c)ed door, so we have a chance of becoming
1671 * the managing zoneadmd. We attempt to lock the file: if it is
1672 * already locked, that means someone else raced us here, so we
1673 * lose and give up. zoneadm(1m) will try to contact the zoneadmd
1674 * that beat us to it.
1675 *
1676 * - If the file we opened is a namefs file:
1677 * This means there is already an established door fattach(3c)'ed
1678 * to the rendezvous path. We've lost the race, so we give up.
1679 * Note that in this case we also try to grab the file lock, and
1680 * will succeed in acquiring it since the vnode locked by the
1681 * "winning" zoneadmd was a regular one, and the one we locked was
1682 * the fattach(3c)'ed door node. At any rate, no harm is done, and
1683 * we just return to zoneadm(1m) which knows to retry.
1684 */
1685 static int
make_daemon_exclusive(zlog_t * zlogp)1686 make_daemon_exclusive(zlog_t *zlogp)
1687 {
1688 int doorfd = -1;
1689 int err, ret = -1;
1690 struct stat st;
1691 struct flock flock;
1692 zone_state_t zstate;
1693
1694 top:
1695 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1696 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1697 zonecfg_strerror(err));
1698 goto out;
1699 }
1700 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
1701 S_IREAD|S_IWRITE)) < 0) {
1702 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
1703 goto out;
1704 }
1705 if (fstat(doorfd, &st) < 0) {
1706 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
1707 goto out;
1708 }
1709 /*
1710 * Lock the file to synchronize with other zoneadmd
1711 */
1712 flock.l_type = F_WRLCK;
1713 flock.l_whence = SEEK_SET;
1714 flock.l_start = (off_t)0;
1715 flock.l_len = (off_t)0;
1716 if (fcntl(doorfd, F_SETLK, &flock) < 0) {
1717 /*
1718 * Someone else raced us here and grabbed the lock file
1719 * first. A warning here is inappropriate since nothing
1720 * went wrong.
1721 */
1722 goto out;
1723 }
1724
1725 if (strcmp(st.st_fstype, "namefs") == 0) {
1726 struct door_info info;
1727
1728 /*
1729 * There is already something fattach()'ed to this file.
1730 * Lets see what the door is up to.
1731 */
1732 if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
1733 /*
1734 * Another zoneadmd process seems to be in
1735 * control of the situation and we don't need to
1736 * be here. A warning here is inappropriate
1737 * since nothing went wrong.
1738 *
1739 * If the door has been revoked, the zoneadmd
1740 * process currently managing the zone is going
1741 * away. We'll return control to zoneadm(1m)
1742 * which will try again (by which time zoneadmd
1743 * will hopefully have exited).
1744 */
1745 goto out;
1746 }
1747
1748 /*
1749 * If we got this far, there's a fattach(3c)'ed door
1750 * that belongs to a process that has exited, which can
1751 * happen if the previous zoneadmd died unexpectedly.
1752 *
1753 * Let user know that something is amiss, but that we can
1754 * recover; if the zone is in the installed state, then don't
1755 * message, since having a running zoneadmd isn't really
1756 * expected/needed. We want to keep occurences of this message
1757 * limited to times when zoneadmd is picking back up from a
1758 * zoneadmd that died while the zone was in some non-trivial
1759 * state.
1760 */
1761 if (zstate > ZONE_STATE_INSTALLED) {
1762 zerror(zlogp, B_FALSE,
1763 "zone '%s': WARNING: zone is in state '%s', but "
1764 "zoneadmd does not appear to be available; "
1765 "restarted zoneadmd to recover.",
1766 zone_name, zone_state_str(zstate));
1767 }
1768
1769 (void) fdetach(zone_door_path);
1770 (void) close(doorfd);
1771 goto top;
1772 }
1773 ret = 0;
1774 out:
1775 (void) close(doorfd);
1776 return (ret);
1777 }
1778
1779 /*
1780 * Setup the brand's pre and post state change callbacks, as well as the
1781 * query callback, if any of these exist.
1782 */
1783 static int
brand_callback_init(brand_handle_t bh,char * zone_name)1784 brand_callback_init(brand_handle_t bh, char *zone_name)
1785 {
1786 char zpath[MAXPATHLEN];
1787
1788 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK)
1789 return (-1);
1790
1791 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
1792 sizeof (pre_statechg_hook));
1793
1794 if (brand_get_prestatechange(bh, zone_name, zpath,
1795 pre_statechg_hook + EXEC_LEN,
1796 sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
1797 return (-1);
1798
1799 if (strlen(pre_statechg_hook) <= EXEC_LEN)
1800 pre_statechg_hook[0] = '\0';
1801
1802 (void) strlcpy(post_statechg_hook, EXEC_PREFIX,
1803 sizeof (post_statechg_hook));
1804
1805 if (brand_get_poststatechange(bh, zone_name, zpath,
1806 post_statechg_hook + EXEC_LEN,
1807 sizeof (post_statechg_hook) - EXEC_LEN) != 0)
1808 return (-1);
1809
1810 if (strlen(post_statechg_hook) <= EXEC_LEN)
1811 post_statechg_hook[0] = '\0';
1812
1813 (void) strlcpy(query_hook, EXEC_PREFIX,
1814 sizeof (query_hook));
1815
1816 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN,
1817 sizeof (query_hook) - EXEC_LEN) != 0)
1818 return (-1);
1819
1820 if (strlen(query_hook) <= EXEC_LEN)
1821 query_hook[0] = '\0';
1822
1823 return (0);
1824 }
1825
1826 int
main(int argc,char * argv[])1827 main(int argc, char *argv[])
1828 {
1829 int opt;
1830 zoneid_t zid;
1831 priv_set_t *privset;
1832 zone_state_t zstate;
1833 char parents_locale[MAXPATHLEN];
1834 brand_handle_t bh;
1835 int err;
1836
1837 pid_t pid;
1838 sigset_t blockset;
1839 sigset_t block_cld;
1840
1841 struct {
1842 sema_t sem;
1843 int status;
1844 zlog_t log;
1845 } *shstate;
1846 size_t shstatelen = getpagesize();
1847
1848 zlog_t errlog;
1849 zlog_t *zlogp;
1850
1851 int ctfd;
1852
1853 progname = get_execbasename(argv[0]);
1854
1855 /*
1856 * Make sure stderr is unbuffered
1857 */
1858 (void) setbuffer(stderr, NULL, 0);
1859
1860 /*
1861 * Get out of the way of mounted filesystems, since we will daemonize
1862 * soon.
1863 */
1864 (void) chdir("/");
1865
1866 /*
1867 * Use the default system umask per PSARC 1998/110 rather than
1868 * anything that may have been set by the caller.
1869 */
1870 (void) umask(CMASK);
1871
1872 /*
1873 * Initially we want to use our parent's locale.
1874 */
1875 (void) setlocale(LC_ALL, "");
1876 (void) textdomain(TEXT_DOMAIN);
1877 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL),
1878 sizeof (parents_locale));
1879
1880 /*
1881 * This zlog_t is used for writing to stderr
1882 */
1883 errlog.logfile = stderr;
1884 errlog.buflen = errlog.loglen = 0;
1885 errlog.buf = errlog.log = NULL;
1886 errlog.locale = parents_locale;
1887
1888 /*
1889 * We start off writing to stderr until we're ready to daemonize.
1890 */
1891 zlogp = &errlog;
1892
1893 /*
1894 * Process options.
1895 */
1896 while ((opt = getopt(argc, argv, "R:z:")) != EOF) {
1897 switch (opt) {
1898 case 'R':
1899 zonecfg_set_root(optarg);
1900 break;
1901 case 'z':
1902 zone_name = optarg;
1903 break;
1904 default:
1905 usage();
1906 }
1907 }
1908
1909 if (zone_name == NULL)
1910 usage();
1911
1912 /*
1913 * Because usage() prints directly to stderr, it has gettext()
1914 * wrapping, which depends on the locale. But since zerror() calls
1915 * localize() which tweaks the locale, it is not safe to call zerror()
1916 * until after the last call to usage(). Fortunately, the last call
1917 * to usage() is just above and the first call to zerror() is just
1918 * below. Don't mess this up.
1919 */
1920 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) {
1921 zerror(zlogp, B_FALSE, "cannot manage the %s zone",
1922 GLOBAL_ZONENAME);
1923 return (1);
1924 }
1925
1926 if (zone_get_id(zone_name, &zid) != 0) {
1927 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name,
1928 zonecfg_strerror(Z_NO_ZONE));
1929 return (1);
1930 }
1931
1932 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
1933 zerror(zlogp, B_FALSE, "failed to get zone state: %s",
1934 zonecfg_strerror(err));
1935 return (1);
1936 }
1937 if (zstate < ZONE_STATE_INCOMPLETE) {
1938 zerror(zlogp, B_FALSE,
1939 "cannot manage a zone which is in state '%s'",
1940 zone_state_str(zstate));
1941 return (1);
1942 }
1943
1944 if (zonecfg_default_brand(default_brand,
1945 sizeof (default_brand)) != Z_OK) {
1946 zerror(zlogp, B_FALSE, "unable to determine default brand");
1947 return (1);
1948 }
1949
1950 /* Get a handle to the brand info for this zone */
1951 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name))
1952 != Z_OK) {
1953 zerror(zlogp, B_FALSE, "unable to determine zone brand");
1954 return (1);
1955 }
1956 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0);
1957 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0);
1958
1959 /*
1960 * In the alternate root environment, the only supported
1961 * operations are mount and unmount. In this case, just treat
1962 * the zone as native if it is cluster. Cluster zones can be
1963 * native for the purpose of LU or upgrade, and the cluster
1964 * brand may not exist in the miniroot (such as in net install
1965 * upgrade).
1966 */
1967 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) {
1968 zone_iscluster = B_TRUE;
1969 if (zonecfg_in_alt_root()) {
1970 (void) strlcpy(brand_name, default_brand,
1971 sizeof (brand_name));
1972 }
1973 } else {
1974 zone_iscluster = B_FALSE;
1975 }
1976
1977 if ((bh = brand_open(brand_name)) == NULL) {
1978 zerror(zlogp, B_FALSE, "unable to open zone brand");
1979 return (1);
1980 }
1981
1982 /* Get state change brand hooks. */
1983 if (brand_callback_init(bh, zone_name) == -1) {
1984 zerror(zlogp, B_TRUE,
1985 "failed to initialize brand state change hooks");
1986 brand_close(bh);
1987 return (1);
1988 }
1989
1990 brand_close(bh);
1991
1992 /*
1993 * Check that we have all privileges. It would be nice to pare
1994 * this down, but this is at least a first cut.
1995 */
1996 if ((privset = priv_allocset()) == NULL) {
1997 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
1998 return (1);
1999 }
2000
2001 if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
2002 zerror(zlogp, B_TRUE, "%s failed", "getppriv");
2003 priv_freeset(privset);
2004 return (1);
2005 }
2006
2007 if (priv_isfullset(privset) == B_FALSE) {
2008 zerror(zlogp, B_FALSE, "You lack sufficient privilege to "
2009 "run this command (all privs required)");
2010 priv_freeset(privset);
2011 return (1);
2012 }
2013 priv_freeset(privset);
2014
2015 if (mkzonedir(zlogp) != 0)
2016 return (1);
2017
2018 /*
2019 * Pre-fork: setup shared state
2020 */
2021 if ((shstate = (void *)mmap(NULL, shstatelen,
2022 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) ==
2023 MAP_FAILED) {
2024 zerror(zlogp, B_TRUE, "%s failed", "mmap");
2025 return (1);
2026 }
2027 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) {
2028 zerror(zlogp, B_TRUE, "%s failed", "sema_init()");
2029 (void) munmap((char *)shstate, shstatelen);
2030 return (1);
2031 }
2032 shstate->log.logfile = NULL;
2033 shstate->log.buflen = shstatelen - sizeof (*shstate);
2034 shstate->log.loglen = shstate->log.buflen;
2035 shstate->log.buf = (char *)shstate + sizeof (*shstate);
2036 shstate->log.log = shstate->log.buf;
2037 shstate->log.locale = parents_locale;
2038 shstate->status = -1;
2039
2040 /*
2041 * We need a SIGCHLD handler so the sema_wait() below will wake
2042 * up if the child dies without doing a sema_post().
2043 */
2044 (void) sigset(SIGCHLD, sigchld);
2045 /*
2046 * We must mask SIGCHLD until after we've coped with the fork
2047 * sufficiently to deal with it; otherwise we can race and
2048 * receive the signal before pid has been initialized
2049 * (yes, this really happens).
2050 */
2051 (void) sigemptyset(&block_cld);
2052 (void) sigaddset(&block_cld, SIGCHLD);
2053 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2054
2055 if ((ctfd = init_template()) == -1) {
2056 zerror(zlogp, B_TRUE, "failed to create contract");
2057 return (1);
2058 }
2059
2060 /*
2061 * Do not let another thread localize a message while we are forking.
2062 */
2063 (void) mutex_lock(&msglock);
2064 pid = fork();
2065 (void) mutex_unlock(&msglock);
2066
2067 /*
2068 * In all cases (parent, child, and in the event of an error) we
2069 * don't want to cause creation of contracts on subsequent fork()s.
2070 */
2071 (void) ct_tmpl_clear(ctfd);
2072 (void) close(ctfd);
2073
2074 if (pid == -1) {
2075 zerror(zlogp, B_TRUE, "could not fork");
2076 return (1);
2077
2078 } else if (pid > 0) { /* parent */
2079 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2080 /*
2081 * This marks a window of vulnerability in which we receive
2082 * the SIGCLD before falling into sema_wait (normally we would
2083 * get woken up from sema_wait with EINTR upon receipt of
2084 * SIGCLD). So we may need to use some other scheme like
2085 * sema_posting in the sigcld handler.
2086 * blech
2087 */
2088 (void) sema_wait(&shstate->sem);
2089 (void) sema_destroy(&shstate->sem);
2090 if (shstate->status != 0)
2091 (void) waitpid(pid, NULL, WNOHANG);
2092 /*
2093 * It's ok if we die with SIGPIPE. It's not like we could have
2094 * done anything about it.
2095 */
2096 (void) fprintf(stderr, "%s", shstate->log.buf);
2097 _exit(shstate->status == 0 ? 0 : 1);
2098 }
2099
2100 /*
2101 * The child charges on.
2102 */
2103 (void) sigset(SIGCHLD, SIG_DFL);
2104 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2105
2106 /*
2107 * SIGPIPE can be delivered if we write to a socket for which the
2108 * peer endpoint is gone. That can lead to too-early termination
2109 * of zoneadmd, and that's not good eats.
2110 */
2111 (void) sigset(SIGPIPE, SIG_IGN);
2112 /*
2113 * Stop using stderr
2114 */
2115 zlogp = &shstate->log;
2116
2117 /*
2118 * We don't need stdout/stderr from now on.
2119 */
2120 closefrom(0);
2121
2122 /*
2123 * Initialize the syslog zlog_t. This needs to be done after
2124 * the call to closefrom().
2125 */
2126 logsys.buf = logsys.log = NULL;
2127 logsys.buflen = logsys.loglen = 0;
2128 logsys.logfile = NULL;
2129 logsys.locale = DEFAULT_LOCALE;
2130
2131 openlog("zoneadmd", LOG_PID, LOG_DAEMON);
2132
2133 /*
2134 * The eventstream is used to publish state changes in the zone
2135 * from the door threads to the console I/O poller.
2136 */
2137 if (eventstream_init() == -1) {
2138 zerror(zlogp, B_TRUE, "unable to create eventstream");
2139 goto child_out;
2140 }
2141
2142 (void) snprintf(zone_door_path, sizeof (zone_door_path),
2143 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name);
2144
2145 /*
2146 * See if another zoneadmd is running for this zone. If not, then we
2147 * can now modify system state.
2148 */
2149 if (make_daemon_exclusive(zlogp) == -1)
2150 goto child_out;
2151
2152
2153 /*
2154 * Create/join a new session; we need to be careful of what we do with
2155 * the console from now on so we don't end up being the session leader
2156 * for the terminal we're going to be handing out.
2157 */
2158 (void) setsid();
2159
2160 /*
2161 * This thread shouldn't be receiving any signals; in particular,
2162 * SIGCHLD should be received by the thread doing the fork().
2163 */
2164 (void) sigfillset(&blockset);
2165 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
2166
2167 /*
2168 * Setup the console device and get ready to serve the console;
2169 * once this has completed, we're ready to let console clients
2170 * make an attempt to connect (they will block until
2171 * serve_console_sock() below gets called, and any pending
2172 * connection is accept()ed).
2173 */
2174 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0)
2175 goto child_out;
2176
2177 /*
2178 * Take the lock now, so that when the door server gets going, we
2179 * are guaranteed that it won't take a request until we are sure
2180 * that everything is completely set up. See the child_out: label
2181 * below to see why this matters.
2182 */
2183 (void) mutex_lock(&lock);
2184
2185 /* Init semaphore for scratch zones. */
2186 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) {
2187 zerror(zlogp, B_TRUE,
2188 "failed to initialize semaphore for scratch zone");
2189 goto child_out;
2190 }
2191
2192 /* open the dladm handle */
2193 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) {
2194 zerror(zlogp, B_FALSE, "failed to open dladm handle");
2195 goto child_out;
2196 }
2197
2198 /*
2199 * Note: door setup must occur *after* the console is set up.
2200 * This is so that as zlogin tests the door to see if zoneadmd
2201 * is ready yet, we know that the console will get serviced
2202 * once door_info() indicates that the door is "up".
2203 */
2204 if (setup_door(zlogp) == -1)
2205 goto child_out;
2206
2207 /*
2208 * Things seem OK so far; tell the parent process that we're done
2209 * with setup tasks. This will cause the parent to exit, signalling
2210 * to zoneadm, zlogin, or whatever forked it that we are ready to
2211 * service requests.
2212 */
2213 shstate->status = 0;
2214 (void) sema_post(&shstate->sem);
2215 (void) munmap((char *)shstate, shstatelen);
2216 shstate = NULL;
2217
2218 (void) mutex_unlock(&lock);
2219
2220 /*
2221 * zlogp is now invalid, so reset it to the syslog logger.
2222 */
2223 zlogp = &logsys;
2224
2225 /*
2226 * Now that we are free of any parents, switch to the default locale.
2227 */
2228 (void) setlocale(LC_ALL, DEFAULT_LOCALE);
2229
2230 /*
2231 * At this point the setup portion of main() is basically done, so
2232 * we reuse this thread to manage the zone console. When
2233 * serve_console() has returned, we are past the point of no return
2234 * in the life of this zoneadmd.
2235 */
2236 if (zonecfg_in_alt_root()) {
2237 /*
2238 * This is just awful, but mounted scratch zones don't (and
2239 * can't) have consoles. We just wait for unmount instead.
2240 */
2241 while (sema_wait(&scratch_sem) == EINTR)
2242 ;
2243 } else {
2244 serve_console(zlogp);
2245 assert(in_death_throes);
2246 }
2247
2248 /*
2249 * This is the next-to-last part of the exit interlock. Upon calling
2250 * fdetach(), the door will go unreferenced; once any
2251 * outstanding requests (like the door thread doing Z_HALT) are
2252 * done, the door will get an UNREF notification; when it handles
2253 * the UNREF, the door server will cause the exit. It's possible
2254 * that fdetach() can fail because the file is in use, in which
2255 * case we'll retry the operation.
2256 */
2257 assert(!MUTEX_HELD(&lock));
2258 for (;;) {
2259 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY))
2260 break;
2261 yield();
2262 }
2263
2264 for (;;)
2265 (void) pause();
2266
2267 child_out:
2268 assert(pid == 0);
2269 if (shstate != NULL) {
2270 shstate->status = -1;
2271 (void) sema_post(&shstate->sem);
2272 (void) munmap((char *)shstate, shstatelen);
2273 }
2274
2275 /*
2276 * This might trigger an unref notification, but if so,
2277 * we are still holding the lock, so our call to exit will
2278 * ultimately win the race and will publish the right exit
2279 * code.
2280 */
2281 if (zone_door != -1) {
2282 assert(MUTEX_HELD(&lock));
2283 (void) door_revoke(zone_door);
2284 (void) fdetach(zone_door_path);
2285 }
2286
2287 if (dld_handle != NULL)
2288 dladm_close(dld_handle);
2289
2290 return (1); /* return from main() forcibly exits an MT process */
2291 }
2292