1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * zoneadmd manages zones; one zoneadmd process is launched for each 29 * non-global zone on the system. This daemon juggles four jobs: 30 * 31 * - Implement setup and teardown of the zone "virtual platform": mount and 32 * unmount filesystems; create and destroy network interfaces; communicate 33 * with devfsadmd to lay out devices for the zone; instantiate the zone 34 * console device; configure process runtime attributes such as resource 35 * controls, pool bindings, fine-grained privileges. 36 * 37 * - Launch the zone's init(1M) process. 38 * 39 * - Implement a door server; clients (like zoneadm) connect to the door 40 * server and request zone state changes. The kernel is also a client of 41 * this door server. A request to halt or reboot the zone which originates 42 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 43 * 44 * One minor problem is that messages emitted by zoneadmd need to be passed 45 * back to the zoneadm process making the request. These messages need to 46 * be rendered in the client's locale; so, this is passed in as part of the 47 * request. The exception is the kernel upcall to zoneadmd, in which case 48 * messages are syslog'd. 49 * 50 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 51 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 52 * strings which do not need to be translated. 53 * 54 * - Act as a console server for zlogin -C processes; see comments in zcons.c 55 * for more information about the zone console architecture. 56 * 57 * DESIGN NOTES 58 * 59 * Restart: 60 * A chief design constraint of zoneadmd is that it should be restartable in 61 * the case that the administrator kills it off, or it suffers a fatal error, 62 * without the running zone being impacted; this is akin to being able to 63 * reboot the service processor of a server without affecting the OS instance. 64 */ 65 66 #include <sys/param.h> 67 #include <sys/mman.h> 68 #include <sys/types.h> 69 #include <sys/stat.h> 70 #include <sys/sysmacros.h> 71 72 #include <bsm/adt.h> 73 #include <bsm/adt_event.h> 74 75 #include <alloca.h> 76 #include <assert.h> 77 #include <errno.h> 78 #include <door.h> 79 #include <fcntl.h> 80 #include <locale.h> 81 #include <signal.h> 82 #include <stdarg.h> 83 #include <stdio.h> 84 #include <stdlib.h> 85 #include <string.h> 86 #include <strings.h> 87 #include <synch.h> 88 #include <syslog.h> 89 #include <thread.h> 90 #include <unistd.h> 91 #include <wait.h> 92 #include <limits.h> 93 #include <zone.h> 94 #include <libbrand.h> 95 #include <sys/brand.h> 96 #include <libcontract.h> 97 #include <libcontract_priv.h> 98 #include <sys/brand.h> 99 #include <sys/contract/process.h> 100 #include <sys/ctfs.h> 101 #include <libdladm.h> 102 #include <sys/dls_mgmt.h> 103 104 #include <libzonecfg.h> 105 #include "zoneadmd.h" 106 107 static char *progname; 108 char *zone_name; /* zone which we are managing */ 109 char default_brand[MAXNAMELEN]; 110 char brand_name[MAXNAMELEN]; 111 boolean_t zone_isnative; 112 boolean_t zone_iscluster; 113 boolean_t zone_islabeled; 114 static zoneid_t zone_id; 115 dladm_handle_t dld_handle = NULL; 116 117 static char pre_statechg_hook[2 * MAXPATHLEN]; 118 static char post_statechg_hook[2 * MAXPATHLEN]; 119 char query_hook[2 * MAXPATHLEN]; 120 121 zlog_t logsys; 122 123 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 124 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 125 126 static sema_t scratch_sem; /* for scratch zones */ 127 128 static char zone_door_path[MAXPATHLEN]; 129 static int zone_door = -1; 130 131 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 132 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 133 134 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 135 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 136 #endif 137 138 #define DEFAULT_LOCALE "C" 139 140 static const char * 141 z_cmd_name(zone_cmd_t zcmd) 142 { 143 /* This list needs to match the enum in sys/zone.h */ 144 static const char *zcmdstr[] = { 145 "ready", "boot", "forceboot", "reboot", "halt", 146 "note_uninstalling", "mount", "forcemount", "unmount" 147 }; 148 149 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 150 return ("unknown"); 151 else 152 return (zcmdstr[(int)zcmd]); 153 } 154 155 static char * 156 get_execbasename(char *execfullname) 157 { 158 char *last_slash, *execbasename; 159 160 /* guard against '/' at end of command invocation */ 161 for (;;) { 162 last_slash = strrchr(execfullname, '/'); 163 if (last_slash == NULL) { 164 execbasename = execfullname; 165 break; 166 } else { 167 execbasename = last_slash + 1; 168 if (*execbasename == '\0') { 169 *last_slash = '\0'; 170 continue; 171 } 172 break; 173 } 174 } 175 return (execbasename); 176 } 177 178 static void 179 usage(void) 180 { 181 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 182 (void) fprintf(stderr, 183 gettext("\tNote: %s should not be run directly.\n"), progname); 184 exit(2); 185 } 186 187 /* ARGSUSED */ 188 static void 189 sigchld(int sig) 190 { 191 } 192 193 char * 194 localize_msg(char *locale, const char *msg) 195 { 196 char *out; 197 198 (void) mutex_lock(&msglock); 199 (void) setlocale(LC_MESSAGES, locale); 200 out = gettext(msg); 201 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 202 (void) mutex_unlock(&msglock); 203 return (out); 204 } 205 206 /* PRINTFLIKE3 */ 207 void 208 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 209 { 210 va_list alist; 211 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 212 char *bp; 213 int saved_errno = errno; 214 215 if (zlogp == NULL) 216 return; 217 if (zlogp == &logsys) 218 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 219 zone_name); 220 else 221 buf[0] = '\0'; 222 bp = &(buf[strlen(buf)]); 223 224 /* 225 * In theory, the locale pointer should be set to either "C" or a 226 * char array, so it should never be NULL 227 */ 228 assert(zlogp->locale != NULL); 229 /* Locale is per process, but we are multi-threaded... */ 230 fmt = localize_msg(zlogp->locale, fmt); 231 232 va_start(alist, fmt); 233 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 234 va_end(alist); 235 bp = &(buf[strlen(buf)]); 236 if (use_strerror) 237 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 238 strerror(saved_errno)); 239 if (zlogp == &logsys) { 240 (void) syslog(LOG_ERR, "%s", buf); 241 } else if (zlogp->logfile != NULL) { 242 (void) fprintf(zlogp->logfile, "%s\n", buf); 243 } else { 244 size_t buflen; 245 size_t copylen; 246 247 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 248 copylen = MIN(buflen, zlogp->loglen); 249 zlogp->log += copylen; 250 zlogp->loglen -= copylen; 251 } 252 } 253 254 /* 255 * Emit a warning for any boot arguments which are unrecognized. Since 256 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 257 * put the arguments into an argv style array, use getopt to process them, 258 * and put the resultant argument string back into outargs. 259 * 260 * During the filtering, we pull out any arguments which are truly "boot" 261 * arguments, leaving only those which are to be passed intact to the 262 * progenitor process. The one we support at the moment is -i, which 263 * indicates to the kernel which program should be launched as 'init'. 264 * 265 * A return of Z_INVAL indicates specifically that the arguments are 266 * not valid; this is a non-fatal error. Except for Z_OK, all other return 267 * values are treated as fatal. 268 */ 269 static int 270 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 271 char *init_file, char *badarg) 272 { 273 int argc = 0, argc_save; 274 int i; 275 int err; 276 char *arg, *lasts, **argv = NULL, **argv_save; 277 char zonecfg_args[BOOTARGS_MAX]; 278 char scratchargs[BOOTARGS_MAX], *sargs; 279 char c; 280 281 bzero(outargs, BOOTARGS_MAX); 282 bzero(badarg, BOOTARGS_MAX); 283 284 /* 285 * If the user didn't specify transient boot arguments, check 286 * to see if there were any specified in the zone configuration, 287 * and use them if applicable. 288 */ 289 if (inargs == NULL || inargs[0] == '\0') { 290 zone_dochandle_t handle; 291 if ((handle = zonecfg_init_handle()) == NULL) { 292 zerror(zlogp, B_TRUE, 293 "getting zone configuration handle"); 294 return (Z_BAD_HANDLE); 295 } 296 err = zonecfg_get_snapshot_handle(zone_name, handle); 297 if (err != Z_OK) { 298 zerror(zlogp, B_FALSE, 299 "invalid configuration snapshot"); 300 zonecfg_fini_handle(handle); 301 return (Z_BAD_HANDLE); 302 } 303 304 bzero(zonecfg_args, sizeof (zonecfg_args)); 305 (void) zonecfg_get_bootargs(handle, zonecfg_args, 306 sizeof (zonecfg_args)); 307 inargs = zonecfg_args; 308 zonecfg_fini_handle(handle); 309 } 310 311 if (strlen(inargs) >= BOOTARGS_MAX) { 312 zerror(zlogp, B_FALSE, "boot argument string too long"); 313 return (Z_INVAL); 314 } 315 316 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 317 sargs = scratchargs; 318 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 319 sargs = NULL; 320 argc++; 321 } 322 323 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 324 zerror(zlogp, B_FALSE, "memory allocation failed"); 325 return (Z_NOMEM); 326 } 327 328 argv_save = argv; 329 argc_save = argc; 330 331 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 332 sargs = scratchargs; 333 i = 0; 334 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 335 sargs = NULL; 336 if ((argv[i] = strdup(arg)) == NULL) { 337 err = Z_NOMEM; 338 zerror(zlogp, B_FALSE, "memory allocation failed"); 339 goto done; 340 } 341 i++; 342 } 343 344 /* 345 * We preserve compatibility with the Solaris system boot behavior, 346 * which allows: 347 * 348 * # reboot kernel/unix -s -m verbose 349 * 350 * In this example, kernel/unix tells the booter what file to 351 * boot. We don't want reboot in a zone to be gratuitously different, 352 * so we silently ignore the boot file, if necessary. 353 */ 354 if (argv[0] == NULL) 355 goto done; 356 357 assert(argv[0][0] != ' '); 358 assert(argv[0][0] != '\t'); 359 360 if (argv[0][0] != '-' && argv[0][0] != '\0') { 361 argv = &argv[1]; 362 argc--; 363 } 364 365 optind = 0; 366 opterr = 0; 367 err = Z_OK; 368 while ((c = getopt(argc, argv, "fi:m:s")) != -1) { 369 switch (c) { 370 case 'i': 371 /* 372 * -i is handled by the runtime and is not passed 373 * along to userland 374 */ 375 (void) strlcpy(init_file, optarg, MAXPATHLEN); 376 break; 377 case 'f': 378 /* This has already been processed by zoneadm */ 379 break; 380 case 'm': 381 case 's': 382 /* These pass through unmolested */ 383 (void) snprintf(outargs, BOOTARGS_MAX, 384 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 385 break; 386 case '?': 387 /* 388 * We warn about unknown arguments but pass them 389 * along anyway-- if someone wants to develop their 390 * own init replacement, they can pass it whatever 391 * args they want. 392 */ 393 err = Z_INVAL; 394 (void) snprintf(outargs, BOOTARGS_MAX, 395 "%s -%c", outargs, optopt); 396 (void) snprintf(badarg, BOOTARGS_MAX, 397 "%s -%c", badarg, optopt); 398 break; 399 } 400 } 401 402 /* 403 * For Solaris Zones we warn about and discard non-option arguments. 404 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 405 * to the kernel, we concat up all the other remaining boot args. 406 * and warn on them as a group. 407 */ 408 if (optind < argc) { 409 err = Z_INVAL; 410 while (optind < argc) { 411 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 412 badarg, strlen(badarg) > 0 ? " " : "", 413 argv[optind]); 414 optind++; 415 } 416 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 417 "arguments `%s'.", badarg); 418 } 419 420 done: 421 for (i = 0; i < argc_save; i++) { 422 if (argv_save[i] != NULL) 423 free(argv_save[i]); 424 } 425 free(argv_save); 426 return (err); 427 } 428 429 430 static int 431 mkzonedir(zlog_t *zlogp) 432 { 433 struct stat st; 434 /* 435 * We must create and lock everyone but root out of ZONES_TMPDIR 436 * since anyone can open any UNIX domain socket, regardless of 437 * its file system permissions. Sigh... 438 */ 439 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 440 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 441 return (-1); 442 } 443 /* paranoia */ 444 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 445 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 446 return (-1); 447 } 448 (void) chmod(ZONES_TMPDIR, S_IRWXU); 449 return (0); 450 } 451 452 /* 453 * Run the brand's pre-state change callback, if it exists. 454 */ 455 static int 456 brand_prestatechg(zlog_t *zlogp, int state, int cmd) 457 { 458 char cmdbuf[2 * MAXPATHLEN]; 459 460 if (pre_statechg_hook[0] == '\0') 461 return (0); 462 463 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d", pre_statechg_hook, 464 state, cmd) > sizeof (cmdbuf)) 465 return (-1); 466 467 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 468 return (-1); 469 470 return (0); 471 } 472 473 /* 474 * Run the brand's post-state change callback, if it exists. 475 */ 476 static int 477 brand_poststatechg(zlog_t *zlogp, int state, int cmd) 478 { 479 char cmdbuf[2 * MAXPATHLEN]; 480 481 if (post_statechg_hook[0] == '\0') 482 return (0); 483 484 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d", post_statechg_hook, 485 state, cmd) > sizeof (cmdbuf)) 486 return (-1); 487 488 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 489 return (-1); 490 491 return (0); 492 } 493 494 /* 495 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 496 * 'true' if this is being invoked as part of the processing for the "mount" 497 * subcommand. 498 */ 499 static int 500 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate) 501 { 502 int err; 503 504 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0) 505 return (-1); 506 507 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 508 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 509 zonecfg_strerror(err)); 510 goto bad; 511 } 512 513 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 514 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 515 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 516 zonecfg_strerror(err)); 517 goto bad; 518 } 519 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 520 bringup_failure_recovery = B_TRUE; 521 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE); 522 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 523 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 524 zonecfg_strerror(err)); 525 goto bad; 526 } 527 528 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0) 529 goto bad; 530 531 return (0); 532 533 bad: 534 /* 535 * If something goes wrong, we up the zones's state to the target 536 * state, READY, and then invoke the hook as if we're halting. 537 */ 538 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT); 539 return (-1); 540 } 541 542 int 543 init_template(void) 544 { 545 int fd; 546 int err = 0; 547 548 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 549 if (fd == -1) 550 return (-1); 551 552 /* 553 * For now, zoneadmd doesn't do anything with the contract. 554 * Deliver no events, don't inherit, and allow it to be orphaned. 555 */ 556 err |= ct_tmpl_set_critical(fd, 0); 557 err |= ct_tmpl_set_informative(fd, 0); 558 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 559 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 560 if (err || ct_tmpl_activate(fd)) { 561 (void) close(fd); 562 return (-1); 563 } 564 565 return (fd); 566 } 567 568 typedef struct fs_callback { 569 zlog_t *zlogp; 570 zoneid_t zoneid; 571 boolean_t mount_cmd; 572 } fs_callback_t; 573 574 static int 575 mount_early_fs(void *data, const char *spec, const char *dir, 576 const char *fstype, const char *opt) 577 { 578 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; 579 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; 580 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd; 581 char rootpath[MAXPATHLEN]; 582 pid_t child; 583 int child_status; 584 int tmpl_fd; 585 int rv; 586 ctid_t ct; 587 588 /* determine the zone rootpath */ 589 if (mount_cmd) { 590 char zonepath[MAXPATHLEN]; 591 char luroot[MAXPATHLEN]; 592 593 if (zone_get_zonepath(zone_name, 594 zonepath, sizeof (zonepath)) != Z_OK) { 595 zerror(zlogp, B_FALSE, "unable to determine zone path"); 596 return (-1); 597 } 598 599 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 600 resolve_lofs(zlogp, luroot, sizeof (luroot)); 601 (void) strlcpy(rootpath, luroot, sizeof (rootpath)); 602 } else { 603 if (zone_get_rootpath(zone_name, 604 rootpath, sizeof (rootpath)) != Z_OK) { 605 zerror(zlogp, B_FALSE, "unable to determine zone root"); 606 return (-1); 607 } 608 } 609 610 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) { 611 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 612 rootpath, dir); 613 return (-1); 614 } else if (rv > 0) { 615 /* The mount point path doesn't exist, create it now. */ 616 if (make_one_dir(zlogp, rootpath, dir, 617 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 618 DEFAULT_DIR_GROUP) != 0) { 619 zerror(zlogp, B_FALSE, "failed to create mount point"); 620 return (-1); 621 } 622 623 /* 624 * Now this might seem weird, but we need to invoke 625 * valid_mount_path() again. Why? Because it checks 626 * to make sure that the mount point path is canonical, 627 * which it can only do if the path exists, so now that 628 * we've created the path we have to verify it again. 629 */ 630 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, 631 fstype)) < 0) { 632 zerror(zlogp, B_FALSE, 633 "%s%s is not a valid mount point", rootpath, dir); 634 return (-1); 635 } 636 } 637 638 if ((tmpl_fd = init_template()) == -1) { 639 zerror(zlogp, B_TRUE, "failed to create contract"); 640 return (-1); 641 } 642 643 if ((child = fork()) == -1) { 644 (void) ct_tmpl_clear(tmpl_fd); 645 (void) close(tmpl_fd); 646 zerror(zlogp, B_TRUE, "failed to fork"); 647 return (-1); 648 649 } else if (child == 0) { /* child */ 650 char opt_buf[MAX_MNTOPT_STR]; 651 int optlen = 0; 652 int mflag = MS_DATA; 653 654 (void) ct_tmpl_clear(tmpl_fd); 655 /* 656 * Even though there are no procs running in the zone, we 657 * do this for paranoia's sake. 658 */ 659 (void) closefrom(0); 660 661 if (zone_enter(zoneid) == -1) { 662 _exit(errno); 663 } 664 if (opt != NULL) { 665 /* 666 * The mount() system call is incredibly annoying. 667 * If options are specified, we need to copy them 668 * into a temporary buffer since the mount() system 669 * call will overwrite the options string. It will 670 * also fail if the new option string it wants to 671 * write is bigger than the one we passed in, so 672 * you must pass in a buffer of the maximum possible 673 * option string length. sigh. 674 */ 675 (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); 676 opt = opt_buf; 677 optlen = MAX_MNTOPT_STR; 678 mflag = MS_OPTIONSTR; 679 } 680 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) 681 _exit(errno); 682 _exit(0); 683 } 684 685 /* parent */ 686 if (contract_latest(&ct) == -1) 687 ct = -1; 688 (void) ct_tmpl_clear(tmpl_fd); 689 (void) close(tmpl_fd); 690 if (waitpid(child, &child_status, 0) != child) { 691 /* unexpected: we must have been signalled */ 692 (void) contract_abandon_id(ct); 693 return (-1); 694 } 695 (void) contract_abandon_id(ct); 696 if (WEXITSTATUS(child_status) != 0) { 697 errno = WEXITSTATUS(child_status); 698 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 699 return (-1); 700 } 701 702 return (0); 703 } 704 705 /* 706 * If retstr is not NULL, the output of the subproc is returned in the str, 707 * otherwise it is output using zerror(). Any memory allocated for retstr 708 * should be freed by the caller. 709 */ 710 int 711 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr) 712 { 713 char buf[1024]; /* arbitrary large amount */ 714 char *inbuf; 715 FILE *file; 716 int status; 717 int rd_cnt; 718 719 if (retstr != NULL) { 720 if ((*retstr = malloc(1024)) == NULL) { 721 zerror(zlogp, B_FALSE, "out of memory"); 722 return (-1); 723 } 724 inbuf = *retstr; 725 rd_cnt = 0; 726 } else { 727 inbuf = buf; 728 } 729 730 file = popen(cmdbuf, "r"); 731 if (file == NULL) { 732 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); 733 return (-1); 734 } 735 736 while (fgets(inbuf, 1024, file) != NULL) { 737 if (retstr == NULL) { 738 if (zlogp != &logsys) 739 zerror(zlogp, B_FALSE, "%s", inbuf); 740 } else { 741 char *p; 742 743 rd_cnt += 1024 - 1; 744 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) { 745 zerror(zlogp, B_FALSE, "out of memory"); 746 (void) pclose(file); 747 return (-1); 748 } 749 750 *retstr = p; 751 inbuf = *retstr + rd_cnt; 752 } 753 } 754 status = pclose(file); 755 756 if (WIFSIGNALED(status)) { 757 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 758 "signal %d", cmdbuf, WTERMSIG(status)); 759 return (-1); 760 } 761 assert(WIFEXITED(status)); 762 if (WEXITSTATUS(status) == ZEXIT_EXEC) { 763 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); 764 return (-1); 765 } 766 return (WEXITSTATUS(status)); 767 } 768 769 static int 770 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) 771 { 772 zoneid_t zoneid; 773 struct stat st; 774 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 775 char nbootargs[BOOTARGS_MAX]; 776 char cmdbuf[MAXPATHLEN]; 777 fs_callback_t cb; 778 brand_handle_t bh; 779 zone_iptype_t iptype; 780 boolean_t links_loaded = B_FALSE; 781 dladm_status_t status; 782 char errmsg[DLADM_STRSIZE]; 783 int err; 784 785 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0) 786 return (-1); 787 788 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 789 zerror(zlogp, B_TRUE, "unable to get zoneid"); 790 goto bad; 791 } 792 793 cb.zlogp = zlogp; 794 cb.zoneid = zoneid; 795 cb.mount_cmd = B_FALSE; 796 797 /* Get a handle to the brand info for this zone */ 798 if ((bh = brand_open(brand_name)) == NULL) { 799 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 800 goto bad; 801 } 802 803 /* 804 * Get the list of filesystems to mount from the brand 805 * configuration. These mounts are done via a thread that will 806 * enter the zone, so they are done from within the context of the 807 * zone. 808 */ 809 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) { 810 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 811 brand_close(bh); 812 goto bad; 813 } 814 815 /* 816 * Get the brand's boot callback if it exists. 817 */ 818 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 819 zerror(zlogp, B_FALSE, "unable to determine zone path"); 820 brand_close(bh); 821 goto bad; 822 } 823 (void) strcpy(cmdbuf, EXEC_PREFIX); 824 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 825 sizeof (cmdbuf) - EXEC_LEN) != 0) { 826 zerror(zlogp, B_FALSE, 827 "unable to determine branded zone's boot callback"); 828 brand_close(bh); 829 goto bad; 830 } 831 832 /* Get the path for this zone's init(1M) (or equivalent) process. */ 833 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) { 834 zerror(zlogp, B_FALSE, 835 "unable to determine zone's init(1M) location"); 836 brand_close(bh); 837 goto bad; 838 } 839 840 brand_close(bh); 841 842 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 843 bad_boot_arg); 844 if (err == Z_INVAL) 845 eventstream_write(Z_EVT_ZONE_BADARGS); 846 else if (err != Z_OK) 847 goto bad; 848 849 assert(init_file[0] != '\0'); 850 851 /* Try to anticipate possible problems: Make sure init is executable. */ 852 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 853 zerror(zlogp, B_FALSE, "unable to determine zone root"); 854 goto bad; 855 } 856 857 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); 858 859 if (stat(initpath, &st) == -1) { 860 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 861 goto bad; 862 } 863 864 if ((st.st_mode & S_IXUSR) == 0) { 865 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 866 goto bad; 867 } 868 869 /* 870 * Exclusive stack zones interact with the dlmgmtd running in the 871 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is 872 * booting, and loads its datalinks from the zone's datalink 873 * configuration file. 874 */ 875 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) { 876 status = dladm_zone_boot(dld_handle, zoneid); 877 if (status != DLADM_STATUS_OK) { 878 zerror(zlogp, B_FALSE, "unable to load zone datalinks: " 879 " %s", dladm_status2str(status, errmsg)); 880 goto bad; 881 } 882 links_loaded = B_TRUE; 883 } 884 885 /* 886 * If there is a brand 'boot' callback, execute it now to give the 887 * brand one last chance to do any additional setup before the zone 888 * is booted. 889 */ 890 if ((strlen(cmdbuf) > EXEC_LEN) && 891 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) { 892 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 893 goto bad; 894 } 895 896 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 897 zerror(zlogp, B_TRUE, "could not set zone boot file"); 898 goto bad; 899 } 900 901 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 902 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 903 goto bad; 904 } 905 906 if (zone_boot(zoneid) == -1) { 907 zerror(zlogp, B_TRUE, "unable to boot zone"); 908 goto bad; 909 } 910 911 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0) 912 goto bad; 913 914 return (0); 915 916 bad: 917 /* 918 * If something goes wrong, we up the zones's state to the target 919 * state, RUNNING, and then invoke the hook as if we're halting. 920 */ 921 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT); 922 if (links_loaded) 923 (void) dladm_zone_halt(dld_handle, zoneid); 924 return (-1); 925 } 926 927 static int 928 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate) 929 { 930 int err; 931 932 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0) 933 return (-1); 934 935 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) { 936 if (!bringup_failure_recovery) 937 zerror(zlogp, B_FALSE, "unable to destroy zone"); 938 return (-1); 939 } 940 941 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 942 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 943 zonecfg_strerror(err)); 944 945 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0) 946 return (-1); 947 948 return (0); 949 } 950 951 /* 952 * Generate AUE_zone_state for a command that boots a zone. 953 */ 954 static void 955 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 956 char *new_state) 957 { 958 adt_session_data_t *ah; 959 adt_event_data_t *event; 960 int pass_fail, fail_reason; 961 962 if (!adt_audit_enabled()) 963 return; 964 965 if (return_val == 0) { 966 pass_fail = ADT_SUCCESS; 967 fail_reason = ADT_SUCCESS; 968 } else { 969 pass_fail = ADT_FAILURE; 970 fail_reason = ADT_FAIL_VALUE_PROGRAM; 971 } 972 973 if (adt_start_session(&ah, NULL, 0)) { 974 zerror(zlogp, B_TRUE, gettext("audit failure.")); 975 return; 976 } 977 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 978 zerror(zlogp, B_TRUE, gettext("audit failure.")); 979 (void) adt_end_session(ah); 980 return; 981 } 982 983 event = adt_alloc_event(ah, ADT_zone_state); 984 if (event == NULL) { 985 zerror(zlogp, B_TRUE, gettext("audit failure.")); 986 (void) adt_end_session(ah); 987 return; 988 } 989 event->adt_zone_state.zonename = zone_name; 990 event->adt_zone_state.new_state = new_state; 991 992 if (adt_put_event(event, pass_fail, fail_reason)) 993 zerror(zlogp, B_TRUE, gettext("audit failure.")); 994 995 adt_free_event(event); 996 997 (void) adt_end_session(ah); 998 } 999 1000 /* 1001 * The main routine for the door server that deals with zone state transitions. 1002 */ 1003 /* ARGSUSED */ 1004 static void 1005 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 1006 uint_t n_desc) 1007 { 1008 ucred_t *uc = NULL; 1009 const priv_set_t *eset; 1010 1011 zone_state_t zstate; 1012 zone_cmd_t cmd; 1013 zone_cmd_arg_t *zargp; 1014 1015 boolean_t kernelcall; 1016 1017 int rval = -1; 1018 uint64_t uniqid; 1019 zoneid_t zoneid = -1; 1020 zlog_t zlog; 1021 zlog_t *zlogp; 1022 zone_cmd_rval_t *rvalp; 1023 size_t rlen = getpagesize(); /* conservative */ 1024 fs_callback_t cb; 1025 brand_handle_t bh; 1026 1027 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1028 zargp = (zone_cmd_arg_t *)args; 1029 1030 /* 1031 * When we get the door unref message, we've fdetach'd the door, and 1032 * it is time for us to shut down zoneadmd. 1033 */ 1034 if (zargp == DOOR_UNREF_DATA) { 1035 /* 1036 * See comment at end of main() for info on the last rites. 1037 */ 1038 exit(0); 1039 } 1040 1041 if (zargp == NULL) { 1042 (void) door_return(NULL, 0, 0, 0); 1043 } 1044 1045 rvalp = alloca(rlen); 1046 bzero(rvalp, rlen); 1047 zlog.logfile = NULL; 1048 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 1049 zlog.buf = rvalp->errbuf; 1050 zlog.log = zlog.buf; 1051 /* defer initialization of zlog.locale until after credential check */ 1052 zlogp = &zlog; 1053 1054 if (alen != sizeof (zone_cmd_arg_t)) { 1055 /* 1056 * This really shouldn't be happening. 1057 */ 1058 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 1059 "unexpected (expected %d bytes)", alen, 1060 sizeof (zone_cmd_arg_t)); 1061 goto out; 1062 } 1063 cmd = zargp->cmd; 1064 1065 if (door_ucred(&uc) != 0) { 1066 zerror(&logsys, B_TRUE, "door_ucred"); 1067 goto out; 1068 } 1069 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 1070 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 1071 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 1072 ucred_geteuid(uc) != 0)) { 1073 zerror(&logsys, B_FALSE, "insufficient privileges"); 1074 goto out; 1075 } 1076 1077 kernelcall = ucred_getpid(uc) == 0; 1078 1079 /* 1080 * This is safe because we only use a zlog_t throughout the 1081 * duration of a door call; i.e., by the time the pointer 1082 * might become invalid, the door call would be over. 1083 */ 1084 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 1085 1086 (void) mutex_lock(&lock); 1087 1088 /* 1089 * Once we start to really die off, we don't want more connections. 1090 */ 1091 if (in_death_throes) { 1092 (void) mutex_unlock(&lock); 1093 ucred_free(uc); 1094 (void) door_return(NULL, 0, 0, 0); 1095 thr_exit(NULL); 1096 } 1097 1098 /* 1099 * Check for validity of command. 1100 */ 1101 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && 1102 cmd != Z_REBOOT && cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && 1103 cmd != Z_MOUNT && cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { 1104 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 1105 goto out; 1106 } 1107 1108 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 1109 /* 1110 * Can't happen 1111 */ 1112 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 1113 cmd); 1114 goto out; 1115 } 1116 /* 1117 * We ignore the possibility of someone calling zone_create(2) 1118 * explicitly; all requests must come through zoneadmd. 1119 */ 1120 if (zone_get_state(zone_name, &zstate) != Z_OK) { 1121 /* 1122 * Something terribly wrong happened 1123 */ 1124 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 1125 goto out; 1126 } 1127 1128 if (kernelcall) { 1129 /* 1130 * Kernel-initiated requests may lose their validity if the 1131 * zone_t the kernel was referring to has gone away. 1132 */ 1133 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 1134 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 1135 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 1136 /* 1137 * We're not talking about the same zone. The request 1138 * must have arrived too late. Return error. 1139 */ 1140 rval = -1; 1141 goto out; 1142 } 1143 zlogp = &logsys; /* Log errors to syslog */ 1144 } 1145 1146 /* 1147 * If we are being asked to forcibly mount or boot a zone, we 1148 * pretend that an INCOMPLETE zone is actually INSTALLED. 1149 */ 1150 if (zstate == ZONE_STATE_INCOMPLETE && 1151 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) 1152 zstate = ZONE_STATE_INSTALLED; 1153 1154 switch (zstate) { 1155 case ZONE_STATE_CONFIGURED: 1156 case ZONE_STATE_INCOMPLETE: 1157 /* 1158 * Not our area of expertise; we just print a nice message 1159 * and die off. 1160 */ 1161 zerror(zlogp, B_FALSE, 1162 "%s operation is invalid for zones in state '%s'", 1163 z_cmd_name(cmd), zone_state_str(zstate)); 1164 break; 1165 1166 case ZONE_STATE_INSTALLED: 1167 switch (cmd) { 1168 case Z_READY: 1169 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate); 1170 if (rval == 0) 1171 eventstream_write(Z_EVT_ZONE_READIED); 1172 break; 1173 case Z_BOOT: 1174 case Z_FORCEBOOT: 1175 eventstream_write(Z_EVT_ZONE_BOOTING); 1176 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1177 == 0) { 1178 rval = zone_bootup(zlogp, zargp->bootbuf, 1179 zstate); 1180 } 1181 audit_put_record(zlogp, uc, rval, "boot"); 1182 if (rval != 0) { 1183 bringup_failure_recovery = B_TRUE; 1184 (void) zone_halt(zlogp, B_FALSE, B_FALSE, 1185 zstate); 1186 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1187 } 1188 break; 1189 case Z_HALT: 1190 if (kernelcall) /* Invalid; can't happen */ 1191 abort(); 1192 /* 1193 * We could have two clients racing to halt this 1194 * zone; the second client loses, but his request 1195 * doesn't fail, since the zone is now in the desired 1196 * state. 1197 */ 1198 zerror(zlogp, B_FALSE, "zone is already halted"); 1199 rval = 0; 1200 break; 1201 case Z_REBOOT: 1202 if (kernelcall) /* Invalid; can't happen */ 1203 abort(); 1204 zerror(zlogp, B_FALSE, "%s operation is invalid " 1205 "for zones in state '%s'", z_cmd_name(cmd), 1206 zone_state_str(zstate)); 1207 rval = -1; 1208 break; 1209 case Z_NOTE_UNINSTALLING: 1210 if (kernelcall) /* Invalid; can't happen */ 1211 abort(); 1212 /* 1213 * Tell the console to print out a message about this. 1214 * Once it does, we will be in_death_throes. 1215 */ 1216 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 1217 break; 1218 case Z_MOUNT: 1219 case Z_FORCEMOUNT: 1220 if (kernelcall) /* Invalid; can't happen */ 1221 abort(); 1222 if (!zone_isnative && !zone_iscluster && 1223 !zone_islabeled) { 1224 /* 1225 * -U mounts the zone without lofs mounting 1226 * zone file systems back into the scratch 1227 * zone. This is required when mounting 1228 * non-native branded zones. 1229 */ 1230 (void) strlcpy(zargp->bootbuf, "-U", 1231 BOOTARGS_MAX); 1232 } 1233 1234 rval = zone_ready(zlogp, 1235 strcmp(zargp->bootbuf, "-U") == 0 ? 1236 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate); 1237 if (rval != 0) 1238 break; 1239 1240 eventstream_write(Z_EVT_ZONE_READIED); 1241 1242 /* 1243 * Get a handle to the default brand info. 1244 * We must always use the default brand file system 1245 * list when mounting the zone. 1246 */ 1247 if ((bh = brand_open(default_brand)) == NULL) { 1248 rval = -1; 1249 break; 1250 } 1251 1252 /* 1253 * Get the list of filesystems to mount from 1254 * the brand configuration. These mounts are done 1255 * via a thread that will enter the zone, so they 1256 * are done from within the context of the zone. 1257 */ 1258 cb.zlogp = zlogp; 1259 cb.zoneid = zone_id; 1260 cb.mount_cmd = B_TRUE; 1261 rval = brand_platform_iter_mounts(bh, 1262 mount_early_fs, &cb); 1263 1264 brand_close(bh); 1265 1266 /* 1267 * Ordinarily, /dev/fd would be mounted inside the zone 1268 * by svc:/system/filesystem/usr:default, but since 1269 * we're not booting the zone, we need to do this 1270 * manually. 1271 */ 1272 if (rval == 0) 1273 rval = mount_early_fs(&cb, 1274 "fd", "/dev/fd", "fd", NULL); 1275 break; 1276 case Z_UNMOUNT: 1277 if (kernelcall) /* Invalid; can't happen */ 1278 abort(); 1279 zerror(zlogp, B_FALSE, "zone is already unmounted"); 1280 rval = 0; 1281 break; 1282 } 1283 break; 1284 1285 case ZONE_STATE_READY: 1286 switch (cmd) { 1287 case Z_READY: 1288 /* 1289 * We could have two clients racing to ready this 1290 * zone; the second client loses, but his request 1291 * doesn't fail, since the zone is now in the desired 1292 * state. 1293 */ 1294 zerror(zlogp, B_FALSE, "zone is already ready"); 1295 rval = 0; 1296 break; 1297 case Z_BOOT: 1298 (void) strlcpy(boot_args, zargp->bootbuf, 1299 sizeof (boot_args)); 1300 eventstream_write(Z_EVT_ZONE_BOOTING); 1301 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1302 audit_put_record(zlogp, uc, rval, "boot"); 1303 if (rval != 0) { 1304 bringup_failure_recovery = B_TRUE; 1305 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1306 zstate); 1307 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1308 } 1309 boot_args[0] = '\0'; 1310 break; 1311 case Z_HALT: 1312 if (kernelcall) /* Invalid; can't happen */ 1313 abort(); 1314 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1315 != 0) 1316 break; 1317 eventstream_write(Z_EVT_ZONE_HALTED); 1318 break; 1319 case Z_REBOOT: 1320 case Z_NOTE_UNINSTALLING: 1321 case Z_MOUNT: 1322 case Z_UNMOUNT: 1323 if (kernelcall) /* Invalid; can't happen */ 1324 abort(); 1325 zerror(zlogp, B_FALSE, "%s operation is invalid " 1326 "for zones in state '%s'", z_cmd_name(cmd), 1327 zone_state_str(zstate)); 1328 rval = -1; 1329 break; 1330 } 1331 break; 1332 1333 case ZONE_STATE_MOUNTED: 1334 switch (cmd) { 1335 case Z_UNMOUNT: 1336 if (kernelcall) /* Invalid; can't happen */ 1337 abort(); 1338 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate); 1339 if (rval == 0) { 1340 eventstream_write(Z_EVT_ZONE_HALTED); 1341 (void) sema_post(&scratch_sem); 1342 } 1343 break; 1344 default: 1345 if (kernelcall) /* Invalid; can't happen */ 1346 abort(); 1347 zerror(zlogp, B_FALSE, "%s operation is invalid " 1348 "for zones in state '%s'", z_cmd_name(cmd), 1349 zone_state_str(zstate)); 1350 rval = -1; 1351 break; 1352 } 1353 break; 1354 1355 case ZONE_STATE_RUNNING: 1356 case ZONE_STATE_SHUTTING_DOWN: 1357 case ZONE_STATE_DOWN: 1358 switch (cmd) { 1359 case Z_READY: 1360 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1361 != 0) 1362 break; 1363 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0) 1364 eventstream_write(Z_EVT_ZONE_READIED); 1365 else 1366 eventstream_write(Z_EVT_ZONE_HALTED); 1367 break; 1368 case Z_BOOT: 1369 /* 1370 * We could have two clients racing to boot this 1371 * zone; the second client loses, but his request 1372 * doesn't fail, since the zone is now in the desired 1373 * state. 1374 */ 1375 zerror(zlogp, B_FALSE, "zone is already booted"); 1376 rval = 0; 1377 break; 1378 case Z_HALT: 1379 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1380 != 0) 1381 break; 1382 eventstream_write(Z_EVT_ZONE_HALTED); 1383 break; 1384 case Z_REBOOT: 1385 (void) strlcpy(boot_args, zargp->bootbuf, 1386 sizeof (boot_args)); 1387 eventstream_write(Z_EVT_ZONE_REBOOTING); 1388 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1389 != 0) { 1390 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1391 boot_args[0] = '\0'; 1392 break; 1393 } 1394 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1395 != 0) { 1396 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1397 boot_args[0] = '\0'; 1398 break; 1399 } 1400 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1401 audit_put_record(zlogp, uc, rval, "reboot"); 1402 if (rval != 0) { 1403 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1404 zstate); 1405 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1406 } 1407 boot_args[0] = '\0'; 1408 break; 1409 case Z_NOTE_UNINSTALLING: 1410 case Z_MOUNT: 1411 case Z_UNMOUNT: 1412 zerror(zlogp, B_FALSE, "%s operation is invalid " 1413 "for zones in state '%s'", z_cmd_name(cmd), 1414 zone_state_str(zstate)); 1415 rval = -1; 1416 break; 1417 } 1418 break; 1419 default: 1420 abort(); 1421 } 1422 1423 /* 1424 * Because the state of the zone may have changed, we make sure 1425 * to wake the console poller, which is in charge of initiating 1426 * the shutdown procedure as necessary. 1427 */ 1428 eventstream_write(Z_EVT_NULL); 1429 1430 out: 1431 (void) mutex_unlock(&lock); 1432 if (kernelcall) { 1433 rvalp = NULL; 1434 rlen = 0; 1435 } else { 1436 rvalp->rval = rval; 1437 } 1438 if (uc != NULL) 1439 ucred_free(uc); 1440 (void) door_return((char *)rvalp, rlen, NULL, 0); 1441 thr_exit(NULL); 1442 } 1443 1444 static int 1445 setup_door(zlog_t *zlogp) 1446 { 1447 if ((zone_door = door_create(server, NULL, 1448 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1449 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1450 return (-1); 1451 } 1452 (void) fdetach(zone_door_path); 1453 1454 if (fattach(zone_door, zone_door_path) != 0) { 1455 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1456 (void) door_revoke(zone_door); 1457 (void) fdetach(zone_door_path); 1458 zone_door = -1; 1459 return (-1); 1460 } 1461 return (0); 1462 } 1463 1464 /* 1465 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 1466 * is where zoneadmd itself will check to see that another instance of 1467 * zoneadmd isn't already controlling this zone. 1468 * 1469 * The idea here is that we want to open the path to which we will 1470 * attach our door, lock it, and then make sure that no-one has beat us 1471 * to fattach(3c)ing onto it. 1472 * 1473 * fattach(3c) is really a mount, so there are actually two possible 1474 * vnodes we could be dealing with. Our strategy is as follows: 1475 * 1476 * - If the file we opened is a regular file (common case): 1477 * There is no fattach(3c)ed door, so we have a chance of becoming 1478 * the managing zoneadmd. We attempt to lock the file: if it is 1479 * already locked, that means someone else raced us here, so we 1480 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 1481 * that beat us to it. 1482 * 1483 * - If the file we opened is a namefs file: 1484 * This means there is already an established door fattach(3c)'ed 1485 * to the rendezvous path. We've lost the race, so we give up. 1486 * Note that in this case we also try to grab the file lock, and 1487 * will succeed in acquiring it since the vnode locked by the 1488 * "winning" zoneadmd was a regular one, and the one we locked was 1489 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1490 * we just return to zoneadm(1m) which knows to retry. 1491 */ 1492 static int 1493 make_daemon_exclusive(zlog_t *zlogp) 1494 { 1495 int doorfd = -1; 1496 int err, ret = -1; 1497 struct stat st; 1498 struct flock flock; 1499 zone_state_t zstate; 1500 1501 top: 1502 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1503 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1504 zonecfg_strerror(err)); 1505 goto out; 1506 } 1507 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1508 S_IREAD|S_IWRITE)) < 0) { 1509 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1510 goto out; 1511 } 1512 if (fstat(doorfd, &st) < 0) { 1513 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1514 goto out; 1515 } 1516 /* 1517 * Lock the file to synchronize with other zoneadmd 1518 */ 1519 flock.l_type = F_WRLCK; 1520 flock.l_whence = SEEK_SET; 1521 flock.l_start = (off_t)0; 1522 flock.l_len = (off_t)0; 1523 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1524 /* 1525 * Someone else raced us here and grabbed the lock file 1526 * first. A warning here is inappropriate since nothing 1527 * went wrong. 1528 */ 1529 goto out; 1530 } 1531 1532 if (strcmp(st.st_fstype, "namefs") == 0) { 1533 struct door_info info; 1534 1535 /* 1536 * There is already something fattach()'ed to this file. 1537 * Lets see what the door is up to. 1538 */ 1539 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1540 /* 1541 * Another zoneadmd process seems to be in 1542 * control of the situation and we don't need to 1543 * be here. A warning here is inappropriate 1544 * since nothing went wrong. 1545 * 1546 * If the door has been revoked, the zoneadmd 1547 * process currently managing the zone is going 1548 * away. We'll return control to zoneadm(1m) 1549 * which will try again (by which time zoneadmd 1550 * will hopefully have exited). 1551 */ 1552 goto out; 1553 } 1554 1555 /* 1556 * If we got this far, there's a fattach(3c)'ed door 1557 * that belongs to a process that has exited, which can 1558 * happen if the previous zoneadmd died unexpectedly. 1559 * 1560 * Let user know that something is amiss, but that we can 1561 * recover; if the zone is in the installed state, then don't 1562 * message, since having a running zoneadmd isn't really 1563 * expected/needed. We want to keep occurences of this message 1564 * limited to times when zoneadmd is picking back up from a 1565 * zoneadmd that died while the zone was in some non-trivial 1566 * state. 1567 */ 1568 if (zstate > ZONE_STATE_INSTALLED) { 1569 zerror(zlogp, B_FALSE, 1570 "zone '%s': WARNING: zone is in state '%s', but " 1571 "zoneadmd does not appear to be available; " 1572 "restarted zoneadmd to recover.", 1573 zone_name, zone_state_str(zstate)); 1574 } 1575 1576 (void) fdetach(zone_door_path); 1577 (void) close(doorfd); 1578 goto top; 1579 } 1580 ret = 0; 1581 out: 1582 (void) close(doorfd); 1583 return (ret); 1584 } 1585 1586 /* 1587 * Setup the brand's pre and post state change callbacks, as well as the 1588 * query callback, if any of these exist. 1589 */ 1590 static int 1591 brand_callback_init(brand_handle_t bh, char *zone_name) 1592 { 1593 char zpath[MAXPATHLEN]; 1594 1595 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) 1596 return (-1); 1597 1598 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX, 1599 sizeof (pre_statechg_hook)); 1600 1601 if (brand_get_prestatechange(bh, zone_name, zpath, 1602 pre_statechg_hook + EXEC_LEN, 1603 sizeof (pre_statechg_hook) - EXEC_LEN) != 0) 1604 return (-1); 1605 1606 if (strlen(pre_statechg_hook) <= EXEC_LEN) 1607 pre_statechg_hook[0] = '\0'; 1608 1609 (void) strlcpy(post_statechg_hook, EXEC_PREFIX, 1610 sizeof (post_statechg_hook)); 1611 1612 if (brand_get_poststatechange(bh, zone_name, zpath, 1613 post_statechg_hook + EXEC_LEN, 1614 sizeof (post_statechg_hook) - EXEC_LEN) != 0) 1615 return (-1); 1616 1617 if (strlen(post_statechg_hook) <= EXEC_LEN) 1618 post_statechg_hook[0] = '\0'; 1619 1620 (void) strlcpy(query_hook, EXEC_PREFIX, 1621 sizeof (query_hook)); 1622 1623 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN, 1624 sizeof (query_hook) - EXEC_LEN) != 0) 1625 return (-1); 1626 1627 if (strlen(query_hook) <= EXEC_LEN) 1628 query_hook[0] = '\0'; 1629 1630 return (0); 1631 } 1632 1633 int 1634 main(int argc, char *argv[]) 1635 { 1636 int opt; 1637 zoneid_t zid; 1638 priv_set_t *privset; 1639 zone_state_t zstate; 1640 char parents_locale[MAXPATHLEN]; 1641 brand_handle_t bh; 1642 int err; 1643 1644 pid_t pid; 1645 sigset_t blockset; 1646 sigset_t block_cld; 1647 1648 struct { 1649 sema_t sem; 1650 int status; 1651 zlog_t log; 1652 } *shstate; 1653 size_t shstatelen = getpagesize(); 1654 1655 zlog_t errlog; 1656 zlog_t *zlogp; 1657 1658 int ctfd; 1659 1660 progname = get_execbasename(argv[0]); 1661 1662 /* 1663 * Make sure stderr is unbuffered 1664 */ 1665 (void) setbuffer(stderr, NULL, 0); 1666 1667 /* 1668 * Get out of the way of mounted filesystems, since we will daemonize 1669 * soon. 1670 */ 1671 (void) chdir("/"); 1672 1673 /* 1674 * Use the default system umask per PSARC 1998/110 rather than 1675 * anything that may have been set by the caller. 1676 */ 1677 (void) umask(CMASK); 1678 1679 /* 1680 * Initially we want to use our parent's locale. 1681 */ 1682 (void) setlocale(LC_ALL, ""); 1683 (void) textdomain(TEXT_DOMAIN); 1684 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1685 sizeof (parents_locale)); 1686 1687 /* 1688 * This zlog_t is used for writing to stderr 1689 */ 1690 errlog.logfile = stderr; 1691 errlog.buflen = errlog.loglen = 0; 1692 errlog.buf = errlog.log = NULL; 1693 errlog.locale = parents_locale; 1694 1695 /* 1696 * We start off writing to stderr until we're ready to daemonize. 1697 */ 1698 zlogp = &errlog; 1699 1700 /* 1701 * Process options. 1702 */ 1703 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1704 switch (opt) { 1705 case 'R': 1706 zonecfg_set_root(optarg); 1707 break; 1708 case 'z': 1709 zone_name = optarg; 1710 break; 1711 default: 1712 usage(); 1713 } 1714 } 1715 1716 if (zone_name == NULL) 1717 usage(); 1718 1719 /* 1720 * Because usage() prints directly to stderr, it has gettext() 1721 * wrapping, which depends on the locale. But since zerror() calls 1722 * localize() which tweaks the locale, it is not safe to call zerror() 1723 * until after the last call to usage(). Fortunately, the last call 1724 * to usage() is just above and the first call to zerror() is just 1725 * below. Don't mess this up. 1726 */ 1727 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1728 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1729 GLOBAL_ZONENAME); 1730 return (1); 1731 } 1732 1733 if (zone_get_id(zone_name, &zid) != 0) { 1734 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 1735 zonecfg_strerror(Z_NO_ZONE)); 1736 return (1); 1737 } 1738 1739 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1740 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1741 zonecfg_strerror(err)); 1742 return (1); 1743 } 1744 if (zstate < ZONE_STATE_INCOMPLETE) { 1745 zerror(zlogp, B_FALSE, 1746 "cannot manage a zone which is in state '%s'", 1747 zone_state_str(zstate)); 1748 return (1); 1749 } 1750 1751 if (zonecfg_default_brand(default_brand, 1752 sizeof (default_brand)) != Z_OK) { 1753 zerror(zlogp, B_FALSE, "unable to determine default brand"); 1754 return (1); 1755 } 1756 1757 /* Get a handle to the brand info for this zone */ 1758 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name)) 1759 != Z_OK) { 1760 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1761 return (1); 1762 } 1763 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0); 1764 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0); 1765 1766 /* 1767 * In the alternate root environment, the only supported 1768 * operations are mount and unmount. In this case, just treat 1769 * the zone as native if it is cluster. Cluster zones can be 1770 * native for the purpose of LU or upgrade, and the cluster 1771 * brand may not exist in the miniroot (such as in net install 1772 * upgrade). 1773 */ 1774 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) { 1775 zone_iscluster = B_TRUE; 1776 if (zonecfg_in_alt_root()) { 1777 (void) strlcpy(brand_name, default_brand, 1778 sizeof (brand_name)); 1779 } 1780 } else { 1781 zone_iscluster = B_FALSE; 1782 } 1783 1784 if ((bh = brand_open(brand_name)) == NULL) { 1785 zerror(zlogp, B_FALSE, "unable to open zone brand"); 1786 return (1); 1787 } 1788 1789 /* Get state change brand hooks. */ 1790 if (brand_callback_init(bh, zone_name) == -1) { 1791 zerror(zlogp, B_TRUE, 1792 "failed to initialize brand state change hooks"); 1793 brand_close(bh); 1794 return (1); 1795 } 1796 1797 brand_close(bh); 1798 1799 /* 1800 * Check that we have all privileges. It would be nice to pare 1801 * this down, but this is at least a first cut. 1802 */ 1803 if ((privset = priv_allocset()) == NULL) { 1804 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1805 return (1); 1806 } 1807 1808 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 1809 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 1810 priv_freeset(privset); 1811 return (1); 1812 } 1813 1814 if (priv_isfullset(privset) == B_FALSE) { 1815 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 1816 "run this command (all privs required)"); 1817 priv_freeset(privset); 1818 return (1); 1819 } 1820 priv_freeset(privset); 1821 1822 if (mkzonedir(zlogp) != 0) 1823 return (1); 1824 1825 /* 1826 * Pre-fork: setup shared state 1827 */ 1828 if ((shstate = (void *)mmap(NULL, shstatelen, 1829 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 1830 MAP_FAILED) { 1831 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 1832 return (1); 1833 } 1834 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 1835 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 1836 (void) munmap((char *)shstate, shstatelen); 1837 return (1); 1838 } 1839 shstate->log.logfile = NULL; 1840 shstate->log.buflen = shstatelen - sizeof (*shstate); 1841 shstate->log.loglen = shstate->log.buflen; 1842 shstate->log.buf = (char *)shstate + sizeof (*shstate); 1843 shstate->log.log = shstate->log.buf; 1844 shstate->log.locale = parents_locale; 1845 shstate->status = -1; 1846 1847 /* 1848 * We need a SIGCHLD handler so the sema_wait() below will wake 1849 * up if the child dies without doing a sema_post(). 1850 */ 1851 (void) sigset(SIGCHLD, sigchld); 1852 /* 1853 * We must mask SIGCHLD until after we've coped with the fork 1854 * sufficiently to deal with it; otherwise we can race and 1855 * receive the signal before pid has been initialized 1856 * (yes, this really happens). 1857 */ 1858 (void) sigemptyset(&block_cld); 1859 (void) sigaddset(&block_cld, SIGCHLD); 1860 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 1861 1862 if ((ctfd = init_template()) == -1) { 1863 zerror(zlogp, B_TRUE, "failed to create contract"); 1864 return (1); 1865 } 1866 1867 /* 1868 * Do not let another thread localize a message while we are forking. 1869 */ 1870 (void) mutex_lock(&msglock); 1871 pid = fork(); 1872 (void) mutex_unlock(&msglock); 1873 1874 /* 1875 * In all cases (parent, child, and in the event of an error) we 1876 * don't want to cause creation of contracts on subsequent fork()s. 1877 */ 1878 (void) ct_tmpl_clear(ctfd); 1879 (void) close(ctfd); 1880 1881 if (pid == -1) { 1882 zerror(zlogp, B_TRUE, "could not fork"); 1883 return (1); 1884 1885 } else if (pid > 0) { /* parent */ 1886 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1887 /* 1888 * This marks a window of vulnerability in which we receive 1889 * the SIGCLD before falling into sema_wait (normally we would 1890 * get woken up from sema_wait with EINTR upon receipt of 1891 * SIGCLD). So we may need to use some other scheme like 1892 * sema_posting in the sigcld handler. 1893 * blech 1894 */ 1895 (void) sema_wait(&shstate->sem); 1896 (void) sema_destroy(&shstate->sem); 1897 if (shstate->status != 0) 1898 (void) waitpid(pid, NULL, WNOHANG); 1899 /* 1900 * It's ok if we die with SIGPIPE. It's not like we could have 1901 * done anything about it. 1902 */ 1903 (void) fprintf(stderr, "%s", shstate->log.buf); 1904 _exit(shstate->status == 0 ? 0 : 1); 1905 } 1906 1907 /* 1908 * The child charges on. 1909 */ 1910 (void) sigset(SIGCHLD, SIG_DFL); 1911 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1912 1913 /* 1914 * SIGPIPE can be delivered if we write to a socket for which the 1915 * peer endpoint is gone. That can lead to too-early termination 1916 * of zoneadmd, and that's not good eats. 1917 */ 1918 (void) sigset(SIGPIPE, SIG_IGN); 1919 /* 1920 * Stop using stderr 1921 */ 1922 zlogp = &shstate->log; 1923 1924 /* 1925 * We don't need stdout/stderr from now on. 1926 */ 1927 closefrom(0); 1928 1929 /* 1930 * Initialize the syslog zlog_t. This needs to be done after 1931 * the call to closefrom(). 1932 */ 1933 logsys.buf = logsys.log = NULL; 1934 logsys.buflen = logsys.loglen = 0; 1935 logsys.logfile = NULL; 1936 logsys.locale = DEFAULT_LOCALE; 1937 1938 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 1939 1940 /* 1941 * The eventstream is used to publish state changes in the zone 1942 * from the door threads to the console I/O poller. 1943 */ 1944 if (eventstream_init() == -1) { 1945 zerror(zlogp, B_TRUE, "unable to create eventstream"); 1946 goto child_out; 1947 } 1948 1949 (void) snprintf(zone_door_path, sizeof (zone_door_path), 1950 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 1951 1952 /* 1953 * See if another zoneadmd is running for this zone. If not, then we 1954 * can now modify system state. 1955 */ 1956 if (make_daemon_exclusive(zlogp) == -1) 1957 goto child_out; 1958 1959 1960 /* 1961 * Create/join a new session; we need to be careful of what we do with 1962 * the console from now on so we don't end up being the session leader 1963 * for the terminal we're going to be handing out. 1964 */ 1965 (void) setsid(); 1966 1967 /* 1968 * This thread shouldn't be receiving any signals; in particular, 1969 * SIGCHLD should be received by the thread doing the fork(). 1970 */ 1971 (void) sigfillset(&blockset); 1972 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 1973 1974 /* 1975 * Setup the console device and get ready to serve the console; 1976 * once this has completed, we're ready to let console clients 1977 * make an attempt to connect (they will block until 1978 * serve_console_sock() below gets called, and any pending 1979 * connection is accept()ed). 1980 */ 1981 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0) 1982 goto child_out; 1983 1984 /* 1985 * Take the lock now, so that when the door server gets going, we 1986 * are guaranteed that it won't take a request until we are sure 1987 * that everything is completely set up. See the child_out: label 1988 * below to see why this matters. 1989 */ 1990 (void) mutex_lock(&lock); 1991 1992 /* Init semaphore for scratch zones. */ 1993 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 1994 zerror(zlogp, B_TRUE, 1995 "failed to initialize semaphore for scratch zone"); 1996 goto child_out; 1997 } 1998 1999 /* open the dladm handle */ 2000 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) { 2001 zerror(zlogp, B_FALSE, "failed to open dladm handle"); 2002 goto child_out; 2003 } 2004 2005 /* 2006 * Note: door setup must occur *after* the console is setup. 2007 * This is so that as zlogin tests the door to see if zoneadmd 2008 * is ready yet, we know that the console will get serviced 2009 * once door_info() indicates that the door is "up". 2010 */ 2011 if (setup_door(zlogp) == -1) 2012 goto child_out; 2013 2014 /* 2015 * Things seem OK so far; tell the parent process that we're done 2016 * with setup tasks. This will cause the parent to exit, signalling 2017 * to zoneadm, zlogin, or whatever forked it that we are ready to 2018 * service requests. 2019 */ 2020 shstate->status = 0; 2021 (void) sema_post(&shstate->sem); 2022 (void) munmap((char *)shstate, shstatelen); 2023 shstate = NULL; 2024 2025 (void) mutex_unlock(&lock); 2026 2027 /* 2028 * zlogp is now invalid, so reset it to the syslog logger. 2029 */ 2030 zlogp = &logsys; 2031 2032 /* 2033 * Now that we are free of any parents, switch to the default locale. 2034 */ 2035 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 2036 2037 /* 2038 * At this point the setup portion of main() is basically done, so 2039 * we reuse this thread to manage the zone console. When 2040 * serve_console() has returned, we are past the point of no return 2041 * in the life of this zoneadmd. 2042 */ 2043 if (zonecfg_in_alt_root()) { 2044 /* 2045 * This is just awful, but mounted scratch zones don't (and 2046 * can't) have consoles. We just wait for unmount instead. 2047 */ 2048 while (sema_wait(&scratch_sem) == EINTR) 2049 ; 2050 } else { 2051 serve_console(zlogp); 2052 assert(in_death_throes); 2053 } 2054 2055 /* 2056 * This is the next-to-last part of the exit interlock. Upon calling 2057 * fdetach(), the door will go unreferenced; once any 2058 * outstanding requests (like the door thread doing Z_HALT) are 2059 * done, the door will get an UNREF notification; when it handles 2060 * the UNREF, the door server will cause the exit. It's possible 2061 * that fdetach() can fail because the file is in use, in which 2062 * case we'll retry the operation. 2063 */ 2064 assert(!MUTEX_HELD(&lock)); 2065 for (;;) { 2066 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY)) 2067 break; 2068 yield(); 2069 } 2070 2071 for (;;) 2072 (void) pause(); 2073 2074 child_out: 2075 assert(pid == 0); 2076 if (shstate != NULL) { 2077 shstate->status = -1; 2078 (void) sema_post(&shstate->sem); 2079 (void) munmap((char *)shstate, shstatelen); 2080 } 2081 2082 /* 2083 * This might trigger an unref notification, but if so, 2084 * we are still holding the lock, so our call to exit will 2085 * ultimately win the race and will publish the right exit 2086 * code. 2087 */ 2088 if (zone_door != -1) { 2089 assert(MUTEX_HELD(&lock)); 2090 (void) door_revoke(zone_door); 2091 (void) fdetach(zone_door_path); 2092 } 2093 2094 if (dld_handle != NULL) 2095 dladm_close(dld_handle); 2096 2097 return (1); /* return from main() forcibly exits an MT process */ 2098 } 2099