/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

/*
 * zoneadmd manages zones; one zoneadmd process is launched for each
 * non-global zone on the system.  This daemon juggles four jobs:
 *
 * - Implement setup and teardown of the zone "virtual platform": mount and
 *   unmount filesystems; create and destroy network interfaces; communicate
 *   with devfsadmd to lay out devices for the zone; instantiate the zone
 *   console device; configure process runtime attributes such as resource
 *   controls, pool bindings, fine-grained privileges.
 *
 * - Launch the zone's init(8) process.
 *
 * - Implement a door server; clients (like zoneadm) connect to the door
 *   server and request zone state changes.  The kernel is also a client of
 *   this door server.  A request to halt or reboot the zone which originates
 *   *inside* the zone results in a door upcall from the kernel into zoneadmd.
 *
 *   One minor problem is that messages emitted by zoneadmd need to be passed
 *   back to the zoneadm process making the request.  These messages need to
 *   be rendered in the client's locale; so, this is passed in as part of the
 *   request.  The exception is the kernel upcall to zoneadmd, in which case
 *   messages are syslog'd.
 *
 *   To make all of this work, the Makefile adds -a to xgettext to extract *all*
 *   strings, and an exclusion file (zoneadmd.xcl) is used to exclude those
 *   strings which do not need to be translated.
 *
 * - Act as a console server for zlogin -C processes; see comments in zcons.c
 *   for more information about the zone console architecture.
 *
 * DESIGN NOTES
 *
 * Restart:
 *   A chief design constraint of zoneadmd is that it should be restartable in
 *   the case that the administrator kills it off, or it suffers a fatal error,
 *   without the running zone being impacted; this is akin to being able to
 *   reboot the service processor of a server without affecting the OS instance.
65 */ 66 67 #include <sys/param.h> 68 #include <sys/mman.h> 69 #include <sys/types.h> 70 #include <sys/stat.h> 71 #include <sys/sysmacros.h> 72 73 #include <bsm/adt.h> 74 #include <bsm/adt_event.h> 75 76 #include <alloca.h> 77 #include <assert.h> 78 #include <errno.h> 79 #include <door.h> 80 #include <fcntl.h> 81 #include <locale.h> 82 #include <signal.h> 83 #include <stdarg.h> 84 #include <stdio.h> 85 #include <stdlib.h> 86 #include <string.h> 87 #include <strings.h> 88 #include <synch.h> 89 #include <syslog.h> 90 #include <thread.h> 91 #include <unistd.h> 92 #include <wait.h> 93 #include <limits.h> 94 #include <zone.h> 95 #include <libbrand.h> 96 #include <sys/brand.h> 97 #include <libcontract.h> 98 #include <libcontract_priv.h> 99 #include <sys/brand.h> 100 #include <sys/contract/process.h> 101 #include <sys/ctfs.h> 102 #include <libdladm.h> 103 #include <sys/dls_mgmt.h> 104 #include <libscf.h> 105 106 #include <libzonecfg.h> 107 #include <zonestat_impl.h> 108 #include "zoneadmd.h" 109 110 static char *progname; 111 char *zone_name; /* zone which we are managing */ 112 char pool_name[MAXNAMELEN]; 113 char default_brand[MAXNAMELEN]; 114 char brand_name[MAXNAMELEN]; 115 boolean_t zone_isnative; 116 boolean_t zone_iscluster; 117 boolean_t zone_islabeled; 118 boolean_t shutdown_in_progress; 119 static zoneid_t zone_id; 120 dladm_handle_t dld_handle = NULL; 121 122 static char pre_statechg_hook[2 * MAXPATHLEN]; 123 static char post_statechg_hook[2 * MAXPATHLEN]; 124 char query_hook[2 * MAXPATHLEN]; 125 126 zlog_t logsys; 127 128 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 129 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 130 131 static sema_t scratch_sem; /* for scratch zones */ 132 133 static char zone_door_path[MAXPATHLEN]; 134 static int zone_door = -1; 135 136 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 137 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 138 139 #if !defined(TEXT_DOMAIN) /* 
should be defined by cc -D */ 140 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 141 #endif 142 143 #define DEFAULT_LOCALE "C" 144 145 static const char * 146 z_cmd_name(zone_cmd_t zcmd) 147 { 148 /* This list needs to match the enum in sys/zone.h */ 149 static const char *zcmdstr[] = { 150 "ready", "boot", "forceboot", "reboot", "halt", 151 "note_uninstalling", "mount", "forcemount", "unmount", 152 "shutdown" 153 }; 154 155 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 156 return ("unknown"); 157 else 158 return (zcmdstr[(int)zcmd]); 159 } 160 161 static char * 162 get_execbasename(char *execfullname) 163 { 164 char *last_slash, *execbasename; 165 166 /* guard against '/' at end of command invocation */ 167 for (;;) { 168 last_slash = strrchr(execfullname, '/'); 169 if (last_slash == NULL) { 170 execbasename = execfullname; 171 break; 172 } else { 173 execbasename = last_slash + 1; 174 if (*execbasename == '\0') { 175 *last_slash = '\0'; 176 continue; 177 } 178 break; 179 } 180 } 181 return (execbasename); 182 } 183 184 static void 185 usage(void) 186 { 187 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 188 (void) fprintf(stderr, 189 gettext("\tNote: %s should not be run directly.\n"), progname); 190 exit(2); 191 } 192 193 /* ARGSUSED */ 194 static void 195 sigchld(int sig) 196 { 197 } 198 199 char * 200 localize_msg(char *locale, const char *msg) 201 { 202 char *out; 203 204 (void) mutex_lock(&msglock); 205 (void) setlocale(LC_MESSAGES, locale); 206 out = gettext(msg); 207 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 208 (void) mutex_unlock(&msglock); 209 return (out); 210 } 211 212 /* PRINTFLIKE3 */ 213 void 214 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 
215 { 216 va_list alist; 217 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 218 char *bp; 219 int saved_errno = errno; 220 221 if (zlogp == NULL) 222 return; 223 if (zlogp == &logsys) 224 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 225 zone_name); 226 else 227 buf[0] = '\0'; 228 bp = &(buf[strlen(buf)]); 229 230 /* 231 * In theory, the locale pointer should be set to either "C" or a 232 * char array, so it should never be NULL 233 */ 234 assert(zlogp->locale != NULL); 235 /* Locale is per process, but we are multi-threaded... */ 236 fmt = localize_msg(zlogp->locale, fmt); 237 238 va_start(alist, fmt); 239 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 240 va_end(alist); 241 bp = &(buf[strlen(buf)]); 242 if (use_strerror) 243 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 244 strerror(saved_errno)); 245 if (zlogp == &logsys) { 246 (void) syslog(LOG_ERR, "%s", buf); 247 } else if (zlogp->logfile != NULL) { 248 (void) fprintf(zlogp->logfile, "%s\n", buf); 249 } else { 250 size_t buflen; 251 size_t copylen; 252 253 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 254 copylen = MIN(buflen, zlogp->loglen); 255 zlogp->log += copylen; 256 zlogp->loglen -= copylen; 257 } 258 } 259 260 /* 261 * Emit a warning for any boot arguments which are unrecognized. Since 262 * Solaris boot arguments are getopt(3c) compatible (see kernel(8)), we 263 * put the arguments into an argv style array, use getopt to process them, 264 * and put the resultant argument string back into outargs. 265 * 266 * During the filtering, we pull out any arguments which are truly "boot" 267 * arguments, leaving only those which are to be passed intact to the 268 * progenitor process. The one we support at the moment is -i, which 269 * indicates to the kernel which program should be launched as 'init'. 270 * 271 * A return of Z_INVAL indicates specifically that the arguments are 272 * not valid; this is a non-fatal error. 
Except for Z_OK, all other return 273 * values are treated as fatal. 274 */ 275 static int 276 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 277 char *init_file, char *badarg) 278 { 279 int argc = 0, argc_save; 280 int i, c; 281 int err; 282 char *arg, *lasts, **argv = NULL, **argv_save; 283 char zonecfg_args[BOOTARGS_MAX]; 284 char scratchargs[BOOTARGS_MAX], *sargs; 285 char argsw[5]; 286 287 bzero(outargs, BOOTARGS_MAX); 288 bzero(badarg, BOOTARGS_MAX); 289 290 /* 291 * If the user didn't specify transient boot arguments, check 292 * to see if there were any specified in the zone configuration, 293 * and use them if applicable. 294 */ 295 if (inargs == NULL || inargs[0] == '\0') { 296 zone_dochandle_t handle; 297 if ((handle = zonecfg_init_handle()) == NULL) { 298 zerror(zlogp, B_TRUE, 299 "getting zone configuration handle"); 300 return (Z_BAD_HANDLE); 301 } 302 err = zonecfg_get_snapshot_handle(zone_name, handle); 303 if (err != Z_OK) { 304 zerror(zlogp, B_FALSE, 305 "invalid configuration snapshot"); 306 zonecfg_fini_handle(handle); 307 return (Z_BAD_HANDLE); 308 } 309 310 bzero(zonecfg_args, sizeof (zonecfg_args)); 311 (void) zonecfg_get_bootargs(handle, zonecfg_args, 312 sizeof (zonecfg_args)); 313 inargs = zonecfg_args; 314 zonecfg_fini_handle(handle); 315 } 316 317 if (strlen(inargs) >= BOOTARGS_MAX) { 318 zerror(zlogp, B_FALSE, "boot argument string too long"); 319 return (Z_INVAL); 320 } 321 322 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 323 sargs = scratchargs; 324 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 325 sargs = NULL; 326 argc++; 327 } 328 329 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 330 zerror(zlogp, B_FALSE, "memory allocation failed"); 331 return (Z_NOMEM); 332 } 333 334 argv_save = argv; 335 argc_save = argc; 336 337 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 338 sargs = scratchargs; 339 i = 0; 340 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 341 
sargs = NULL; 342 if ((argv[i] = strdup(arg)) == NULL) { 343 err = Z_NOMEM; 344 zerror(zlogp, B_FALSE, "memory allocation failed"); 345 goto done; 346 } 347 i++; 348 } 349 350 /* 351 * We preserve compatibility with the Solaris system boot behavior, 352 * which allows: 353 * 354 * # reboot kernel/unix -s -m verbose 355 * 356 * In this example, kernel/unix tells the booter what file to 357 * boot. We don't want reboot in a zone to be gratuitously different, 358 * so we silently ignore the boot file, if necessary. 359 */ 360 if (argv[0] == NULL) 361 goto done; 362 363 assert(argv[0][0] != ' '); 364 assert(argv[0][0] != '\t'); 365 366 if (argv[0][0] != '-' && argv[0][0] != '\0') { 367 argv = &argv[1]; 368 argc--; 369 } 370 371 optind = 0; 372 opterr = 0; 373 err = Z_OK; 374 while ((c = getopt(argc, argv, "fi:m:s")) != -1) { 375 switch (c) { 376 case 'i': 377 /* 378 * -i is handled by the runtime and is not passed 379 * along to userland 380 */ 381 (void) strlcpy(init_file, optarg, MAXPATHLEN); 382 break; 383 case 'f': 384 /* This has already been processed by zoneadm */ 385 break; 386 case 'm': 387 case 's': 388 /* These pass through unmolested */ 389 (void) snprintf(argsw, sizeof (argsw), " -%c ", c); 390 (void) strlcat(outargs, argsw, BOOTARGS_MAX); 391 if (optarg) 392 (void) strlcat(outargs, optarg, BOOTARGS_MAX); 393 break; 394 case '?': 395 /* 396 * We warn about unknown arguments but pass them 397 * along anyway-- if someone wants to develop their 398 * own init replacement, they can pass it whatever 399 * args they want. 400 */ 401 err = Z_INVAL; 402 (void) snprintf(argsw, sizeof (argsw), " -%c", optopt); 403 (void) strlcat(outargs, argsw, BOOTARGS_MAX); 404 (void) strlcat(badarg, argsw, BOOTARGS_MAX); 405 break; 406 } 407 } 408 409 /* 410 * For Solaris Zones we warn about and discard non-option arguments. 411 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 412 * to the kernel, we concat up all the other remaining boot args. 
413 * and warn on them as a group. 414 */ 415 if (optind < argc) { 416 const char *prefix = ""; 417 418 err = Z_INVAL; 419 do { 420 (void) strlcat(badarg, prefix, BOOTARGS_MAX); 421 (void) strlcat(badarg, argv[optind], BOOTARGS_MAX); 422 prefix = " "; 423 } while (++optind < argc); 424 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 425 "arguments `%s'.", badarg); 426 } 427 428 done: 429 for (i = 0; i < argc_save; i++) { 430 if (argv_save[i] != NULL) 431 free(argv_save[i]); 432 } 433 free(argv_save); 434 return (err); 435 } 436 437 438 static int 439 mkzonedir(zlog_t *zlogp) 440 { 441 struct stat st; 442 /* 443 * We must create and lock everyone but root out of ZONES_TMPDIR 444 * since anyone can open any UNIX domain socket, regardless of 445 * its file system permissions. Sigh... 446 */ 447 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 448 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 449 return (-1); 450 } 451 /* paranoia */ 452 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 453 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 454 return (-1); 455 } 456 (void) chmod(ZONES_TMPDIR, S_IRWXU); 457 return (0); 458 } 459 460 /* 461 * Run the brand's pre-state change callback, if it exists. 462 */ 463 static int 464 brand_prestatechg(zlog_t *zlogp, int state, int cmd) 465 { 466 char cmdbuf[2 * MAXPATHLEN]; 467 const char *altroot; 468 469 if (pre_statechg_hook[0] == '\0') 470 return (0); 471 472 altroot = zonecfg_get_root(); 473 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook, 474 state, cmd, altroot) > sizeof (cmdbuf)) 475 return (-1); 476 477 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 478 return (-1); 479 480 return (0); 481 } 482 483 /* 484 * Run the brand's post-state change callback, if it exists. 
485 */ 486 static int 487 brand_poststatechg(zlog_t *zlogp, int state, int cmd) 488 { 489 char cmdbuf[2 * MAXPATHLEN]; 490 const char *altroot; 491 492 if (post_statechg_hook[0] == '\0') 493 return (0); 494 495 altroot = zonecfg_get_root(); 496 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook, 497 state, cmd, altroot) > sizeof (cmdbuf)) 498 return (-1); 499 500 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 501 return (-1); 502 503 return (0); 504 } 505 506 /* 507 * Notify zonestatd of the new zone. If zonestatd is not running, this 508 * will do nothing. 509 */ 510 static void 511 notify_zonestatd(zoneid_t zoneid) 512 { 513 int cmd[2]; 514 int fd; 515 door_arg_t params; 516 517 fd = open(ZS_DOOR_PATH, O_RDONLY); 518 if (fd < 0) 519 return; 520 521 cmd[0] = ZSD_CMD_NEW_ZONE; 522 cmd[1] = zoneid; 523 params.data_ptr = (char *)&cmd; 524 params.data_size = sizeof (cmd); 525 params.desc_ptr = NULL; 526 params.desc_num = 0; 527 params.rbuf = NULL; 528 params.rsize = 0; 529 (void) door_call(fd, ¶ms); 530 (void) close(fd); 531 } 532 533 /* 534 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 535 * 'true' if this is being invoked as part of the processing for the "mount" 536 * subcommand. 
537 */ 538 static int 539 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate) 540 { 541 int err; 542 543 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0) 544 return (-1); 545 546 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 547 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 548 zonecfg_strerror(err)); 549 goto bad; 550 } 551 552 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 553 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 554 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 555 zonecfg_strerror(err)); 556 goto bad; 557 } 558 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 559 bringup_failure_recovery = B_TRUE; 560 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE); 561 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 562 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 563 zonecfg_strerror(err)); 564 goto bad; 565 } 566 567 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0) 568 goto bad; 569 570 return (0); 571 572 bad: 573 /* 574 * If something goes wrong, we up the zones's state to the target 575 * state, READY, and then invoke the hook as if we're halting. 576 */ 577 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT); 578 return (-1); 579 } 580 581 int 582 init_template(void) 583 { 584 int fd; 585 int err = 0; 586 587 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 588 if (fd == -1) 589 return (-1); 590 591 /* 592 * For now, zoneadmd doesn't do anything with the contract. 593 * Deliver no events, don't inherit, and allow it to be orphaned. 
594 */ 595 err |= ct_tmpl_set_critical(fd, 0); 596 err |= ct_tmpl_set_informative(fd, 0); 597 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 598 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 599 if (err || ct_tmpl_activate(fd)) { 600 (void) close(fd); 601 return (-1); 602 } 603 604 return (fd); 605 } 606 607 typedef struct fs_callback { 608 zlog_t *zlogp; 609 zoneid_t zoneid; 610 boolean_t mount_cmd; 611 } fs_callback_t; 612 613 static int 614 mount_early_fs(void *data, const char *spec, const char *dir, 615 const char *fstype, const char *opt) 616 { 617 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; 618 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; 619 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd; 620 char rootpath[MAXPATHLEN]; 621 pid_t child; 622 int child_status; 623 int tmpl_fd; 624 int rv; 625 ctid_t ct; 626 627 /* determine the zone rootpath */ 628 if (mount_cmd) { 629 char zonepath[MAXPATHLEN]; 630 char luroot[MAXPATHLEN]; 631 632 if (zone_get_zonepath(zone_name, 633 zonepath, sizeof (zonepath)) != Z_OK) { 634 zerror(zlogp, B_FALSE, "unable to determine zone path"); 635 return (-1); 636 } 637 638 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 639 resolve_lofs(zlogp, luroot, sizeof (luroot)); 640 (void) strlcpy(rootpath, luroot, sizeof (rootpath)); 641 } else { 642 if (zone_get_rootpath(zone_name, 643 rootpath, sizeof (rootpath)) != Z_OK) { 644 zerror(zlogp, B_FALSE, "unable to determine zone root"); 645 return (-1); 646 } 647 } 648 649 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) { 650 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 651 rootpath, dir); 652 return (-1); 653 } else if (rv > 0) { 654 /* The mount point path doesn't exist, create it now. 
*/ 655 if (make_one_dir(zlogp, rootpath, dir, 656 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 657 DEFAULT_DIR_GROUP) != 0) { 658 zerror(zlogp, B_FALSE, "failed to create mount point"); 659 return (-1); 660 } 661 662 /* 663 * Now this might seem weird, but we need to invoke 664 * valid_mount_path() again. Why? Because it checks 665 * to make sure that the mount point path is canonical, 666 * which it can only do if the path exists, so now that 667 * we've created the path we have to verify it again. 668 */ 669 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, 670 fstype)) < 0) { 671 zerror(zlogp, B_FALSE, 672 "%s%s is not a valid mount point", rootpath, dir); 673 return (-1); 674 } 675 } 676 677 if ((tmpl_fd = init_template()) == -1) { 678 zerror(zlogp, B_TRUE, "failed to create contract"); 679 return (-1); 680 } 681 682 if ((child = fork()) == -1) { 683 (void) ct_tmpl_clear(tmpl_fd); 684 (void) close(tmpl_fd); 685 zerror(zlogp, B_TRUE, "failed to fork"); 686 return (-1); 687 688 } else if (child == 0) { /* child */ 689 char opt_buf[MAX_MNTOPT_STR]; 690 int optlen = 0; 691 int mflag = MS_DATA; 692 693 (void) ct_tmpl_clear(tmpl_fd); 694 /* 695 * Even though there are no procs running in the zone, we 696 * do this for paranoia's sake. 697 */ 698 (void) closefrom(0); 699 700 if (zone_enter(zoneid) == -1) { 701 _exit(errno); 702 } 703 if (opt != NULL) { 704 /* 705 * The mount() system call is incredibly annoying. 706 * If options are specified, we need to copy them 707 * into a temporary buffer since the mount() system 708 * call will overwrite the options string. It will 709 * also fail if the new option string it wants to 710 * write is bigger than the one we passed in, so 711 * you must pass in a buffer of the maximum possible 712 * option string length. sigh. 
713 */ 714 (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); 715 opt = opt_buf; 716 optlen = MAX_MNTOPT_STR; 717 mflag = MS_OPTIONSTR; 718 } 719 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) 720 _exit(errno); 721 _exit(0); 722 } 723 724 /* parent */ 725 if (contract_latest(&ct) == -1) 726 ct = -1; 727 (void) ct_tmpl_clear(tmpl_fd); 728 (void) close(tmpl_fd); 729 if (waitpid(child, &child_status, 0) != child) { 730 /* unexpected: we must have been signalled */ 731 (void) contract_abandon_id(ct); 732 return (-1); 733 } 734 (void) contract_abandon_id(ct); 735 if (WEXITSTATUS(child_status) != 0) { 736 errno = WEXITSTATUS(child_status); 737 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 738 return (-1); 739 } 740 741 return (0); 742 } 743 744 /* 745 * If retstr is not NULL, the output of the subproc is returned in the str, 746 * otherwise it is output using zerror(). Any memory allocated for retstr 747 * should be freed by the caller. 748 */ 749 int 750 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr) 751 { 752 char buf[1024]; /* arbitrary large amount */ 753 char *inbuf; 754 FILE *file; 755 int status; 756 int rd_cnt; 757 758 if (retstr != NULL) { 759 if ((*retstr = malloc(1024)) == NULL) { 760 zerror(zlogp, B_FALSE, "out of memory"); 761 return (-1); 762 } 763 inbuf = *retstr; 764 rd_cnt = 0; 765 } else { 766 inbuf = buf; 767 } 768 769 file = popen(cmdbuf, "r"); 770 if (file == NULL) { 771 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); 772 return (-1); 773 } 774 775 while (fgets(inbuf, 1024, file) != NULL) { 776 if (retstr == NULL) { 777 if (zlogp != &logsys) { 778 int last = strlen(inbuf) - 1; 779 780 if (inbuf[last] == '\n') 781 inbuf[last] = '\0'; 782 zerror(zlogp, B_FALSE, "%s", inbuf); 783 } 784 } else { 785 char *p; 786 787 rd_cnt += 1024 - 1; 788 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) { 789 zerror(zlogp, B_FALSE, "out of memory"); 790 (void) pclose(file); 791 return (-1); 792 } 793 794 *retstr = p; 795 inbuf = 
*retstr + rd_cnt; 796 } 797 } 798 status = pclose(file); 799 800 if (WIFSIGNALED(status)) { 801 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 802 "signal %d", cmdbuf, WTERMSIG(status)); 803 return (-1); 804 } 805 assert(WIFEXITED(status)); 806 if (WEXITSTATUS(status) == ZEXIT_EXEC) { 807 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); 808 return (-1); 809 } 810 return (WEXITSTATUS(status)); 811 } 812 813 static int 814 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) 815 { 816 zoneid_t zoneid; 817 struct stat st; 818 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 819 char nbootargs[BOOTARGS_MAX]; 820 char cmdbuf[MAXPATHLEN]; 821 fs_callback_t cb; 822 brand_handle_t bh; 823 zone_iptype_t iptype; 824 boolean_t links_loaded = B_FALSE; 825 dladm_status_t status; 826 char errmsg[DLADM_STRSIZE]; 827 int err; 828 boolean_t restart_init, restart_init0, restart_initreboot; 829 830 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0) 831 return (-1); 832 833 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 834 zerror(zlogp, B_TRUE, "unable to get zoneid"); 835 goto bad; 836 } 837 838 cb.zlogp = zlogp; 839 cb.zoneid = zoneid; 840 cb.mount_cmd = B_FALSE; 841 842 /* Get a handle to the brand info for this zone */ 843 if ((bh = brand_open(brand_name)) == NULL) { 844 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 845 goto bad; 846 } 847 848 /* 849 * Get the list of filesystems to mount from the brand 850 * configuration. These mounts are done via a thread that will 851 * enter the zone, so they are done from within the context of the 852 * zone. 853 */ 854 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) { 855 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 856 brand_close(bh); 857 goto bad; 858 } 859 860 /* 861 * Get the brand's boot callback if it exists. 
862 */ 863 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 864 zerror(zlogp, B_FALSE, "unable to determine zone path"); 865 brand_close(bh); 866 goto bad; 867 } 868 (void) strcpy(cmdbuf, EXEC_PREFIX); 869 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 870 sizeof (cmdbuf) - EXEC_LEN) != 0) { 871 zerror(zlogp, B_FALSE, 872 "unable to determine branded zone's boot callback"); 873 brand_close(bh); 874 goto bad; 875 } 876 877 /* Get the path for this zone's init(8) (or equivalent) process. */ 878 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) { 879 zerror(zlogp, B_FALSE, 880 "unable to determine zone's init(8) location"); 881 brand_close(bh); 882 goto bad; 883 } 884 885 /* See if this zone's brand should restart init if it dies. */ 886 restart_init = brand_restartinit(bh); 887 restart_init0 = brand_restartinit0(bh); 888 restart_initreboot = brand_restartinitreboot(bh); 889 890 brand_close(bh); 891 892 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 893 bad_boot_arg); 894 if (err == Z_INVAL) 895 eventstream_write(Z_EVT_ZONE_BADARGS); 896 else if (err != Z_OK) 897 goto bad; 898 899 assert(init_file[0] != '\0'); 900 901 /* Try to anticipate possible problems: Make sure init is executable. */ 902 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 903 zerror(zlogp, B_FALSE, "unable to determine zone root"); 904 goto bad; 905 } 906 907 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); 908 909 if (stat(initpath, &st) == -1) { 910 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 911 goto bad; 912 } 913 914 if ((st.st_mode & S_IXUSR) == 0) { 915 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 916 goto bad; 917 } 918 919 /* 920 * Exclusive stack zones interact with the dlmgmtd running in the 921 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is 922 * booting, and loads its datalinks from the zone's datalink 923 * configuration file. 
924 */ 925 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) { 926 status = dladm_zone_boot(dld_handle, zoneid); 927 if (status != DLADM_STATUS_OK) { 928 zerror(zlogp, B_FALSE, "unable to load zone datalinks: " 929 " %s", dladm_status2str(status, errmsg)); 930 goto bad; 931 } 932 links_loaded = B_TRUE; 933 } 934 935 /* 936 * If there is a brand 'boot' callback, execute it now to give the 937 * brand one last chance to do any additional setup before the zone 938 * is booted. 939 */ 940 if ((strlen(cmdbuf) > EXEC_LEN) && 941 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) { 942 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 943 goto bad; 944 } 945 946 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 947 zerror(zlogp, B_TRUE, "could not set zone boot file"); 948 goto bad; 949 } 950 951 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 952 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 953 goto bad; 954 } 955 956 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART, 957 NULL, 0) == -1) { 958 zerror(zlogp, B_TRUE, "could not set zone init-no-restart"); 959 goto bad; 960 } 961 if (restart_init0 && zone_setattr(zoneid, ZONE_ATTR_INITRESTART0, 962 NULL, 0) == -1) { 963 zerror(zlogp, B_TRUE, 964 "could not set zone init-restart-on-exit-0"); 965 goto bad; 966 } 967 if (restart_initreboot && zone_setattr(zoneid, ZONE_ATTR_INITREBOOT, 968 NULL, 0) == -1) { 969 zerror(zlogp, B_TRUE, "could not set zone reboot-on-init-exit"); 970 goto bad; 971 } 972 973 /* 974 * Inform zonestatd of a new zone so that it can install a door for 975 * the zone to contact it. 
976 */ 977 notify_zonestatd(zone_id); 978 979 if (zone_boot(zoneid) == -1) { 980 zerror(zlogp, B_TRUE, "unable to boot zone"); 981 goto bad; 982 } 983 984 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0) 985 goto bad; 986 987 return (0); 988 989 bad: 990 /* 991 * If something goes wrong, we up the zones's state to the target 992 * state, RUNNING, and then invoke the hook as if we're halting. 993 */ 994 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT); 995 if (links_loaded) 996 (void) dladm_zone_halt(dld_handle, zoneid); 997 return (-1); 998 } 999 1000 static int 1001 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate) 1002 { 1003 int err; 1004 1005 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0) 1006 return (-1); 1007 1008 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) { 1009 if (!bringup_failure_recovery) 1010 zerror(zlogp, B_FALSE, "unable to destroy zone"); 1011 return (-1); 1012 } 1013 1014 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 1015 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 1016 zonecfg_strerror(err)); 1017 1018 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0) 1019 return (-1); 1020 1021 return (0); 1022 } 1023 1024 static int 1025 zone_graceful_shutdown(zlog_t *zlogp) 1026 { 1027 zoneid_t zoneid; 1028 pid_t child; 1029 char cmdbuf[MAXPATHLEN]; 1030 brand_handle_t bh = NULL; 1031 char zpath[MAXPATHLEN]; 1032 ctid_t ct; 1033 int tmpl_fd; 1034 int child_status; 1035 1036 if (shutdown_in_progress) { 1037 zerror(zlogp, B_FALSE, "shutdown already in progress"); 1038 return (-1); 1039 } 1040 1041 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 1042 zerror(zlogp, B_TRUE, "unable to get zoneid"); 1043 return (-1); 1044 } 1045 1046 /* Get a handle to the brand info for this zone */ 1047 if ((bh = brand_open(brand_name)) == NULL) { 1048 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1049 return (-1); 1050 } 1051 1052 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) 
!= Z_OK) { 1053 zerror(zlogp, B_FALSE, "unable to determine zone path"); 1054 brand_close(bh); 1055 return (-1); 1056 } 1057 1058 /* 1059 * If there is a brand 'shutdown' callback, execute it now to give the 1060 * brand a chance to cleanup any custom configuration. 1061 */ 1062 (void) strcpy(cmdbuf, EXEC_PREFIX); 1063 if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 1064 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) { 1065 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT); 1066 } 1067 brand_close(bh); 1068 1069 if ((tmpl_fd = init_template()) == -1) { 1070 zerror(zlogp, B_TRUE, "failed to create contract"); 1071 return (-1); 1072 } 1073 1074 if ((child = fork()) == -1) { 1075 (void) ct_tmpl_clear(tmpl_fd); 1076 (void) close(tmpl_fd); 1077 zerror(zlogp, B_TRUE, "failed to fork"); 1078 return (-1); 1079 } else if (child == 0) { 1080 (void) ct_tmpl_clear(tmpl_fd); 1081 if (zone_enter(zoneid) == -1) { 1082 _exit(errno); 1083 } 1084 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL)); 1085 } 1086 1087 if (contract_latest(&ct) == -1) 1088 ct = -1; 1089 (void) ct_tmpl_clear(tmpl_fd); 1090 (void) close(tmpl_fd); 1091 1092 if (waitpid(child, &child_status, 0) != child) { 1093 /* unexpected: we must have been signalled */ 1094 (void) contract_abandon_id(ct); 1095 return (-1); 1096 } 1097 1098 (void) contract_abandon_id(ct); 1099 if (WEXITSTATUS(child_status) != 0) { 1100 errno = WEXITSTATUS(child_status); 1101 zerror(zlogp, B_FALSE, "unable to shutdown zone"); 1102 return (-1); 1103 } 1104 1105 shutdown_in_progress = B_TRUE; 1106 1107 return (0); 1108 } 1109 1110 static int 1111 zone_wait_shutdown(zlog_t *zlogp) 1112 { 1113 zone_state_t zstate; 1114 uint64_t *tm = NULL; 1115 scf_simple_prop_t *prop = NULL; 1116 int timeout; 1117 int tries; 1118 int rc = -1; 1119 1120 /* Get default stop timeout from SMF framework */ 1121 timeout = SHUTDOWN_WAIT; 1122 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop", 1123 SCF_PROPERTY_TIMEOUT)) != 
NULL) { 1124 if ((tm = scf_simple_prop_next_count(prop)) != NULL) { 1125 if (tm != 0) 1126 timeout = *tm; 1127 } 1128 scf_simple_prop_free(prop); 1129 } 1130 1131 /* allow time for zone to shutdown cleanly */ 1132 for (tries = 0; tries < timeout; tries ++) { 1133 (void) sleep(1); 1134 if (zone_get_state(zone_name, &zstate) == Z_OK && 1135 zstate == ZONE_STATE_INSTALLED) { 1136 rc = 0; 1137 break; 1138 } 1139 } 1140 1141 if (rc != 0) 1142 zerror(zlogp, B_FALSE, "unable to shutdown zone"); 1143 1144 shutdown_in_progress = B_FALSE; 1145 1146 return (rc); 1147 } 1148 1149 1150 1151 /* 1152 * Generate AUE_zone_state for a command that boots a zone. 1153 */ 1154 static void 1155 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 1156 char *new_state) 1157 { 1158 adt_session_data_t *ah; 1159 adt_event_data_t *event; 1160 int pass_fail, fail_reason; 1161 1162 if (!adt_audit_enabled()) 1163 return; 1164 1165 if (return_val == 0) { 1166 pass_fail = ADT_SUCCESS; 1167 fail_reason = ADT_SUCCESS; 1168 } else { 1169 pass_fail = ADT_FAILURE; 1170 fail_reason = ADT_FAIL_VALUE_PROGRAM; 1171 } 1172 1173 if (adt_start_session(&ah, NULL, 0)) { 1174 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1175 return; 1176 } 1177 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 1178 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1179 (void) adt_end_session(ah); 1180 return; 1181 } 1182 1183 event = adt_alloc_event(ah, ADT_zone_state); 1184 if (event == NULL) { 1185 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1186 (void) adt_end_session(ah); 1187 return; 1188 } 1189 event->adt_zone_state.zonename = zone_name; 1190 event->adt_zone_state.new_state = new_state; 1191 1192 if (adt_put_event(event, pass_fail, fail_reason)) 1193 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1194 1195 adt_free_event(event); 1196 1197 (void) adt_end_session(ah); 1198 } 1199 1200 /* 1201 * The main routine for the door server that deals with zone state transitions. 
1202 */ 1203 /* ARGSUSED */ 1204 static void 1205 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 1206 uint_t n_desc) 1207 { 1208 ucred_t *uc = NULL; 1209 const priv_set_t *eset; 1210 1211 zone_state_t zstate; 1212 zone_cmd_t cmd; 1213 zone_cmd_arg_t *zargp; 1214 1215 boolean_t kernelcall = B_FALSE; 1216 1217 int rval = -1; 1218 uint64_t uniqid; 1219 zoneid_t zoneid = -1; 1220 zlog_t zlog; 1221 zlog_t *zlogp; 1222 zone_cmd_rval_t *rvalp; 1223 size_t rlen = getpagesize(); /* conservative */ 1224 fs_callback_t cb; 1225 brand_handle_t bh; 1226 boolean_t wait_shut = B_FALSE; 1227 1228 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1229 zargp = (zone_cmd_arg_t *)args; 1230 1231 /* 1232 * When we get the door unref message, we've fdetach'd the door, and 1233 * it is time for us to shut down zoneadmd. 1234 */ 1235 if (zargp == DOOR_UNREF_DATA) { 1236 /* 1237 * See comment at end of main() for info on the last rites. 1238 */ 1239 exit(0); 1240 } 1241 1242 if (zargp == NULL) { 1243 (void) door_return(NULL, 0, 0, 0); 1244 } 1245 1246 rvalp = alloca(rlen); 1247 bzero(rvalp, rlen); 1248 zlog.logfile = NULL; 1249 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 1250 zlog.buf = rvalp->errbuf; 1251 zlog.log = zlog.buf; 1252 /* defer initialization of zlog.locale until after credential check */ 1253 zlogp = &zlog; 1254 1255 if (alen != sizeof (zone_cmd_arg_t)) { 1256 /* 1257 * This really shouldn't be happening. 1258 */ 1259 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 1260 "unexpected (expected %d bytes)", alen, 1261 sizeof (zone_cmd_arg_t)); 1262 goto out; 1263 } 1264 cmd = zargp->cmd; 1265 1266 if (door_ucred(&uc) != 0) { 1267 zerror(&logsys, B_TRUE, "door_ucred"); 1268 goto out; 1269 } 1270 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 1271 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 1272 (eset != NULL ? 
!priv_ismember(eset, PRIV_SYS_CONFIG) : 1273 ucred_geteuid(uc) != 0)) { 1274 zerror(&logsys, B_FALSE, "insufficient privileges"); 1275 goto out; 1276 } 1277 1278 kernelcall = ucred_getpid(uc) == 0; 1279 1280 /* 1281 * This is safe because we only use a zlog_t throughout the 1282 * duration of a door call; i.e., by the time the pointer 1283 * might become invalid, the door call would be over. 1284 */ 1285 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 1286 1287 (void) mutex_lock(&lock); 1288 1289 /* 1290 * Once we start to really die off, we don't want more connections. 1291 */ 1292 if (in_death_throes) { 1293 (void) mutex_unlock(&lock); 1294 ucred_free(uc); 1295 (void) door_return(NULL, 0, 0, 0); 1296 thr_exit(NULL); 1297 } 1298 1299 /* 1300 * Check for validity of command. 1301 */ 1302 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && 1303 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT && 1304 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT && 1305 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { 1306 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 1307 goto out; 1308 } 1309 1310 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 1311 /* 1312 * Can't happen 1313 */ 1314 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 1315 cmd); 1316 goto out; 1317 } 1318 /* 1319 * We ignore the possibility of someone calling zone_create(2) 1320 * explicitly; all requests must come through zoneadmd. 1321 */ 1322 if (zone_get_state(zone_name, &zstate) != Z_OK) { 1323 /* 1324 * Something terribly wrong happened 1325 */ 1326 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 1327 goto out; 1328 } 1329 1330 if (kernelcall) { 1331 /* 1332 * Kernel-initiated requests may lose their validity if the 1333 * zone_t the kernel was referring to has gone away. 
1334 */ 1335 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 1336 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 1337 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 1338 /* 1339 * We're not talking about the same zone. The request 1340 * must have arrived too late. Return error. 1341 */ 1342 rval = -1; 1343 goto out; 1344 } 1345 zlogp = &logsys; /* Log errors to syslog */ 1346 } 1347 1348 /* 1349 * If we are being asked to forcibly mount or boot a zone, we 1350 * pretend that an INCOMPLETE zone is actually INSTALLED. 1351 */ 1352 if (zstate == ZONE_STATE_INCOMPLETE && 1353 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) 1354 zstate = ZONE_STATE_INSTALLED; 1355 1356 switch (zstate) { 1357 case ZONE_STATE_CONFIGURED: 1358 case ZONE_STATE_INCOMPLETE: 1359 /* 1360 * Not our area of expertise; we just print a nice message 1361 * and die off. 1362 */ 1363 zerror(zlogp, B_FALSE, 1364 "%s operation is invalid for zones in state '%s'", 1365 z_cmd_name(cmd), zone_state_str(zstate)); 1366 break; 1367 1368 case ZONE_STATE_INSTALLED: 1369 switch (cmd) { 1370 case Z_READY: 1371 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate); 1372 if (rval == 0) 1373 eventstream_write(Z_EVT_ZONE_READIED); 1374 break; 1375 case Z_BOOT: 1376 case Z_FORCEBOOT: 1377 eventstream_write(Z_EVT_ZONE_BOOTING); 1378 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1379 == 0) { 1380 rval = zone_bootup(zlogp, zargp->bootbuf, 1381 zstate); 1382 } 1383 audit_put_record(zlogp, uc, rval, "boot"); 1384 if (rval != 0) { 1385 bringup_failure_recovery = B_TRUE; 1386 (void) zone_halt(zlogp, B_FALSE, B_FALSE, 1387 zstate); 1388 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1389 } 1390 break; 1391 case Z_SHUTDOWN: 1392 case Z_HALT: 1393 if (kernelcall) /* Invalid; can't happen */ 1394 abort(); 1395 /* 1396 * We could have two clients racing to halt this 1397 * zone; the second client loses, but its request 1398 * doesn't fail, since the zone is now in the desired 1399 * state. 
1400 */ 1401 zerror(zlogp, B_FALSE, "zone is already halted"); 1402 rval = 0; 1403 break; 1404 case Z_REBOOT: 1405 if (kernelcall) /* Invalid; can't happen */ 1406 abort(); 1407 zerror(zlogp, B_FALSE, "%s operation is invalid " 1408 "for zones in state '%s'", z_cmd_name(cmd), 1409 zone_state_str(zstate)); 1410 rval = -1; 1411 break; 1412 case Z_NOTE_UNINSTALLING: 1413 if (kernelcall) /* Invalid; can't happen */ 1414 abort(); 1415 /* 1416 * Tell the console to print out a message about this. 1417 * Once it does, we will be in_death_throes. 1418 */ 1419 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 1420 break; 1421 case Z_MOUNT: 1422 case Z_FORCEMOUNT: 1423 if (kernelcall) /* Invalid; can't happen */ 1424 abort(); 1425 if (!zone_isnative && !zone_iscluster && 1426 !zone_islabeled) { 1427 /* 1428 * -U mounts the zone without lofs mounting 1429 * zone file systems back into the scratch 1430 * zone. This is required when mounting 1431 * non-native branded zones. 1432 */ 1433 (void) strlcpy(zargp->bootbuf, "-U", 1434 BOOTARGS_MAX); 1435 } 1436 1437 rval = zone_ready(zlogp, 1438 strcmp(zargp->bootbuf, "-U") == 0 ? 1439 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate); 1440 if (rval != 0) 1441 break; 1442 1443 eventstream_write(Z_EVT_ZONE_READIED); 1444 1445 /* 1446 * Get a handle to the default brand info. 1447 * We must always use the default brand file system 1448 * list when mounting the zone. 1449 */ 1450 if ((bh = brand_open(default_brand)) == NULL) { 1451 rval = -1; 1452 break; 1453 } 1454 1455 /* 1456 * Get the list of filesystems to mount from 1457 * the brand configuration. These mounts are done 1458 * via a thread that will enter the zone, so they 1459 * are done from within the context of the zone. 
1460 */ 1461 cb.zlogp = zlogp; 1462 cb.zoneid = zone_id; 1463 cb.mount_cmd = B_TRUE; 1464 rval = brand_platform_iter_mounts(bh, 1465 mount_early_fs, &cb); 1466 1467 brand_close(bh); 1468 1469 /* 1470 * Ordinarily, /dev/fd would be mounted inside the zone 1471 * by svc:/system/filesystem/usr:default, but since 1472 * we're not booting the zone, we need to do this 1473 * manually. 1474 */ 1475 if (rval == 0) 1476 rval = mount_early_fs(&cb, 1477 "fd", "/dev/fd", "fd", NULL); 1478 break; 1479 case Z_UNMOUNT: 1480 if (kernelcall) /* Invalid; can't happen */ 1481 abort(); 1482 zerror(zlogp, B_FALSE, "zone is already unmounted"); 1483 rval = 0; 1484 break; 1485 } 1486 break; 1487 1488 case ZONE_STATE_READY: 1489 switch (cmd) { 1490 case Z_READY: 1491 /* 1492 * We could have two clients racing to ready this 1493 * zone; the second client loses, but its request 1494 * doesn't fail, since the zone is now in the desired 1495 * state. 1496 */ 1497 zerror(zlogp, B_FALSE, "zone is already ready"); 1498 rval = 0; 1499 break; 1500 case Z_FORCEBOOT: 1501 case Z_BOOT: 1502 (void) strlcpy(boot_args, zargp->bootbuf, 1503 sizeof (boot_args)); 1504 eventstream_write(Z_EVT_ZONE_BOOTING); 1505 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1506 audit_put_record(zlogp, uc, rval, "boot"); 1507 if (rval != 0) { 1508 bringup_failure_recovery = B_TRUE; 1509 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1510 zstate); 1511 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1512 } 1513 boot_args[0] = '\0'; 1514 break; 1515 case Z_HALT: 1516 if (kernelcall) /* Invalid; can't happen */ 1517 abort(); 1518 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1519 != 0) 1520 break; 1521 eventstream_write(Z_EVT_ZONE_HALTED); 1522 break; 1523 case Z_SHUTDOWN: 1524 case Z_REBOOT: 1525 case Z_NOTE_UNINSTALLING: 1526 case Z_FORCEMOUNT: 1527 case Z_MOUNT: 1528 case Z_UNMOUNT: 1529 if (kernelcall) /* Invalid; can't happen */ 1530 abort(); 1531 zerror(zlogp, B_FALSE, "%s operation is invalid " 1532 "for zones in 
state '%s'", z_cmd_name(cmd), 1533 zone_state_str(zstate)); 1534 rval = -1; 1535 break; 1536 } 1537 break; 1538 1539 case ZONE_STATE_MOUNTED: 1540 switch (cmd) { 1541 case Z_UNMOUNT: 1542 if (kernelcall) /* Invalid; can't happen */ 1543 abort(); 1544 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate); 1545 if (rval == 0) { 1546 eventstream_write(Z_EVT_ZONE_HALTED); 1547 (void) sema_post(&scratch_sem); 1548 } 1549 break; 1550 default: 1551 if (kernelcall) /* Invalid; can't happen */ 1552 abort(); 1553 zerror(zlogp, B_FALSE, "%s operation is invalid " 1554 "for zones in state '%s'", z_cmd_name(cmd), 1555 zone_state_str(zstate)); 1556 rval = -1; 1557 break; 1558 } 1559 break; 1560 1561 case ZONE_STATE_RUNNING: 1562 case ZONE_STATE_SHUTTING_DOWN: 1563 case ZONE_STATE_DOWN: 1564 switch (cmd) { 1565 case Z_READY: 1566 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1567 != 0) 1568 break; 1569 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0) 1570 eventstream_write(Z_EVT_ZONE_READIED); 1571 else 1572 eventstream_write(Z_EVT_ZONE_HALTED); 1573 break; 1574 case Z_FORCEBOOT: 1575 case Z_BOOT: 1576 /* 1577 * We could have two clients racing to boot this 1578 * zone; the second client loses, but its request 1579 * doesn't fail, since the zone is now in the desired 1580 * state. 
1581 */ 1582 zerror(zlogp, B_FALSE, "zone is already booted"); 1583 rval = 0; 1584 break; 1585 case Z_HALT: 1586 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1587 != 0) 1588 break; 1589 eventstream_write(Z_EVT_ZONE_HALTED); 1590 break; 1591 case Z_REBOOT: 1592 (void) strlcpy(boot_args, zargp->bootbuf, 1593 sizeof (boot_args)); 1594 eventstream_write(Z_EVT_ZONE_REBOOTING); 1595 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1596 != 0) { 1597 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1598 boot_args[0] = '\0'; 1599 break; 1600 } 1601 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1602 != 0) { 1603 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1604 boot_args[0] = '\0'; 1605 break; 1606 } 1607 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1608 audit_put_record(zlogp, uc, rval, "reboot"); 1609 if (rval != 0) { 1610 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1611 zstate); 1612 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1613 } 1614 boot_args[0] = '\0'; 1615 break; 1616 case Z_SHUTDOWN: 1617 if ((rval = zone_graceful_shutdown(zlogp)) == 0) { 1618 wait_shut = B_TRUE; 1619 } 1620 break; 1621 case Z_NOTE_UNINSTALLING: 1622 case Z_FORCEMOUNT: 1623 case Z_MOUNT: 1624 case Z_UNMOUNT: 1625 zerror(zlogp, B_FALSE, "%s operation is invalid " 1626 "for zones in state '%s'", z_cmd_name(cmd), 1627 zone_state_str(zstate)); 1628 rval = -1; 1629 break; 1630 } 1631 break; 1632 default: 1633 abort(); 1634 } 1635 1636 /* 1637 * Because the state of the zone may have changed, we make sure 1638 * to wake the console poller, which is in charge of initiating 1639 * the shutdown procedure as necessary. 
1640 */ 1641 eventstream_write(Z_EVT_NULL); 1642 1643 out: 1644 (void) mutex_unlock(&lock); 1645 1646 /* Wait for the Z_SHUTDOWN commands to complete */ 1647 if (wait_shut) 1648 rval = zone_wait_shutdown(zlogp); 1649 1650 if (kernelcall) { 1651 rvalp = NULL; 1652 rlen = 0; 1653 } else { 1654 rvalp->rval = rval; 1655 } 1656 if (uc != NULL) 1657 ucred_free(uc); 1658 (void) door_return((char *)rvalp, rlen, NULL, 0); 1659 thr_exit(NULL); 1660 } 1661 1662 static int 1663 setup_door(zlog_t *zlogp) 1664 { 1665 if ((zone_door = door_create(server, NULL, 1666 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1667 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1668 return (-1); 1669 } 1670 (void) fdetach(zone_door_path); 1671 1672 if (fattach(zone_door, zone_door_path) != 0) { 1673 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1674 (void) door_revoke(zone_door); 1675 (void) fdetach(zone_door_path); 1676 zone_door = -1; 1677 return (-1); 1678 } 1679 return (0); 1680 } 1681 1682 /* 1683 * zoneadm(8) will start zoneadmd if it thinks it isn't running; this 1684 * is where zoneadmd itself will check to see that another instance of 1685 * zoneadmd isn't already controlling this zone. 1686 * 1687 * The idea here is that we want to open the path to which we will 1688 * attach our door, lock it, and then make sure that no-one has beat us 1689 * to fattach(3c)ing onto it. 1690 * 1691 * fattach(3c) is really a mount, so there are actually two possible 1692 * vnodes we could be dealing with. Our strategy is as follows: 1693 * 1694 * - If the file we opened is a regular file (common case): 1695 * There is no fattach(3c)ed door, so we have a chance of becoming 1696 * the managing zoneadmd. We attempt to lock the file: if it is 1697 * already locked, that means someone else raced us here, so we 1698 * lose and give up. zoneadm(8) will try to contact the zoneadmd 1699 * that beat us to it. 
1700 * 1701 * - If the file we opened is a namefs file: 1702 * This means there is already an established door fattach(3c)'ed 1703 * to the rendezvous path. We've lost the race, so we give up. 1704 * Note that in this case we also try to grab the file lock, and 1705 * will succeed in acquiring it since the vnode locked by the 1706 * "winning" zoneadmd was a regular one, and the one we locked was 1707 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1708 * we just return to zoneadm(8) which knows to retry. 1709 */ 1710 static int 1711 make_daemon_exclusive(zlog_t *zlogp) 1712 { 1713 int doorfd = -1; 1714 int err, ret = -1; 1715 struct stat st; 1716 struct flock flock; 1717 zone_state_t zstate; 1718 1719 top: 1720 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1721 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1722 zonecfg_strerror(err)); 1723 goto out; 1724 } 1725 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1726 S_IREAD|S_IWRITE)) < 0) { 1727 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1728 goto out; 1729 } 1730 if (fstat(doorfd, &st) < 0) { 1731 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1732 goto out; 1733 } 1734 /* 1735 * Lock the file to synchronize with other zoneadmd 1736 */ 1737 flock.l_type = F_WRLCK; 1738 flock.l_whence = SEEK_SET; 1739 flock.l_start = (off_t)0; 1740 flock.l_len = (off_t)0; 1741 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1742 /* 1743 * Someone else raced us here and grabbed the lock file 1744 * first. A warning here is inappropriate since nothing 1745 * went wrong. 1746 */ 1747 goto out; 1748 } 1749 1750 if (strcmp(st.st_fstype, "namefs") == 0) { 1751 struct door_info info; 1752 1753 /* 1754 * There is already something fattach()'ed to this file. 1755 * Lets see what the door is up to. 
1756 */ 1757 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1758 /* 1759 * Another zoneadmd process seems to be in 1760 * control of the situation and we don't need to 1761 * be here. A warning here is inappropriate 1762 * since nothing went wrong. 1763 * 1764 * If the door has been revoked, the zoneadmd 1765 * process currently managing the zone is going 1766 * away. We'll return control to zoneadm(8) 1767 * which will try again (by which time zoneadmd 1768 * will hopefully have exited). 1769 */ 1770 goto out; 1771 } 1772 1773 /* 1774 * If we got this far, there's a fattach(3c)'ed door 1775 * that belongs to a process that has exited, which can 1776 * happen if the previous zoneadmd died unexpectedly. 1777 * 1778 * Let user know that something is amiss, but that we can 1779 * recover; if the zone is in the installed state, then don't 1780 * message, since having a running zoneadmd isn't really 1781 * expected/needed. We want to keep occurences of this message 1782 * limited to times when zoneadmd is picking back up from a 1783 * zoneadmd that died while the zone was in some non-trivial 1784 * state. 1785 */ 1786 if (zstate > ZONE_STATE_INSTALLED) { 1787 zerror(zlogp, B_FALSE, 1788 "zone '%s': WARNING: zone is in state '%s', but " 1789 "zoneadmd does not appear to be available; " 1790 "restarted zoneadmd to recover.", 1791 zone_name, zone_state_str(zstate)); 1792 } 1793 1794 (void) fdetach(zone_door_path); 1795 (void) close(doorfd); 1796 goto top; 1797 } 1798 ret = 0; 1799 out: 1800 (void) close(doorfd); 1801 return (ret); 1802 } 1803 1804 /* 1805 * Setup the brand's pre and post state change callbacks, as well as the 1806 * query callback, if any of these exist. 
1807 */ 1808 static int 1809 brand_callback_init(brand_handle_t bh, char *zone_name) 1810 { 1811 char zpath[MAXPATHLEN]; 1812 1813 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) 1814 return (-1); 1815 1816 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX, 1817 sizeof (pre_statechg_hook)); 1818 1819 if (brand_get_prestatechange(bh, zone_name, zpath, 1820 pre_statechg_hook + EXEC_LEN, 1821 sizeof (pre_statechg_hook) - EXEC_LEN) != 0) 1822 return (-1); 1823 1824 if (strlen(pre_statechg_hook) <= EXEC_LEN) 1825 pre_statechg_hook[0] = '\0'; 1826 1827 (void) strlcpy(post_statechg_hook, EXEC_PREFIX, 1828 sizeof (post_statechg_hook)); 1829 1830 if (brand_get_poststatechange(bh, zone_name, zpath, 1831 post_statechg_hook + EXEC_LEN, 1832 sizeof (post_statechg_hook) - EXEC_LEN) != 0) 1833 return (-1); 1834 1835 if (strlen(post_statechg_hook) <= EXEC_LEN) 1836 post_statechg_hook[0] = '\0'; 1837 1838 (void) strlcpy(query_hook, EXEC_PREFIX, 1839 sizeof (query_hook)); 1840 1841 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN, 1842 sizeof (query_hook) - EXEC_LEN) != 0) 1843 return (-1); 1844 1845 if (strlen(query_hook) <= EXEC_LEN) 1846 query_hook[0] = '\0'; 1847 1848 return (0); 1849 } 1850 1851 int 1852 main(int argc, char *argv[]) 1853 { 1854 int opt; 1855 zoneid_t zid; 1856 priv_set_t *privset; 1857 zone_state_t zstate; 1858 char parents_locale[MAXPATHLEN]; 1859 brand_handle_t bh; 1860 int err; 1861 1862 pid_t pid; 1863 sigset_t blockset; 1864 sigset_t block_cld; 1865 1866 struct { 1867 sema_t sem; 1868 int status; 1869 zlog_t log; 1870 } *shstate; 1871 size_t shstatelen = getpagesize(); 1872 1873 zlog_t errlog; 1874 zlog_t *zlogp; 1875 1876 int ctfd; 1877 1878 progname = get_execbasename(argv[0]); 1879 1880 /* 1881 * Make sure stderr is unbuffered 1882 */ 1883 (void) setbuffer(stderr, NULL, 0); 1884 1885 /* 1886 * Get out of the way of mounted filesystems, since we will daemonize 1887 * soon. 
1888 */ 1889 (void) chdir("/"); 1890 1891 /* 1892 * Use the default system umask per PSARC 1998/110 rather than 1893 * anything that may have been set by the caller. 1894 */ 1895 (void) umask(CMASK); 1896 1897 /* 1898 * Initially we want to use our parent's locale. 1899 */ 1900 (void) setlocale(LC_ALL, ""); 1901 (void) textdomain(TEXT_DOMAIN); 1902 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1903 sizeof (parents_locale)); 1904 1905 /* 1906 * This zlog_t is used for writing to stderr 1907 */ 1908 errlog.logfile = stderr; 1909 errlog.buflen = errlog.loglen = 0; 1910 errlog.buf = errlog.log = NULL; 1911 errlog.locale = parents_locale; 1912 1913 /* 1914 * We start off writing to stderr until we're ready to daemonize. 1915 */ 1916 zlogp = &errlog; 1917 1918 /* 1919 * Process options. 1920 */ 1921 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1922 switch (opt) { 1923 case 'R': 1924 zonecfg_set_root(optarg); 1925 break; 1926 case 'z': 1927 zone_name = optarg; 1928 break; 1929 default: 1930 usage(); 1931 } 1932 } 1933 1934 if (zone_name == NULL) 1935 usage(); 1936 1937 /* 1938 * Because usage() prints directly to stderr, it has gettext() 1939 * wrapping, which depends on the locale. But since zerror() calls 1940 * localize() which tweaks the locale, it is not safe to call zerror() 1941 * until after the last call to usage(). Fortunately, the last call 1942 * to usage() is just above and the first call to zerror() is just 1943 * below. Don't mess this up. 
1944 */ 1945 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1946 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1947 GLOBAL_ZONENAME); 1948 return (1); 1949 } 1950 1951 if (zone_get_id(zone_name, &zid) != 0) { 1952 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 1953 zonecfg_strerror(Z_NO_ZONE)); 1954 return (1); 1955 } 1956 1957 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1958 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1959 zonecfg_strerror(err)); 1960 return (1); 1961 } 1962 if (zstate < ZONE_STATE_INCOMPLETE) { 1963 zerror(zlogp, B_FALSE, 1964 "cannot manage a zone which is in state '%s'", 1965 zone_state_str(zstate)); 1966 return (1); 1967 } 1968 1969 if (zonecfg_default_brand(default_brand, 1970 sizeof (default_brand)) != Z_OK) { 1971 zerror(zlogp, B_FALSE, "unable to determine default brand"); 1972 return (1); 1973 } 1974 1975 /* Get a handle to the brand info for this zone */ 1976 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name)) 1977 != Z_OK) { 1978 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1979 return (1); 1980 } 1981 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0); 1982 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0); 1983 1984 /* 1985 * In the alternate root environment, the only supported 1986 * operations are mount and unmount. In this case, just treat 1987 * the zone as native if it is cluster. Cluster zones can be 1988 * native for the purpose of LU or upgrade, and the cluster 1989 * brand may not exist in the miniroot (such as in net install 1990 * upgrade). 
1991 */ 1992 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) { 1993 zone_iscluster = B_TRUE; 1994 if (zonecfg_in_alt_root()) { 1995 (void) strlcpy(brand_name, default_brand, 1996 sizeof (brand_name)); 1997 } 1998 } else { 1999 zone_iscluster = B_FALSE; 2000 } 2001 2002 if ((bh = brand_open(brand_name)) == NULL) { 2003 zerror(zlogp, B_FALSE, "unable to open zone brand"); 2004 return (1); 2005 } 2006 2007 /* Get state change brand hooks. */ 2008 if (brand_callback_init(bh, zone_name) == -1) { 2009 zerror(zlogp, B_TRUE, 2010 "failed to initialize brand state change hooks"); 2011 brand_close(bh); 2012 return (1); 2013 } 2014 2015 brand_close(bh); 2016 2017 /* 2018 * Check that we have all privileges. It would be nice to pare 2019 * this down, but this is at least a first cut. 2020 */ 2021 if ((privset = priv_allocset()) == NULL) { 2022 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 2023 return (1); 2024 } 2025 2026 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 2027 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 2028 priv_freeset(privset); 2029 return (1); 2030 } 2031 2032 if (priv_isfullset(privset) == B_FALSE) { 2033 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 2034 "run this command (all privs required)"); 2035 priv_freeset(privset); 2036 return (1); 2037 } 2038 priv_freeset(privset); 2039 2040 if (mkzonedir(zlogp) != 0) 2041 return (1); 2042 2043 /* 2044 * Pre-fork: setup shared state 2045 */ 2046 if ((shstate = (void *)mmap(NULL, shstatelen, 2047 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 2048 MAP_FAILED) { 2049 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 2050 return (1); 2051 } 2052 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 2053 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 2054 (void) munmap((char *)shstate, shstatelen); 2055 return (1); 2056 } 2057 shstate->log.logfile = NULL; 2058 shstate->log.buflen = shstatelen - sizeof (*shstate); 2059 shstate->log.loglen = shstate->log.buflen; 2060 
shstate->log.buf = (char *)shstate + sizeof (*shstate); 2061 shstate->log.log = shstate->log.buf; 2062 shstate->log.locale = parents_locale; 2063 shstate->status = -1; 2064 2065 /* 2066 * We need a SIGCHLD handler so the sema_wait() below will wake 2067 * up if the child dies without doing a sema_post(). 2068 */ 2069 (void) sigset(SIGCHLD, sigchld); 2070 /* 2071 * We must mask SIGCHLD until after we've coped with the fork 2072 * sufficiently to deal with it; otherwise we can race and 2073 * receive the signal before pid has been initialized 2074 * (yes, this really happens). 2075 */ 2076 (void) sigemptyset(&block_cld); 2077 (void) sigaddset(&block_cld, SIGCHLD); 2078 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 2079 2080 /* 2081 * The parent only needs stderr after the fork, so close other fd's 2082 * that we inherited from zoneadm so that the parent doesn't have those 2083 * open while waiting. The child will close the rest after the fork. 2084 */ 2085 closefrom(3); 2086 2087 if ((ctfd = init_template()) == -1) { 2088 zerror(zlogp, B_TRUE, "failed to create contract"); 2089 return (1); 2090 } 2091 2092 /* 2093 * Do not let another thread localize a message while we are forking. 2094 */ 2095 (void) mutex_lock(&msglock); 2096 pid = fork(); 2097 (void) mutex_unlock(&msglock); 2098 2099 /* 2100 * In all cases (parent, child, and in the event of an error) we 2101 * don't want to cause creation of contracts on subsequent fork()s. 2102 */ 2103 (void) ct_tmpl_clear(ctfd); 2104 (void) close(ctfd); 2105 2106 if (pid == -1) { 2107 zerror(zlogp, B_TRUE, "could not fork"); 2108 return (1); 2109 2110 } else if (pid > 0) { /* parent */ 2111 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 2112 /* 2113 * This marks a window of vulnerability in which we receive 2114 * the SIGCLD before falling into sema_wait (normally we would 2115 * get woken up from sema_wait with EINTR upon receipt of 2116 * SIGCLD). 
So we may need to use some other scheme like 2117 * sema_posting in the sigcld handler. 2118 * blech 2119 */ 2120 (void) sema_wait(&shstate->sem); 2121 (void) sema_destroy(&shstate->sem); 2122 if (shstate->status != 0) 2123 (void) waitpid(pid, NULL, WNOHANG); 2124 /* 2125 * It's ok if we die with SIGPIPE. It's not like we could have 2126 * done anything about it. 2127 */ 2128 (void) fprintf(stderr, "%s", shstate->log.buf); 2129 _exit(shstate->status == 0 ? 0 : 1); 2130 } 2131 2132 /* 2133 * The child charges on. 2134 */ 2135 (void) sigset(SIGCHLD, SIG_DFL); 2136 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 2137 2138 /* 2139 * SIGPIPE can be delivered if we write to a socket for which the 2140 * peer endpoint is gone. That can lead to too-early termination 2141 * of zoneadmd, and that's not good eats. 2142 */ 2143 (void) sigset(SIGPIPE, SIG_IGN); 2144 /* 2145 * Stop using stderr 2146 */ 2147 zlogp = &shstate->log; 2148 2149 /* 2150 * We don't need stdout/stderr from now on. 2151 */ 2152 closefrom(0); 2153 2154 /* 2155 * Initialize the syslog zlog_t. This needs to be done after 2156 * the call to closefrom(). 2157 */ 2158 logsys.buf = logsys.log = NULL; 2159 logsys.buflen = logsys.loglen = 0; 2160 logsys.logfile = NULL; 2161 logsys.locale = DEFAULT_LOCALE; 2162 2163 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 2164 2165 /* 2166 * The eventstream is used to publish state changes in the zone 2167 * from the door threads to the console I/O poller. 2168 */ 2169 if (eventstream_init() == -1) { 2170 zerror(zlogp, B_TRUE, "unable to create eventstream"); 2171 goto child_out; 2172 } 2173 2174 (void) snprintf(zone_door_path, sizeof (zone_door_path), 2175 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 2176 2177 /* 2178 * See if another zoneadmd is running for this zone. If not, then we 2179 * can now modify system state. 
2180 */ 2181 if (make_daemon_exclusive(zlogp) == -1) 2182 goto child_out; 2183 2184 2185 /* 2186 * Create/join a new session; we need to be careful of what we do with 2187 * the console from now on so we don't end up being the session leader 2188 * for the terminal we're going to be handing out. 2189 */ 2190 (void) setsid(); 2191 2192 /* 2193 * This thread shouldn't be receiving any signals; in particular, 2194 * SIGCHLD should be received by the thread doing the fork(). 2195 */ 2196 (void) sigfillset(&blockset); 2197 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 2198 2199 /* 2200 * Setup the console device and get ready to serve the console; 2201 * once this has completed, we're ready to let console clients 2202 * make an attempt to connect (they will block until 2203 * serve_console_sock() below gets called, and any pending 2204 * connection is accept()ed). 2205 */ 2206 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0) 2207 goto child_out; 2208 2209 /* 2210 * Take the lock now, so that when the door server gets going, we 2211 * are guaranteed that it won't take a request until we are sure 2212 * that everything is completely set up. See the child_out: label 2213 * below to see why this matters. 2214 */ 2215 (void) mutex_lock(&lock); 2216 2217 /* Init semaphore for scratch zones. */ 2218 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 2219 zerror(zlogp, B_TRUE, 2220 "failed to initialize semaphore for scratch zone"); 2221 goto child_out; 2222 } 2223 2224 /* open the dladm handle */ 2225 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) { 2226 zerror(zlogp, B_FALSE, "failed to open dladm handle"); 2227 goto child_out; 2228 } 2229 2230 /* 2231 * Note: door setup must occur *after* the console is setup. 2232 * This is so that as zlogin tests the door to see if zoneadmd 2233 * is ready yet, we know that the console will get serviced 2234 * once door_info() indicates that the door is "up". 
2235 */ 2236 if (setup_door(zlogp) == -1) 2237 goto child_out; 2238 2239 /* 2240 * Things seem OK so far; tell the parent process that we're done 2241 * with setup tasks. This will cause the parent to exit, signalling 2242 * to zoneadm, zlogin, or whatever forked it that we are ready to 2243 * service requests. 2244 */ 2245 shstate->status = 0; 2246 (void) sema_post(&shstate->sem); 2247 (void) munmap((char *)shstate, shstatelen); 2248 shstate = NULL; 2249 2250 (void) mutex_unlock(&lock); 2251 2252 /* 2253 * zlogp is now invalid, so reset it to the syslog logger. 2254 */ 2255 zlogp = &logsys; 2256 2257 /* 2258 * Now that we are free of any parents, switch to the default locale. 2259 */ 2260 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 2261 2262 /* 2263 * At this point the setup portion of main() is basically done, so 2264 * we reuse this thread to manage the zone console. When 2265 * serve_console() has returned, we are past the point of no return 2266 * in the life of this zoneadmd. 2267 */ 2268 if (zonecfg_in_alt_root()) { 2269 /* 2270 * This is just awful, but mounted scratch zones don't (and 2271 * can't) have consoles. We just wait for unmount instead. 2272 */ 2273 while (sema_wait(&scratch_sem) == EINTR) 2274 ; 2275 } else { 2276 serve_console(zlogp); 2277 assert(in_death_throes); 2278 } 2279 2280 /* 2281 * This is the next-to-last part of the exit interlock. Upon calling 2282 * fdetach(), the door will go unreferenced; once any 2283 * outstanding requests (like the door thread doing Z_HALT) are 2284 * done, the door will get an UNREF notification; when it handles 2285 * the UNREF, the door server will cause the exit. It's possible 2286 * that fdetach() can fail because the file is in use, in which 2287 * case we'll retry the operation. 
2288 */ 2289 assert(!MUTEX_HELD(&lock)); 2290 for (;;) { 2291 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY)) 2292 break; 2293 yield(); 2294 } 2295 2296 for (;;) 2297 (void) pause(); 2298 2299 child_out: 2300 assert(pid == 0); 2301 2302 shstate->status = -1; 2303 (void) sema_post(&shstate->sem); 2304 (void) munmap((char *)shstate, shstatelen); 2305 2306 /* 2307 * This might trigger an unref notification, but if so, 2308 * we are still holding the lock, so our call to exit will 2309 * ultimately win the race and will publish the right exit 2310 * code. 2311 */ 2312 if (zone_door != -1) { 2313 assert(MUTEX_HELD(&lock)); 2314 (void) door_revoke(zone_door); 2315 (void) fdetach(zone_door_path); 2316 } 2317 2318 if (dld_handle != NULL) 2319 dladm_close(dld_handle); 2320 2321 return (1); /* return from main() forcibly exits an MT process */ 2322 } 2323