1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 25 */ 26 27 /* 28 * zoneadmd manages zones; one zoneadmd process is launched for each 29 * non-global zone on the system. This daemon juggles four jobs: 30 * 31 * - Implement setup and teardown of the zone "virtual platform": mount and 32 * unmount filesystems; create and destroy network interfaces; communicate 33 * with devfsadmd to lay out devices for the zone; instantiate the zone 34 * console device; configure process runtime attributes such as resource 35 * controls, pool bindings, fine-grained privileges. 36 * 37 * - Launch the zone's init(1M) process. 38 * 39 * - Implement a door server; clients (like zoneadm) connect to the door 40 * server and request zone state changes. The kernel is also a client of 41 * this door server. A request to halt or reboot the zone which originates 42 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 43 * 44 * One minor problem is that messages emitted by zoneadmd need to be passed 45 * back to the zoneadm process making the request. These messages need to 46 * be rendered in the client's locale; so, this is passed in as part of the 47 * request. The exception is the kernel upcall to zoneadmd, in which case 48 * messages are syslog'd. 49 * 50 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 51 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 52 * strings which do not need to be translated. 53 * 54 * - Act as a console server for zlogin -C processes; see comments in zcons.c 55 * for more information about the zone console architecture. 56 * 57 * DESIGN NOTES 58 * 59 * Restart: 60 * A chief design constraint of zoneadmd is that it should be restartable in 61 * the case that the administrator kills it off, or it suffers a fatal error, 62 * without the running zone being impacted; this is akin to being able to 63 * reboot the service processor of a server without affecting the OS instance. 64 */ 65 66 #include <sys/param.h> 67 #include <sys/mman.h> 68 #include <sys/types.h> 69 #include <sys/stat.h> 70 #include <sys/sysmacros.h> 71 72 #include <bsm/adt.h> 73 #include <bsm/adt_event.h> 74 75 #include <alloca.h> 76 #include <assert.h> 77 #include <errno.h> 78 #include <door.h> 79 #include <fcntl.h> 80 #include <locale.h> 81 #include <signal.h> 82 #include <stdarg.h> 83 #include <stdio.h> 84 #include <stdlib.h> 85 #include <string.h> 86 #include <strings.h> 87 #include <synch.h> 88 #include <syslog.h> 89 #include <thread.h> 90 #include <unistd.h> 91 #include <wait.h> 92 #include <limits.h> 93 #include <zone.h> 94 #include <libbrand.h> 95 #include <sys/brand.h> 96 #include <libcontract.h> 97 #include <libcontract_priv.h> 98 #include <sys/brand.h> 99 #include <sys/contract/process.h> 100 #include <sys/ctfs.h> 101 #include <libdladm.h> 102 #include <sys/dls_mgmt.h> 103 #include <libscf.h> 104 105 #include <libzonecfg.h> 106 #include <zonestat_impl.h> 107 #include "zoneadmd.h" 108 109 static char *progname; 110 char *zone_name; /* zone which we are managing */ 111 char pool_name[MAXNAMELEN]; 112 char default_brand[MAXNAMELEN]; 113 char brand_name[MAXNAMELEN]; 114 boolean_t zone_isnative; 115 boolean_t zone_iscluster; 116 boolean_t zone_islabeled; 117 boolean_t shutdown_in_progress; 118 static zoneid_t zone_id; 119 dladm_handle_t dld_handle = NULL; 120 121 static char pre_statechg_hook[2 * MAXPATHLEN]; 122 static char post_statechg_hook[2 * MAXPATHLEN]; 123 char query_hook[2 * MAXPATHLEN]; 124 125 zlog_t logsys; 126 127 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 128 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 129 130 static sema_t scratch_sem; /* for scratch zones */ 131 132 static char zone_door_path[MAXPATHLEN]; 133 static int zone_door = -1; 134 135 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 136 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 137 138 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 139 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 140 #endif 141 142 #define DEFAULT_LOCALE "C" 143 144 static const char * 145 z_cmd_name(zone_cmd_t zcmd) 146 { 147 /* This list needs to match the enum in sys/zone.h */ 148 static const char *zcmdstr[] = { 149 "ready", "boot", "forceboot", "reboot", "halt", 150 "note_uninstalling", "mount", "forcemount", "unmount", 151 "shutdown" 152 }; 153 154 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 155 return ("unknown"); 156 else 157 return (zcmdstr[(int)zcmd]); 158 } 159 160 static char * 161 get_execbasename(char *execfullname) 162 { 163 char *last_slash, *execbasename; 164 165 /* guard against '/' at end of command invocation */ 166 for (;;) { 167 last_slash = strrchr(execfullname, '/'); 168 if (last_slash == NULL) { 169 execbasename = execfullname; 170 break; 171 } else { 172 execbasename = last_slash + 1; 173 if (*execbasename == '\0') { 174 *last_slash = '\0'; 175 continue; 176 } 177 break; 178 } 179 } 180 return (execbasename); 181 } 182 183 static void 184 usage(void) 185 { 186 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 187 (void) fprintf(stderr, 188 gettext("\tNote: %s should not be run directly.\n"), progname); 189 exit(2); 190 } 191 192 /* ARGSUSED */ 193 static void 194 sigchld(int sig) 195 { 196 } 197 198 char * 199 localize_msg(char *locale, const char *msg) 200 { 201 char *out; 202 203 (void) mutex_lock(&msglock); 204 (void) setlocale(LC_MESSAGES, locale); 205 out = gettext(msg); 206 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 207 (void) mutex_unlock(&msglock); 208 return (out); 209 } 210 211 /* PRINTFLIKE3 */ 212 void 213 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 214 { 215 va_list alist; 216 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 217 char *bp; 218 int saved_errno = errno; 219 220 if (zlogp == NULL) 221 return; 222 if (zlogp == &logsys) 223 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 224 zone_name); 225 else 226 buf[0] = '\0'; 227 bp = &(buf[strlen(buf)]); 228 229 /* 230 * In theory, the locale pointer should be set to either "C" or a 231 * char array, so it should never be NULL 232 */ 233 assert(zlogp->locale != NULL); 234 /* Locale is per process, but we are multi-threaded... */ 235 fmt = localize_msg(zlogp->locale, fmt); 236 237 va_start(alist, fmt); 238 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 239 va_end(alist); 240 bp = &(buf[strlen(buf)]); 241 if (use_strerror) 242 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 243 strerror(saved_errno)); 244 if (zlogp == &logsys) { 245 (void) syslog(LOG_ERR, "%s", buf); 246 } else if (zlogp->logfile != NULL) { 247 (void) fprintf(zlogp->logfile, "%s\n", buf); 248 } else { 249 size_t buflen; 250 size_t copylen; 251 252 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 253 copylen = MIN(buflen, zlogp->loglen); 254 zlogp->log += copylen; 255 zlogp->loglen -= copylen; 256 } 257 } 258 259 /* 260 * Emit a warning for any boot arguments which are unrecognized. Since 261 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 262 * put the arguments into an argv style array, use getopt to process them, 263 * and put the resultant argument string back into outargs. 264 * 265 * During the filtering, we pull out any arguments which are truly "boot" 266 * arguments, leaving only those which are to be passed intact to the 267 * progenitor process. The one we support at the moment is -i, which 268 * indicates to the kernel which program should be launched as 'init'. 269 * 270 * A return of Z_INVAL indicates specifically that the arguments are 271 * not valid; this is a non-fatal error. Except for Z_OK, all other return 272 * values are treated as fatal. 273 */ 274 static int 275 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 276 char *init_file, char *badarg) 277 { 278 int argc = 0, argc_save; 279 int i; 280 int err; 281 char *arg, *lasts, **argv = NULL, **argv_save; 282 char zonecfg_args[BOOTARGS_MAX]; 283 char scratchargs[BOOTARGS_MAX], *sargs; 284 char c; 285 286 bzero(outargs, BOOTARGS_MAX); 287 bzero(badarg, BOOTARGS_MAX); 288 289 /* 290 * If the user didn't specify transient boot arguments, check 291 * to see if there were any specified in the zone configuration, 292 * and use them if applicable. 293 */ 294 if (inargs == NULL || inargs[0] == '\0') { 295 zone_dochandle_t handle; 296 if ((handle = zonecfg_init_handle()) == NULL) { 297 zerror(zlogp, B_TRUE, 298 "getting zone configuration handle"); 299 return (Z_BAD_HANDLE); 300 } 301 err = zonecfg_get_snapshot_handle(zone_name, handle); 302 if (err != Z_OK) { 303 zerror(zlogp, B_FALSE, 304 "invalid configuration snapshot"); 305 zonecfg_fini_handle(handle); 306 return (Z_BAD_HANDLE); 307 } 308 309 bzero(zonecfg_args, sizeof (zonecfg_args)); 310 (void) zonecfg_get_bootargs(handle, zonecfg_args, 311 sizeof (zonecfg_args)); 312 inargs = zonecfg_args; 313 zonecfg_fini_handle(handle); 314 } 315 316 if (strlen(inargs) >= BOOTARGS_MAX) { 317 zerror(zlogp, B_FALSE, "boot argument string too long"); 318 return (Z_INVAL); 319 } 320 321 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 322 sargs = scratchargs; 323 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 324 sargs = NULL; 325 argc++; 326 } 327 328 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 329 zerror(zlogp, B_FALSE, "memory allocation failed"); 330 return (Z_NOMEM); 331 } 332 333 argv_save = argv; 334 argc_save = argc; 335 336 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 337 sargs = scratchargs; 338 i = 0; 339 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 340 sargs = NULL; 341 if ((argv[i] = strdup(arg)) == NULL) { 342 err = Z_NOMEM; 343 zerror(zlogp, B_FALSE, "memory allocation failed"); 344 goto done; 345 } 346 i++; 347 } 348 349 /* 350 * We preserve compatibility with the Solaris system boot behavior, 351 * which allows: 352 * 353 * # reboot kernel/unix -s -m verbose 354 * 355 * In this example, kernel/unix tells the booter what file to 356 * boot. We don't want reboot in a zone to be gratuitously different, 357 * so we silently ignore the boot file, if necessary. 358 */ 359 if (argv[0] == NULL) 360 goto done; 361 362 assert(argv[0][0] != ' '); 363 assert(argv[0][0] != '\t'); 364 365 if (argv[0][0] != '-' && argv[0][0] != '\0') { 366 argv = &argv[1]; 367 argc--; 368 } 369 370 optind = 0; 371 opterr = 0; 372 err = Z_OK; 373 while ((c = getopt(argc, argv, "fi:m:s")) != -1) { 374 switch (c) { 375 case 'i': 376 /* 377 * -i is handled by the runtime and is not passed 378 * along to userland 379 */ 380 (void) strlcpy(init_file, optarg, MAXPATHLEN); 381 break; 382 case 'f': 383 /* This has already been processed by zoneadm */ 384 break; 385 case 'm': 386 case 's': 387 /* These pass through unmolested */ 388 (void) snprintf(outargs, BOOTARGS_MAX, 389 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 390 break; 391 case '?': 392 /* 393 * We warn about unknown arguments but pass them 394 * along anyway-- if someone wants to develop their 395 * own init replacement, they can pass it whatever 396 * args they want. 397 */ 398 err = Z_INVAL; 399 (void) snprintf(outargs, BOOTARGS_MAX, 400 "%s -%c", outargs, optopt); 401 (void) snprintf(badarg, BOOTARGS_MAX, 402 "%s -%c", badarg, optopt); 403 break; 404 } 405 } 406 407 /* 408 * For Solaris Zones we warn about and discard non-option arguments. 409 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 410 * to the kernel, we concat up all the other remaining boot args. 411 * and warn on them as a group. 412 */ 413 if (optind < argc) { 414 err = Z_INVAL; 415 while (optind < argc) { 416 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 417 badarg, strlen(badarg) > 0 ? " " : "", 418 argv[optind]); 419 optind++; 420 } 421 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 422 "arguments `%s'.", badarg); 423 } 424 425 done: 426 for (i = 0; i < argc_save; i++) { 427 if (argv_save[i] != NULL) 428 free(argv_save[i]); 429 } 430 free(argv_save); 431 return (err); 432 } 433 434 435 static int 436 mkzonedir(zlog_t *zlogp) 437 { 438 struct stat st; 439 /* 440 * We must create and lock everyone but root out of ZONES_TMPDIR 441 * since anyone can open any UNIX domain socket, regardless of 442 * its file system permissions. Sigh... 443 */ 444 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 445 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 446 return (-1); 447 } 448 /* paranoia */ 449 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 450 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 451 return (-1); 452 } 453 (void) chmod(ZONES_TMPDIR, S_IRWXU); 454 return (0); 455 } 456 457 /* 458 * Run the brand's pre-state change callback, if it exists. 459 */ 460 static int 461 brand_prestatechg(zlog_t *zlogp, int state, int cmd) 462 { 463 char cmdbuf[2 * MAXPATHLEN]; 464 const char *altroot; 465 466 if (pre_statechg_hook[0] == '\0') 467 return (0); 468 469 altroot = zonecfg_get_root(); 470 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook, 471 state, cmd, altroot) > sizeof (cmdbuf)) 472 return (-1); 473 474 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 475 return (-1); 476 477 return (0); 478 } 479 480 /* 481 * Run the brand's post-state change callback, if it exists. 482 */ 483 static int 484 brand_poststatechg(zlog_t *zlogp, int state, int cmd) 485 { 486 char cmdbuf[2 * MAXPATHLEN]; 487 const char *altroot; 488 489 if (post_statechg_hook[0] == '\0') 490 return (0); 491 492 altroot = zonecfg_get_root(); 493 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook, 494 state, cmd, altroot) > sizeof (cmdbuf)) 495 return (-1); 496 497 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 498 return (-1); 499 500 return (0); 501 } 502 503 /* 504 * Notify zonestatd of the new zone. If zonestatd is not running, this 505 * will do nothing. 506 */ 507 static void 508 notify_zonestatd(zoneid_t zoneid) 509 { 510 int cmd[2]; 511 int fd; 512 door_arg_t params; 513 514 fd = open(ZS_DOOR_PATH, O_RDONLY); 515 if (fd < 0) 516 return; 517 518 cmd[0] = ZSD_CMD_NEW_ZONE; 519 cmd[1] = zoneid; 520 params.data_ptr = (char *)&cmd; 521 params.data_size = sizeof (cmd); 522 params.desc_ptr = NULL; 523 params.desc_num = 0; 524 params.rbuf = NULL; 525 params.rsize = NULL; 526 (void) door_call(fd, ¶ms); 527 (void) close(fd); 528 } 529 530 /* 531 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 532 * 'true' if this is being invoked as part of the processing for the "mount" 533 * subcommand. 534 */ 535 static int 536 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate) 537 { 538 int err; 539 540 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0) 541 return (-1); 542 543 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 544 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 545 zonecfg_strerror(err)); 546 goto bad; 547 } 548 549 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 550 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 551 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 552 zonecfg_strerror(err)); 553 goto bad; 554 } 555 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 556 bringup_failure_recovery = B_TRUE; 557 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE); 558 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 559 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 560 zonecfg_strerror(err)); 561 goto bad; 562 } 563 564 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0) 565 goto bad; 566 567 return (0); 568 569 bad: 570 /* 571 * If something goes wrong, we up the zones's state to the target 572 * state, READY, and then invoke the hook as if we're halting. 573 */ 574 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT); 575 return (-1); 576 } 577 578 int 579 init_template(void) 580 { 581 int fd; 582 int err = 0; 583 584 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 585 if (fd == -1) 586 return (-1); 587 588 /* 589 * For now, zoneadmd doesn't do anything with the contract. 590 * Deliver no events, don't inherit, and allow it to be orphaned. 591 */ 592 err |= ct_tmpl_set_critical(fd, 0); 593 err |= ct_tmpl_set_informative(fd, 0); 594 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 595 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 596 if (err || ct_tmpl_activate(fd)) { 597 (void) close(fd); 598 return (-1); 599 } 600 601 return (fd); 602 } 603 604 typedef struct fs_callback { 605 zlog_t *zlogp; 606 zoneid_t zoneid; 607 boolean_t mount_cmd; 608 } fs_callback_t; 609 610 static int 611 mount_early_fs(void *data, const char *spec, const char *dir, 612 const char *fstype, const char *opt) 613 { 614 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; 615 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; 616 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd; 617 char rootpath[MAXPATHLEN]; 618 pid_t child; 619 int child_status; 620 int tmpl_fd; 621 int rv; 622 ctid_t ct; 623 624 /* determine the zone rootpath */ 625 if (mount_cmd) { 626 char zonepath[MAXPATHLEN]; 627 char luroot[MAXPATHLEN]; 628 629 if (zone_get_zonepath(zone_name, 630 zonepath, sizeof (zonepath)) != Z_OK) { 631 zerror(zlogp, B_FALSE, "unable to determine zone path"); 632 return (-1); 633 } 634 635 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 636 resolve_lofs(zlogp, luroot, sizeof (luroot)); 637 (void) strlcpy(rootpath, luroot, sizeof (rootpath)); 638 } else { 639 if (zone_get_rootpath(zone_name, 640 rootpath, sizeof (rootpath)) != Z_OK) { 641 zerror(zlogp, B_FALSE, "unable to determine zone root"); 642 return (-1); 643 } 644 } 645 646 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) { 647 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 648 rootpath, dir); 649 return (-1); 650 } else if (rv > 0) { 651 /* The mount point path doesn't exist, create it now. */ 652 if (make_one_dir(zlogp, rootpath, dir, 653 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 654 DEFAULT_DIR_GROUP) != 0) { 655 zerror(zlogp, B_FALSE, "failed to create mount point"); 656 return (-1); 657 } 658 659 /* 660 * Now this might seem weird, but we need to invoke 661 * valid_mount_path() again. Why? Because it checks 662 * to make sure that the mount point path is canonical, 663 * which it can only do if the path exists, so now that 664 * we've created the path we have to verify it again. 665 */ 666 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, 667 fstype)) < 0) { 668 zerror(zlogp, B_FALSE, 669 "%s%s is not a valid mount point", rootpath, dir); 670 return (-1); 671 } 672 } 673 674 if ((tmpl_fd = init_template()) == -1) { 675 zerror(zlogp, B_TRUE, "failed to create contract"); 676 return (-1); 677 } 678 679 if ((child = fork()) == -1) { 680 (void) ct_tmpl_clear(tmpl_fd); 681 (void) close(tmpl_fd); 682 zerror(zlogp, B_TRUE, "failed to fork"); 683 return (-1); 684 685 } else if (child == 0) { /* child */ 686 char opt_buf[MAX_MNTOPT_STR]; 687 int optlen = 0; 688 int mflag = MS_DATA; 689 690 (void) ct_tmpl_clear(tmpl_fd); 691 /* 692 * Even though there are no procs running in the zone, we 693 * do this for paranoia's sake. 694 */ 695 (void) closefrom(0); 696 697 if (zone_enter(zoneid) == -1) { 698 _exit(errno); 699 } 700 if (opt != NULL) { 701 /* 702 * The mount() system call is incredibly annoying. 703 * If options are specified, we need to copy them 704 * into a temporary buffer since the mount() system 705 * call will overwrite the options string. It will 706 * also fail if the new option string it wants to 707 * write is bigger than the one we passed in, so 708 * you must pass in a buffer of the maximum possible 709 * option string length. sigh. 710 */ 711 (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); 712 opt = opt_buf; 713 optlen = MAX_MNTOPT_STR; 714 mflag = MS_OPTIONSTR; 715 } 716 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) 717 _exit(errno); 718 _exit(0); 719 } 720 721 /* parent */ 722 if (contract_latest(&ct) == -1) 723 ct = -1; 724 (void) ct_tmpl_clear(tmpl_fd); 725 (void) close(tmpl_fd); 726 if (waitpid(child, &child_status, 0) != child) { 727 /* unexpected: we must have been signalled */ 728 (void) contract_abandon_id(ct); 729 return (-1); 730 } 731 (void) contract_abandon_id(ct); 732 if (WEXITSTATUS(child_status) != 0) { 733 errno = WEXITSTATUS(child_status); 734 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 735 return (-1); 736 } 737 738 return (0); 739 } 740 741 /* 742 * If retstr is not NULL, the output of the subproc is returned in the str, 743 * otherwise it is output using zerror(). Any memory allocated for retstr 744 * should be freed by the caller. 745 */ 746 int 747 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr) 748 { 749 char buf[1024]; /* arbitrary large amount */ 750 char *inbuf; 751 FILE *file; 752 int status; 753 int rd_cnt; 754 755 if (retstr != NULL) { 756 if ((*retstr = malloc(1024)) == NULL) { 757 zerror(zlogp, B_FALSE, "out of memory"); 758 return (-1); 759 } 760 inbuf = *retstr; 761 rd_cnt = 0; 762 } else { 763 inbuf = buf; 764 } 765 766 file = popen(cmdbuf, "r"); 767 if (file == NULL) { 768 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); 769 return (-1); 770 } 771 772 while (fgets(inbuf, 1024, file) != NULL) { 773 if (retstr == NULL) { 774 if (zlogp != &logsys) 775 zerror(zlogp, B_FALSE, "%s", inbuf); 776 } else { 777 char *p; 778 779 rd_cnt += 1024 - 1; 780 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) { 781 zerror(zlogp, B_FALSE, "out of memory"); 782 (void) pclose(file); 783 return (-1); 784 } 785 786 *retstr = p; 787 inbuf = *retstr + rd_cnt; 788 } 789 } 790 status = pclose(file); 791 792 if (WIFSIGNALED(status)) { 793 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 794 "signal %d", cmdbuf, WTERMSIG(status)); 795 return (-1); 796 } 797 assert(WIFEXITED(status)); 798 if (WEXITSTATUS(status) == ZEXIT_EXEC) { 799 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); 800 return (-1); 801 } 802 return (WEXITSTATUS(status)); 803 } 804 805 static int 806 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) 807 { 808 zoneid_t zoneid; 809 struct stat st; 810 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 811 char nbootargs[BOOTARGS_MAX]; 812 char cmdbuf[MAXPATHLEN]; 813 fs_callback_t cb; 814 brand_handle_t bh; 815 zone_iptype_t iptype; 816 boolean_t links_loaded = B_FALSE; 817 dladm_status_t status; 818 char errmsg[DLADM_STRSIZE]; 819 int err; 820 boolean_t restart_init; 821 822 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0) 823 return (-1); 824 825 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 826 zerror(zlogp, B_TRUE, "unable to get zoneid"); 827 goto bad; 828 } 829 830 cb.zlogp = zlogp; 831 cb.zoneid = zoneid; 832 cb.mount_cmd = B_FALSE; 833 834 /* Get a handle to the brand info for this zone */ 835 if ((bh = brand_open(brand_name)) == NULL) { 836 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 837 goto bad; 838 } 839 840 /* 841 * Get the list of filesystems to mount from the brand 842 * configuration. These mounts are done via a thread that will 843 * enter the zone, so they are done from within the context of the 844 * zone. 845 */ 846 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) { 847 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 848 brand_close(bh); 849 goto bad; 850 } 851 852 /* 853 * Get the brand's boot callback if it exists. 854 */ 855 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 856 zerror(zlogp, B_FALSE, "unable to determine zone path"); 857 brand_close(bh); 858 goto bad; 859 } 860 (void) strcpy(cmdbuf, EXEC_PREFIX); 861 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 862 sizeof (cmdbuf) - EXEC_LEN) != 0) { 863 zerror(zlogp, B_FALSE, 864 "unable to determine branded zone's boot callback"); 865 brand_close(bh); 866 goto bad; 867 } 868 869 /* Get the path for this zone's init(1M) (or equivalent) process. */ 870 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) { 871 zerror(zlogp, B_FALSE, 872 "unable to determine zone's init(1M) location"); 873 brand_close(bh); 874 goto bad; 875 } 876 877 /* See if this zone's brand should restart init if it dies. */ 878 restart_init = brand_restartinit(bh); 879 880 brand_close(bh); 881 882 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 883 bad_boot_arg); 884 if (err == Z_INVAL) 885 eventstream_write(Z_EVT_ZONE_BADARGS); 886 else if (err != Z_OK) 887 goto bad; 888 889 assert(init_file[0] != '\0'); 890 891 /* Try to anticipate possible problems: Make sure init is executable. */ 892 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 893 zerror(zlogp, B_FALSE, "unable to determine zone root"); 894 goto bad; 895 } 896 897 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); 898 899 if (stat(initpath, &st) == -1) { 900 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 901 goto bad; 902 } 903 904 if ((st.st_mode & S_IXUSR) == 0) { 905 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 906 goto bad; 907 } 908 909 /* 910 * Exclusive stack zones interact with the dlmgmtd running in the 911 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is 912 * booting, and loads its datalinks from the zone's datalink 913 * configuration file. 914 */ 915 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) { 916 status = dladm_zone_boot(dld_handle, zoneid); 917 if (status != DLADM_STATUS_OK) { 918 zerror(zlogp, B_FALSE, "unable to load zone datalinks: " 919 " %s", dladm_status2str(status, errmsg)); 920 goto bad; 921 } 922 links_loaded = B_TRUE; 923 } 924 925 /* 926 * If there is a brand 'boot' callback, execute it now to give the 927 * brand one last chance to do any additional setup before the zone 928 * is booted. 929 */ 930 if ((strlen(cmdbuf) > EXEC_LEN) && 931 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) { 932 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 933 goto bad; 934 } 935 936 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 937 zerror(zlogp, B_TRUE, "could not set zone boot file"); 938 goto bad; 939 } 940 941 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 942 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 943 goto bad; 944 } 945 946 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART, 947 NULL, 0) == -1) { 948 zerror(zlogp, B_TRUE, "could not set zone init-no-restart"); 949 goto bad; 950 } 951 952 /* 953 * Inform zonestatd of a new zone so that it can install a door for 954 * the zone to contact it. 955 */ 956 notify_zonestatd(zone_id); 957 958 if (zone_boot(zoneid) == -1) { 959 zerror(zlogp, B_TRUE, "unable to boot zone"); 960 goto bad; 961 } 962 963 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0) 964 goto bad; 965 966 return (0); 967 968 bad: 969 /* 970 * If something goes wrong, we up the zones's state to the target 971 * state, RUNNING, and then invoke the hook as if we're halting. 972 */ 973 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT); 974 if (links_loaded) 975 (void) dladm_zone_halt(dld_handle, zoneid); 976 return (-1); 977 } 978 979 static int 980 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate) 981 { 982 int err; 983 984 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0) 985 return (-1); 986 987 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) { 988 if (!bringup_failure_recovery) 989 zerror(zlogp, B_FALSE, "unable to destroy zone"); 990 return (-1); 991 } 992 993 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 994 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 995 zonecfg_strerror(err)); 996 997 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0) 998 return (-1); 999 1000 return (0); 1001 } 1002 1003 static int 1004 zone_graceful_shutdown(zlog_t *zlogp) 1005 { 1006 zoneid_t zoneid; 1007 pid_t child; 1008 char cmdbuf[MAXPATHLEN]; 1009 brand_handle_t bh = NULL; 1010 char zpath[MAXPATHLEN]; 1011 ctid_t ct; 1012 int tmpl_fd; 1013 int child_status; 1014 1015 if (shutdown_in_progress) { 1016 zerror(zlogp, B_FALSE, "shutdown already in progress"); 1017 return (-1); 1018 } 1019 1020 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 1021 zerror(zlogp, B_TRUE, "unable to get zoneid"); 1022 return (-1); 1023 } 1024 1025 /* Get a handle to the brand info for this zone */ 1026 if ((bh = brand_open(brand_name)) == NULL) { 1027 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1028 return (-1); 1029 } 1030 1031 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 1032 zerror(zlogp, B_FALSE, "unable to determine zone path"); 1033 brand_close(bh); 1034 return (-1); 1035 } 1036 1037 /* 1038 * If there is a brand 'shutdown' callback, execute it now to give the 1039 * brand a chance to cleanup any custom configuration. 1040 */ 1041 (void) strcpy(cmdbuf, EXEC_PREFIX); 1042 if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 1043 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) { 1044 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT); 1045 } 1046 brand_close(bh); 1047 1048 if ((tmpl_fd = init_template()) == -1) { 1049 zerror(zlogp, B_TRUE, "failed to create contract"); 1050 return (-1); 1051 } 1052 1053 if ((child = fork()) == -1) { 1054 (void) ct_tmpl_clear(tmpl_fd); 1055 (void) close(tmpl_fd); 1056 zerror(zlogp, B_TRUE, "failed to fork"); 1057 return (-1); 1058 } else if (child == 0) { 1059 (void) ct_tmpl_clear(tmpl_fd); 1060 if (zone_enter(zoneid) == -1) { 1061 _exit(errno); 1062 } 1063 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL)); 1064 } 1065 1066 if (contract_latest(&ct) == -1) 1067 ct = -1; 1068 (void) ct_tmpl_clear(tmpl_fd); 1069 (void) close(tmpl_fd); 1070 1071 if (waitpid(child, &child_status, 0) != child) { 1072 /* unexpected: we must have been signalled */ 1073 (void) contract_abandon_id(ct); 1074 return (-1); 1075 } 1076 1077 (void) contract_abandon_id(ct); 1078 if (WEXITSTATUS(child_status) != 0) { 1079 errno = WEXITSTATUS(child_status); 1080 zerror(zlogp, B_FALSE, "unable to shutdown zone"); 1081 return (-1); 1082 } 1083 1084 shutdown_in_progress = B_TRUE; 1085 1086 return (0); 1087 } 1088 1089 static int 1090 zone_wait_shutdown(zlog_t *zlogp) 1091 { 1092 zone_state_t zstate; 1093 uint64_t *tm = NULL; 1094 scf_simple_prop_t *prop = NULL; 1095 int timeout; 1096 int tries; 1097 int rc = -1; 1098 1099 /* Get default stop timeout from SMF framework */ 1100 timeout = SHUTDOWN_WAIT; 1101 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop", 1102 SCF_PROPERTY_TIMEOUT)) != NULL) { 1103 if ((tm = scf_simple_prop_next_count(prop)) != NULL) { 1104 if (tm != 0) 1105 timeout = *tm; 1106 } 1107 scf_simple_prop_free(prop); 1108 } 1109 1110 /* allow time for zone to shutdown cleanly */ 1111 for (tries = 0; tries < timeout; tries ++) { 1112 (void) sleep(1); 1113 if (zone_get_state(zone_name, &zstate) == Z_OK && 1114 zstate == ZONE_STATE_INSTALLED) { 1115 rc = 0; 1116 break; 1117 } 1118 } 1119 1120 if (rc != 0) 1121 zerror(zlogp, B_FALSE, "unable to shutdown zone"); 1122 1123 shutdown_in_progress = B_FALSE; 1124 1125 return (rc); 1126 } 1127 1128 1129 1130 /* 1131 * Generate AUE_zone_state for a command that boots a zone. 1132 */ 1133 static void 1134 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 1135 char *new_state) 1136 { 1137 adt_session_data_t *ah; 1138 adt_event_data_t *event; 1139 int pass_fail, fail_reason; 1140 1141 if (!adt_audit_enabled()) 1142 return; 1143 1144 if (return_val == 0) { 1145 pass_fail = ADT_SUCCESS; 1146 fail_reason = ADT_SUCCESS; 1147 } else { 1148 pass_fail = ADT_FAILURE; 1149 fail_reason = ADT_FAIL_VALUE_PROGRAM; 1150 } 1151 1152 if (adt_start_session(&ah, NULL, 0)) { 1153 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1154 return; 1155 } 1156 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 1157 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1158 (void) adt_end_session(ah); 1159 return; 1160 } 1161 1162 event = adt_alloc_event(ah, ADT_zone_state); 1163 if (event == NULL) { 1164 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1165 (void) adt_end_session(ah); 1166 return; 1167 } 1168 event->adt_zone_state.zonename = zone_name; 1169 event->adt_zone_state.new_state = new_state; 1170 1171 if (adt_put_event(event, pass_fail, fail_reason)) 1172 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1173 1174 adt_free_event(event); 1175 1176 (void) adt_end_session(ah); 1177 } 1178 1179 /* 1180 * The main routine for the door server that deals with zone state transitions. 1181 */ 1182 /* ARGSUSED */ 1183 static void 1184 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 1185 uint_t n_desc) 1186 { 1187 ucred_t *uc = NULL; 1188 const priv_set_t *eset; 1189 1190 zone_state_t zstate; 1191 zone_cmd_t cmd; 1192 zone_cmd_arg_t *zargp; 1193 1194 boolean_t kernelcall; 1195 1196 int rval = -1; 1197 uint64_t uniqid; 1198 zoneid_t zoneid = -1; 1199 zlog_t zlog; 1200 zlog_t *zlogp; 1201 zone_cmd_rval_t *rvalp; 1202 size_t rlen = getpagesize(); /* conservative */ 1203 fs_callback_t cb; 1204 brand_handle_t bh; 1205 boolean_t wait_shut = B_FALSE; 1206 1207 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1208 zargp = (zone_cmd_arg_t *)args; 1209 1210 /* 1211 * When we get the door unref message, we've fdetach'd the door, and 1212 * it is time for us to shut down zoneadmd. 1213 */ 1214 if (zargp == DOOR_UNREF_DATA) { 1215 /* 1216 * See comment at end of main() for info on the last rites. 1217 */ 1218 exit(0); 1219 } 1220 1221 if (zargp == NULL) { 1222 (void) door_return(NULL, 0, 0, 0); 1223 } 1224 1225 rvalp = alloca(rlen); 1226 bzero(rvalp, rlen); 1227 zlog.logfile = NULL; 1228 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 1229 zlog.buf = rvalp->errbuf; 1230 zlog.log = zlog.buf; 1231 /* defer initialization of zlog.locale until after credential check */ 1232 zlogp = &zlog; 1233 1234 if (alen != sizeof (zone_cmd_arg_t)) { 1235 /* 1236 * This really shouldn't be happening. 1237 */ 1238 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 1239 "unexpected (expected %d bytes)", alen, 1240 sizeof (zone_cmd_arg_t)); 1241 goto out; 1242 } 1243 cmd = zargp->cmd; 1244 1245 if (door_ucred(&uc) != 0) { 1246 zerror(&logsys, B_TRUE, "door_ucred"); 1247 goto out; 1248 } 1249 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 1250 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 1251 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 1252 ucred_geteuid(uc) != 0)) { 1253 zerror(&logsys, B_FALSE, "insufficient privileges"); 1254 goto out; 1255 } 1256 1257 kernelcall = ucred_getpid(uc) == 0; 1258 1259 /* 1260 * This is safe because we only use a zlog_t throughout the 1261 * duration of a door call; i.e., by the time the pointer 1262 * might become invalid, the door call would be over. 1263 */ 1264 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 1265 1266 (void) mutex_lock(&lock); 1267 1268 /* 1269 * Once we start to really die off, we don't want more connections. 1270 */ 1271 if (in_death_throes) { 1272 (void) mutex_unlock(&lock); 1273 ucred_free(uc); 1274 (void) door_return(NULL, 0, 0, 0); 1275 thr_exit(NULL); 1276 } 1277 1278 /* 1279 * Check for validity of command. 1280 */ 1281 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && 1282 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT && 1283 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT && 1284 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { 1285 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 1286 goto out; 1287 } 1288 1289 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 1290 /* 1291 * Can't happen 1292 */ 1293 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 1294 cmd); 1295 goto out; 1296 } 1297 /* 1298 * We ignore the possibility of someone calling zone_create(2) 1299 * explicitly; all requests must come through zoneadmd. 1300 */ 1301 if (zone_get_state(zone_name, &zstate) != Z_OK) { 1302 /* 1303 * Something terribly wrong happened 1304 */ 1305 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 1306 goto out; 1307 } 1308 1309 if (kernelcall) { 1310 /* 1311 * Kernel-initiated requests may lose their validity if the 1312 * zone_t the kernel was referring to has gone away. 1313 */ 1314 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 1315 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 1316 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 1317 /* 1318 * We're not talking about the same zone. The request 1319 * must have arrived too late. Return error. 1320 */ 1321 rval = -1; 1322 goto out; 1323 } 1324 zlogp = &logsys; /* Log errors to syslog */ 1325 } 1326 1327 /* 1328 * If we are being asked to forcibly mount or boot a zone, we 1329 * pretend that an INCOMPLETE zone is actually INSTALLED. 1330 */ 1331 if (zstate == ZONE_STATE_INCOMPLETE && 1332 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) 1333 zstate = ZONE_STATE_INSTALLED; 1334 1335 switch (zstate) { 1336 case ZONE_STATE_CONFIGURED: 1337 case ZONE_STATE_INCOMPLETE: 1338 /* 1339 * Not our area of expertise; we just print a nice message 1340 * and die off. 1341 */ 1342 zerror(zlogp, B_FALSE, 1343 "%s operation is invalid for zones in state '%s'", 1344 z_cmd_name(cmd), zone_state_str(zstate)); 1345 break; 1346 1347 case ZONE_STATE_INSTALLED: 1348 switch (cmd) { 1349 case Z_READY: 1350 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate); 1351 if (rval == 0) 1352 eventstream_write(Z_EVT_ZONE_READIED); 1353 break; 1354 case Z_BOOT: 1355 case Z_FORCEBOOT: 1356 eventstream_write(Z_EVT_ZONE_BOOTING); 1357 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1358 == 0) { 1359 rval = zone_bootup(zlogp, zargp->bootbuf, 1360 zstate); 1361 } 1362 audit_put_record(zlogp, uc, rval, "boot"); 1363 if (rval != 0) { 1364 bringup_failure_recovery = B_TRUE; 1365 (void) zone_halt(zlogp, B_FALSE, B_FALSE, 1366 zstate); 1367 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1368 } 1369 break; 1370 case Z_SHUTDOWN: 1371 case Z_HALT: 1372 if (kernelcall) /* Invalid; can't happen */ 1373 abort(); 1374 /* 1375 * We could have two clients racing to halt this 1376 * zone; the second client loses, but his request 1377 * doesn't fail, since the zone is now in the desired 1378 * state. 1379 */ 1380 zerror(zlogp, B_FALSE, "zone is already halted"); 1381 rval = 0; 1382 break; 1383 case Z_REBOOT: 1384 if (kernelcall) /* Invalid; can't happen */ 1385 abort(); 1386 zerror(zlogp, B_FALSE, "%s operation is invalid " 1387 "for zones in state '%s'", z_cmd_name(cmd), 1388 zone_state_str(zstate)); 1389 rval = -1; 1390 break; 1391 case Z_NOTE_UNINSTALLING: 1392 if (kernelcall) /* Invalid; can't happen */ 1393 abort(); 1394 /* 1395 * Tell the console to print out a message about this. 1396 * Once it does, we will be in_death_throes. 1397 */ 1398 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 1399 break; 1400 case Z_MOUNT: 1401 case Z_FORCEMOUNT: 1402 if (kernelcall) /* Invalid; can't happen */ 1403 abort(); 1404 if (!zone_isnative && !zone_iscluster && 1405 !zone_islabeled) { 1406 /* 1407 * -U mounts the zone without lofs mounting 1408 * zone file systems back into the scratch 1409 * zone. This is required when mounting 1410 * non-native branded zones. 1411 */ 1412 (void) strlcpy(zargp->bootbuf, "-U", 1413 BOOTARGS_MAX); 1414 } 1415 1416 rval = zone_ready(zlogp, 1417 strcmp(zargp->bootbuf, "-U") == 0 ? 1418 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate); 1419 if (rval != 0) 1420 break; 1421 1422 eventstream_write(Z_EVT_ZONE_READIED); 1423 1424 /* 1425 * Get a handle to the default brand info. 1426 * We must always use the default brand file system 1427 * list when mounting the zone. 1428 */ 1429 if ((bh = brand_open(default_brand)) == NULL) { 1430 rval = -1; 1431 break; 1432 } 1433 1434 /* 1435 * Get the list of filesystems to mount from 1436 * the brand configuration. These mounts are done 1437 * via a thread that will enter the zone, so they 1438 * are done from within the context of the zone. 1439 */ 1440 cb.zlogp = zlogp; 1441 cb.zoneid = zone_id; 1442 cb.mount_cmd = B_TRUE; 1443 rval = brand_platform_iter_mounts(bh, 1444 mount_early_fs, &cb); 1445 1446 brand_close(bh); 1447 1448 /* 1449 * Ordinarily, /dev/fd would be mounted inside the zone 1450 * by svc:/system/filesystem/usr:default, but since 1451 * we're not booting the zone, we need to do this 1452 * manually. 1453 */ 1454 if (rval == 0) 1455 rval = mount_early_fs(&cb, 1456 "fd", "/dev/fd", "fd", NULL); 1457 break; 1458 case Z_UNMOUNT: 1459 if (kernelcall) /* Invalid; can't happen */ 1460 abort(); 1461 zerror(zlogp, B_FALSE, "zone is already unmounted"); 1462 rval = 0; 1463 break; 1464 } 1465 break; 1466 1467 case ZONE_STATE_READY: 1468 switch (cmd) { 1469 case Z_READY: 1470 /* 1471 * We could have two clients racing to ready this 1472 * zone; the second client loses, but his request 1473 * doesn't fail, since the zone is now in the desired 1474 * state. 1475 */ 1476 zerror(zlogp, B_FALSE, "zone is already ready"); 1477 rval = 0; 1478 break; 1479 case Z_BOOT: 1480 (void) strlcpy(boot_args, zargp->bootbuf, 1481 sizeof (boot_args)); 1482 eventstream_write(Z_EVT_ZONE_BOOTING); 1483 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1484 audit_put_record(zlogp, uc, rval, "boot"); 1485 if (rval != 0) { 1486 bringup_failure_recovery = B_TRUE; 1487 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1488 zstate); 1489 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1490 } 1491 boot_args[0] = '\0'; 1492 break; 1493 case Z_HALT: 1494 if (kernelcall) /* Invalid; can't happen */ 1495 abort(); 1496 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1497 != 0) 1498 break; 1499 eventstream_write(Z_EVT_ZONE_HALTED); 1500 break; 1501 case Z_SHUTDOWN: 1502 case Z_REBOOT: 1503 case Z_NOTE_UNINSTALLING: 1504 case Z_MOUNT: 1505 case Z_UNMOUNT: 1506 if (kernelcall) /* Invalid; can't happen */ 1507 abort(); 1508 zerror(zlogp, B_FALSE, "%s operation is invalid " 1509 "for zones in state '%s'", z_cmd_name(cmd), 1510 zone_state_str(zstate)); 1511 rval = -1; 1512 break; 1513 } 1514 break; 1515 1516 case ZONE_STATE_MOUNTED: 1517 switch (cmd) { 1518 case Z_UNMOUNT: 1519 if (kernelcall) /* Invalid; can't happen */ 1520 abort(); 1521 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate); 1522 if (rval == 0) { 1523 eventstream_write(Z_EVT_ZONE_HALTED); 1524 (void) sema_post(&scratch_sem); 1525 } 1526 break; 1527 default: 1528 if (kernelcall) /* Invalid; can't happen */ 1529 abort(); 1530 zerror(zlogp, B_FALSE, "%s operation is invalid " 1531 "for zones in state '%s'", z_cmd_name(cmd), 1532 zone_state_str(zstate)); 1533 rval = -1; 1534 break; 1535 } 1536 break; 1537 1538 case ZONE_STATE_RUNNING: 1539 case ZONE_STATE_SHUTTING_DOWN: 1540 case ZONE_STATE_DOWN: 1541 switch (cmd) { 1542 case Z_READY: 1543 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1544 != 0) 1545 break; 1546 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0) 1547 eventstream_write(Z_EVT_ZONE_READIED); 1548 else 1549 eventstream_write(Z_EVT_ZONE_HALTED); 1550 break; 1551 case Z_BOOT: 1552 /* 1553 * We could have two clients racing to boot this 1554 * zone; the second client loses, but his request 1555 * doesn't fail, since the zone is now in the desired 1556 * state. 1557 */ 1558 zerror(zlogp, B_FALSE, "zone is already booted"); 1559 rval = 0; 1560 break; 1561 case Z_HALT: 1562 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1563 != 0) 1564 break; 1565 eventstream_write(Z_EVT_ZONE_HALTED); 1566 break; 1567 case Z_REBOOT: 1568 (void) strlcpy(boot_args, zargp->bootbuf, 1569 sizeof (boot_args)); 1570 eventstream_write(Z_EVT_ZONE_REBOOTING); 1571 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1572 != 0) { 1573 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1574 boot_args[0] = '\0'; 1575 break; 1576 } 1577 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1578 != 0) { 1579 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1580 boot_args[0] = '\0'; 1581 break; 1582 } 1583 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1584 audit_put_record(zlogp, uc, rval, "reboot"); 1585 if (rval != 0) { 1586 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1587 zstate); 1588 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1589 } 1590 boot_args[0] = '\0'; 1591 break; 1592 case Z_SHUTDOWN: 1593 if ((rval = zone_graceful_shutdown(zlogp)) == 0) { 1594 wait_shut = B_TRUE; 1595 } 1596 break; 1597 case Z_NOTE_UNINSTALLING: 1598 case Z_MOUNT: 1599 case Z_UNMOUNT: 1600 zerror(zlogp, B_FALSE, "%s operation is invalid " 1601 "for zones in state '%s'", z_cmd_name(cmd), 1602 zone_state_str(zstate)); 1603 rval = -1; 1604 break; 1605 } 1606 break; 1607 default: 1608 abort(); 1609 } 1610 1611 /* 1612 * Because the state of the zone may have changed, we make sure 1613 * to wake the console poller, which is in charge of initiating 1614 * the shutdown procedure as necessary. 1615 */ 1616 eventstream_write(Z_EVT_NULL); 1617 1618 out: 1619 (void) mutex_unlock(&lock); 1620 1621 /* Wait for the Z_SHUTDOWN commands to complete */ 1622 if (wait_shut) 1623 rval = zone_wait_shutdown(zlogp); 1624 1625 if (kernelcall) { 1626 rvalp = NULL; 1627 rlen = 0; 1628 } else { 1629 rvalp->rval = rval; 1630 } 1631 if (uc != NULL) 1632 ucred_free(uc); 1633 (void) door_return((char *)rvalp, rlen, NULL, 0); 1634 thr_exit(NULL); 1635 } 1636 1637 static int 1638 setup_door(zlog_t *zlogp) 1639 { 1640 if ((zone_door = door_create(server, NULL, 1641 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1642 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1643 return (-1); 1644 } 1645 (void) fdetach(zone_door_path); 1646 1647 if (fattach(zone_door, zone_door_path) != 0) { 1648 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1649 (void) door_revoke(zone_door); 1650 (void) fdetach(zone_door_path); 1651 zone_door = -1; 1652 return (-1); 1653 } 1654 return (0); 1655 } 1656 1657 /* 1658 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 1659 * is where zoneadmd itself will check to see that another instance of 1660 * zoneadmd isn't already controlling this zone. 1661 * 1662 * The idea here is that we want to open the path to which we will 1663 * attach our door, lock it, and then make sure that no-one has beat us 1664 * to fattach(3c)ing onto it. 1665 * 1666 * fattach(3c) is really a mount, so there are actually two possible 1667 * vnodes we could be dealing with. Our strategy is as follows: 1668 * 1669 * - If the file we opened is a regular file (common case): 1670 * There is no fattach(3c)ed door, so we have a chance of becoming 1671 * the managing zoneadmd. We attempt to lock the file: if it is 1672 * already locked, that means someone else raced us here, so we 1673 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 1674 * that beat us to it. 1675 * 1676 * - If the file we opened is a namefs file: 1677 * This means there is already an established door fattach(3c)'ed 1678 * to the rendezvous path. We've lost the race, so we give up. 1679 * Note that in this case we also try to grab the file lock, and 1680 * will succeed in acquiring it since the vnode locked by the 1681 * "winning" zoneadmd was a regular one, and the one we locked was 1682 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1683 * we just return to zoneadm(1m) which knows to retry. 1684 */ 1685 static int 1686 make_daemon_exclusive(zlog_t *zlogp) 1687 { 1688 int doorfd = -1; 1689 int err, ret = -1; 1690 struct stat st; 1691 struct flock flock; 1692 zone_state_t zstate; 1693 1694 top: 1695 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1696 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1697 zonecfg_strerror(err)); 1698 goto out; 1699 } 1700 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1701 S_IREAD|S_IWRITE)) < 0) { 1702 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1703 goto out; 1704 } 1705 if (fstat(doorfd, &st) < 0) { 1706 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1707 goto out; 1708 } 1709 /* 1710 * Lock the file to synchronize with other zoneadmd 1711 */ 1712 flock.l_type = F_WRLCK; 1713 flock.l_whence = SEEK_SET; 1714 flock.l_start = (off_t)0; 1715 flock.l_len = (off_t)0; 1716 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1717 /* 1718 * Someone else raced us here and grabbed the lock file 1719 * first. A warning here is inappropriate since nothing 1720 * went wrong. 1721 */ 1722 goto out; 1723 } 1724 1725 if (strcmp(st.st_fstype, "namefs") == 0) { 1726 struct door_info info; 1727 1728 /* 1729 * There is already something fattach()'ed to this file. 1730 * Lets see what the door is up to. 1731 */ 1732 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1733 /* 1734 * Another zoneadmd process seems to be in 1735 * control of the situation and we don't need to 1736 * be here. A warning here is inappropriate 1737 * since nothing went wrong. 1738 * 1739 * If the door has been revoked, the zoneadmd 1740 * process currently managing the zone is going 1741 * away. We'll return control to zoneadm(1m) 1742 * which will try again (by which time zoneadmd 1743 * will hopefully have exited). 1744 */ 1745 goto out; 1746 } 1747 1748 /* 1749 * If we got this far, there's a fattach(3c)'ed door 1750 * that belongs to a process that has exited, which can 1751 * happen if the previous zoneadmd died unexpectedly. 1752 * 1753 * Let user know that something is amiss, but that we can 1754 * recover; if the zone is in the installed state, then don't 1755 * message, since having a running zoneadmd isn't really 1756 * expected/needed. We want to keep occurences of this message 1757 * limited to times when zoneadmd is picking back up from a 1758 * zoneadmd that died while the zone was in some non-trivial 1759 * state. 1760 */ 1761 if (zstate > ZONE_STATE_INSTALLED) { 1762 zerror(zlogp, B_FALSE, 1763 "zone '%s': WARNING: zone is in state '%s', but " 1764 "zoneadmd does not appear to be available; " 1765 "restarted zoneadmd to recover.", 1766 zone_name, zone_state_str(zstate)); 1767 } 1768 1769 (void) fdetach(zone_door_path); 1770 (void) close(doorfd); 1771 goto top; 1772 } 1773 ret = 0; 1774 out: 1775 (void) close(doorfd); 1776 return (ret); 1777 } 1778 1779 /* 1780 * Setup the brand's pre and post state change callbacks, as well as the 1781 * query callback, if any of these exist. 1782 */ 1783 static int 1784 brand_callback_init(brand_handle_t bh, char *zone_name) 1785 { 1786 char zpath[MAXPATHLEN]; 1787 1788 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) 1789 return (-1); 1790 1791 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX, 1792 sizeof (pre_statechg_hook)); 1793 1794 if (brand_get_prestatechange(bh, zone_name, zpath, 1795 pre_statechg_hook + EXEC_LEN, 1796 sizeof (pre_statechg_hook) - EXEC_LEN) != 0) 1797 return (-1); 1798 1799 if (strlen(pre_statechg_hook) <= EXEC_LEN) 1800 pre_statechg_hook[0] = '\0'; 1801 1802 (void) strlcpy(post_statechg_hook, EXEC_PREFIX, 1803 sizeof (post_statechg_hook)); 1804 1805 if (brand_get_poststatechange(bh, zone_name, zpath, 1806 post_statechg_hook + EXEC_LEN, 1807 sizeof (post_statechg_hook) - EXEC_LEN) != 0) 1808 return (-1); 1809 1810 if (strlen(post_statechg_hook) <= EXEC_LEN) 1811 post_statechg_hook[0] = '\0'; 1812 1813 (void) strlcpy(query_hook, EXEC_PREFIX, 1814 sizeof (query_hook)); 1815 1816 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN, 1817 sizeof (query_hook) - EXEC_LEN) != 0) 1818 return (-1); 1819 1820 if (strlen(query_hook) <= EXEC_LEN) 1821 query_hook[0] = '\0'; 1822 1823 return (0); 1824 } 1825 1826 int 1827 main(int argc, char *argv[]) 1828 { 1829 int opt; 1830 zoneid_t zid; 1831 priv_set_t *privset; 1832 zone_state_t zstate; 1833 char parents_locale[MAXPATHLEN]; 1834 brand_handle_t bh; 1835 int err; 1836 1837 pid_t pid; 1838 sigset_t blockset; 1839 sigset_t block_cld; 1840 1841 struct { 1842 sema_t sem; 1843 int status; 1844 zlog_t log; 1845 } *shstate; 1846 size_t shstatelen = getpagesize(); 1847 1848 zlog_t errlog; 1849 zlog_t *zlogp; 1850 1851 int ctfd; 1852 1853 progname = get_execbasename(argv[0]); 1854 1855 /* 1856 * Make sure stderr is unbuffered 1857 */ 1858 (void) setbuffer(stderr, NULL, 0); 1859 1860 /* 1861 * Get out of the way of mounted filesystems, since we will daemonize 1862 * soon. 1863 */ 1864 (void) chdir("/"); 1865 1866 /* 1867 * Use the default system umask per PSARC 1998/110 rather than 1868 * anything that may have been set by the caller. 1869 */ 1870 (void) umask(CMASK); 1871 1872 /* 1873 * Initially we want to use our parent's locale. 1874 */ 1875 (void) setlocale(LC_ALL, ""); 1876 (void) textdomain(TEXT_DOMAIN); 1877 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1878 sizeof (parents_locale)); 1879 1880 /* 1881 * This zlog_t is used for writing to stderr 1882 */ 1883 errlog.logfile = stderr; 1884 errlog.buflen = errlog.loglen = 0; 1885 errlog.buf = errlog.log = NULL; 1886 errlog.locale = parents_locale; 1887 1888 /* 1889 * We start off writing to stderr until we're ready to daemonize. 1890 */ 1891 zlogp = &errlog; 1892 1893 /* 1894 * Process options. 1895 */ 1896 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1897 switch (opt) { 1898 case 'R': 1899 zonecfg_set_root(optarg); 1900 break; 1901 case 'z': 1902 zone_name = optarg; 1903 break; 1904 default: 1905 usage(); 1906 } 1907 } 1908 1909 if (zone_name == NULL) 1910 usage(); 1911 1912 /* 1913 * Because usage() prints directly to stderr, it has gettext() 1914 * wrapping, which depends on the locale. But since zerror() calls 1915 * localize() which tweaks the locale, it is not safe to call zerror() 1916 * until after the last call to usage(). Fortunately, the last call 1917 * to usage() is just above and the first call to zerror() is just 1918 * below. Don't mess this up. 1919 */ 1920 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1921 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1922 GLOBAL_ZONENAME); 1923 return (1); 1924 } 1925 1926 if (zone_get_id(zone_name, &zid) != 0) { 1927 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 1928 zonecfg_strerror(Z_NO_ZONE)); 1929 return (1); 1930 } 1931 1932 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1933 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1934 zonecfg_strerror(err)); 1935 return (1); 1936 } 1937 if (zstate < ZONE_STATE_INCOMPLETE) { 1938 zerror(zlogp, B_FALSE, 1939 "cannot manage a zone which is in state '%s'", 1940 zone_state_str(zstate)); 1941 return (1); 1942 } 1943 1944 if (zonecfg_default_brand(default_brand, 1945 sizeof (default_brand)) != Z_OK) { 1946 zerror(zlogp, B_FALSE, "unable to determine default brand"); 1947 return (1); 1948 } 1949 1950 /* Get a handle to the brand info for this zone */ 1951 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name)) 1952 != Z_OK) { 1953 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1954 return (1); 1955 } 1956 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0); 1957 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0); 1958 1959 /* 1960 * In the alternate root environment, the only supported 1961 * operations are mount and unmount. In this case, just treat 1962 * the zone as native if it is cluster. Cluster zones can be 1963 * native for the purpose of LU or upgrade, and the cluster 1964 * brand may not exist in the miniroot (such as in net install 1965 * upgrade). 1966 */ 1967 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) { 1968 zone_iscluster = B_TRUE; 1969 if (zonecfg_in_alt_root()) { 1970 (void) strlcpy(brand_name, default_brand, 1971 sizeof (brand_name)); 1972 } 1973 } else { 1974 zone_iscluster = B_FALSE; 1975 } 1976 1977 if ((bh = brand_open(brand_name)) == NULL) { 1978 zerror(zlogp, B_FALSE, "unable to open zone brand"); 1979 return (1); 1980 } 1981 1982 /* Get state change brand hooks. */ 1983 if (brand_callback_init(bh, zone_name) == -1) { 1984 zerror(zlogp, B_TRUE, 1985 "failed to initialize brand state change hooks"); 1986 brand_close(bh); 1987 return (1); 1988 } 1989 1990 brand_close(bh); 1991 1992 /* 1993 * Check that we have all privileges. It would be nice to pare 1994 * this down, but this is at least a first cut. 1995 */ 1996 if ((privset = priv_allocset()) == NULL) { 1997 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1998 return (1); 1999 } 2000 2001 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 2002 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 2003 priv_freeset(privset); 2004 return (1); 2005 } 2006 2007 if (priv_isfullset(privset) == B_FALSE) { 2008 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 2009 "run this command (all privs required)"); 2010 priv_freeset(privset); 2011 return (1); 2012 } 2013 priv_freeset(privset); 2014 2015 if (mkzonedir(zlogp) != 0) 2016 return (1); 2017 2018 /* 2019 * Pre-fork: setup shared state 2020 */ 2021 if ((shstate = (void *)mmap(NULL, shstatelen, 2022 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 2023 MAP_FAILED) { 2024 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 2025 return (1); 2026 } 2027 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 2028 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 2029 (void) munmap((char *)shstate, shstatelen); 2030 return (1); 2031 } 2032 shstate->log.logfile = NULL; 2033 shstate->log.buflen = shstatelen - sizeof (*shstate); 2034 shstate->log.loglen = shstate->log.buflen; 2035 shstate->log.buf = (char *)shstate + sizeof (*shstate); 2036 shstate->log.log = shstate->log.buf; 2037 shstate->log.locale = parents_locale; 2038 shstate->status = -1; 2039 2040 /* 2041 * We need a SIGCHLD handler so the sema_wait() below will wake 2042 * up if the child dies without doing a sema_post(). 2043 */ 2044 (void) sigset(SIGCHLD, sigchld); 2045 /* 2046 * We must mask SIGCHLD until after we've coped with the fork 2047 * sufficiently to deal with it; otherwise we can race and 2048 * receive the signal before pid has been initialized 2049 * (yes, this really happens). 2050 */ 2051 (void) sigemptyset(&block_cld); 2052 (void) sigaddset(&block_cld, SIGCHLD); 2053 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 2054 2055 /* 2056 * The parent only needs stderr after the fork, so close other fd's 2057 * that we inherited from zoneadm so that the parent doesn't have those 2058 * open while waiting. The child will close the rest after the fork. 2059 */ 2060 closefrom(3); 2061 2062 if ((ctfd = init_template()) == -1) { 2063 zerror(zlogp, B_TRUE, "failed to create contract"); 2064 return (1); 2065 } 2066 2067 /* 2068 * Do not let another thread localize a message while we are forking. 2069 */ 2070 (void) mutex_lock(&msglock); 2071 pid = fork(); 2072 (void) mutex_unlock(&msglock); 2073 2074 /* 2075 * In all cases (parent, child, and in the event of an error) we 2076 * don't want to cause creation of contracts on subsequent fork()s. 2077 */ 2078 (void) ct_tmpl_clear(ctfd); 2079 (void) close(ctfd); 2080 2081 if (pid == -1) { 2082 zerror(zlogp, B_TRUE, "could not fork"); 2083 return (1); 2084 2085 } else if (pid > 0) { /* parent */ 2086 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 2087 /* 2088 * This marks a window of vulnerability in which we receive 2089 * the SIGCLD before falling into sema_wait (normally we would 2090 * get woken up from sema_wait with EINTR upon receipt of 2091 * SIGCLD). So we may need to use some other scheme like 2092 * sema_posting in the sigcld handler. 2093 * blech 2094 */ 2095 (void) sema_wait(&shstate->sem); 2096 (void) sema_destroy(&shstate->sem); 2097 if (shstate->status != 0) 2098 (void) waitpid(pid, NULL, WNOHANG); 2099 /* 2100 * It's ok if we die with SIGPIPE. It's not like we could have 2101 * done anything about it. 2102 */ 2103 (void) fprintf(stderr, "%s", shstate->log.buf); 2104 _exit(shstate->status == 0 ? 0 : 1); 2105 } 2106 2107 /* 2108 * The child charges on. 2109 */ 2110 (void) sigset(SIGCHLD, SIG_DFL); 2111 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 2112 2113 /* 2114 * SIGPIPE can be delivered if we write to a socket for which the 2115 * peer endpoint is gone. That can lead to too-early termination 2116 * of zoneadmd, and that's not good eats. 2117 */ 2118 (void) sigset(SIGPIPE, SIG_IGN); 2119 /* 2120 * Stop using stderr 2121 */ 2122 zlogp = &shstate->log; 2123 2124 /* 2125 * We don't need stdout/stderr from now on. 2126 */ 2127 closefrom(0); 2128 2129 /* 2130 * Initialize the syslog zlog_t. This needs to be done after 2131 * the call to closefrom(). 2132 */ 2133 logsys.buf = logsys.log = NULL; 2134 logsys.buflen = logsys.loglen = 0; 2135 logsys.logfile = NULL; 2136 logsys.locale = DEFAULT_LOCALE; 2137 2138 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 2139 2140 /* 2141 * The eventstream is used to publish state changes in the zone 2142 * from the door threads to the console I/O poller. 2143 */ 2144 if (eventstream_init() == -1) { 2145 zerror(zlogp, B_TRUE, "unable to create eventstream"); 2146 goto child_out; 2147 } 2148 2149 (void) snprintf(zone_door_path, sizeof (zone_door_path), 2150 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 2151 2152 /* 2153 * See if another zoneadmd is running for this zone. If not, then we 2154 * can now modify system state. 2155 */ 2156 if (make_daemon_exclusive(zlogp) == -1) 2157 goto child_out; 2158 2159 2160 /* 2161 * Create/join a new session; we need to be careful of what we do with 2162 * the console from now on so we don't end up being the session leader 2163 * for the terminal we're going to be handing out. 2164 */ 2165 (void) setsid(); 2166 2167 /* 2168 * This thread shouldn't be receiving any signals; in particular, 2169 * SIGCHLD should be received by the thread doing the fork(). 2170 */ 2171 (void) sigfillset(&blockset); 2172 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 2173 2174 /* 2175 * Setup the console device and get ready to serve the console; 2176 * once this has completed, we're ready to let console clients 2177 * make an attempt to connect (they will block until 2178 * serve_console_sock() below gets called, and any pending 2179 * connection is accept()ed). 2180 */ 2181 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0) 2182 goto child_out; 2183 2184 /* 2185 * Take the lock now, so that when the door server gets going, we 2186 * are guaranteed that it won't take a request until we are sure 2187 * that everything is completely set up. See the child_out: label 2188 * below to see why this matters. 2189 */ 2190 (void) mutex_lock(&lock); 2191 2192 /* Init semaphore for scratch zones. */ 2193 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 2194 zerror(zlogp, B_TRUE, 2195 "failed to initialize semaphore for scratch zone"); 2196 goto child_out; 2197 } 2198 2199 /* open the dladm handle */ 2200 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) { 2201 zerror(zlogp, B_FALSE, "failed to open dladm handle"); 2202 goto child_out; 2203 } 2204 2205 /* 2206 * Note: door setup must occur *after* the console is setup. 2207 * This is so that as zlogin tests the door to see if zoneadmd 2208 * is ready yet, we know that the console will get serviced 2209 * once door_info() indicates that the door is "up". 2210 */ 2211 if (setup_door(zlogp) == -1) 2212 goto child_out; 2213 2214 /* 2215 * Things seem OK so far; tell the parent process that we're done 2216 * with setup tasks. This will cause the parent to exit, signalling 2217 * to zoneadm, zlogin, or whatever forked it that we are ready to 2218 * service requests. 2219 */ 2220 shstate->status = 0; 2221 (void) sema_post(&shstate->sem); 2222 (void) munmap((char *)shstate, shstatelen); 2223 shstate = NULL; 2224 2225 (void) mutex_unlock(&lock); 2226 2227 /* 2228 * zlogp is now invalid, so reset it to the syslog logger. 2229 */ 2230 zlogp = &logsys; 2231 2232 /* 2233 * Now that we are free of any parents, switch to the default locale. 2234 */ 2235 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 2236 2237 /* 2238 * At this point the setup portion of main() is basically done, so 2239 * we reuse this thread to manage the zone console. When 2240 * serve_console() has returned, we are past the point of no return 2241 * in the life of this zoneadmd. 2242 */ 2243 if (zonecfg_in_alt_root()) { 2244 /* 2245 * This is just awful, but mounted scratch zones don't (and 2246 * can't) have consoles. We just wait for unmount instead. 2247 */ 2248 while (sema_wait(&scratch_sem) == EINTR) 2249 ; 2250 } else { 2251 serve_console(zlogp); 2252 assert(in_death_throes); 2253 } 2254 2255 /* 2256 * This is the next-to-last part of the exit interlock. Upon calling 2257 * fdetach(), the door will go unreferenced; once any 2258 * outstanding requests (like the door thread doing Z_HALT) are 2259 * done, the door will get an UNREF notification; when it handles 2260 * the UNREF, the door server will cause the exit. It's possible 2261 * that fdetach() can fail because the file is in use, in which 2262 * case we'll retry the operation. 2263 */ 2264 assert(!MUTEX_HELD(&lock)); 2265 for (;;) { 2266 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY)) 2267 break; 2268 yield(); 2269 } 2270 2271 for (;;) 2272 (void) pause(); 2273 2274 child_out: 2275 assert(pid == 0); 2276 if (shstate != NULL) { 2277 shstate->status = -1; 2278 (void) sema_post(&shstate->sem); 2279 (void) munmap((char *)shstate, shstatelen); 2280 } 2281 2282 /* 2283 * This might trigger an unref notification, but if so, 2284 * we are still holding the lock, so our call to exit will 2285 * ultimately win the race and will publish the right exit 2286 * code. 2287 */ 2288 if (zone_door != -1) { 2289 assert(MUTEX_HELD(&lock)); 2290 (void) door_revoke(zone_door); 2291 (void) fdetach(zone_door_path); 2292 } 2293 2294 if (dld_handle != NULL) 2295 dladm_close(dld_handle); 2296 2297 return (1); /* return from main() forcibly exits an MT process */ 2298 } 2299