1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2016 by Delphix. All rights reserved. 26 */ 27 28 /* 29 * zoneadmd manages zones; one zoneadmd process is launched for each 30 * non-global zone on the system. This daemon juggles four jobs: 31 * 32 * - Implement setup and teardown of the zone "virtual platform": mount and 33 * unmount filesystems; create and destroy network interfaces; communicate 34 * with devfsadmd to lay out devices for the zone; instantiate the zone 35 * console device; configure process runtime attributes such as resource 36 * controls, pool bindings, fine-grained privileges. 37 * 38 * - Launch the zone's init(1M) process. 39 * 40 * - Implement a door server; clients (like zoneadm) connect to the door 41 * server and request zone state changes. The kernel is also a client of 42 * this door server. A request to halt or reboot the zone which originates 43 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 44 * 45 * One minor problem is that messages emitted by zoneadmd need to be passed 46 * back to the zoneadm process making the request. These messages need to 47 * be rendered in the client's locale; so, this is passed in as part of the 48 * request. The exception is the kernel upcall to zoneadmd, in which case 49 * messages are syslog'd. 50 * 51 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 52 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 53 * strings which do not need to be translated. 54 * 55 * - Act as a console server for zlogin -C processes; see comments in zcons.c 56 * for more information about the zone console architecture. 57 * 58 * DESIGN NOTES 59 * 60 * Restart: 61 * A chief design constraint of zoneadmd is that it should be restartable in 62 * the case that the administrator kills it off, or it suffers a fatal error, 63 * without the running zone being impacted; this is akin to being able to 64 * reboot the service processor of a server without affecting the OS instance. 65 */ 66 67 #include <sys/param.h> 68 #include <sys/mman.h> 69 #include <sys/types.h> 70 #include <sys/stat.h> 71 #include <sys/sysmacros.h> 72 73 #include <bsm/adt.h> 74 #include <bsm/adt_event.h> 75 76 #include <alloca.h> 77 #include <assert.h> 78 #include <errno.h> 79 #include <door.h> 80 #include <fcntl.h> 81 #include <locale.h> 82 #include <signal.h> 83 #include <stdarg.h> 84 #include <stdio.h> 85 #include <stdlib.h> 86 #include <string.h> 87 #include <strings.h> 88 #include <synch.h> 89 #include <syslog.h> 90 #include <thread.h> 91 #include <unistd.h> 92 #include <wait.h> 93 #include <limits.h> 94 #include <zone.h> 95 #include <libbrand.h> 96 #include <sys/brand.h> 97 #include <libcontract.h> 98 #include <libcontract_priv.h> 99 #include <sys/brand.h> 100 #include <sys/contract/process.h> 101 #include <sys/ctfs.h> 102 #include <libdladm.h> 103 #include <sys/dls_mgmt.h> 104 #include <libscf.h> 105 106 #include <libzonecfg.h> 107 #include <zonestat_impl.h> 108 #include "zoneadmd.h" 109 110 static char *progname; 111 char *zone_name; /* zone which we are managing */ 112 char pool_name[MAXNAMELEN]; 113 char default_brand[MAXNAMELEN]; 114 char brand_name[MAXNAMELEN]; 115 boolean_t zone_isnative; 116 boolean_t zone_iscluster; 117 boolean_t zone_islabeled; 118 boolean_t shutdown_in_progress; 119 static zoneid_t zone_id; 120 dladm_handle_t dld_handle = NULL; 121 122 static char pre_statechg_hook[2 * MAXPATHLEN]; 123 static char post_statechg_hook[2 * MAXPATHLEN]; 124 char query_hook[2 * MAXPATHLEN]; 125 126 zlog_t logsys; 127 128 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 129 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 130 131 static sema_t scratch_sem; /* for scratch zones */ 132 133 static char zone_door_path[MAXPATHLEN]; 134 static int zone_door = -1; 135 136 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 137 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 138 139 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 140 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 141 #endif 142 143 #define DEFAULT_LOCALE "C" 144 145 static const char * 146 z_cmd_name(zone_cmd_t zcmd) 147 { 148 /* This list needs to match the enum in sys/zone.h */ 149 static const char *zcmdstr[] = { 150 "ready", "boot", "forceboot", "reboot", "halt", 151 "note_uninstalling", "mount", "forcemount", "unmount", 152 "shutdown" 153 }; 154 155 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 156 return ("unknown"); 157 else 158 return (zcmdstr[(int)zcmd]); 159 } 160 161 static char * 162 get_execbasename(char *execfullname) 163 { 164 char *last_slash, *execbasename; 165 166 /* guard against '/' at end of command invocation */ 167 for (;;) { 168 last_slash = strrchr(execfullname, '/'); 169 if (last_slash == NULL) { 170 execbasename = execfullname; 171 break; 172 } else { 173 execbasename = last_slash + 1; 174 if (*execbasename == '\0') { 175 *last_slash = '\0'; 176 continue; 177 } 178 break; 179 } 180 } 181 return (execbasename); 182 } 183 184 static void 185 usage(void) 186 { 187 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 188 (void) fprintf(stderr, 189 gettext("\tNote: %s should not be run directly.\n"), progname); 190 exit(2); 191 } 192 193 /* ARGSUSED */ 194 static void 195 sigchld(int sig) 196 { 197 } 198 199 char * 200 localize_msg(char *locale, const char *msg) 201 { 202 char *out; 203 204 (void) mutex_lock(&msglock); 205 (void) setlocale(LC_MESSAGES, locale); 206 out = gettext(msg); 207 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 208 (void) mutex_unlock(&msglock); 209 return (out); 210 } 211 212 /* PRINTFLIKE3 */ 213 void 214 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 215 { 216 va_list alist; 217 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 218 char *bp; 219 int saved_errno = errno; 220 221 if (zlogp == NULL) 222 return; 223 if (zlogp == &logsys) 224 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 225 zone_name); 226 else 227 buf[0] = '\0'; 228 bp = &(buf[strlen(buf)]); 229 230 /* 231 * In theory, the locale pointer should be set to either "C" or a 232 * char array, so it should never be NULL 233 */ 234 assert(zlogp->locale != NULL); 235 /* Locale is per process, but we are multi-threaded... */ 236 fmt = localize_msg(zlogp->locale, fmt); 237 238 va_start(alist, fmt); 239 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 240 va_end(alist); 241 bp = &(buf[strlen(buf)]); 242 if (use_strerror) 243 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 244 strerror(saved_errno)); 245 if (zlogp == &logsys) { 246 (void) syslog(LOG_ERR, "%s", buf); 247 } else if (zlogp->logfile != NULL) { 248 (void) fprintf(zlogp->logfile, "%s\n", buf); 249 } else { 250 size_t buflen; 251 size_t copylen; 252 253 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 254 copylen = MIN(buflen, zlogp->loglen); 255 zlogp->log += copylen; 256 zlogp->loglen -= copylen; 257 } 258 } 259 260 /* 261 * Emit a warning for any boot arguments which are unrecognized. Since 262 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 263 * put the arguments into an argv style array, use getopt to process them, 264 * and put the resultant argument string back into outargs. 265 * 266 * During the filtering, we pull out any arguments which are truly "boot" 267 * arguments, leaving only those which are to be passed intact to the 268 * progenitor process. The one we support at the moment is -i, which 269 * indicates to the kernel which program should be launched as 'init'. 270 * 271 * A return of Z_INVAL indicates specifically that the arguments are 272 * not valid; this is a non-fatal error. Except for Z_OK, all other return 273 * values are treated as fatal. 274 */ 275 static int 276 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 277 char *init_file, char *badarg) 278 { 279 int argc = 0, argc_save; 280 int i; 281 int err; 282 char *arg, *lasts, **argv = NULL, **argv_save; 283 char zonecfg_args[BOOTARGS_MAX]; 284 char scratchargs[BOOTARGS_MAX], *sargs; 285 char c; 286 287 bzero(outargs, BOOTARGS_MAX); 288 bzero(badarg, BOOTARGS_MAX); 289 290 /* 291 * If the user didn't specify transient boot arguments, check 292 * to see if there were any specified in the zone configuration, 293 * and use them if applicable. 294 */ 295 if (inargs == NULL || inargs[0] == '\0') { 296 zone_dochandle_t handle; 297 if ((handle = zonecfg_init_handle()) == NULL) { 298 zerror(zlogp, B_TRUE, 299 "getting zone configuration handle"); 300 return (Z_BAD_HANDLE); 301 } 302 err = zonecfg_get_snapshot_handle(zone_name, handle); 303 if (err != Z_OK) { 304 zerror(zlogp, B_FALSE, 305 "invalid configuration snapshot"); 306 zonecfg_fini_handle(handle); 307 return (Z_BAD_HANDLE); 308 } 309 310 bzero(zonecfg_args, sizeof (zonecfg_args)); 311 (void) zonecfg_get_bootargs(handle, zonecfg_args, 312 sizeof (zonecfg_args)); 313 inargs = zonecfg_args; 314 zonecfg_fini_handle(handle); 315 } 316 317 if (strlen(inargs) >= BOOTARGS_MAX) { 318 zerror(zlogp, B_FALSE, "boot argument string too long"); 319 return (Z_INVAL); 320 } 321 322 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 323 sargs = scratchargs; 324 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 325 sargs = NULL; 326 argc++; 327 } 328 329 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 330 zerror(zlogp, B_FALSE, "memory allocation failed"); 331 return (Z_NOMEM); 332 } 333 334 argv_save = argv; 335 argc_save = argc; 336 337 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 338 sargs = scratchargs; 339 i = 0; 340 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 341 sargs = NULL; 342 if ((argv[i] = strdup(arg)) == NULL) { 343 err = Z_NOMEM; 344 zerror(zlogp, B_FALSE, "memory allocation failed"); 345 goto done; 346 } 347 i++; 348 } 349 350 /* 351 * We preserve compatibility with the Solaris system boot behavior, 352 * which allows: 353 * 354 * # reboot kernel/unix -s -m verbose 355 * 356 * In this example, kernel/unix tells the booter what file to 357 * boot. We don't want reboot in a zone to be gratuitously different, 358 * so we silently ignore the boot file, if necessary. 359 */ 360 if (argv[0] == NULL) 361 goto done; 362 363 assert(argv[0][0] != ' '); 364 assert(argv[0][0] != '\t'); 365 366 if (argv[0][0] != '-' && argv[0][0] != '\0') { 367 argv = &argv[1]; 368 argc--; 369 } 370 371 optind = 0; 372 opterr = 0; 373 err = Z_OK; 374 while ((c = getopt(argc, argv, "fi:m:s")) != -1) { 375 switch (c) { 376 case 'i': 377 /* 378 * -i is handled by the runtime and is not passed 379 * along to userland 380 */ 381 (void) strlcpy(init_file, optarg, MAXPATHLEN); 382 break; 383 case 'f': 384 /* This has already been processed by zoneadm */ 385 break; 386 case 'm': 387 case 's': 388 /* These pass through unmolested */ 389 (void) snprintf(outargs, BOOTARGS_MAX, 390 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 391 break; 392 case '?': 393 /* 394 * We warn about unknown arguments but pass them 395 * along anyway-- if someone wants to develop their 396 * own init replacement, they can pass it whatever 397 * args they want. 398 */ 399 err = Z_INVAL; 400 (void) snprintf(outargs, BOOTARGS_MAX, 401 "%s -%c", outargs, optopt); 402 (void) snprintf(badarg, BOOTARGS_MAX, 403 "%s -%c", badarg, optopt); 404 break; 405 } 406 } 407 408 /* 409 * For Solaris Zones we warn about and discard non-option arguments. 410 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 411 * to the kernel, we concat up all the other remaining boot args. 412 * and warn on them as a group. 413 */ 414 if (optind < argc) { 415 err = Z_INVAL; 416 while (optind < argc) { 417 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 418 badarg, strlen(badarg) > 0 ? " " : "", 419 argv[optind]); 420 optind++; 421 } 422 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 423 "arguments `%s'.", badarg); 424 } 425 426 done: 427 for (i = 0; i < argc_save; i++) { 428 if (argv_save[i] != NULL) 429 free(argv_save[i]); 430 } 431 free(argv_save); 432 return (err); 433 } 434 435 436 static int 437 mkzonedir(zlog_t *zlogp) 438 { 439 struct stat st; 440 /* 441 * We must create and lock everyone but root out of ZONES_TMPDIR 442 * since anyone can open any UNIX domain socket, regardless of 443 * its file system permissions. Sigh... 444 */ 445 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 446 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 447 return (-1); 448 } 449 /* paranoia */ 450 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 451 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 452 return (-1); 453 } 454 (void) chmod(ZONES_TMPDIR, S_IRWXU); 455 return (0); 456 } 457 458 /* 459 * Run the brand's pre-state change callback, if it exists. 460 */ 461 static int 462 brand_prestatechg(zlog_t *zlogp, int state, int cmd) 463 { 464 char cmdbuf[2 * MAXPATHLEN]; 465 const char *altroot; 466 467 if (pre_statechg_hook[0] == '\0') 468 return (0); 469 470 altroot = zonecfg_get_root(); 471 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", pre_statechg_hook, 472 state, cmd, altroot) > sizeof (cmdbuf)) 473 return (-1); 474 475 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 476 return (-1); 477 478 return (0); 479 } 480 481 /* 482 * Run the brand's post-state change callback, if it exists. 483 */ 484 static int 485 brand_poststatechg(zlog_t *zlogp, int state, int cmd) 486 { 487 char cmdbuf[2 * MAXPATHLEN]; 488 const char *altroot; 489 490 if (post_statechg_hook[0] == '\0') 491 return (0); 492 493 altroot = zonecfg_get_root(); 494 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook, 495 state, cmd, altroot) > sizeof (cmdbuf)) 496 return (-1); 497 498 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 499 return (-1); 500 501 return (0); 502 } 503 504 /* 505 * Notify zonestatd of the new zone. If zonestatd is not running, this 506 * will do nothing. 507 */ 508 static void 509 notify_zonestatd(zoneid_t zoneid) 510 { 511 int cmd[2]; 512 int fd; 513 door_arg_t params; 514 515 fd = open(ZS_DOOR_PATH, O_RDONLY); 516 if (fd < 0) 517 return; 518 519 cmd[0] = ZSD_CMD_NEW_ZONE; 520 cmd[1] = zoneid; 521 params.data_ptr = (char *)&cmd; 522 params.data_size = sizeof (cmd); 523 params.desc_ptr = NULL; 524 params.desc_num = 0; 525 params.rbuf = NULL; 526 params.rsize = 0; 527 (void) door_call(fd, ¶ms); 528 (void) close(fd); 529 } 530 531 /* 532 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 533 * 'true' if this is being invoked as part of the processing for the "mount" 534 * subcommand. 535 */ 536 static int 537 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate) 538 { 539 int err; 540 541 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0) 542 return (-1); 543 544 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 545 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 546 zonecfg_strerror(err)); 547 goto bad; 548 } 549 550 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 551 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 552 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 553 zonecfg_strerror(err)); 554 goto bad; 555 } 556 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 557 bringup_failure_recovery = B_TRUE; 558 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE); 559 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 560 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 561 zonecfg_strerror(err)); 562 goto bad; 563 } 564 565 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0) 566 goto bad; 567 568 return (0); 569 570 bad: 571 /* 572 * If something goes wrong, we up the zones's state to the target 573 * state, READY, and then invoke the hook as if we're halting. 574 */ 575 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT); 576 return (-1); 577 } 578 579 int 580 init_template(void) 581 { 582 int fd; 583 int err = 0; 584 585 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 586 if (fd == -1) 587 return (-1); 588 589 /* 590 * For now, zoneadmd doesn't do anything with the contract. 591 * Deliver no events, don't inherit, and allow it to be orphaned. 592 */ 593 err |= ct_tmpl_set_critical(fd, 0); 594 err |= ct_tmpl_set_informative(fd, 0); 595 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 596 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 597 if (err || ct_tmpl_activate(fd)) { 598 (void) close(fd); 599 return (-1); 600 } 601 602 return (fd); 603 } 604 605 typedef struct fs_callback { 606 zlog_t *zlogp; 607 zoneid_t zoneid; 608 boolean_t mount_cmd; 609 } fs_callback_t; 610 611 static int 612 mount_early_fs(void *data, const char *spec, const char *dir, 613 const char *fstype, const char *opt) 614 { 615 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; 616 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; 617 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd; 618 char rootpath[MAXPATHLEN]; 619 pid_t child; 620 int child_status; 621 int tmpl_fd; 622 int rv; 623 ctid_t ct; 624 625 /* determine the zone rootpath */ 626 if (mount_cmd) { 627 char zonepath[MAXPATHLEN]; 628 char luroot[MAXPATHLEN]; 629 630 if (zone_get_zonepath(zone_name, 631 zonepath, sizeof (zonepath)) != Z_OK) { 632 zerror(zlogp, B_FALSE, "unable to determine zone path"); 633 return (-1); 634 } 635 636 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 637 resolve_lofs(zlogp, luroot, sizeof (luroot)); 638 (void) strlcpy(rootpath, luroot, sizeof (rootpath)); 639 } else { 640 if (zone_get_rootpath(zone_name, 641 rootpath, sizeof (rootpath)) != Z_OK) { 642 zerror(zlogp, B_FALSE, "unable to determine zone root"); 643 return (-1); 644 } 645 } 646 647 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) { 648 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 649 rootpath, dir); 650 return (-1); 651 } else if (rv > 0) { 652 /* The mount point path doesn't exist, create it now. */ 653 if (make_one_dir(zlogp, rootpath, dir, 654 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 655 DEFAULT_DIR_GROUP) != 0) { 656 zerror(zlogp, B_FALSE, "failed to create mount point"); 657 return (-1); 658 } 659 660 /* 661 * Now this might seem weird, but we need to invoke 662 * valid_mount_path() again. Why? Because it checks 663 * to make sure that the mount point path is canonical, 664 * which it can only do if the path exists, so now that 665 * we've created the path we have to verify it again. 666 */ 667 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, 668 fstype)) < 0) { 669 zerror(zlogp, B_FALSE, 670 "%s%s is not a valid mount point", rootpath, dir); 671 return (-1); 672 } 673 } 674 675 if ((tmpl_fd = init_template()) == -1) { 676 zerror(zlogp, B_TRUE, "failed to create contract"); 677 return (-1); 678 } 679 680 if ((child = fork()) == -1) { 681 (void) ct_tmpl_clear(tmpl_fd); 682 (void) close(tmpl_fd); 683 zerror(zlogp, B_TRUE, "failed to fork"); 684 return (-1); 685 686 } else if (child == 0) { /* child */ 687 char opt_buf[MAX_MNTOPT_STR]; 688 int optlen = 0; 689 int mflag = MS_DATA; 690 691 (void) ct_tmpl_clear(tmpl_fd); 692 /* 693 * Even though there are no procs running in the zone, we 694 * do this for paranoia's sake. 695 */ 696 (void) closefrom(0); 697 698 if (zone_enter(zoneid) == -1) { 699 _exit(errno); 700 } 701 if (opt != NULL) { 702 /* 703 * The mount() system call is incredibly annoying. 704 * If options are specified, we need to copy them 705 * into a temporary buffer since the mount() system 706 * call will overwrite the options string. It will 707 * also fail if the new option string it wants to 708 * write is bigger than the one we passed in, so 709 * you must pass in a buffer of the maximum possible 710 * option string length. sigh. 711 */ 712 (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); 713 opt = opt_buf; 714 optlen = MAX_MNTOPT_STR; 715 mflag = MS_OPTIONSTR; 716 } 717 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) 718 _exit(errno); 719 _exit(0); 720 } 721 722 /* parent */ 723 if (contract_latest(&ct) == -1) 724 ct = -1; 725 (void) ct_tmpl_clear(tmpl_fd); 726 (void) close(tmpl_fd); 727 if (waitpid(child, &child_status, 0) != child) { 728 /* unexpected: we must have been signalled */ 729 (void) contract_abandon_id(ct); 730 return (-1); 731 } 732 (void) contract_abandon_id(ct); 733 if (WEXITSTATUS(child_status) != 0) { 734 errno = WEXITSTATUS(child_status); 735 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 736 return (-1); 737 } 738 739 return (0); 740 } 741 742 /* 743 * If retstr is not NULL, the output of the subproc is returned in the str, 744 * otherwise it is output using zerror(). Any memory allocated for retstr 745 * should be freed by the caller. 746 */ 747 int 748 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr) 749 { 750 char buf[1024]; /* arbitrary large amount */ 751 char *inbuf; 752 FILE *file; 753 int status; 754 int rd_cnt; 755 756 if (retstr != NULL) { 757 if ((*retstr = malloc(1024)) == NULL) { 758 zerror(zlogp, B_FALSE, "out of memory"); 759 return (-1); 760 } 761 inbuf = *retstr; 762 rd_cnt = 0; 763 } else { 764 inbuf = buf; 765 } 766 767 file = popen(cmdbuf, "r"); 768 if (file == NULL) { 769 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); 770 return (-1); 771 } 772 773 while (fgets(inbuf, 1024, file) != NULL) { 774 if (retstr == NULL) { 775 if (zlogp != &logsys) 776 zerror(zlogp, B_FALSE, "%s", inbuf); 777 } else { 778 char *p; 779 780 rd_cnt += 1024 - 1; 781 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) { 782 zerror(zlogp, B_FALSE, "out of memory"); 783 (void) pclose(file); 784 return (-1); 785 } 786 787 *retstr = p; 788 inbuf = *retstr + rd_cnt; 789 } 790 } 791 status = pclose(file); 792 793 if (WIFSIGNALED(status)) { 794 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 795 "signal %d", cmdbuf, WTERMSIG(status)); 796 return (-1); 797 } 798 assert(WIFEXITED(status)); 799 if (WEXITSTATUS(status) == ZEXIT_EXEC) { 800 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); 801 return (-1); 802 } 803 return (WEXITSTATUS(status)); 804 } 805 806 static int 807 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) 808 { 809 zoneid_t zoneid; 810 struct stat st; 811 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 812 char nbootargs[BOOTARGS_MAX]; 813 char cmdbuf[MAXPATHLEN]; 814 fs_callback_t cb; 815 brand_handle_t bh; 816 zone_iptype_t iptype; 817 boolean_t links_loaded = B_FALSE; 818 dladm_status_t status; 819 char errmsg[DLADM_STRSIZE]; 820 int err; 821 boolean_t restart_init; 822 823 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0) 824 return (-1); 825 826 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 827 zerror(zlogp, B_TRUE, "unable to get zoneid"); 828 goto bad; 829 } 830 831 cb.zlogp = zlogp; 832 cb.zoneid = zoneid; 833 cb.mount_cmd = B_FALSE; 834 835 /* Get a handle to the brand info for this zone */ 836 if ((bh = brand_open(brand_name)) == NULL) { 837 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 838 goto bad; 839 } 840 841 /* 842 * Get the list of filesystems to mount from the brand 843 * configuration. These mounts are done via a thread that will 844 * enter the zone, so they are done from within the context of the 845 * zone. 846 */ 847 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) { 848 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 849 brand_close(bh); 850 goto bad; 851 } 852 853 /* 854 * Get the brand's boot callback if it exists. 855 */ 856 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 857 zerror(zlogp, B_FALSE, "unable to determine zone path"); 858 brand_close(bh); 859 goto bad; 860 } 861 (void) strcpy(cmdbuf, EXEC_PREFIX); 862 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 863 sizeof (cmdbuf) - EXEC_LEN) != 0) { 864 zerror(zlogp, B_FALSE, 865 "unable to determine branded zone's boot callback"); 866 brand_close(bh); 867 goto bad; 868 } 869 870 /* Get the path for this zone's init(1M) (or equivalent) process. */ 871 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) { 872 zerror(zlogp, B_FALSE, 873 "unable to determine zone's init(1M) location"); 874 brand_close(bh); 875 goto bad; 876 } 877 878 /* See if this zone's brand should restart init if it dies. */ 879 restart_init = brand_restartinit(bh); 880 881 brand_close(bh); 882 883 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 884 bad_boot_arg); 885 if (err == Z_INVAL) 886 eventstream_write(Z_EVT_ZONE_BADARGS); 887 else if (err != Z_OK) 888 goto bad; 889 890 assert(init_file[0] != '\0'); 891 892 /* Try to anticipate possible problems: Make sure init is executable. */ 893 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 894 zerror(zlogp, B_FALSE, "unable to determine zone root"); 895 goto bad; 896 } 897 898 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); 899 900 if (stat(initpath, &st) == -1) { 901 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 902 goto bad; 903 } 904 905 if ((st.st_mode & S_IXUSR) == 0) { 906 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 907 goto bad; 908 } 909 910 /* 911 * Exclusive stack zones interact with the dlmgmtd running in the 912 * global zone. dladm_zone_boot() tells dlmgmtd that this zone is 913 * booting, and loads its datalinks from the zone's datalink 914 * configuration file. 915 */ 916 if (vplat_get_iptype(zlogp, &iptype) == 0 && iptype == ZS_EXCLUSIVE) { 917 status = dladm_zone_boot(dld_handle, zoneid); 918 if (status != DLADM_STATUS_OK) { 919 zerror(zlogp, B_FALSE, "unable to load zone datalinks: " 920 " %s", dladm_status2str(status, errmsg)); 921 goto bad; 922 } 923 links_loaded = B_TRUE; 924 } 925 926 /* 927 * If there is a brand 'boot' callback, execute it now to give the 928 * brand one last chance to do any additional setup before the zone 929 * is booted. 930 */ 931 if ((strlen(cmdbuf) > EXEC_LEN) && 932 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) { 933 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 934 goto bad; 935 } 936 937 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 938 zerror(zlogp, B_TRUE, "could not set zone boot file"); 939 goto bad; 940 } 941 942 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 943 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 944 goto bad; 945 } 946 947 if (!restart_init && zone_setattr(zoneid, ZONE_ATTR_INITNORESTART, 948 NULL, 0) == -1) { 949 zerror(zlogp, B_TRUE, "could not set zone init-no-restart"); 950 goto bad; 951 } 952 953 /* 954 * Inform zonestatd of a new zone so that it can install a door for 955 * the zone to contact it. 956 */ 957 notify_zonestatd(zone_id); 958 959 if (zone_boot(zoneid) == -1) { 960 zerror(zlogp, B_TRUE, "unable to boot zone"); 961 goto bad; 962 } 963 964 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0) 965 goto bad; 966 967 return (0); 968 969 bad: 970 /* 971 * If something goes wrong, we up the zones's state to the target 972 * state, RUNNING, and then invoke the hook as if we're halting. 973 */ 974 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT); 975 if (links_loaded) 976 (void) dladm_zone_halt(dld_handle, zoneid); 977 return (-1); 978 } 979 980 static int 981 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate) 982 { 983 int err; 984 985 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0) 986 return (-1); 987 988 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) { 989 if (!bringup_failure_recovery) 990 zerror(zlogp, B_FALSE, "unable to destroy zone"); 991 return (-1); 992 } 993 994 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 995 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 996 zonecfg_strerror(err)); 997 998 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0) 999 return (-1); 1000 1001 return (0); 1002 } 1003 1004 static int 1005 zone_graceful_shutdown(zlog_t *zlogp) 1006 { 1007 zoneid_t zoneid; 1008 pid_t child; 1009 char cmdbuf[MAXPATHLEN]; 1010 brand_handle_t bh = NULL; 1011 char zpath[MAXPATHLEN]; 1012 ctid_t ct; 1013 int tmpl_fd; 1014 int child_status; 1015 1016 if (shutdown_in_progress) { 1017 zerror(zlogp, B_FALSE, "shutdown already in progress"); 1018 return (-1); 1019 } 1020 1021 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 1022 zerror(zlogp, B_TRUE, "unable to get zoneid"); 1023 return (-1); 1024 } 1025 1026 /* Get a handle to the brand info for this zone */ 1027 if ((bh = brand_open(brand_name)) == NULL) { 1028 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1029 return (-1); 1030 } 1031 1032 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 1033 zerror(zlogp, B_FALSE, "unable to determine zone path"); 1034 brand_close(bh); 1035 return (-1); 1036 } 1037 1038 /* 1039 * If there is a brand 'shutdown' callback, execute it now to give the 1040 * brand a chance to cleanup any custom configuration. 1041 */ 1042 (void) strcpy(cmdbuf, EXEC_PREFIX); 1043 if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 1044 sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) { 1045 (void) strcat(cmdbuf, SHUTDOWN_DEFAULT); 1046 } 1047 brand_close(bh); 1048 1049 if ((tmpl_fd = init_template()) == -1) { 1050 zerror(zlogp, B_TRUE, "failed to create contract"); 1051 return (-1); 1052 } 1053 1054 if ((child = fork()) == -1) { 1055 (void) ct_tmpl_clear(tmpl_fd); 1056 (void) close(tmpl_fd); 1057 zerror(zlogp, B_TRUE, "failed to fork"); 1058 return (-1); 1059 } else if (child == 0) { 1060 (void) ct_tmpl_clear(tmpl_fd); 1061 if (zone_enter(zoneid) == -1) { 1062 _exit(errno); 1063 } 1064 _exit(execl("/bin/sh", "sh", "-c", cmdbuf, (char *)NULL)); 1065 } 1066 1067 if (contract_latest(&ct) == -1) 1068 ct = -1; 1069 (void) ct_tmpl_clear(tmpl_fd); 1070 (void) close(tmpl_fd); 1071 1072 if (waitpid(child, &child_status, 0) != child) { 1073 /* unexpected: we must have been signalled */ 1074 (void) contract_abandon_id(ct); 1075 return (-1); 1076 } 1077 1078 (void) contract_abandon_id(ct); 1079 if (WEXITSTATUS(child_status) != 0) { 1080 errno = WEXITSTATUS(child_status); 1081 zerror(zlogp, B_FALSE, "unable to shutdown zone"); 1082 return (-1); 1083 } 1084 1085 shutdown_in_progress = B_TRUE; 1086 1087 return (0); 1088 } 1089 1090 static int 1091 zone_wait_shutdown(zlog_t *zlogp) 1092 { 1093 zone_state_t zstate; 1094 uint64_t *tm = NULL; 1095 scf_simple_prop_t *prop = NULL; 1096 int timeout; 1097 int tries; 1098 int rc = -1; 1099 1100 /* Get default stop timeout from SMF framework */ 1101 timeout = SHUTDOWN_WAIT; 1102 if ((prop = scf_simple_prop_get(NULL, SHUTDOWN_FMRI, "stop", 1103 SCF_PROPERTY_TIMEOUT)) != NULL) { 1104 if ((tm = scf_simple_prop_next_count(prop)) != NULL) { 1105 if (tm != 0) 1106 timeout = *tm; 1107 } 1108 scf_simple_prop_free(prop); 1109 } 1110 1111 /* allow time for zone to shutdown cleanly */ 1112 for (tries = 0; tries < timeout; tries ++) { 1113 (void) sleep(1); 1114 if (zone_get_state(zone_name, &zstate) == Z_OK && 1115 zstate == ZONE_STATE_INSTALLED) { 1116 rc = 0; 1117 break; 1118 } 1119 } 1120 1121 if (rc != 0) 1122 zerror(zlogp, B_FALSE, "unable to shutdown zone"); 1123 1124 shutdown_in_progress = B_FALSE; 1125 1126 return (rc); 1127 } 1128 1129 1130 1131 /* 1132 * Generate AUE_zone_state for a command that boots a zone. 1133 */ 1134 static void 1135 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 1136 char *new_state) 1137 { 1138 adt_session_data_t *ah; 1139 adt_event_data_t *event; 1140 int pass_fail, fail_reason; 1141 1142 if (!adt_audit_enabled()) 1143 return; 1144 1145 if (return_val == 0) { 1146 pass_fail = ADT_SUCCESS; 1147 fail_reason = ADT_SUCCESS; 1148 } else { 1149 pass_fail = ADT_FAILURE; 1150 fail_reason = ADT_FAIL_VALUE_PROGRAM; 1151 } 1152 1153 if (adt_start_session(&ah, NULL, 0)) { 1154 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1155 return; 1156 } 1157 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 1158 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1159 (void) adt_end_session(ah); 1160 return; 1161 } 1162 1163 event = adt_alloc_event(ah, ADT_zone_state); 1164 if (event == NULL) { 1165 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1166 (void) adt_end_session(ah); 1167 return; 1168 } 1169 event->adt_zone_state.zonename = zone_name; 1170 event->adt_zone_state.new_state = new_state; 1171 1172 if (adt_put_event(event, pass_fail, fail_reason)) 1173 zerror(zlogp, B_TRUE, gettext("audit failure.")); 1174 1175 adt_free_event(event); 1176 1177 (void) adt_end_session(ah); 1178 } 1179 1180 /* 1181 * The main routine for the door server that deals with zone state transitions. 1182 */ 1183 /* ARGSUSED */ 1184 static void 1185 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 1186 uint_t n_desc) 1187 { 1188 ucred_t *uc = NULL; 1189 const priv_set_t *eset; 1190 1191 zone_state_t zstate; 1192 zone_cmd_t cmd; 1193 zone_cmd_arg_t *zargp; 1194 1195 boolean_t kernelcall; 1196 1197 int rval = -1; 1198 uint64_t uniqid; 1199 zoneid_t zoneid = -1; 1200 zlog_t zlog; 1201 zlog_t *zlogp; 1202 zone_cmd_rval_t *rvalp; 1203 size_t rlen = getpagesize(); /* conservative */ 1204 fs_callback_t cb; 1205 brand_handle_t bh; 1206 boolean_t wait_shut = B_FALSE; 1207 1208 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1209 zargp = (zone_cmd_arg_t *)args; 1210 1211 /* 1212 * When we get the door unref message, we've fdetach'd the door, and 1213 * it is time for us to shut down zoneadmd. 1214 */ 1215 if (zargp == DOOR_UNREF_DATA) { 1216 /* 1217 * See comment at end of main() for info on the last rites. 1218 */ 1219 exit(0); 1220 } 1221 1222 if (zargp == NULL) { 1223 (void) door_return(NULL, 0, 0, 0); 1224 } 1225 1226 rvalp = alloca(rlen); 1227 bzero(rvalp, rlen); 1228 zlog.logfile = NULL; 1229 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 1230 zlog.buf = rvalp->errbuf; 1231 zlog.log = zlog.buf; 1232 /* defer initialization of zlog.locale until after credential check */ 1233 zlogp = &zlog; 1234 1235 if (alen != sizeof (zone_cmd_arg_t)) { 1236 /* 1237 * This really shouldn't be happening. 1238 */ 1239 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 1240 "unexpected (expected %d bytes)", alen, 1241 sizeof (zone_cmd_arg_t)); 1242 goto out; 1243 } 1244 cmd = zargp->cmd; 1245 1246 if (door_ucred(&uc) != 0) { 1247 zerror(&logsys, B_TRUE, "door_ucred"); 1248 goto out; 1249 } 1250 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 1251 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 1252 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 1253 ucred_geteuid(uc) != 0)) { 1254 zerror(&logsys, B_FALSE, "insufficient privileges"); 1255 goto out; 1256 } 1257 1258 kernelcall = ucred_getpid(uc) == 0; 1259 1260 /* 1261 * This is safe because we only use a zlog_t throughout the 1262 * duration of a door call; i.e., by the time the pointer 1263 * might become invalid, the door call would be over. 1264 */ 1265 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 1266 1267 (void) mutex_lock(&lock); 1268 1269 /* 1270 * Once we start to really die off, we don't want more connections. 1271 */ 1272 if (in_death_throes) { 1273 (void) mutex_unlock(&lock); 1274 ucred_free(uc); 1275 (void) door_return(NULL, 0, 0, 0); 1276 thr_exit(NULL); 1277 } 1278 1279 /* 1280 * Check for validity of command. 1281 */ 1282 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && 1283 cmd != Z_REBOOT && cmd != Z_SHUTDOWN && cmd != Z_HALT && 1284 cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT && 1285 cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { 1286 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 1287 goto out; 1288 } 1289 1290 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 1291 /* 1292 * Can't happen 1293 */ 1294 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 1295 cmd); 1296 goto out; 1297 } 1298 /* 1299 * We ignore the possibility of someone calling zone_create(2) 1300 * explicitly; all requests must come through zoneadmd. 1301 */ 1302 if (zone_get_state(zone_name, &zstate) != Z_OK) { 1303 /* 1304 * Something terribly wrong happened 1305 */ 1306 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 1307 goto out; 1308 } 1309 1310 if (kernelcall) { 1311 /* 1312 * Kernel-initiated requests may lose their validity if the 1313 * zone_t the kernel was referring to has gone away. 1314 */ 1315 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 1316 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 1317 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 1318 /* 1319 * We're not talking about the same zone. The request 1320 * must have arrived too late. Return error. 1321 */ 1322 rval = -1; 1323 goto out; 1324 } 1325 zlogp = &logsys; /* Log errors to syslog */ 1326 } 1327 1328 /* 1329 * If we are being asked to forcibly mount or boot a zone, we 1330 * pretend that an INCOMPLETE zone is actually INSTALLED. 1331 */ 1332 if (zstate == ZONE_STATE_INCOMPLETE && 1333 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) 1334 zstate = ZONE_STATE_INSTALLED; 1335 1336 switch (zstate) { 1337 case ZONE_STATE_CONFIGURED: 1338 case ZONE_STATE_INCOMPLETE: 1339 /* 1340 * Not our area of expertise; we just print a nice message 1341 * and die off. 1342 */ 1343 zerror(zlogp, B_FALSE, 1344 "%s operation is invalid for zones in state '%s'", 1345 z_cmd_name(cmd), zone_state_str(zstate)); 1346 break; 1347 1348 case ZONE_STATE_INSTALLED: 1349 switch (cmd) { 1350 case Z_READY: 1351 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate); 1352 if (rval == 0) 1353 eventstream_write(Z_EVT_ZONE_READIED); 1354 break; 1355 case Z_BOOT: 1356 case Z_FORCEBOOT: 1357 eventstream_write(Z_EVT_ZONE_BOOTING); 1358 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1359 == 0) { 1360 rval = zone_bootup(zlogp, zargp->bootbuf, 1361 zstate); 1362 } 1363 audit_put_record(zlogp, uc, rval, "boot"); 1364 if (rval != 0) { 1365 bringup_failure_recovery = B_TRUE; 1366 (void) zone_halt(zlogp, B_FALSE, B_FALSE, 1367 zstate); 1368 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1369 } 1370 break; 1371 case Z_SHUTDOWN: 1372 case Z_HALT: 1373 if (kernelcall) /* Invalid; can't happen */ 1374 abort(); 1375 /* 1376 * We could have two clients racing to halt this 1377 * zone; the second client loses, but its request 1378 * doesn't fail, since the zone is now in the desired 1379 * state. 1380 */ 1381 zerror(zlogp, B_FALSE, "zone is already halted"); 1382 rval = 0; 1383 break; 1384 case Z_REBOOT: 1385 if (kernelcall) /* Invalid; can't happen */ 1386 abort(); 1387 zerror(zlogp, B_FALSE, "%s operation is invalid " 1388 "for zones in state '%s'", z_cmd_name(cmd), 1389 zone_state_str(zstate)); 1390 rval = -1; 1391 break; 1392 case Z_NOTE_UNINSTALLING: 1393 if (kernelcall) /* Invalid; can't happen */ 1394 abort(); 1395 /* 1396 * Tell the console to print out a message about this. 1397 * Once it does, we will be in_death_throes. 1398 */ 1399 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 1400 break; 1401 case Z_MOUNT: 1402 case Z_FORCEMOUNT: 1403 if (kernelcall) /* Invalid; can't happen */ 1404 abort(); 1405 if (!zone_isnative && !zone_iscluster && 1406 !zone_islabeled) { 1407 /* 1408 * -U mounts the zone without lofs mounting 1409 * zone file systems back into the scratch 1410 * zone. This is required when mounting 1411 * non-native branded zones. 1412 */ 1413 (void) strlcpy(zargp->bootbuf, "-U", 1414 BOOTARGS_MAX); 1415 } 1416 1417 rval = zone_ready(zlogp, 1418 strcmp(zargp->bootbuf, "-U") == 0 ? 1419 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate); 1420 if (rval != 0) 1421 break; 1422 1423 eventstream_write(Z_EVT_ZONE_READIED); 1424 1425 /* 1426 * Get a handle to the default brand info. 1427 * We must always use the default brand file system 1428 * list when mounting the zone. 1429 */ 1430 if ((bh = brand_open(default_brand)) == NULL) { 1431 rval = -1; 1432 break; 1433 } 1434 1435 /* 1436 * Get the list of filesystems to mount from 1437 * the brand configuration. These mounts are done 1438 * via a thread that will enter the zone, so they 1439 * are done from within the context of the zone. 1440 */ 1441 cb.zlogp = zlogp; 1442 cb.zoneid = zone_id; 1443 cb.mount_cmd = B_TRUE; 1444 rval = brand_platform_iter_mounts(bh, 1445 mount_early_fs, &cb); 1446 1447 brand_close(bh); 1448 1449 /* 1450 * Ordinarily, /dev/fd would be mounted inside the zone 1451 * by svc:/system/filesystem/usr:default, but since 1452 * we're not booting the zone, we need to do this 1453 * manually. 1454 */ 1455 if (rval == 0) 1456 rval = mount_early_fs(&cb, 1457 "fd", "/dev/fd", "fd", NULL); 1458 break; 1459 case Z_UNMOUNT: 1460 if (kernelcall) /* Invalid; can't happen */ 1461 abort(); 1462 zerror(zlogp, B_FALSE, "zone is already unmounted"); 1463 rval = 0; 1464 break; 1465 } 1466 break; 1467 1468 case ZONE_STATE_READY: 1469 switch (cmd) { 1470 case Z_READY: 1471 /* 1472 * We could have two clients racing to ready this 1473 * zone; the second client loses, but its request 1474 * doesn't fail, since the zone is now in the desired 1475 * state. 1476 */ 1477 zerror(zlogp, B_FALSE, "zone is already ready"); 1478 rval = 0; 1479 break; 1480 case Z_BOOT: 1481 (void) strlcpy(boot_args, zargp->bootbuf, 1482 sizeof (boot_args)); 1483 eventstream_write(Z_EVT_ZONE_BOOTING); 1484 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1485 audit_put_record(zlogp, uc, rval, "boot"); 1486 if (rval != 0) { 1487 bringup_failure_recovery = B_TRUE; 1488 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1489 zstate); 1490 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1491 } 1492 boot_args[0] = '\0'; 1493 break; 1494 case Z_HALT: 1495 if (kernelcall) /* Invalid; can't happen */ 1496 abort(); 1497 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1498 != 0) 1499 break; 1500 eventstream_write(Z_EVT_ZONE_HALTED); 1501 break; 1502 case Z_SHUTDOWN: 1503 case Z_REBOOT: 1504 case Z_NOTE_UNINSTALLING: 1505 case Z_MOUNT: 1506 case Z_UNMOUNT: 1507 if (kernelcall) /* Invalid; can't happen */ 1508 abort(); 1509 zerror(zlogp, B_FALSE, "%s operation is invalid " 1510 "for zones in state '%s'", z_cmd_name(cmd), 1511 zone_state_str(zstate)); 1512 rval = -1; 1513 break; 1514 } 1515 break; 1516 1517 case ZONE_STATE_MOUNTED: 1518 switch (cmd) { 1519 case Z_UNMOUNT: 1520 if (kernelcall) /* Invalid; can't happen */ 1521 abort(); 1522 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate); 1523 if (rval == 0) { 1524 eventstream_write(Z_EVT_ZONE_HALTED); 1525 (void) sema_post(&scratch_sem); 1526 } 1527 break; 1528 default: 1529 if (kernelcall) /* Invalid; can't happen */ 1530 abort(); 1531 zerror(zlogp, B_FALSE, "%s operation is invalid " 1532 "for zones in state '%s'", z_cmd_name(cmd), 1533 zone_state_str(zstate)); 1534 rval = -1; 1535 break; 1536 } 1537 break; 1538 1539 case ZONE_STATE_RUNNING: 1540 case ZONE_STATE_SHUTTING_DOWN: 1541 case ZONE_STATE_DOWN: 1542 switch (cmd) { 1543 case Z_READY: 1544 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1545 != 0) 1546 break; 1547 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0) 1548 eventstream_write(Z_EVT_ZONE_READIED); 1549 else 1550 eventstream_write(Z_EVT_ZONE_HALTED); 1551 break; 1552 case Z_BOOT: 1553 /* 1554 * We could have two clients racing to boot this 1555 * zone; the second client loses, but its request 1556 * doesn't fail, since the zone is now in the desired 1557 * state. 1558 */ 1559 zerror(zlogp, B_FALSE, "zone is already booted"); 1560 rval = 0; 1561 break; 1562 case Z_HALT: 1563 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1564 != 0) 1565 break; 1566 eventstream_write(Z_EVT_ZONE_HALTED); 1567 break; 1568 case Z_REBOOT: 1569 (void) strlcpy(boot_args, zargp->bootbuf, 1570 sizeof (boot_args)); 1571 eventstream_write(Z_EVT_ZONE_REBOOTING); 1572 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1573 != 0) { 1574 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1575 boot_args[0] = '\0'; 1576 break; 1577 } 1578 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1579 != 0) { 1580 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1581 boot_args[0] = '\0'; 1582 break; 1583 } 1584 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1585 audit_put_record(zlogp, uc, rval, "reboot"); 1586 if (rval != 0) { 1587 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1588 zstate); 1589 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1590 } 1591 boot_args[0] = '\0'; 1592 break; 1593 case Z_SHUTDOWN: 1594 if ((rval = zone_graceful_shutdown(zlogp)) == 0) { 1595 wait_shut = B_TRUE; 1596 } 1597 break; 1598 case Z_NOTE_UNINSTALLING: 1599 case Z_MOUNT: 1600 case Z_UNMOUNT: 1601 zerror(zlogp, B_FALSE, "%s operation is invalid " 1602 "for zones in state '%s'", z_cmd_name(cmd), 1603 zone_state_str(zstate)); 1604 rval = -1; 1605 break; 1606 } 1607 break; 1608 default: 1609 abort(); 1610 } 1611 1612 /* 1613 * Because the state of the zone may have changed, we make sure 1614 * to wake the console poller, which is in charge of initiating 1615 * the shutdown procedure as necessary. 1616 */ 1617 eventstream_write(Z_EVT_NULL); 1618 1619 out: 1620 (void) mutex_unlock(&lock); 1621 1622 /* Wait for the Z_SHUTDOWN commands to complete */ 1623 if (wait_shut) 1624 rval = zone_wait_shutdown(zlogp); 1625 1626 if (kernelcall) { 1627 rvalp = NULL; 1628 rlen = 0; 1629 } else { 1630 rvalp->rval = rval; 1631 } 1632 if (uc != NULL) 1633 ucred_free(uc); 1634 (void) door_return((char *)rvalp, rlen, NULL, 0); 1635 thr_exit(NULL); 1636 } 1637 1638 static int 1639 setup_door(zlog_t *zlogp) 1640 { 1641 if ((zone_door = door_create(server, NULL, 1642 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1643 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1644 return (-1); 1645 } 1646 (void) fdetach(zone_door_path); 1647 1648 if (fattach(zone_door, zone_door_path) != 0) { 1649 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1650 (void) door_revoke(zone_door); 1651 (void) fdetach(zone_door_path); 1652 zone_door = -1; 1653 return (-1); 1654 } 1655 return (0); 1656 } 1657 1658 /* 1659 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 1660 * is where zoneadmd itself will check to see that another instance of 1661 * zoneadmd isn't already controlling this zone. 1662 * 1663 * The idea here is that we want to open the path to which we will 1664 * attach our door, lock it, and then make sure that no-one has beat us 1665 * to fattach(3c)ing onto it. 1666 * 1667 * fattach(3c) is really a mount, so there are actually two possible 1668 * vnodes we could be dealing with. Our strategy is as follows: 1669 * 1670 * - If the file we opened is a regular file (common case): 1671 * There is no fattach(3c)ed door, so we have a chance of becoming 1672 * the managing zoneadmd. We attempt to lock the file: if it is 1673 * already locked, that means someone else raced us here, so we 1674 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 1675 * that beat us to it. 1676 * 1677 * - If the file we opened is a namefs file: 1678 * This means there is already an established door fattach(3c)'ed 1679 * to the rendezvous path. We've lost the race, so we give up. 1680 * Note that in this case we also try to grab the file lock, and 1681 * will succeed in acquiring it since the vnode locked by the 1682 * "winning" zoneadmd was a regular one, and the one we locked was 1683 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1684 * we just return to zoneadm(1m) which knows to retry. 1685 */ 1686 static int 1687 make_daemon_exclusive(zlog_t *zlogp) 1688 { 1689 int doorfd = -1; 1690 int err, ret = -1; 1691 struct stat st; 1692 struct flock flock; 1693 zone_state_t zstate; 1694 1695 top: 1696 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1697 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1698 zonecfg_strerror(err)); 1699 goto out; 1700 } 1701 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1702 S_IREAD|S_IWRITE)) < 0) { 1703 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1704 goto out; 1705 } 1706 if (fstat(doorfd, &st) < 0) { 1707 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1708 goto out; 1709 } 1710 /* 1711 * Lock the file to synchronize with other zoneadmd 1712 */ 1713 flock.l_type = F_WRLCK; 1714 flock.l_whence = SEEK_SET; 1715 flock.l_start = (off_t)0; 1716 flock.l_len = (off_t)0; 1717 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1718 /* 1719 * Someone else raced us here and grabbed the lock file 1720 * first. A warning here is inappropriate since nothing 1721 * went wrong. 1722 */ 1723 goto out; 1724 } 1725 1726 if (strcmp(st.st_fstype, "namefs") == 0) { 1727 struct door_info info; 1728 1729 /* 1730 * There is already something fattach()'ed to this file. 1731 * Lets see what the door is up to. 1732 */ 1733 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1734 /* 1735 * Another zoneadmd process seems to be in 1736 * control of the situation and we don't need to 1737 * be here. A warning here is inappropriate 1738 * since nothing went wrong. 1739 * 1740 * If the door has been revoked, the zoneadmd 1741 * process currently managing the zone is going 1742 * away. We'll return control to zoneadm(1m) 1743 * which will try again (by which time zoneadmd 1744 * will hopefully have exited). 1745 */ 1746 goto out; 1747 } 1748 1749 /* 1750 * If we got this far, there's a fattach(3c)'ed door 1751 * that belongs to a process that has exited, which can 1752 * happen if the previous zoneadmd died unexpectedly. 1753 * 1754 * Let user know that something is amiss, but that we can 1755 * recover; if the zone is in the installed state, then don't 1756 * message, since having a running zoneadmd isn't really 1757 * expected/needed. We want to keep occurences of this message 1758 * limited to times when zoneadmd is picking back up from a 1759 * zoneadmd that died while the zone was in some non-trivial 1760 * state. 1761 */ 1762 if (zstate > ZONE_STATE_INSTALLED) { 1763 zerror(zlogp, B_FALSE, 1764 "zone '%s': WARNING: zone is in state '%s', but " 1765 "zoneadmd does not appear to be available; " 1766 "restarted zoneadmd to recover.", 1767 zone_name, zone_state_str(zstate)); 1768 } 1769 1770 (void) fdetach(zone_door_path); 1771 (void) close(doorfd); 1772 goto top; 1773 } 1774 ret = 0; 1775 out: 1776 (void) close(doorfd); 1777 return (ret); 1778 } 1779 1780 /* 1781 * Setup the brand's pre and post state change callbacks, as well as the 1782 * query callback, if any of these exist. 1783 */ 1784 static int 1785 brand_callback_init(brand_handle_t bh, char *zone_name) 1786 { 1787 char zpath[MAXPATHLEN]; 1788 1789 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) 1790 return (-1); 1791 1792 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX, 1793 sizeof (pre_statechg_hook)); 1794 1795 if (brand_get_prestatechange(bh, zone_name, zpath, 1796 pre_statechg_hook + EXEC_LEN, 1797 sizeof (pre_statechg_hook) - EXEC_LEN) != 0) 1798 return (-1); 1799 1800 if (strlen(pre_statechg_hook) <= EXEC_LEN) 1801 pre_statechg_hook[0] = '\0'; 1802 1803 (void) strlcpy(post_statechg_hook, EXEC_PREFIX, 1804 sizeof (post_statechg_hook)); 1805 1806 if (brand_get_poststatechange(bh, zone_name, zpath, 1807 post_statechg_hook + EXEC_LEN, 1808 sizeof (post_statechg_hook) - EXEC_LEN) != 0) 1809 return (-1); 1810 1811 if (strlen(post_statechg_hook) <= EXEC_LEN) 1812 post_statechg_hook[0] = '\0'; 1813 1814 (void) strlcpy(query_hook, EXEC_PREFIX, 1815 sizeof (query_hook)); 1816 1817 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN, 1818 sizeof (query_hook) - EXEC_LEN) != 0) 1819 return (-1); 1820 1821 if (strlen(query_hook) <= EXEC_LEN) 1822 query_hook[0] = '\0'; 1823 1824 return (0); 1825 } 1826 1827 int 1828 main(int argc, char *argv[]) 1829 { 1830 int opt; 1831 zoneid_t zid; 1832 priv_set_t *privset; 1833 zone_state_t zstate; 1834 char parents_locale[MAXPATHLEN]; 1835 brand_handle_t bh; 1836 int err; 1837 1838 pid_t pid; 1839 sigset_t blockset; 1840 sigset_t block_cld; 1841 1842 struct { 1843 sema_t sem; 1844 int status; 1845 zlog_t log; 1846 } *shstate; 1847 size_t shstatelen = getpagesize(); 1848 1849 zlog_t errlog; 1850 zlog_t *zlogp; 1851 1852 int ctfd; 1853 1854 progname = get_execbasename(argv[0]); 1855 1856 /* 1857 * Make sure stderr is unbuffered 1858 */ 1859 (void) setbuffer(stderr, NULL, 0); 1860 1861 /* 1862 * Get out of the way of mounted filesystems, since we will daemonize 1863 * soon. 1864 */ 1865 (void) chdir("/"); 1866 1867 /* 1868 * Use the default system umask per PSARC 1998/110 rather than 1869 * anything that may have been set by the caller. 1870 */ 1871 (void) umask(CMASK); 1872 1873 /* 1874 * Initially we want to use our parent's locale. 1875 */ 1876 (void) setlocale(LC_ALL, ""); 1877 (void) textdomain(TEXT_DOMAIN); 1878 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1879 sizeof (parents_locale)); 1880 1881 /* 1882 * This zlog_t is used for writing to stderr 1883 */ 1884 errlog.logfile = stderr; 1885 errlog.buflen = errlog.loglen = 0; 1886 errlog.buf = errlog.log = NULL; 1887 errlog.locale = parents_locale; 1888 1889 /* 1890 * We start off writing to stderr until we're ready to daemonize. 1891 */ 1892 zlogp = &errlog; 1893 1894 /* 1895 * Process options. 1896 */ 1897 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1898 switch (opt) { 1899 case 'R': 1900 zonecfg_set_root(optarg); 1901 break; 1902 case 'z': 1903 zone_name = optarg; 1904 break; 1905 default: 1906 usage(); 1907 } 1908 } 1909 1910 if (zone_name == NULL) 1911 usage(); 1912 1913 /* 1914 * Because usage() prints directly to stderr, it has gettext() 1915 * wrapping, which depends on the locale. But since zerror() calls 1916 * localize() which tweaks the locale, it is not safe to call zerror() 1917 * until after the last call to usage(). Fortunately, the last call 1918 * to usage() is just above and the first call to zerror() is just 1919 * below. Don't mess this up. 1920 */ 1921 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1922 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1923 GLOBAL_ZONENAME); 1924 return (1); 1925 } 1926 1927 if (zone_get_id(zone_name, &zid) != 0) { 1928 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 1929 zonecfg_strerror(Z_NO_ZONE)); 1930 return (1); 1931 } 1932 1933 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1934 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1935 zonecfg_strerror(err)); 1936 return (1); 1937 } 1938 if (zstate < ZONE_STATE_INCOMPLETE) { 1939 zerror(zlogp, B_FALSE, 1940 "cannot manage a zone which is in state '%s'", 1941 zone_state_str(zstate)); 1942 return (1); 1943 } 1944 1945 if (zonecfg_default_brand(default_brand, 1946 sizeof (default_brand)) != Z_OK) { 1947 zerror(zlogp, B_FALSE, "unable to determine default brand"); 1948 return (1); 1949 } 1950 1951 /* Get a handle to the brand info for this zone */ 1952 if (zone_get_brand(zone_name, brand_name, sizeof (brand_name)) 1953 != Z_OK) { 1954 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1955 return (1); 1956 } 1957 zone_isnative = (strcmp(brand_name, NATIVE_BRAND_NAME) == 0); 1958 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0); 1959 1960 /* 1961 * In the alternate root environment, the only supported 1962 * operations are mount and unmount. In this case, just treat 1963 * the zone as native if it is cluster. Cluster zones can be 1964 * native for the purpose of LU or upgrade, and the cluster 1965 * brand may not exist in the miniroot (such as in net install 1966 * upgrade). 1967 */ 1968 if (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0) { 1969 zone_iscluster = B_TRUE; 1970 if (zonecfg_in_alt_root()) { 1971 (void) strlcpy(brand_name, default_brand, 1972 sizeof (brand_name)); 1973 } 1974 } else { 1975 zone_iscluster = B_FALSE; 1976 } 1977 1978 if ((bh = brand_open(brand_name)) == NULL) { 1979 zerror(zlogp, B_FALSE, "unable to open zone brand"); 1980 return (1); 1981 } 1982 1983 /* Get state change brand hooks. */ 1984 if (brand_callback_init(bh, zone_name) == -1) { 1985 zerror(zlogp, B_TRUE, 1986 "failed to initialize brand state change hooks"); 1987 brand_close(bh); 1988 return (1); 1989 } 1990 1991 brand_close(bh); 1992 1993 /* 1994 * Check that we have all privileges. It would be nice to pare 1995 * this down, but this is at least a first cut. 1996 */ 1997 if ((privset = priv_allocset()) == NULL) { 1998 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1999 return (1); 2000 } 2001 2002 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 2003 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 2004 priv_freeset(privset); 2005 return (1); 2006 } 2007 2008 if (priv_isfullset(privset) == B_FALSE) { 2009 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 2010 "run this command (all privs required)"); 2011 priv_freeset(privset); 2012 return (1); 2013 } 2014 priv_freeset(privset); 2015 2016 if (mkzonedir(zlogp) != 0) 2017 return (1); 2018 2019 /* 2020 * Pre-fork: setup shared state 2021 */ 2022 if ((shstate = (void *)mmap(NULL, shstatelen, 2023 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 2024 MAP_FAILED) { 2025 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 2026 return (1); 2027 } 2028 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 2029 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 2030 (void) munmap((char *)shstate, shstatelen); 2031 return (1); 2032 } 2033 shstate->log.logfile = NULL; 2034 shstate->log.buflen = shstatelen - sizeof (*shstate); 2035 shstate->log.loglen = shstate->log.buflen; 2036 shstate->log.buf = (char *)shstate + sizeof (*shstate); 2037 shstate->log.log = shstate->log.buf; 2038 shstate->log.locale = parents_locale; 2039 shstate->status = -1; 2040 2041 /* 2042 * We need a SIGCHLD handler so the sema_wait() below will wake 2043 * up if the child dies without doing a sema_post(). 2044 */ 2045 (void) sigset(SIGCHLD, sigchld); 2046 /* 2047 * We must mask SIGCHLD until after we've coped with the fork 2048 * sufficiently to deal with it; otherwise we can race and 2049 * receive the signal before pid has been initialized 2050 * (yes, this really happens). 2051 */ 2052 (void) sigemptyset(&block_cld); 2053 (void) sigaddset(&block_cld, SIGCHLD); 2054 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 2055 2056 /* 2057 * The parent only needs stderr after the fork, so close other fd's 2058 * that we inherited from zoneadm so that the parent doesn't have those 2059 * open while waiting. The child will close the rest after the fork. 2060 */ 2061 closefrom(3); 2062 2063 if ((ctfd = init_template()) == -1) { 2064 zerror(zlogp, B_TRUE, "failed to create contract"); 2065 return (1); 2066 } 2067 2068 /* 2069 * Do not let another thread localize a message while we are forking. 2070 */ 2071 (void) mutex_lock(&msglock); 2072 pid = fork(); 2073 (void) mutex_unlock(&msglock); 2074 2075 /* 2076 * In all cases (parent, child, and in the event of an error) we 2077 * don't want to cause creation of contracts on subsequent fork()s. 2078 */ 2079 (void) ct_tmpl_clear(ctfd); 2080 (void) close(ctfd); 2081 2082 if (pid == -1) { 2083 zerror(zlogp, B_TRUE, "could not fork"); 2084 return (1); 2085 2086 } else if (pid > 0) { /* parent */ 2087 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 2088 /* 2089 * This marks a window of vulnerability in which we receive 2090 * the SIGCLD before falling into sema_wait (normally we would 2091 * get woken up from sema_wait with EINTR upon receipt of 2092 * SIGCLD). So we may need to use some other scheme like 2093 * sema_posting in the sigcld handler. 2094 * blech 2095 */ 2096 (void) sema_wait(&shstate->sem); 2097 (void) sema_destroy(&shstate->sem); 2098 if (shstate->status != 0) 2099 (void) waitpid(pid, NULL, WNOHANG); 2100 /* 2101 * It's ok if we die with SIGPIPE. It's not like we could have 2102 * done anything about it. 2103 */ 2104 (void) fprintf(stderr, "%s", shstate->log.buf); 2105 _exit(shstate->status == 0 ? 0 : 1); 2106 } 2107 2108 /* 2109 * The child charges on. 2110 */ 2111 (void) sigset(SIGCHLD, SIG_DFL); 2112 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 2113 2114 /* 2115 * SIGPIPE can be delivered if we write to a socket for which the 2116 * peer endpoint is gone. That can lead to too-early termination 2117 * of zoneadmd, and that's not good eats. 2118 */ 2119 (void) sigset(SIGPIPE, SIG_IGN); 2120 /* 2121 * Stop using stderr 2122 */ 2123 zlogp = &shstate->log; 2124 2125 /* 2126 * We don't need stdout/stderr from now on. 2127 */ 2128 closefrom(0); 2129 2130 /* 2131 * Initialize the syslog zlog_t. This needs to be done after 2132 * the call to closefrom(). 2133 */ 2134 logsys.buf = logsys.log = NULL; 2135 logsys.buflen = logsys.loglen = 0; 2136 logsys.logfile = NULL; 2137 logsys.locale = DEFAULT_LOCALE; 2138 2139 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 2140 2141 /* 2142 * The eventstream is used to publish state changes in the zone 2143 * from the door threads to the console I/O poller. 2144 */ 2145 if (eventstream_init() == -1) { 2146 zerror(zlogp, B_TRUE, "unable to create eventstream"); 2147 goto child_out; 2148 } 2149 2150 (void) snprintf(zone_door_path, sizeof (zone_door_path), 2151 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 2152 2153 /* 2154 * See if another zoneadmd is running for this zone. If not, then we 2155 * can now modify system state. 2156 */ 2157 if (make_daemon_exclusive(zlogp) == -1) 2158 goto child_out; 2159 2160 2161 /* 2162 * Create/join a new session; we need to be careful of what we do with 2163 * the console from now on so we don't end up being the session leader 2164 * for the terminal we're going to be handing out. 2165 */ 2166 (void) setsid(); 2167 2168 /* 2169 * This thread shouldn't be receiving any signals; in particular, 2170 * SIGCHLD should be received by the thread doing the fork(). 2171 */ 2172 (void) sigfillset(&blockset); 2173 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 2174 2175 /* 2176 * Setup the console device and get ready to serve the console; 2177 * once this has completed, we're ready to let console clients 2178 * make an attempt to connect (they will block until 2179 * serve_console_sock() below gets called, and any pending 2180 * connection is accept()ed). 2181 */ 2182 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0) 2183 goto child_out; 2184 2185 /* 2186 * Take the lock now, so that when the door server gets going, we 2187 * are guaranteed that it won't take a request until we are sure 2188 * that everything is completely set up. See the child_out: label 2189 * below to see why this matters. 2190 */ 2191 (void) mutex_lock(&lock); 2192 2193 /* Init semaphore for scratch zones. */ 2194 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 2195 zerror(zlogp, B_TRUE, 2196 "failed to initialize semaphore for scratch zone"); 2197 goto child_out; 2198 } 2199 2200 /* open the dladm handle */ 2201 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) { 2202 zerror(zlogp, B_FALSE, "failed to open dladm handle"); 2203 goto child_out; 2204 } 2205 2206 /* 2207 * Note: door setup must occur *after* the console is setup. 2208 * This is so that as zlogin tests the door to see if zoneadmd 2209 * is ready yet, we know that the console will get serviced 2210 * once door_info() indicates that the door is "up". 2211 */ 2212 if (setup_door(zlogp) == -1) 2213 goto child_out; 2214 2215 /* 2216 * Things seem OK so far; tell the parent process that we're done 2217 * with setup tasks. This will cause the parent to exit, signalling 2218 * to zoneadm, zlogin, or whatever forked it that we are ready to 2219 * service requests. 2220 */ 2221 shstate->status = 0; 2222 (void) sema_post(&shstate->sem); 2223 (void) munmap((char *)shstate, shstatelen); 2224 shstate = NULL; 2225 2226 (void) mutex_unlock(&lock); 2227 2228 /* 2229 * zlogp is now invalid, so reset it to the syslog logger. 2230 */ 2231 zlogp = &logsys; 2232 2233 /* 2234 * Now that we are free of any parents, switch to the default locale. 2235 */ 2236 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 2237 2238 /* 2239 * At this point the setup portion of main() is basically done, so 2240 * we reuse this thread to manage the zone console. When 2241 * serve_console() has returned, we are past the point of no return 2242 * in the life of this zoneadmd. 2243 */ 2244 if (zonecfg_in_alt_root()) { 2245 /* 2246 * This is just awful, but mounted scratch zones don't (and 2247 * can't) have consoles. We just wait for unmount instead. 2248 */ 2249 while (sema_wait(&scratch_sem) == EINTR) 2250 ; 2251 } else { 2252 serve_console(zlogp); 2253 assert(in_death_throes); 2254 } 2255 2256 /* 2257 * This is the next-to-last part of the exit interlock. Upon calling 2258 * fdetach(), the door will go unreferenced; once any 2259 * outstanding requests (like the door thread doing Z_HALT) are 2260 * done, the door will get an UNREF notification; when it handles 2261 * the UNREF, the door server will cause the exit. It's possible 2262 * that fdetach() can fail because the file is in use, in which 2263 * case we'll retry the operation. 2264 */ 2265 assert(!MUTEX_HELD(&lock)); 2266 for (;;) { 2267 if ((fdetach(zone_door_path) == 0) || (errno != EBUSY)) 2268 break; 2269 yield(); 2270 } 2271 2272 for (;;) 2273 (void) pause(); 2274 2275 child_out: 2276 assert(pid == 0); 2277 2278 shstate->status = -1; 2279 (void) sema_post(&shstate->sem); 2280 (void) munmap((char *)shstate, shstatelen); 2281 2282 /* 2283 * This might trigger an unref notification, but if so, 2284 * we are still holding the lock, so our call to exit will 2285 * ultimately win the race and will publish the right exit 2286 * code. 2287 */ 2288 if (zone_door != -1) { 2289 assert(MUTEX_HELD(&lock)); 2290 (void) door_revoke(zone_door); 2291 (void) fdetach(zone_door_path); 2292 } 2293 2294 if (dld_handle != NULL) 2295 dladm_close(dld_handle); 2296 2297 return (1); /* return from main() forcibly exits an MT process */ 2298 } 2299