1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * zoneadmd manages zones; one zoneadmd process is launched for each 31 * non-global zone on the system. This daemon juggles four jobs: 32 * 33 * - Implement setup and teardown of the zone "virtual platform": mount and 34 * unmount filesystems; create and destroy network interfaces; communicate 35 * with devfsadmd to lay out devices for the zone; instantiate the zone 36 * console device; configure process runtime attributes such as resource 37 * controls, pool bindings, fine-grained privileges. 38 * 39 * - Launch the zone's init(1M) process. 40 * 41 * - Implement a door server; clients (like zoneadm) connect to the door 42 * server and request zone state changes. The kernel is also a client of 43 * this door server. A request to halt or reboot the zone which originates 44 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 45 * 46 * One minor problem is that messages emitted by zoneadmd need to be passed 47 * back to the zoneadm process making the request. These messages need to 48 * be rendered in the client's locale; so, this is passed in as part of the 49 * request. The exception is the kernel upcall to zoneadmd, in which case 50 * messages are syslog'd. 51 * 52 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 53 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 54 * strings which do not need to be translated. 55 * 56 * - Act as a console server for zlogin -C processes; see comments in zcons.c 57 * for more information about the zone console architecture. 58 * 59 * DESIGN NOTES 60 * 61 * Restart: 62 * A chief design constraint of zoneadmd is that it should be restartable in 63 * the case that the administrator kills it off, or it suffers a fatal error, 64 * without the running zone being impacted; this is akin to being able to 65 * reboot the service processor of a server without affecting the OS instance. 66 */ 67 68 #include <sys/param.h> 69 #include <sys/mman.h> 70 #include <sys/types.h> 71 #include <sys/stat.h> 72 #include <sys/sysmacros.h> 73 74 #include <bsm/adt.h> 75 #include <bsm/adt_event.h> 76 77 #include <alloca.h> 78 #include <assert.h> 79 #include <errno.h> 80 #include <door.h> 81 #include <fcntl.h> 82 #include <locale.h> 83 #include <signal.h> 84 #include <stdarg.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 #include <strings.h> 89 #include <synch.h> 90 #include <syslog.h> 91 #include <thread.h> 92 #include <unistd.h> 93 #include <wait.h> 94 #include <limits.h> 95 #include <zone.h> 96 #include <libbrand.h> 97 #include <libcontract.h> 98 #include <libcontract_priv.h> 99 #include <sys/contract/process.h> 100 #include <sys/ctfs.h> 101 102 #include <libzonecfg.h> 103 #include "zoneadmd.h" 104 105 static char *progname; 106 char *zone_name; /* zone which we are managing */ 107 char brand_name[MAXNAMELEN]; 108 boolean_t zone_isnative; 109 static zoneid_t zone_id; 110 111 zlog_t logsys; 112 113 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 114 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 115 116 static sema_t scratch_sem; /* for scratch zones */ 117 118 static char zone_door_path[MAXPATHLEN]; 119 static int zone_door = -1; 120 121 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 122 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 123 124 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 125 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 126 #endif 127 128 #define DEFAULT_LOCALE "C" 129 130 static const char * 131 z_cmd_name(zone_cmd_t zcmd) 132 { 133 /* This list needs to match the enum in sys/zone.h */ 134 static const char *zcmdstr[] = { 135 "ready", "boot", "forceboot", "reboot", "halt", 136 "note_uninstalling", "mount", "forcemount", "unmount" 137 }; 138 139 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 140 return ("unknown"); 141 else 142 return (zcmdstr[(int)zcmd]); 143 } 144 145 static char * 146 get_execbasename(char *execfullname) 147 { 148 char *last_slash, *execbasename; 149 150 /* guard against '/' at end of command invocation */ 151 for (;;) { 152 last_slash = strrchr(execfullname, '/'); 153 if (last_slash == NULL) { 154 execbasename = execfullname; 155 break; 156 } else { 157 execbasename = last_slash + 1; 158 if (*execbasename == '\0') { 159 *last_slash = '\0'; 160 continue; 161 } 162 break; 163 } 164 } 165 return (execbasename); 166 } 167 168 static void 169 usage(void) 170 { 171 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 172 (void) fprintf(stderr, 173 gettext("\tNote: %s should not be run directly.\n"), progname); 174 exit(2); 175 } 176 177 /* ARGSUSED */ 178 static void 179 sigchld(int sig) 180 { 181 } 182 183 char * 184 localize_msg(char *locale, const char *msg) 185 { 186 char *out; 187 188 (void) mutex_lock(&msglock); 189 (void) setlocale(LC_MESSAGES, locale); 190 out = gettext(msg); 191 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 192 (void) mutex_unlock(&msglock); 193 return (out); 194 } 195 196 /* PRINTFLIKE3 */ 197 void 198 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 199 { 200 va_list alist; 201 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 202 char *bp; 203 int saved_errno = errno; 204 205 if (zlogp == NULL) 206 return; 207 if (zlogp == &logsys) 208 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 209 zone_name); 210 else 211 buf[0] = '\0'; 212 bp = &(buf[strlen(buf)]); 213 214 /* 215 * In theory, the locale pointer should be set to either "C" or a 216 * char array, so it should never be NULL 217 */ 218 assert(zlogp->locale != NULL); 219 /* Locale is per process, but we are multi-threaded... */ 220 fmt = localize_msg(zlogp->locale, fmt); 221 222 va_start(alist, fmt); 223 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 224 va_end(alist); 225 bp = &(buf[strlen(buf)]); 226 if (use_strerror) 227 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 228 strerror(saved_errno)); 229 if (zlogp == &logsys) { 230 (void) syslog(LOG_ERR, "%s", buf); 231 } else if (zlogp->logfile != NULL) { 232 (void) fprintf(zlogp->logfile, "%s\n", buf); 233 } else { 234 size_t buflen; 235 size_t copylen; 236 237 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 238 copylen = MIN(buflen, zlogp->loglen); 239 zlogp->log += copylen; 240 zlogp->loglen -= copylen; 241 } 242 } 243 244 /* 245 * Emit a warning for any boot arguments which are unrecognized. Since 246 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 247 * put the arguments into an argv style array, use getopt to process them, 248 * and put the resultant argument string back into outargs. 249 * 250 * During the filtering, we pull out any arguments which are truly "boot" 251 * arguments, leaving only those which are to be passed intact to the 252 * progenitor process. The one we support at the moment is -i, which 253 * indicates to the kernel which program should be launched as 'init'. 254 * 255 * A return of Z_INVAL indicates specifically that the arguments are 256 * not valid; this is a non-fatal error. Except for Z_OK, all other return 257 * values are treated as fatal. 258 */ 259 static int 260 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 261 char *init_file, char *badarg) 262 { 263 int argc = 0, argc_save; 264 int i; 265 int err; 266 char *arg, *lasts, **argv = NULL, **argv_save; 267 char zonecfg_args[BOOTARGS_MAX]; 268 char scratchargs[BOOTARGS_MAX], *sargs; 269 char c; 270 271 bzero(outargs, BOOTARGS_MAX); 272 bzero(badarg, BOOTARGS_MAX); 273 274 /* 275 * If the user didn't specify transient boot arguments, check 276 * to see if there were any specified in the zone configuration, 277 * and use them if applicable. 278 */ 279 if (inargs == NULL || inargs[0] == '\0') { 280 zone_dochandle_t handle; 281 if ((handle = zonecfg_init_handle()) == NULL) { 282 zerror(zlogp, B_TRUE, 283 "getting zone configuration handle"); 284 return (Z_BAD_HANDLE); 285 } 286 err = zonecfg_get_snapshot_handle(zone_name, handle); 287 if (err != Z_OK) { 288 zerror(zlogp, B_FALSE, 289 "invalid configuration snapshot"); 290 zonecfg_fini_handle(handle); 291 return (Z_BAD_HANDLE); 292 } 293 294 bzero(zonecfg_args, sizeof (zonecfg_args)); 295 (void) zonecfg_get_bootargs(handle, zonecfg_args, 296 sizeof (zonecfg_args)); 297 inargs = zonecfg_args; 298 zonecfg_fini_handle(handle); 299 } 300 301 if (strlen(inargs) >= BOOTARGS_MAX) { 302 zerror(zlogp, B_FALSE, "boot argument string too long"); 303 return (Z_INVAL); 304 } 305 306 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 307 sargs = scratchargs; 308 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 309 sargs = NULL; 310 argc++; 311 } 312 313 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 314 zerror(zlogp, B_FALSE, "memory allocation failed"); 315 return (Z_NOMEM); 316 } 317 318 argv_save = argv; 319 argc_save = argc; 320 321 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 322 sargs = scratchargs; 323 i = 0; 324 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 325 sargs = NULL; 326 if ((argv[i] = strdup(arg)) == NULL) { 327 err = Z_NOMEM; 328 zerror(zlogp, B_FALSE, "memory allocation failed"); 329 goto done; 330 } 331 i++; 332 } 333 334 /* 335 * We preserve compatibility with the Solaris system boot behavior, 336 * which allows: 337 * 338 * # reboot kernel/unix -s -m verbose 339 * 340 * In this example, kernel/unix tells the booter what file to 341 * boot. We don't want reboot in a zone to be gratuitously different, 342 * so we silently ignore the boot file, if necessary. 343 */ 344 if (argv[0] == NULL) 345 goto done; 346 347 assert(argv[0][0] != ' '); 348 assert(argv[0][0] != '\t'); 349 350 if (argv[0][0] != '-' && argv[0][0] != '\0') { 351 argv = &argv[1]; 352 argc--; 353 } 354 355 optind = 0; 356 opterr = 0; 357 err = Z_OK; 358 while ((c = getopt(argc, argv, "fi:m:s")) != -1) { 359 switch (c) { 360 case 'i': 361 /* 362 * -i is handled by the runtime and is not passed 363 * along to userland 364 */ 365 (void) strlcpy(init_file, optarg, MAXPATHLEN); 366 break; 367 case 'f': 368 /* This has already been processed by zoneadm */ 369 break; 370 case 'm': 371 case 's': 372 /* These pass through unmolested */ 373 (void) snprintf(outargs, BOOTARGS_MAX, 374 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 375 break; 376 case '?': 377 /* 378 * We warn about unknown arguments but pass them 379 * along anyway-- if someone wants to develop their 380 * own init replacement, they can pass it whatever 381 * args they want. 382 */ 383 err = Z_INVAL; 384 (void) snprintf(outargs, BOOTARGS_MAX, 385 "%s -%c", outargs, optopt); 386 (void) snprintf(badarg, BOOTARGS_MAX, 387 "%s -%c", badarg, optopt); 388 break; 389 } 390 } 391 392 /* 393 * For Solaris Zones we warn about and discard non-option arguments. 394 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 395 * to the kernel, we concat up all the other remaining boot args. 396 * and warn on them as a group. 397 */ 398 if (optind < argc) { 399 err = Z_INVAL; 400 while (optind < argc) { 401 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 402 badarg, strlen(badarg) > 0 ? " " : "", 403 argv[optind]); 404 optind++; 405 } 406 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 407 "arguments `%s'.", badarg); 408 } 409 410 done: 411 for (i = 0; i < argc_save; i++) { 412 if (argv_save[i] != NULL) 413 free(argv_save[i]); 414 } 415 free(argv_save); 416 return (err); 417 } 418 419 420 static int 421 mkzonedir(zlog_t *zlogp) 422 { 423 struct stat st; 424 /* 425 * We must create and lock everyone but root out of ZONES_TMPDIR 426 * since anyone can open any UNIX domain socket, regardless of 427 * its file system permissions. Sigh... 428 */ 429 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 430 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 431 return (-1); 432 } 433 /* paranoia */ 434 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 435 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 436 return (-1); 437 } 438 (void) chmod(ZONES_TMPDIR, S_IRWXU); 439 return (0); 440 } 441 442 /* 443 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 444 * 'true' if this is being invoked as part of the processing for the "mount" 445 * subcommand. 446 */ 447 static int 448 zone_ready(zlog_t *zlogp, boolean_t mount_cmd) 449 { 450 int err; 451 452 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 453 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 454 zonecfg_strerror(err)); 455 return (-1); 456 } 457 458 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 459 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 460 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 461 zonecfg_strerror(err)); 462 return (-1); 463 } 464 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 465 bringup_failure_recovery = B_TRUE; 466 (void) vplat_teardown(NULL, mount_cmd, B_FALSE); 467 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 468 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 469 zonecfg_strerror(err)); 470 return (-1); 471 } 472 473 return (0); 474 } 475 476 int 477 init_template(void) 478 { 479 int fd; 480 int err = 0; 481 482 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 483 if (fd == -1) 484 return (-1); 485 486 /* 487 * For now, zoneadmd doesn't do anything with the contract. 488 * Deliver no events, don't inherit, and allow it to be orphaned. 489 */ 490 err |= ct_tmpl_set_critical(fd, 0); 491 err |= ct_tmpl_set_informative(fd, 0); 492 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 493 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 494 if (err || ct_tmpl_activate(fd)) { 495 (void) close(fd); 496 return (-1); 497 } 498 499 return (fd); 500 } 501 502 typedef struct fs_callback { 503 zlog_t *zlogp; 504 zoneid_t zoneid; 505 } fs_callback_t; 506 507 static int 508 mount_early_fs(void *data, const char *spec, const char *dir, 509 const char *fstype, const char *opt) 510 { 511 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; 512 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; 513 pid_t child; 514 int child_status; 515 int tmpl_fd; 516 ctid_t ct; 517 518 if ((tmpl_fd = init_template()) == -1) { 519 zerror(zlogp, B_TRUE, "failed to create contract"); 520 return (-1); 521 } 522 523 if ((child = fork()) == -1) { 524 (void) ct_tmpl_clear(tmpl_fd); 525 (void) close(tmpl_fd); 526 zerror(zlogp, B_TRUE, "failed to fork"); 527 return (-1); 528 529 } else if (child == 0) { /* child */ 530 char opt_buf[MAX_MNTOPT_STR]; 531 int optlen = 0; 532 int mflag = MS_DATA; 533 534 (void) ct_tmpl_clear(tmpl_fd); 535 /* 536 * Even though there are no procs running in the zone, we 537 * do this for paranoia's sake. 538 */ 539 (void) closefrom(0); 540 541 if (zone_enter(zoneid) == -1) { 542 _exit(errno); 543 } 544 if (opt != NULL) { 545 /* 546 * The mount() system call is incredibly annoying. 547 * If options are specified, we need to copy them 548 * into a temporary buffer since the mount() system 549 * call will overwrite the options string. It will 550 * also fail if the new option string it wants to 551 * write is bigger than the one we passed in, so 552 * you must pass in a buffer of the maximum possible 553 * option string length. sigh. 554 */ 555 (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); 556 opt = opt_buf; 557 optlen = MAX_MNTOPT_STR; 558 mflag = MS_OPTIONSTR; 559 } 560 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) 561 _exit(errno); 562 _exit(0); 563 } 564 565 /* parent */ 566 if (contract_latest(&ct) == -1) 567 ct = -1; 568 (void) ct_tmpl_clear(tmpl_fd); 569 (void) close(tmpl_fd); 570 if (waitpid(child, &child_status, 0) != child) { 571 /* unexpected: we must have been signalled */ 572 (void) contract_abandon_id(ct); 573 return (-1); 574 } 575 (void) contract_abandon_id(ct); 576 if (WEXITSTATUS(child_status) != 0) { 577 errno = WEXITSTATUS(child_status); 578 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 579 return (-1); 580 } 581 582 return (0); 583 } 584 585 int 586 do_subproc(zlog_t *zlogp, char *cmdbuf) 587 { 588 char inbuf[1024]; /* arbitrary large amount */ 589 FILE *file; 590 int status; 591 592 file = popen(cmdbuf, "r"); 593 if (file == NULL) { 594 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); 595 return (-1); 596 } 597 598 while (fgets(inbuf, sizeof (inbuf), file) != NULL) 599 if (zlogp != &logsys) 600 zerror(zlogp, B_FALSE, "%s", inbuf); 601 status = pclose(file); 602 603 if (WIFSIGNALED(status)) { 604 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 605 "signal %d", cmdbuf, WTERMSIG(status)); 606 return (-1); 607 } 608 assert(WIFEXITED(status)); 609 if (WEXITSTATUS(status) == ZEXIT_EXEC) { 610 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); 611 return (-1); 612 } 613 return (WEXITSTATUS(status)); 614 } 615 616 static int 617 zone_bootup(zlog_t *zlogp, const char *bootargs) 618 { 619 zoneid_t zoneid; 620 struct stat st; 621 char zroot[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 622 char nbootargs[BOOTARGS_MAX]; 623 char cmdbuf[MAXPATHLEN]; 624 fs_callback_t cb; 625 brand_handle_t bh; 626 int err; 627 628 if (init_console_slave(zlogp) != 0) 629 return (-1); 630 reset_slave_terminal(zlogp); 631 632 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 633 zerror(zlogp, B_TRUE, "unable to get zoneid"); 634 return (-1); 635 } 636 637 cb.zlogp = zlogp; 638 cb.zoneid = zoneid; 639 640 /* Get a handle to the brand info for this zone */ 641 if ((bh = brand_open(brand_name)) == NULL) { 642 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 643 return (-1); 644 } 645 646 /* 647 * Get the list of filesystems to mount from the brand 648 * configuration. These mounts are done via a thread that will 649 * enter the zone, so they are done from within the context of the 650 * zone. 651 */ 652 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) { 653 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 654 brand_close(bh); 655 return (-1); 656 } 657 658 /* 659 * Get the brand's boot callback if it exists. 660 */ 661 if (zone_get_zonepath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 662 zerror(zlogp, B_FALSE, "unable to determine zone root"); 663 brand_close(bh); 664 return (-1); 665 } 666 (void) strcpy(cmdbuf, EXEC_PREFIX); 667 if (brand_get_boot(bh, zone_name, zroot, cmdbuf + EXEC_LEN, 668 sizeof (cmdbuf) - EXEC_LEN, 0, NULL) != 0) { 669 zerror(zlogp, B_FALSE, 670 "unable to determine branded zone's boot callback"); 671 brand_close(bh); 672 return (-1); 673 } 674 675 /* Get the path for this zone's init(1M) (or equivalent) process. */ 676 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) { 677 zerror(zlogp, B_FALSE, 678 "unable to determine zone's init(1M) location"); 679 brand_close(bh); 680 return (-1); 681 } 682 683 brand_close(bh); 684 685 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 686 bad_boot_arg); 687 if (err == Z_INVAL) 688 eventstream_write(Z_EVT_ZONE_BADARGS); 689 else if (err != Z_OK) 690 return (-1); 691 692 assert(init_file[0] != '\0'); 693 694 /* Try to anticipate possible problems: Make sure init is executable. */ 695 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 696 zerror(zlogp, B_FALSE, "unable to determine zone root"); 697 return (-1); 698 } 699 700 (void) snprintf(initpath, sizeof (initpath), "%s%s", zroot, init_file); 701 702 if (stat(initpath, &st) == -1) { 703 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 704 return (-1); 705 } 706 707 if ((st.st_mode & S_IXUSR) == 0) { 708 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 709 return (-1); 710 } 711 712 /* 713 * If there is a brand 'boot' callback, execute it now to give the 714 * brand one last chance to do any additional setup before the zone 715 * is booted. 716 */ 717 if ((strlen(cmdbuf) > EXEC_LEN) && 718 (do_subproc(zlogp, cmdbuf) != Z_OK)) { 719 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 720 return (-1); 721 } 722 723 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 724 zerror(zlogp, B_TRUE, "could not set zone boot file"); 725 return (-1); 726 } 727 728 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 729 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 730 return (-1); 731 } 732 733 if (zone_boot(zoneid) == -1) { 734 zerror(zlogp, B_TRUE, "unable to boot zone"); 735 return (-1); 736 } 737 738 return (0); 739 } 740 741 static int 742 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) 743 { 744 int err; 745 746 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) { 747 if (!bringup_failure_recovery) 748 zerror(zlogp, B_FALSE, "unable to destroy zone"); 749 return (-1); 750 } 751 752 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 753 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 754 zonecfg_strerror(err)); 755 756 return (0); 757 } 758 759 /* 760 * Generate AUE_zone_state for a command that boots a zone. 761 */ 762 static void 763 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 764 char *new_state) 765 { 766 adt_session_data_t *ah; 767 adt_event_data_t *event; 768 int pass_fail, fail_reason; 769 770 if (!adt_audit_enabled()) 771 return; 772 773 if (return_val == 0) { 774 pass_fail = ADT_SUCCESS; 775 fail_reason = ADT_SUCCESS; 776 } else { 777 pass_fail = ADT_FAILURE; 778 fail_reason = ADT_FAIL_VALUE_PROGRAM; 779 } 780 781 if (adt_start_session(&ah, NULL, 0)) { 782 zerror(zlogp, B_TRUE, gettext("audit failure.")); 783 return; 784 } 785 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 786 zerror(zlogp, B_TRUE, gettext("audit failure.")); 787 (void) adt_end_session(ah); 788 return; 789 } 790 791 event = adt_alloc_event(ah, ADT_zone_state); 792 if (event == NULL) { 793 zerror(zlogp, B_TRUE, gettext("audit failure.")); 794 (void) adt_end_session(ah); 795 return; 796 } 797 event->adt_zone_state.zonename = zone_name; 798 event->adt_zone_state.new_state = new_state; 799 800 if (adt_put_event(event, pass_fail, fail_reason)) 801 zerror(zlogp, B_TRUE, gettext("audit failure.")); 802 803 adt_free_event(event); 804 805 (void) adt_end_session(ah); 806 } 807 808 /* 809 * The main routine for the door server that deals with zone state transitions. 810 */ 811 /* ARGSUSED */ 812 static void 813 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 814 uint_t n_desc) 815 { 816 ucred_t *uc = NULL; 817 const priv_set_t *eset; 818 819 zone_state_t zstate; 820 zone_cmd_t cmd; 821 zone_cmd_arg_t *zargp; 822 823 boolean_t kernelcall; 824 825 int rval = -1; 826 uint64_t uniqid; 827 zoneid_t zoneid = -1; 828 zlog_t zlog; 829 zlog_t *zlogp; 830 zone_cmd_rval_t *rvalp; 831 size_t rlen = getpagesize(); /* conservative */ 832 fs_callback_t cb; 833 brand_handle_t bh; 834 835 /* LINTED E_BAD_PTR_CAST_ALIGN */ 836 zargp = (zone_cmd_arg_t *)args; 837 838 /* 839 * When we get the door unref message, we've fdetach'd the door, and 840 * it is time for us to shut down zoneadmd. 841 */ 842 if (zargp == DOOR_UNREF_DATA) { 843 /* 844 * See comment at end of main() for info on the last rites. 845 */ 846 exit(0); 847 } 848 849 if (zargp == NULL) { 850 (void) door_return(NULL, 0, 0, 0); 851 } 852 853 rvalp = alloca(rlen); 854 bzero(rvalp, rlen); 855 zlog.logfile = NULL; 856 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 857 zlog.buf = rvalp->errbuf; 858 zlog.log = zlog.buf; 859 /* defer initialization of zlog.locale until after credential check */ 860 zlogp = &zlog; 861 862 if (alen != sizeof (zone_cmd_arg_t)) { 863 /* 864 * This really shouldn't be happening. 865 */ 866 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 867 "unexpected (expected %d bytes)", alen, 868 sizeof (zone_cmd_arg_t)); 869 goto out; 870 } 871 cmd = zargp->cmd; 872 873 if (door_ucred(&uc) != 0) { 874 zerror(&logsys, B_TRUE, "door_ucred"); 875 goto out; 876 } 877 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 878 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 879 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 880 ucred_geteuid(uc) != 0)) { 881 zerror(&logsys, B_FALSE, "insufficient privileges"); 882 goto out; 883 } 884 885 kernelcall = ucred_getpid(uc) == 0; 886 887 /* 888 * This is safe because we only use a zlog_t throughout the 889 * duration of a door call; i.e., by the time the pointer 890 * might become invalid, the door call would be over. 891 */ 892 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 893 894 (void) mutex_lock(&lock); 895 896 /* 897 * Once we start to really die off, we don't want more connections. 898 */ 899 if (in_death_throes) { 900 (void) mutex_unlock(&lock); 901 ucred_free(uc); 902 (void) door_return(NULL, 0, 0, 0); 903 thr_exit(NULL); 904 } 905 906 /* 907 * Check for validity of command. 908 */ 909 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && 910 cmd != Z_REBOOT && cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && 911 cmd != Z_MOUNT && cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { 912 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 913 goto out; 914 } 915 916 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 917 /* 918 * Can't happen 919 */ 920 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 921 cmd); 922 goto out; 923 } 924 /* 925 * We ignore the possibility of someone calling zone_create(2) 926 * explicitly; all requests must come through zoneadmd. 927 */ 928 if (zone_get_state(zone_name, &zstate) != Z_OK) { 929 /* 930 * Something terribly wrong happened 931 */ 932 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 933 goto out; 934 } 935 936 if (kernelcall) { 937 /* 938 * Kernel-initiated requests may lose their validity if the 939 * zone_t the kernel was referring to has gone away. 940 */ 941 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 942 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 943 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 944 /* 945 * We're not talking about the same zone. The request 946 * must have arrived too late. Return error. 947 */ 948 rval = -1; 949 goto out; 950 } 951 zlogp = &logsys; /* Log errors to syslog */ 952 } 953 954 /* 955 * If we are being asked to forcibly mount or boot a zone, we 956 * pretend that an INCOMPLETE zone is actually INSTALLED. 957 */ 958 if (zstate == ZONE_STATE_INCOMPLETE && 959 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) 960 zstate = ZONE_STATE_INSTALLED; 961 962 switch (zstate) { 963 case ZONE_STATE_CONFIGURED: 964 case ZONE_STATE_INCOMPLETE: 965 /* 966 * Not our area of expertise; we just print a nice message 967 * and die off. 968 */ 969 zerror(zlogp, B_FALSE, 970 "%s operation is invalid for zones in state '%s'", 971 z_cmd_name(cmd), zone_state_str(zstate)); 972 break; 973 974 case ZONE_STATE_INSTALLED: 975 switch (cmd) { 976 case Z_READY: 977 rval = zone_ready(zlogp, B_FALSE); 978 if (rval == 0) 979 eventstream_write(Z_EVT_ZONE_READIED); 980 break; 981 case Z_BOOT: 982 case Z_FORCEBOOT: 983 eventstream_write(Z_EVT_ZONE_BOOTING); 984 if ((rval = zone_ready(zlogp, B_FALSE)) == 0) 985 rval = zone_bootup(zlogp, zargp->bootbuf); 986 audit_put_record(zlogp, uc, rval, "boot"); 987 if (rval != 0) { 988 bringup_failure_recovery = B_TRUE; 989 (void) zone_halt(zlogp, B_FALSE, B_FALSE); 990 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 991 } 992 break; 993 case Z_HALT: 994 if (kernelcall) /* Invalid; can't happen */ 995 abort(); 996 /* 997 * We could have two clients racing to halt this 998 * zone; the second client loses, but his request 999 * doesn't fail, since the zone is now in the desired 1000 * state. 1001 */ 1002 zerror(zlogp, B_FALSE, "zone is already halted"); 1003 rval = 0; 1004 break; 1005 case Z_REBOOT: 1006 if (kernelcall) /* Invalid; can't happen */ 1007 abort(); 1008 zerror(zlogp, B_FALSE, "%s operation is invalid " 1009 "for zones in state '%s'", z_cmd_name(cmd), 1010 zone_state_str(zstate)); 1011 rval = -1; 1012 break; 1013 case Z_NOTE_UNINSTALLING: 1014 if (kernelcall) /* Invalid; can't happen */ 1015 abort(); 1016 /* 1017 * Tell the console to print out a message about this. 1018 * Once it does, we will be in_death_throes. 1019 */ 1020 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 1021 break; 1022 case Z_MOUNT: 1023 case Z_FORCEMOUNT: 1024 if (kernelcall) /* Invalid; can't happen */ 1025 abort(); 1026 if (!zone_isnative) { 1027 zerror(zlogp, B_FALSE, 1028 "%s operation is invalid for branded " 1029 "zones", z_cmd_name(cmd)); 1030 rval = -1; 1031 break; 1032 } 1033 1034 rval = zone_ready(zlogp, B_TRUE); 1035 if (rval != 0) 1036 break; 1037 1038 eventstream_write(Z_EVT_ZONE_READIED); 1039 1040 /* Get a handle to the brand info for this zone */ 1041 if ((bh = brand_open(brand_name)) == NULL) { 1042 rval = -1; 1043 break; 1044 } 1045 1046 /* 1047 * Get the list of filesystems to mount from 1048 * the brand configuration. These mounts are done 1049 * via a thread that will enter the zone, so they 1050 * are done from within the context of the zone. 1051 */ 1052 cb.zlogp = zlogp; 1053 cb.zoneid = zone_id; 1054 rval = brand_platform_iter_mounts(bh, 1055 mount_early_fs, &cb); 1056 1057 brand_close(bh); 1058 1059 /* 1060 * Ordinarily, /dev/fd would be mounted inside the zone 1061 * by svc:/system/filesystem/usr:default, but since 1062 * we're not booting the zone, we need to do this 1063 * manually. 1064 */ 1065 if (rval == 0) 1066 rval = mount_early_fs(&cb, 1067 "fd", "/dev/fd", "fd", NULL); 1068 break; 1069 case Z_UNMOUNT: 1070 if (kernelcall) /* Invalid; can't happen */ 1071 abort(); 1072 zerror(zlogp, B_FALSE, "zone is already unmounted"); 1073 rval = 0; 1074 break; 1075 } 1076 break; 1077 1078 case ZONE_STATE_READY: 1079 switch (cmd) { 1080 case Z_READY: 1081 /* 1082 * We could have two clients racing to ready this 1083 * zone; the second client loses, but his request 1084 * doesn't fail, since the zone is now in the desired 1085 * state. 1086 */ 1087 zerror(zlogp, B_FALSE, "zone is already ready"); 1088 rval = 0; 1089 break; 1090 case Z_BOOT: 1091 (void) strlcpy(boot_args, zargp->bootbuf, 1092 sizeof (boot_args)); 1093 eventstream_write(Z_EVT_ZONE_BOOTING); 1094 rval = zone_bootup(zlogp, zargp->bootbuf); 1095 audit_put_record(zlogp, uc, rval, "boot"); 1096 if (rval != 0) { 1097 bringup_failure_recovery = B_TRUE; 1098 (void) zone_halt(zlogp, B_FALSE, B_TRUE); 1099 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1100 } 1101 boot_args[0] = '\0'; 1102 break; 1103 case Z_HALT: 1104 if (kernelcall) /* Invalid; can't happen */ 1105 abort(); 1106 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE)) != 0) 1107 break; 1108 eventstream_write(Z_EVT_ZONE_HALTED); 1109 break; 1110 case Z_REBOOT: 1111 case Z_NOTE_UNINSTALLING: 1112 case Z_MOUNT: 1113 case Z_UNMOUNT: 1114 if (kernelcall) /* Invalid; can't happen */ 1115 abort(); 1116 zerror(zlogp, B_FALSE, "%s operation is invalid " 1117 "for zones in state '%s'", z_cmd_name(cmd), 1118 zone_state_str(zstate)); 1119 rval = -1; 1120 break; 1121 } 1122 break; 1123 1124 case ZONE_STATE_MOUNTED: 1125 switch (cmd) { 1126 case Z_UNMOUNT: 1127 if (kernelcall) /* Invalid; can't happen */ 1128 abort(); 1129 rval = zone_halt(zlogp, B_TRUE, B_FALSE); 1130 if (rval == 0) { 1131 eventstream_write(Z_EVT_ZONE_HALTED); 1132 (void) sema_post(&scratch_sem); 1133 } 1134 break; 1135 default: 1136 if (kernelcall) /* Invalid; can't happen */ 1137 abort(); 1138 zerror(zlogp, B_FALSE, "%s operation is invalid " 1139 "for zones in state '%s'", z_cmd_name(cmd), 1140 zone_state_str(zstate)); 1141 rval = -1; 1142 break; 1143 } 1144 break; 1145 1146 case ZONE_STATE_RUNNING: 1147 case ZONE_STATE_SHUTTING_DOWN: 1148 case ZONE_STATE_DOWN: 1149 switch (cmd) { 1150 case Z_READY: 1151 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE)) != 0) 1152 break; 1153 if ((rval = zone_ready(zlogp, B_FALSE)) == 0) 1154 eventstream_write(Z_EVT_ZONE_READIED); 1155 else 1156 eventstream_write(Z_EVT_ZONE_HALTED); 1157 break; 1158 case Z_BOOT: 1159 /* 1160 * We could have two clients racing to boot this 1161 * zone; the second client loses, but his request 1162 * doesn't fail, since the zone is now in the desired 1163 * state. 1164 */ 1165 zerror(zlogp, B_FALSE, "zone is already booted"); 1166 rval = 0; 1167 break; 1168 case Z_HALT: 1169 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE)) != 0) 1170 break; 1171 eventstream_write(Z_EVT_ZONE_HALTED); 1172 break; 1173 case Z_REBOOT: 1174 (void) strlcpy(boot_args, zargp->bootbuf, 1175 sizeof (boot_args)); 1176 eventstream_write(Z_EVT_ZONE_REBOOTING); 1177 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE)) != 0) { 1178 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1179 boot_args[0] = '\0'; 1180 break; 1181 } 1182 if ((rval = zone_ready(zlogp, B_FALSE)) != 0) { 1183 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1184 boot_args[0] = '\0'; 1185 break; 1186 } 1187 rval = zone_bootup(zlogp, zargp->bootbuf); 1188 audit_put_record(zlogp, uc, rval, "reboot"); 1189 if (rval != 0) { 1190 (void) zone_halt(zlogp, B_FALSE, B_TRUE); 1191 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1192 } 1193 boot_args[0] = '\0'; 1194 break; 1195 case Z_NOTE_UNINSTALLING: 1196 case Z_MOUNT: 1197 case Z_UNMOUNT: 1198 zerror(zlogp, B_FALSE, "%s operation is invalid " 1199 "for zones in state '%s'", z_cmd_name(cmd), 1200 zone_state_str(zstate)); 1201 rval = -1; 1202 break; 1203 } 1204 break; 1205 default: 1206 abort(); 1207 } 1208 1209 /* 1210 * Because the state of the zone may have changed, we make sure 1211 * to wake the console poller, which is in charge of initiating 1212 * the shutdown procedure as necessary. 1213 */ 1214 eventstream_write(Z_EVT_NULL); 1215 1216 out: 1217 (void) mutex_unlock(&lock); 1218 if (kernelcall) { 1219 rvalp = NULL; 1220 rlen = 0; 1221 } else { 1222 rvalp->rval = rval; 1223 } 1224 if (uc != NULL) 1225 ucred_free(uc); 1226 (void) door_return((char *)rvalp, rlen, NULL, 0); 1227 thr_exit(NULL); 1228 } 1229 1230 static int 1231 setup_door(zlog_t *zlogp) 1232 { 1233 if ((zone_door = door_create(server, NULL, 1234 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1235 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1236 return (-1); 1237 } 1238 (void) fdetach(zone_door_path); 1239 1240 if (fattach(zone_door, zone_door_path) != 0) { 1241 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1242 (void) door_revoke(zone_door); 1243 (void) fdetach(zone_door_path); 1244 zone_door = -1; 1245 return (-1); 1246 } 1247 return (0); 1248 } 1249 1250 /* 1251 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 1252 * is where zoneadmd itself will check to see that another instance of 1253 * zoneadmd isn't already controlling this zone. 1254 * 1255 * The idea here is that we want to open the path to which we will 1256 * attach our door, lock it, and then make sure that no-one has beat us 1257 * to fattach(3c)ing onto it. 1258 * 1259 * fattach(3c) is really a mount, so there are actually two possible 1260 * vnodes we could be dealing with. Our strategy is as follows: 1261 * 1262 * - If the file we opened is a regular file (common case): 1263 * There is no fattach(3c)ed door, so we have a chance of becoming 1264 * the managing zoneadmd. We attempt to lock the file: if it is 1265 * already locked, that means someone else raced us here, so we 1266 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 1267 * that beat us to it. 1268 * 1269 * - If the file we opened is a namefs file: 1270 * This means there is already an established door fattach(3c)'ed 1271 * to the rendezvous path. We've lost the race, so we give up. 1272 * Note that in this case we also try to grab the file lock, and 1273 * will succeed in acquiring it since the vnode locked by the 1274 * "winning" zoneadmd was a regular one, and the one we locked was 1275 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1276 * we just return to zoneadm(1m) which knows to retry. 1277 */ 1278 static int 1279 make_daemon_exclusive(zlog_t *zlogp) 1280 { 1281 int doorfd = -1; 1282 int err, ret = -1; 1283 struct stat st; 1284 struct flock flock; 1285 zone_state_t zstate; 1286 1287 top: 1288 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1289 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1290 zonecfg_strerror(err)); 1291 goto out; 1292 } 1293 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1294 S_IREAD|S_IWRITE)) < 0) { 1295 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1296 goto out; 1297 } 1298 if (fstat(doorfd, &st) < 0) { 1299 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1300 goto out; 1301 } 1302 /* 1303 * Lock the file to synchronize with other zoneadmd 1304 */ 1305 flock.l_type = F_WRLCK; 1306 flock.l_whence = SEEK_SET; 1307 flock.l_start = (off_t)0; 1308 flock.l_len = (off_t)0; 1309 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1310 /* 1311 * Someone else raced us here and grabbed the lock file 1312 * first. A warning here is inappropriate since nothing 1313 * went wrong. 1314 */ 1315 goto out; 1316 } 1317 1318 if (strcmp(st.st_fstype, "namefs") == 0) { 1319 struct door_info info; 1320 1321 /* 1322 * There is already something fattach()'ed to this file. 1323 * Lets see what the door is up to. 1324 */ 1325 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1326 /* 1327 * Another zoneadmd process seems to be in 1328 * control of the situation and we don't need to 1329 * be here. A warning here is inappropriate 1330 * since nothing went wrong. 1331 * 1332 * If the door has been revoked, the zoneadmd 1333 * process currently managing the zone is going 1334 * away. We'll return control to zoneadm(1m) 1335 * which will try again (by which time zoneadmd 1336 * will hopefully have exited). 1337 */ 1338 goto out; 1339 } 1340 1341 /* 1342 * If we got this far, there's a fattach(3c)'ed door 1343 * that belongs to a process that has exited, which can 1344 * happen if the previous zoneadmd died unexpectedly. 1345 * 1346 * Let user know that something is amiss, but that we can 1347 * recover; if the zone is in the installed state, then don't 1348 * message, since having a running zoneadmd isn't really 1349 * expected/needed. We want to keep occurences of this message 1350 * limited to times when zoneadmd is picking back up from a 1351 * zoneadmd that died while the zone was in some non-trivial 1352 * state. 1353 */ 1354 if (zstate > ZONE_STATE_INSTALLED) { 1355 zerror(zlogp, B_FALSE, 1356 "zone '%s': WARNING: zone is in state '%s', but " 1357 "zoneadmd does not appear to be available; " 1358 "restarted zoneadmd to recover.", 1359 zone_name, zone_state_str(zstate)); 1360 } 1361 1362 (void) fdetach(zone_door_path); 1363 (void) close(doorfd); 1364 goto top; 1365 } 1366 ret = 0; 1367 out: 1368 (void) close(doorfd); 1369 return (ret); 1370 } 1371 1372 int 1373 main(int argc, char *argv[]) 1374 { 1375 int opt; 1376 zoneid_t zid; 1377 priv_set_t *privset; 1378 zone_state_t zstate; 1379 char parents_locale[MAXPATHLEN]; 1380 brand_handle_t bh; 1381 int err; 1382 1383 pid_t pid; 1384 sigset_t blockset; 1385 sigset_t block_cld; 1386 1387 struct { 1388 sema_t sem; 1389 int status; 1390 zlog_t log; 1391 } *shstate; 1392 size_t shstatelen = getpagesize(); 1393 1394 zlog_t errlog; 1395 zlog_t *zlogp; 1396 1397 int ctfd; 1398 1399 progname = get_execbasename(argv[0]); 1400 1401 /* 1402 * Make sure stderr is unbuffered 1403 */ 1404 (void) setbuffer(stderr, NULL, 0); 1405 1406 /* 1407 * Get out of the way of mounted filesystems, since we will daemonize 1408 * soon. 1409 */ 1410 (void) chdir("/"); 1411 1412 /* 1413 * Use the default system umask per PSARC 1998/110 rather than 1414 * anything that may have been set by the caller. 1415 */ 1416 (void) umask(CMASK); 1417 1418 /* 1419 * Initially we want to use our parent's locale. 1420 */ 1421 (void) setlocale(LC_ALL, ""); 1422 (void) textdomain(TEXT_DOMAIN); 1423 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1424 sizeof (parents_locale)); 1425 1426 /* 1427 * This zlog_t is used for writing to stderr 1428 */ 1429 errlog.logfile = stderr; 1430 errlog.buflen = errlog.loglen = 0; 1431 errlog.buf = errlog.log = NULL; 1432 errlog.locale = parents_locale; 1433 1434 /* 1435 * We start off writing to stderr until we're ready to daemonize. 1436 */ 1437 zlogp = &errlog; 1438 1439 /* 1440 * Process options. 1441 */ 1442 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1443 switch (opt) { 1444 case 'R': 1445 zonecfg_set_root(optarg); 1446 break; 1447 case 'z': 1448 zone_name = optarg; 1449 break; 1450 default: 1451 usage(); 1452 } 1453 } 1454 1455 if (zone_name == NULL) 1456 usage(); 1457 1458 /* 1459 * Because usage() prints directly to stderr, it has gettext() 1460 * wrapping, which depends on the locale. But since zerror() calls 1461 * localize() which tweaks the locale, it is not safe to call zerror() 1462 * until after the last call to usage(). Fortunately, the last call 1463 * to usage() is just above and the first call to zerror() is just 1464 * below. Don't mess this up. 1465 */ 1466 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1467 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1468 GLOBAL_ZONENAME); 1469 return (1); 1470 } 1471 1472 if (zone_get_id(zone_name, &zid) != 0) { 1473 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 1474 zonecfg_strerror(Z_NO_ZONE)); 1475 return (1); 1476 } 1477 1478 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1479 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1480 zonecfg_strerror(err)); 1481 return (1); 1482 } 1483 if (zstate < ZONE_STATE_INCOMPLETE) { 1484 zerror(zlogp, B_FALSE, 1485 "cannot manage a zone which is in state '%s'", 1486 zone_state_str(zstate)); 1487 return (1); 1488 } 1489 1490 /* Get a handle to the brand info for this zone */ 1491 if ((zone_get_brand(zone_name, brand_name, sizeof (brand_name)) 1492 != Z_OK) || (bh = brand_open(brand_name)) == NULL) { 1493 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1494 return (1); 1495 } 1496 zone_isnative = brand_is_native(bh); 1497 brand_close(bh); 1498 1499 /* 1500 * Check that we have all privileges. It would be nice to pare 1501 * this down, but this is at least a first cut. 1502 */ 1503 if ((privset = priv_allocset()) == NULL) { 1504 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1505 return (1); 1506 } 1507 1508 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 1509 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 1510 priv_freeset(privset); 1511 return (1); 1512 } 1513 1514 if (priv_isfullset(privset) == B_FALSE) { 1515 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 1516 "run this command (all privs required)"); 1517 priv_freeset(privset); 1518 return (1); 1519 } 1520 priv_freeset(privset); 1521 1522 if (mkzonedir(zlogp) != 0) 1523 return (1); 1524 1525 /* 1526 * Pre-fork: setup shared state 1527 */ 1528 if ((shstate = (void *)mmap(NULL, shstatelen, 1529 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 1530 MAP_FAILED) { 1531 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 1532 return (1); 1533 } 1534 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 1535 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 1536 (void) munmap((char *)shstate, shstatelen); 1537 return (1); 1538 } 1539 shstate->log.logfile = NULL; 1540 shstate->log.buflen = shstatelen - sizeof (*shstate); 1541 shstate->log.loglen = shstate->log.buflen; 1542 shstate->log.buf = (char *)shstate + sizeof (*shstate); 1543 shstate->log.log = shstate->log.buf; 1544 shstate->log.locale = parents_locale; 1545 shstate->status = -1; 1546 1547 /* 1548 * We need a SIGCHLD handler so the sema_wait() below will wake 1549 * up if the child dies without doing a sema_post(). 1550 */ 1551 (void) sigset(SIGCHLD, sigchld); 1552 /* 1553 * We must mask SIGCHLD until after we've coped with the fork 1554 * sufficiently to deal with it; otherwise we can race and 1555 * receive the signal before pid has been initialized 1556 * (yes, this really happens). 1557 */ 1558 (void) sigemptyset(&block_cld); 1559 (void) sigaddset(&block_cld, SIGCHLD); 1560 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 1561 1562 if ((ctfd = init_template()) == -1) { 1563 zerror(zlogp, B_TRUE, "failed to create contract"); 1564 return (1); 1565 } 1566 1567 /* 1568 * Do not let another thread localize a message while we are forking. 1569 */ 1570 (void) mutex_lock(&msglock); 1571 pid = fork(); 1572 (void) mutex_unlock(&msglock); 1573 1574 /* 1575 * In all cases (parent, child, and in the event of an error) we 1576 * don't want to cause creation of contracts on subsequent fork()s. 1577 */ 1578 (void) ct_tmpl_clear(ctfd); 1579 (void) close(ctfd); 1580 1581 if (pid == -1) { 1582 zerror(zlogp, B_TRUE, "could not fork"); 1583 return (1); 1584 1585 } else if (pid > 0) { /* parent */ 1586 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1587 /* 1588 * This marks a window of vulnerability in which we receive 1589 * the SIGCLD before falling into sema_wait (normally we would 1590 * get woken up from sema_wait with EINTR upon receipt of 1591 * SIGCLD). So we may need to use some other scheme like 1592 * sema_posting in the sigcld handler. 1593 * blech 1594 */ 1595 (void) sema_wait(&shstate->sem); 1596 (void) sema_destroy(&shstate->sem); 1597 if (shstate->status != 0) 1598 (void) waitpid(pid, NULL, WNOHANG); 1599 /* 1600 * It's ok if we die with SIGPIPE. It's not like we could have 1601 * done anything about it. 1602 */ 1603 (void) fprintf(stderr, "%s", shstate->log.buf); 1604 _exit(shstate->status == 0 ? 0 : 1); 1605 } 1606 1607 /* 1608 * The child charges on. 1609 */ 1610 (void) sigset(SIGCHLD, SIG_DFL); 1611 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1612 1613 /* 1614 * SIGPIPE can be delivered if we write to a socket for which the 1615 * peer endpoint is gone. That can lead to too-early termination 1616 * of zoneadmd, and that's not good eats. 1617 */ 1618 (void) sigset(SIGPIPE, SIG_IGN); 1619 /* 1620 * Stop using stderr 1621 */ 1622 zlogp = &shstate->log; 1623 1624 /* 1625 * We don't need stdout/stderr from now on. 1626 */ 1627 closefrom(0); 1628 1629 /* 1630 * Initialize the syslog zlog_t. This needs to be done after 1631 * the call to closefrom(). 1632 */ 1633 logsys.buf = logsys.log = NULL; 1634 logsys.buflen = logsys.loglen = 0; 1635 logsys.logfile = NULL; 1636 logsys.locale = DEFAULT_LOCALE; 1637 1638 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 1639 1640 /* 1641 * The eventstream is used to publish state changes in the zone 1642 * from the door threads to the console I/O poller. 1643 */ 1644 if (eventstream_init() == -1) { 1645 zerror(zlogp, B_TRUE, "unable to create eventstream"); 1646 goto child_out; 1647 } 1648 1649 (void) snprintf(zone_door_path, sizeof (zone_door_path), 1650 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 1651 1652 /* 1653 * See if another zoneadmd is running for this zone. If not, then we 1654 * can now modify system state. 1655 */ 1656 if (make_daemon_exclusive(zlogp) == -1) 1657 goto child_out; 1658 1659 1660 /* 1661 * Create/join a new session; we need to be careful of what we do with 1662 * the console from now on so we don't end up being the session leader 1663 * for the terminal we're going to be handing out. 1664 */ 1665 (void) setsid(); 1666 1667 /* 1668 * This thread shouldn't be receiving any signals; in particular, 1669 * SIGCHLD should be received by the thread doing the fork(). 1670 */ 1671 (void) sigfillset(&blockset); 1672 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 1673 1674 /* 1675 * Setup the console device and get ready to serve the console; 1676 * once this has completed, we're ready to let console clients 1677 * make an attempt to connect (they will block until 1678 * serve_console_sock() below gets called, and any pending 1679 * connection is accept()ed). 1680 */ 1681 if (!zonecfg_in_alt_root() && init_console(zlogp) == -1) 1682 goto child_out; 1683 1684 /* 1685 * Take the lock now, so that when the door server gets going, we 1686 * are guaranteed that it won't take a request until we are sure 1687 * that everything is completely set up. See the child_out: label 1688 * below to see why this matters. 1689 */ 1690 (void) mutex_lock(&lock); 1691 1692 /* Init semaphore for scratch zones. */ 1693 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 1694 zerror(zlogp, B_TRUE, 1695 "failed to initialize semaphore for scratch zone"); 1696 goto child_out; 1697 } 1698 1699 /* 1700 * Note: door setup must occur *after* the console is setup. 1701 * This is so that as zlogin tests the door to see if zoneadmd 1702 * is ready yet, we know that the console will get serviced 1703 * once door_info() indicates that the door is "up". 1704 */ 1705 if (setup_door(zlogp) == -1) 1706 goto child_out; 1707 1708 /* 1709 * Things seem OK so far; tell the parent process that we're done 1710 * with setup tasks. This will cause the parent to exit, signalling 1711 * to zoneadm, zlogin, or whatever forked it that we are ready to 1712 * service requests. 1713 */ 1714 shstate->status = 0; 1715 (void) sema_post(&shstate->sem); 1716 (void) munmap((char *)shstate, shstatelen); 1717 shstate = NULL; 1718 1719 (void) mutex_unlock(&lock); 1720 1721 /* 1722 * zlogp is now invalid, so reset it to the syslog logger. 1723 */ 1724 zlogp = &logsys; 1725 1726 /* 1727 * Now that we are free of any parents, switch to the default locale. 1728 */ 1729 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 1730 1731 /* 1732 * At this point the setup portion of main() is basically done, so 1733 * we reuse this thread to manage the zone console. When 1734 * serve_console() has returned, we are past the point of no return 1735 * in the life of this zoneadmd. 1736 */ 1737 if (zonecfg_in_alt_root()) { 1738 /* 1739 * This is just awful, but mounted scratch zones don't (and 1740 * can't) have consoles. We just wait for unmount instead. 1741 */ 1742 while (sema_wait(&scratch_sem) == EINTR) 1743 ; 1744 } else { 1745 serve_console(zlogp); 1746 assert(in_death_throes); 1747 } 1748 1749 /* 1750 * This is the next-to-last part of the exit interlock. Upon calling 1751 * fdetach(), the door will go unreferenced; once any 1752 * outstanding requests (like the door thread doing Z_HALT) are 1753 * done, the door will get an UNREF notification; when it handles 1754 * the UNREF, the door server will cause the exit. 1755 */ 1756 assert(!MUTEX_HELD(&lock)); 1757 (void) fdetach(zone_door_path); 1758 for (;;) 1759 (void) pause(); 1760 1761 child_out: 1762 assert(pid == 0); 1763 if (shstate != NULL) { 1764 shstate->status = -1; 1765 (void) sema_post(&shstate->sem); 1766 (void) munmap((char *)shstate, shstatelen); 1767 } 1768 1769 /* 1770 * This might trigger an unref notification, but if so, 1771 * we are still holding the lock, so our call to exit will 1772 * ultimately win the race and will publish the right exit 1773 * code. 1774 */ 1775 if (zone_door != -1) { 1776 assert(MUTEX_HELD(&lock)); 1777 (void) door_revoke(zone_door); 1778 (void) fdetach(zone_door_path); 1779 } 1780 return (1); /* return from main() forcibly exits an MT process */ 1781 } 1782