1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * zoneadmd manages zones; one zoneadmd process is launched for each 31 * non-global zone on the system. This daemon juggles four jobs: 32 * 33 * - Implement setup and teardown of the zone "virtual platform": mount and 34 * unmount filesystems; create and destroy network interfaces; communicate 35 * with devfsadmd to lay out devices for the zone; instantiate the zone 36 * console device; configure process runtime attributes such as resource 37 * controls, pool bindings, fine-grained privileges. 38 * 39 * - Launch the zone's init(1M) process. 40 * 41 * - Implement a door server; clients (like zoneadm) connect to the door 42 * server and request zone state changes. The kernel is also a client of 43 * this door server. A request to halt or reboot the zone which originates 44 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 45 * 46 * One minor problem is that messages emitted by zoneadmd need to be passed 47 * back to the zoneadm process making the request. These messages need to 48 * be rendered in the client's locale; so, this is passed in as part of the 49 * request. The exception is the kernel upcall to zoneadmd, in which case 50 * messages are syslog'd. 51 * 52 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 53 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 54 * strings which do not need to be translated. 55 * 56 * - Act as a console server for zlogin -C processes; see comments in zcons.c 57 * for more information about the zone console architecture. 58 * 59 * DESIGN NOTES 60 * 61 * Restart: 62 * A chief design constraint of zoneadmd is that it should be restartable in 63 * the case that the administrator kills it off, or it suffers a fatal error, 64 * without the running zone being impacted; this is akin to being able to 65 * reboot the service processor of a server without affecting the OS instance. 66 */ 67 68 #include <sys/param.h> 69 #include <sys/mman.h> 70 #include <sys/types.h> 71 #include <sys/stat.h> 72 #include <sys/sysmacros.h> 73 74 #include <bsm/adt.h> 75 #include <bsm/adt_event.h> 76 77 #include <alloca.h> 78 #include <assert.h> 79 #include <errno.h> 80 #include <door.h> 81 #include <fcntl.h> 82 #include <locale.h> 83 #include <signal.h> 84 #include <stdarg.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 #include <strings.h> 89 #include <synch.h> 90 #include <syslog.h> 91 #include <thread.h> 92 #include <unistd.h> 93 #include <wait.h> 94 #include <limits.h> 95 #include <zone.h> 96 #include <libbrand.h> 97 #include <libcontract.h> 98 #include <libcontract_priv.h> 99 #include <sys/contract/process.h> 100 #include <sys/ctfs.h> 101 102 #include <libzonecfg.h> 103 #include "zoneadmd.h" 104 105 static char *progname; 106 char *zone_name; /* zone which we are managing */ 107 char brand_name[MAXNAMELEN]; 108 boolean_t zone_isnative; 109 boolean_t zone_iscluster; 110 static zoneid_t zone_id; 111 112 zlog_t logsys; 113 114 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 115 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 116 117 static sema_t scratch_sem; /* for scratch zones */ 118 119 static char zone_door_path[MAXPATHLEN]; 120 static int zone_door = -1; 121 122 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 123 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 124 125 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 126 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 127 #endif 128 129 #define DEFAULT_LOCALE "C" 130 131 static const char * 132 z_cmd_name(zone_cmd_t zcmd) 133 { 134 /* This list needs to match the enum in sys/zone.h */ 135 static const char *zcmdstr[] = { 136 "ready", "boot", "forceboot", "reboot", "halt", 137 "note_uninstalling", "mount", "forcemount", "unmount" 138 }; 139 140 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 141 return ("unknown"); 142 else 143 return (zcmdstr[(int)zcmd]); 144 } 145 146 static char * 147 get_execbasename(char *execfullname) 148 { 149 char *last_slash, *execbasename; 150 151 /* guard against '/' at end of command invocation */ 152 for (;;) { 153 last_slash = strrchr(execfullname, '/'); 154 if (last_slash == NULL) { 155 execbasename = execfullname; 156 break; 157 } else { 158 execbasename = last_slash + 1; 159 if (*execbasename == '\0') { 160 *last_slash = '\0'; 161 continue; 162 } 163 break; 164 } 165 } 166 return (execbasename); 167 } 168 169 static void 170 usage(void) 171 { 172 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 173 (void) fprintf(stderr, 174 gettext("\tNote: %s should not be run directly.\n"), progname); 175 exit(2); 176 } 177 178 /* ARGSUSED */ 179 static void 180 sigchld(int sig) 181 { 182 } 183 184 char * 185 localize_msg(char *locale, const char *msg) 186 { 187 char *out; 188 189 (void) mutex_lock(&msglock); 190 (void) setlocale(LC_MESSAGES, locale); 191 out = gettext(msg); 192 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 193 (void) mutex_unlock(&msglock); 194 return (out); 195 } 196 197 /* PRINTFLIKE3 */ 198 void 199 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 200 { 201 va_list alist; 202 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 203 char *bp; 204 int saved_errno = errno; 205 206 if (zlogp == NULL) 207 return; 208 if (zlogp == &logsys) 209 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 210 zone_name); 211 else 212 buf[0] = '\0'; 213 bp = &(buf[strlen(buf)]); 214 215 /* 216 * In theory, the locale pointer should be set to either "C" or a 217 * char array, so it should never be NULL 218 */ 219 assert(zlogp->locale != NULL); 220 /* Locale is per process, but we are multi-threaded... */ 221 fmt = localize_msg(zlogp->locale, fmt); 222 223 va_start(alist, fmt); 224 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 225 va_end(alist); 226 bp = &(buf[strlen(buf)]); 227 if (use_strerror) 228 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 229 strerror(saved_errno)); 230 if (zlogp == &logsys) { 231 (void) syslog(LOG_ERR, "%s", buf); 232 } else if (zlogp->logfile != NULL) { 233 (void) fprintf(zlogp->logfile, "%s\n", buf); 234 } else { 235 size_t buflen; 236 size_t copylen; 237 238 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 239 copylen = MIN(buflen, zlogp->loglen); 240 zlogp->log += copylen; 241 zlogp->loglen -= copylen; 242 } 243 } 244 245 /* 246 * Emit a warning for any boot arguments which are unrecognized. Since 247 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 248 * put the arguments into an argv style array, use getopt to process them, 249 * and put the resultant argument string back into outargs. 250 * 251 * During the filtering, we pull out any arguments which are truly "boot" 252 * arguments, leaving only those which are to be passed intact to the 253 * progenitor process. The one we support at the moment is -i, which 254 * indicates to the kernel which program should be launched as 'init'. 255 * 256 * A return of Z_INVAL indicates specifically that the arguments are 257 * not valid; this is a non-fatal error. Except for Z_OK, all other return 258 * values are treated as fatal. 259 */ 260 static int 261 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 262 char *init_file, char *badarg) 263 { 264 int argc = 0, argc_save; 265 int i; 266 int err; 267 char *arg, *lasts, **argv = NULL, **argv_save; 268 char zonecfg_args[BOOTARGS_MAX]; 269 char scratchargs[BOOTARGS_MAX], *sargs; 270 char c; 271 272 bzero(outargs, BOOTARGS_MAX); 273 bzero(badarg, BOOTARGS_MAX); 274 275 /* 276 * If the user didn't specify transient boot arguments, check 277 * to see if there were any specified in the zone configuration, 278 * and use them if applicable. 279 */ 280 if (inargs == NULL || inargs[0] == '\0') { 281 zone_dochandle_t handle; 282 if ((handle = zonecfg_init_handle()) == NULL) { 283 zerror(zlogp, B_TRUE, 284 "getting zone configuration handle"); 285 return (Z_BAD_HANDLE); 286 } 287 err = zonecfg_get_snapshot_handle(zone_name, handle); 288 if (err != Z_OK) { 289 zerror(zlogp, B_FALSE, 290 "invalid configuration snapshot"); 291 zonecfg_fini_handle(handle); 292 return (Z_BAD_HANDLE); 293 } 294 295 bzero(zonecfg_args, sizeof (zonecfg_args)); 296 (void) zonecfg_get_bootargs(handle, zonecfg_args, 297 sizeof (zonecfg_args)); 298 inargs = zonecfg_args; 299 zonecfg_fini_handle(handle); 300 } 301 302 if (strlen(inargs) >= BOOTARGS_MAX) { 303 zerror(zlogp, B_FALSE, "boot argument string too long"); 304 return (Z_INVAL); 305 } 306 307 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 308 sargs = scratchargs; 309 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 310 sargs = NULL; 311 argc++; 312 } 313 314 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 315 zerror(zlogp, B_FALSE, "memory allocation failed"); 316 return (Z_NOMEM); 317 } 318 319 argv_save = argv; 320 argc_save = argc; 321 322 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 323 sargs = scratchargs; 324 i = 0; 325 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 326 sargs = NULL; 327 if ((argv[i] = strdup(arg)) == NULL) { 328 err = Z_NOMEM; 329 zerror(zlogp, B_FALSE, "memory allocation failed"); 330 goto done; 331 } 332 i++; 333 } 334 335 /* 336 * We preserve compatibility with the Solaris system boot behavior, 337 * which allows: 338 * 339 * # reboot kernel/unix -s -m verbose 340 * 341 * In this example, kernel/unix tells the booter what file to 342 * boot. We don't want reboot in a zone to be gratuitously different, 343 * so we silently ignore the boot file, if necessary. 344 */ 345 if (argv[0] == NULL) 346 goto done; 347 348 assert(argv[0][0] != ' '); 349 assert(argv[0][0] != '\t'); 350 351 if (argv[0][0] != '-' && argv[0][0] != '\0') { 352 argv = &argv[1]; 353 argc--; 354 } 355 356 optind = 0; 357 opterr = 0; 358 err = Z_OK; 359 while ((c = getopt(argc, argv, "fi:m:s")) != -1) { 360 switch (c) { 361 case 'i': 362 /* 363 * -i is handled by the runtime and is not passed 364 * along to userland 365 */ 366 (void) strlcpy(init_file, optarg, MAXPATHLEN); 367 break; 368 case 'f': 369 /* This has already been processed by zoneadm */ 370 break; 371 case 'm': 372 case 's': 373 /* These pass through unmolested */ 374 (void) snprintf(outargs, BOOTARGS_MAX, 375 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 376 break; 377 case '?': 378 /* 379 * We warn about unknown arguments but pass them 380 * along anyway-- if someone wants to develop their 381 * own init replacement, they can pass it whatever 382 * args they want. 383 */ 384 err = Z_INVAL; 385 (void) snprintf(outargs, BOOTARGS_MAX, 386 "%s -%c", outargs, optopt); 387 (void) snprintf(badarg, BOOTARGS_MAX, 388 "%s -%c", badarg, optopt); 389 break; 390 } 391 } 392 393 /* 394 * For Solaris Zones we warn about and discard non-option arguments. 395 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 396 * to the kernel, we concat up all the other remaining boot args. 397 * and warn on them as a group. 398 */ 399 if (optind < argc) { 400 err = Z_INVAL; 401 while (optind < argc) { 402 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 403 badarg, strlen(badarg) > 0 ? " " : "", 404 argv[optind]); 405 optind++; 406 } 407 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 408 "arguments `%s'.", badarg); 409 } 410 411 done: 412 for (i = 0; i < argc_save; i++) { 413 if (argv_save[i] != NULL) 414 free(argv_save[i]); 415 } 416 free(argv_save); 417 return (err); 418 } 419 420 421 static int 422 mkzonedir(zlog_t *zlogp) 423 { 424 struct stat st; 425 /* 426 * We must create and lock everyone but root out of ZONES_TMPDIR 427 * since anyone can open any UNIX domain socket, regardless of 428 * its file system permissions. Sigh... 429 */ 430 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 431 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 432 return (-1); 433 } 434 /* paranoia */ 435 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 436 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 437 return (-1); 438 } 439 (void) chmod(ZONES_TMPDIR, S_IRWXU); 440 return (0); 441 } 442 443 /* 444 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 445 * 'true' if this is being invoked as part of the processing for the "mount" 446 * subcommand. 447 */ 448 static int 449 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd) 450 { 451 int err; 452 453 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 454 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 455 zonecfg_strerror(err)); 456 return (-1); 457 } 458 459 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 460 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 461 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 462 zonecfg_strerror(err)); 463 return (-1); 464 } 465 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 466 bringup_failure_recovery = B_TRUE; 467 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE); 468 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 469 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 470 zonecfg_strerror(err)); 471 return (-1); 472 } 473 474 return (0); 475 } 476 477 int 478 init_template(void) 479 { 480 int fd; 481 int err = 0; 482 483 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 484 if (fd == -1) 485 return (-1); 486 487 /* 488 * For now, zoneadmd doesn't do anything with the contract. 489 * Deliver no events, don't inherit, and allow it to be orphaned. 490 */ 491 err |= ct_tmpl_set_critical(fd, 0); 492 err |= ct_tmpl_set_informative(fd, 0); 493 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 494 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 495 if (err || ct_tmpl_activate(fd)) { 496 (void) close(fd); 497 return (-1); 498 } 499 500 return (fd); 501 } 502 503 typedef struct fs_callback { 504 zlog_t *zlogp; 505 zoneid_t zoneid; 506 boolean_t mount_cmd; 507 } fs_callback_t; 508 509 static int 510 mount_early_fs(void *data, const char *spec, const char *dir, 511 const char *fstype, const char *opt) 512 { 513 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; 514 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; 515 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd; 516 char rootpath[MAXPATHLEN]; 517 pid_t child; 518 int child_status; 519 int tmpl_fd; 520 int rv; 521 ctid_t ct; 522 523 /* determine the zone rootpath */ 524 if (mount_cmd) { 525 char zonepath[MAXPATHLEN]; 526 char luroot[MAXPATHLEN]; 527 528 assert(zone_isnative || zone_iscluster); 529 530 if (zone_get_zonepath(zone_name, 531 zonepath, sizeof (zonepath)) != Z_OK) { 532 zerror(zlogp, B_FALSE, "unable to determine zone path"); 533 return (-1); 534 } 535 536 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 537 resolve_lofs(zlogp, luroot, sizeof (luroot)); 538 (void) strlcpy(rootpath, luroot, sizeof (rootpath)); 539 } else { 540 if (zone_get_rootpath(zone_name, 541 rootpath, sizeof (rootpath)) != Z_OK) { 542 zerror(zlogp, B_FALSE, "unable to determine zone root"); 543 return (-1); 544 } 545 } 546 547 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) { 548 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 549 rootpath, dir); 550 return (-1); 551 } else if (rv > 0) { 552 /* The mount point path doesn't exist, create it now. */ 553 if (make_one_dir(zlogp, rootpath, dir, 554 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 555 DEFAULT_DIR_GROUP) != 0) { 556 zerror(zlogp, B_FALSE, "failed to create mount point"); 557 return (-1); 558 } 559 560 /* 561 * Now this might seem weird, but we need to invoke 562 * valid_mount_path() again. Why? Because it checks 563 * to make sure that the mount point path is canonical, 564 * which it can only do if the path exists, so now that 565 * we've created the path we have to verify it again. 566 */ 567 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, 568 fstype)) < 0) { 569 zerror(zlogp, B_FALSE, 570 "%s%s is not a valid mount point", rootpath, dir); 571 return (-1); 572 } 573 } 574 575 if ((tmpl_fd = init_template()) == -1) { 576 zerror(zlogp, B_TRUE, "failed to create contract"); 577 return (-1); 578 } 579 580 if ((child = fork()) == -1) { 581 (void) ct_tmpl_clear(tmpl_fd); 582 (void) close(tmpl_fd); 583 zerror(zlogp, B_TRUE, "failed to fork"); 584 return (-1); 585 586 } else if (child == 0) { /* child */ 587 char opt_buf[MAX_MNTOPT_STR]; 588 int optlen = 0; 589 int mflag = MS_DATA; 590 591 (void) ct_tmpl_clear(tmpl_fd); 592 /* 593 * Even though there are no procs running in the zone, we 594 * do this for paranoia's sake. 595 */ 596 (void) closefrom(0); 597 598 if (zone_enter(zoneid) == -1) { 599 _exit(errno); 600 } 601 if (opt != NULL) { 602 /* 603 * The mount() system call is incredibly annoying. 604 * If options are specified, we need to copy them 605 * into a temporary buffer since the mount() system 606 * call will overwrite the options string. It will 607 * also fail if the new option string it wants to 608 * write is bigger than the one we passed in, so 609 * you must pass in a buffer of the maximum possible 610 * option string length. sigh. 611 */ 612 (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); 613 opt = opt_buf; 614 optlen = MAX_MNTOPT_STR; 615 mflag = MS_OPTIONSTR; 616 } 617 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) 618 _exit(errno); 619 _exit(0); 620 } 621 622 /* parent */ 623 if (contract_latest(&ct) == -1) 624 ct = -1; 625 (void) ct_tmpl_clear(tmpl_fd); 626 (void) close(tmpl_fd); 627 if (waitpid(child, &child_status, 0) != child) { 628 /* unexpected: we must have been signalled */ 629 (void) contract_abandon_id(ct); 630 return (-1); 631 } 632 (void) contract_abandon_id(ct); 633 if (WEXITSTATUS(child_status) != 0) { 634 errno = WEXITSTATUS(child_status); 635 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 636 return (-1); 637 } 638 639 return (0); 640 } 641 642 int 643 do_subproc(zlog_t *zlogp, char *cmdbuf) 644 { 645 char inbuf[1024]; /* arbitrary large amount */ 646 FILE *file; 647 int status; 648 649 file = popen(cmdbuf, "r"); 650 if (file == NULL) { 651 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); 652 return (-1); 653 } 654 655 while (fgets(inbuf, sizeof (inbuf), file) != NULL) 656 if (zlogp != &logsys) 657 zerror(zlogp, B_FALSE, "%s", inbuf); 658 status = pclose(file); 659 660 if (WIFSIGNALED(status)) { 661 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 662 "signal %d", cmdbuf, WTERMSIG(status)); 663 return (-1); 664 } 665 assert(WIFEXITED(status)); 666 if (WEXITSTATUS(status) == ZEXIT_EXEC) { 667 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); 668 return (-1); 669 } 670 return (WEXITSTATUS(status)); 671 } 672 673 static int 674 zone_bootup(zlog_t *zlogp, const char *bootargs) 675 { 676 zoneid_t zoneid; 677 struct stat st; 678 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 679 char nbootargs[BOOTARGS_MAX]; 680 char cmdbuf[MAXPATHLEN]; 681 fs_callback_t cb; 682 brand_handle_t bh; 683 int err; 684 685 if (init_console_slave(zlogp) != 0) 686 return (-1); 687 reset_slave_terminal(zlogp); 688 689 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 690 zerror(zlogp, B_TRUE, "unable to get zoneid"); 691 return (-1); 692 } 693 694 cb.zlogp = zlogp; 695 cb.zoneid = zoneid; 696 cb.mount_cmd = B_FALSE; 697 698 /* Get a handle to the brand info for this zone */ 699 if ((bh = brand_open(brand_name)) == NULL) { 700 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 701 return (-1); 702 } 703 704 /* 705 * Get the list of filesystems to mount from the brand 706 * configuration. These mounts are done via a thread that will 707 * enter the zone, so they are done from within the context of the 708 * zone. 709 */ 710 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) { 711 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 712 brand_close(bh); 713 return (-1); 714 } 715 716 /* 717 * Get the brand's boot callback if it exists. 718 */ 719 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 720 zerror(zlogp, B_FALSE, "unable to determine zone path"); 721 brand_close(bh); 722 return (-1); 723 } 724 (void) strcpy(cmdbuf, EXEC_PREFIX); 725 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 726 sizeof (cmdbuf) - EXEC_LEN) != 0) { 727 zerror(zlogp, B_FALSE, 728 "unable to determine branded zone's boot callback"); 729 brand_close(bh); 730 return (-1); 731 } 732 733 /* Get the path for this zone's init(1M) (or equivalent) process. */ 734 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) { 735 zerror(zlogp, B_FALSE, 736 "unable to determine zone's init(1M) location"); 737 brand_close(bh); 738 return (-1); 739 } 740 741 brand_close(bh); 742 743 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 744 bad_boot_arg); 745 if (err == Z_INVAL) 746 eventstream_write(Z_EVT_ZONE_BADARGS); 747 else if (err != Z_OK) 748 return (-1); 749 750 assert(init_file[0] != '\0'); 751 752 /* Try to anticipate possible problems: Make sure init is executable. */ 753 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 754 zerror(zlogp, B_FALSE, "unable to determine zone root"); 755 return (-1); 756 } 757 758 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); 759 760 if (stat(initpath, &st) == -1) { 761 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 762 return (-1); 763 } 764 765 if ((st.st_mode & S_IXUSR) == 0) { 766 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 767 return (-1); 768 } 769 770 /* 771 * If there is a brand 'boot' callback, execute it now to give the 772 * brand one last chance to do any additional setup before the zone 773 * is booted. 774 */ 775 if ((strlen(cmdbuf) > EXEC_LEN) && 776 (do_subproc(zlogp, cmdbuf) != Z_OK)) { 777 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 778 return (-1); 779 } 780 781 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 782 zerror(zlogp, B_TRUE, "could not set zone boot file"); 783 return (-1); 784 } 785 786 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 787 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 788 return (-1); 789 } 790 791 if (zone_boot(zoneid) == -1) { 792 zerror(zlogp, B_TRUE, "unable to boot zone"); 793 return (-1); 794 } 795 796 return (0); 797 } 798 799 static int 800 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) 801 { 802 int err; 803 804 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) { 805 if (!bringup_failure_recovery) 806 zerror(zlogp, B_FALSE, "unable to destroy zone"); 807 return (-1); 808 } 809 810 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 811 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 812 zonecfg_strerror(err)); 813 814 return (0); 815 } 816 817 /* 818 * Generate AUE_zone_state for a command that boots a zone. 819 */ 820 static void 821 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 822 char *new_state) 823 { 824 adt_session_data_t *ah; 825 adt_event_data_t *event; 826 int pass_fail, fail_reason; 827 828 if (!adt_audit_enabled()) 829 return; 830 831 if (return_val == 0) { 832 pass_fail = ADT_SUCCESS; 833 fail_reason = ADT_SUCCESS; 834 } else { 835 pass_fail = ADT_FAILURE; 836 fail_reason = ADT_FAIL_VALUE_PROGRAM; 837 } 838 839 if (adt_start_session(&ah, NULL, 0)) { 840 zerror(zlogp, B_TRUE, gettext("audit failure.")); 841 return; 842 } 843 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 844 zerror(zlogp, B_TRUE, gettext("audit failure.")); 845 (void) adt_end_session(ah); 846 return; 847 } 848 849 event = adt_alloc_event(ah, ADT_zone_state); 850 if (event == NULL) { 851 zerror(zlogp, B_TRUE, gettext("audit failure.")); 852 (void) adt_end_session(ah); 853 return; 854 } 855 event->adt_zone_state.zonename = zone_name; 856 event->adt_zone_state.new_state = new_state; 857 858 if (adt_put_event(event, pass_fail, fail_reason)) 859 zerror(zlogp, B_TRUE, gettext("audit failure.")); 860 861 adt_free_event(event); 862 863 (void) adt_end_session(ah); 864 } 865 866 /* 867 * The main routine for the door server that deals with zone state transitions. 868 */ 869 /* ARGSUSED */ 870 static void 871 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 872 uint_t n_desc) 873 { 874 ucred_t *uc = NULL; 875 const priv_set_t *eset; 876 877 zone_state_t zstate; 878 zone_cmd_t cmd; 879 zone_cmd_arg_t *zargp; 880 881 boolean_t kernelcall; 882 883 int rval = -1; 884 uint64_t uniqid; 885 zoneid_t zoneid = -1; 886 zlog_t zlog; 887 zlog_t *zlogp; 888 zone_cmd_rval_t *rvalp; 889 size_t rlen = getpagesize(); /* conservative */ 890 fs_callback_t cb; 891 brand_handle_t bh; 892 893 /* LINTED E_BAD_PTR_CAST_ALIGN */ 894 zargp = (zone_cmd_arg_t *)args; 895 896 /* 897 * When we get the door unref message, we've fdetach'd the door, and 898 * it is time for us to shut down zoneadmd. 899 */ 900 if (zargp == DOOR_UNREF_DATA) { 901 /* 902 * See comment at end of main() for info on the last rites. 903 */ 904 exit(0); 905 } 906 907 if (zargp == NULL) { 908 (void) door_return(NULL, 0, 0, 0); 909 } 910 911 rvalp = alloca(rlen); 912 bzero(rvalp, rlen); 913 zlog.logfile = NULL; 914 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 915 zlog.buf = rvalp->errbuf; 916 zlog.log = zlog.buf; 917 /* defer initialization of zlog.locale until after credential check */ 918 zlogp = &zlog; 919 920 if (alen != sizeof (zone_cmd_arg_t)) { 921 /* 922 * This really shouldn't be happening. 923 */ 924 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 925 "unexpected (expected %d bytes)", alen, 926 sizeof (zone_cmd_arg_t)); 927 goto out; 928 } 929 cmd = zargp->cmd; 930 931 if (door_ucred(&uc) != 0) { 932 zerror(&logsys, B_TRUE, "door_ucred"); 933 goto out; 934 } 935 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 936 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 937 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 938 ucred_geteuid(uc) != 0)) { 939 zerror(&logsys, B_FALSE, "insufficient privileges"); 940 goto out; 941 } 942 943 kernelcall = ucred_getpid(uc) == 0; 944 945 /* 946 * This is safe because we only use a zlog_t throughout the 947 * duration of a door call; i.e., by the time the pointer 948 * might become invalid, the door call would be over. 949 */ 950 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 951 952 (void) mutex_lock(&lock); 953 954 /* 955 * Once we start to really die off, we don't want more connections. 956 */ 957 if (in_death_throes) { 958 (void) mutex_unlock(&lock); 959 ucred_free(uc); 960 (void) door_return(NULL, 0, 0, 0); 961 thr_exit(NULL); 962 } 963 964 /* 965 * Check for validity of command. 966 */ 967 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && 968 cmd != Z_REBOOT && cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && 969 cmd != Z_MOUNT && cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { 970 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 971 goto out; 972 } 973 974 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 975 /* 976 * Can't happen 977 */ 978 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 979 cmd); 980 goto out; 981 } 982 /* 983 * We ignore the possibility of someone calling zone_create(2) 984 * explicitly; all requests must come through zoneadmd. 985 */ 986 if (zone_get_state(zone_name, &zstate) != Z_OK) { 987 /* 988 * Something terribly wrong happened 989 */ 990 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 991 goto out; 992 } 993 994 if (kernelcall) { 995 /* 996 * Kernel-initiated requests may lose their validity if the 997 * zone_t the kernel was referring to has gone away. 998 */ 999 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 1000 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 1001 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 1002 /* 1003 * We're not talking about the same zone. The request 1004 * must have arrived too late. Return error. 1005 */ 1006 rval = -1; 1007 goto out; 1008 } 1009 zlogp = &logsys; /* Log errors to syslog */ 1010 } 1011 1012 /* 1013 * If we are being asked to forcibly mount or boot a zone, we 1014 * pretend that an INCOMPLETE zone is actually INSTALLED. 1015 */ 1016 if (zstate == ZONE_STATE_INCOMPLETE && 1017 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) 1018 zstate = ZONE_STATE_INSTALLED; 1019 1020 switch (zstate) { 1021 case ZONE_STATE_CONFIGURED: 1022 case ZONE_STATE_INCOMPLETE: 1023 /* 1024 * Not our area of expertise; we just print a nice message 1025 * and die off. 1026 */ 1027 zerror(zlogp, B_FALSE, 1028 "%s operation is invalid for zones in state '%s'", 1029 z_cmd_name(cmd), zone_state_str(zstate)); 1030 break; 1031 1032 case ZONE_STATE_INSTALLED: 1033 switch (cmd) { 1034 case Z_READY: 1035 rval = zone_ready(zlogp, Z_MNT_BOOT); 1036 if (rval == 0) 1037 eventstream_write(Z_EVT_ZONE_READIED); 1038 break; 1039 case Z_BOOT: 1040 case Z_FORCEBOOT: 1041 eventstream_write(Z_EVT_ZONE_BOOTING); 1042 if ((rval = zone_ready(zlogp, Z_MNT_BOOT)) == 0) 1043 rval = zone_bootup(zlogp, zargp->bootbuf); 1044 audit_put_record(zlogp, uc, rval, "boot"); 1045 if (rval != 0) { 1046 bringup_failure_recovery = B_TRUE; 1047 (void) zone_halt(zlogp, B_FALSE, B_FALSE); 1048 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1049 } 1050 break; 1051 case Z_HALT: 1052 if (kernelcall) /* Invalid; can't happen */ 1053 abort(); 1054 /* 1055 * We could have two clients racing to halt this 1056 * zone; the second client loses, but his request 1057 * doesn't fail, since the zone is now in the desired 1058 * state. 1059 */ 1060 zerror(zlogp, B_FALSE, "zone is already halted"); 1061 rval = 0; 1062 break; 1063 case Z_REBOOT: 1064 if (kernelcall) /* Invalid; can't happen */ 1065 abort(); 1066 zerror(zlogp, B_FALSE, "%s operation is invalid " 1067 "for zones in state '%s'", z_cmd_name(cmd), 1068 zone_state_str(zstate)); 1069 rval = -1; 1070 break; 1071 case Z_NOTE_UNINSTALLING: 1072 if (kernelcall) /* Invalid; can't happen */ 1073 abort(); 1074 /* 1075 * Tell the console to print out a message about this. 1076 * Once it does, we will be in_death_throes. 1077 */ 1078 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 1079 break; 1080 case Z_MOUNT: 1081 case Z_FORCEMOUNT: 1082 if (kernelcall) /* Invalid; can't happen */ 1083 abort(); 1084 if (!zone_isnative && !zone_iscluster) { 1085 zerror(zlogp, B_FALSE, 1086 "%s operation is invalid for branded " 1087 "zones", z_cmd_name(cmd)); 1088 rval = -1; 1089 break; 1090 } 1091 1092 rval = zone_ready(zlogp, 1093 strcmp(zargp->bootbuf, "-U") == 0 ? 1094 Z_MNT_UPDATE : Z_MNT_SCRATCH); 1095 if (rval != 0) 1096 break; 1097 1098 eventstream_write(Z_EVT_ZONE_READIED); 1099 1100 /* Get a handle to the brand info for this zone */ 1101 if ((bh = brand_open(brand_name)) == NULL) { 1102 rval = -1; 1103 break; 1104 } 1105 1106 /* 1107 * Get the list of filesystems to mount from 1108 * the brand configuration. These mounts are done 1109 * via a thread that will enter the zone, so they 1110 * are done from within the context of the zone. 1111 */ 1112 cb.zlogp = zlogp; 1113 cb.zoneid = zone_id; 1114 cb.mount_cmd = B_TRUE; 1115 rval = brand_platform_iter_mounts(bh, 1116 mount_early_fs, &cb); 1117 1118 brand_close(bh); 1119 1120 /* 1121 * Ordinarily, /dev/fd would be mounted inside the zone 1122 * by svc:/system/filesystem/usr:default, but since 1123 * we're not booting the zone, we need to do this 1124 * manually. 1125 */ 1126 if (rval == 0) 1127 rval = mount_early_fs(&cb, 1128 "fd", "/dev/fd", "fd", NULL); 1129 break; 1130 case Z_UNMOUNT: 1131 if (kernelcall) /* Invalid; can't happen */ 1132 abort(); 1133 zerror(zlogp, B_FALSE, "zone is already unmounted"); 1134 rval = 0; 1135 break; 1136 } 1137 break; 1138 1139 case ZONE_STATE_READY: 1140 switch (cmd) { 1141 case Z_READY: 1142 /* 1143 * We could have two clients racing to ready this 1144 * zone; the second client loses, but his request 1145 * doesn't fail, since the zone is now in the desired 1146 * state. 1147 */ 1148 zerror(zlogp, B_FALSE, "zone is already ready"); 1149 rval = 0; 1150 break; 1151 case Z_BOOT: 1152 (void) strlcpy(boot_args, zargp->bootbuf, 1153 sizeof (boot_args)); 1154 eventstream_write(Z_EVT_ZONE_BOOTING); 1155 rval = zone_bootup(zlogp, zargp->bootbuf); 1156 audit_put_record(zlogp, uc, rval, "boot"); 1157 if (rval != 0) { 1158 bringup_failure_recovery = B_TRUE; 1159 (void) zone_halt(zlogp, B_FALSE, B_TRUE); 1160 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1161 } 1162 boot_args[0] = '\0'; 1163 break; 1164 case Z_HALT: 1165 if (kernelcall) /* Invalid; can't happen */ 1166 abort(); 1167 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE)) != 0) 1168 break; 1169 eventstream_write(Z_EVT_ZONE_HALTED); 1170 break; 1171 case Z_REBOOT: 1172 case Z_NOTE_UNINSTALLING: 1173 case Z_MOUNT: 1174 case Z_UNMOUNT: 1175 if (kernelcall) /* Invalid; can't happen */ 1176 abort(); 1177 zerror(zlogp, B_FALSE, "%s operation is invalid " 1178 "for zones in state '%s'", z_cmd_name(cmd), 1179 zone_state_str(zstate)); 1180 rval = -1; 1181 break; 1182 } 1183 break; 1184 1185 case ZONE_STATE_MOUNTED: 1186 switch (cmd) { 1187 case Z_UNMOUNT: 1188 if (kernelcall) /* Invalid; can't happen */ 1189 abort(); 1190 rval = zone_halt(zlogp, B_TRUE, B_FALSE); 1191 if (rval == 0) { 1192 eventstream_write(Z_EVT_ZONE_HALTED); 1193 (void) sema_post(&scratch_sem); 1194 } 1195 break; 1196 default: 1197 if (kernelcall) /* Invalid; can't happen */ 1198 abort(); 1199 zerror(zlogp, B_FALSE, "%s operation is invalid " 1200 "for zones in state '%s'", z_cmd_name(cmd), 1201 zone_state_str(zstate)); 1202 rval = -1; 1203 break; 1204 } 1205 break; 1206 1207 case ZONE_STATE_RUNNING: 1208 case ZONE_STATE_SHUTTING_DOWN: 1209 case ZONE_STATE_DOWN: 1210 switch (cmd) { 1211 case Z_READY: 1212 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE)) != 0) 1213 break; 1214 if ((rval = zone_ready(zlogp, Z_MNT_BOOT)) == 0) 1215 eventstream_write(Z_EVT_ZONE_READIED); 1216 else 1217 eventstream_write(Z_EVT_ZONE_HALTED); 1218 break; 1219 case Z_BOOT: 1220 /* 1221 * We could have two clients racing to boot this 1222 * zone; the second client loses, but his request 1223 * doesn't fail, since the zone is now in the desired 1224 * state. 1225 */ 1226 zerror(zlogp, B_FALSE, "zone is already booted"); 1227 rval = 0; 1228 break; 1229 case Z_HALT: 1230 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE)) != 0) 1231 break; 1232 eventstream_write(Z_EVT_ZONE_HALTED); 1233 break; 1234 case Z_REBOOT: 1235 (void) strlcpy(boot_args, zargp->bootbuf, 1236 sizeof (boot_args)); 1237 eventstream_write(Z_EVT_ZONE_REBOOTING); 1238 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE)) != 0) { 1239 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1240 boot_args[0] = '\0'; 1241 break; 1242 } 1243 if ((rval = zone_ready(zlogp, Z_MNT_BOOT)) != 0) { 1244 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1245 boot_args[0] = '\0'; 1246 break; 1247 } 1248 rval = zone_bootup(zlogp, zargp->bootbuf); 1249 audit_put_record(zlogp, uc, rval, "reboot"); 1250 if (rval != 0) { 1251 (void) zone_halt(zlogp, B_FALSE, B_TRUE); 1252 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1253 } 1254 boot_args[0] = '\0'; 1255 break; 1256 case Z_NOTE_UNINSTALLING: 1257 case Z_MOUNT: 1258 case Z_UNMOUNT: 1259 zerror(zlogp, B_FALSE, "%s operation is invalid " 1260 "for zones in state '%s'", z_cmd_name(cmd), 1261 zone_state_str(zstate)); 1262 rval = -1; 1263 break; 1264 } 1265 break; 1266 default: 1267 abort(); 1268 } 1269 1270 /* 1271 * Because the state of the zone may have changed, we make sure 1272 * to wake the console poller, which is in charge of initiating 1273 * the shutdown procedure as necessary. 1274 */ 1275 eventstream_write(Z_EVT_NULL); 1276 1277 out: 1278 (void) mutex_unlock(&lock); 1279 if (kernelcall) { 1280 rvalp = NULL; 1281 rlen = 0; 1282 } else { 1283 rvalp->rval = rval; 1284 } 1285 if (uc != NULL) 1286 ucred_free(uc); 1287 (void) door_return((char *)rvalp, rlen, NULL, 0); 1288 thr_exit(NULL); 1289 } 1290 1291 static int 1292 setup_door(zlog_t *zlogp) 1293 { 1294 if ((zone_door = door_create(server, NULL, 1295 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1296 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1297 return (-1); 1298 } 1299 (void) fdetach(zone_door_path); 1300 1301 if (fattach(zone_door, zone_door_path) != 0) { 1302 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1303 (void) door_revoke(zone_door); 1304 (void) fdetach(zone_door_path); 1305 zone_door = -1; 1306 return (-1); 1307 } 1308 return (0); 1309 } 1310 1311 /* 1312 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 1313 * is where zoneadmd itself will check to see that another instance of 1314 * zoneadmd isn't already controlling this zone. 1315 * 1316 * The idea here is that we want to open the path to which we will 1317 * attach our door, lock it, and then make sure that no-one has beat us 1318 * to fattach(3c)ing onto it. 1319 * 1320 * fattach(3c) is really a mount, so there are actually two possible 1321 * vnodes we could be dealing with. Our strategy is as follows: 1322 * 1323 * - If the file we opened is a regular file (common case): 1324 * There is no fattach(3c)ed door, so we have a chance of becoming 1325 * the managing zoneadmd. We attempt to lock the file: if it is 1326 * already locked, that means someone else raced us here, so we 1327 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 1328 * that beat us to it. 1329 * 1330 * - If the file we opened is a namefs file: 1331 * This means there is already an established door fattach(3c)'ed 1332 * to the rendezvous path. We've lost the race, so we give up. 1333 * Note that in this case we also try to grab the file lock, and 1334 * will succeed in acquiring it since the vnode locked by the 1335 * "winning" zoneadmd was a regular one, and the one we locked was 1336 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1337 * we just return to zoneadm(1m) which knows to retry. 1338 */ 1339 static int 1340 make_daemon_exclusive(zlog_t *zlogp) 1341 { 1342 int doorfd = -1; 1343 int err, ret = -1; 1344 struct stat st; 1345 struct flock flock; 1346 zone_state_t zstate; 1347 1348 top: 1349 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1350 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1351 zonecfg_strerror(err)); 1352 goto out; 1353 } 1354 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1355 S_IREAD|S_IWRITE)) < 0) { 1356 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1357 goto out; 1358 } 1359 if (fstat(doorfd, &st) < 0) { 1360 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1361 goto out; 1362 } 1363 /* 1364 * Lock the file to synchronize with other zoneadmd 1365 */ 1366 flock.l_type = F_WRLCK; 1367 flock.l_whence = SEEK_SET; 1368 flock.l_start = (off_t)0; 1369 flock.l_len = (off_t)0; 1370 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1371 /* 1372 * Someone else raced us here and grabbed the lock file 1373 * first. A warning here is inappropriate since nothing 1374 * went wrong. 1375 */ 1376 goto out; 1377 } 1378 1379 if (strcmp(st.st_fstype, "namefs") == 0) { 1380 struct door_info info; 1381 1382 /* 1383 * There is already something fattach()'ed to this file. 1384 * Lets see what the door is up to. 1385 */ 1386 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1387 /* 1388 * Another zoneadmd process seems to be in 1389 * control of the situation and we don't need to 1390 * be here. A warning here is inappropriate 1391 * since nothing went wrong. 1392 * 1393 * If the door has been revoked, the zoneadmd 1394 * process currently managing the zone is going 1395 * away. We'll return control to zoneadm(1m) 1396 * which will try again (by which time zoneadmd 1397 * will hopefully have exited). 1398 */ 1399 goto out; 1400 } 1401 1402 /* 1403 * If we got this far, there's a fattach(3c)'ed door 1404 * that belongs to a process that has exited, which can 1405 * happen if the previous zoneadmd died unexpectedly. 1406 * 1407 * Let user know that something is amiss, but that we can 1408 * recover; if the zone is in the installed state, then don't 1409 * message, since having a running zoneadmd isn't really 1410 * expected/needed. We want to keep occurences of this message 1411 * limited to times when zoneadmd is picking back up from a 1412 * zoneadmd that died while the zone was in some non-trivial 1413 * state. 1414 */ 1415 if (zstate > ZONE_STATE_INSTALLED) { 1416 zerror(zlogp, B_FALSE, 1417 "zone '%s': WARNING: zone is in state '%s', but " 1418 "zoneadmd does not appear to be available; " 1419 "restarted zoneadmd to recover.", 1420 zone_name, zone_state_str(zstate)); 1421 } 1422 1423 (void) fdetach(zone_door_path); 1424 (void) close(doorfd); 1425 goto top; 1426 } 1427 ret = 0; 1428 out: 1429 (void) close(doorfd); 1430 return (ret); 1431 } 1432 1433 int 1434 main(int argc, char *argv[]) 1435 { 1436 int opt; 1437 zoneid_t zid; 1438 priv_set_t *privset; 1439 zone_state_t zstate; 1440 char parents_locale[MAXPATHLEN]; 1441 brand_handle_t bh; 1442 int err; 1443 1444 pid_t pid; 1445 sigset_t blockset; 1446 sigset_t block_cld; 1447 1448 struct { 1449 sema_t sem; 1450 int status; 1451 zlog_t log; 1452 } *shstate; 1453 size_t shstatelen = getpagesize(); 1454 1455 zlog_t errlog; 1456 zlog_t *zlogp; 1457 1458 int ctfd; 1459 1460 progname = get_execbasename(argv[0]); 1461 1462 /* 1463 * Make sure stderr is unbuffered 1464 */ 1465 (void) setbuffer(stderr, NULL, 0); 1466 1467 /* 1468 * Get out of the way of mounted filesystems, since we will daemonize 1469 * soon. 1470 */ 1471 (void) chdir("/"); 1472 1473 /* 1474 * Use the default system umask per PSARC 1998/110 rather than 1475 * anything that may have been set by the caller. 1476 */ 1477 (void) umask(CMASK); 1478 1479 /* 1480 * Initially we want to use our parent's locale. 1481 */ 1482 (void) setlocale(LC_ALL, ""); 1483 (void) textdomain(TEXT_DOMAIN); 1484 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1485 sizeof (parents_locale)); 1486 1487 /* 1488 * This zlog_t is used for writing to stderr 1489 */ 1490 errlog.logfile = stderr; 1491 errlog.buflen = errlog.loglen = 0; 1492 errlog.buf = errlog.log = NULL; 1493 errlog.locale = parents_locale; 1494 1495 /* 1496 * We start off writing to stderr until we're ready to daemonize. 1497 */ 1498 zlogp = &errlog; 1499 1500 /* 1501 * Process options. 1502 */ 1503 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1504 switch (opt) { 1505 case 'R': 1506 zonecfg_set_root(optarg); 1507 break; 1508 case 'z': 1509 zone_name = optarg; 1510 break; 1511 default: 1512 usage(); 1513 } 1514 } 1515 1516 if (zone_name == NULL) 1517 usage(); 1518 1519 /* 1520 * Because usage() prints directly to stderr, it has gettext() 1521 * wrapping, which depends on the locale. But since zerror() calls 1522 * localize() which tweaks the locale, it is not safe to call zerror() 1523 * until after the last call to usage(). Fortunately, the last call 1524 * to usage() is just above and the first call to zerror() is just 1525 * below. Don't mess this up. 1526 */ 1527 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1528 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1529 GLOBAL_ZONENAME); 1530 return (1); 1531 } 1532 1533 if (zone_get_id(zone_name, &zid) != 0) { 1534 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 1535 zonecfg_strerror(Z_NO_ZONE)); 1536 return (1); 1537 } 1538 1539 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1540 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1541 zonecfg_strerror(err)); 1542 return (1); 1543 } 1544 if (zstate < ZONE_STATE_INCOMPLETE) { 1545 zerror(zlogp, B_FALSE, 1546 "cannot manage a zone which is in state '%s'", 1547 zone_state_str(zstate)); 1548 return (1); 1549 } 1550 1551 /* Get a handle to the brand info for this zone */ 1552 if ((zone_get_brand(zone_name, brand_name, sizeof (brand_name)) 1553 != Z_OK) || (bh = brand_open(brand_name)) == NULL) { 1554 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1555 return (1); 1556 } 1557 zone_isnative = brand_is_native(bh); 1558 zone_iscluster = (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0); 1559 brand_close(bh); 1560 1561 /* 1562 * Check that we have all privileges. It would be nice to pare 1563 * this down, but this is at least a first cut. 1564 */ 1565 if ((privset = priv_allocset()) == NULL) { 1566 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1567 return (1); 1568 } 1569 1570 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 1571 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 1572 priv_freeset(privset); 1573 return (1); 1574 } 1575 1576 if (priv_isfullset(privset) == B_FALSE) { 1577 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 1578 "run this command (all privs required)"); 1579 priv_freeset(privset); 1580 return (1); 1581 } 1582 priv_freeset(privset); 1583 1584 if (mkzonedir(zlogp) != 0) 1585 return (1); 1586 1587 /* 1588 * Pre-fork: setup shared state 1589 */ 1590 if ((shstate = (void *)mmap(NULL, shstatelen, 1591 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 1592 MAP_FAILED) { 1593 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 1594 return (1); 1595 } 1596 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 1597 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 1598 (void) munmap((char *)shstate, shstatelen); 1599 return (1); 1600 } 1601 shstate->log.logfile = NULL; 1602 shstate->log.buflen = shstatelen - sizeof (*shstate); 1603 shstate->log.loglen = shstate->log.buflen; 1604 shstate->log.buf = (char *)shstate + sizeof (*shstate); 1605 shstate->log.log = shstate->log.buf; 1606 shstate->log.locale = parents_locale; 1607 shstate->status = -1; 1608 1609 /* 1610 * We need a SIGCHLD handler so the sema_wait() below will wake 1611 * up if the child dies without doing a sema_post(). 1612 */ 1613 (void) sigset(SIGCHLD, sigchld); 1614 /* 1615 * We must mask SIGCHLD until after we've coped with the fork 1616 * sufficiently to deal with it; otherwise we can race and 1617 * receive the signal before pid has been initialized 1618 * (yes, this really happens). 1619 */ 1620 (void) sigemptyset(&block_cld); 1621 (void) sigaddset(&block_cld, SIGCHLD); 1622 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 1623 1624 if ((ctfd = init_template()) == -1) { 1625 zerror(zlogp, B_TRUE, "failed to create contract"); 1626 return (1); 1627 } 1628 1629 /* 1630 * Do not let another thread localize a message while we are forking. 1631 */ 1632 (void) mutex_lock(&msglock); 1633 pid = fork(); 1634 (void) mutex_unlock(&msglock); 1635 1636 /* 1637 * In all cases (parent, child, and in the event of an error) we 1638 * don't want to cause creation of contracts on subsequent fork()s. 1639 */ 1640 (void) ct_tmpl_clear(ctfd); 1641 (void) close(ctfd); 1642 1643 if (pid == -1) { 1644 zerror(zlogp, B_TRUE, "could not fork"); 1645 return (1); 1646 1647 } else if (pid > 0) { /* parent */ 1648 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1649 /* 1650 * This marks a window of vulnerability in which we receive 1651 * the SIGCLD before falling into sema_wait (normally we would 1652 * get woken up from sema_wait with EINTR upon receipt of 1653 * SIGCLD). So we may need to use some other scheme like 1654 * sema_posting in the sigcld handler. 1655 * blech 1656 */ 1657 (void) sema_wait(&shstate->sem); 1658 (void) sema_destroy(&shstate->sem); 1659 if (shstate->status != 0) 1660 (void) waitpid(pid, NULL, WNOHANG); 1661 /* 1662 * It's ok if we die with SIGPIPE. It's not like we could have 1663 * done anything about it. 1664 */ 1665 (void) fprintf(stderr, "%s", shstate->log.buf); 1666 _exit(shstate->status == 0 ? 0 : 1); 1667 } 1668 1669 /* 1670 * The child charges on. 1671 */ 1672 (void) sigset(SIGCHLD, SIG_DFL); 1673 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1674 1675 /* 1676 * SIGPIPE can be delivered if we write to a socket for which the 1677 * peer endpoint is gone. That can lead to too-early termination 1678 * of zoneadmd, and that's not good eats. 1679 */ 1680 (void) sigset(SIGPIPE, SIG_IGN); 1681 /* 1682 * Stop using stderr 1683 */ 1684 zlogp = &shstate->log; 1685 1686 /* 1687 * We don't need stdout/stderr from now on. 1688 */ 1689 closefrom(0); 1690 1691 /* 1692 * Initialize the syslog zlog_t. This needs to be done after 1693 * the call to closefrom(). 1694 */ 1695 logsys.buf = logsys.log = NULL; 1696 logsys.buflen = logsys.loglen = 0; 1697 logsys.logfile = NULL; 1698 logsys.locale = DEFAULT_LOCALE; 1699 1700 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 1701 1702 /* 1703 * The eventstream is used to publish state changes in the zone 1704 * from the door threads to the console I/O poller. 1705 */ 1706 if (eventstream_init() == -1) { 1707 zerror(zlogp, B_TRUE, "unable to create eventstream"); 1708 goto child_out; 1709 } 1710 1711 (void) snprintf(zone_door_path, sizeof (zone_door_path), 1712 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 1713 1714 /* 1715 * See if another zoneadmd is running for this zone. If not, then we 1716 * can now modify system state. 1717 */ 1718 if (make_daemon_exclusive(zlogp) == -1) 1719 goto child_out; 1720 1721 1722 /* 1723 * Create/join a new session; we need to be careful of what we do with 1724 * the console from now on so we don't end up being the session leader 1725 * for the terminal we're going to be handing out. 1726 */ 1727 (void) setsid(); 1728 1729 /* 1730 * This thread shouldn't be receiving any signals; in particular, 1731 * SIGCHLD should be received by the thread doing the fork(). 1732 */ 1733 (void) sigfillset(&blockset); 1734 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 1735 1736 /* 1737 * Setup the console device and get ready to serve the console; 1738 * once this has completed, we're ready to let console clients 1739 * make an attempt to connect (they will block until 1740 * serve_console_sock() below gets called, and any pending 1741 * connection is accept()ed). 1742 */ 1743 if (!zonecfg_in_alt_root() && init_console(zlogp) == -1) 1744 goto child_out; 1745 1746 /* 1747 * Take the lock now, so that when the door server gets going, we 1748 * are guaranteed that it won't take a request until we are sure 1749 * that everything is completely set up. See the child_out: label 1750 * below to see why this matters. 1751 */ 1752 (void) mutex_lock(&lock); 1753 1754 /* Init semaphore for scratch zones. */ 1755 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 1756 zerror(zlogp, B_TRUE, 1757 "failed to initialize semaphore for scratch zone"); 1758 goto child_out; 1759 } 1760 1761 /* 1762 * Note: door setup must occur *after* the console is setup. 1763 * This is so that as zlogin tests the door to see if zoneadmd 1764 * is ready yet, we know that the console will get serviced 1765 * once door_info() indicates that the door is "up". 1766 */ 1767 if (setup_door(zlogp) == -1) 1768 goto child_out; 1769 1770 /* 1771 * Things seem OK so far; tell the parent process that we're done 1772 * with setup tasks. This will cause the parent to exit, signalling 1773 * to zoneadm, zlogin, or whatever forked it that we are ready to 1774 * service requests. 1775 */ 1776 shstate->status = 0; 1777 (void) sema_post(&shstate->sem); 1778 (void) munmap((char *)shstate, shstatelen); 1779 shstate = NULL; 1780 1781 (void) mutex_unlock(&lock); 1782 1783 /* 1784 * zlogp is now invalid, so reset it to the syslog logger. 1785 */ 1786 zlogp = &logsys; 1787 1788 /* 1789 * Now that we are free of any parents, switch to the default locale. 1790 */ 1791 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 1792 1793 /* 1794 * At this point the setup portion of main() is basically done, so 1795 * we reuse this thread to manage the zone console. When 1796 * serve_console() has returned, we are past the point of no return 1797 * in the life of this zoneadmd. 1798 */ 1799 if (zonecfg_in_alt_root()) { 1800 /* 1801 * This is just awful, but mounted scratch zones don't (and 1802 * can't) have consoles. We just wait for unmount instead. 1803 */ 1804 while (sema_wait(&scratch_sem) == EINTR) 1805 ; 1806 } else { 1807 serve_console(zlogp); 1808 assert(in_death_throes); 1809 } 1810 1811 /* 1812 * This is the next-to-last part of the exit interlock. Upon calling 1813 * fdetach(), the door will go unreferenced; once any 1814 * outstanding requests (like the door thread doing Z_HALT) are 1815 * done, the door will get an UNREF notification; when it handles 1816 * the UNREF, the door server will cause the exit. 1817 */ 1818 assert(!MUTEX_HELD(&lock)); 1819 (void) fdetach(zone_door_path); 1820 for (;;) 1821 (void) pause(); 1822 1823 child_out: 1824 assert(pid == 0); 1825 if (shstate != NULL) { 1826 shstate->status = -1; 1827 (void) sema_post(&shstate->sem); 1828 (void) munmap((char *)shstate, shstatelen); 1829 } 1830 1831 /* 1832 * This might trigger an unref notification, but if so, 1833 * we are still holding the lock, so our call to exit will 1834 * ultimately win the race and will publish the right exit 1835 * code. 1836 */ 1837 if (zone_door != -1) { 1838 assert(MUTEX_HELD(&lock)); 1839 (void) door_revoke(zone_door); 1840 (void) fdetach(zone_door_path); 1841 } 1842 return (1); /* return from main() forcibly exits an MT process */ 1843 } 1844