1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * zoneadmd manages zones; one zoneadmd process is launched for each 29 * non-global zone on the system. This daemon juggles four jobs: 30 * 31 * - Implement setup and teardown of the zone "virtual platform": mount and 32 * unmount filesystems; create and destroy network interfaces; communicate 33 * with devfsadmd to lay out devices for the zone; instantiate the zone 34 * console device; configure process runtime attributes such as resource 35 * controls, pool bindings, fine-grained privileges. 36 * 37 * - Launch the zone's init(1M) process. 38 * 39 * - Implement a door server; clients (like zoneadm) connect to the door 40 * server and request zone state changes. The kernel is also a client of 41 * this door server. A request to halt or reboot the zone which originates 42 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 43 * 44 * One minor problem is that messages emitted by zoneadmd need to be passed 45 * back to the zoneadm process making the request. 
These messages need to 46 * be rendered in the client's locale; so, this is passed in as part of the 47 * request. The exception is the kernel upcall to zoneadmd, in which case 48 * messages are syslog'd. 49 * 50 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 51 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 52 * strings which do not need to be translated. 53 * 54 * - Act as a console server for zlogin -C processes; see comments in zcons.c 55 * for more information about the zone console architecture. 56 * 57 * DESIGN NOTES 58 * 59 * Restart: 60 * A chief design constraint of zoneadmd is that it should be restartable in 61 * the case that the administrator kills it off, or it suffers a fatal error, 62 * without the running zone being impacted; this is akin to being able to 63 * reboot the service processor of a server without affecting the OS instance. 64 */ 65 66 #include <sys/param.h> 67 #include <sys/mman.h> 68 #include <sys/types.h> 69 #include <sys/stat.h> 70 #include <sys/sysmacros.h> 71 72 #include <bsm/adt.h> 73 #include <bsm/adt_event.h> 74 75 #include <alloca.h> 76 #include <assert.h> 77 #include <errno.h> 78 #include <door.h> 79 #include <fcntl.h> 80 #include <locale.h> 81 #include <signal.h> 82 #include <stdarg.h> 83 #include <stdio.h> 84 #include <stdlib.h> 85 #include <string.h> 86 #include <strings.h> 87 #include <synch.h> 88 #include <syslog.h> 89 #include <thread.h> 90 #include <unistd.h> 91 #include <wait.h> 92 #include <limits.h> 93 #include <zone.h> 94 #include <libbrand.h> 95 #include <sys/brand.h> 96 #include <libcontract.h> 97 #include <libcontract_priv.h> 98 #include <sys/contract/process.h> 99 #include <sys/ctfs.h> 100 101 #include <libzonecfg.h> 102 #include "zoneadmd.h" 103 104 static char *progname; 105 char *zone_name; /* zone which we are managing */ 106 char brand_name[MAXNAMELEN]; 107 boolean_t zone_isnative; 108 boolean_t zone_iscluster; 109 boolean_t zone_islabeled; 110 
static zoneid_t zone_id; 111 dladm_handle_t dld_handle = NULL; 112 113 static char pre_statechg_hook[2 * MAXPATHLEN]; 114 static char post_statechg_hook[2 * MAXPATHLEN]; 115 char query_hook[2 * MAXPATHLEN]; 116 117 zlog_t logsys; 118 119 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 120 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 121 122 static sema_t scratch_sem; /* for scratch zones */ 123 124 static char zone_door_path[MAXPATHLEN]; 125 static int zone_door = -1; 126 127 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 128 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 129 130 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 131 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 132 #endif 133 134 #define DEFAULT_LOCALE "C" 135 136 static const char * 137 z_cmd_name(zone_cmd_t zcmd) 138 { 139 /* This list needs to match the enum in sys/zone.h */ 140 static const char *zcmdstr[] = { 141 "ready", "boot", "forceboot", "reboot", "halt", 142 "note_uninstalling", "mount", "forcemount", "unmount" 143 }; 144 145 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 146 return ("unknown"); 147 else 148 return (zcmdstr[(int)zcmd]); 149 } 150 151 static char * 152 get_execbasename(char *execfullname) 153 { 154 char *last_slash, *execbasename; 155 156 /* guard against '/' at end of command invocation */ 157 for (;;) { 158 last_slash = strrchr(execfullname, '/'); 159 if (last_slash == NULL) { 160 execbasename = execfullname; 161 break; 162 } else { 163 execbasename = last_slash + 1; 164 if (*execbasename == '\0') { 165 *last_slash = '\0'; 166 continue; 167 } 168 break; 169 } 170 } 171 return (execbasename); 172 } 173 174 static void 175 usage(void) 176 { 177 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 178 (void) fprintf(stderr, 179 gettext("\tNote: %s should not be run directly.\n"), progname); 180 exit(2); 181 } 182 183 /* ARGSUSED */ 184 static void 185 sigchld(int 
sig) 186 { 187 } 188 189 char * 190 localize_msg(char *locale, const char *msg) 191 { 192 char *out; 193 194 (void) mutex_lock(&msglock); 195 (void) setlocale(LC_MESSAGES, locale); 196 out = gettext(msg); 197 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 198 (void) mutex_unlock(&msglock); 199 return (out); 200 } 201 202 /* PRINTFLIKE3 */ 203 void 204 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 205 { 206 va_list alist; 207 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 208 char *bp; 209 int saved_errno = errno; 210 211 if (zlogp == NULL) 212 return; 213 if (zlogp == &logsys) 214 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 215 zone_name); 216 else 217 buf[0] = '\0'; 218 bp = &(buf[strlen(buf)]); 219 220 /* 221 * In theory, the locale pointer should be set to either "C" or a 222 * char array, so it should never be NULL 223 */ 224 assert(zlogp->locale != NULL); 225 /* Locale is per process, but we are multi-threaded... */ 226 fmt = localize_msg(zlogp->locale, fmt); 227 228 va_start(alist, fmt); 229 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 230 va_end(alist); 231 bp = &(buf[strlen(buf)]); 232 if (use_strerror) 233 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 234 strerror(saved_errno)); 235 if (zlogp == &logsys) { 236 (void) syslog(LOG_ERR, "%s", buf); 237 } else if (zlogp->logfile != NULL) { 238 (void) fprintf(zlogp->logfile, "%s\n", buf); 239 } else { 240 size_t buflen; 241 size_t copylen; 242 243 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 244 copylen = MIN(buflen, zlogp->loglen); 245 zlogp->log += copylen; 246 zlogp->loglen -= copylen; 247 } 248 } 249 250 /* 251 * Emit a warning for any boot arguments which are unrecognized. Since 252 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 253 * put the arguments into an argv style array, use getopt to process them, 254 * and put the resultant argument string back into outargs. 
255 * 256 * During the filtering, we pull out any arguments which are truly "boot" 257 * arguments, leaving only those which are to be passed intact to the 258 * progenitor process. The one we support at the moment is -i, which 259 * indicates to the kernel which program should be launched as 'init'. 260 * 261 * A return of Z_INVAL indicates specifically that the arguments are 262 * not valid; this is a non-fatal error. Except for Z_OK, all other return 263 * values are treated as fatal. 264 */ 265 static int 266 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 267 char *init_file, char *badarg) 268 { 269 int argc = 0, argc_save; 270 int i; 271 int err; 272 char *arg, *lasts, **argv = NULL, **argv_save; 273 char zonecfg_args[BOOTARGS_MAX]; 274 char scratchargs[BOOTARGS_MAX], *sargs; 275 char c; 276 277 bzero(outargs, BOOTARGS_MAX); 278 bzero(badarg, BOOTARGS_MAX); 279 280 /* 281 * If the user didn't specify transient boot arguments, check 282 * to see if there were any specified in the zone configuration, 283 * and use them if applicable. 
284 */ 285 if (inargs == NULL || inargs[0] == '\0') { 286 zone_dochandle_t handle; 287 if ((handle = zonecfg_init_handle()) == NULL) { 288 zerror(zlogp, B_TRUE, 289 "getting zone configuration handle"); 290 return (Z_BAD_HANDLE); 291 } 292 err = zonecfg_get_snapshot_handle(zone_name, handle); 293 if (err != Z_OK) { 294 zerror(zlogp, B_FALSE, 295 "invalid configuration snapshot"); 296 zonecfg_fini_handle(handle); 297 return (Z_BAD_HANDLE); 298 } 299 300 bzero(zonecfg_args, sizeof (zonecfg_args)); 301 (void) zonecfg_get_bootargs(handle, zonecfg_args, 302 sizeof (zonecfg_args)); 303 inargs = zonecfg_args; 304 zonecfg_fini_handle(handle); 305 } 306 307 if (strlen(inargs) >= BOOTARGS_MAX) { 308 zerror(zlogp, B_FALSE, "boot argument string too long"); 309 return (Z_INVAL); 310 } 311 312 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 313 sargs = scratchargs; 314 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 315 sargs = NULL; 316 argc++; 317 } 318 319 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 320 zerror(zlogp, B_FALSE, "memory allocation failed"); 321 return (Z_NOMEM); 322 } 323 324 argv_save = argv; 325 argc_save = argc; 326 327 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 328 sargs = scratchargs; 329 i = 0; 330 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 331 sargs = NULL; 332 if ((argv[i] = strdup(arg)) == NULL) { 333 err = Z_NOMEM; 334 zerror(zlogp, B_FALSE, "memory allocation failed"); 335 goto done; 336 } 337 i++; 338 } 339 340 /* 341 * We preserve compatibility with the Solaris system boot behavior, 342 * which allows: 343 * 344 * # reboot kernel/unix -s -m verbose 345 * 346 * In this example, kernel/unix tells the booter what file to 347 * boot. We don't want reboot in a zone to be gratuitously different, 348 * so we silently ignore the boot file, if necessary. 
349 */ 350 if (argv[0] == NULL) 351 goto done; 352 353 assert(argv[0][0] != ' '); 354 assert(argv[0][0] != '\t'); 355 356 if (argv[0][0] != '-' && argv[0][0] != '\0') { 357 argv = &argv[1]; 358 argc--; 359 } 360 361 optind = 0; 362 opterr = 0; 363 err = Z_OK; 364 while ((c = getopt(argc, argv, "fi:m:s")) != -1) { 365 switch (c) { 366 case 'i': 367 /* 368 * -i is handled by the runtime and is not passed 369 * along to userland 370 */ 371 (void) strlcpy(init_file, optarg, MAXPATHLEN); 372 break; 373 case 'f': 374 /* This has already been processed by zoneadm */ 375 break; 376 case 'm': 377 case 's': 378 /* These pass through unmolested */ 379 (void) snprintf(outargs, BOOTARGS_MAX, 380 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 381 break; 382 case '?': 383 /* 384 * We warn about unknown arguments but pass them 385 * along anyway-- if someone wants to develop their 386 * own init replacement, they can pass it whatever 387 * args they want. 388 */ 389 err = Z_INVAL; 390 (void) snprintf(outargs, BOOTARGS_MAX, 391 "%s -%c", outargs, optopt); 392 (void) snprintf(badarg, BOOTARGS_MAX, 393 "%s -%c", badarg, optopt); 394 break; 395 } 396 } 397 398 /* 399 * For Solaris Zones we warn about and discard non-option arguments. 400 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 401 * to the kernel, we concat up all the other remaining boot args. 402 * and warn on them as a group. 403 */ 404 if (optind < argc) { 405 err = Z_INVAL; 406 while (optind < argc) { 407 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 408 badarg, strlen(badarg) > 0 ? 
" " : "", 409 argv[optind]); 410 optind++; 411 } 412 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 413 "arguments `%s'.", badarg); 414 } 415 416 done: 417 for (i = 0; i < argc_save; i++) { 418 if (argv_save[i] != NULL) 419 free(argv_save[i]); 420 } 421 free(argv_save); 422 return (err); 423 } 424 425 426 static int 427 mkzonedir(zlog_t *zlogp) 428 { 429 struct stat st; 430 /* 431 * We must create and lock everyone but root out of ZONES_TMPDIR 432 * since anyone can open any UNIX domain socket, regardless of 433 * its file system permissions. Sigh... 434 */ 435 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 436 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 437 return (-1); 438 } 439 /* paranoia */ 440 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 441 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 442 return (-1); 443 } 444 (void) chmod(ZONES_TMPDIR, S_IRWXU); 445 return (0); 446 } 447 448 /* 449 * Run the brand's pre-state change callback, if it exists. 450 */ 451 static int 452 brand_prestatechg(zlog_t *zlogp, int state, int cmd) 453 { 454 char cmdbuf[2 * MAXPATHLEN]; 455 456 if (pre_statechg_hook[0] == '\0') 457 return (0); 458 459 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d", pre_statechg_hook, 460 state, cmd) > sizeof (cmdbuf)) 461 return (-1); 462 463 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 464 return (-1); 465 466 return (0); 467 } 468 469 /* 470 * Run the brand's post-state change callback, if it exists. 471 */ 472 static int 473 brand_poststatechg(zlog_t *zlogp, int state, int cmd) 474 { 475 char cmdbuf[2 * MAXPATHLEN]; 476 477 if (post_statechg_hook[0] == '\0') 478 return (0); 479 480 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d", post_statechg_hook, 481 state, cmd) > sizeof (cmdbuf)) 482 return (-1); 483 484 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 485 return (-1); 486 487 return (0); 488 } 489 490 /* 491 * Bring a zone up to the pre-boot "ready" stage. 
The mount_cmd argument is 492 * 'true' if this is being invoked as part of the processing for the "mount" 493 * subcommand. 494 */ 495 static int 496 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate) 497 { 498 int err; 499 500 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0) 501 return (-1); 502 503 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 504 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 505 zonecfg_strerror(err)); 506 goto bad; 507 } 508 509 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 510 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 511 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 512 zonecfg_strerror(err)); 513 goto bad; 514 } 515 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 516 bringup_failure_recovery = B_TRUE; 517 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE); 518 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 519 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 520 zonecfg_strerror(err)); 521 goto bad; 522 } 523 524 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0) 525 goto bad; 526 527 return (0); 528 529 bad: 530 /* 531 * If something goes wrong, we up the zones's state to the target 532 * state, READY, and then invoke the hook as if we're halting. 533 */ 534 (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT); 535 return (-1); 536 } 537 538 int 539 init_template(void) 540 { 541 int fd; 542 int err = 0; 543 544 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 545 if (fd == -1) 546 return (-1); 547 548 /* 549 * For now, zoneadmd doesn't do anything with the contract. 550 * Deliver no events, don't inherit, and allow it to be orphaned. 
551 */ 552 err |= ct_tmpl_set_critical(fd, 0); 553 err |= ct_tmpl_set_informative(fd, 0); 554 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 555 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 556 if (err || ct_tmpl_activate(fd)) { 557 (void) close(fd); 558 return (-1); 559 } 560 561 return (fd); 562 } 563 564 typedef struct fs_callback { 565 zlog_t *zlogp; 566 zoneid_t zoneid; 567 boolean_t mount_cmd; 568 } fs_callback_t; 569 570 static int 571 mount_early_fs(void *data, const char *spec, const char *dir, 572 const char *fstype, const char *opt) 573 { 574 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; 575 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; 576 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd; 577 char rootpath[MAXPATHLEN]; 578 pid_t child; 579 int child_status; 580 int tmpl_fd; 581 int rv; 582 ctid_t ct; 583 584 /* determine the zone rootpath */ 585 if (mount_cmd) { 586 char zonepath[MAXPATHLEN]; 587 char luroot[MAXPATHLEN]; 588 589 if (zone_get_zonepath(zone_name, 590 zonepath, sizeof (zonepath)) != Z_OK) { 591 zerror(zlogp, B_FALSE, "unable to determine zone path"); 592 return (-1); 593 } 594 595 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 596 resolve_lofs(zlogp, luroot, sizeof (luroot)); 597 (void) strlcpy(rootpath, luroot, sizeof (rootpath)); 598 } else { 599 if (zone_get_rootpath(zone_name, 600 rootpath, sizeof (rootpath)) != Z_OK) { 601 zerror(zlogp, B_FALSE, "unable to determine zone root"); 602 return (-1); 603 } 604 } 605 606 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) { 607 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 608 rootpath, dir); 609 return (-1); 610 } else if (rv > 0) { 611 /* The mount point path doesn't exist, create it now. 
*/ 612 if (make_one_dir(zlogp, rootpath, dir, 613 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 614 DEFAULT_DIR_GROUP) != 0) { 615 zerror(zlogp, B_FALSE, "failed to create mount point"); 616 return (-1); 617 } 618 619 /* 620 * Now this might seem weird, but we need to invoke 621 * valid_mount_path() again. Why? Because it checks 622 * to make sure that the mount point path is canonical, 623 * which it can only do if the path exists, so now that 624 * we've created the path we have to verify it again. 625 */ 626 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, 627 fstype)) < 0) { 628 zerror(zlogp, B_FALSE, 629 "%s%s is not a valid mount point", rootpath, dir); 630 return (-1); 631 } 632 } 633 634 if ((tmpl_fd = init_template()) == -1) { 635 zerror(zlogp, B_TRUE, "failed to create contract"); 636 return (-1); 637 } 638 639 if ((child = fork()) == -1) { 640 (void) ct_tmpl_clear(tmpl_fd); 641 (void) close(tmpl_fd); 642 zerror(zlogp, B_TRUE, "failed to fork"); 643 return (-1); 644 645 } else if (child == 0) { /* child */ 646 char opt_buf[MAX_MNTOPT_STR]; 647 int optlen = 0; 648 int mflag = MS_DATA; 649 650 (void) ct_tmpl_clear(tmpl_fd); 651 /* 652 * Even though there are no procs running in the zone, we 653 * do this for paranoia's sake. 654 */ 655 (void) closefrom(0); 656 657 if (zone_enter(zoneid) == -1) { 658 _exit(errno); 659 } 660 if (opt != NULL) { 661 /* 662 * The mount() system call is incredibly annoying. 663 * If options are specified, we need to copy them 664 * into a temporary buffer since the mount() system 665 * call will overwrite the options string. It will 666 * also fail if the new option string it wants to 667 * write is bigger than the one we passed in, so 668 * you must pass in a buffer of the maximum possible 669 * option string length. sigh. 
670 */ 671 (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); 672 opt = opt_buf; 673 optlen = MAX_MNTOPT_STR; 674 mflag = MS_OPTIONSTR; 675 } 676 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) 677 _exit(errno); 678 _exit(0); 679 } 680 681 /* parent */ 682 if (contract_latest(&ct) == -1) 683 ct = -1; 684 (void) ct_tmpl_clear(tmpl_fd); 685 (void) close(tmpl_fd); 686 if (waitpid(child, &child_status, 0) != child) { 687 /* unexpected: we must have been signalled */ 688 (void) contract_abandon_id(ct); 689 return (-1); 690 } 691 (void) contract_abandon_id(ct); 692 if (WEXITSTATUS(child_status) != 0) { 693 errno = WEXITSTATUS(child_status); 694 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 695 return (-1); 696 } 697 698 return (0); 699 } 700 701 /* 702 * If retstr is not NULL, the output of the subproc is returned in the str, 703 * otherwise it is output using zerror(). Any memory allocated for retstr 704 * should be freed by the caller. 705 */ 706 int 707 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr) 708 { 709 char buf[1024]; /* arbitrary large amount */ 710 char *inbuf; 711 FILE *file; 712 int status; 713 int rd_cnt; 714 715 if (retstr != NULL) { 716 if ((*retstr = malloc(1024)) == NULL) { 717 zerror(zlogp, B_FALSE, "out of memory"); 718 return (-1); 719 } 720 inbuf = *retstr; 721 rd_cnt = 0; 722 } else { 723 inbuf = buf; 724 } 725 726 file = popen(cmdbuf, "r"); 727 if (file == NULL) { 728 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); 729 return (-1); 730 } 731 732 while (fgets(inbuf, 1024, file) != NULL) { 733 if (retstr == NULL) { 734 if (zlogp != &logsys) 735 zerror(zlogp, B_FALSE, "%s", inbuf); 736 } else { 737 char *p; 738 739 rd_cnt += 1024 - 1; 740 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) { 741 zerror(zlogp, B_FALSE, "out of memory"); 742 (void) pclose(file); 743 return (-1); 744 } 745 746 *retstr = p; 747 inbuf = *retstr + rd_cnt; 748 } 749 } 750 status = pclose(file); 751 752 if (WIFSIGNALED(status)) { 753 
zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 754 "signal %d", cmdbuf, WTERMSIG(status)); 755 return (-1); 756 } 757 assert(WIFEXITED(status)); 758 if (WEXITSTATUS(status) == ZEXIT_EXEC) { 759 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); 760 return (-1); 761 } 762 return (WEXITSTATUS(status)); 763 } 764 765 static int 766 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) 767 { 768 zoneid_t zoneid; 769 struct stat st; 770 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 771 char nbootargs[BOOTARGS_MAX]; 772 char cmdbuf[MAXPATHLEN]; 773 fs_callback_t cb; 774 brand_handle_t bh; 775 int err; 776 777 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0) 778 return (-1); 779 780 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 781 zerror(zlogp, B_TRUE, "unable to get zoneid"); 782 goto bad; 783 } 784 785 cb.zlogp = zlogp; 786 cb.zoneid = zoneid; 787 cb.mount_cmd = B_FALSE; 788 789 /* Get a handle to the brand info for this zone */ 790 if ((bh = brand_open(brand_name)) == NULL) { 791 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 792 goto bad; 793 } 794 795 /* 796 * Get the list of filesystems to mount from the brand 797 * configuration. These mounts are done via a thread that will 798 * enter the zone, so they are done from within the context of the 799 * zone. 800 */ 801 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) { 802 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 803 brand_close(bh); 804 goto bad; 805 } 806 807 /* 808 * Get the brand's boot callback if it exists. 
809 */ 810 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 811 zerror(zlogp, B_FALSE, "unable to determine zone path"); 812 brand_close(bh); 813 goto bad; 814 } 815 (void) strcpy(cmdbuf, EXEC_PREFIX); 816 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 817 sizeof (cmdbuf) - EXEC_LEN) != 0) { 818 zerror(zlogp, B_FALSE, 819 "unable to determine branded zone's boot callback"); 820 brand_close(bh); 821 goto bad; 822 } 823 824 /* Get the path for this zone's init(1M) (or equivalent) process. */ 825 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) { 826 zerror(zlogp, B_FALSE, 827 "unable to determine zone's init(1M) location"); 828 brand_close(bh); 829 goto bad; 830 } 831 832 brand_close(bh); 833 834 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 835 bad_boot_arg); 836 if (err == Z_INVAL) 837 eventstream_write(Z_EVT_ZONE_BADARGS); 838 else if (err != Z_OK) 839 goto bad; 840 841 assert(init_file[0] != '\0'); 842 843 /* Try to anticipate possible problems: Make sure init is executable. */ 844 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 845 zerror(zlogp, B_FALSE, "unable to determine zone root"); 846 goto bad; 847 } 848 849 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); 850 851 if (stat(initpath, &st) == -1) { 852 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 853 goto bad; 854 } 855 856 if ((st.st_mode & S_IXUSR) == 0) { 857 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 858 goto bad; 859 } 860 861 /* 862 * If there is a brand 'boot' callback, execute it now to give the 863 * brand one last chance to do any additional setup before the zone 864 * is booted. 
865 */ 866 if ((strlen(cmdbuf) > EXEC_LEN) && 867 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) { 868 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 869 goto bad; 870 } 871 872 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 873 zerror(zlogp, B_TRUE, "could not set zone boot file"); 874 goto bad; 875 } 876 877 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 878 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 879 goto bad; 880 } 881 882 if (zone_boot(zoneid) == -1) { 883 zerror(zlogp, B_TRUE, "unable to boot zone"); 884 goto bad; 885 } 886 887 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0) 888 goto bad; 889 890 return (0); 891 892 bad: 893 /* 894 * If something goes wrong, we up the zones's state to the target 895 * state, RUNNING, and then invoke the hook as if we're halting. 896 */ 897 (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT); 898 return (-1); 899 } 900 901 static int 902 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate) 903 { 904 int err; 905 906 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0) 907 return (-1); 908 909 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) { 910 if (!bringup_failure_recovery) 911 zerror(zlogp, B_FALSE, "unable to destroy zone"); 912 return (-1); 913 } 914 915 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 916 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 917 zonecfg_strerror(err)); 918 919 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0) 920 return (-1); 921 922 return (0); 923 } 924 925 /* 926 * Generate AUE_zone_state for a command that boots a zone. 
927 */ 928 static void 929 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 930 char *new_state) 931 { 932 adt_session_data_t *ah; 933 adt_event_data_t *event; 934 int pass_fail, fail_reason; 935 936 if (!adt_audit_enabled()) 937 return; 938 939 if (return_val == 0) { 940 pass_fail = ADT_SUCCESS; 941 fail_reason = ADT_SUCCESS; 942 } else { 943 pass_fail = ADT_FAILURE; 944 fail_reason = ADT_FAIL_VALUE_PROGRAM; 945 } 946 947 if (adt_start_session(&ah, NULL, 0)) { 948 zerror(zlogp, B_TRUE, gettext("audit failure.")); 949 return; 950 } 951 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 952 zerror(zlogp, B_TRUE, gettext("audit failure.")); 953 (void) adt_end_session(ah); 954 return; 955 } 956 957 event = adt_alloc_event(ah, ADT_zone_state); 958 if (event == NULL) { 959 zerror(zlogp, B_TRUE, gettext("audit failure.")); 960 (void) adt_end_session(ah); 961 return; 962 } 963 event->adt_zone_state.zonename = zone_name; 964 event->adt_zone_state.new_state = new_state; 965 966 if (adt_put_event(event, pass_fail, fail_reason)) 967 zerror(zlogp, B_TRUE, gettext("audit failure.")); 968 969 adt_free_event(event); 970 971 (void) adt_end_session(ah); 972 } 973 974 /* 975 * The main routine for the door server that deals with zone state transitions. 976 */ 977 /* ARGSUSED */ 978 static void 979 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 980 uint_t n_desc) 981 { 982 ucred_t *uc = NULL; 983 const priv_set_t *eset; 984 985 zone_state_t zstate; 986 zone_cmd_t cmd; 987 zone_cmd_arg_t *zargp; 988 989 boolean_t kernelcall; 990 991 int rval = -1; 992 uint64_t uniqid; 993 zoneid_t zoneid = -1; 994 zlog_t zlog; 995 zlog_t *zlogp; 996 zone_cmd_rval_t *rvalp; 997 size_t rlen = getpagesize(); /* conservative */ 998 fs_callback_t cb; 999 brand_handle_t bh; 1000 1001 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1002 zargp = (zone_cmd_arg_t *)args; 1003 1004 /* 1005 * When we get the door unref message, we've fdetach'd the door, and 1006 * it is time for us to shut down zoneadmd. 
1007 */ 1008 if (zargp == DOOR_UNREF_DATA) { 1009 /* 1010 * See comment at end of main() for info on the last rites. 1011 */ 1012 exit(0); 1013 } 1014 1015 if (zargp == NULL) { 1016 (void) door_return(NULL, 0, 0, 0); 1017 } 1018 1019 rvalp = alloca(rlen); 1020 bzero(rvalp, rlen); 1021 zlog.logfile = NULL; 1022 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 1023 zlog.buf = rvalp->errbuf; 1024 zlog.log = zlog.buf; 1025 /* defer initialization of zlog.locale until after credential check */ 1026 zlogp = &zlog; 1027 1028 if (alen != sizeof (zone_cmd_arg_t)) { 1029 /* 1030 * This really shouldn't be happening. 1031 */ 1032 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 1033 "unexpected (expected %d bytes)", alen, 1034 sizeof (zone_cmd_arg_t)); 1035 goto out; 1036 } 1037 cmd = zargp->cmd; 1038 1039 if (door_ucred(&uc) != 0) { 1040 zerror(&logsys, B_TRUE, "door_ucred"); 1041 goto out; 1042 } 1043 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 1044 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 1045 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 1046 ucred_geteuid(uc) != 0)) { 1047 zerror(&logsys, B_FALSE, "insufficient privileges"); 1048 goto out; 1049 } 1050 1051 kernelcall = ucred_getpid(uc) == 0; 1052 1053 /* 1054 * This is safe because we only use a zlog_t throughout the 1055 * duration of a door call; i.e., by the time the pointer 1056 * might become invalid, the door call would be over. 1057 */ 1058 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 1059 1060 (void) mutex_lock(&lock); 1061 1062 /* 1063 * Once we start to really die off, we don't want more connections. 1064 */ 1065 if (in_death_throes) { 1066 (void) mutex_unlock(&lock); 1067 ucred_free(uc); 1068 (void) door_return(NULL, 0, 0, 0); 1069 thr_exit(NULL); 1070 } 1071 1072 /* 1073 * Check for validity of command. 
1074 */ 1075 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && 1076 cmd != Z_REBOOT && cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && 1077 cmd != Z_MOUNT && cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { 1078 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 1079 goto out; 1080 } 1081 1082 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 1083 /* 1084 * Can't happen 1085 */ 1086 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 1087 cmd); 1088 goto out; 1089 } 1090 /* 1091 * We ignore the possibility of someone calling zone_create(2) 1092 * explicitly; all requests must come through zoneadmd. 1093 */ 1094 if (zone_get_state(zone_name, &zstate) != Z_OK) { 1095 /* 1096 * Something terribly wrong happened 1097 */ 1098 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 1099 goto out; 1100 } 1101 1102 if (kernelcall) { 1103 /* 1104 * Kernel-initiated requests may lose their validity if the 1105 * zone_t the kernel was referring to has gone away. 1106 */ 1107 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 1108 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 1109 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 1110 /* 1111 * We're not talking about the same zone. The request 1112 * must have arrived too late. Return error. 1113 */ 1114 rval = -1; 1115 goto out; 1116 } 1117 zlogp = &logsys; /* Log errors to syslog */ 1118 } 1119 1120 /* 1121 * If we are being asked to forcibly mount or boot a zone, we 1122 * pretend that an INCOMPLETE zone is actually INSTALLED. 1123 */ 1124 if (zstate == ZONE_STATE_INCOMPLETE && 1125 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) 1126 zstate = ZONE_STATE_INSTALLED; 1127 1128 switch (zstate) { 1129 case ZONE_STATE_CONFIGURED: 1130 case ZONE_STATE_INCOMPLETE: 1131 /* 1132 * Not our area of expertise; we just print a nice message 1133 * and die off. 
1134 */ 1135 zerror(zlogp, B_FALSE, 1136 "%s operation is invalid for zones in state '%s'", 1137 z_cmd_name(cmd), zone_state_str(zstate)); 1138 break; 1139 1140 case ZONE_STATE_INSTALLED: 1141 switch (cmd) { 1142 case Z_READY: 1143 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate); 1144 if (rval == 0) 1145 eventstream_write(Z_EVT_ZONE_READIED); 1146 break; 1147 case Z_BOOT: 1148 case Z_FORCEBOOT: 1149 eventstream_write(Z_EVT_ZONE_BOOTING); 1150 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1151 == 0) { 1152 rval = zone_bootup(zlogp, zargp->bootbuf, 1153 zstate); 1154 } 1155 audit_put_record(zlogp, uc, rval, "boot"); 1156 if (rval != 0) { 1157 bringup_failure_recovery = B_TRUE; 1158 (void) zone_halt(zlogp, B_FALSE, B_FALSE, 1159 zstate); 1160 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1161 } 1162 break; 1163 case Z_HALT: 1164 if (kernelcall) /* Invalid; can't happen */ 1165 abort(); 1166 /* 1167 * We could have two clients racing to halt this 1168 * zone; the second client loses, but his request 1169 * doesn't fail, since the zone is now in the desired 1170 * state. 1171 */ 1172 zerror(zlogp, B_FALSE, "zone is already halted"); 1173 rval = 0; 1174 break; 1175 case Z_REBOOT: 1176 if (kernelcall) /* Invalid; can't happen */ 1177 abort(); 1178 zerror(zlogp, B_FALSE, "%s operation is invalid " 1179 "for zones in state '%s'", z_cmd_name(cmd), 1180 zone_state_str(zstate)); 1181 rval = -1; 1182 break; 1183 case Z_NOTE_UNINSTALLING: 1184 if (kernelcall) /* Invalid; can't happen */ 1185 abort(); 1186 /* 1187 * Tell the console to print out a message about this. 1188 * Once it does, we will be in_death_throes. 1189 */ 1190 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 1191 break; 1192 case Z_MOUNT: 1193 case Z_FORCEMOUNT: 1194 if (kernelcall) /* Invalid; can't happen */ 1195 abort(); 1196 if (!zone_isnative && !zone_iscluster && 1197 !zone_islabeled) { 1198 /* 1199 * -U mounts the zone without lofs mounting 1200 * zone file systems back into the scratch 1201 * zone. 
This is required when mounting 1202 * non-native branded zones. 1203 */ 1204 (void) strlcpy(zargp->bootbuf, "-U", 1205 BOOTARGS_MAX); 1206 } 1207 1208 rval = zone_ready(zlogp, 1209 strcmp(zargp->bootbuf, "-U") == 0 ? 1210 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate); 1211 if (rval != 0) 1212 break; 1213 1214 eventstream_write(Z_EVT_ZONE_READIED); 1215 1216 /* 1217 * Get a handle to the native brand info. 1218 * We must always use the native brand file system 1219 * list when mounting the zone. 1220 */ 1221 if ((bh = brand_open(NATIVE_BRAND_NAME)) == NULL) { 1222 rval = -1; 1223 break; 1224 } 1225 1226 /* 1227 * Get the list of filesystems to mount from 1228 * the brand configuration. These mounts are done 1229 * via a thread that will enter the zone, so they 1230 * are done from within the context of the zone. 1231 */ 1232 cb.zlogp = zlogp; 1233 cb.zoneid = zone_id; 1234 cb.mount_cmd = B_TRUE; 1235 rval = brand_platform_iter_mounts(bh, 1236 mount_early_fs, &cb); 1237 1238 brand_close(bh); 1239 1240 /* 1241 * Ordinarily, /dev/fd would be mounted inside the zone 1242 * by svc:/system/filesystem/usr:default, but since 1243 * we're not booting the zone, we need to do this 1244 * manually. 1245 */ 1246 if (rval == 0) 1247 rval = mount_early_fs(&cb, 1248 "fd", "/dev/fd", "fd", NULL); 1249 break; 1250 case Z_UNMOUNT: 1251 if (kernelcall) /* Invalid; can't happen */ 1252 abort(); 1253 zerror(zlogp, B_FALSE, "zone is already unmounted"); 1254 rval = 0; 1255 break; 1256 } 1257 break; 1258 1259 case ZONE_STATE_READY: 1260 switch (cmd) { 1261 case Z_READY: 1262 /* 1263 * We could have two clients racing to ready this 1264 * zone; the second client loses, but his request 1265 * doesn't fail, since the zone is now in the desired 1266 * state. 
1267 */ 1268 zerror(zlogp, B_FALSE, "zone is already ready"); 1269 rval = 0; 1270 break; 1271 case Z_BOOT: 1272 (void) strlcpy(boot_args, zargp->bootbuf, 1273 sizeof (boot_args)); 1274 eventstream_write(Z_EVT_ZONE_BOOTING); 1275 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1276 audit_put_record(zlogp, uc, rval, "boot"); 1277 if (rval != 0) { 1278 bringup_failure_recovery = B_TRUE; 1279 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1280 zstate); 1281 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1282 } 1283 boot_args[0] = '\0'; 1284 break; 1285 case Z_HALT: 1286 if (kernelcall) /* Invalid; can't happen */ 1287 abort(); 1288 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1289 != 0) 1290 break; 1291 eventstream_write(Z_EVT_ZONE_HALTED); 1292 break; 1293 case Z_REBOOT: 1294 case Z_NOTE_UNINSTALLING: 1295 case Z_MOUNT: 1296 case Z_UNMOUNT: 1297 if (kernelcall) /* Invalid; can't happen */ 1298 abort(); 1299 zerror(zlogp, B_FALSE, "%s operation is invalid " 1300 "for zones in state '%s'", z_cmd_name(cmd), 1301 zone_state_str(zstate)); 1302 rval = -1; 1303 break; 1304 } 1305 break; 1306 1307 case ZONE_STATE_MOUNTED: 1308 switch (cmd) { 1309 case Z_UNMOUNT: 1310 if (kernelcall) /* Invalid; can't happen */ 1311 abort(); 1312 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate); 1313 if (rval == 0) { 1314 eventstream_write(Z_EVT_ZONE_HALTED); 1315 (void) sema_post(&scratch_sem); 1316 } 1317 break; 1318 default: 1319 if (kernelcall) /* Invalid; can't happen */ 1320 abort(); 1321 zerror(zlogp, B_FALSE, "%s operation is invalid " 1322 "for zones in state '%s'", z_cmd_name(cmd), 1323 zone_state_str(zstate)); 1324 rval = -1; 1325 break; 1326 } 1327 break; 1328 1329 case ZONE_STATE_RUNNING: 1330 case ZONE_STATE_SHUTTING_DOWN: 1331 case ZONE_STATE_DOWN: 1332 switch (cmd) { 1333 case Z_READY: 1334 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1335 != 0) 1336 break; 1337 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0) 1338 eventstream_write(Z_EVT_ZONE_READIED); 
1339 else 1340 eventstream_write(Z_EVT_ZONE_HALTED); 1341 break; 1342 case Z_BOOT: 1343 /* 1344 * We could have two clients racing to boot this 1345 * zone; the second client loses, but his request 1346 * doesn't fail, since the zone is now in the desired 1347 * state. 1348 */ 1349 zerror(zlogp, B_FALSE, "zone is already booted"); 1350 rval = 0; 1351 break; 1352 case Z_HALT: 1353 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1354 != 0) 1355 break; 1356 eventstream_write(Z_EVT_ZONE_HALTED); 1357 break; 1358 case Z_REBOOT: 1359 (void) strlcpy(boot_args, zargp->bootbuf, 1360 sizeof (boot_args)); 1361 eventstream_write(Z_EVT_ZONE_REBOOTING); 1362 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1363 != 0) { 1364 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1365 boot_args[0] = '\0'; 1366 break; 1367 } 1368 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1369 != 0) { 1370 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1371 boot_args[0] = '\0'; 1372 break; 1373 } 1374 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1375 audit_put_record(zlogp, uc, rval, "reboot"); 1376 if (rval != 0) { 1377 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1378 zstate); 1379 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1380 } 1381 boot_args[0] = '\0'; 1382 break; 1383 case Z_NOTE_UNINSTALLING: 1384 case Z_MOUNT: 1385 case Z_UNMOUNT: 1386 zerror(zlogp, B_FALSE, "%s operation is invalid " 1387 "for zones in state '%s'", z_cmd_name(cmd), 1388 zone_state_str(zstate)); 1389 rval = -1; 1390 break; 1391 } 1392 break; 1393 default: 1394 abort(); 1395 } 1396 1397 /* 1398 * Because the state of the zone may have changed, we make sure 1399 * to wake the console poller, which is in charge of initiating 1400 * the shutdown procedure as necessary. 
*/
	eventstream_write(Z_EVT_NULL);

out:
	(void) mutex_unlock(&lock);
	if (kernelcall) {
		rvalp = NULL;
		rlen = 0;
	} else {
		rvalp->rval = rval;
	}
	if (uc != NULL)
		ucred_free(uc);
	(void) door_return((char *)rvalp, rlen, NULL, 0);
	thr_exit(NULL);
}

/*
 * Create the door whose server procedure is server() and fattach(3c) it to
 * zone_door_path. Failures are reported through zlogp.
 *
 * Returns 0 once the door is attached, -1 otherwise. When the attach step
 * fails the door is revoked and zone_door is reset to -1.
 */
static int
setup_door(zlog_t *zlogp)
{
	zone_door = door_create(server, NULL,
	    DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL);
	if (zone_door < 0) {
		zerror(zlogp, B_TRUE, "%s failed", "door_create");
		return (-1);
	}

	/* Detach whatever may still be attached to the rendezvous path. */
	(void) fdetach(zone_door_path);

	if (fattach(zone_door, zone_door_path) == 0)
		return (0);

	/* Attach failed: report it, then undo the door creation. */
	zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path);
	(void) door_revoke(zone_door);
	(void) fdetach(zone_door_path);
	zone_door = -1;
	return (-1);
}

/*
 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this
 * is where zoneadmd itself will check to see that another instance of
 * zoneadmd isn't already controlling this zone.
 *
 * The idea here is that we want to open the path to which we will
 * attach our door, lock it, and then make sure that no-one has beat us
 * to fattach(3c)ing onto it.
 *
 * fattach(3c) is really a mount, so there are actually two possible
 * vnodes we could be dealing with. Our strategy is as follows:
 *
 * - If the file we opened is a regular file (common case):
 *	There is no fattach(3c)ed door, so we have a chance of becoming
 *	the managing zoneadmd. We attempt to lock the file: if it is
 *	already locked, that means someone else raced us here, so we
 *	lose and give up. zoneadm(1m) will try to contact the zoneadmd
 *	that beat us to it.
 *
 * - If the file we opened is a namefs file:
 *	This means there is already an established door fattach(3c)'ed
 *	to the rendezvous path. We've lost the race, so we give up.
*	Note that in this case we also try to grab the file lock, and
 *	will succeed in acquiring it since the vnode locked by the
 *	"winning" zoneadmd was a regular one, and the one we locked was
 *	the fattach(3c)'ed door node. At any rate, no harm is done, and
 *	we just return to zoneadm(1m) which knows to retry.
 *
 *	The one exception is a door whose door_info(3c) lookup fails or
 *	reports no target process (di_target == -1): we fdetach(3c) the
 *	path and start over from the top.
 *
 * Returns 0 when the rendezvous file is not a namefs node and we were able
 * to lock it; returns -1 when another zoneadmd is in control, when we lost
 * the race for the lock, or when an error occurred (errors are reported
 * through zlogp). The descriptor opened here is always closed before
 * returning.
 */
static int
make_daemon_exclusive(zlog_t *zlogp)
{
	int doorfd = -1;
	int err, ret = -1;
	struct stat st;
	struct flock flock;
	zone_state_t zstate;

top:
	if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) {
		zerror(zlogp, B_FALSE, "failed to get zone state: %s",
		    zonecfg_strerror(err));
		goto out;
	}
	if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR,
	    S_IREAD|S_IWRITE)) < 0) {
		zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path);
		goto out;
	}
	if (fstat(doorfd, &st) < 0) {
		zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path);
		goto out;
	}
	/*
	 * Lock the file to synchronize with other zoneadmd
	 */
	flock.l_type = F_WRLCK;
	flock.l_whence = SEEK_SET;
	flock.l_start = (off_t)0;
	flock.l_len = (off_t)0;
	if (fcntl(doorfd, F_SETLK, &flock) < 0) {
		/*
		 * Someone else raced us here and grabbed the lock file
		 * first. A warning here is inappropriate since nothing
		 * went wrong.
		 */
		goto out;
	}

	if (strcmp(st.st_fstype, "namefs") == 0) {
		struct door_info info;

		/*
		 * There is already something fattach()'ed to this file.
		 * Lets see what the door is up to.
		 */
		if (door_info(doorfd, &info) == 0 && info.di_target != -1) {
			/*
			 * Another zoneadmd process seems to be in
			 * control of the situation and we don't need to
			 * be here. A warning here is inappropriate
			 * since nothing went wrong.
			 *
			 * If the door has been revoked, the zoneadmd
			 * process currently managing the zone is going
			 * away. We'll return control to zoneadm(1m)
			 * which will try again (by which time zoneadmd
			 * will hopefully have exited).
			 */
			goto out;
		}

		/*
		 * If we got this far, there's a fattach(3c)'ed door
		 * that belongs to a process that has exited, which can
		 * happen if the previous zoneadmd died unexpectedly.
		 *
		 * Let user know that something is amiss, but that we can
		 * recover; if the zone is in the installed state, then don't
		 * message, since having a running zoneadmd isn't really
		 * expected/needed. We want to keep occurrences of this message
		 * limited to times when zoneadmd is picking back up from a
		 * zoneadmd that died while the zone was in some non-trivial
		 * state.
		 */
		if (zstate > ZONE_STATE_INSTALLED) {
			zerror(zlogp, B_FALSE,
			    "zone '%s': WARNING: zone is in state '%s', but "
			    "zoneadmd does not appear to be available; "
			    "restarted zoneadmd to recover.",
			    zone_name, zone_state_str(zstate));
		}

		(void) fdetach(zone_door_path);
		(void) close(doorfd);
		/*
		 * Forget the descriptor we just closed. If the retry bails
		 * out before open() assigns a new one, the cleanup at out:
		 * must not close this (now stale) descriptor number again.
		 */
		doorfd = -1;
		goto top;
	}
	ret = 0;
out:
	/* doorfd is negative if we never opened (or already closed) it. */
	if (doorfd >= 0)
		(void) close(doorfd);
	return (ret);
}

/*
 * Setup the brand's pre and post state change callbacks, as well as the
 * query callback, if any of these exist.
1563 */ 1564 static int 1565 brand_callback_init(brand_handle_t bh, char *zone_name) 1566 { 1567 char zpath[MAXPATHLEN]; 1568 1569 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) 1570 return (-1); 1571 1572 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX, 1573 sizeof (pre_statechg_hook)); 1574 1575 if (brand_get_prestatechange(bh, zone_name, zpath, 1576 pre_statechg_hook + EXEC_LEN, 1577 sizeof (pre_statechg_hook) - EXEC_LEN) != 0) 1578 return (-1); 1579 1580 if (strlen(pre_statechg_hook) <= EXEC_LEN) 1581 pre_statechg_hook[0] = '\0'; 1582 1583 (void) strlcpy(post_statechg_hook, EXEC_PREFIX, 1584 sizeof (post_statechg_hook)); 1585 1586 if (brand_get_poststatechange(bh, zone_name, zpath, 1587 post_statechg_hook + EXEC_LEN, 1588 sizeof (post_statechg_hook) - EXEC_LEN) != 0) 1589 return (-1); 1590 1591 if (strlen(post_statechg_hook) <= EXEC_LEN) 1592 post_statechg_hook[0] = '\0'; 1593 1594 (void) strlcpy(query_hook, EXEC_PREFIX, 1595 sizeof (query_hook)); 1596 1597 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN, 1598 sizeof (query_hook) - EXEC_LEN) != 0) 1599 return (-1); 1600 1601 if (strlen(query_hook) <= EXEC_LEN) 1602 query_hook[0] = '\0'; 1603 1604 return (0); 1605 } 1606 1607 int 1608 main(int argc, char *argv[]) 1609 { 1610 int opt; 1611 zoneid_t zid; 1612 priv_set_t *privset; 1613 zone_state_t zstate; 1614 char parents_locale[MAXPATHLEN]; 1615 brand_handle_t bh; 1616 int err; 1617 1618 pid_t pid; 1619 sigset_t blockset; 1620 sigset_t block_cld; 1621 1622 struct { 1623 sema_t sem; 1624 int status; 1625 zlog_t log; 1626 } *shstate; 1627 size_t shstatelen = getpagesize(); 1628 1629 zlog_t errlog; 1630 zlog_t *zlogp; 1631 1632 int ctfd; 1633 1634 progname = get_execbasename(argv[0]); 1635 1636 /* 1637 * Make sure stderr is unbuffered 1638 */ 1639 (void) setbuffer(stderr, NULL, 0); 1640 1641 /* 1642 * Get out of the way of mounted filesystems, since we will daemonize 1643 * soon. 
1644 */ 1645 (void) chdir("/"); 1646 1647 /* 1648 * Use the default system umask per PSARC 1998/110 rather than 1649 * anything that may have been set by the caller. 1650 */ 1651 (void) umask(CMASK); 1652 1653 /* 1654 * Initially we want to use our parent's locale. 1655 */ 1656 (void) setlocale(LC_ALL, ""); 1657 (void) textdomain(TEXT_DOMAIN); 1658 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1659 sizeof (parents_locale)); 1660 1661 /* 1662 * This zlog_t is used for writing to stderr 1663 */ 1664 errlog.logfile = stderr; 1665 errlog.buflen = errlog.loglen = 0; 1666 errlog.buf = errlog.log = NULL; 1667 errlog.locale = parents_locale; 1668 1669 /* 1670 * We start off writing to stderr until we're ready to daemonize. 1671 */ 1672 zlogp = &errlog; 1673 1674 /* 1675 * Process options. 1676 */ 1677 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1678 switch (opt) { 1679 case 'R': 1680 zonecfg_set_root(optarg); 1681 break; 1682 case 'z': 1683 zone_name = optarg; 1684 break; 1685 default: 1686 usage(); 1687 } 1688 } 1689 1690 if (zone_name == NULL) 1691 usage(); 1692 1693 /* 1694 * Because usage() prints directly to stderr, it has gettext() 1695 * wrapping, which depends on the locale. But since zerror() calls 1696 * localize() which tweaks the locale, it is not safe to call zerror() 1697 * until after the last call to usage(). Fortunately, the last call 1698 * to usage() is just above and the first call to zerror() is just 1699 * below. Don't mess this up. 
1700 */ 1701 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1702 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1703 GLOBAL_ZONENAME); 1704 return (1); 1705 } 1706 1707 if (zone_get_id(zone_name, &zid) != 0) { 1708 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 1709 zonecfg_strerror(Z_NO_ZONE)); 1710 return (1); 1711 } 1712 1713 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1714 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1715 zonecfg_strerror(err)); 1716 return (1); 1717 } 1718 if (zstate < ZONE_STATE_INCOMPLETE) { 1719 zerror(zlogp, B_FALSE, 1720 "cannot manage a zone which is in state '%s'", 1721 zone_state_str(zstate)); 1722 return (1); 1723 } 1724 1725 /* Get a handle to the brand info for this zone */ 1726 if ((zone_get_brand(zone_name, brand_name, sizeof (brand_name)) 1727 != Z_OK) || (bh = brand_open(brand_name)) == NULL) { 1728 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1729 return (1); 1730 } 1731 zone_isnative = brand_is_native(bh); 1732 zone_iscluster = (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0); 1733 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0); 1734 1735 /* Get state change brand hooks. */ 1736 if (brand_callback_init(bh, zone_name) == -1) { 1737 zerror(zlogp, B_TRUE, 1738 "failed to initialize brand state change hooks"); 1739 brand_close(bh); 1740 return (1); 1741 } 1742 1743 brand_close(bh); 1744 1745 /* 1746 * Check that we have all privileges. It would be nice to pare 1747 * this down, but this is at least a first cut. 
1748 */ 1749 if ((privset = priv_allocset()) == NULL) { 1750 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1751 return (1); 1752 } 1753 1754 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 1755 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 1756 priv_freeset(privset); 1757 return (1); 1758 } 1759 1760 if (priv_isfullset(privset) == B_FALSE) { 1761 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 1762 "run this command (all privs required)"); 1763 priv_freeset(privset); 1764 return (1); 1765 } 1766 priv_freeset(privset); 1767 1768 if (mkzonedir(zlogp) != 0) 1769 return (1); 1770 1771 /* 1772 * Pre-fork: setup shared state 1773 */ 1774 if ((shstate = (void *)mmap(NULL, shstatelen, 1775 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 1776 MAP_FAILED) { 1777 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 1778 return (1); 1779 } 1780 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 1781 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 1782 (void) munmap((char *)shstate, shstatelen); 1783 return (1); 1784 } 1785 shstate->log.logfile = NULL; 1786 shstate->log.buflen = shstatelen - sizeof (*shstate); 1787 shstate->log.loglen = shstate->log.buflen; 1788 shstate->log.buf = (char *)shstate + sizeof (*shstate); 1789 shstate->log.log = shstate->log.buf; 1790 shstate->log.locale = parents_locale; 1791 shstate->status = -1; 1792 1793 /* 1794 * We need a SIGCHLD handler so the sema_wait() below will wake 1795 * up if the child dies without doing a sema_post(). 1796 */ 1797 (void) sigset(SIGCHLD, sigchld); 1798 /* 1799 * We must mask SIGCHLD until after we've coped with the fork 1800 * sufficiently to deal with it; otherwise we can race and 1801 * receive the signal before pid has been initialized 1802 * (yes, this really happens). 
1803 */ 1804 (void) sigemptyset(&block_cld); 1805 (void) sigaddset(&block_cld, SIGCHLD); 1806 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 1807 1808 if ((ctfd = init_template()) == -1) { 1809 zerror(zlogp, B_TRUE, "failed to create contract"); 1810 return (1); 1811 } 1812 1813 /* 1814 * Do not let another thread localize a message while we are forking. 1815 */ 1816 (void) mutex_lock(&msglock); 1817 pid = fork(); 1818 (void) mutex_unlock(&msglock); 1819 1820 /* 1821 * In all cases (parent, child, and in the event of an error) we 1822 * don't want to cause creation of contracts on subsequent fork()s. 1823 */ 1824 (void) ct_tmpl_clear(ctfd); 1825 (void) close(ctfd); 1826 1827 if (pid == -1) { 1828 zerror(zlogp, B_TRUE, "could not fork"); 1829 return (1); 1830 1831 } else if (pid > 0) { /* parent */ 1832 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1833 /* 1834 * This marks a window of vulnerability in which we receive 1835 * the SIGCLD before falling into sema_wait (normally we would 1836 * get woken up from sema_wait with EINTR upon receipt of 1837 * SIGCLD). So we may need to use some other scheme like 1838 * sema_posting in the sigcld handler. 1839 * blech 1840 */ 1841 (void) sema_wait(&shstate->sem); 1842 (void) sema_destroy(&shstate->sem); 1843 if (shstate->status != 0) 1844 (void) waitpid(pid, NULL, WNOHANG); 1845 /* 1846 * It's ok if we die with SIGPIPE. It's not like we could have 1847 * done anything about it. 1848 */ 1849 (void) fprintf(stderr, "%s", shstate->log.buf); 1850 _exit(shstate->status == 0 ? 0 : 1); 1851 } 1852 1853 /* 1854 * The child charges on. 1855 */ 1856 (void) sigset(SIGCHLD, SIG_DFL); 1857 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1858 1859 /* 1860 * SIGPIPE can be delivered if we write to a socket for which the 1861 * peer endpoint is gone. That can lead to too-early termination 1862 * of zoneadmd, and that's not good eats. 
1863 */ 1864 (void) sigset(SIGPIPE, SIG_IGN); 1865 /* 1866 * Stop using stderr 1867 */ 1868 zlogp = &shstate->log; 1869 1870 /* 1871 * We don't need stdout/stderr from now on. 1872 */ 1873 closefrom(0); 1874 1875 /* 1876 * Initialize the syslog zlog_t. This needs to be done after 1877 * the call to closefrom(). 1878 */ 1879 logsys.buf = logsys.log = NULL; 1880 logsys.buflen = logsys.loglen = 0; 1881 logsys.logfile = NULL; 1882 logsys.locale = DEFAULT_LOCALE; 1883 1884 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 1885 1886 /* 1887 * The eventstream is used to publish state changes in the zone 1888 * from the door threads to the console I/O poller. 1889 */ 1890 if (eventstream_init() == -1) { 1891 zerror(zlogp, B_TRUE, "unable to create eventstream"); 1892 goto child_out; 1893 } 1894 1895 (void) snprintf(zone_door_path, sizeof (zone_door_path), 1896 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 1897 1898 /* 1899 * See if another zoneadmd is running for this zone. If not, then we 1900 * can now modify system state. 1901 */ 1902 if (make_daemon_exclusive(zlogp) == -1) 1903 goto child_out; 1904 1905 1906 /* 1907 * Create/join a new session; we need to be careful of what we do with 1908 * the console from now on so we don't end up being the session leader 1909 * for the terminal we're going to be handing out. 1910 */ 1911 (void) setsid(); 1912 1913 /* 1914 * This thread shouldn't be receiving any signals; in particular, 1915 * SIGCHLD should be received by the thread doing the fork(). 1916 */ 1917 (void) sigfillset(&blockset); 1918 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 1919 1920 /* 1921 * Setup the console device and get ready to serve the console; 1922 * once this has completed, we're ready to let console clients 1923 * make an attempt to connect (they will block until 1924 * serve_console_sock() below gets called, and any pending 1925 * connection is accept()ed). 
1926 */ 1927 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0) 1928 goto child_out; 1929 1930 /* 1931 * Take the lock now, so that when the door server gets going, we 1932 * are guaranteed that it won't take a request until we are sure 1933 * that everything is completely set up. See the child_out: label 1934 * below to see why this matters. 1935 */ 1936 (void) mutex_lock(&lock); 1937 1938 /* Init semaphore for scratch zones. */ 1939 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 1940 zerror(zlogp, B_TRUE, 1941 "failed to initialize semaphore for scratch zone"); 1942 goto child_out; 1943 } 1944 1945 /* open the dladm handle */ 1946 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) { 1947 zerror(zlogp, B_FALSE, "failed to open dladm handle"); 1948 goto child_out; 1949 } 1950 1951 /* 1952 * Note: door setup must occur *after* the console is setup. 1953 * This is so that as zlogin tests the door to see if zoneadmd 1954 * is ready yet, we know that the console will get serviced 1955 * once door_info() indicates that the door is "up". 1956 */ 1957 if (setup_door(zlogp) == -1) 1958 goto child_out; 1959 1960 /* 1961 * Things seem OK so far; tell the parent process that we're done 1962 * with setup tasks. This will cause the parent to exit, signalling 1963 * to zoneadm, zlogin, or whatever forked it that we are ready to 1964 * service requests. 1965 */ 1966 shstate->status = 0; 1967 (void) sema_post(&shstate->sem); 1968 (void) munmap((char *)shstate, shstatelen); 1969 shstate = NULL; 1970 1971 (void) mutex_unlock(&lock); 1972 1973 /* 1974 * zlogp is now invalid, so reset it to the syslog logger. 1975 */ 1976 zlogp = &logsys; 1977 1978 /* 1979 * Now that we are free of any parents, switch to the default locale. 1980 */ 1981 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 1982 1983 /* 1984 * At this point the setup portion of main() is basically done, so 1985 * we reuse this thread to manage the zone console. 
When 1986 * serve_console() has returned, we are past the point of no return 1987 * in the life of this zoneadmd. 1988 */ 1989 if (zonecfg_in_alt_root()) { 1990 /* 1991 * This is just awful, but mounted scratch zones don't (and 1992 * can't) have consoles. We just wait for unmount instead. 1993 */ 1994 while (sema_wait(&scratch_sem) == EINTR) 1995 ; 1996 } else { 1997 serve_console(zlogp); 1998 assert(in_death_throes); 1999 } 2000 2001 /* 2002 * This is the next-to-last part of the exit interlock. Upon calling 2003 * fdetach(), the door will go unreferenced; once any 2004 * outstanding requests (like the door thread doing Z_HALT) are 2005 * done, the door will get an UNREF notification; when it handles 2006 * the UNREF, the door server will cause the exit. 2007 */ 2008 assert(!MUTEX_HELD(&lock)); 2009 (void) fdetach(zone_door_path); 2010 2011 for (;;) 2012 (void) pause(); 2013 2014 child_out: 2015 assert(pid == 0); 2016 if (shstate != NULL) { 2017 shstate->status = -1; 2018 (void) sema_post(&shstate->sem); 2019 (void) munmap((char *)shstate, shstatelen); 2020 } 2021 2022 /* 2023 * This might trigger an unref notification, but if so, 2024 * we are still holding the lock, so our call to exit will 2025 * ultimately win the race and will publish the right exit 2026 * code. 2027 */ 2028 if (zone_door != -1) { 2029 assert(MUTEX_HELD(&lock)); 2030 (void) door_revoke(zone_door); 2031 (void) fdetach(zone_door_path); 2032 } 2033 2034 if (dld_handle != NULL) 2035 dladm_close(dld_handle); 2036 2037 return (1); /* return from main() forcibly exits an MT process */ 2038 } 2039