1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * zoneadmd manages zones; one zoneadmd process is launched for each 29 * non-global zone on the system. This daemon juggles four jobs: 30 * 31 * - Implement setup and teardown of the zone "virtual platform": mount and 32 * unmount filesystems; create and destroy network interfaces; communicate 33 * with devfsadmd to lay out devices for the zone; instantiate the zone 34 * console device; configure process runtime attributes such as resource 35 * controls, pool bindings, fine-grained privileges. 36 * 37 * - Launch the zone's init(1M) process. 38 * 39 * - Implement a door server; clients (like zoneadm) connect to the door 40 * server and request zone state changes. The kernel is also a client of 41 * this door server. A request to halt or reboot the zone which originates 42 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 43 * 44 * One minor problem is that messages emitted by zoneadmd need to be passed 45 * back to the zoneadm process making the request. These messages need to 46 * be rendered in the client's locale; so, this is passed in as part of the 47 * request. The exception is the kernel upcall to zoneadmd, in which case 48 * messages are syslog'd. 49 * 50 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 51 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 52 * strings which do not need to be translated. 53 * 54 * - Act as a console server for zlogin -C processes; see comments in zcons.c 55 * for more information about the zone console architecture. 56 * 57 * DESIGN NOTES 58 * 59 * Restart: 60 * A chief design constraint of zoneadmd is that it should be restartable in 61 * the case that the administrator kills it off, or it suffers a fatal error, 62 * without the running zone being impacted; this is akin to being able to 63 * reboot the service processor of a server without affecting the OS instance. 64 */ 65 66 #include <sys/param.h> 67 #include <sys/mman.h> 68 #include <sys/types.h> 69 #include <sys/stat.h> 70 #include <sys/sysmacros.h> 71 72 #include <bsm/adt.h> 73 #include <bsm/adt_event.h> 74 75 #include <alloca.h> 76 #include <assert.h> 77 #include <errno.h> 78 #include <door.h> 79 #include <fcntl.h> 80 #include <locale.h> 81 #include <signal.h> 82 #include <stdarg.h> 83 #include <stdio.h> 84 #include <stdlib.h> 85 #include <string.h> 86 #include <strings.h> 87 #include <synch.h> 88 #include <syslog.h> 89 #include <thread.h> 90 #include <unistd.h> 91 #include <wait.h> 92 #include <limits.h> 93 #include <zone.h> 94 #include <libbrand.h> 95 #include <sys/brand.h> 96 #include <libcontract.h> 97 #include <libcontract_priv.h> 98 #include <sys/contract/process.h> 99 #include <sys/ctfs.h> 100 101 #include <libzonecfg.h> 102 #include "zoneadmd.h" 103 104 static char *progname; 105 char *zone_name; /* zone which we are managing */ 106 char brand_name[MAXNAMELEN]; 107 boolean_t zone_isnative; 108 boolean_t zone_iscluster; 109 static zoneid_t zone_id; 110 111 static char pre_statechg_hook[2 * MAXPATHLEN]; 112 static char post_statechg_hook[2 * MAXPATHLEN]; 113 char query_hook[2 * MAXPATHLEN]; 114 115 zlog_t logsys; 116 117 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 118 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 119 120 static sema_t scratch_sem; /* for scratch zones */ 121 122 static char zone_door_path[MAXPATHLEN]; 123 static int zone_door = -1; 124 125 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 126 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 127 128 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 129 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 130 #endif 131 132 #define DEFAULT_LOCALE "C" 133 134 static const char * 135 z_cmd_name(zone_cmd_t zcmd) 136 { 137 /* This list needs to match the enum in sys/zone.h */ 138 static const char *zcmdstr[] = { 139 "ready", "boot", "forceboot", "reboot", "halt", 140 "note_uninstalling", "mount", "forcemount", "unmount" 141 }; 142 143 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 144 return ("unknown"); 145 else 146 return (zcmdstr[(int)zcmd]); 147 } 148 149 static char * 150 get_execbasename(char *execfullname) 151 { 152 char *last_slash, *execbasename; 153 154 /* guard against '/' at end of command invocation */ 155 for (;;) { 156 last_slash = strrchr(execfullname, '/'); 157 if (last_slash == NULL) { 158 execbasename = execfullname; 159 break; 160 } else { 161 execbasename = last_slash + 1; 162 if (*execbasename == '\0') { 163 *last_slash = '\0'; 164 continue; 165 } 166 break; 167 } 168 } 169 return (execbasename); 170 } 171 172 static void 173 usage(void) 174 { 175 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 176 (void) fprintf(stderr, 177 gettext("\tNote: %s should not be run directly.\n"), progname); 178 exit(2); 179 } 180 181 /* ARGSUSED */ 182 static void 183 sigchld(int sig) 184 { 185 } 186 187 char * 188 localize_msg(char *locale, const char *msg) 189 { 190 char *out; 191 192 (void) mutex_lock(&msglock); 193 (void) setlocale(LC_MESSAGES, locale); 194 out = gettext(msg); 195 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 196 (void) mutex_unlock(&msglock); 197 return (out); 198 } 199 200 /* PRINTFLIKE3 */ 201 void 202 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 203 { 204 va_list alist; 205 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 206 char *bp; 207 int saved_errno = errno; 208 209 if (zlogp == NULL) 210 return; 211 if (zlogp == &logsys) 212 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 213 zone_name); 214 else 215 buf[0] = '\0'; 216 bp = &(buf[strlen(buf)]); 217 218 /* 219 * In theory, the locale pointer should be set to either "C" or a 220 * char array, so it should never be NULL 221 */ 222 assert(zlogp->locale != NULL); 223 /* Locale is per process, but we are multi-threaded... */ 224 fmt = localize_msg(zlogp->locale, fmt); 225 226 va_start(alist, fmt); 227 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 228 va_end(alist); 229 bp = &(buf[strlen(buf)]); 230 if (use_strerror) 231 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 232 strerror(saved_errno)); 233 if (zlogp == &logsys) { 234 (void) syslog(LOG_ERR, "%s", buf); 235 } else if (zlogp->logfile != NULL) { 236 (void) fprintf(zlogp->logfile, "%s\n", buf); 237 } else { 238 size_t buflen; 239 size_t copylen; 240 241 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 242 copylen = MIN(buflen, zlogp->loglen); 243 zlogp->log += copylen; 244 zlogp->loglen -= copylen; 245 } 246 } 247 248 /* 249 * Emit a warning for any boot arguments which are unrecognized. Since 250 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 251 * put the arguments into an argv style array, use getopt to process them, 252 * and put the resultant argument string back into outargs. 253 * 254 * During the filtering, we pull out any arguments which are truly "boot" 255 * arguments, leaving only those which are to be passed intact to the 256 * progenitor process. The one we support at the moment is -i, which 257 * indicates to the kernel which program should be launched as 'init'. 258 * 259 * A return of Z_INVAL indicates specifically that the arguments are 260 * not valid; this is a non-fatal error. Except for Z_OK, all other return 261 * values are treated as fatal. 262 */ 263 static int 264 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 265 char *init_file, char *badarg) 266 { 267 int argc = 0, argc_save; 268 int i; 269 int err; 270 char *arg, *lasts, **argv = NULL, **argv_save; 271 char zonecfg_args[BOOTARGS_MAX]; 272 char scratchargs[BOOTARGS_MAX], *sargs; 273 char c; 274 275 bzero(outargs, BOOTARGS_MAX); 276 bzero(badarg, BOOTARGS_MAX); 277 278 /* 279 * If the user didn't specify transient boot arguments, check 280 * to see if there were any specified in the zone configuration, 281 * and use them if applicable. 282 */ 283 if (inargs == NULL || inargs[0] == '\0') { 284 zone_dochandle_t handle; 285 if ((handle = zonecfg_init_handle()) == NULL) { 286 zerror(zlogp, B_TRUE, 287 "getting zone configuration handle"); 288 return (Z_BAD_HANDLE); 289 } 290 err = zonecfg_get_snapshot_handle(zone_name, handle); 291 if (err != Z_OK) { 292 zerror(zlogp, B_FALSE, 293 "invalid configuration snapshot"); 294 zonecfg_fini_handle(handle); 295 return (Z_BAD_HANDLE); 296 } 297 298 bzero(zonecfg_args, sizeof (zonecfg_args)); 299 (void) zonecfg_get_bootargs(handle, zonecfg_args, 300 sizeof (zonecfg_args)); 301 inargs = zonecfg_args; 302 zonecfg_fini_handle(handle); 303 } 304 305 if (strlen(inargs) >= BOOTARGS_MAX) { 306 zerror(zlogp, B_FALSE, "boot argument string too long"); 307 return (Z_INVAL); 308 } 309 310 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 311 sargs = scratchargs; 312 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 313 sargs = NULL; 314 argc++; 315 } 316 317 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 318 zerror(zlogp, B_FALSE, "memory allocation failed"); 319 return (Z_NOMEM); 320 } 321 322 argv_save = argv; 323 argc_save = argc; 324 325 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 326 sargs = scratchargs; 327 i = 0; 328 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 329 sargs = NULL; 330 if ((argv[i] = strdup(arg)) == NULL) { 331 err = Z_NOMEM; 332 zerror(zlogp, B_FALSE, "memory allocation failed"); 333 goto done; 334 } 335 i++; 336 } 337 338 /* 339 * We preserve compatibility with the Solaris system boot behavior, 340 * which allows: 341 * 342 * # reboot kernel/unix -s -m verbose 343 * 344 * In this example, kernel/unix tells the booter what file to 345 * boot. We don't want reboot in a zone to be gratuitously different, 346 * so we silently ignore the boot file, if necessary. 347 */ 348 if (argv[0] == NULL) 349 goto done; 350 351 assert(argv[0][0] != ' '); 352 assert(argv[0][0] != '\t'); 353 354 if (argv[0][0] != '-' && argv[0][0] != '\0') { 355 argv = &argv[1]; 356 argc--; 357 } 358 359 optind = 0; 360 opterr = 0; 361 err = Z_OK; 362 while ((c = getopt(argc, argv, "fi:m:s")) != -1) { 363 switch (c) { 364 case 'i': 365 /* 366 * -i is handled by the runtime and is not passed 367 * along to userland 368 */ 369 (void) strlcpy(init_file, optarg, MAXPATHLEN); 370 break; 371 case 'f': 372 /* This has already been processed by zoneadm */ 373 break; 374 case 'm': 375 case 's': 376 /* These pass through unmolested */ 377 (void) snprintf(outargs, BOOTARGS_MAX, 378 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 379 break; 380 case '?': 381 /* 382 * We warn about unknown arguments but pass them 383 * along anyway-- if someone wants to develop their 384 * own init replacement, they can pass it whatever 385 * args they want. 386 */ 387 err = Z_INVAL; 388 (void) snprintf(outargs, BOOTARGS_MAX, 389 "%s -%c", outargs, optopt); 390 (void) snprintf(badarg, BOOTARGS_MAX, 391 "%s -%c", badarg, optopt); 392 break; 393 } 394 } 395 396 /* 397 * For Solaris Zones we warn about and discard non-option arguments. 398 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 399 * to the kernel, we concat up all the other remaining boot args. 400 * and warn on them as a group. 401 */ 402 if (optind < argc) { 403 err = Z_INVAL; 404 while (optind < argc) { 405 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 406 badarg, strlen(badarg) > 0 ? " " : "", 407 argv[optind]); 408 optind++; 409 } 410 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 411 "arguments `%s'.", badarg); 412 } 413 414 done: 415 for (i = 0; i < argc_save; i++) { 416 if (argv_save[i] != NULL) 417 free(argv_save[i]); 418 } 419 free(argv_save); 420 return (err); 421 } 422 423 424 static int 425 mkzonedir(zlog_t *zlogp) 426 { 427 struct stat st; 428 /* 429 * We must create and lock everyone but root out of ZONES_TMPDIR 430 * since anyone can open any UNIX domain socket, regardless of 431 * its file system permissions. Sigh... 432 */ 433 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 434 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 435 return (-1); 436 } 437 /* paranoia */ 438 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 439 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 440 return (-1); 441 } 442 (void) chmod(ZONES_TMPDIR, S_IRWXU); 443 return (0); 444 } 445 446 /* 447 * Run the brand's pre-state change callback, if it exists. 448 */ 449 static int 450 brand_prestatechg(zlog_t *zlogp, int state, int cmd) 451 { 452 char cmdbuf[2 * MAXPATHLEN]; 453 454 if (pre_statechg_hook[0] == '\0') 455 return (0); 456 457 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d", pre_statechg_hook, 458 state, cmd) > sizeof (cmdbuf)) 459 return (-1); 460 461 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 462 return (-1); 463 464 return (0); 465 } 466 467 /* 468 * Run the brand's post-state change callback, if it exists. 469 */ 470 static int 471 brand_poststatechg(zlog_t *zlogp, int state, int cmd) 472 { 473 char cmdbuf[2 * MAXPATHLEN]; 474 475 if (post_statechg_hook[0] == '\0') 476 return (0); 477 478 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d", post_statechg_hook, 479 state, cmd) > sizeof (cmdbuf)) 480 return (-1); 481 482 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 483 return (-1); 484 485 return (0); 486 } 487 488 /* 489 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 490 * 'true' if this is being invoked as part of the processing for the "mount" 491 * subcommand. 492 */ 493 static int 494 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate) 495 { 496 int err; 497 498 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0) 499 return (-1); 500 501 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 502 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 503 zonecfg_strerror(err)); 504 return (-1); 505 } 506 507 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 508 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 509 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 510 zonecfg_strerror(err)); 511 return (-1); 512 } 513 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 514 bringup_failure_recovery = B_TRUE; 515 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE); 516 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 517 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 518 zonecfg_strerror(err)); 519 return (-1); 520 } 521 522 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0) 523 return (-1); 524 525 return (0); 526 } 527 528 int 529 init_template(void) 530 { 531 int fd; 532 int err = 0; 533 534 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 535 if (fd == -1) 536 return (-1); 537 538 /* 539 * For now, zoneadmd doesn't do anything with the contract. 540 * Deliver no events, don't inherit, and allow it to be orphaned. 541 */ 542 err |= ct_tmpl_set_critical(fd, 0); 543 err |= ct_tmpl_set_informative(fd, 0); 544 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 545 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 546 if (err || ct_tmpl_activate(fd)) { 547 (void) close(fd); 548 return (-1); 549 } 550 551 return (fd); 552 } 553 554 typedef struct fs_callback { 555 zlog_t *zlogp; 556 zoneid_t zoneid; 557 boolean_t mount_cmd; 558 } fs_callback_t; 559 560 static int 561 mount_early_fs(void *data, const char *spec, const char *dir, 562 const char *fstype, const char *opt) 563 { 564 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; 565 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; 566 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd; 567 char rootpath[MAXPATHLEN]; 568 pid_t child; 569 int child_status; 570 int tmpl_fd; 571 int rv; 572 ctid_t ct; 573 574 /* determine the zone rootpath */ 575 if (mount_cmd) { 576 char zonepath[MAXPATHLEN]; 577 char luroot[MAXPATHLEN]; 578 579 if (zone_get_zonepath(zone_name, 580 zonepath, sizeof (zonepath)) != Z_OK) { 581 zerror(zlogp, B_FALSE, "unable to determine zone path"); 582 return (-1); 583 } 584 585 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 586 resolve_lofs(zlogp, luroot, sizeof (luroot)); 587 (void) strlcpy(rootpath, luroot, sizeof (rootpath)); 588 } else { 589 if (zone_get_rootpath(zone_name, 590 rootpath, sizeof (rootpath)) != Z_OK) { 591 zerror(zlogp, B_FALSE, "unable to determine zone root"); 592 return (-1); 593 } 594 } 595 596 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) { 597 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 598 rootpath, dir); 599 return (-1); 600 } else if (rv > 0) { 601 /* The mount point path doesn't exist, create it now. */ 602 if (make_one_dir(zlogp, rootpath, dir, 603 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 604 DEFAULT_DIR_GROUP) != 0) { 605 zerror(zlogp, B_FALSE, "failed to create mount point"); 606 return (-1); 607 } 608 609 /* 610 * Now this might seem weird, but we need to invoke 611 * valid_mount_path() again. Why? Because it checks 612 * to make sure that the mount point path is canonical, 613 * which it can only do if the path exists, so now that 614 * we've created the path we have to verify it again. 615 */ 616 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, 617 fstype)) < 0) { 618 zerror(zlogp, B_FALSE, 619 "%s%s is not a valid mount point", rootpath, dir); 620 return (-1); 621 } 622 } 623 624 if ((tmpl_fd = init_template()) == -1) { 625 zerror(zlogp, B_TRUE, "failed to create contract"); 626 return (-1); 627 } 628 629 if ((child = fork()) == -1) { 630 (void) ct_tmpl_clear(tmpl_fd); 631 (void) close(tmpl_fd); 632 zerror(zlogp, B_TRUE, "failed to fork"); 633 return (-1); 634 635 } else if (child == 0) { /* child */ 636 char opt_buf[MAX_MNTOPT_STR]; 637 int optlen = 0; 638 int mflag = MS_DATA; 639 640 (void) ct_tmpl_clear(tmpl_fd); 641 /* 642 * Even though there are no procs running in the zone, we 643 * do this for paranoia's sake. 644 */ 645 (void) closefrom(0); 646 647 if (zone_enter(zoneid) == -1) { 648 _exit(errno); 649 } 650 if (opt != NULL) { 651 /* 652 * The mount() system call is incredibly annoying. 653 * If options are specified, we need to copy them 654 * into a temporary buffer since the mount() system 655 * call will overwrite the options string. It will 656 * also fail if the new option string it wants to 657 * write is bigger than the one we passed in, so 658 * you must pass in a buffer of the maximum possible 659 * option string length. sigh. 660 */ 661 (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); 662 opt = opt_buf; 663 optlen = MAX_MNTOPT_STR; 664 mflag = MS_OPTIONSTR; 665 } 666 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) 667 _exit(errno); 668 _exit(0); 669 } 670 671 /* parent */ 672 if (contract_latest(&ct) == -1) 673 ct = -1; 674 (void) ct_tmpl_clear(tmpl_fd); 675 (void) close(tmpl_fd); 676 if (waitpid(child, &child_status, 0) != child) { 677 /* unexpected: we must have been signalled */ 678 (void) contract_abandon_id(ct); 679 return (-1); 680 } 681 (void) contract_abandon_id(ct); 682 if (WEXITSTATUS(child_status) != 0) { 683 errno = WEXITSTATUS(child_status); 684 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 685 return (-1); 686 } 687 688 return (0); 689 } 690 691 /* 692 * If retstr is not NULL, the output of the subproc is returned in the str, 693 * otherwise it is output using zerror(). Any memory allocated for retstr 694 * should be freed by the caller. 695 */ 696 int 697 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr) 698 { 699 char buf[1024]; /* arbitrary large amount */ 700 char *inbuf; 701 FILE *file; 702 int status; 703 int rd_cnt; 704 705 if (retstr != NULL) { 706 if ((*retstr = malloc(1024)) == NULL) { 707 zerror(zlogp, B_FALSE, "out of memory"); 708 return (-1); 709 } 710 inbuf = *retstr; 711 rd_cnt = 0; 712 } else { 713 inbuf = buf; 714 } 715 716 file = popen(cmdbuf, "r"); 717 if (file == NULL) { 718 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); 719 return (-1); 720 } 721 722 while (fgets(inbuf, 1024, file) != NULL) { 723 if (retstr == NULL && zlogp != &logsys) { 724 zerror(zlogp, B_FALSE, "%s", inbuf); 725 } else { 726 char *p; 727 728 rd_cnt += 1024 - 1; 729 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) { 730 zerror(zlogp, B_FALSE, "out of memory"); 731 (void) pclose(file); 732 return (-1); 733 } 734 735 *retstr = p; 736 inbuf = *retstr + rd_cnt; 737 } 738 } 739 status = pclose(file); 740 741 if (WIFSIGNALED(status)) { 742 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 743 "signal %d", cmdbuf, WTERMSIG(status)); 744 return (-1); 745 } 746 assert(WIFEXITED(status)); 747 if (WEXITSTATUS(status) == ZEXIT_EXEC) { 748 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); 749 return (-1); 750 } 751 return (WEXITSTATUS(status)); 752 } 753 754 static int 755 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) 756 { 757 zoneid_t zoneid; 758 struct stat st; 759 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 760 char nbootargs[BOOTARGS_MAX]; 761 char cmdbuf[MAXPATHLEN]; 762 fs_callback_t cb; 763 brand_handle_t bh; 764 int err; 765 766 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0) 767 return (-1); 768 769 if (init_console_slave(zlogp) != 0) 770 return (-1); 771 reset_slave_terminal(zlogp); 772 773 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 774 zerror(zlogp, B_TRUE, "unable to get zoneid"); 775 return (-1); 776 } 777 778 cb.zlogp = zlogp; 779 cb.zoneid = zoneid; 780 cb.mount_cmd = B_FALSE; 781 782 /* Get a handle to the brand info for this zone */ 783 if ((bh = brand_open(brand_name)) == NULL) { 784 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 785 return (-1); 786 } 787 788 /* 789 * Get the list of filesystems to mount from the brand 790 * configuration. These mounts are done via a thread that will 791 * enter the zone, so they are done from within the context of the 792 * zone. 793 */ 794 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) { 795 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 796 brand_close(bh); 797 return (-1); 798 } 799 800 /* 801 * Get the brand's boot callback if it exists. 802 */ 803 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 804 zerror(zlogp, B_FALSE, "unable to determine zone path"); 805 brand_close(bh); 806 return (-1); 807 } 808 (void) strcpy(cmdbuf, EXEC_PREFIX); 809 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 810 sizeof (cmdbuf) - EXEC_LEN) != 0) { 811 zerror(zlogp, B_FALSE, 812 "unable to determine branded zone's boot callback"); 813 brand_close(bh); 814 return (-1); 815 } 816 817 /* Get the path for this zone's init(1M) (or equivalent) process. */ 818 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) { 819 zerror(zlogp, B_FALSE, 820 "unable to determine zone's init(1M) location"); 821 brand_close(bh); 822 return (-1); 823 } 824 825 brand_close(bh); 826 827 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 828 bad_boot_arg); 829 if (err == Z_INVAL) 830 eventstream_write(Z_EVT_ZONE_BADARGS); 831 else if (err != Z_OK) 832 return (-1); 833 834 assert(init_file[0] != '\0'); 835 836 /* Try to anticipate possible problems: Make sure init is executable. */ 837 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 838 zerror(zlogp, B_FALSE, "unable to determine zone root"); 839 return (-1); 840 } 841 842 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); 843 844 if (stat(initpath, &st) == -1) { 845 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 846 return (-1); 847 } 848 849 if ((st.st_mode & S_IXUSR) == 0) { 850 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 851 return (-1); 852 } 853 854 /* 855 * If there is a brand 'boot' callback, execute it now to give the 856 * brand one last chance to do any additional setup before the zone 857 * is booted. 858 */ 859 if ((strlen(cmdbuf) > EXEC_LEN) && 860 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) { 861 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 862 return (-1); 863 } 864 865 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 866 zerror(zlogp, B_TRUE, "could not set zone boot file"); 867 return (-1); 868 } 869 870 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 871 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 872 return (-1); 873 } 874 875 if (zone_boot(zoneid) == -1) { 876 zerror(zlogp, B_TRUE, "unable to boot zone"); 877 return (-1); 878 } 879 880 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0) 881 return (-1); 882 883 return (0); 884 } 885 886 static int 887 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate) 888 { 889 int err; 890 891 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0) 892 return (-1); 893 894 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) { 895 if (!bringup_failure_recovery) 896 zerror(zlogp, B_FALSE, "unable to destroy zone"); 897 return (-1); 898 } 899 900 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 901 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 902 zonecfg_strerror(err)); 903 904 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0) 905 return (-1); 906 907 return (0); 908 } 909 910 /* 911 * Generate AUE_zone_state for a command that boots a zone. 912 */ 913 static void 914 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 915 char *new_state) 916 { 917 adt_session_data_t *ah; 918 adt_event_data_t *event; 919 int pass_fail, fail_reason; 920 921 if (!adt_audit_enabled()) 922 return; 923 924 if (return_val == 0) { 925 pass_fail = ADT_SUCCESS; 926 fail_reason = ADT_SUCCESS; 927 } else { 928 pass_fail = ADT_FAILURE; 929 fail_reason = ADT_FAIL_VALUE_PROGRAM; 930 } 931 932 if (adt_start_session(&ah, NULL, 0)) { 933 zerror(zlogp, B_TRUE, gettext("audit failure.")); 934 return; 935 } 936 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 937 zerror(zlogp, B_TRUE, gettext("audit failure.")); 938 (void) adt_end_session(ah); 939 return; 940 } 941 942 event = adt_alloc_event(ah, ADT_zone_state); 943 if (event == NULL) { 944 zerror(zlogp, B_TRUE, gettext("audit failure.")); 945 (void) adt_end_session(ah); 946 return; 947 } 948 event->adt_zone_state.zonename = zone_name; 949 event->adt_zone_state.new_state = new_state; 950 951 if (adt_put_event(event, pass_fail, fail_reason)) 952 zerror(zlogp, B_TRUE, gettext("audit failure.")); 953 954 adt_free_event(event); 955 956 (void) adt_end_session(ah); 957 } 958 959 /* 960 * The main routine for the door server that deals with zone state transitions. 961 */ 962 /* ARGSUSED */ 963 static void 964 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 965 uint_t n_desc) 966 { 967 ucred_t *uc = NULL; 968 const priv_set_t *eset; 969 970 zone_state_t zstate; 971 zone_cmd_t cmd; 972 zone_cmd_arg_t *zargp; 973 974 boolean_t kernelcall; 975 976 int rval = -1; 977 uint64_t uniqid; 978 zoneid_t zoneid = -1; 979 zlog_t zlog; 980 zlog_t *zlogp; 981 zone_cmd_rval_t *rvalp; 982 size_t rlen = getpagesize(); /* conservative */ 983 fs_callback_t cb; 984 brand_handle_t bh; 985 986 /* LINTED E_BAD_PTR_CAST_ALIGN */ 987 zargp = (zone_cmd_arg_t *)args; 988 989 /* 990 * When we get the door unref message, we've fdetach'd the door, and 991 * it is time for us to shut down zoneadmd. 992 */ 993 if (zargp == DOOR_UNREF_DATA) { 994 /* 995 * See comment at end of main() for info on the last rites. 996 */ 997 exit(0); 998 } 999 1000 if (zargp == NULL) { 1001 (void) door_return(NULL, 0, 0, 0); 1002 } 1003 1004 rvalp = alloca(rlen); 1005 bzero(rvalp, rlen); 1006 zlog.logfile = NULL; 1007 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 1008 zlog.buf = rvalp->errbuf; 1009 zlog.log = zlog.buf; 1010 /* defer initialization of zlog.locale until after credential check */ 1011 zlogp = &zlog; 1012 1013 if (alen != sizeof (zone_cmd_arg_t)) { 1014 /* 1015 * This really shouldn't be happening. 1016 */ 1017 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 1018 "unexpected (expected %d bytes)", alen, 1019 sizeof (zone_cmd_arg_t)); 1020 goto out; 1021 } 1022 cmd = zargp->cmd; 1023 1024 if (door_ucred(&uc) != 0) { 1025 zerror(&logsys, B_TRUE, "door_ucred"); 1026 goto out; 1027 } 1028 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 1029 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 1030 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 1031 ucred_geteuid(uc) != 0)) { 1032 zerror(&logsys, B_FALSE, "insufficient privileges"); 1033 goto out; 1034 } 1035 1036 kernelcall = ucred_getpid(uc) == 0; 1037 1038 /* 1039 * This is safe because we only use a zlog_t throughout the 1040 * duration of a door call; i.e., by the time the pointer 1041 * might become invalid, the door call would be over. 1042 */ 1043 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 1044 1045 (void) mutex_lock(&lock); 1046 1047 /* 1048 * Once we start to really die off, we don't want more connections. 1049 */ 1050 if (in_death_throes) { 1051 (void) mutex_unlock(&lock); 1052 ucred_free(uc); 1053 (void) door_return(NULL, 0, 0, 0); 1054 thr_exit(NULL); 1055 } 1056 1057 /* 1058 * Check for validity of command. 1059 */ 1060 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && 1061 cmd != Z_REBOOT && cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && 1062 cmd != Z_MOUNT && cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { 1063 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 1064 goto out; 1065 } 1066 1067 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 1068 /* 1069 * Can't happen 1070 */ 1071 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 1072 cmd); 1073 goto out; 1074 } 1075 /* 1076 * We ignore the possibility of someone calling zone_create(2) 1077 * explicitly; all requests must come through zoneadmd. 1078 */ 1079 if (zone_get_state(zone_name, &zstate) != Z_OK) { 1080 /* 1081 * Something terribly wrong happened 1082 */ 1083 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 1084 goto out; 1085 } 1086 1087 if (kernelcall) { 1088 /* 1089 * Kernel-initiated requests may lose their validity if the 1090 * zone_t the kernel was referring to has gone away. 1091 */ 1092 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 1093 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 1094 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 1095 /* 1096 * We're not talking about the same zone. The request 1097 * must have arrived too late. Return error. 1098 */ 1099 rval = -1; 1100 goto out; 1101 } 1102 zlogp = &logsys; /* Log errors to syslog */ 1103 } 1104 1105 /* 1106 * If we are being asked to forcibly mount or boot a zone, we 1107 * pretend that an INCOMPLETE zone is actually INSTALLED. 1108 */ 1109 if (zstate == ZONE_STATE_INCOMPLETE && 1110 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) 1111 zstate = ZONE_STATE_INSTALLED; 1112 1113 switch (zstate) { 1114 case ZONE_STATE_CONFIGURED: 1115 case ZONE_STATE_INCOMPLETE: 1116 /* 1117 * Not our area of expertise; we just print a nice message 1118 * and die off. 1119 */ 1120 zerror(zlogp, B_FALSE, 1121 "%s operation is invalid for zones in state '%s'", 1122 z_cmd_name(cmd), zone_state_str(zstate)); 1123 break; 1124 1125 case ZONE_STATE_INSTALLED: 1126 switch (cmd) { 1127 case Z_READY: 1128 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate); 1129 if (rval == 0) 1130 eventstream_write(Z_EVT_ZONE_READIED); 1131 break; 1132 case Z_BOOT: 1133 case Z_FORCEBOOT: 1134 eventstream_write(Z_EVT_ZONE_BOOTING); 1135 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1136 == 0) { 1137 rval = zone_bootup(zlogp, zargp->bootbuf, 1138 zstate); 1139 } 1140 audit_put_record(zlogp, uc, rval, "boot"); 1141 if (rval != 0) { 1142 bringup_failure_recovery = B_TRUE; 1143 (void) zone_halt(zlogp, B_FALSE, B_FALSE, 1144 zstate); 1145 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1146 } 1147 break; 1148 case Z_HALT: 1149 if (kernelcall) /* Invalid; can't happen */ 1150 abort(); 1151 /* 1152 * We could have two clients racing to halt this 1153 * zone; the second client loses, but his request 1154 * doesn't fail, since the zone is now in the desired 1155 * state. 1156 */ 1157 zerror(zlogp, B_FALSE, "zone is already halted"); 1158 rval = 0; 1159 break; 1160 case Z_REBOOT: 1161 if (kernelcall) /* Invalid; can't happen */ 1162 abort(); 1163 zerror(zlogp, B_FALSE, "%s operation is invalid " 1164 "for zones in state '%s'", z_cmd_name(cmd), 1165 zone_state_str(zstate)); 1166 rval = -1; 1167 break; 1168 case Z_NOTE_UNINSTALLING: 1169 if (kernelcall) /* Invalid; can't happen */ 1170 abort(); 1171 /* 1172 * Tell the console to print out a message about this. 1173 * Once it does, we will be in_death_throes. 1174 */ 1175 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 1176 break; 1177 case Z_MOUNT: 1178 case Z_FORCEMOUNT: 1179 if (kernelcall) /* Invalid; can't happen */ 1180 abort(); 1181 if (!zone_isnative && !zone_iscluster) { 1182 /* 1183 * -U mounts the zone without lofs mounting 1184 * zone file systems back into the scratch 1185 * zone. This is required when mounting 1186 * non-native branded zones. 1187 */ 1188 (void) strlcpy(zargp->bootbuf, "-U", 1189 BOOTARGS_MAX); 1190 } 1191 1192 rval = zone_ready(zlogp, 1193 strcmp(zargp->bootbuf, "-U") == 0 ? 1194 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate); 1195 if (rval != 0) 1196 break; 1197 1198 eventstream_write(Z_EVT_ZONE_READIED); 1199 1200 /* 1201 * Get a handle to the native brand info. 1202 * We must always use the native brand file system 1203 * list when mounting the zone. 1204 */ 1205 if ((bh = brand_open(NATIVE_BRAND_NAME)) == NULL) { 1206 rval = -1; 1207 break; 1208 } 1209 1210 /* 1211 * Get the list of filesystems to mount from 1212 * the brand configuration. These mounts are done 1213 * via a thread that will enter the zone, so they 1214 * are done from within the context of the zone. 1215 */ 1216 cb.zlogp = zlogp; 1217 cb.zoneid = zone_id; 1218 cb.mount_cmd = B_TRUE; 1219 rval = brand_platform_iter_mounts(bh, 1220 mount_early_fs, &cb); 1221 1222 brand_close(bh); 1223 1224 /* 1225 * Ordinarily, /dev/fd would be mounted inside the zone 1226 * by svc:/system/filesystem/usr:default, but since 1227 * we're not booting the zone, we need to do this 1228 * manually. 1229 */ 1230 if (rval == 0) 1231 rval = mount_early_fs(&cb, 1232 "fd", "/dev/fd", "fd", NULL); 1233 break; 1234 case Z_UNMOUNT: 1235 if (kernelcall) /* Invalid; can't happen */ 1236 abort(); 1237 zerror(zlogp, B_FALSE, "zone is already unmounted"); 1238 rval = 0; 1239 break; 1240 } 1241 break; 1242 1243 case ZONE_STATE_READY: 1244 switch (cmd) { 1245 case Z_READY: 1246 /* 1247 * We could have two clients racing to ready this 1248 * zone; the second client loses, but his request 1249 * doesn't fail, since the zone is now in the desired 1250 * state. 1251 */ 1252 zerror(zlogp, B_FALSE, "zone is already ready"); 1253 rval = 0; 1254 break; 1255 case Z_BOOT: 1256 (void) strlcpy(boot_args, zargp->bootbuf, 1257 sizeof (boot_args)); 1258 eventstream_write(Z_EVT_ZONE_BOOTING); 1259 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1260 audit_put_record(zlogp, uc, rval, "boot"); 1261 if (rval != 0) { 1262 bringup_failure_recovery = B_TRUE; 1263 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1264 zstate); 1265 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1266 } 1267 boot_args[0] = '\0'; 1268 break; 1269 case Z_HALT: 1270 if (kernelcall) /* Invalid; can't happen */ 1271 abort(); 1272 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1273 != 0) 1274 break; 1275 eventstream_write(Z_EVT_ZONE_HALTED); 1276 break; 1277 case Z_REBOOT: 1278 case Z_NOTE_UNINSTALLING: 1279 case Z_MOUNT: 1280 case Z_UNMOUNT: 1281 if (kernelcall) /* Invalid; can't happen */ 1282 abort(); 1283 zerror(zlogp, B_FALSE, "%s operation is invalid " 1284 "for zones in state '%s'", z_cmd_name(cmd), 1285 zone_state_str(zstate)); 1286 rval = -1; 1287 break; 1288 } 1289 break; 1290 1291 case ZONE_STATE_MOUNTED: 1292 switch (cmd) { 1293 case Z_UNMOUNT: 1294 if (kernelcall) /* Invalid; can't happen */ 1295 abort(); 1296 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate); 1297 if (rval == 0) { 1298 eventstream_write(Z_EVT_ZONE_HALTED); 1299 (void) sema_post(&scratch_sem); 1300 } 1301 break; 1302 default: 1303 if (kernelcall) /* Invalid; can't happen */ 1304 abort(); 1305 zerror(zlogp, B_FALSE, "%s operation is invalid " 1306 "for zones in state '%s'", z_cmd_name(cmd), 1307 zone_state_str(zstate)); 1308 rval = -1; 1309 break; 1310 } 1311 break; 1312 1313 case ZONE_STATE_RUNNING: 1314 case ZONE_STATE_SHUTTING_DOWN: 1315 case ZONE_STATE_DOWN: 1316 switch (cmd) { 1317 case Z_READY: 1318 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1319 != 0) 1320 break; 1321 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0) 1322 eventstream_write(Z_EVT_ZONE_READIED); 1323 else 1324 eventstream_write(Z_EVT_ZONE_HALTED); 1325 break; 1326 case Z_BOOT: 1327 /* 1328 * We could have two clients racing to boot this 1329 * zone; the second client loses, but his request 1330 * doesn't fail, since the zone is now in the desired 1331 * state. 1332 */ 1333 zerror(zlogp, B_FALSE, "zone is already booted"); 1334 rval = 0; 1335 break; 1336 case Z_HALT: 1337 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1338 != 0) 1339 break; 1340 eventstream_write(Z_EVT_ZONE_HALTED); 1341 break; 1342 case Z_REBOOT: 1343 (void) strlcpy(boot_args, zargp->bootbuf, 1344 sizeof (boot_args)); 1345 eventstream_write(Z_EVT_ZONE_REBOOTING); 1346 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1347 != 0) { 1348 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1349 boot_args[0] = '\0'; 1350 break; 1351 } 1352 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1353 != 0) { 1354 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1355 boot_args[0] = '\0'; 1356 break; 1357 } 1358 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1359 audit_put_record(zlogp, uc, rval, "reboot"); 1360 if (rval != 0) { 1361 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1362 zstate); 1363 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1364 } 1365 boot_args[0] = '\0'; 1366 break; 1367 case Z_NOTE_UNINSTALLING: 1368 case Z_MOUNT: 1369 case Z_UNMOUNT: 1370 zerror(zlogp, B_FALSE, "%s operation is invalid " 1371 "for zones in state '%s'", z_cmd_name(cmd), 1372 zone_state_str(zstate)); 1373 rval = -1; 1374 break; 1375 } 1376 break; 1377 default: 1378 abort(); 1379 } 1380 1381 /* 1382 * Because the state of the zone may have changed, we make sure 1383 * to wake the console poller, which is in charge of initiating 1384 * the shutdown procedure as necessary. 1385 */ 1386 eventstream_write(Z_EVT_NULL); 1387 1388 out: 1389 (void) mutex_unlock(&lock); 1390 if (kernelcall) { 1391 rvalp = NULL; 1392 rlen = 0; 1393 } else { 1394 rvalp->rval = rval; 1395 } 1396 if (uc != NULL) 1397 ucred_free(uc); 1398 (void) door_return((char *)rvalp, rlen, NULL, 0); 1399 thr_exit(NULL); 1400 } 1401 1402 static int 1403 setup_door(zlog_t *zlogp) 1404 { 1405 if ((zone_door = door_create(server, NULL, 1406 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1407 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1408 return (-1); 1409 } 1410 (void) fdetach(zone_door_path); 1411 1412 if (fattach(zone_door, zone_door_path) != 0) { 1413 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1414 (void) door_revoke(zone_door); 1415 (void) fdetach(zone_door_path); 1416 zone_door = -1; 1417 return (-1); 1418 } 1419 return (0); 1420 } 1421 1422 /* 1423 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 1424 * is where zoneadmd itself will check to see that another instance of 1425 * zoneadmd isn't already controlling this zone. 1426 * 1427 * The idea here is that we want to open the path to which we will 1428 * attach our door, lock it, and then make sure that no-one has beat us 1429 * to fattach(3c)ing onto it. 1430 * 1431 * fattach(3c) is really a mount, so there are actually two possible 1432 * vnodes we could be dealing with. Our strategy is as follows: 1433 * 1434 * - If the file we opened is a regular file (common case): 1435 * There is no fattach(3c)ed door, so we have a chance of becoming 1436 * the managing zoneadmd. We attempt to lock the file: if it is 1437 * already locked, that means someone else raced us here, so we 1438 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 1439 * that beat us to it. 1440 * 1441 * - If the file we opened is a namefs file: 1442 * This means there is already an established door fattach(3c)'ed 1443 * to the rendezvous path. We've lost the race, so we give up. 1444 * Note that in this case we also try to grab the file lock, and 1445 * will succeed in acquiring it since the vnode locked by the 1446 * "winning" zoneadmd was a regular one, and the one we locked was 1447 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1448 * we just return to zoneadm(1m) which knows to retry. 1449 */ 1450 static int 1451 make_daemon_exclusive(zlog_t *zlogp) 1452 { 1453 int doorfd = -1; 1454 int err, ret = -1; 1455 struct stat st; 1456 struct flock flock; 1457 zone_state_t zstate; 1458 1459 top: 1460 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1461 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1462 zonecfg_strerror(err)); 1463 goto out; 1464 } 1465 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1466 S_IREAD|S_IWRITE)) < 0) { 1467 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1468 goto out; 1469 } 1470 if (fstat(doorfd, &st) < 0) { 1471 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1472 goto out; 1473 } 1474 /* 1475 * Lock the file to synchronize with other zoneadmd 1476 */ 1477 flock.l_type = F_WRLCK; 1478 flock.l_whence = SEEK_SET; 1479 flock.l_start = (off_t)0; 1480 flock.l_len = (off_t)0; 1481 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1482 /* 1483 * Someone else raced us here and grabbed the lock file 1484 * first. A warning here is inappropriate since nothing 1485 * went wrong. 1486 */ 1487 goto out; 1488 } 1489 1490 if (strcmp(st.st_fstype, "namefs") == 0) { 1491 struct door_info info; 1492 1493 /* 1494 * There is already something fattach()'ed to this file. 1495 * Lets see what the door is up to. 1496 */ 1497 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1498 /* 1499 * Another zoneadmd process seems to be in 1500 * control of the situation and we don't need to 1501 * be here. A warning here is inappropriate 1502 * since nothing went wrong. 1503 * 1504 * If the door has been revoked, the zoneadmd 1505 * process currently managing the zone is going 1506 * away. We'll return control to zoneadm(1m) 1507 * which will try again (by which time zoneadmd 1508 * will hopefully have exited). 1509 */ 1510 goto out; 1511 } 1512 1513 /* 1514 * If we got this far, there's a fattach(3c)'ed door 1515 * that belongs to a process that has exited, which can 1516 * happen if the previous zoneadmd died unexpectedly. 1517 * 1518 * Let user know that something is amiss, but that we can 1519 * recover; if the zone is in the installed state, then don't 1520 * message, since having a running zoneadmd isn't really 1521 * expected/needed. We want to keep occurences of this message 1522 * limited to times when zoneadmd is picking back up from a 1523 * zoneadmd that died while the zone was in some non-trivial 1524 * state. 1525 */ 1526 if (zstate > ZONE_STATE_INSTALLED) { 1527 zerror(zlogp, B_FALSE, 1528 "zone '%s': WARNING: zone is in state '%s', but " 1529 "zoneadmd does not appear to be available; " 1530 "restarted zoneadmd to recover.", 1531 zone_name, zone_state_str(zstate)); 1532 } 1533 1534 (void) fdetach(zone_door_path); 1535 (void) close(doorfd); 1536 goto top; 1537 } 1538 ret = 0; 1539 out: 1540 (void) close(doorfd); 1541 return (ret); 1542 } 1543 1544 /* 1545 * Setup the brand's pre and post state change callbacks, as well as the 1546 * query callback, if any of these exist. 1547 */ 1548 static int 1549 brand_callback_init(brand_handle_t bh, char *zone_name) 1550 { 1551 char zpath[MAXPATHLEN]; 1552 1553 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) 1554 return (-1); 1555 1556 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX, 1557 sizeof (pre_statechg_hook)); 1558 1559 if (brand_get_prestatechange(bh, zone_name, zpath, 1560 pre_statechg_hook + EXEC_LEN, 1561 sizeof (pre_statechg_hook) - EXEC_LEN) != 0) 1562 return (-1); 1563 1564 if (strlen(pre_statechg_hook) <= EXEC_LEN) 1565 pre_statechg_hook[0] = '\0'; 1566 1567 (void) strlcpy(post_statechg_hook, EXEC_PREFIX, 1568 sizeof (post_statechg_hook)); 1569 1570 if (brand_get_poststatechange(bh, zone_name, zpath, 1571 post_statechg_hook + EXEC_LEN, 1572 sizeof (post_statechg_hook) - EXEC_LEN) != 0) 1573 return (-1); 1574 1575 if (strlen(post_statechg_hook) <= EXEC_LEN) 1576 post_statechg_hook[0] = '\0'; 1577 1578 (void) strlcpy(query_hook, EXEC_PREFIX, 1579 sizeof (query_hook)); 1580 1581 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN, 1582 sizeof (query_hook) - EXEC_LEN) != 0) 1583 return (-1); 1584 1585 if (strlen(query_hook) <= EXEC_LEN) 1586 query_hook[0] = '\0'; 1587 1588 return (0); 1589 } 1590 1591 int 1592 main(int argc, char *argv[]) 1593 { 1594 int opt; 1595 zoneid_t zid; 1596 priv_set_t *privset; 1597 zone_state_t zstate; 1598 char parents_locale[MAXPATHLEN]; 1599 brand_handle_t bh; 1600 int err; 1601 1602 pid_t pid; 1603 sigset_t blockset; 1604 sigset_t block_cld; 1605 1606 struct { 1607 sema_t sem; 1608 int status; 1609 zlog_t log; 1610 } *shstate; 1611 size_t shstatelen = getpagesize(); 1612 1613 zlog_t errlog; 1614 zlog_t *zlogp; 1615 1616 int ctfd; 1617 1618 progname = get_execbasename(argv[0]); 1619 1620 /* 1621 * Make sure stderr is unbuffered 1622 */ 1623 (void) setbuffer(stderr, NULL, 0); 1624 1625 /* 1626 * Get out of the way of mounted filesystems, since we will daemonize 1627 * soon. 1628 */ 1629 (void) chdir("/"); 1630 1631 /* 1632 * Use the default system umask per PSARC 1998/110 rather than 1633 * anything that may have been set by the caller. 1634 */ 1635 (void) umask(CMASK); 1636 1637 /* 1638 * Initially we want to use our parent's locale. 1639 */ 1640 (void) setlocale(LC_ALL, ""); 1641 (void) textdomain(TEXT_DOMAIN); 1642 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1643 sizeof (parents_locale)); 1644 1645 /* 1646 * This zlog_t is used for writing to stderr 1647 */ 1648 errlog.logfile = stderr; 1649 errlog.buflen = errlog.loglen = 0; 1650 errlog.buf = errlog.log = NULL; 1651 errlog.locale = parents_locale; 1652 1653 /* 1654 * We start off writing to stderr until we're ready to daemonize. 1655 */ 1656 zlogp = &errlog; 1657 1658 /* 1659 * Process options. 1660 */ 1661 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1662 switch (opt) { 1663 case 'R': 1664 zonecfg_set_root(optarg); 1665 break; 1666 case 'z': 1667 zone_name = optarg; 1668 break; 1669 default: 1670 usage(); 1671 } 1672 } 1673 1674 if (zone_name == NULL) 1675 usage(); 1676 1677 /* 1678 * Because usage() prints directly to stderr, it has gettext() 1679 * wrapping, which depends on the locale. But since zerror() calls 1680 * localize() which tweaks the locale, it is not safe to call zerror() 1681 * until after the last call to usage(). Fortunately, the last call 1682 * to usage() is just above and the first call to zerror() is just 1683 * below. Don't mess this up. 1684 */ 1685 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1686 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1687 GLOBAL_ZONENAME); 1688 return (1); 1689 } 1690 1691 if (zone_get_id(zone_name, &zid) != 0) { 1692 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 1693 zonecfg_strerror(Z_NO_ZONE)); 1694 return (1); 1695 } 1696 1697 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1698 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1699 zonecfg_strerror(err)); 1700 return (1); 1701 } 1702 if (zstate < ZONE_STATE_INCOMPLETE) { 1703 zerror(zlogp, B_FALSE, 1704 "cannot manage a zone which is in state '%s'", 1705 zone_state_str(zstate)); 1706 return (1); 1707 } 1708 1709 /* Get a handle to the brand info for this zone */ 1710 if ((zone_get_brand(zone_name, brand_name, sizeof (brand_name)) 1711 != Z_OK) || (bh = brand_open(brand_name)) == NULL) { 1712 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1713 return (1); 1714 } 1715 zone_isnative = brand_is_native(bh); 1716 zone_iscluster = (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0); 1717 1718 /* Get state change brand hooks. */ 1719 if (brand_callback_init(bh, zone_name) == -1) { 1720 zerror(zlogp, B_TRUE, 1721 "failed to initialize brand state change hooks"); 1722 brand_close(bh); 1723 return (1); 1724 } 1725 1726 brand_close(bh); 1727 1728 /* 1729 * Check that we have all privileges. It would be nice to pare 1730 * this down, but this is at least a first cut. 1731 */ 1732 if ((privset = priv_allocset()) == NULL) { 1733 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1734 return (1); 1735 } 1736 1737 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 1738 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 1739 priv_freeset(privset); 1740 return (1); 1741 } 1742 1743 if (priv_isfullset(privset) == B_FALSE) { 1744 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 1745 "run this command (all privs required)"); 1746 priv_freeset(privset); 1747 return (1); 1748 } 1749 priv_freeset(privset); 1750 1751 if (mkzonedir(zlogp) != 0) 1752 return (1); 1753 1754 /* 1755 * Pre-fork: setup shared state 1756 */ 1757 if ((shstate = (void *)mmap(NULL, shstatelen, 1758 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 1759 MAP_FAILED) { 1760 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 1761 return (1); 1762 } 1763 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 1764 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 1765 (void) munmap((char *)shstate, shstatelen); 1766 return (1); 1767 } 1768 shstate->log.logfile = NULL; 1769 shstate->log.buflen = shstatelen - sizeof (*shstate); 1770 shstate->log.loglen = shstate->log.buflen; 1771 shstate->log.buf = (char *)shstate + sizeof (*shstate); 1772 shstate->log.log = shstate->log.buf; 1773 shstate->log.locale = parents_locale; 1774 shstate->status = -1; 1775 1776 /* 1777 * We need a SIGCHLD handler so the sema_wait() below will wake 1778 * up if the child dies without doing a sema_post(). 1779 */ 1780 (void) sigset(SIGCHLD, sigchld); 1781 /* 1782 * We must mask SIGCHLD until after we've coped with the fork 1783 * sufficiently to deal with it; otherwise we can race and 1784 * receive the signal before pid has been initialized 1785 * (yes, this really happens). 1786 */ 1787 (void) sigemptyset(&block_cld); 1788 (void) sigaddset(&block_cld, SIGCHLD); 1789 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 1790 1791 if ((ctfd = init_template()) == -1) { 1792 zerror(zlogp, B_TRUE, "failed to create contract"); 1793 return (1); 1794 } 1795 1796 /* 1797 * Do not let another thread localize a message while we are forking. 1798 */ 1799 (void) mutex_lock(&msglock); 1800 pid = fork(); 1801 (void) mutex_unlock(&msglock); 1802 1803 /* 1804 * In all cases (parent, child, and in the event of an error) we 1805 * don't want to cause creation of contracts on subsequent fork()s. 1806 */ 1807 (void) ct_tmpl_clear(ctfd); 1808 (void) close(ctfd); 1809 1810 if (pid == -1) { 1811 zerror(zlogp, B_TRUE, "could not fork"); 1812 return (1); 1813 1814 } else if (pid > 0) { /* parent */ 1815 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1816 /* 1817 * This marks a window of vulnerability in which we receive 1818 * the SIGCLD before falling into sema_wait (normally we would 1819 * get woken up from sema_wait with EINTR upon receipt of 1820 * SIGCLD). So we may need to use some other scheme like 1821 * sema_posting in the sigcld handler. 1822 * blech 1823 */ 1824 (void) sema_wait(&shstate->sem); 1825 (void) sema_destroy(&shstate->sem); 1826 if (shstate->status != 0) 1827 (void) waitpid(pid, NULL, WNOHANG); 1828 /* 1829 * It's ok if we die with SIGPIPE. It's not like we could have 1830 * done anything about it. 1831 */ 1832 (void) fprintf(stderr, "%s", shstate->log.buf); 1833 _exit(shstate->status == 0 ? 0 : 1); 1834 } 1835 1836 /* 1837 * The child charges on. 1838 */ 1839 (void) sigset(SIGCHLD, SIG_DFL); 1840 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1841 1842 /* 1843 * SIGPIPE can be delivered if we write to a socket for which the 1844 * peer endpoint is gone. That can lead to too-early termination 1845 * of zoneadmd, and that's not good eats. 1846 */ 1847 (void) sigset(SIGPIPE, SIG_IGN); 1848 /* 1849 * Stop using stderr 1850 */ 1851 zlogp = &shstate->log; 1852 1853 /* 1854 * We don't need stdout/stderr from now on. 1855 */ 1856 closefrom(0); 1857 1858 /* 1859 * Initialize the syslog zlog_t. This needs to be done after 1860 * the call to closefrom(). 1861 */ 1862 logsys.buf = logsys.log = NULL; 1863 logsys.buflen = logsys.loglen = 0; 1864 logsys.logfile = NULL; 1865 logsys.locale = DEFAULT_LOCALE; 1866 1867 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 1868 1869 /* 1870 * The eventstream is used to publish state changes in the zone 1871 * from the door threads to the console I/O poller. 1872 */ 1873 if (eventstream_init() == -1) { 1874 zerror(zlogp, B_TRUE, "unable to create eventstream"); 1875 goto child_out; 1876 } 1877 1878 (void) snprintf(zone_door_path, sizeof (zone_door_path), 1879 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 1880 1881 /* 1882 * See if another zoneadmd is running for this zone. If not, then we 1883 * can now modify system state. 1884 */ 1885 if (make_daemon_exclusive(zlogp) == -1) 1886 goto child_out; 1887 1888 1889 /* 1890 * Create/join a new session; we need to be careful of what we do with 1891 * the console from now on so we don't end up being the session leader 1892 * for the terminal we're going to be handing out. 1893 */ 1894 (void) setsid(); 1895 1896 /* 1897 * This thread shouldn't be receiving any signals; in particular, 1898 * SIGCHLD should be received by the thread doing the fork(). 1899 */ 1900 (void) sigfillset(&blockset); 1901 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 1902 1903 /* 1904 * Setup the console device and get ready to serve the console; 1905 * once this has completed, we're ready to let console clients 1906 * make an attempt to connect (they will block until 1907 * serve_console_sock() below gets called, and any pending 1908 * connection is accept()ed). 1909 */ 1910 if (!zonecfg_in_alt_root() && init_console(zlogp) == -1) 1911 goto child_out; 1912 1913 /* 1914 * Take the lock now, so that when the door server gets going, we 1915 * are guaranteed that it won't take a request until we are sure 1916 * that everything is completely set up. See the child_out: label 1917 * below to see why this matters. 1918 */ 1919 (void) mutex_lock(&lock); 1920 1921 /* Init semaphore for scratch zones. */ 1922 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 1923 zerror(zlogp, B_TRUE, 1924 "failed to initialize semaphore for scratch zone"); 1925 goto child_out; 1926 } 1927 1928 /* 1929 * Note: door setup must occur *after* the console is setup. 1930 * This is so that as zlogin tests the door to see if zoneadmd 1931 * is ready yet, we know that the console will get serviced 1932 * once door_info() indicates that the door is "up". 1933 */ 1934 if (setup_door(zlogp) == -1) 1935 goto child_out; 1936 1937 /* 1938 * Things seem OK so far; tell the parent process that we're done 1939 * with setup tasks. This will cause the parent to exit, signalling 1940 * to zoneadm, zlogin, or whatever forked it that we are ready to 1941 * service requests. 1942 */ 1943 shstate->status = 0; 1944 (void) sema_post(&shstate->sem); 1945 (void) munmap((char *)shstate, shstatelen); 1946 shstate = NULL; 1947 1948 (void) mutex_unlock(&lock); 1949 1950 /* 1951 * zlogp is now invalid, so reset it to the syslog logger. 1952 */ 1953 zlogp = &logsys; 1954 1955 /* 1956 * Now that we are free of any parents, switch to the default locale. 1957 */ 1958 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 1959 1960 /* 1961 * At this point the setup portion of main() is basically done, so 1962 * we reuse this thread to manage the zone console. When 1963 * serve_console() has returned, we are past the point of no return 1964 * in the life of this zoneadmd. 1965 */ 1966 if (zonecfg_in_alt_root()) { 1967 /* 1968 * This is just awful, but mounted scratch zones don't (and 1969 * can't) have consoles. We just wait for unmount instead. 1970 */ 1971 while (sema_wait(&scratch_sem) == EINTR) 1972 ; 1973 } else { 1974 serve_console(zlogp); 1975 assert(in_death_throes); 1976 } 1977 1978 /* 1979 * This is the next-to-last part of the exit interlock. Upon calling 1980 * fdetach(), the door will go unreferenced; once any 1981 * outstanding requests (like the door thread doing Z_HALT) are 1982 * done, the door will get an UNREF notification; when it handles 1983 * the UNREF, the door server will cause the exit. 1984 */ 1985 assert(!MUTEX_HELD(&lock)); 1986 (void) fdetach(zone_door_path); 1987 for (;;) 1988 (void) pause(); 1989 1990 child_out: 1991 assert(pid == 0); 1992 if (shstate != NULL) { 1993 shstate->status = -1; 1994 (void) sema_post(&shstate->sem); 1995 (void) munmap((char *)shstate, shstatelen); 1996 } 1997 1998 /* 1999 * This might trigger an unref notification, but if so, 2000 * we are still holding the lock, so our call to exit will 2001 * ultimately win the race and will publish the right exit 2002 * code. 2003 */ 2004 if (zone_door != -1) { 2005 assert(MUTEX_HELD(&lock)); 2006 (void) door_revoke(zone_door); 2007 (void) fdetach(zone_door_path); 2008 } 2009 return (1); /* return from main() forcibly exits an MT process */ 2010 } 2011