1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * zoneadmd manages zones; one zoneadmd process is launched for each 29 * non-global zone on the system. This daemon juggles four jobs: 30 * 31 * - Implement setup and teardown of the zone "virtual platform": mount and 32 * unmount filesystems; create and destroy network interfaces; communicate 33 * with devfsadmd to lay out devices for the zone; instantiate the zone 34 * console device; configure process runtime attributes such as resource 35 * controls, pool bindings, fine-grained privileges. 36 * 37 * - Launch the zone's init(1M) process. 38 * 39 * - Implement a door server; clients (like zoneadm) connect to the door 40 * server and request zone state changes. The kernel is also a client of 41 * this door server. A request to halt or reboot the zone which originates 42 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 43 * 44 * One minor problem is that messages emitted by zoneadmd need to be passed 45 * back to the zoneadm process making the request. These messages need to 46 * be rendered in the client's locale; so, this is passed in as part of the 47 * request. The exception is the kernel upcall to zoneadmd, in which case 48 * messages are syslog'd. 49 * 50 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 51 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 52 * strings which do not need to be translated. 53 * 54 * - Act as a console server for zlogin -C processes; see comments in zcons.c 55 * for more information about the zone console architecture. 56 * 57 * DESIGN NOTES 58 * 59 * Restart: 60 * A chief design constraint of zoneadmd is that it should be restartable in 61 * the case that the administrator kills it off, or it suffers a fatal error, 62 * without the running zone being impacted; this is akin to being able to 63 * reboot the service processor of a server without affecting the OS instance. 64 */ 65 66 #include <sys/param.h> 67 #include <sys/mman.h> 68 #include <sys/types.h> 69 #include <sys/stat.h> 70 #include <sys/sysmacros.h> 71 72 #include <bsm/adt.h> 73 #include <bsm/adt_event.h> 74 75 #include <alloca.h> 76 #include <assert.h> 77 #include <errno.h> 78 #include <door.h> 79 #include <fcntl.h> 80 #include <locale.h> 81 #include <signal.h> 82 #include <stdarg.h> 83 #include <stdio.h> 84 #include <stdlib.h> 85 #include <string.h> 86 #include <strings.h> 87 #include <synch.h> 88 #include <syslog.h> 89 #include <thread.h> 90 #include <unistd.h> 91 #include <wait.h> 92 #include <limits.h> 93 #include <zone.h> 94 #include <libbrand.h> 95 #include <libcontract.h> 96 #include <libcontract_priv.h> 97 #include <sys/contract/process.h> 98 #include <sys/ctfs.h> 99 100 #include <libzonecfg.h> 101 #include "zoneadmd.h" 102 103 static char *progname; 104 char *zone_name; /* zone which we are managing */ 105 char brand_name[MAXNAMELEN]; 106 boolean_t zone_isnative; 107 boolean_t zone_iscluster; 108 static zoneid_t zone_id; 109 110 static char pre_statechg_hook[2 * MAXPATHLEN]; 111 static char post_statechg_hook[2 * MAXPATHLEN]; 112 char query_hook[2 * MAXPATHLEN]; 113 114 zlog_t logsys; 115 116 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 117 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 118 119 static sema_t scratch_sem; /* for scratch zones */ 120 121 static char zone_door_path[MAXPATHLEN]; 122 static int zone_door = -1; 123 124 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 125 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 126 127 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 128 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 129 #endif 130 131 #define DEFAULT_LOCALE "C" 132 133 static const char * 134 z_cmd_name(zone_cmd_t zcmd) 135 { 136 /* This list needs to match the enum in sys/zone.h */ 137 static const char *zcmdstr[] = { 138 "ready", "boot", "forceboot", "reboot", "halt", 139 "note_uninstalling", "mount", "forcemount", "unmount" 140 }; 141 142 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 143 return ("unknown"); 144 else 145 return (zcmdstr[(int)zcmd]); 146 } 147 148 static char * 149 get_execbasename(char *execfullname) 150 { 151 char *last_slash, *execbasename; 152 153 /* guard against '/' at end of command invocation */ 154 for (;;) { 155 last_slash = strrchr(execfullname, '/'); 156 if (last_slash == NULL) { 157 execbasename = execfullname; 158 break; 159 } else { 160 execbasename = last_slash + 1; 161 if (*execbasename == '\0') { 162 *last_slash = '\0'; 163 continue; 164 } 165 break; 166 } 167 } 168 return (execbasename); 169 } 170 171 static void 172 usage(void) 173 { 174 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 175 (void) fprintf(stderr, 176 gettext("\tNote: %s should not be run directly.\n"), progname); 177 exit(2); 178 } 179 180 /* ARGSUSED */ 181 static void 182 sigchld(int sig) 183 { 184 } 185 186 char * 187 localize_msg(char *locale, const char *msg) 188 { 189 char *out; 190 191 (void) mutex_lock(&msglock); 192 (void) setlocale(LC_MESSAGES, locale); 193 out = gettext(msg); 194 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 195 (void) mutex_unlock(&msglock); 196 return (out); 197 } 198 199 /* PRINTFLIKE3 */ 200 void 201 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 202 { 203 va_list alist; 204 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 205 char *bp; 206 int saved_errno = errno; 207 208 if (zlogp == NULL) 209 return; 210 if (zlogp == &logsys) 211 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 212 zone_name); 213 else 214 buf[0] = '\0'; 215 bp = &(buf[strlen(buf)]); 216 217 /* 218 * In theory, the locale pointer should be set to either "C" or a 219 * char array, so it should never be NULL 220 */ 221 assert(zlogp->locale != NULL); 222 /* Locale is per process, but we are multi-threaded... */ 223 fmt = localize_msg(zlogp->locale, fmt); 224 225 va_start(alist, fmt); 226 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 227 va_end(alist); 228 bp = &(buf[strlen(buf)]); 229 if (use_strerror) 230 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 231 strerror(saved_errno)); 232 if (zlogp == &logsys) { 233 (void) syslog(LOG_ERR, "%s", buf); 234 } else if (zlogp->logfile != NULL) { 235 (void) fprintf(zlogp->logfile, "%s\n", buf); 236 } else { 237 size_t buflen; 238 size_t copylen; 239 240 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 241 copylen = MIN(buflen, zlogp->loglen); 242 zlogp->log += copylen; 243 zlogp->loglen -= copylen; 244 } 245 } 246 247 /* 248 * Emit a warning for any boot arguments which are unrecognized. Since 249 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 250 * put the arguments into an argv style array, use getopt to process them, 251 * and put the resultant argument string back into outargs. 252 * 253 * During the filtering, we pull out any arguments which are truly "boot" 254 * arguments, leaving only those which are to be passed intact to the 255 * progenitor process. The one we support at the moment is -i, which 256 * indicates to the kernel which program should be launched as 'init'. 257 * 258 * A return of Z_INVAL indicates specifically that the arguments are 259 * not valid; this is a non-fatal error. Except for Z_OK, all other return 260 * values are treated as fatal. 261 */ 262 static int 263 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 264 char *init_file, char *badarg) 265 { 266 int argc = 0, argc_save; 267 int i; 268 int err; 269 char *arg, *lasts, **argv = NULL, **argv_save; 270 char zonecfg_args[BOOTARGS_MAX]; 271 char scratchargs[BOOTARGS_MAX], *sargs; 272 char c; 273 274 bzero(outargs, BOOTARGS_MAX); 275 bzero(badarg, BOOTARGS_MAX); 276 277 /* 278 * If the user didn't specify transient boot arguments, check 279 * to see if there were any specified in the zone configuration, 280 * and use them if applicable. 281 */ 282 if (inargs == NULL || inargs[0] == '\0') { 283 zone_dochandle_t handle; 284 if ((handle = zonecfg_init_handle()) == NULL) { 285 zerror(zlogp, B_TRUE, 286 "getting zone configuration handle"); 287 return (Z_BAD_HANDLE); 288 } 289 err = zonecfg_get_snapshot_handle(zone_name, handle); 290 if (err != Z_OK) { 291 zerror(zlogp, B_FALSE, 292 "invalid configuration snapshot"); 293 zonecfg_fini_handle(handle); 294 return (Z_BAD_HANDLE); 295 } 296 297 bzero(zonecfg_args, sizeof (zonecfg_args)); 298 (void) zonecfg_get_bootargs(handle, zonecfg_args, 299 sizeof (zonecfg_args)); 300 inargs = zonecfg_args; 301 zonecfg_fini_handle(handle); 302 } 303 304 if (strlen(inargs) >= BOOTARGS_MAX) { 305 zerror(zlogp, B_FALSE, "boot argument string too long"); 306 return (Z_INVAL); 307 } 308 309 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 310 sargs = scratchargs; 311 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 312 sargs = NULL; 313 argc++; 314 } 315 316 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 317 zerror(zlogp, B_FALSE, "memory allocation failed"); 318 return (Z_NOMEM); 319 } 320 321 argv_save = argv; 322 argc_save = argc; 323 324 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 325 sargs = scratchargs; 326 i = 0; 327 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 328 sargs = NULL; 329 if ((argv[i] = strdup(arg)) == NULL) { 330 err = Z_NOMEM; 331 zerror(zlogp, B_FALSE, "memory allocation failed"); 332 goto done; 333 } 334 i++; 335 } 336 337 /* 338 * We preserve compatibility with the Solaris system boot behavior, 339 * which allows: 340 * 341 * # reboot kernel/unix -s -m verbose 342 * 343 * In this example, kernel/unix tells the booter what file to 344 * boot. We don't want reboot in a zone to be gratuitously different, 345 * so we silently ignore the boot file, if necessary. 346 */ 347 if (argv[0] == NULL) 348 goto done; 349 350 assert(argv[0][0] != ' '); 351 assert(argv[0][0] != '\t'); 352 353 if (argv[0][0] != '-' && argv[0][0] != '\0') { 354 argv = &argv[1]; 355 argc--; 356 } 357 358 optind = 0; 359 opterr = 0; 360 err = Z_OK; 361 while ((c = getopt(argc, argv, "fi:m:s")) != -1) { 362 switch (c) { 363 case 'i': 364 /* 365 * -i is handled by the runtime and is not passed 366 * along to userland 367 */ 368 (void) strlcpy(init_file, optarg, MAXPATHLEN); 369 break; 370 case 'f': 371 /* This has already been processed by zoneadm */ 372 break; 373 case 'm': 374 case 's': 375 /* These pass through unmolested */ 376 (void) snprintf(outargs, BOOTARGS_MAX, 377 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 378 break; 379 case '?': 380 /* 381 * We warn about unknown arguments but pass them 382 * along anyway-- if someone wants to develop their 383 * own init replacement, they can pass it whatever 384 * args they want. 385 */ 386 err = Z_INVAL; 387 (void) snprintf(outargs, BOOTARGS_MAX, 388 "%s -%c", outargs, optopt); 389 (void) snprintf(badarg, BOOTARGS_MAX, 390 "%s -%c", badarg, optopt); 391 break; 392 } 393 } 394 395 /* 396 * For Solaris Zones we warn about and discard non-option arguments. 397 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 398 * to the kernel, we concat up all the other remaining boot args. 399 * and warn on them as a group. 400 */ 401 if (optind < argc) { 402 err = Z_INVAL; 403 while (optind < argc) { 404 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 405 badarg, strlen(badarg) > 0 ? " " : "", 406 argv[optind]); 407 optind++; 408 } 409 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 410 "arguments `%s'.", badarg); 411 } 412 413 done: 414 for (i = 0; i < argc_save; i++) { 415 if (argv_save[i] != NULL) 416 free(argv_save[i]); 417 } 418 free(argv_save); 419 return (err); 420 } 421 422 423 static int 424 mkzonedir(zlog_t *zlogp) 425 { 426 struct stat st; 427 /* 428 * We must create and lock everyone but root out of ZONES_TMPDIR 429 * since anyone can open any UNIX domain socket, regardless of 430 * its file system permissions. Sigh... 431 */ 432 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 433 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 434 return (-1); 435 } 436 /* paranoia */ 437 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 438 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 439 return (-1); 440 } 441 (void) chmod(ZONES_TMPDIR, S_IRWXU); 442 return (0); 443 } 444 445 /* 446 * Run the brand's pre-state change callback, if it exists. 447 */ 448 static int 449 brand_prestatechg(zlog_t *zlogp, int state, int cmd) 450 { 451 char cmdbuf[2 * MAXPATHLEN]; 452 453 if (pre_statechg_hook[0] == '\0') 454 return (0); 455 456 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d", pre_statechg_hook, 457 state, cmd) > sizeof (cmdbuf)) 458 return (-1); 459 460 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 461 return (-1); 462 463 return (0); 464 } 465 466 /* 467 * Run the brand's post-state change callback, if it exists. 468 */ 469 static int 470 brand_poststatechg(zlog_t *zlogp, int state, int cmd) 471 { 472 char cmdbuf[2 * MAXPATHLEN]; 473 474 if (post_statechg_hook[0] == '\0') 475 return (0); 476 477 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d", post_statechg_hook, 478 state, cmd) > sizeof (cmdbuf)) 479 return (-1); 480 481 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 482 return (-1); 483 484 return (0); 485 } 486 487 /* 488 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 489 * 'true' if this is being invoked as part of the processing for the "mount" 490 * subcommand. 491 */ 492 static int 493 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate) 494 { 495 int err; 496 497 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0) 498 return (-1); 499 500 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 501 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 502 zonecfg_strerror(err)); 503 return (-1); 504 } 505 506 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 507 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 508 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 509 zonecfg_strerror(err)); 510 return (-1); 511 } 512 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 513 bringup_failure_recovery = B_TRUE; 514 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE); 515 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 516 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 517 zonecfg_strerror(err)); 518 return (-1); 519 } 520 521 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0) 522 return (-1); 523 524 return (0); 525 } 526 527 int 528 init_template(void) 529 { 530 int fd; 531 int err = 0; 532 533 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 534 if (fd == -1) 535 return (-1); 536 537 /* 538 * For now, zoneadmd doesn't do anything with the contract. 539 * Deliver no events, don't inherit, and allow it to be orphaned. 540 */ 541 err |= ct_tmpl_set_critical(fd, 0); 542 err |= ct_tmpl_set_informative(fd, 0); 543 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 544 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 545 if (err || ct_tmpl_activate(fd)) { 546 (void) close(fd); 547 return (-1); 548 } 549 550 return (fd); 551 } 552 553 typedef struct fs_callback { 554 zlog_t *zlogp; 555 zoneid_t zoneid; 556 boolean_t mount_cmd; 557 } fs_callback_t; 558 559 static int 560 mount_early_fs(void *data, const char *spec, const char *dir, 561 const char *fstype, const char *opt) 562 { 563 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; 564 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; 565 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd; 566 char rootpath[MAXPATHLEN]; 567 pid_t child; 568 int child_status; 569 int tmpl_fd; 570 int rv; 571 ctid_t ct; 572 573 /* determine the zone rootpath */ 574 if (mount_cmd) { 575 char zonepath[MAXPATHLEN]; 576 char luroot[MAXPATHLEN]; 577 578 assert(zone_isnative || zone_iscluster); 579 580 if (zone_get_zonepath(zone_name, 581 zonepath, sizeof (zonepath)) != Z_OK) { 582 zerror(zlogp, B_FALSE, "unable to determine zone path"); 583 return (-1); 584 } 585 586 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 587 resolve_lofs(zlogp, luroot, sizeof (luroot)); 588 (void) strlcpy(rootpath, luroot, sizeof (rootpath)); 589 } else { 590 if (zone_get_rootpath(zone_name, 591 rootpath, sizeof (rootpath)) != Z_OK) { 592 zerror(zlogp, B_FALSE, "unable to determine zone root"); 593 return (-1); 594 } 595 } 596 597 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) { 598 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 599 rootpath, dir); 600 return (-1); 601 } else if (rv > 0) { 602 /* The mount point path doesn't exist, create it now. */ 603 if (make_one_dir(zlogp, rootpath, dir, 604 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 605 DEFAULT_DIR_GROUP) != 0) { 606 zerror(zlogp, B_FALSE, "failed to create mount point"); 607 return (-1); 608 } 609 610 /* 611 * Now this might seem weird, but we need to invoke 612 * valid_mount_path() again. Why? Because it checks 613 * to make sure that the mount point path is canonical, 614 * which it can only do if the path exists, so now that 615 * we've created the path we have to verify it again. 616 */ 617 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, 618 fstype)) < 0) { 619 zerror(zlogp, B_FALSE, 620 "%s%s is not a valid mount point", rootpath, dir); 621 return (-1); 622 } 623 } 624 625 if ((tmpl_fd = init_template()) == -1) { 626 zerror(zlogp, B_TRUE, "failed to create contract"); 627 return (-1); 628 } 629 630 if ((child = fork()) == -1) { 631 (void) ct_tmpl_clear(tmpl_fd); 632 (void) close(tmpl_fd); 633 zerror(zlogp, B_TRUE, "failed to fork"); 634 return (-1); 635 636 } else if (child == 0) { /* child */ 637 char opt_buf[MAX_MNTOPT_STR]; 638 int optlen = 0; 639 int mflag = MS_DATA; 640 641 (void) ct_tmpl_clear(tmpl_fd); 642 /* 643 * Even though there are no procs running in the zone, we 644 * do this for paranoia's sake. 645 */ 646 (void) closefrom(0); 647 648 if (zone_enter(zoneid) == -1) { 649 _exit(errno); 650 } 651 if (opt != NULL) { 652 /* 653 * The mount() system call is incredibly annoying. 654 * If options are specified, we need to copy them 655 * into a temporary buffer since the mount() system 656 * call will overwrite the options string. It will 657 * also fail if the new option string it wants to 658 * write is bigger than the one we passed in, so 659 * you must pass in a buffer of the maximum possible 660 * option string length. sigh. 661 */ 662 (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); 663 opt = opt_buf; 664 optlen = MAX_MNTOPT_STR; 665 mflag = MS_OPTIONSTR; 666 } 667 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) 668 _exit(errno); 669 _exit(0); 670 } 671 672 /* parent */ 673 if (contract_latest(&ct) == -1) 674 ct = -1; 675 (void) ct_tmpl_clear(tmpl_fd); 676 (void) close(tmpl_fd); 677 if (waitpid(child, &child_status, 0) != child) { 678 /* unexpected: we must have been signalled */ 679 (void) contract_abandon_id(ct); 680 return (-1); 681 } 682 (void) contract_abandon_id(ct); 683 if (WEXITSTATUS(child_status) != 0) { 684 errno = WEXITSTATUS(child_status); 685 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 686 return (-1); 687 } 688 689 return (0); 690 } 691 692 /* 693 * If retstr is not NULL, the output of the subproc is returned in the str, 694 * otherwise it is output using zerror(). Any memory allocated for retstr 695 * should be freed by the caller. 696 */ 697 int 698 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr) 699 { 700 char buf[1024]; /* arbitrary large amount */ 701 char *inbuf; 702 FILE *file; 703 int status; 704 int rd_cnt; 705 706 if (retstr != NULL) { 707 if ((*retstr = malloc(1024)) == NULL) { 708 zerror(zlogp, B_FALSE, "out of memory"); 709 return (-1); 710 } 711 inbuf = *retstr; 712 rd_cnt = 0; 713 } else { 714 inbuf = buf; 715 } 716 717 file = popen(cmdbuf, "r"); 718 if (file == NULL) { 719 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); 720 return (-1); 721 } 722 723 while (fgets(inbuf, 1024, file) != NULL) { 724 if (retstr == NULL && zlogp != &logsys) { 725 zerror(zlogp, B_FALSE, "%s", inbuf); 726 } else { 727 char *p; 728 729 rd_cnt += 1024 - 1; 730 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) { 731 zerror(zlogp, B_FALSE, "out of memory"); 732 (void) pclose(file); 733 return (-1); 734 } 735 736 *retstr = p; 737 inbuf = *retstr + rd_cnt; 738 } 739 } 740 status = pclose(file); 741 742 if (WIFSIGNALED(status)) { 743 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 744 "signal %d", cmdbuf, WTERMSIG(status)); 745 return (-1); 746 } 747 assert(WIFEXITED(status)); 748 if (WEXITSTATUS(status) == ZEXIT_EXEC) { 749 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); 750 return (-1); 751 } 752 return (WEXITSTATUS(status)); 753 } 754 755 static int 756 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) 757 { 758 zoneid_t zoneid; 759 struct stat st; 760 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 761 char nbootargs[BOOTARGS_MAX]; 762 char cmdbuf[MAXPATHLEN]; 763 fs_callback_t cb; 764 brand_handle_t bh; 765 int err; 766 767 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0) 768 return (-1); 769 770 if (init_console_slave(zlogp) != 0) 771 return (-1); 772 reset_slave_terminal(zlogp); 773 774 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 775 zerror(zlogp, B_TRUE, "unable to get zoneid"); 776 return (-1); 777 } 778 779 cb.zlogp = zlogp; 780 cb.zoneid = zoneid; 781 cb.mount_cmd = B_FALSE; 782 783 /* Get a handle to the brand info for this zone */ 784 if ((bh = brand_open(brand_name)) == NULL) { 785 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 786 return (-1); 787 } 788 789 /* 790 * Get the list of filesystems to mount from the brand 791 * configuration. These mounts are done via a thread that will 792 * enter the zone, so they are done from within the context of the 793 * zone. 794 */ 795 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) { 796 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 797 brand_close(bh); 798 return (-1); 799 } 800 801 /* 802 * Get the brand's boot callback if it exists. 803 */ 804 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 805 zerror(zlogp, B_FALSE, "unable to determine zone path"); 806 brand_close(bh); 807 return (-1); 808 } 809 (void) strcpy(cmdbuf, EXEC_PREFIX); 810 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 811 sizeof (cmdbuf) - EXEC_LEN) != 0) { 812 zerror(zlogp, B_FALSE, 813 "unable to determine branded zone's boot callback"); 814 brand_close(bh); 815 return (-1); 816 } 817 818 /* Get the path for this zone's init(1M) (or equivalent) process. */ 819 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) { 820 zerror(zlogp, B_FALSE, 821 "unable to determine zone's init(1M) location"); 822 brand_close(bh); 823 return (-1); 824 } 825 826 brand_close(bh); 827 828 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 829 bad_boot_arg); 830 if (err == Z_INVAL) 831 eventstream_write(Z_EVT_ZONE_BADARGS); 832 else if (err != Z_OK) 833 return (-1); 834 835 assert(init_file[0] != '\0'); 836 837 /* Try to anticipate possible problems: Make sure init is executable. */ 838 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 839 zerror(zlogp, B_FALSE, "unable to determine zone root"); 840 return (-1); 841 } 842 843 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); 844 845 if (stat(initpath, &st) == -1) { 846 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 847 return (-1); 848 } 849 850 if ((st.st_mode & S_IXUSR) == 0) { 851 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 852 return (-1); 853 } 854 855 /* 856 * If there is a brand 'boot' callback, execute it now to give the 857 * brand one last chance to do any additional setup before the zone 858 * is booted. 859 */ 860 if ((strlen(cmdbuf) > EXEC_LEN) && 861 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) { 862 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 863 return (-1); 864 } 865 866 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 867 zerror(zlogp, B_TRUE, "could not set zone boot file"); 868 return (-1); 869 } 870 871 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 872 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 873 return (-1); 874 } 875 876 if (zone_boot(zoneid) == -1) { 877 zerror(zlogp, B_TRUE, "unable to boot zone"); 878 return (-1); 879 } 880 881 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0) 882 return (-1); 883 884 return (0); 885 } 886 887 static int 888 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate) 889 { 890 int err; 891 892 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0) 893 return (-1); 894 895 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) { 896 if (!bringup_failure_recovery) 897 zerror(zlogp, B_FALSE, "unable to destroy zone"); 898 return (-1); 899 } 900 901 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 902 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 903 zonecfg_strerror(err)); 904 905 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0) 906 return (-1); 907 908 return (0); 909 } 910 911 /* 912 * Generate AUE_zone_state for a command that boots a zone. 913 */ 914 static void 915 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 916 char *new_state) 917 { 918 adt_session_data_t *ah; 919 adt_event_data_t *event; 920 int pass_fail, fail_reason; 921 922 if (!adt_audit_enabled()) 923 return; 924 925 if (return_val == 0) { 926 pass_fail = ADT_SUCCESS; 927 fail_reason = ADT_SUCCESS; 928 } else { 929 pass_fail = ADT_FAILURE; 930 fail_reason = ADT_FAIL_VALUE_PROGRAM; 931 } 932 933 if (adt_start_session(&ah, NULL, 0)) { 934 zerror(zlogp, B_TRUE, gettext("audit failure.")); 935 return; 936 } 937 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 938 zerror(zlogp, B_TRUE, gettext("audit failure.")); 939 (void) adt_end_session(ah); 940 return; 941 } 942 943 event = adt_alloc_event(ah, ADT_zone_state); 944 if (event == NULL) { 945 zerror(zlogp, B_TRUE, gettext("audit failure.")); 946 (void) adt_end_session(ah); 947 return; 948 } 949 event->adt_zone_state.zonename = zone_name; 950 event->adt_zone_state.new_state = new_state; 951 952 if (adt_put_event(event, pass_fail, fail_reason)) 953 zerror(zlogp, B_TRUE, gettext("audit failure.")); 954 955 adt_free_event(event); 956 957 (void) adt_end_session(ah); 958 } 959 960 /* 961 * The main routine for the door server that deals with zone state transitions. 962 */ 963 /* ARGSUSED */ 964 static void 965 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 966 uint_t n_desc) 967 { 968 ucred_t *uc = NULL; 969 const priv_set_t *eset; 970 971 zone_state_t zstate; 972 zone_cmd_t cmd; 973 zone_cmd_arg_t *zargp; 974 975 boolean_t kernelcall; 976 977 int rval = -1; 978 uint64_t uniqid; 979 zoneid_t zoneid = -1; 980 zlog_t zlog; 981 zlog_t *zlogp; 982 zone_cmd_rval_t *rvalp; 983 size_t rlen = getpagesize(); /* conservative */ 984 fs_callback_t cb; 985 brand_handle_t bh; 986 987 /* LINTED E_BAD_PTR_CAST_ALIGN */ 988 zargp = (zone_cmd_arg_t *)args; 989 990 /* 991 * When we get the door unref message, we've fdetach'd the door, and 992 * it is time for us to shut down zoneadmd. 993 */ 994 if (zargp == DOOR_UNREF_DATA) { 995 /* 996 * See comment at end of main() for info on the last rites. 997 */ 998 exit(0); 999 } 1000 1001 if (zargp == NULL) { 1002 (void) door_return(NULL, 0, 0, 0); 1003 } 1004 1005 rvalp = alloca(rlen); 1006 bzero(rvalp, rlen); 1007 zlog.logfile = NULL; 1008 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 1009 zlog.buf = rvalp->errbuf; 1010 zlog.log = zlog.buf; 1011 /* defer initialization of zlog.locale until after credential check */ 1012 zlogp = &zlog; 1013 1014 if (alen != sizeof (zone_cmd_arg_t)) { 1015 /* 1016 * This really shouldn't be happening. 1017 */ 1018 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 1019 "unexpected (expected %d bytes)", alen, 1020 sizeof (zone_cmd_arg_t)); 1021 goto out; 1022 } 1023 cmd = zargp->cmd; 1024 1025 if (door_ucred(&uc) != 0) { 1026 zerror(&logsys, B_TRUE, "door_ucred"); 1027 goto out; 1028 } 1029 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 1030 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 1031 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 1032 ucred_geteuid(uc) != 0)) { 1033 zerror(&logsys, B_FALSE, "insufficient privileges"); 1034 goto out; 1035 } 1036 1037 kernelcall = ucred_getpid(uc) == 0; 1038 1039 /* 1040 * This is safe because we only use a zlog_t throughout the 1041 * duration of a door call; i.e., by the time the pointer 1042 * might become invalid, the door call would be over. 1043 */ 1044 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 1045 1046 (void) mutex_lock(&lock); 1047 1048 /* 1049 * Once we start to really die off, we don't want more connections. 1050 */ 1051 if (in_death_throes) { 1052 (void) mutex_unlock(&lock); 1053 ucred_free(uc); 1054 (void) door_return(NULL, 0, 0, 0); 1055 thr_exit(NULL); 1056 } 1057 1058 /* 1059 * Check for validity of command. 1060 */ 1061 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && 1062 cmd != Z_REBOOT && cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && 1063 cmd != Z_MOUNT && cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { 1064 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 1065 goto out; 1066 } 1067 1068 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 1069 /* 1070 * Can't happen 1071 */ 1072 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 1073 cmd); 1074 goto out; 1075 } 1076 /* 1077 * We ignore the possibility of someone calling zone_create(2) 1078 * explicitly; all requests must come through zoneadmd. 1079 */ 1080 if (zone_get_state(zone_name, &zstate) != Z_OK) { 1081 /* 1082 * Something terribly wrong happened 1083 */ 1084 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 1085 goto out; 1086 } 1087 1088 if (kernelcall) { 1089 /* 1090 * Kernel-initiated requests may lose their validity if the 1091 * zone_t the kernel was referring to has gone away. 1092 */ 1093 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 1094 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 1095 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 1096 /* 1097 * We're not talking about the same zone. The request 1098 * must have arrived too late. Return error. 1099 */ 1100 rval = -1; 1101 goto out; 1102 } 1103 zlogp = &logsys; /* Log errors to syslog */ 1104 } 1105 1106 /* 1107 * If we are being asked to forcibly mount or boot a zone, we 1108 * pretend that an INCOMPLETE zone is actually INSTALLED. 1109 */ 1110 if (zstate == ZONE_STATE_INCOMPLETE && 1111 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) 1112 zstate = ZONE_STATE_INSTALLED; 1113 1114 switch (zstate) { 1115 case ZONE_STATE_CONFIGURED: 1116 case ZONE_STATE_INCOMPLETE: 1117 /* 1118 * Not our area of expertise; we just print a nice message 1119 * and die off. 1120 */ 1121 zerror(zlogp, B_FALSE, 1122 "%s operation is invalid for zones in state '%s'", 1123 z_cmd_name(cmd), zone_state_str(zstate)); 1124 break; 1125 1126 case ZONE_STATE_INSTALLED: 1127 switch (cmd) { 1128 case Z_READY: 1129 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate); 1130 if (rval == 0) 1131 eventstream_write(Z_EVT_ZONE_READIED); 1132 break; 1133 case Z_BOOT: 1134 case Z_FORCEBOOT: 1135 eventstream_write(Z_EVT_ZONE_BOOTING); 1136 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1137 == 0) { 1138 rval = zone_bootup(zlogp, zargp->bootbuf, 1139 zstate); 1140 } 1141 audit_put_record(zlogp, uc, rval, "boot"); 1142 if (rval != 0) { 1143 bringup_failure_recovery = B_TRUE; 1144 (void) zone_halt(zlogp, B_FALSE, B_FALSE, 1145 zstate); 1146 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1147 } 1148 break; 1149 case Z_HALT: 1150 if (kernelcall) /* Invalid; can't happen */ 1151 abort(); 1152 /* 1153 * We could have two clients racing to halt this 1154 * zone; the second client loses, but his request 1155 * doesn't fail, since the zone is now in the desired 1156 * state. 1157 */ 1158 zerror(zlogp, B_FALSE, "zone is already halted"); 1159 rval = 0; 1160 break; 1161 case Z_REBOOT: 1162 if (kernelcall) /* Invalid; can't happen */ 1163 abort(); 1164 zerror(zlogp, B_FALSE, "%s operation is invalid " 1165 "for zones in state '%s'", z_cmd_name(cmd), 1166 zone_state_str(zstate)); 1167 rval = -1; 1168 break; 1169 case Z_NOTE_UNINSTALLING: 1170 if (kernelcall) /* Invalid; can't happen */ 1171 abort(); 1172 /* 1173 * Tell the console to print out a message about this. 1174 * Once it does, we will be in_death_throes. 1175 */ 1176 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 1177 break; 1178 case Z_MOUNT: 1179 case Z_FORCEMOUNT: 1180 if (kernelcall) /* Invalid; can't happen */ 1181 abort(); 1182 if (!zone_isnative && !zone_iscluster) { 1183 zerror(zlogp, B_FALSE, 1184 "%s operation is invalid for branded " 1185 "zones", z_cmd_name(cmd)); 1186 rval = -1; 1187 break; 1188 } 1189 1190 rval = zone_ready(zlogp, 1191 strcmp(zargp->bootbuf, "-U") == 0 ? 1192 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate); 1193 if (rval != 0) 1194 break; 1195 1196 eventstream_write(Z_EVT_ZONE_READIED); 1197 1198 /* Get a handle to the brand info for this zone */ 1199 if ((bh = brand_open(brand_name)) == NULL) { 1200 rval = -1; 1201 break; 1202 } 1203 1204 /* 1205 * Get the list of filesystems to mount from 1206 * the brand configuration. These mounts are done 1207 * via a thread that will enter the zone, so they 1208 * are done from within the context of the zone. 1209 */ 1210 cb.zlogp = zlogp; 1211 cb.zoneid = zone_id; 1212 cb.mount_cmd = B_TRUE; 1213 rval = brand_platform_iter_mounts(bh, 1214 mount_early_fs, &cb); 1215 1216 brand_close(bh); 1217 1218 /* 1219 * Ordinarily, /dev/fd would be mounted inside the zone 1220 * by svc:/system/filesystem/usr:default, but since 1221 * we're not booting the zone, we need to do this 1222 * manually. 1223 */ 1224 if (rval == 0) 1225 rval = mount_early_fs(&cb, 1226 "fd", "/dev/fd", "fd", NULL); 1227 break; 1228 case Z_UNMOUNT: 1229 if (kernelcall) /* Invalid; can't happen */ 1230 abort(); 1231 zerror(zlogp, B_FALSE, "zone is already unmounted"); 1232 rval = 0; 1233 break; 1234 } 1235 break; 1236 1237 case ZONE_STATE_READY: 1238 switch (cmd) { 1239 case Z_READY: 1240 /* 1241 * We could have two clients racing to ready this 1242 * zone; the second client loses, but his request 1243 * doesn't fail, since the zone is now in the desired 1244 * state. 1245 */ 1246 zerror(zlogp, B_FALSE, "zone is already ready"); 1247 rval = 0; 1248 break; 1249 case Z_BOOT: 1250 (void) strlcpy(boot_args, zargp->bootbuf, 1251 sizeof (boot_args)); 1252 eventstream_write(Z_EVT_ZONE_BOOTING); 1253 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1254 audit_put_record(zlogp, uc, rval, "boot"); 1255 if (rval != 0) { 1256 bringup_failure_recovery = B_TRUE; 1257 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1258 zstate); 1259 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1260 } 1261 boot_args[0] = '\0'; 1262 break; 1263 case Z_HALT: 1264 if (kernelcall) /* Invalid; can't happen */ 1265 abort(); 1266 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1267 != 0) 1268 break; 1269 eventstream_write(Z_EVT_ZONE_HALTED); 1270 break; 1271 case Z_REBOOT: 1272 case Z_NOTE_UNINSTALLING: 1273 case Z_MOUNT: 1274 case Z_UNMOUNT: 1275 if (kernelcall) /* Invalid; can't happen */ 1276 abort(); 1277 zerror(zlogp, B_FALSE, "%s operation is invalid " 1278 "for zones in state '%s'", z_cmd_name(cmd), 1279 zone_state_str(zstate)); 1280 rval = -1; 1281 break; 1282 } 1283 break; 1284 1285 case ZONE_STATE_MOUNTED: 1286 switch (cmd) { 1287 case Z_UNMOUNT: 1288 if (kernelcall) /* Invalid; can't happen */ 1289 abort(); 1290 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate); 1291 if (rval == 0) { 1292 eventstream_write(Z_EVT_ZONE_HALTED); 1293 (void) sema_post(&scratch_sem); 1294 } 1295 break; 1296 default: 1297 if (kernelcall) /* Invalid; can't happen */ 1298 abort(); 1299 zerror(zlogp, B_FALSE, "%s operation is invalid " 1300 "for zones in state '%s'", z_cmd_name(cmd), 1301 zone_state_str(zstate)); 1302 rval = -1; 1303 break; 1304 } 1305 break; 1306 1307 case ZONE_STATE_RUNNING: 1308 case ZONE_STATE_SHUTTING_DOWN: 1309 case ZONE_STATE_DOWN: 1310 switch (cmd) { 1311 case Z_READY: 1312 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1313 != 0) 1314 break; 1315 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0) 1316 eventstream_write(Z_EVT_ZONE_READIED); 1317 else 1318 eventstream_write(Z_EVT_ZONE_HALTED); 1319 break; 1320 case Z_BOOT: 1321 /* 1322 * We could have two clients racing to boot this 1323 * zone; the second client loses, but his request 1324 * doesn't fail, since the zone is now in the desired 1325 * state. 1326 */ 1327 zerror(zlogp, B_FALSE, "zone is already booted"); 1328 rval = 0; 1329 break; 1330 case Z_HALT: 1331 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1332 != 0) 1333 break; 1334 eventstream_write(Z_EVT_ZONE_HALTED); 1335 break; 1336 case Z_REBOOT: 1337 (void) strlcpy(boot_args, zargp->bootbuf, 1338 sizeof (boot_args)); 1339 eventstream_write(Z_EVT_ZONE_REBOOTING); 1340 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1341 != 0) { 1342 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1343 boot_args[0] = '\0'; 1344 break; 1345 } 1346 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1347 != 0) { 1348 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1349 boot_args[0] = '\0'; 1350 break; 1351 } 1352 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1353 audit_put_record(zlogp, uc, rval, "reboot"); 1354 if (rval != 0) { 1355 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1356 zstate); 1357 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1358 } 1359 boot_args[0] = '\0'; 1360 break; 1361 case Z_NOTE_UNINSTALLING: 1362 case Z_MOUNT: 1363 case Z_UNMOUNT: 1364 zerror(zlogp, B_FALSE, "%s operation is invalid " 1365 "for zones in state '%s'", z_cmd_name(cmd), 1366 zone_state_str(zstate)); 1367 rval = -1; 1368 break; 1369 } 1370 break; 1371 default: 1372 abort(); 1373 } 1374 1375 /* 1376 * Because the state of the zone may have changed, we make sure 1377 * to wake the console poller, which is in charge of initiating 1378 * the shutdown procedure as necessary. 1379 */ 1380 eventstream_write(Z_EVT_NULL); 1381 1382 out: 1383 (void) mutex_unlock(&lock); 1384 if (kernelcall) { 1385 rvalp = NULL; 1386 rlen = 0; 1387 } else { 1388 rvalp->rval = rval; 1389 } 1390 if (uc != NULL) 1391 ucred_free(uc); 1392 (void) door_return((char *)rvalp, rlen, NULL, 0); 1393 thr_exit(NULL); 1394 } 1395 1396 static int 1397 setup_door(zlog_t *zlogp) 1398 { 1399 if ((zone_door = door_create(server, NULL, 1400 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1401 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1402 return (-1); 1403 } 1404 (void) fdetach(zone_door_path); 1405 1406 if (fattach(zone_door, zone_door_path) != 0) { 1407 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1408 (void) door_revoke(zone_door); 1409 (void) fdetach(zone_door_path); 1410 zone_door = -1; 1411 return (-1); 1412 } 1413 return (0); 1414 } 1415 1416 /* 1417 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 1418 * is where zoneadmd itself will check to see that another instance of 1419 * zoneadmd isn't already controlling this zone. 1420 * 1421 * The idea here is that we want to open the path to which we will 1422 * attach our door, lock it, and then make sure that no-one has beat us 1423 * to fattach(3c)ing onto it. 1424 * 1425 * fattach(3c) is really a mount, so there are actually two possible 1426 * vnodes we could be dealing with. Our strategy is as follows: 1427 * 1428 * - If the file we opened is a regular file (common case): 1429 * There is no fattach(3c)ed door, so we have a chance of becoming 1430 * the managing zoneadmd. We attempt to lock the file: if it is 1431 * already locked, that means someone else raced us here, so we 1432 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 1433 * that beat us to it. 1434 * 1435 * - If the file we opened is a namefs file: 1436 * This means there is already an established door fattach(3c)'ed 1437 * to the rendezvous path. We've lost the race, so we give up. 1438 * Note that in this case we also try to grab the file lock, and 1439 * will succeed in acquiring it since the vnode locked by the 1440 * "winning" zoneadmd was a regular one, and the one we locked was 1441 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1442 * we just return to zoneadm(1m) which knows to retry. 1443 */ 1444 static int 1445 make_daemon_exclusive(zlog_t *zlogp) 1446 { 1447 int doorfd = -1; 1448 int err, ret = -1; 1449 struct stat st; 1450 struct flock flock; 1451 zone_state_t zstate; 1452 1453 top: 1454 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1455 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1456 zonecfg_strerror(err)); 1457 goto out; 1458 } 1459 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1460 S_IREAD|S_IWRITE)) < 0) { 1461 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1462 goto out; 1463 } 1464 if (fstat(doorfd, &st) < 0) { 1465 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1466 goto out; 1467 } 1468 /* 1469 * Lock the file to synchronize with other zoneadmd 1470 */ 1471 flock.l_type = F_WRLCK; 1472 flock.l_whence = SEEK_SET; 1473 flock.l_start = (off_t)0; 1474 flock.l_len = (off_t)0; 1475 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1476 /* 1477 * Someone else raced us here and grabbed the lock file 1478 * first. A warning here is inappropriate since nothing 1479 * went wrong. 1480 */ 1481 goto out; 1482 } 1483 1484 if (strcmp(st.st_fstype, "namefs") == 0) { 1485 struct door_info info; 1486 1487 /* 1488 * There is already something fattach()'ed to this file. 1489 * Lets see what the door is up to. 1490 */ 1491 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1492 /* 1493 * Another zoneadmd process seems to be in 1494 * control of the situation and we don't need to 1495 * be here. A warning here is inappropriate 1496 * since nothing went wrong. 1497 * 1498 * If the door has been revoked, the zoneadmd 1499 * process currently managing the zone is going 1500 * away. We'll return control to zoneadm(1m) 1501 * which will try again (by which time zoneadmd 1502 * will hopefully have exited). 1503 */ 1504 goto out; 1505 } 1506 1507 /* 1508 * If we got this far, there's a fattach(3c)'ed door 1509 * that belongs to a process that has exited, which can 1510 * happen if the previous zoneadmd died unexpectedly. 1511 * 1512 * Let user know that something is amiss, but that we can 1513 * recover; if the zone is in the installed state, then don't 1514 * message, since having a running zoneadmd isn't really 1515 * expected/needed. We want to keep occurences of this message 1516 * limited to times when zoneadmd is picking back up from a 1517 * zoneadmd that died while the zone was in some non-trivial 1518 * state. 1519 */ 1520 if (zstate > ZONE_STATE_INSTALLED) { 1521 zerror(zlogp, B_FALSE, 1522 "zone '%s': WARNING: zone is in state '%s', but " 1523 "zoneadmd does not appear to be available; " 1524 "restarted zoneadmd to recover.", 1525 zone_name, zone_state_str(zstate)); 1526 } 1527 1528 (void) fdetach(zone_door_path); 1529 (void) close(doorfd); 1530 goto top; 1531 } 1532 ret = 0; 1533 out: 1534 (void) close(doorfd); 1535 return (ret); 1536 } 1537 1538 /* 1539 * Setup the brand's pre and post state change callbacks, as well as the 1540 * query callback, if any of these exist. 1541 */ 1542 static int 1543 brand_callback_init(brand_handle_t bh, char *zone_name) 1544 { 1545 char zpath[MAXPATHLEN]; 1546 1547 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) 1548 return (-1); 1549 1550 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX, 1551 sizeof (pre_statechg_hook)); 1552 1553 if (brand_get_prestatechange(bh, zone_name, zpath, 1554 pre_statechg_hook + EXEC_LEN, 1555 sizeof (pre_statechg_hook) - EXEC_LEN) != 0) 1556 return (-1); 1557 1558 if (strlen(pre_statechg_hook) <= EXEC_LEN) 1559 pre_statechg_hook[0] = '\0'; 1560 1561 (void) strlcpy(post_statechg_hook, EXEC_PREFIX, 1562 sizeof (post_statechg_hook)); 1563 1564 if (brand_get_poststatechange(bh, zone_name, zpath, 1565 post_statechg_hook + EXEC_LEN, 1566 sizeof (post_statechg_hook) - EXEC_LEN) != 0) 1567 return (-1); 1568 1569 if (strlen(post_statechg_hook) <= EXEC_LEN) 1570 post_statechg_hook[0] = '\0'; 1571 1572 (void) strlcpy(query_hook, EXEC_PREFIX, 1573 sizeof (query_hook)); 1574 1575 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN, 1576 sizeof (query_hook) - EXEC_LEN) != 0) 1577 return (-1); 1578 1579 if (strlen(query_hook) <= EXEC_LEN) 1580 query_hook[0] = '\0'; 1581 1582 return (0); 1583 } 1584 1585 int 1586 main(int argc, char *argv[]) 1587 { 1588 int opt; 1589 zoneid_t zid; 1590 priv_set_t *privset; 1591 zone_state_t zstate; 1592 char parents_locale[MAXPATHLEN]; 1593 brand_handle_t bh; 1594 int err; 1595 1596 pid_t pid; 1597 sigset_t blockset; 1598 sigset_t block_cld; 1599 1600 struct { 1601 sema_t sem; 1602 int status; 1603 zlog_t log; 1604 } *shstate; 1605 size_t shstatelen = getpagesize(); 1606 1607 zlog_t errlog; 1608 zlog_t *zlogp; 1609 1610 int ctfd; 1611 1612 progname = get_execbasename(argv[0]); 1613 1614 /* 1615 * Make sure stderr is unbuffered 1616 */ 1617 (void) setbuffer(stderr, NULL, 0); 1618 1619 /* 1620 * Get out of the way of mounted filesystems, since we will daemonize 1621 * soon. 1622 */ 1623 (void) chdir("/"); 1624 1625 /* 1626 * Use the default system umask per PSARC 1998/110 rather than 1627 * anything that may have been set by the caller. 1628 */ 1629 (void) umask(CMASK); 1630 1631 /* 1632 * Initially we want to use our parent's locale. 1633 */ 1634 (void) setlocale(LC_ALL, ""); 1635 (void) textdomain(TEXT_DOMAIN); 1636 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1637 sizeof (parents_locale)); 1638 1639 /* 1640 * This zlog_t is used for writing to stderr 1641 */ 1642 errlog.logfile = stderr; 1643 errlog.buflen = errlog.loglen = 0; 1644 errlog.buf = errlog.log = NULL; 1645 errlog.locale = parents_locale; 1646 1647 /* 1648 * We start off writing to stderr until we're ready to daemonize. 1649 */ 1650 zlogp = &errlog; 1651 1652 /* 1653 * Process options. 1654 */ 1655 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1656 switch (opt) { 1657 case 'R': 1658 zonecfg_set_root(optarg); 1659 break; 1660 case 'z': 1661 zone_name = optarg; 1662 break; 1663 default: 1664 usage(); 1665 } 1666 } 1667 1668 if (zone_name == NULL) 1669 usage(); 1670 1671 /* 1672 * Because usage() prints directly to stderr, it has gettext() 1673 * wrapping, which depends on the locale. But since zerror() calls 1674 * localize() which tweaks the locale, it is not safe to call zerror() 1675 * until after the last call to usage(). Fortunately, the last call 1676 * to usage() is just above and the first call to zerror() is just 1677 * below. Don't mess this up. 1678 */ 1679 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1680 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1681 GLOBAL_ZONENAME); 1682 return (1); 1683 } 1684 1685 if (zone_get_id(zone_name, &zid) != 0) { 1686 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 1687 zonecfg_strerror(Z_NO_ZONE)); 1688 return (1); 1689 } 1690 1691 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1692 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1693 zonecfg_strerror(err)); 1694 return (1); 1695 } 1696 if (zstate < ZONE_STATE_INCOMPLETE) { 1697 zerror(zlogp, B_FALSE, 1698 "cannot manage a zone which is in state '%s'", 1699 zone_state_str(zstate)); 1700 return (1); 1701 } 1702 1703 /* Get a handle to the brand info for this zone */ 1704 if ((zone_get_brand(zone_name, brand_name, sizeof (brand_name)) 1705 != Z_OK) || (bh = brand_open(brand_name)) == NULL) { 1706 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1707 return (1); 1708 } 1709 zone_isnative = brand_is_native(bh); 1710 zone_iscluster = (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0); 1711 1712 /* Get state change brand hooks. */ 1713 if (brand_callback_init(bh, zone_name) == -1) { 1714 zerror(zlogp, B_TRUE, 1715 "failed to initialize brand state change hooks"); 1716 brand_close(bh); 1717 return (1); 1718 } 1719 1720 brand_close(bh); 1721 1722 /* 1723 * Check that we have all privileges. It would be nice to pare 1724 * this down, but this is at least a first cut. 1725 */ 1726 if ((privset = priv_allocset()) == NULL) { 1727 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1728 return (1); 1729 } 1730 1731 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 1732 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 1733 priv_freeset(privset); 1734 return (1); 1735 } 1736 1737 if (priv_isfullset(privset) == B_FALSE) { 1738 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 1739 "run this command (all privs required)"); 1740 priv_freeset(privset); 1741 return (1); 1742 } 1743 priv_freeset(privset); 1744 1745 if (mkzonedir(zlogp) != 0) 1746 return (1); 1747 1748 /* 1749 * Pre-fork: setup shared state 1750 */ 1751 if ((shstate = (void *)mmap(NULL, shstatelen, 1752 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 1753 MAP_FAILED) { 1754 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 1755 return (1); 1756 } 1757 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 1758 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 1759 (void) munmap((char *)shstate, shstatelen); 1760 return (1); 1761 } 1762 shstate->log.logfile = NULL; 1763 shstate->log.buflen = shstatelen - sizeof (*shstate); 1764 shstate->log.loglen = shstate->log.buflen; 1765 shstate->log.buf = (char *)shstate + sizeof (*shstate); 1766 shstate->log.log = shstate->log.buf; 1767 shstate->log.locale = parents_locale; 1768 shstate->status = -1; 1769 1770 /* 1771 * We need a SIGCHLD handler so the sema_wait() below will wake 1772 * up if the child dies without doing a sema_post(). 1773 */ 1774 (void) sigset(SIGCHLD, sigchld); 1775 /* 1776 * We must mask SIGCHLD until after we've coped with the fork 1777 * sufficiently to deal with it; otherwise we can race and 1778 * receive the signal before pid has been initialized 1779 * (yes, this really happens). 1780 */ 1781 (void) sigemptyset(&block_cld); 1782 (void) sigaddset(&block_cld, SIGCHLD); 1783 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 1784 1785 if ((ctfd = init_template()) == -1) { 1786 zerror(zlogp, B_TRUE, "failed to create contract"); 1787 return (1); 1788 } 1789 1790 /* 1791 * Do not let another thread localize a message while we are forking. 1792 */ 1793 (void) mutex_lock(&msglock); 1794 pid = fork(); 1795 (void) mutex_unlock(&msglock); 1796 1797 /* 1798 * In all cases (parent, child, and in the event of an error) we 1799 * don't want to cause creation of contracts on subsequent fork()s. 1800 */ 1801 (void) ct_tmpl_clear(ctfd); 1802 (void) close(ctfd); 1803 1804 if (pid == -1) { 1805 zerror(zlogp, B_TRUE, "could not fork"); 1806 return (1); 1807 1808 } else if (pid > 0) { /* parent */ 1809 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1810 /* 1811 * This marks a window of vulnerability in which we receive 1812 * the SIGCLD before falling into sema_wait (normally we would 1813 * get woken up from sema_wait with EINTR upon receipt of 1814 * SIGCLD). So we may need to use some other scheme like 1815 * sema_posting in the sigcld handler. 1816 * blech 1817 */ 1818 (void) sema_wait(&shstate->sem); 1819 (void) sema_destroy(&shstate->sem); 1820 if (shstate->status != 0) 1821 (void) waitpid(pid, NULL, WNOHANG); 1822 /* 1823 * It's ok if we die with SIGPIPE. It's not like we could have 1824 * done anything about it. 1825 */ 1826 (void) fprintf(stderr, "%s", shstate->log.buf); 1827 _exit(shstate->status == 0 ? 0 : 1); 1828 } 1829 1830 /* 1831 * The child charges on. 1832 */ 1833 (void) sigset(SIGCHLD, SIG_DFL); 1834 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1835 1836 /* 1837 * SIGPIPE can be delivered if we write to a socket for which the 1838 * peer endpoint is gone. That can lead to too-early termination 1839 * of zoneadmd, and that's not good eats. 1840 */ 1841 (void) sigset(SIGPIPE, SIG_IGN); 1842 /* 1843 * Stop using stderr 1844 */ 1845 zlogp = &shstate->log; 1846 1847 /* 1848 * We don't need stdout/stderr from now on. 1849 */ 1850 closefrom(0); 1851 1852 /* 1853 * Initialize the syslog zlog_t. This needs to be done after 1854 * the call to closefrom(). 1855 */ 1856 logsys.buf = logsys.log = NULL; 1857 logsys.buflen = logsys.loglen = 0; 1858 logsys.logfile = NULL; 1859 logsys.locale = DEFAULT_LOCALE; 1860 1861 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 1862 1863 /* 1864 * The eventstream is used to publish state changes in the zone 1865 * from the door threads to the console I/O poller. 1866 */ 1867 if (eventstream_init() == -1) { 1868 zerror(zlogp, B_TRUE, "unable to create eventstream"); 1869 goto child_out; 1870 } 1871 1872 (void) snprintf(zone_door_path, sizeof (zone_door_path), 1873 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 1874 1875 /* 1876 * See if another zoneadmd is running for this zone. If not, then we 1877 * can now modify system state. 1878 */ 1879 if (make_daemon_exclusive(zlogp) == -1) 1880 goto child_out; 1881 1882 1883 /* 1884 * Create/join a new session; we need to be careful of what we do with 1885 * the console from now on so we don't end up being the session leader 1886 * for the terminal we're going to be handing out. 1887 */ 1888 (void) setsid(); 1889 1890 /* 1891 * This thread shouldn't be receiving any signals; in particular, 1892 * SIGCHLD should be received by the thread doing the fork(). 1893 */ 1894 (void) sigfillset(&blockset); 1895 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 1896 1897 /* 1898 * Setup the console device and get ready to serve the console; 1899 * once this has completed, we're ready to let console clients 1900 * make an attempt to connect (they will block until 1901 * serve_console_sock() below gets called, and any pending 1902 * connection is accept()ed). 1903 */ 1904 if (!zonecfg_in_alt_root() && init_console(zlogp) == -1) 1905 goto child_out; 1906 1907 /* 1908 * Take the lock now, so that when the door server gets going, we 1909 * are guaranteed that it won't take a request until we are sure 1910 * that everything is completely set up. See the child_out: label 1911 * below to see why this matters. 1912 */ 1913 (void) mutex_lock(&lock); 1914 1915 /* Init semaphore for scratch zones. */ 1916 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 1917 zerror(zlogp, B_TRUE, 1918 "failed to initialize semaphore for scratch zone"); 1919 goto child_out; 1920 } 1921 1922 /* 1923 * Note: door setup must occur *after* the console is setup. 1924 * This is so that as zlogin tests the door to see if zoneadmd 1925 * is ready yet, we know that the console will get serviced 1926 * once door_info() indicates that the door is "up". 1927 */ 1928 if (setup_door(zlogp) == -1) 1929 goto child_out; 1930 1931 /* 1932 * Things seem OK so far; tell the parent process that we're done 1933 * with setup tasks. This will cause the parent to exit, signalling 1934 * to zoneadm, zlogin, or whatever forked it that we are ready to 1935 * service requests. 1936 */ 1937 shstate->status = 0; 1938 (void) sema_post(&shstate->sem); 1939 (void) munmap((char *)shstate, shstatelen); 1940 shstate = NULL; 1941 1942 (void) mutex_unlock(&lock); 1943 1944 /* 1945 * zlogp is now invalid, so reset it to the syslog logger. 1946 */ 1947 zlogp = &logsys; 1948 1949 /* 1950 * Now that we are free of any parents, switch to the default locale. 1951 */ 1952 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 1953 1954 /* 1955 * At this point the setup portion of main() is basically done, so 1956 * we reuse this thread to manage the zone console. When 1957 * serve_console() has returned, we are past the point of no return 1958 * in the life of this zoneadmd. 1959 */ 1960 if (zonecfg_in_alt_root()) { 1961 /* 1962 * This is just awful, but mounted scratch zones don't (and 1963 * can't) have consoles. We just wait for unmount instead. 1964 */ 1965 while (sema_wait(&scratch_sem) == EINTR) 1966 ; 1967 } else { 1968 serve_console(zlogp); 1969 assert(in_death_throes); 1970 } 1971 1972 /* 1973 * This is the next-to-last part of the exit interlock. Upon calling 1974 * fdetach(), the door will go unreferenced; once any 1975 * outstanding requests (like the door thread doing Z_HALT) are 1976 * done, the door will get an UNREF notification; when it handles 1977 * the UNREF, the door server will cause the exit. 1978 */ 1979 assert(!MUTEX_HELD(&lock)); 1980 (void) fdetach(zone_door_path); 1981 for (;;) 1982 (void) pause(); 1983 1984 child_out: 1985 assert(pid == 0); 1986 if (shstate != NULL) { 1987 shstate->status = -1; 1988 (void) sema_post(&shstate->sem); 1989 (void) munmap((char *)shstate, shstatelen); 1990 } 1991 1992 /* 1993 * This might trigger an unref notification, but if so, 1994 * we are still holding the lock, so our call to exit will 1995 * ultimately win the race and will publish the right exit 1996 * code. 1997 */ 1998 if (zone_door != -1) { 1999 assert(MUTEX_HELD(&lock)); 2000 (void) door_revoke(zone_door); 2001 (void) fdetach(zone_door_path); 2002 } 2003 return (1); /* return from main() forcibly exits an MT process */ 2004 } 2005