1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * zoneadmd manages zones; one zoneadmd process is launched for each 29 * non-global zone on the system. This daemon juggles four jobs: 30 * 31 * - Implement setup and teardown of the zone "virtual platform": mount and 32 * unmount filesystems; create and destroy network interfaces; communicate 33 * with devfsadmd to lay out devices for the zone; instantiate the zone 34 * console device; configure process runtime attributes such as resource 35 * controls, pool bindings, fine-grained privileges. 36 * 37 * - Launch the zone's init(1M) process. 38 * 39 * - Implement a door server; clients (like zoneadm) connect to the door 40 * server and request zone state changes. The kernel is also a client of 41 * this door server. A request to halt or reboot the zone which originates 42 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 43 * 44 * One minor problem is that messages emitted by zoneadmd need to be passed 45 * back to the zoneadm process making the request. These messages need to 46 * be rendered in the client's locale; so, this is passed in as part of the 47 * request. The exception is the kernel upcall to zoneadmd, in which case 48 * messages are syslog'd. 49 * 50 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 51 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 52 * strings which do not need to be translated. 53 * 54 * - Act as a console server for zlogin -C processes; see comments in zcons.c 55 * for more information about the zone console architecture. 56 * 57 * DESIGN NOTES 58 * 59 * Restart: 60 * A chief design constraint of zoneadmd is that it should be restartable in 61 * the case that the administrator kills it off, or it suffers a fatal error, 62 * without the running zone being impacted; this is akin to being able to 63 * reboot the service processor of a server without affecting the OS instance. 64 */ 65 66 #include <sys/param.h> 67 #include <sys/mman.h> 68 #include <sys/types.h> 69 #include <sys/stat.h> 70 #include <sys/sysmacros.h> 71 72 #include <bsm/adt.h> 73 #include <bsm/adt_event.h> 74 75 #include <alloca.h> 76 #include <assert.h> 77 #include <errno.h> 78 #include <door.h> 79 #include <fcntl.h> 80 #include <locale.h> 81 #include <signal.h> 82 #include <stdarg.h> 83 #include <stdio.h> 84 #include <stdlib.h> 85 #include <string.h> 86 #include <strings.h> 87 #include <synch.h> 88 #include <syslog.h> 89 #include <thread.h> 90 #include <unistd.h> 91 #include <wait.h> 92 #include <limits.h> 93 #include <zone.h> 94 #include <libbrand.h> 95 #include <sys/brand.h> 96 #include <libcontract.h> 97 #include <libcontract_priv.h> 98 #include <sys/contract/process.h> 99 #include <sys/ctfs.h> 100 101 #include <libzonecfg.h> 102 #include "zoneadmd.h" 103 104 static char *progname; 105 char *zone_name; /* zone which we are managing */ 106 char brand_name[MAXNAMELEN]; 107 boolean_t zone_isnative; 108 boolean_t zone_iscluster; 109 boolean_t zone_islabeled; 110 static zoneid_t zone_id; 111 dladm_handle_t dld_handle = NULL; 112 113 static char pre_statechg_hook[2 * MAXPATHLEN]; 114 static char post_statechg_hook[2 * MAXPATHLEN]; 115 char query_hook[2 * MAXPATHLEN]; 116 117 zlog_t logsys; 118 119 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 120 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 121 122 static sema_t scratch_sem; /* for scratch zones */ 123 124 static char zone_door_path[MAXPATHLEN]; 125 static int zone_door = -1; 126 127 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 128 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 129 130 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 131 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 132 #endif 133 134 #define DEFAULT_LOCALE "C" 135 136 static const char * 137 z_cmd_name(zone_cmd_t zcmd) 138 { 139 /* This list needs to match the enum in sys/zone.h */ 140 static const char *zcmdstr[] = { 141 "ready", "boot", "forceboot", "reboot", "halt", 142 "note_uninstalling", "mount", "forcemount", "unmount" 143 }; 144 145 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 146 return ("unknown"); 147 else 148 return (zcmdstr[(int)zcmd]); 149 } 150 151 static char * 152 get_execbasename(char *execfullname) 153 { 154 char *last_slash, *execbasename; 155 156 /* guard against '/' at end of command invocation */ 157 for (;;) { 158 last_slash = strrchr(execfullname, '/'); 159 if (last_slash == NULL) { 160 execbasename = execfullname; 161 break; 162 } else { 163 execbasename = last_slash + 1; 164 if (*execbasename == '\0') { 165 *last_slash = '\0'; 166 continue; 167 } 168 break; 169 } 170 } 171 return (execbasename); 172 } 173 174 static void 175 usage(void) 176 { 177 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 178 (void) fprintf(stderr, 179 gettext("\tNote: %s should not be run directly.\n"), progname); 180 exit(2); 181 } 182 183 /* ARGSUSED */ 184 static void 185 sigchld(int sig) 186 { 187 } 188 189 char * 190 localize_msg(char *locale, const char *msg) 191 { 192 char *out; 193 194 (void) mutex_lock(&msglock); 195 (void) setlocale(LC_MESSAGES, locale); 196 out = gettext(msg); 197 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 198 (void) mutex_unlock(&msglock); 199 return (out); 200 } 201 202 /* PRINTFLIKE3 */ 203 void 204 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 205 { 206 va_list alist; 207 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 208 char *bp; 209 int saved_errno = errno; 210 211 if (zlogp == NULL) 212 return; 213 if (zlogp == &logsys) 214 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 215 zone_name); 216 else 217 buf[0] = '\0'; 218 bp = &(buf[strlen(buf)]); 219 220 /* 221 * In theory, the locale pointer should be set to either "C" or a 222 * char array, so it should never be NULL 223 */ 224 assert(zlogp->locale != NULL); 225 /* Locale is per process, but we are multi-threaded... */ 226 fmt = localize_msg(zlogp->locale, fmt); 227 228 va_start(alist, fmt); 229 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 230 va_end(alist); 231 bp = &(buf[strlen(buf)]); 232 if (use_strerror) 233 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 234 strerror(saved_errno)); 235 if (zlogp == &logsys) { 236 (void) syslog(LOG_ERR, "%s", buf); 237 } else if (zlogp->logfile != NULL) { 238 (void) fprintf(zlogp->logfile, "%s\n", buf); 239 } else { 240 size_t buflen; 241 size_t copylen; 242 243 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 244 copylen = MIN(buflen, zlogp->loglen); 245 zlogp->log += copylen; 246 zlogp->loglen -= copylen; 247 } 248 } 249 250 /* 251 * Emit a warning for any boot arguments which are unrecognized. Since 252 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 253 * put the arguments into an argv style array, use getopt to process them, 254 * and put the resultant argument string back into outargs. 255 * 256 * During the filtering, we pull out any arguments which are truly "boot" 257 * arguments, leaving only those which are to be passed intact to the 258 * progenitor process. The one we support at the moment is -i, which 259 * indicates to the kernel which program should be launched as 'init'. 260 * 261 * A return of Z_INVAL indicates specifically that the arguments are 262 * not valid; this is a non-fatal error. Except for Z_OK, all other return 263 * values are treated as fatal. 264 */ 265 static int 266 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 267 char *init_file, char *badarg) 268 { 269 int argc = 0, argc_save; 270 int i; 271 int err; 272 char *arg, *lasts, **argv = NULL, **argv_save; 273 char zonecfg_args[BOOTARGS_MAX]; 274 char scratchargs[BOOTARGS_MAX], *sargs; 275 char c; 276 277 bzero(outargs, BOOTARGS_MAX); 278 bzero(badarg, BOOTARGS_MAX); 279 280 /* 281 * If the user didn't specify transient boot arguments, check 282 * to see if there were any specified in the zone configuration, 283 * and use them if applicable. 284 */ 285 if (inargs == NULL || inargs[0] == '\0') { 286 zone_dochandle_t handle; 287 if ((handle = zonecfg_init_handle()) == NULL) { 288 zerror(zlogp, B_TRUE, 289 "getting zone configuration handle"); 290 return (Z_BAD_HANDLE); 291 } 292 err = zonecfg_get_snapshot_handle(zone_name, handle); 293 if (err != Z_OK) { 294 zerror(zlogp, B_FALSE, 295 "invalid configuration snapshot"); 296 zonecfg_fini_handle(handle); 297 return (Z_BAD_HANDLE); 298 } 299 300 bzero(zonecfg_args, sizeof (zonecfg_args)); 301 (void) zonecfg_get_bootargs(handle, zonecfg_args, 302 sizeof (zonecfg_args)); 303 inargs = zonecfg_args; 304 zonecfg_fini_handle(handle); 305 } 306 307 if (strlen(inargs) >= BOOTARGS_MAX) { 308 zerror(zlogp, B_FALSE, "boot argument string too long"); 309 return (Z_INVAL); 310 } 311 312 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 313 sargs = scratchargs; 314 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 315 sargs = NULL; 316 argc++; 317 } 318 319 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 320 zerror(zlogp, B_FALSE, "memory allocation failed"); 321 return (Z_NOMEM); 322 } 323 324 argv_save = argv; 325 argc_save = argc; 326 327 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 328 sargs = scratchargs; 329 i = 0; 330 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 331 sargs = NULL; 332 if ((argv[i] = strdup(arg)) == NULL) { 333 err = Z_NOMEM; 334 zerror(zlogp, B_FALSE, "memory allocation failed"); 335 goto done; 336 } 337 i++; 338 } 339 340 /* 341 * We preserve compatibility with the Solaris system boot behavior, 342 * which allows: 343 * 344 * # reboot kernel/unix -s -m verbose 345 * 346 * In this example, kernel/unix tells the booter what file to 347 * boot. We don't want reboot in a zone to be gratuitously different, 348 * so we silently ignore the boot file, if necessary. 349 */ 350 if (argv[0] == NULL) 351 goto done; 352 353 assert(argv[0][0] != ' '); 354 assert(argv[0][0] != '\t'); 355 356 if (argv[0][0] != '-' && argv[0][0] != '\0') { 357 argv = &argv[1]; 358 argc--; 359 } 360 361 optind = 0; 362 opterr = 0; 363 err = Z_OK; 364 while ((c = getopt(argc, argv, "fi:m:s")) != -1) { 365 switch (c) { 366 case 'i': 367 /* 368 * -i is handled by the runtime and is not passed 369 * along to userland 370 */ 371 (void) strlcpy(init_file, optarg, MAXPATHLEN); 372 break; 373 case 'f': 374 /* This has already been processed by zoneadm */ 375 break; 376 case 'm': 377 case 's': 378 /* These pass through unmolested */ 379 (void) snprintf(outargs, BOOTARGS_MAX, 380 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 381 break; 382 case '?': 383 /* 384 * We warn about unknown arguments but pass them 385 * along anyway-- if someone wants to develop their 386 * own init replacement, they can pass it whatever 387 * args they want. 388 */ 389 err = Z_INVAL; 390 (void) snprintf(outargs, BOOTARGS_MAX, 391 "%s -%c", outargs, optopt); 392 (void) snprintf(badarg, BOOTARGS_MAX, 393 "%s -%c", badarg, optopt); 394 break; 395 } 396 } 397 398 /* 399 * For Solaris Zones we warn about and discard non-option arguments. 400 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 401 * to the kernel, we concat up all the other remaining boot args. 402 * and warn on them as a group. 403 */ 404 if (optind < argc) { 405 err = Z_INVAL; 406 while (optind < argc) { 407 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 408 badarg, strlen(badarg) > 0 ? " " : "", 409 argv[optind]); 410 optind++; 411 } 412 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 413 "arguments `%s'.", badarg); 414 } 415 416 done: 417 for (i = 0; i < argc_save; i++) { 418 if (argv_save[i] != NULL) 419 free(argv_save[i]); 420 } 421 free(argv_save); 422 return (err); 423 } 424 425 426 static int 427 mkzonedir(zlog_t *zlogp) 428 { 429 struct stat st; 430 /* 431 * We must create and lock everyone but root out of ZONES_TMPDIR 432 * since anyone can open any UNIX domain socket, regardless of 433 * its file system permissions. Sigh... 434 */ 435 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 436 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 437 return (-1); 438 } 439 /* paranoia */ 440 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 441 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 442 return (-1); 443 } 444 (void) chmod(ZONES_TMPDIR, S_IRWXU); 445 return (0); 446 } 447 448 /* 449 * Run the brand's pre-state change callback, if it exists. 450 */ 451 static int 452 brand_prestatechg(zlog_t *zlogp, int state, int cmd) 453 { 454 char cmdbuf[2 * MAXPATHLEN]; 455 456 if (pre_statechg_hook[0] == '\0') 457 return (0); 458 459 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d", pre_statechg_hook, 460 state, cmd) > sizeof (cmdbuf)) 461 return (-1); 462 463 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 464 return (-1); 465 466 return (0); 467 } 468 469 /* 470 * Run the brand's post-state change callback, if it exists. 471 */ 472 static int 473 brand_poststatechg(zlog_t *zlogp, int state, int cmd) 474 { 475 char cmdbuf[2 * MAXPATHLEN]; 476 477 if (post_statechg_hook[0] == '\0') 478 return (0); 479 480 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d", post_statechg_hook, 481 state, cmd) > sizeof (cmdbuf)) 482 return (-1); 483 484 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 485 return (-1); 486 487 return (0); 488 } 489 490 /* 491 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 492 * 'true' if this is being invoked as part of the processing for the "mount" 493 * subcommand. 494 */ 495 static int 496 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate) 497 { 498 int err; 499 500 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0) 501 return (-1); 502 503 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 504 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 505 zonecfg_strerror(err)); 506 return (-1); 507 } 508 509 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 510 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 511 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 512 zonecfg_strerror(err)); 513 return (-1); 514 } 515 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 516 bringup_failure_recovery = B_TRUE; 517 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE); 518 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 519 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 520 zonecfg_strerror(err)); 521 return (-1); 522 } 523 524 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0) 525 return (-1); 526 527 return (0); 528 } 529 530 int 531 init_template(void) 532 { 533 int fd; 534 int err = 0; 535 536 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 537 if (fd == -1) 538 return (-1); 539 540 /* 541 * For now, zoneadmd doesn't do anything with the contract. 542 * Deliver no events, don't inherit, and allow it to be orphaned. 543 */ 544 err |= ct_tmpl_set_critical(fd, 0); 545 err |= ct_tmpl_set_informative(fd, 0); 546 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 547 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 548 if (err || ct_tmpl_activate(fd)) { 549 (void) close(fd); 550 return (-1); 551 } 552 553 return (fd); 554 } 555 556 typedef struct fs_callback { 557 zlog_t *zlogp; 558 zoneid_t zoneid; 559 boolean_t mount_cmd; 560 } fs_callback_t; 561 562 static int 563 mount_early_fs(void *data, const char *spec, const char *dir, 564 const char *fstype, const char *opt) 565 { 566 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; 567 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; 568 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd; 569 char rootpath[MAXPATHLEN]; 570 pid_t child; 571 int child_status; 572 int tmpl_fd; 573 int rv; 574 ctid_t ct; 575 576 /* determine the zone rootpath */ 577 if (mount_cmd) { 578 char zonepath[MAXPATHLEN]; 579 char luroot[MAXPATHLEN]; 580 581 if (zone_get_zonepath(zone_name, 582 zonepath, sizeof (zonepath)) != Z_OK) { 583 zerror(zlogp, B_FALSE, "unable to determine zone path"); 584 return (-1); 585 } 586 587 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 588 resolve_lofs(zlogp, luroot, sizeof (luroot)); 589 (void) strlcpy(rootpath, luroot, sizeof (rootpath)); 590 } else { 591 if (zone_get_rootpath(zone_name, 592 rootpath, sizeof (rootpath)) != Z_OK) { 593 zerror(zlogp, B_FALSE, "unable to determine zone root"); 594 return (-1); 595 } 596 } 597 598 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) { 599 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 600 rootpath, dir); 601 return (-1); 602 } else if (rv > 0) { 603 /* The mount point path doesn't exist, create it now. */ 604 if (make_one_dir(zlogp, rootpath, dir, 605 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 606 DEFAULT_DIR_GROUP) != 0) { 607 zerror(zlogp, B_FALSE, "failed to create mount point"); 608 return (-1); 609 } 610 611 /* 612 * Now this might seem weird, but we need to invoke 613 * valid_mount_path() again. Why? Because it checks 614 * to make sure that the mount point path is canonical, 615 * which it can only do if the path exists, so now that 616 * we've created the path we have to verify it again. 617 */ 618 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, 619 fstype)) < 0) { 620 zerror(zlogp, B_FALSE, 621 "%s%s is not a valid mount point", rootpath, dir); 622 return (-1); 623 } 624 } 625 626 if ((tmpl_fd = init_template()) == -1) { 627 zerror(zlogp, B_TRUE, "failed to create contract"); 628 return (-1); 629 } 630 631 if ((child = fork()) == -1) { 632 (void) ct_tmpl_clear(tmpl_fd); 633 (void) close(tmpl_fd); 634 zerror(zlogp, B_TRUE, "failed to fork"); 635 return (-1); 636 637 } else if (child == 0) { /* child */ 638 char opt_buf[MAX_MNTOPT_STR]; 639 int optlen = 0; 640 int mflag = MS_DATA; 641 642 (void) ct_tmpl_clear(tmpl_fd); 643 /* 644 * Even though there are no procs running in the zone, we 645 * do this for paranoia's sake. 646 */ 647 (void) closefrom(0); 648 649 if (zone_enter(zoneid) == -1) { 650 _exit(errno); 651 } 652 if (opt != NULL) { 653 /* 654 * The mount() system call is incredibly annoying. 655 * If options are specified, we need to copy them 656 * into a temporary buffer since the mount() system 657 * call will overwrite the options string. It will 658 * also fail if the new option string it wants to 659 * write is bigger than the one we passed in, so 660 * you must pass in a buffer of the maximum possible 661 * option string length. sigh. 662 */ 663 (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); 664 opt = opt_buf; 665 optlen = MAX_MNTOPT_STR; 666 mflag = MS_OPTIONSTR; 667 } 668 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) 669 _exit(errno); 670 _exit(0); 671 } 672 673 /* parent */ 674 if (contract_latest(&ct) == -1) 675 ct = -1; 676 (void) ct_tmpl_clear(tmpl_fd); 677 (void) close(tmpl_fd); 678 if (waitpid(child, &child_status, 0) != child) { 679 /* unexpected: we must have been signalled */ 680 (void) contract_abandon_id(ct); 681 return (-1); 682 } 683 (void) contract_abandon_id(ct); 684 if (WEXITSTATUS(child_status) != 0) { 685 errno = WEXITSTATUS(child_status); 686 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 687 return (-1); 688 } 689 690 return (0); 691 } 692 693 /* 694 * If retstr is not NULL, the output of the subproc is returned in the str, 695 * otherwise it is output using zerror(). Any memory allocated for retstr 696 * should be freed by the caller. 697 */ 698 int 699 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr) 700 { 701 char buf[1024]; /* arbitrary large amount */ 702 char *inbuf; 703 FILE *file; 704 int status; 705 int rd_cnt; 706 707 if (retstr != NULL) { 708 if ((*retstr = malloc(1024)) == NULL) { 709 zerror(zlogp, B_FALSE, "out of memory"); 710 return (-1); 711 } 712 inbuf = *retstr; 713 rd_cnt = 0; 714 } else { 715 inbuf = buf; 716 } 717 718 file = popen(cmdbuf, "r"); 719 if (file == NULL) { 720 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); 721 return (-1); 722 } 723 724 while (fgets(inbuf, 1024, file) != NULL) { 725 if (retstr == NULL && zlogp != &logsys) { 726 zerror(zlogp, B_FALSE, "%s", inbuf); 727 } else { 728 char *p; 729 730 rd_cnt += 1024 - 1; 731 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) { 732 zerror(zlogp, B_FALSE, "out of memory"); 733 (void) pclose(file); 734 return (-1); 735 } 736 737 *retstr = p; 738 inbuf = *retstr + rd_cnt; 739 } 740 } 741 status = pclose(file); 742 743 if (WIFSIGNALED(status)) { 744 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 745 "signal %d", cmdbuf, WTERMSIG(status)); 746 return (-1); 747 } 748 assert(WIFEXITED(status)); 749 if (WEXITSTATUS(status) == ZEXIT_EXEC) { 750 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); 751 return (-1); 752 } 753 return (WEXITSTATUS(status)); 754 } 755 756 static int 757 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) 758 { 759 zoneid_t zoneid; 760 struct stat st; 761 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 762 char nbootargs[BOOTARGS_MAX]; 763 char cmdbuf[MAXPATHLEN]; 764 fs_callback_t cb; 765 brand_handle_t bh; 766 int err; 767 768 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0) 769 return (-1); 770 771 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 772 zerror(zlogp, B_TRUE, "unable to get zoneid"); 773 return (-1); 774 } 775 776 cb.zlogp = zlogp; 777 cb.zoneid = zoneid; 778 cb.mount_cmd = B_FALSE; 779 780 /* Get a handle to the brand info for this zone */ 781 if ((bh = brand_open(brand_name)) == NULL) { 782 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 783 return (-1); 784 } 785 786 /* 787 * Get the list of filesystems to mount from the brand 788 * configuration. These mounts are done via a thread that will 789 * enter the zone, so they are done from within the context of the 790 * zone. 791 */ 792 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) { 793 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 794 brand_close(bh); 795 return (-1); 796 } 797 798 /* 799 * Get the brand's boot callback if it exists. 800 */ 801 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 802 zerror(zlogp, B_FALSE, "unable to determine zone path"); 803 brand_close(bh); 804 return (-1); 805 } 806 (void) strcpy(cmdbuf, EXEC_PREFIX); 807 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 808 sizeof (cmdbuf) - EXEC_LEN) != 0) { 809 zerror(zlogp, B_FALSE, 810 "unable to determine branded zone's boot callback"); 811 brand_close(bh); 812 return (-1); 813 } 814 815 /* Get the path for this zone's init(1M) (or equivalent) process. */ 816 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) { 817 zerror(zlogp, B_FALSE, 818 "unable to determine zone's init(1M) location"); 819 brand_close(bh); 820 return (-1); 821 } 822 823 brand_close(bh); 824 825 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 826 bad_boot_arg); 827 if (err == Z_INVAL) 828 eventstream_write(Z_EVT_ZONE_BADARGS); 829 else if (err != Z_OK) 830 return (-1); 831 832 assert(init_file[0] != '\0'); 833 834 /* Try to anticipate possible problems: Make sure init is executable. */ 835 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 836 zerror(zlogp, B_FALSE, "unable to determine zone root"); 837 return (-1); 838 } 839 840 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); 841 842 if (stat(initpath, &st) == -1) { 843 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 844 return (-1); 845 } 846 847 if ((st.st_mode & S_IXUSR) == 0) { 848 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 849 return (-1); 850 } 851 852 /* 853 * If there is a brand 'boot' callback, execute it now to give the 854 * brand one last chance to do any additional setup before the zone 855 * is booted. 856 */ 857 if ((strlen(cmdbuf) > EXEC_LEN) && 858 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) { 859 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 860 return (-1); 861 } 862 863 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 864 zerror(zlogp, B_TRUE, "could not set zone boot file"); 865 return (-1); 866 } 867 868 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 869 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 870 return (-1); 871 } 872 873 if (zone_boot(zoneid) == -1) { 874 zerror(zlogp, B_TRUE, "unable to boot zone"); 875 return (-1); 876 } 877 878 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0) 879 return (-1); 880 881 return (0); 882 } 883 884 static int 885 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate) 886 { 887 int err; 888 889 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0) 890 return (-1); 891 892 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) { 893 if (!bringup_failure_recovery) 894 zerror(zlogp, B_FALSE, "unable to destroy zone"); 895 return (-1); 896 } 897 898 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 899 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 900 zonecfg_strerror(err)); 901 902 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0) 903 return (-1); 904 905 return (0); 906 } 907 908 /* 909 * Generate AUE_zone_state for a command that boots a zone. 910 */ 911 static void 912 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 913 char *new_state) 914 { 915 adt_session_data_t *ah; 916 adt_event_data_t *event; 917 int pass_fail, fail_reason; 918 919 if (!adt_audit_enabled()) 920 return; 921 922 if (return_val == 0) { 923 pass_fail = ADT_SUCCESS; 924 fail_reason = ADT_SUCCESS; 925 } else { 926 pass_fail = ADT_FAILURE; 927 fail_reason = ADT_FAIL_VALUE_PROGRAM; 928 } 929 930 if (adt_start_session(&ah, NULL, 0)) { 931 zerror(zlogp, B_TRUE, gettext("audit failure.")); 932 return; 933 } 934 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 935 zerror(zlogp, B_TRUE, gettext("audit failure.")); 936 (void) adt_end_session(ah); 937 return; 938 } 939 940 event = adt_alloc_event(ah, ADT_zone_state); 941 if (event == NULL) { 942 zerror(zlogp, B_TRUE, gettext("audit failure.")); 943 (void) adt_end_session(ah); 944 return; 945 } 946 event->adt_zone_state.zonename = zone_name; 947 event->adt_zone_state.new_state = new_state; 948 949 if (adt_put_event(event, pass_fail, fail_reason)) 950 zerror(zlogp, B_TRUE, gettext("audit failure.")); 951 952 adt_free_event(event); 953 954 (void) adt_end_session(ah); 955 } 956 957 /* 958 * The main routine for the door server that deals with zone state transitions. 959 */ 960 /* ARGSUSED */ 961 static void 962 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 963 uint_t n_desc) 964 { 965 ucred_t *uc = NULL; 966 const priv_set_t *eset; 967 968 zone_state_t zstate; 969 zone_cmd_t cmd; 970 zone_cmd_arg_t *zargp; 971 972 boolean_t kernelcall; 973 974 int rval = -1; 975 uint64_t uniqid; 976 zoneid_t zoneid = -1; 977 zlog_t zlog; 978 zlog_t *zlogp; 979 zone_cmd_rval_t *rvalp; 980 size_t rlen = getpagesize(); /* conservative */ 981 fs_callback_t cb; 982 brand_handle_t bh; 983 984 /* LINTED E_BAD_PTR_CAST_ALIGN */ 985 zargp = (zone_cmd_arg_t *)args; 986 987 /* 988 * When we get the door unref message, we've fdetach'd the door, and 989 * it is time for us to shut down zoneadmd. 990 */ 991 if (zargp == DOOR_UNREF_DATA) { 992 /* 993 * See comment at end of main() for info on the last rites. 994 */ 995 exit(0); 996 } 997 998 if (zargp == NULL) { 999 (void) door_return(NULL, 0, 0, 0); 1000 } 1001 1002 rvalp = alloca(rlen); 1003 bzero(rvalp, rlen); 1004 zlog.logfile = NULL; 1005 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 1006 zlog.buf = rvalp->errbuf; 1007 zlog.log = zlog.buf; 1008 /* defer initialization of zlog.locale until after credential check */ 1009 zlogp = &zlog; 1010 1011 if (alen != sizeof (zone_cmd_arg_t)) { 1012 /* 1013 * This really shouldn't be happening. 1014 */ 1015 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 1016 "unexpected (expected %d bytes)", alen, 1017 sizeof (zone_cmd_arg_t)); 1018 goto out; 1019 } 1020 cmd = zargp->cmd; 1021 1022 if (door_ucred(&uc) != 0) { 1023 zerror(&logsys, B_TRUE, "door_ucred"); 1024 goto out; 1025 } 1026 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 1027 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 1028 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 1029 ucred_geteuid(uc) != 0)) { 1030 zerror(&logsys, B_FALSE, "insufficient privileges"); 1031 goto out; 1032 } 1033 1034 kernelcall = ucred_getpid(uc) == 0; 1035 1036 /* 1037 * This is safe because we only use a zlog_t throughout the 1038 * duration of a door call; i.e., by the time the pointer 1039 * might become invalid, the door call would be over. 1040 */ 1041 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 1042 1043 (void) mutex_lock(&lock); 1044 1045 /* 1046 * Once we start to really die off, we don't want more connections. 1047 */ 1048 if (in_death_throes) { 1049 (void) mutex_unlock(&lock); 1050 ucred_free(uc); 1051 (void) door_return(NULL, 0, 0, 0); 1052 thr_exit(NULL); 1053 } 1054 1055 /* 1056 * Check for validity of command. 1057 */ 1058 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && 1059 cmd != Z_REBOOT && cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && 1060 cmd != Z_MOUNT && cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { 1061 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 1062 goto out; 1063 } 1064 1065 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 1066 /* 1067 * Can't happen 1068 */ 1069 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 1070 cmd); 1071 goto out; 1072 } 1073 /* 1074 * We ignore the possibility of someone calling zone_create(2) 1075 * explicitly; all requests must come through zoneadmd. 1076 */ 1077 if (zone_get_state(zone_name, &zstate) != Z_OK) { 1078 /* 1079 * Something terribly wrong happened 1080 */ 1081 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 1082 goto out; 1083 } 1084 1085 if (kernelcall) { 1086 /* 1087 * Kernel-initiated requests may lose their validity if the 1088 * zone_t the kernel was referring to has gone away. 1089 */ 1090 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 1091 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 1092 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 1093 /* 1094 * We're not talking about the same zone. The request 1095 * must have arrived too late. Return error. 1096 */ 1097 rval = -1; 1098 goto out; 1099 } 1100 zlogp = &logsys; /* Log errors to syslog */ 1101 } 1102 1103 /* 1104 * If we are being asked to forcibly mount or boot a zone, we 1105 * pretend that an INCOMPLETE zone is actually INSTALLED. 1106 */ 1107 if (zstate == ZONE_STATE_INCOMPLETE && 1108 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) 1109 zstate = ZONE_STATE_INSTALLED; 1110 1111 switch (zstate) { 1112 case ZONE_STATE_CONFIGURED: 1113 case ZONE_STATE_INCOMPLETE: 1114 /* 1115 * Not our area of expertise; we just print a nice message 1116 * and die off. 1117 */ 1118 zerror(zlogp, B_FALSE, 1119 "%s operation is invalid for zones in state '%s'", 1120 z_cmd_name(cmd), zone_state_str(zstate)); 1121 break; 1122 1123 case ZONE_STATE_INSTALLED: 1124 switch (cmd) { 1125 case Z_READY: 1126 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate); 1127 if (rval == 0) 1128 eventstream_write(Z_EVT_ZONE_READIED); 1129 break; 1130 case Z_BOOT: 1131 case Z_FORCEBOOT: 1132 eventstream_write(Z_EVT_ZONE_BOOTING); 1133 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1134 == 0) { 1135 rval = zone_bootup(zlogp, zargp->bootbuf, 1136 zstate); 1137 } 1138 audit_put_record(zlogp, uc, rval, "boot"); 1139 if (rval != 0) { 1140 bringup_failure_recovery = B_TRUE; 1141 (void) zone_halt(zlogp, B_FALSE, B_FALSE, 1142 zstate); 1143 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1144 } 1145 break; 1146 case Z_HALT: 1147 if (kernelcall) /* Invalid; can't happen */ 1148 abort(); 1149 /* 1150 * We could have two clients racing to halt this 1151 * zone; the second client loses, but his request 1152 * doesn't fail, since the zone is now in the desired 1153 * state. 1154 */ 1155 zerror(zlogp, B_FALSE, "zone is already halted"); 1156 rval = 0; 1157 break; 1158 case Z_REBOOT: 1159 if (kernelcall) /* Invalid; can't happen */ 1160 abort(); 1161 zerror(zlogp, B_FALSE, "%s operation is invalid " 1162 "for zones in state '%s'", z_cmd_name(cmd), 1163 zone_state_str(zstate)); 1164 rval = -1; 1165 break; 1166 case Z_NOTE_UNINSTALLING: 1167 if (kernelcall) /* Invalid; can't happen */ 1168 abort(); 1169 /* 1170 * Tell the console to print out a message about this. 1171 * Once it does, we will be in_death_throes. 1172 */ 1173 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 1174 break; 1175 case Z_MOUNT: 1176 case Z_FORCEMOUNT: 1177 if (kernelcall) /* Invalid; can't happen */ 1178 abort(); 1179 if (!zone_isnative && !zone_iscluster && 1180 !zone_islabeled) { 1181 /* 1182 * -U mounts the zone without lofs mounting 1183 * zone file systems back into the scratch 1184 * zone. This is required when mounting 1185 * non-native branded zones. 1186 */ 1187 (void) strlcpy(zargp->bootbuf, "-U", 1188 BOOTARGS_MAX); 1189 } 1190 1191 rval = zone_ready(zlogp, 1192 strcmp(zargp->bootbuf, "-U") == 0 ? 1193 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate); 1194 if (rval != 0) 1195 break; 1196 1197 eventstream_write(Z_EVT_ZONE_READIED); 1198 1199 /* 1200 * Get a handle to the native brand info. 1201 * We must always use the native brand file system 1202 * list when mounting the zone. 1203 */ 1204 if ((bh = brand_open(NATIVE_BRAND_NAME)) == NULL) { 1205 rval = -1; 1206 break; 1207 } 1208 1209 /* 1210 * Get the list of filesystems to mount from 1211 * the brand configuration. These mounts are done 1212 * via a thread that will enter the zone, so they 1213 * are done from within the context of the zone. 1214 */ 1215 cb.zlogp = zlogp; 1216 cb.zoneid = zone_id; 1217 cb.mount_cmd = B_TRUE; 1218 rval = brand_platform_iter_mounts(bh, 1219 mount_early_fs, &cb); 1220 1221 brand_close(bh); 1222 1223 /* 1224 * Ordinarily, /dev/fd would be mounted inside the zone 1225 * by svc:/system/filesystem/usr:default, but since 1226 * we're not booting the zone, we need to do this 1227 * manually. 1228 */ 1229 if (rval == 0) 1230 rval = mount_early_fs(&cb, 1231 "fd", "/dev/fd", "fd", NULL); 1232 break; 1233 case Z_UNMOUNT: 1234 if (kernelcall) /* Invalid; can't happen */ 1235 abort(); 1236 zerror(zlogp, B_FALSE, "zone is already unmounted"); 1237 rval = 0; 1238 break; 1239 } 1240 break; 1241 1242 case ZONE_STATE_READY: 1243 switch (cmd) { 1244 case Z_READY: 1245 /* 1246 * We could have two clients racing to ready this 1247 * zone; the second client loses, but his request 1248 * doesn't fail, since the zone is now in the desired 1249 * state. 1250 */ 1251 zerror(zlogp, B_FALSE, "zone is already ready"); 1252 rval = 0; 1253 break; 1254 case Z_BOOT: 1255 (void) strlcpy(boot_args, zargp->bootbuf, 1256 sizeof (boot_args)); 1257 eventstream_write(Z_EVT_ZONE_BOOTING); 1258 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1259 audit_put_record(zlogp, uc, rval, "boot"); 1260 if (rval != 0) { 1261 bringup_failure_recovery = B_TRUE; 1262 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1263 zstate); 1264 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1265 } 1266 boot_args[0] = '\0'; 1267 break; 1268 case Z_HALT: 1269 if (kernelcall) /* Invalid; can't happen */ 1270 abort(); 1271 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1272 != 0) 1273 break; 1274 eventstream_write(Z_EVT_ZONE_HALTED); 1275 break; 1276 case Z_REBOOT: 1277 case Z_NOTE_UNINSTALLING: 1278 case Z_MOUNT: 1279 case Z_UNMOUNT: 1280 if (kernelcall) /* Invalid; can't happen */ 1281 abort(); 1282 zerror(zlogp, B_FALSE, "%s operation is invalid " 1283 "for zones in state '%s'", z_cmd_name(cmd), 1284 zone_state_str(zstate)); 1285 rval = -1; 1286 break; 1287 } 1288 break; 1289 1290 case ZONE_STATE_MOUNTED: 1291 switch (cmd) { 1292 case Z_UNMOUNT: 1293 if (kernelcall) /* Invalid; can't happen */ 1294 abort(); 1295 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate); 1296 if (rval == 0) { 1297 eventstream_write(Z_EVT_ZONE_HALTED); 1298 (void) sema_post(&scratch_sem); 1299 } 1300 break; 1301 default: 1302 if (kernelcall) /* Invalid; can't happen */ 1303 abort(); 1304 zerror(zlogp, B_FALSE, "%s operation is invalid " 1305 "for zones in state '%s'", z_cmd_name(cmd), 1306 zone_state_str(zstate)); 1307 rval = -1; 1308 break; 1309 } 1310 break; 1311 1312 case ZONE_STATE_RUNNING: 1313 case ZONE_STATE_SHUTTING_DOWN: 1314 case ZONE_STATE_DOWN: 1315 switch (cmd) { 1316 case Z_READY: 1317 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1318 != 0) 1319 break; 1320 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0) 1321 eventstream_write(Z_EVT_ZONE_READIED); 1322 else 1323 eventstream_write(Z_EVT_ZONE_HALTED); 1324 break; 1325 case Z_BOOT: 1326 /* 1327 * We could have two clients racing to boot this 1328 * zone; the second client loses, but his request 1329 * doesn't fail, since the zone is now in the desired 1330 * state. 1331 */ 1332 zerror(zlogp, B_FALSE, "zone is already booted"); 1333 rval = 0; 1334 break; 1335 case Z_HALT: 1336 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1337 != 0) 1338 break; 1339 eventstream_write(Z_EVT_ZONE_HALTED); 1340 break; 1341 case Z_REBOOT: 1342 (void) strlcpy(boot_args, zargp->bootbuf, 1343 sizeof (boot_args)); 1344 eventstream_write(Z_EVT_ZONE_REBOOTING); 1345 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1346 != 0) { 1347 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1348 boot_args[0] = '\0'; 1349 break; 1350 } 1351 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1352 != 0) { 1353 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1354 boot_args[0] = '\0'; 1355 break; 1356 } 1357 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1358 audit_put_record(zlogp, uc, rval, "reboot"); 1359 if (rval != 0) { 1360 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1361 zstate); 1362 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1363 } 1364 boot_args[0] = '\0'; 1365 break; 1366 case Z_NOTE_UNINSTALLING: 1367 case Z_MOUNT: 1368 case Z_UNMOUNT: 1369 zerror(zlogp, B_FALSE, "%s operation is invalid " 1370 "for zones in state '%s'", z_cmd_name(cmd), 1371 zone_state_str(zstate)); 1372 rval = -1; 1373 break; 1374 } 1375 break; 1376 default: 1377 abort(); 1378 } 1379 1380 /* 1381 * Because the state of the zone may have changed, we make sure 1382 * to wake the console poller, which is in charge of initiating 1383 * the shutdown procedure as necessary. 1384 */ 1385 eventstream_write(Z_EVT_NULL); 1386 1387 out: 1388 (void) mutex_unlock(&lock); 1389 if (kernelcall) { 1390 rvalp = NULL; 1391 rlen = 0; 1392 } else { 1393 rvalp->rval = rval; 1394 } 1395 if (uc != NULL) 1396 ucred_free(uc); 1397 (void) door_return((char *)rvalp, rlen, NULL, 0); 1398 thr_exit(NULL); 1399 } 1400 1401 static int 1402 setup_door(zlog_t *zlogp) 1403 { 1404 if ((zone_door = door_create(server, NULL, 1405 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1406 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1407 return (-1); 1408 } 1409 (void) fdetach(zone_door_path); 1410 1411 if (fattach(zone_door, zone_door_path) != 0) { 1412 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1413 (void) door_revoke(zone_door); 1414 (void) fdetach(zone_door_path); 1415 zone_door = -1; 1416 return (-1); 1417 } 1418 return (0); 1419 } 1420 1421 /* 1422 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 1423 * is where zoneadmd itself will check to see that another instance of 1424 * zoneadmd isn't already controlling this zone. 1425 * 1426 * The idea here is that we want to open the path to which we will 1427 * attach our door, lock it, and then make sure that no-one has beat us 1428 * to fattach(3c)ing onto it. 1429 * 1430 * fattach(3c) is really a mount, so there are actually two possible 1431 * vnodes we could be dealing with. Our strategy is as follows: 1432 * 1433 * - If the file we opened is a regular file (common case): 1434 * There is no fattach(3c)ed door, so we have a chance of becoming 1435 * the managing zoneadmd. We attempt to lock the file: if it is 1436 * already locked, that means someone else raced us here, so we 1437 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 1438 * that beat us to it. 1439 * 1440 * - If the file we opened is a namefs file: 1441 * This means there is already an established door fattach(3c)'ed 1442 * to the rendezvous path. We've lost the race, so we give up. 1443 * Note that in this case we also try to grab the file lock, and 1444 * will succeed in acquiring it since the vnode locked by the 1445 * "winning" zoneadmd was a regular one, and the one we locked was 1446 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1447 * we just return to zoneadm(1m) which knows to retry. 1448 */ 1449 static int 1450 make_daemon_exclusive(zlog_t *zlogp) 1451 { 1452 int doorfd = -1; 1453 int err, ret = -1; 1454 struct stat st; 1455 struct flock flock; 1456 zone_state_t zstate; 1457 1458 top: 1459 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1460 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1461 zonecfg_strerror(err)); 1462 goto out; 1463 } 1464 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1465 S_IREAD|S_IWRITE)) < 0) { 1466 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1467 goto out; 1468 } 1469 if (fstat(doorfd, &st) < 0) { 1470 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1471 goto out; 1472 } 1473 /* 1474 * Lock the file to synchronize with other zoneadmd 1475 */ 1476 flock.l_type = F_WRLCK; 1477 flock.l_whence = SEEK_SET; 1478 flock.l_start = (off_t)0; 1479 flock.l_len = (off_t)0; 1480 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1481 /* 1482 * Someone else raced us here and grabbed the lock file 1483 * first. A warning here is inappropriate since nothing 1484 * went wrong. 1485 */ 1486 goto out; 1487 } 1488 1489 if (strcmp(st.st_fstype, "namefs") == 0) { 1490 struct door_info info; 1491 1492 /* 1493 * There is already something fattach()'ed to this file. 1494 * Lets see what the door is up to. 1495 */ 1496 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1497 /* 1498 * Another zoneadmd process seems to be in 1499 * control of the situation and we don't need to 1500 * be here. A warning here is inappropriate 1501 * since nothing went wrong. 1502 * 1503 * If the door has been revoked, the zoneadmd 1504 * process currently managing the zone is going 1505 * away. We'll return control to zoneadm(1m) 1506 * which will try again (by which time zoneadmd 1507 * will hopefully have exited). 1508 */ 1509 goto out; 1510 } 1511 1512 /* 1513 * If we got this far, there's a fattach(3c)'ed door 1514 * that belongs to a process that has exited, which can 1515 * happen if the previous zoneadmd died unexpectedly. 1516 * 1517 * Let user know that something is amiss, but that we can 1518 * recover; if the zone is in the installed state, then don't 1519 * message, since having a running zoneadmd isn't really 1520 * expected/needed. We want to keep occurences of this message 1521 * limited to times when zoneadmd is picking back up from a 1522 * zoneadmd that died while the zone was in some non-trivial 1523 * state. 1524 */ 1525 if (zstate > ZONE_STATE_INSTALLED) { 1526 zerror(zlogp, B_FALSE, 1527 "zone '%s': WARNING: zone is in state '%s', but " 1528 "zoneadmd does not appear to be available; " 1529 "restarted zoneadmd to recover.", 1530 zone_name, zone_state_str(zstate)); 1531 } 1532 1533 (void) fdetach(zone_door_path); 1534 (void) close(doorfd); 1535 goto top; 1536 } 1537 ret = 0; 1538 out: 1539 (void) close(doorfd); 1540 return (ret); 1541 } 1542 1543 /* 1544 * Setup the brand's pre and post state change callbacks, as well as the 1545 * query callback, if any of these exist. 1546 */ 1547 static int 1548 brand_callback_init(brand_handle_t bh, char *zone_name) 1549 { 1550 char zpath[MAXPATHLEN]; 1551 1552 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) 1553 return (-1); 1554 1555 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX, 1556 sizeof (pre_statechg_hook)); 1557 1558 if (brand_get_prestatechange(bh, zone_name, zpath, 1559 pre_statechg_hook + EXEC_LEN, 1560 sizeof (pre_statechg_hook) - EXEC_LEN) != 0) 1561 return (-1); 1562 1563 if (strlen(pre_statechg_hook) <= EXEC_LEN) 1564 pre_statechg_hook[0] = '\0'; 1565 1566 (void) strlcpy(post_statechg_hook, EXEC_PREFIX, 1567 sizeof (post_statechg_hook)); 1568 1569 if (brand_get_poststatechange(bh, zone_name, zpath, 1570 post_statechg_hook + EXEC_LEN, 1571 sizeof (post_statechg_hook) - EXEC_LEN) != 0) 1572 return (-1); 1573 1574 if (strlen(post_statechg_hook) <= EXEC_LEN) 1575 post_statechg_hook[0] = '\0'; 1576 1577 (void) strlcpy(query_hook, EXEC_PREFIX, 1578 sizeof (query_hook)); 1579 1580 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN, 1581 sizeof (query_hook) - EXEC_LEN) != 0) 1582 return (-1); 1583 1584 if (strlen(query_hook) <= EXEC_LEN) 1585 query_hook[0] = '\0'; 1586 1587 return (0); 1588 } 1589 1590 int 1591 main(int argc, char *argv[]) 1592 { 1593 int opt; 1594 zoneid_t zid; 1595 priv_set_t *privset; 1596 zone_state_t zstate; 1597 char parents_locale[MAXPATHLEN]; 1598 brand_handle_t bh; 1599 int err; 1600 1601 pid_t pid; 1602 sigset_t blockset; 1603 sigset_t block_cld; 1604 1605 struct { 1606 sema_t sem; 1607 int status; 1608 zlog_t log; 1609 } *shstate; 1610 size_t shstatelen = getpagesize(); 1611 1612 zlog_t errlog; 1613 zlog_t *zlogp; 1614 1615 int ctfd; 1616 1617 progname = get_execbasename(argv[0]); 1618 1619 /* 1620 * Make sure stderr is unbuffered 1621 */ 1622 (void) setbuffer(stderr, NULL, 0); 1623 1624 /* 1625 * Get out of the way of mounted filesystems, since we will daemonize 1626 * soon. 1627 */ 1628 (void) chdir("/"); 1629 1630 /* 1631 * Use the default system umask per PSARC 1998/110 rather than 1632 * anything that may have been set by the caller. 1633 */ 1634 (void) umask(CMASK); 1635 1636 /* 1637 * Initially we want to use our parent's locale. 1638 */ 1639 (void) setlocale(LC_ALL, ""); 1640 (void) textdomain(TEXT_DOMAIN); 1641 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1642 sizeof (parents_locale)); 1643 1644 /* 1645 * This zlog_t is used for writing to stderr 1646 */ 1647 errlog.logfile = stderr; 1648 errlog.buflen = errlog.loglen = 0; 1649 errlog.buf = errlog.log = NULL; 1650 errlog.locale = parents_locale; 1651 1652 /* 1653 * We start off writing to stderr until we're ready to daemonize. 1654 */ 1655 zlogp = &errlog; 1656 1657 /* 1658 * Process options. 1659 */ 1660 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1661 switch (opt) { 1662 case 'R': 1663 zonecfg_set_root(optarg); 1664 break; 1665 case 'z': 1666 zone_name = optarg; 1667 break; 1668 default: 1669 usage(); 1670 } 1671 } 1672 1673 if (zone_name == NULL) 1674 usage(); 1675 1676 /* 1677 * Because usage() prints directly to stderr, it has gettext() 1678 * wrapping, which depends on the locale. But since zerror() calls 1679 * localize() which tweaks the locale, it is not safe to call zerror() 1680 * until after the last call to usage(). Fortunately, the last call 1681 * to usage() is just above and the first call to zerror() is just 1682 * below. Don't mess this up. 1683 */ 1684 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1685 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1686 GLOBAL_ZONENAME); 1687 return (1); 1688 } 1689 1690 if (zone_get_id(zone_name, &zid) != 0) { 1691 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 1692 zonecfg_strerror(Z_NO_ZONE)); 1693 return (1); 1694 } 1695 1696 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1697 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1698 zonecfg_strerror(err)); 1699 return (1); 1700 } 1701 if (zstate < ZONE_STATE_INCOMPLETE) { 1702 zerror(zlogp, B_FALSE, 1703 "cannot manage a zone which is in state '%s'", 1704 zone_state_str(zstate)); 1705 return (1); 1706 } 1707 1708 /* Get a handle to the brand info for this zone */ 1709 if ((zone_get_brand(zone_name, brand_name, sizeof (brand_name)) 1710 != Z_OK) || (bh = brand_open(brand_name)) == NULL) { 1711 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1712 return (1); 1713 } 1714 zone_isnative = brand_is_native(bh); 1715 zone_iscluster = (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0); 1716 zone_islabeled = (strcmp(brand_name, LABELED_BRAND_NAME) == 0); 1717 1718 /* Get state change brand hooks. */ 1719 if (brand_callback_init(bh, zone_name) == -1) { 1720 zerror(zlogp, B_TRUE, 1721 "failed to initialize brand state change hooks"); 1722 brand_close(bh); 1723 return (1); 1724 } 1725 1726 brand_close(bh); 1727 1728 /* 1729 * Check that we have all privileges. It would be nice to pare 1730 * this down, but this is at least a first cut. 1731 */ 1732 if ((privset = priv_allocset()) == NULL) { 1733 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1734 return (1); 1735 } 1736 1737 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 1738 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 1739 priv_freeset(privset); 1740 return (1); 1741 } 1742 1743 if (priv_isfullset(privset) == B_FALSE) { 1744 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 1745 "run this command (all privs required)"); 1746 priv_freeset(privset); 1747 return (1); 1748 } 1749 priv_freeset(privset); 1750 1751 if (mkzonedir(zlogp) != 0) 1752 return (1); 1753 1754 /* 1755 * Pre-fork: setup shared state 1756 */ 1757 if ((shstate = (void *)mmap(NULL, shstatelen, 1758 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 1759 MAP_FAILED) { 1760 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 1761 return (1); 1762 } 1763 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 1764 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 1765 (void) munmap((char *)shstate, shstatelen); 1766 return (1); 1767 } 1768 shstate->log.logfile = NULL; 1769 shstate->log.buflen = shstatelen - sizeof (*shstate); 1770 shstate->log.loglen = shstate->log.buflen; 1771 shstate->log.buf = (char *)shstate + sizeof (*shstate); 1772 shstate->log.log = shstate->log.buf; 1773 shstate->log.locale = parents_locale; 1774 shstate->status = -1; 1775 1776 /* 1777 * We need a SIGCHLD handler so the sema_wait() below will wake 1778 * up if the child dies without doing a sema_post(). 1779 */ 1780 (void) sigset(SIGCHLD, sigchld); 1781 /* 1782 * We must mask SIGCHLD until after we've coped with the fork 1783 * sufficiently to deal with it; otherwise we can race and 1784 * receive the signal before pid has been initialized 1785 * (yes, this really happens). 1786 */ 1787 (void) sigemptyset(&block_cld); 1788 (void) sigaddset(&block_cld, SIGCHLD); 1789 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 1790 1791 if ((ctfd = init_template()) == -1) { 1792 zerror(zlogp, B_TRUE, "failed to create contract"); 1793 return (1); 1794 } 1795 1796 /* 1797 * Do not let another thread localize a message while we are forking. 1798 */ 1799 (void) mutex_lock(&msglock); 1800 pid = fork(); 1801 (void) mutex_unlock(&msglock); 1802 1803 /* 1804 * In all cases (parent, child, and in the event of an error) we 1805 * don't want to cause creation of contracts on subsequent fork()s. 1806 */ 1807 (void) ct_tmpl_clear(ctfd); 1808 (void) close(ctfd); 1809 1810 if (pid == -1) { 1811 zerror(zlogp, B_TRUE, "could not fork"); 1812 return (1); 1813 1814 } else if (pid > 0) { /* parent */ 1815 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1816 /* 1817 * This marks a window of vulnerability in which we receive 1818 * the SIGCLD before falling into sema_wait (normally we would 1819 * get woken up from sema_wait with EINTR upon receipt of 1820 * SIGCLD). So we may need to use some other scheme like 1821 * sema_posting in the sigcld handler. 1822 * blech 1823 */ 1824 (void) sema_wait(&shstate->sem); 1825 (void) sema_destroy(&shstate->sem); 1826 if (shstate->status != 0) 1827 (void) waitpid(pid, NULL, WNOHANG); 1828 /* 1829 * It's ok if we die with SIGPIPE. It's not like we could have 1830 * done anything about it. 1831 */ 1832 (void) fprintf(stderr, "%s", shstate->log.buf); 1833 _exit(shstate->status == 0 ? 0 : 1); 1834 } 1835 1836 /* 1837 * The child charges on. 1838 */ 1839 (void) sigset(SIGCHLD, SIG_DFL); 1840 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1841 1842 /* 1843 * SIGPIPE can be delivered if we write to a socket for which the 1844 * peer endpoint is gone. That can lead to too-early termination 1845 * of zoneadmd, and that's not good eats. 1846 */ 1847 (void) sigset(SIGPIPE, SIG_IGN); 1848 /* 1849 * Stop using stderr 1850 */ 1851 zlogp = &shstate->log; 1852 1853 /* 1854 * We don't need stdout/stderr from now on. 1855 */ 1856 closefrom(0); 1857 1858 /* 1859 * Initialize the syslog zlog_t. This needs to be done after 1860 * the call to closefrom(). 1861 */ 1862 logsys.buf = logsys.log = NULL; 1863 logsys.buflen = logsys.loglen = 0; 1864 logsys.logfile = NULL; 1865 logsys.locale = DEFAULT_LOCALE; 1866 1867 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 1868 1869 /* 1870 * The eventstream is used to publish state changes in the zone 1871 * from the door threads to the console I/O poller. 1872 */ 1873 if (eventstream_init() == -1) { 1874 zerror(zlogp, B_TRUE, "unable to create eventstream"); 1875 goto child_out; 1876 } 1877 1878 (void) snprintf(zone_door_path, sizeof (zone_door_path), 1879 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 1880 1881 /* 1882 * See if another zoneadmd is running for this zone. If not, then we 1883 * can now modify system state. 1884 */ 1885 if (make_daemon_exclusive(zlogp) == -1) 1886 goto child_out; 1887 1888 1889 /* 1890 * Create/join a new session; we need to be careful of what we do with 1891 * the console from now on so we don't end up being the session leader 1892 * for the terminal we're going to be handing out. 1893 */ 1894 (void) setsid(); 1895 1896 /* 1897 * This thread shouldn't be receiving any signals; in particular, 1898 * SIGCHLD should be received by the thread doing the fork(). 1899 */ 1900 (void) sigfillset(&blockset); 1901 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 1902 1903 /* 1904 * Setup the console device and get ready to serve the console; 1905 * once this has completed, we're ready to let console clients 1906 * make an attempt to connect (they will block until 1907 * serve_console_sock() below gets called, and any pending 1908 * connection is accept()ed). 1909 */ 1910 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0) 1911 goto child_out; 1912 1913 /* 1914 * Take the lock now, so that when the door server gets going, we 1915 * are guaranteed that it won't take a request until we are sure 1916 * that everything is completely set up. See the child_out: label 1917 * below to see why this matters. 1918 */ 1919 (void) mutex_lock(&lock); 1920 1921 /* Init semaphore for scratch zones. */ 1922 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 1923 zerror(zlogp, B_TRUE, 1924 "failed to initialize semaphore for scratch zone"); 1925 goto child_out; 1926 } 1927 1928 /* open the dladm handle */ 1929 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) { 1930 zerror(zlogp, B_FALSE, "failed to open dladm handle"); 1931 goto child_out; 1932 } 1933 1934 /* 1935 * Note: door setup must occur *after* the console is setup. 1936 * This is so that as zlogin tests the door to see if zoneadmd 1937 * is ready yet, we know that the console will get serviced 1938 * once door_info() indicates that the door is "up". 1939 */ 1940 if (setup_door(zlogp) == -1) 1941 goto child_out; 1942 1943 /* 1944 * Things seem OK so far; tell the parent process that we're done 1945 * with setup tasks. This will cause the parent to exit, signalling 1946 * to zoneadm, zlogin, or whatever forked it that we are ready to 1947 * service requests. 1948 */ 1949 shstate->status = 0; 1950 (void) sema_post(&shstate->sem); 1951 (void) munmap((char *)shstate, shstatelen); 1952 shstate = NULL; 1953 1954 (void) mutex_unlock(&lock); 1955 1956 /* 1957 * zlogp is now invalid, so reset it to the syslog logger. 1958 */ 1959 zlogp = &logsys; 1960 1961 /* 1962 * Now that we are free of any parents, switch to the default locale. 1963 */ 1964 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 1965 1966 /* 1967 * At this point the setup portion of main() is basically done, so 1968 * we reuse this thread to manage the zone console. When 1969 * serve_console() has returned, we are past the point of no return 1970 * in the life of this zoneadmd. 1971 */ 1972 if (zonecfg_in_alt_root()) { 1973 /* 1974 * This is just awful, but mounted scratch zones don't (and 1975 * can't) have consoles. We just wait for unmount instead. 1976 */ 1977 while (sema_wait(&scratch_sem) == EINTR) 1978 ; 1979 } else { 1980 serve_console(zlogp); 1981 assert(in_death_throes); 1982 } 1983 1984 /* 1985 * This is the next-to-last part of the exit interlock. Upon calling 1986 * fdetach(), the door will go unreferenced; once any 1987 * outstanding requests (like the door thread doing Z_HALT) are 1988 * done, the door will get an UNREF notification; when it handles 1989 * the UNREF, the door server will cause the exit. 1990 */ 1991 assert(!MUTEX_HELD(&lock)); 1992 (void) fdetach(zone_door_path); 1993 1994 for (;;) 1995 (void) pause(); 1996 1997 child_out: 1998 assert(pid == 0); 1999 if (shstate != NULL) { 2000 shstate->status = -1; 2001 (void) sema_post(&shstate->sem); 2002 (void) munmap((char *)shstate, shstatelen); 2003 } 2004 2005 /* 2006 * This might trigger an unref notification, but if so, 2007 * we are still holding the lock, so our call to exit will 2008 * ultimately win the race and will publish the right exit 2009 * code. 2010 */ 2011 if (zone_door != -1) { 2012 assert(MUTEX_HELD(&lock)); 2013 (void) door_revoke(zone_door); 2014 (void) fdetach(zone_door_path); 2015 } 2016 2017 if (dld_handle != NULL) 2018 dladm_close(dld_handle); 2019 2020 return (1); /* return from main() forcibly exits an MT process */ 2021 } 2022