1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * zoneadmd manages zones; one zoneadmd process is launched for each 29 * non-global zone on the system. This daemon juggles four jobs: 30 * 31 * - Implement setup and teardown of the zone "virtual platform": mount and 32 * unmount filesystems; create and destroy network interfaces; communicate 33 * with devfsadmd to lay out devices for the zone; instantiate the zone 34 * console device; configure process runtime attributes such as resource 35 * controls, pool bindings, fine-grained privileges. 36 * 37 * - Launch the zone's init(1M) process. 38 * 39 * - Implement a door server; clients (like zoneadm) connect to the door 40 * server and request zone state changes. The kernel is also a client of 41 * this door server. A request to halt or reboot the zone which originates 42 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 43 * 44 * One minor problem is that messages emitted by zoneadmd need to be passed 45 * back to the zoneadm process making the request. These messages need to 46 * be rendered in the client's locale; so, this is passed in as part of the 47 * request. The exception is the kernel upcall to zoneadmd, in which case 48 * messages are syslog'd. 49 * 50 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 51 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 52 * strings which do not need to be translated. 53 * 54 * - Act as a console server for zlogin -C processes; see comments in zcons.c 55 * for more information about the zone console architecture. 56 * 57 * DESIGN NOTES 58 * 59 * Restart: 60 * A chief design constraint of zoneadmd is that it should be restartable in 61 * the case that the administrator kills it off, or it suffers a fatal error, 62 * without the running zone being impacted; this is akin to being able to 63 * reboot the service processor of a server without affecting the OS instance. 64 */ 65 66 #include <sys/param.h> 67 #include <sys/mman.h> 68 #include <sys/types.h> 69 #include <sys/stat.h> 70 #include <sys/sysmacros.h> 71 72 #include <bsm/adt.h> 73 #include <bsm/adt_event.h> 74 75 #include <alloca.h> 76 #include <assert.h> 77 #include <errno.h> 78 #include <door.h> 79 #include <fcntl.h> 80 #include <locale.h> 81 #include <signal.h> 82 #include <stdarg.h> 83 #include <stdio.h> 84 #include <stdlib.h> 85 #include <string.h> 86 #include <strings.h> 87 #include <synch.h> 88 #include <syslog.h> 89 #include <thread.h> 90 #include <unistd.h> 91 #include <wait.h> 92 #include <limits.h> 93 #include <zone.h> 94 #include <libbrand.h> 95 #include <sys/brand.h> 96 #include <libcontract.h> 97 #include <libcontract_priv.h> 98 #include <sys/contract/process.h> 99 #include <sys/ctfs.h> 100 101 #include <libzonecfg.h> 102 #include "zoneadmd.h" 103 104 static char *progname; 105 char *zone_name; /* zone which we are managing */ 106 char brand_name[MAXNAMELEN]; 107 boolean_t zone_isnative; 108 boolean_t zone_iscluster; 109 static zoneid_t zone_id; 110 dladm_handle_t dld_handle = NULL; 111 112 static char pre_statechg_hook[2 * MAXPATHLEN]; 113 static char post_statechg_hook[2 * MAXPATHLEN]; 114 char query_hook[2 * MAXPATHLEN]; 115 116 zlog_t logsys; 117 118 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 119 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 120 121 static sema_t scratch_sem; /* for scratch zones */ 122 123 static char zone_door_path[MAXPATHLEN]; 124 static int zone_door = -1; 125 126 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 127 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 128 129 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 130 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 131 #endif 132 133 #define DEFAULT_LOCALE "C" 134 135 static const char * 136 z_cmd_name(zone_cmd_t zcmd) 137 { 138 /* This list needs to match the enum in sys/zone.h */ 139 static const char *zcmdstr[] = { 140 "ready", "boot", "forceboot", "reboot", "halt", 141 "note_uninstalling", "mount", "forcemount", "unmount" 142 }; 143 144 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 145 return ("unknown"); 146 else 147 return (zcmdstr[(int)zcmd]); 148 } 149 150 static char * 151 get_execbasename(char *execfullname) 152 { 153 char *last_slash, *execbasename; 154 155 /* guard against '/' at end of command invocation */ 156 for (;;) { 157 last_slash = strrchr(execfullname, '/'); 158 if (last_slash == NULL) { 159 execbasename = execfullname; 160 break; 161 } else { 162 execbasename = last_slash + 1; 163 if (*execbasename == '\0') { 164 *last_slash = '\0'; 165 continue; 166 } 167 break; 168 } 169 } 170 return (execbasename); 171 } 172 173 static void 174 usage(void) 175 { 176 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 177 (void) fprintf(stderr, 178 gettext("\tNote: %s should not be run directly.\n"), progname); 179 exit(2); 180 } 181 182 /* ARGSUSED */ 183 static void 184 sigchld(int sig) 185 { 186 } 187 188 char * 189 localize_msg(char *locale, const char *msg) 190 { 191 char *out; 192 193 (void) mutex_lock(&msglock); 194 (void) setlocale(LC_MESSAGES, locale); 195 out = gettext(msg); 196 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 197 (void) mutex_unlock(&msglock); 198 return (out); 199 } 200 201 /* PRINTFLIKE3 */ 202 void 203 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 204 { 205 va_list alist; 206 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 207 char *bp; 208 int saved_errno = errno; 209 210 if (zlogp == NULL) 211 return; 212 if (zlogp == &logsys) 213 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 214 zone_name); 215 else 216 buf[0] = '\0'; 217 bp = &(buf[strlen(buf)]); 218 219 /* 220 * In theory, the locale pointer should be set to either "C" or a 221 * char array, so it should never be NULL 222 */ 223 assert(zlogp->locale != NULL); 224 /* Locale is per process, but we are multi-threaded... */ 225 fmt = localize_msg(zlogp->locale, fmt); 226 227 va_start(alist, fmt); 228 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 229 va_end(alist); 230 bp = &(buf[strlen(buf)]); 231 if (use_strerror) 232 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 233 strerror(saved_errno)); 234 if (zlogp == &logsys) { 235 (void) syslog(LOG_ERR, "%s", buf); 236 } else if (zlogp->logfile != NULL) { 237 (void) fprintf(zlogp->logfile, "%s\n", buf); 238 } else { 239 size_t buflen; 240 size_t copylen; 241 242 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 243 copylen = MIN(buflen, zlogp->loglen); 244 zlogp->log += copylen; 245 zlogp->loglen -= copylen; 246 } 247 } 248 249 /* 250 * Emit a warning for any boot arguments which are unrecognized. Since 251 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 252 * put the arguments into an argv style array, use getopt to process them, 253 * and put the resultant argument string back into outargs. 254 * 255 * During the filtering, we pull out any arguments which are truly "boot" 256 * arguments, leaving only those which are to be passed intact to the 257 * progenitor process. The one we support at the moment is -i, which 258 * indicates to the kernel which program should be launched as 'init'. 259 * 260 * A return of Z_INVAL indicates specifically that the arguments are 261 * not valid; this is a non-fatal error. Except for Z_OK, all other return 262 * values are treated as fatal. 263 */ 264 static int 265 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 266 char *init_file, char *badarg) 267 { 268 int argc = 0, argc_save; 269 int i; 270 int err; 271 char *arg, *lasts, **argv = NULL, **argv_save; 272 char zonecfg_args[BOOTARGS_MAX]; 273 char scratchargs[BOOTARGS_MAX], *sargs; 274 char c; 275 276 bzero(outargs, BOOTARGS_MAX); 277 bzero(badarg, BOOTARGS_MAX); 278 279 /* 280 * If the user didn't specify transient boot arguments, check 281 * to see if there were any specified in the zone configuration, 282 * and use them if applicable. 283 */ 284 if (inargs == NULL || inargs[0] == '\0') { 285 zone_dochandle_t handle; 286 if ((handle = zonecfg_init_handle()) == NULL) { 287 zerror(zlogp, B_TRUE, 288 "getting zone configuration handle"); 289 return (Z_BAD_HANDLE); 290 } 291 err = zonecfg_get_snapshot_handle(zone_name, handle); 292 if (err != Z_OK) { 293 zerror(zlogp, B_FALSE, 294 "invalid configuration snapshot"); 295 zonecfg_fini_handle(handle); 296 return (Z_BAD_HANDLE); 297 } 298 299 bzero(zonecfg_args, sizeof (zonecfg_args)); 300 (void) zonecfg_get_bootargs(handle, zonecfg_args, 301 sizeof (zonecfg_args)); 302 inargs = zonecfg_args; 303 zonecfg_fini_handle(handle); 304 } 305 306 if (strlen(inargs) >= BOOTARGS_MAX) { 307 zerror(zlogp, B_FALSE, "boot argument string too long"); 308 return (Z_INVAL); 309 } 310 311 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 312 sargs = scratchargs; 313 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 314 sargs = NULL; 315 argc++; 316 } 317 318 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 319 zerror(zlogp, B_FALSE, "memory allocation failed"); 320 return (Z_NOMEM); 321 } 322 323 argv_save = argv; 324 argc_save = argc; 325 326 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 327 sargs = scratchargs; 328 i = 0; 329 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 330 sargs = NULL; 331 if ((argv[i] = strdup(arg)) == NULL) { 332 err = Z_NOMEM; 333 zerror(zlogp, B_FALSE, "memory allocation failed"); 334 goto done; 335 } 336 i++; 337 } 338 339 /* 340 * We preserve compatibility with the Solaris system boot behavior, 341 * which allows: 342 * 343 * # reboot kernel/unix -s -m verbose 344 * 345 * In this example, kernel/unix tells the booter what file to 346 * boot. We don't want reboot in a zone to be gratuitously different, 347 * so we silently ignore the boot file, if necessary. 348 */ 349 if (argv[0] == NULL) 350 goto done; 351 352 assert(argv[0][0] != ' '); 353 assert(argv[0][0] != '\t'); 354 355 if (argv[0][0] != '-' && argv[0][0] != '\0') { 356 argv = &argv[1]; 357 argc--; 358 } 359 360 optind = 0; 361 opterr = 0; 362 err = Z_OK; 363 while ((c = getopt(argc, argv, "fi:m:s")) != -1) { 364 switch (c) { 365 case 'i': 366 /* 367 * -i is handled by the runtime and is not passed 368 * along to userland 369 */ 370 (void) strlcpy(init_file, optarg, MAXPATHLEN); 371 break; 372 case 'f': 373 /* This has already been processed by zoneadm */ 374 break; 375 case 'm': 376 case 's': 377 /* These pass through unmolested */ 378 (void) snprintf(outargs, BOOTARGS_MAX, 379 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 380 break; 381 case '?': 382 /* 383 * We warn about unknown arguments but pass them 384 * along anyway-- if someone wants to develop their 385 * own init replacement, they can pass it whatever 386 * args they want. 387 */ 388 err = Z_INVAL; 389 (void) snprintf(outargs, BOOTARGS_MAX, 390 "%s -%c", outargs, optopt); 391 (void) snprintf(badarg, BOOTARGS_MAX, 392 "%s -%c", badarg, optopt); 393 break; 394 } 395 } 396 397 /* 398 * For Solaris Zones we warn about and discard non-option arguments. 399 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 400 * to the kernel, we concat up all the other remaining boot args. 401 * and warn on them as a group. 402 */ 403 if (optind < argc) { 404 err = Z_INVAL; 405 while (optind < argc) { 406 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 407 badarg, strlen(badarg) > 0 ? " " : "", 408 argv[optind]); 409 optind++; 410 } 411 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 412 "arguments `%s'.", badarg); 413 } 414 415 done: 416 for (i = 0; i < argc_save; i++) { 417 if (argv_save[i] != NULL) 418 free(argv_save[i]); 419 } 420 free(argv_save); 421 return (err); 422 } 423 424 425 static int 426 mkzonedir(zlog_t *zlogp) 427 { 428 struct stat st; 429 /* 430 * We must create and lock everyone but root out of ZONES_TMPDIR 431 * since anyone can open any UNIX domain socket, regardless of 432 * its file system permissions. Sigh... 433 */ 434 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 435 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 436 return (-1); 437 } 438 /* paranoia */ 439 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 440 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 441 return (-1); 442 } 443 (void) chmod(ZONES_TMPDIR, S_IRWXU); 444 return (0); 445 } 446 447 /* 448 * Run the brand's pre-state change callback, if it exists. 449 */ 450 static int 451 brand_prestatechg(zlog_t *zlogp, int state, int cmd) 452 { 453 char cmdbuf[2 * MAXPATHLEN]; 454 455 if (pre_statechg_hook[0] == '\0') 456 return (0); 457 458 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d", pre_statechg_hook, 459 state, cmd) > sizeof (cmdbuf)) 460 return (-1); 461 462 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 463 return (-1); 464 465 return (0); 466 } 467 468 /* 469 * Run the brand's post-state change callback, if it exists. 470 */ 471 static int 472 brand_poststatechg(zlog_t *zlogp, int state, int cmd) 473 { 474 char cmdbuf[2 * MAXPATHLEN]; 475 476 if (post_statechg_hook[0] == '\0') 477 return (0); 478 479 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d", post_statechg_hook, 480 state, cmd) > sizeof (cmdbuf)) 481 return (-1); 482 483 if (do_subproc(zlogp, cmdbuf, NULL) != 0) 484 return (-1); 485 486 return (0); 487 } 488 489 /* 490 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 491 * 'true' if this is being invoked as part of the processing for the "mount" 492 * subcommand. 493 */ 494 static int 495 zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate) 496 { 497 int err; 498 499 if (brand_prestatechg(zlogp, zstate, Z_READY) != 0) 500 return (-1); 501 502 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 503 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 504 zonecfg_strerror(err)); 505 return (-1); 506 } 507 508 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 509 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 510 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 511 zonecfg_strerror(err)); 512 return (-1); 513 } 514 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 515 bringup_failure_recovery = B_TRUE; 516 (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE); 517 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 518 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 519 zonecfg_strerror(err)); 520 return (-1); 521 } 522 523 if (brand_poststatechg(zlogp, zstate, Z_READY) != 0) 524 return (-1); 525 526 return (0); 527 } 528 529 int 530 init_template(void) 531 { 532 int fd; 533 int err = 0; 534 535 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 536 if (fd == -1) 537 return (-1); 538 539 /* 540 * For now, zoneadmd doesn't do anything with the contract. 541 * Deliver no events, don't inherit, and allow it to be orphaned. 542 */ 543 err |= ct_tmpl_set_critical(fd, 0); 544 err |= ct_tmpl_set_informative(fd, 0); 545 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 546 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 547 if (err || ct_tmpl_activate(fd)) { 548 (void) close(fd); 549 return (-1); 550 } 551 552 return (fd); 553 } 554 555 typedef struct fs_callback { 556 zlog_t *zlogp; 557 zoneid_t zoneid; 558 boolean_t mount_cmd; 559 } fs_callback_t; 560 561 static int 562 mount_early_fs(void *data, const char *spec, const char *dir, 563 const char *fstype, const char *opt) 564 { 565 zlog_t *zlogp = ((fs_callback_t *)data)->zlogp; 566 zoneid_t zoneid = ((fs_callback_t *)data)->zoneid; 567 boolean_t mount_cmd = ((fs_callback_t *)data)->mount_cmd; 568 char rootpath[MAXPATHLEN]; 569 pid_t child; 570 int child_status; 571 int tmpl_fd; 572 int rv; 573 ctid_t ct; 574 575 /* determine the zone rootpath */ 576 if (mount_cmd) { 577 char zonepath[MAXPATHLEN]; 578 char luroot[MAXPATHLEN]; 579 580 if (zone_get_zonepath(zone_name, 581 zonepath, sizeof (zonepath)) != Z_OK) { 582 zerror(zlogp, B_FALSE, "unable to determine zone path"); 583 return (-1); 584 } 585 586 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 587 resolve_lofs(zlogp, luroot, sizeof (luroot)); 588 (void) strlcpy(rootpath, luroot, sizeof (rootpath)); 589 } else { 590 if (zone_get_rootpath(zone_name, 591 rootpath, sizeof (rootpath)) != Z_OK) { 592 zerror(zlogp, B_FALSE, "unable to determine zone root"); 593 return (-1); 594 } 595 } 596 597 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, fstype)) < 0) { 598 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 599 rootpath, dir); 600 return (-1); 601 } else if (rv > 0) { 602 /* The mount point path doesn't exist, create it now. */ 603 if (make_one_dir(zlogp, rootpath, dir, 604 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 605 DEFAULT_DIR_GROUP) != 0) { 606 zerror(zlogp, B_FALSE, "failed to create mount point"); 607 return (-1); 608 } 609 610 /* 611 * Now this might seem weird, but we need to invoke 612 * valid_mount_path() again. Why? Because it checks 613 * to make sure that the mount point path is canonical, 614 * which it can only do if the path exists, so now that 615 * we've created the path we have to verify it again. 616 */ 617 if ((rv = valid_mount_path(zlogp, rootpath, spec, dir, 618 fstype)) < 0) { 619 zerror(zlogp, B_FALSE, 620 "%s%s is not a valid mount point", rootpath, dir); 621 return (-1); 622 } 623 } 624 625 if ((tmpl_fd = init_template()) == -1) { 626 zerror(zlogp, B_TRUE, "failed to create contract"); 627 return (-1); 628 } 629 630 if ((child = fork()) == -1) { 631 (void) ct_tmpl_clear(tmpl_fd); 632 (void) close(tmpl_fd); 633 zerror(zlogp, B_TRUE, "failed to fork"); 634 return (-1); 635 636 } else if (child == 0) { /* child */ 637 char opt_buf[MAX_MNTOPT_STR]; 638 int optlen = 0; 639 int mflag = MS_DATA; 640 641 (void) ct_tmpl_clear(tmpl_fd); 642 /* 643 * Even though there are no procs running in the zone, we 644 * do this for paranoia's sake. 645 */ 646 (void) closefrom(0); 647 648 if (zone_enter(zoneid) == -1) { 649 _exit(errno); 650 } 651 if (opt != NULL) { 652 /* 653 * The mount() system call is incredibly annoying. 654 * If options are specified, we need to copy them 655 * into a temporary buffer since the mount() system 656 * call will overwrite the options string. It will 657 * also fail if the new option string it wants to 658 * write is bigger than the one we passed in, so 659 * you must pass in a buffer of the maximum possible 660 * option string length. sigh. 661 */ 662 (void) strlcpy(opt_buf, opt, sizeof (opt_buf)); 663 opt = opt_buf; 664 optlen = MAX_MNTOPT_STR; 665 mflag = MS_OPTIONSTR; 666 } 667 if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) 668 _exit(errno); 669 _exit(0); 670 } 671 672 /* parent */ 673 if (contract_latest(&ct) == -1) 674 ct = -1; 675 (void) ct_tmpl_clear(tmpl_fd); 676 (void) close(tmpl_fd); 677 if (waitpid(child, &child_status, 0) != child) { 678 /* unexpected: we must have been signalled */ 679 (void) contract_abandon_id(ct); 680 return (-1); 681 } 682 (void) contract_abandon_id(ct); 683 if (WEXITSTATUS(child_status) != 0) { 684 errno = WEXITSTATUS(child_status); 685 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 686 return (-1); 687 } 688 689 return (0); 690 } 691 692 /* 693 * If retstr is not NULL, the output of the subproc is returned in the str, 694 * otherwise it is output using zerror(). Any memory allocated for retstr 695 * should be freed by the caller. 696 */ 697 int 698 do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr) 699 { 700 char buf[1024]; /* arbitrary large amount */ 701 char *inbuf; 702 FILE *file; 703 int status; 704 int rd_cnt; 705 706 if (retstr != NULL) { 707 if ((*retstr = malloc(1024)) == NULL) { 708 zerror(zlogp, B_FALSE, "out of memory"); 709 return (-1); 710 } 711 inbuf = *retstr; 712 rd_cnt = 0; 713 } else { 714 inbuf = buf; 715 } 716 717 file = popen(cmdbuf, "r"); 718 if (file == NULL) { 719 zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); 720 return (-1); 721 } 722 723 while (fgets(inbuf, 1024, file) != NULL) { 724 if (retstr == NULL && zlogp != &logsys) { 725 zerror(zlogp, B_FALSE, "%s", inbuf); 726 } else { 727 char *p; 728 729 rd_cnt += 1024 - 1; 730 if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) { 731 zerror(zlogp, B_FALSE, "out of memory"); 732 (void) pclose(file); 733 return (-1); 734 } 735 736 *retstr = p; 737 inbuf = *retstr + rd_cnt; 738 } 739 } 740 status = pclose(file); 741 742 if (WIFSIGNALED(status)) { 743 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 744 "signal %d", cmdbuf, WTERMSIG(status)); 745 return (-1); 746 } 747 assert(WIFEXITED(status)); 748 if (WEXITSTATUS(status) == ZEXIT_EXEC) { 749 zerror(zlogp, B_FALSE, "failed to exec %s", cmdbuf); 750 return (-1); 751 } 752 return (WEXITSTATUS(status)); 753 } 754 755 static int 756 zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) 757 { 758 zoneid_t zoneid; 759 struct stat st; 760 char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 761 char nbootargs[BOOTARGS_MAX]; 762 char cmdbuf[MAXPATHLEN]; 763 fs_callback_t cb; 764 brand_handle_t bh; 765 int err; 766 767 if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0) 768 return (-1); 769 770 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 771 zerror(zlogp, B_TRUE, "unable to get zoneid"); 772 return (-1); 773 } 774 775 cb.zlogp = zlogp; 776 cb.zoneid = zoneid; 777 cb.mount_cmd = B_FALSE; 778 779 /* Get a handle to the brand info for this zone */ 780 if ((bh = brand_open(brand_name)) == NULL) { 781 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 782 return (-1); 783 } 784 785 /* 786 * Get the list of filesystems to mount from the brand 787 * configuration. These mounts are done via a thread that will 788 * enter the zone, so they are done from within the context of the 789 * zone. 790 */ 791 if (brand_platform_iter_mounts(bh, mount_early_fs, &cb) != 0) { 792 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 793 brand_close(bh); 794 return (-1); 795 } 796 797 /* 798 * Get the brand's boot callback if it exists. 799 */ 800 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 801 zerror(zlogp, B_FALSE, "unable to determine zone path"); 802 brand_close(bh); 803 return (-1); 804 } 805 (void) strcpy(cmdbuf, EXEC_PREFIX); 806 if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 807 sizeof (cmdbuf) - EXEC_LEN) != 0) { 808 zerror(zlogp, B_FALSE, 809 "unable to determine branded zone's boot callback"); 810 brand_close(bh); 811 return (-1); 812 } 813 814 /* Get the path for this zone's init(1M) (or equivalent) process. */ 815 if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) { 816 zerror(zlogp, B_FALSE, 817 "unable to determine zone's init(1M) location"); 818 brand_close(bh); 819 return (-1); 820 } 821 822 brand_close(bh); 823 824 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 825 bad_boot_arg); 826 if (err == Z_INVAL) 827 eventstream_write(Z_EVT_ZONE_BADARGS); 828 else if (err != Z_OK) 829 return (-1); 830 831 assert(init_file[0] != '\0'); 832 833 /* Try to anticipate possible problems: Make sure init is executable. */ 834 if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 835 zerror(zlogp, B_FALSE, "unable to determine zone root"); 836 return (-1); 837 } 838 839 (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); 840 841 if (stat(initpath, &st) == -1) { 842 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 843 return (-1); 844 } 845 846 if ((st.st_mode & S_IXUSR) == 0) { 847 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 848 return (-1); 849 } 850 851 /* 852 * If there is a brand 'boot' callback, execute it now to give the 853 * brand one last chance to do any additional setup before the zone 854 * is booted. 855 */ 856 if ((strlen(cmdbuf) > EXEC_LEN) && 857 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) { 858 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 859 return (-1); 860 } 861 862 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 863 zerror(zlogp, B_TRUE, "could not set zone boot file"); 864 return (-1); 865 } 866 867 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 868 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 869 return (-1); 870 } 871 872 if (zone_boot(zoneid) == -1) { 873 zerror(zlogp, B_TRUE, "unable to boot zone"); 874 return (-1); 875 } 876 877 if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0) 878 return (-1); 879 880 return (0); 881 } 882 883 static int 884 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate) 885 { 886 int err; 887 888 if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0) 889 return (-1); 890 891 if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) { 892 if (!bringup_failure_recovery) 893 zerror(zlogp, B_FALSE, "unable to destroy zone"); 894 return (-1); 895 } 896 897 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 898 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 899 zonecfg_strerror(err)); 900 901 if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0) 902 return (-1); 903 904 return (0); 905 } 906 907 /* 908 * Generate AUE_zone_state for a command that boots a zone. 909 */ 910 static void 911 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 912 char *new_state) 913 { 914 adt_session_data_t *ah; 915 adt_event_data_t *event; 916 int pass_fail, fail_reason; 917 918 if (!adt_audit_enabled()) 919 return; 920 921 if (return_val == 0) { 922 pass_fail = ADT_SUCCESS; 923 fail_reason = ADT_SUCCESS; 924 } else { 925 pass_fail = ADT_FAILURE; 926 fail_reason = ADT_FAIL_VALUE_PROGRAM; 927 } 928 929 if (adt_start_session(&ah, NULL, 0)) { 930 zerror(zlogp, B_TRUE, gettext("audit failure.")); 931 return; 932 } 933 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 934 zerror(zlogp, B_TRUE, gettext("audit failure.")); 935 (void) adt_end_session(ah); 936 return; 937 } 938 939 event = adt_alloc_event(ah, ADT_zone_state); 940 if (event == NULL) { 941 zerror(zlogp, B_TRUE, gettext("audit failure.")); 942 (void) adt_end_session(ah); 943 return; 944 } 945 event->adt_zone_state.zonename = zone_name; 946 event->adt_zone_state.new_state = new_state; 947 948 if (adt_put_event(event, pass_fail, fail_reason)) 949 zerror(zlogp, B_TRUE, gettext("audit failure.")); 950 951 adt_free_event(event); 952 953 (void) adt_end_session(ah); 954 } 955 956 /* 957 * The main routine for the door server that deals with zone state transitions. 958 */ 959 /* ARGSUSED */ 960 static void 961 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 962 uint_t n_desc) 963 { 964 ucred_t *uc = NULL; 965 const priv_set_t *eset; 966 967 zone_state_t zstate; 968 zone_cmd_t cmd; 969 zone_cmd_arg_t *zargp; 970 971 boolean_t kernelcall; 972 973 int rval = -1; 974 uint64_t uniqid; 975 zoneid_t zoneid = -1; 976 zlog_t zlog; 977 zlog_t *zlogp; 978 zone_cmd_rval_t *rvalp; 979 size_t rlen = getpagesize(); /* conservative */ 980 fs_callback_t cb; 981 brand_handle_t bh; 982 983 /* LINTED E_BAD_PTR_CAST_ALIGN */ 984 zargp = (zone_cmd_arg_t *)args; 985 986 /* 987 * When we get the door unref message, we've fdetach'd the door, and 988 * it is time for us to shut down zoneadmd. 989 */ 990 if (zargp == DOOR_UNREF_DATA) { 991 /* 992 * See comment at end of main() for info on the last rites. 993 */ 994 exit(0); 995 } 996 997 if (zargp == NULL) { 998 (void) door_return(NULL, 0, 0, 0); 999 } 1000 1001 rvalp = alloca(rlen); 1002 bzero(rvalp, rlen); 1003 zlog.logfile = NULL; 1004 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 1005 zlog.buf = rvalp->errbuf; 1006 zlog.log = zlog.buf; 1007 /* defer initialization of zlog.locale until after credential check */ 1008 zlogp = &zlog; 1009 1010 if (alen != sizeof (zone_cmd_arg_t)) { 1011 /* 1012 * This really shouldn't be happening. 1013 */ 1014 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 1015 "unexpected (expected %d bytes)", alen, 1016 sizeof (zone_cmd_arg_t)); 1017 goto out; 1018 } 1019 cmd = zargp->cmd; 1020 1021 if (door_ucred(&uc) != 0) { 1022 zerror(&logsys, B_TRUE, "door_ucred"); 1023 goto out; 1024 } 1025 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 1026 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 1027 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 1028 ucred_geteuid(uc) != 0)) { 1029 zerror(&logsys, B_FALSE, "insufficient privileges"); 1030 goto out; 1031 } 1032 1033 kernelcall = ucred_getpid(uc) == 0; 1034 1035 /* 1036 * This is safe because we only use a zlog_t throughout the 1037 * duration of a door call; i.e., by the time the pointer 1038 * might become invalid, the door call would be over. 1039 */ 1040 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 1041 1042 (void) mutex_lock(&lock); 1043 1044 /* 1045 * Once we start to really die off, we don't want more connections. 1046 */ 1047 if (in_death_throes) { 1048 (void) mutex_unlock(&lock); 1049 ucred_free(uc); 1050 (void) door_return(NULL, 0, 0, 0); 1051 thr_exit(NULL); 1052 } 1053 1054 /* 1055 * Check for validity of command. 1056 */ 1057 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_FORCEBOOT && 1058 cmd != Z_REBOOT && cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && 1059 cmd != Z_MOUNT && cmd != Z_FORCEMOUNT && cmd != Z_UNMOUNT) { 1060 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 1061 goto out; 1062 } 1063 1064 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 1065 /* 1066 * Can't happen 1067 */ 1068 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 1069 cmd); 1070 goto out; 1071 } 1072 /* 1073 * We ignore the possibility of someone calling zone_create(2) 1074 * explicitly; all requests must come through zoneadmd. 1075 */ 1076 if (zone_get_state(zone_name, &zstate) != Z_OK) { 1077 /* 1078 * Something terribly wrong happened 1079 */ 1080 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 1081 goto out; 1082 } 1083 1084 if (kernelcall) { 1085 /* 1086 * Kernel-initiated requests may lose their validity if the 1087 * zone_t the kernel was referring to has gone away. 1088 */ 1089 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 1090 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 1091 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 1092 /* 1093 * We're not talking about the same zone. The request 1094 * must have arrived too late. Return error. 1095 */ 1096 rval = -1; 1097 goto out; 1098 } 1099 zlogp = &logsys; /* Log errors to syslog */ 1100 } 1101 1102 /* 1103 * If we are being asked to forcibly mount or boot a zone, we 1104 * pretend that an INCOMPLETE zone is actually INSTALLED. 1105 */ 1106 if (zstate == ZONE_STATE_INCOMPLETE && 1107 (cmd == Z_FORCEBOOT || cmd == Z_FORCEMOUNT)) 1108 zstate = ZONE_STATE_INSTALLED; 1109 1110 switch (zstate) { 1111 case ZONE_STATE_CONFIGURED: 1112 case ZONE_STATE_INCOMPLETE: 1113 /* 1114 * Not our area of expertise; we just print a nice message 1115 * and die off. 1116 */ 1117 zerror(zlogp, B_FALSE, 1118 "%s operation is invalid for zones in state '%s'", 1119 z_cmd_name(cmd), zone_state_str(zstate)); 1120 break; 1121 1122 case ZONE_STATE_INSTALLED: 1123 switch (cmd) { 1124 case Z_READY: 1125 rval = zone_ready(zlogp, Z_MNT_BOOT, zstate); 1126 if (rval == 0) 1127 eventstream_write(Z_EVT_ZONE_READIED); 1128 break; 1129 case Z_BOOT: 1130 case Z_FORCEBOOT: 1131 eventstream_write(Z_EVT_ZONE_BOOTING); 1132 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1133 == 0) { 1134 rval = zone_bootup(zlogp, zargp->bootbuf, 1135 zstate); 1136 } 1137 audit_put_record(zlogp, uc, rval, "boot"); 1138 if (rval != 0) { 1139 bringup_failure_recovery = B_TRUE; 1140 (void) zone_halt(zlogp, B_FALSE, B_FALSE, 1141 zstate); 1142 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1143 } 1144 break; 1145 case Z_HALT: 1146 if (kernelcall) /* Invalid; can't happen */ 1147 abort(); 1148 /* 1149 * We could have two clients racing to halt this 1150 * zone; the second client loses, but his request 1151 * doesn't fail, since the zone is now in the desired 1152 * state. 1153 */ 1154 zerror(zlogp, B_FALSE, "zone is already halted"); 1155 rval = 0; 1156 break; 1157 case Z_REBOOT: 1158 if (kernelcall) /* Invalid; can't happen */ 1159 abort(); 1160 zerror(zlogp, B_FALSE, "%s operation is invalid " 1161 "for zones in state '%s'", z_cmd_name(cmd), 1162 zone_state_str(zstate)); 1163 rval = -1; 1164 break; 1165 case Z_NOTE_UNINSTALLING: 1166 if (kernelcall) /* Invalid; can't happen */ 1167 abort(); 1168 /* 1169 * Tell the console to print out a message about this. 1170 * Once it does, we will be in_death_throes. 1171 */ 1172 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 1173 break; 1174 case Z_MOUNT: 1175 case Z_FORCEMOUNT: 1176 if (kernelcall) /* Invalid; can't happen */ 1177 abort(); 1178 if (!zone_isnative && !zone_iscluster) { 1179 /* 1180 * -U mounts the zone without lofs mounting 1181 * zone file systems back into the scratch 1182 * zone. This is required when mounting 1183 * non-native branded zones. 1184 */ 1185 (void) strlcpy(zargp->bootbuf, "-U", 1186 BOOTARGS_MAX); 1187 } 1188 1189 rval = zone_ready(zlogp, 1190 strcmp(zargp->bootbuf, "-U") == 0 ? 1191 Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate); 1192 if (rval != 0) 1193 break; 1194 1195 eventstream_write(Z_EVT_ZONE_READIED); 1196 1197 /* 1198 * Get a handle to the native brand info. 1199 * We must always use the native brand file system 1200 * list when mounting the zone. 1201 */ 1202 if ((bh = brand_open(NATIVE_BRAND_NAME)) == NULL) { 1203 rval = -1; 1204 break; 1205 } 1206 1207 /* 1208 * Get the list of filesystems to mount from 1209 * the brand configuration. These mounts are done 1210 * via a thread that will enter the zone, so they 1211 * are done from within the context of the zone. 1212 */ 1213 cb.zlogp = zlogp; 1214 cb.zoneid = zone_id; 1215 cb.mount_cmd = B_TRUE; 1216 rval = brand_platform_iter_mounts(bh, 1217 mount_early_fs, &cb); 1218 1219 brand_close(bh); 1220 1221 /* 1222 * Ordinarily, /dev/fd would be mounted inside the zone 1223 * by svc:/system/filesystem/usr:default, but since 1224 * we're not booting the zone, we need to do this 1225 * manually. 1226 */ 1227 if (rval == 0) 1228 rval = mount_early_fs(&cb, 1229 "fd", "/dev/fd", "fd", NULL); 1230 break; 1231 case Z_UNMOUNT: 1232 if (kernelcall) /* Invalid; can't happen */ 1233 abort(); 1234 zerror(zlogp, B_FALSE, "zone is already unmounted"); 1235 rval = 0; 1236 break; 1237 } 1238 break; 1239 1240 case ZONE_STATE_READY: 1241 switch (cmd) { 1242 case Z_READY: 1243 /* 1244 * We could have two clients racing to ready this 1245 * zone; the second client loses, but his request 1246 * doesn't fail, since the zone is now in the desired 1247 * state. 1248 */ 1249 zerror(zlogp, B_FALSE, "zone is already ready"); 1250 rval = 0; 1251 break; 1252 case Z_BOOT: 1253 (void) strlcpy(boot_args, zargp->bootbuf, 1254 sizeof (boot_args)); 1255 eventstream_write(Z_EVT_ZONE_BOOTING); 1256 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1257 audit_put_record(zlogp, uc, rval, "boot"); 1258 if (rval != 0) { 1259 bringup_failure_recovery = B_TRUE; 1260 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1261 zstate); 1262 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1263 } 1264 boot_args[0] = '\0'; 1265 break; 1266 case Z_HALT: 1267 if (kernelcall) /* Invalid; can't happen */ 1268 abort(); 1269 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1270 != 0) 1271 break; 1272 eventstream_write(Z_EVT_ZONE_HALTED); 1273 break; 1274 case Z_REBOOT: 1275 case Z_NOTE_UNINSTALLING: 1276 case Z_MOUNT: 1277 case Z_UNMOUNT: 1278 if (kernelcall) /* Invalid; can't happen */ 1279 abort(); 1280 zerror(zlogp, B_FALSE, "%s operation is invalid " 1281 "for zones in state '%s'", z_cmd_name(cmd), 1282 zone_state_str(zstate)); 1283 rval = -1; 1284 break; 1285 } 1286 break; 1287 1288 case ZONE_STATE_MOUNTED: 1289 switch (cmd) { 1290 case Z_UNMOUNT: 1291 if (kernelcall) /* Invalid; can't happen */ 1292 abort(); 1293 rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate); 1294 if (rval == 0) { 1295 eventstream_write(Z_EVT_ZONE_HALTED); 1296 (void) sema_post(&scratch_sem); 1297 } 1298 break; 1299 default: 1300 if (kernelcall) /* Invalid; can't happen */ 1301 abort(); 1302 zerror(zlogp, B_FALSE, "%s operation is invalid " 1303 "for zones in state '%s'", z_cmd_name(cmd), 1304 zone_state_str(zstate)); 1305 rval = -1; 1306 break; 1307 } 1308 break; 1309 1310 case ZONE_STATE_RUNNING: 1311 case ZONE_STATE_SHUTTING_DOWN: 1312 case ZONE_STATE_DOWN: 1313 switch (cmd) { 1314 case Z_READY: 1315 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1316 != 0) 1317 break; 1318 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0) 1319 eventstream_write(Z_EVT_ZONE_READIED); 1320 else 1321 eventstream_write(Z_EVT_ZONE_HALTED); 1322 break; 1323 case Z_BOOT: 1324 /* 1325 * We could have two clients racing to boot this 1326 * zone; the second client loses, but his request 1327 * doesn't fail, since the zone is now in the desired 1328 * state. 1329 */ 1330 zerror(zlogp, B_FALSE, "zone is already booted"); 1331 rval = 0; 1332 break; 1333 case Z_HALT: 1334 if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) 1335 != 0) 1336 break; 1337 eventstream_write(Z_EVT_ZONE_HALTED); 1338 break; 1339 case Z_REBOOT: 1340 (void) strlcpy(boot_args, zargp->bootbuf, 1341 sizeof (boot_args)); 1342 eventstream_write(Z_EVT_ZONE_REBOOTING); 1343 if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) 1344 != 0) { 1345 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1346 boot_args[0] = '\0'; 1347 break; 1348 } 1349 if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) 1350 != 0) { 1351 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1352 boot_args[0] = '\0'; 1353 break; 1354 } 1355 rval = zone_bootup(zlogp, zargp->bootbuf, zstate); 1356 audit_put_record(zlogp, uc, rval, "reboot"); 1357 if (rval != 0) { 1358 (void) zone_halt(zlogp, B_FALSE, B_TRUE, 1359 zstate); 1360 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1361 } 1362 boot_args[0] = '\0'; 1363 break; 1364 case Z_NOTE_UNINSTALLING: 1365 case Z_MOUNT: 1366 case Z_UNMOUNT: 1367 zerror(zlogp, B_FALSE, "%s operation is invalid " 1368 "for zones in state '%s'", z_cmd_name(cmd), 1369 zone_state_str(zstate)); 1370 rval = -1; 1371 break; 1372 } 1373 break; 1374 default: 1375 abort(); 1376 } 1377 1378 /* 1379 * Because the state of the zone may have changed, we make sure 1380 * to wake the console poller, which is in charge of initiating 1381 * the shutdown procedure as necessary. 1382 */ 1383 eventstream_write(Z_EVT_NULL); 1384 1385 out: 1386 (void) mutex_unlock(&lock); 1387 if (kernelcall) { 1388 rvalp = NULL; 1389 rlen = 0; 1390 } else { 1391 rvalp->rval = rval; 1392 } 1393 if (uc != NULL) 1394 ucred_free(uc); 1395 (void) door_return((char *)rvalp, rlen, NULL, 0); 1396 thr_exit(NULL); 1397 } 1398 1399 static int 1400 setup_door(zlog_t *zlogp) 1401 { 1402 if ((zone_door = door_create(server, NULL, 1403 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1404 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1405 return (-1); 1406 } 1407 (void) fdetach(zone_door_path); 1408 1409 if (fattach(zone_door, zone_door_path) != 0) { 1410 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1411 (void) door_revoke(zone_door); 1412 (void) fdetach(zone_door_path); 1413 zone_door = -1; 1414 return (-1); 1415 } 1416 return (0); 1417 } 1418 1419 /* 1420 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 1421 * is where zoneadmd itself will check to see that another instance of 1422 * zoneadmd isn't already controlling this zone. 1423 * 1424 * The idea here is that we want to open the path to which we will 1425 * attach our door, lock it, and then make sure that no-one has beat us 1426 * to fattach(3c)ing onto it. 1427 * 1428 * fattach(3c) is really a mount, so there are actually two possible 1429 * vnodes we could be dealing with. Our strategy is as follows: 1430 * 1431 * - If the file we opened is a regular file (common case): 1432 * There is no fattach(3c)ed door, so we have a chance of becoming 1433 * the managing zoneadmd. We attempt to lock the file: if it is 1434 * already locked, that means someone else raced us here, so we 1435 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 1436 * that beat us to it. 1437 * 1438 * - If the file we opened is a namefs file: 1439 * This means there is already an established door fattach(3c)'ed 1440 * to the rendezvous path. We've lost the race, so we give up. 1441 * Note that in this case we also try to grab the file lock, and 1442 * will succeed in acquiring it since the vnode locked by the 1443 * "winning" zoneadmd was a regular one, and the one we locked was 1444 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1445 * we just return to zoneadm(1m) which knows to retry. 1446 */ 1447 static int 1448 make_daemon_exclusive(zlog_t *zlogp) 1449 { 1450 int doorfd = -1; 1451 int err, ret = -1; 1452 struct stat st; 1453 struct flock flock; 1454 zone_state_t zstate; 1455 1456 top: 1457 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1458 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1459 zonecfg_strerror(err)); 1460 goto out; 1461 } 1462 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1463 S_IREAD|S_IWRITE)) < 0) { 1464 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1465 goto out; 1466 } 1467 if (fstat(doorfd, &st) < 0) { 1468 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1469 goto out; 1470 } 1471 /* 1472 * Lock the file to synchronize with other zoneadmd 1473 */ 1474 flock.l_type = F_WRLCK; 1475 flock.l_whence = SEEK_SET; 1476 flock.l_start = (off_t)0; 1477 flock.l_len = (off_t)0; 1478 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1479 /* 1480 * Someone else raced us here and grabbed the lock file 1481 * first. A warning here is inappropriate since nothing 1482 * went wrong. 1483 */ 1484 goto out; 1485 } 1486 1487 if (strcmp(st.st_fstype, "namefs") == 0) { 1488 struct door_info info; 1489 1490 /* 1491 * There is already something fattach()'ed to this file. 1492 * Lets see what the door is up to. 1493 */ 1494 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1495 /* 1496 * Another zoneadmd process seems to be in 1497 * control of the situation and we don't need to 1498 * be here. A warning here is inappropriate 1499 * since nothing went wrong. 1500 * 1501 * If the door has been revoked, the zoneadmd 1502 * process currently managing the zone is going 1503 * away. We'll return control to zoneadm(1m) 1504 * which will try again (by which time zoneadmd 1505 * will hopefully have exited). 1506 */ 1507 goto out; 1508 } 1509 1510 /* 1511 * If we got this far, there's a fattach(3c)'ed door 1512 * that belongs to a process that has exited, which can 1513 * happen if the previous zoneadmd died unexpectedly. 1514 * 1515 * Let user know that something is amiss, but that we can 1516 * recover; if the zone is in the installed state, then don't 1517 * message, since having a running zoneadmd isn't really 1518 * expected/needed. We want to keep occurences of this message 1519 * limited to times when zoneadmd is picking back up from a 1520 * zoneadmd that died while the zone was in some non-trivial 1521 * state. 1522 */ 1523 if (zstate > ZONE_STATE_INSTALLED) { 1524 zerror(zlogp, B_FALSE, 1525 "zone '%s': WARNING: zone is in state '%s', but " 1526 "zoneadmd does not appear to be available; " 1527 "restarted zoneadmd to recover.", 1528 zone_name, zone_state_str(zstate)); 1529 } 1530 1531 (void) fdetach(zone_door_path); 1532 (void) close(doorfd); 1533 goto top; 1534 } 1535 ret = 0; 1536 out: 1537 (void) close(doorfd); 1538 return (ret); 1539 } 1540 1541 /* 1542 * Setup the brand's pre and post state change callbacks, as well as the 1543 * query callback, if any of these exist. 1544 */ 1545 static int 1546 brand_callback_init(brand_handle_t bh, char *zone_name) 1547 { 1548 char zpath[MAXPATHLEN]; 1549 1550 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) 1551 return (-1); 1552 1553 (void) strlcpy(pre_statechg_hook, EXEC_PREFIX, 1554 sizeof (pre_statechg_hook)); 1555 1556 if (brand_get_prestatechange(bh, zone_name, zpath, 1557 pre_statechg_hook + EXEC_LEN, 1558 sizeof (pre_statechg_hook) - EXEC_LEN) != 0) 1559 return (-1); 1560 1561 if (strlen(pre_statechg_hook) <= EXEC_LEN) 1562 pre_statechg_hook[0] = '\0'; 1563 1564 (void) strlcpy(post_statechg_hook, EXEC_PREFIX, 1565 sizeof (post_statechg_hook)); 1566 1567 if (brand_get_poststatechange(bh, zone_name, zpath, 1568 post_statechg_hook + EXEC_LEN, 1569 sizeof (post_statechg_hook) - EXEC_LEN) != 0) 1570 return (-1); 1571 1572 if (strlen(post_statechg_hook) <= EXEC_LEN) 1573 post_statechg_hook[0] = '\0'; 1574 1575 (void) strlcpy(query_hook, EXEC_PREFIX, 1576 sizeof (query_hook)); 1577 1578 if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN, 1579 sizeof (query_hook) - EXEC_LEN) != 0) 1580 return (-1); 1581 1582 if (strlen(query_hook) <= EXEC_LEN) 1583 query_hook[0] = '\0'; 1584 1585 return (0); 1586 } 1587 1588 int 1589 main(int argc, char *argv[]) 1590 { 1591 int opt; 1592 zoneid_t zid; 1593 priv_set_t *privset; 1594 zone_state_t zstate; 1595 char parents_locale[MAXPATHLEN]; 1596 brand_handle_t bh; 1597 int err; 1598 1599 pid_t pid; 1600 sigset_t blockset; 1601 sigset_t block_cld; 1602 1603 struct { 1604 sema_t sem; 1605 int status; 1606 zlog_t log; 1607 } *shstate; 1608 size_t shstatelen = getpagesize(); 1609 1610 zlog_t errlog; 1611 zlog_t *zlogp; 1612 1613 int ctfd; 1614 1615 progname = get_execbasename(argv[0]); 1616 1617 /* 1618 * Make sure stderr is unbuffered 1619 */ 1620 (void) setbuffer(stderr, NULL, 0); 1621 1622 /* 1623 * Get out of the way of mounted filesystems, since we will daemonize 1624 * soon. 1625 */ 1626 (void) chdir("/"); 1627 1628 /* 1629 * Use the default system umask per PSARC 1998/110 rather than 1630 * anything that may have been set by the caller. 1631 */ 1632 (void) umask(CMASK); 1633 1634 /* 1635 * Initially we want to use our parent's locale. 1636 */ 1637 (void) setlocale(LC_ALL, ""); 1638 (void) textdomain(TEXT_DOMAIN); 1639 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1640 sizeof (parents_locale)); 1641 1642 /* 1643 * This zlog_t is used for writing to stderr 1644 */ 1645 errlog.logfile = stderr; 1646 errlog.buflen = errlog.loglen = 0; 1647 errlog.buf = errlog.log = NULL; 1648 errlog.locale = parents_locale; 1649 1650 /* 1651 * We start off writing to stderr until we're ready to daemonize. 1652 */ 1653 zlogp = &errlog; 1654 1655 /* 1656 * Process options. 1657 */ 1658 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1659 switch (opt) { 1660 case 'R': 1661 zonecfg_set_root(optarg); 1662 break; 1663 case 'z': 1664 zone_name = optarg; 1665 break; 1666 default: 1667 usage(); 1668 } 1669 } 1670 1671 if (zone_name == NULL) 1672 usage(); 1673 1674 /* 1675 * Because usage() prints directly to stderr, it has gettext() 1676 * wrapping, which depends on the locale. But since zerror() calls 1677 * localize() which tweaks the locale, it is not safe to call zerror() 1678 * until after the last call to usage(). Fortunately, the last call 1679 * to usage() is just above and the first call to zerror() is just 1680 * below. Don't mess this up. 1681 */ 1682 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1683 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1684 GLOBAL_ZONENAME); 1685 return (1); 1686 } 1687 1688 if (zone_get_id(zone_name, &zid) != 0) { 1689 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 1690 zonecfg_strerror(Z_NO_ZONE)); 1691 return (1); 1692 } 1693 1694 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1695 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1696 zonecfg_strerror(err)); 1697 return (1); 1698 } 1699 if (zstate < ZONE_STATE_INCOMPLETE) { 1700 zerror(zlogp, B_FALSE, 1701 "cannot manage a zone which is in state '%s'", 1702 zone_state_str(zstate)); 1703 return (1); 1704 } 1705 1706 /* Get a handle to the brand info for this zone */ 1707 if ((zone_get_brand(zone_name, brand_name, sizeof (brand_name)) 1708 != Z_OK) || (bh = brand_open(brand_name)) == NULL) { 1709 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1710 return (1); 1711 } 1712 zone_isnative = brand_is_native(bh); 1713 zone_iscluster = (strcmp(brand_name, CLUSTER_BRAND_NAME) == 0); 1714 1715 /* Get state change brand hooks. */ 1716 if (brand_callback_init(bh, zone_name) == -1) { 1717 zerror(zlogp, B_TRUE, 1718 "failed to initialize brand state change hooks"); 1719 brand_close(bh); 1720 return (1); 1721 } 1722 1723 brand_close(bh); 1724 1725 /* 1726 * Check that we have all privileges. It would be nice to pare 1727 * this down, but this is at least a first cut. 1728 */ 1729 if ((privset = priv_allocset()) == NULL) { 1730 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1731 return (1); 1732 } 1733 1734 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 1735 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 1736 priv_freeset(privset); 1737 return (1); 1738 } 1739 1740 if (priv_isfullset(privset) == B_FALSE) { 1741 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 1742 "run this command (all privs required)"); 1743 priv_freeset(privset); 1744 return (1); 1745 } 1746 priv_freeset(privset); 1747 1748 if (mkzonedir(zlogp) != 0) 1749 return (1); 1750 1751 /* 1752 * Pre-fork: setup shared state 1753 */ 1754 if ((shstate = (void *)mmap(NULL, shstatelen, 1755 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 1756 MAP_FAILED) { 1757 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 1758 return (1); 1759 } 1760 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 1761 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 1762 (void) munmap((char *)shstate, shstatelen); 1763 return (1); 1764 } 1765 shstate->log.logfile = NULL; 1766 shstate->log.buflen = shstatelen - sizeof (*shstate); 1767 shstate->log.loglen = shstate->log.buflen; 1768 shstate->log.buf = (char *)shstate + sizeof (*shstate); 1769 shstate->log.log = shstate->log.buf; 1770 shstate->log.locale = parents_locale; 1771 shstate->status = -1; 1772 1773 /* 1774 * We need a SIGCHLD handler so the sema_wait() below will wake 1775 * up if the child dies without doing a sema_post(). 1776 */ 1777 (void) sigset(SIGCHLD, sigchld); 1778 /* 1779 * We must mask SIGCHLD until after we've coped with the fork 1780 * sufficiently to deal with it; otherwise we can race and 1781 * receive the signal before pid has been initialized 1782 * (yes, this really happens). 1783 */ 1784 (void) sigemptyset(&block_cld); 1785 (void) sigaddset(&block_cld, SIGCHLD); 1786 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 1787 1788 if ((ctfd = init_template()) == -1) { 1789 zerror(zlogp, B_TRUE, "failed to create contract"); 1790 return (1); 1791 } 1792 1793 /* 1794 * Do not let another thread localize a message while we are forking. 1795 */ 1796 (void) mutex_lock(&msglock); 1797 pid = fork(); 1798 (void) mutex_unlock(&msglock); 1799 1800 /* 1801 * In all cases (parent, child, and in the event of an error) we 1802 * don't want to cause creation of contracts on subsequent fork()s. 1803 */ 1804 (void) ct_tmpl_clear(ctfd); 1805 (void) close(ctfd); 1806 1807 if (pid == -1) { 1808 zerror(zlogp, B_TRUE, "could not fork"); 1809 return (1); 1810 1811 } else if (pid > 0) { /* parent */ 1812 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1813 /* 1814 * This marks a window of vulnerability in which we receive 1815 * the SIGCLD before falling into sema_wait (normally we would 1816 * get woken up from sema_wait with EINTR upon receipt of 1817 * SIGCLD). So we may need to use some other scheme like 1818 * sema_posting in the sigcld handler. 1819 * blech 1820 */ 1821 (void) sema_wait(&shstate->sem); 1822 (void) sema_destroy(&shstate->sem); 1823 if (shstate->status != 0) 1824 (void) waitpid(pid, NULL, WNOHANG); 1825 /* 1826 * It's ok if we die with SIGPIPE. It's not like we could have 1827 * done anything about it. 1828 */ 1829 (void) fprintf(stderr, "%s", shstate->log.buf); 1830 _exit(shstate->status == 0 ? 0 : 1); 1831 } 1832 1833 /* 1834 * The child charges on. 1835 */ 1836 (void) sigset(SIGCHLD, SIG_DFL); 1837 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1838 1839 /* 1840 * SIGPIPE can be delivered if we write to a socket for which the 1841 * peer endpoint is gone. That can lead to too-early termination 1842 * of zoneadmd, and that's not good eats. 1843 */ 1844 (void) sigset(SIGPIPE, SIG_IGN); 1845 /* 1846 * Stop using stderr 1847 */ 1848 zlogp = &shstate->log; 1849 1850 /* 1851 * We don't need stdout/stderr from now on. 1852 */ 1853 closefrom(0); 1854 1855 /* 1856 * Initialize the syslog zlog_t. This needs to be done after 1857 * the call to closefrom(). 1858 */ 1859 logsys.buf = logsys.log = NULL; 1860 logsys.buflen = logsys.loglen = 0; 1861 logsys.logfile = NULL; 1862 logsys.locale = DEFAULT_LOCALE; 1863 1864 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 1865 1866 /* 1867 * The eventstream is used to publish state changes in the zone 1868 * from the door threads to the console I/O poller. 1869 */ 1870 if (eventstream_init() == -1) { 1871 zerror(zlogp, B_TRUE, "unable to create eventstream"); 1872 goto child_out; 1873 } 1874 1875 (void) snprintf(zone_door_path, sizeof (zone_door_path), 1876 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 1877 1878 /* 1879 * See if another zoneadmd is running for this zone. If not, then we 1880 * can now modify system state. 1881 */ 1882 if (make_daemon_exclusive(zlogp) == -1) 1883 goto child_out; 1884 1885 1886 /* 1887 * Create/join a new session; we need to be careful of what we do with 1888 * the console from now on so we don't end up being the session leader 1889 * for the terminal we're going to be handing out. 1890 */ 1891 (void) setsid(); 1892 1893 /* 1894 * This thread shouldn't be receiving any signals; in particular, 1895 * SIGCHLD should be received by the thread doing the fork(). 1896 */ 1897 (void) sigfillset(&blockset); 1898 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 1899 1900 /* 1901 * Setup the console device and get ready to serve the console; 1902 * once this has completed, we're ready to let console clients 1903 * make an attempt to connect (they will block until 1904 * serve_console_sock() below gets called, and any pending 1905 * connection is accept()ed). 1906 */ 1907 if (!zonecfg_in_alt_root() && init_console(zlogp) < 0) 1908 goto child_out; 1909 1910 /* 1911 * Take the lock now, so that when the door server gets going, we 1912 * are guaranteed that it won't take a request until we are sure 1913 * that everything is completely set up. See the child_out: label 1914 * below to see why this matters. 1915 */ 1916 (void) mutex_lock(&lock); 1917 1918 /* Init semaphore for scratch zones. */ 1919 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 1920 zerror(zlogp, B_TRUE, 1921 "failed to initialize semaphore for scratch zone"); 1922 goto child_out; 1923 } 1924 1925 /* open the dladm handle */ 1926 if (dladm_open(&dld_handle) != DLADM_STATUS_OK) { 1927 zerror(zlogp, B_FALSE, "failed to open dladm handle"); 1928 goto child_out; 1929 } 1930 1931 /* 1932 * Note: door setup must occur *after* the console is setup. 1933 * This is so that as zlogin tests the door to see if zoneadmd 1934 * is ready yet, we know that the console will get serviced 1935 * once door_info() indicates that the door is "up". 1936 */ 1937 if (setup_door(zlogp) == -1) 1938 goto child_out; 1939 1940 /* 1941 * Things seem OK so far; tell the parent process that we're done 1942 * with setup tasks. This will cause the parent to exit, signalling 1943 * to zoneadm, zlogin, or whatever forked it that we are ready to 1944 * service requests. 1945 */ 1946 shstate->status = 0; 1947 (void) sema_post(&shstate->sem); 1948 (void) munmap((char *)shstate, shstatelen); 1949 shstate = NULL; 1950 1951 (void) mutex_unlock(&lock); 1952 1953 /* 1954 * zlogp is now invalid, so reset it to the syslog logger. 1955 */ 1956 zlogp = &logsys; 1957 1958 /* 1959 * Now that we are free of any parents, switch to the default locale. 1960 */ 1961 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 1962 1963 /* 1964 * At this point the setup portion of main() is basically done, so 1965 * we reuse this thread to manage the zone console. When 1966 * serve_console() has returned, we are past the point of no return 1967 * in the life of this zoneadmd. 1968 */ 1969 if (zonecfg_in_alt_root()) { 1970 /* 1971 * This is just awful, but mounted scratch zones don't (and 1972 * can't) have consoles. We just wait for unmount instead. 1973 */ 1974 while (sema_wait(&scratch_sem) == EINTR) 1975 ; 1976 } else { 1977 serve_console(zlogp); 1978 assert(in_death_throes); 1979 } 1980 1981 /* 1982 * This is the next-to-last part of the exit interlock. Upon calling 1983 * fdetach(), the door will go unreferenced; once any 1984 * outstanding requests (like the door thread doing Z_HALT) are 1985 * done, the door will get an UNREF notification; when it handles 1986 * the UNREF, the door server will cause the exit. 1987 */ 1988 assert(!MUTEX_HELD(&lock)); 1989 (void) fdetach(zone_door_path); 1990 1991 for (;;) 1992 (void) pause(); 1993 1994 child_out: 1995 assert(pid == 0); 1996 if (shstate != NULL) { 1997 shstate->status = -1; 1998 (void) sema_post(&shstate->sem); 1999 (void) munmap((char *)shstate, shstatelen); 2000 } 2001 2002 /* 2003 * This might trigger an unref notification, but if so, 2004 * we are still holding the lock, so our call to exit will 2005 * ultimately win the race and will publish the right exit 2006 * code. 2007 */ 2008 if (zone_door != -1) { 2009 assert(MUTEX_HELD(&lock)); 2010 (void) door_revoke(zone_door); 2011 (void) fdetach(zone_door_path); 2012 } 2013 2014 if (dld_handle != NULL) 2015 dladm_close(dld_handle); 2016 2017 return (1); /* return from main() forcibly exits an MT process */ 2018 } 2019