1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * zoneadmd manages zones; one zoneadmd process is launched for each 31 * non-global zone on the system. This daemon juggles four jobs: 32 * 33 * - Implement setup and teardown of the zone "virtual platform": mount and 34 * unmount filesystems; create and destroy network interfaces; communicate 35 * with devfsadmd to lay out devices for the zone; instantiate the zone 36 * console device; configure process runtime attributes such as resource 37 * controls, pool bindings, fine-grained privileges. 38 * 39 * - Launch the zone's init(1M) process. 40 * 41 * - Implement a door server; clients (like zoneadm) connect to the door 42 * server and request zone state changes. The kernel is also a client of 43 * this door server. A request to halt or reboot the zone which originates 44 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 45 * 46 * One minor problem is that messages emitted by zoneadmd need to be passed 47 * back to the zoneadm process making the request. These messages need to 48 * be rendered in the client's locale; so, this is passed in as part of the 49 * request. The exception is the kernel upcall to zoneadmd, in which case 50 * messages are syslog'd. 51 * 52 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 53 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 54 * strings which do not need to be translated. 55 * 56 * - Act as a console server for zlogin -C processes; see comments in zcons.c 57 * for more information about the zone console architecture. 58 * 59 * DESIGN NOTES 60 * 61 * Restart: 62 * A chief design constraint of zoneadmd is that it should be restartable in 63 * the case that the administrator kills it off, or it suffers a fatal error, 64 * without the running zone being impacted; this is akin to being able to 65 * reboot the service processor of a server without affecting the OS instance. 66 */ 67 68 #include <sys/param.h> 69 #include <sys/mman.h> 70 #include <sys/types.h> 71 #include <sys/stat.h> 72 #include <sys/sysmacros.h> 73 74 #include <bsm/adt.h> 75 #include <bsm/adt_event.h> 76 77 #include <alloca.h> 78 #include <assert.h> 79 #include <errno.h> 80 #include <door.h> 81 #include <fcntl.h> 82 #include <locale.h> 83 #include <signal.h> 84 #include <stdarg.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 #include <strings.h> 89 #include <synch.h> 90 #include <syslog.h> 91 #include <thread.h> 92 #include <unistd.h> 93 #include <wait.h> 94 #include <limits.h> 95 #include <zone.h> 96 #include <libcontract.h> 97 #include <libcontract_priv.h> 98 #include <sys/contract/process.h> 99 #include <sys/ctfs.h> 100 101 #include <libzonecfg.h> 102 #include "zoneadmd.h" 103 104 static char *progname; 105 char *zone_name; /* zone which we are managing */ 106 static zoneid_t zone_id; 107 108 static zlog_t logsys; 109 110 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 111 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 112 113 static sema_t scratch_sem; /* for scratch zones */ 114 115 static char zone_door_path[MAXPATHLEN]; 116 static int zone_door = -1; 117 118 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 119 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 120 121 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 122 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 123 #endif 124 125 #define PATH_TO_INIT "/sbin/init" 126 127 #define DEFAULT_LOCALE "C" 128 129 static const char * 130 z_cmd_name(zone_cmd_t zcmd) 131 { 132 /* This list needs to match the enum in sys/zone.h */ 133 static const char *zcmdstr[] = { 134 "ready", "boot", "reboot", "halt", "note_uninstalling", 135 "mount", "unmount" 136 }; 137 138 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 139 return ("unknown"); 140 else 141 return (zcmdstr[(int)zcmd]); 142 } 143 144 static char * 145 get_execbasename(char *execfullname) 146 { 147 char *last_slash, *execbasename; 148 149 /* guard against '/' at end of command invocation */ 150 for (;;) { 151 last_slash = strrchr(execfullname, '/'); 152 if (last_slash == NULL) { 153 execbasename = execfullname; 154 break; 155 } else { 156 execbasename = last_slash + 1; 157 if (*execbasename == '\0') { 158 *last_slash = '\0'; 159 continue; 160 } 161 break; 162 } 163 } 164 return (execbasename); 165 } 166 167 static void 168 usage(void) 169 { 170 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 171 (void) fprintf(stderr, 172 gettext("\tNote: %s should not be run directly.\n"), progname); 173 exit(2); 174 } 175 176 /* ARGSUSED */ 177 static void 178 sigchld(int sig) 179 { 180 } 181 182 char * 183 localize_msg(char *locale, const char *msg) 184 { 185 char *out; 186 187 (void) mutex_lock(&msglock); 188 (void) setlocale(LC_MESSAGES, locale); 189 out = gettext(msg); 190 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 191 (void) mutex_unlock(&msglock); 192 return (out); 193 } 194 195 /* PRINTFLIKE3 */ 196 void 197 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 198 { 199 va_list alist; 200 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 201 char *bp; 202 int saved_errno = errno; 203 204 if (zlogp == NULL) 205 return; 206 if (zlogp == &logsys) 207 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 208 zone_name); 209 else 210 buf[0] = '\0'; 211 bp = &(buf[strlen(buf)]); 212 213 /* 214 * In theory, the locale pointer should be set to either "C" or a 215 * char array, so it should never be NULL 216 */ 217 assert(zlogp->locale != NULL); 218 /* Locale is per process, but we are multi-threaded... */ 219 fmt = localize_msg(zlogp->locale, fmt); 220 221 va_start(alist, fmt); 222 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 223 va_end(alist); 224 bp = &(buf[strlen(buf)]); 225 if (use_strerror) 226 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 227 strerror(saved_errno)); 228 if (zlogp == &logsys) { 229 (void) syslog(LOG_ERR, "%s", buf); 230 } else if (zlogp->logfile != NULL) { 231 (void) fprintf(zlogp->logfile, "%s\n", buf); 232 } else { 233 size_t buflen; 234 size_t copylen; 235 236 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 237 copylen = MIN(buflen, zlogp->loglen); 238 zlogp->log += copylen; 239 zlogp->loglen -= copylen; 240 } 241 } 242 243 /* 244 * Emit a warning for any boot arguments which are unrecognized. Since 245 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 246 * put the arguments into an argv style array, use getopt to process them, 247 * and put the resultant argument string back into outargs. 248 * 249 * During the filtering, we pull out any arguments which are truly "boot" 250 * arguments, leaving only those which are to be passed intact to the 251 * progenitor process. The one we support at the moment is -i, which 252 * indicates to the kernel which program should be launched as 'init'. 253 * 254 * A return of Z_INVAL indicates specifically that the arguments are 255 * not valid; this is a non-fatal error. Except for Z_OK, all other return 256 * values are treated as fatal. 257 */ 258 static int 259 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 260 char *init_file, char *badarg) 261 { 262 int argc = 0, argc_save; 263 int i; 264 int err; 265 char *arg, *lasts, **argv = NULL, **argv_save; 266 char zonecfg_args[BOOTARGS_MAX]; 267 char scratchargs[BOOTARGS_MAX], *sargs; 268 char c; 269 270 bzero(outargs, BOOTARGS_MAX); 271 bzero(badarg, BOOTARGS_MAX); 272 273 (void) strlcpy(init_file, PATH_TO_INIT, MAXPATHLEN); 274 275 /* 276 * If the user didn't specify transient boot arguments, check 277 * to see if there were any specified in the zone configuration, 278 * and use them if applicable. 279 */ 280 if (inargs == NULL || inargs[0] == '\0') { 281 zone_dochandle_t handle; 282 if ((handle = zonecfg_init_handle()) == NULL) { 283 zerror(zlogp, B_TRUE, 284 "getting zone configuration handle"); 285 return (Z_BAD_HANDLE); 286 } 287 err = zonecfg_get_snapshot_handle(zone_name, handle); 288 if (err != Z_OK) { 289 zerror(zlogp, B_FALSE, 290 "invalid configuration snapshot"); 291 zonecfg_fini_handle(handle); 292 return (Z_BAD_HANDLE); 293 } 294 295 bzero(zonecfg_args, sizeof (zonecfg_args)); 296 (void) zonecfg_get_bootargs(handle, zonecfg_args, 297 sizeof (zonecfg_args)); 298 inargs = zonecfg_args; 299 zonecfg_fini_handle(handle); 300 } 301 302 if (strlen(inargs) >= BOOTARGS_MAX) { 303 zerror(zlogp, B_FALSE, "boot argument string too long"); 304 return (Z_INVAL); 305 } 306 307 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 308 sargs = scratchargs; 309 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 310 sargs = NULL; 311 argc++; 312 } 313 314 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 315 zerror(zlogp, B_FALSE, "memory allocation failed"); 316 return (Z_NOMEM); 317 } 318 319 argv_save = argv; 320 argc_save = argc; 321 322 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 323 sargs = scratchargs; 324 i = 0; 325 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 326 sargs = NULL; 327 if ((argv[i] = strdup(arg)) == NULL) { 328 err = Z_NOMEM; 329 zerror(zlogp, B_FALSE, "memory allocation failed"); 330 goto done; 331 } 332 i++; 333 } 334 335 /* 336 * We preserve compatibility with the Solaris system boot behavior, 337 * which allows: 338 * 339 * # reboot kernel/unix -s -m verbose 340 * 341 * In this example, kernel/unix tells the booter what file to 342 * boot. We don't want reboot in a zone to be gratuitously different, 343 * so we silently ignore the boot file, if necessary. 344 */ 345 if (argv[0] == NULL) 346 goto done; 347 348 assert(argv[0][0] != ' '); 349 assert(argv[0][0] != '\t'); 350 351 if (argv[0][0] != '-' && argv[0][0] != '\0') { 352 argv = &argv[1]; 353 argc--; 354 } 355 356 optind = 0; 357 opterr = 0; 358 err = Z_OK; 359 while ((c = getopt(argc, argv, "i:m:s")) != -1) { 360 switch (c) { 361 case 'i': 362 /* 363 * -i is handled by the runtime and is not passed 364 * along to userland 365 */ 366 (void) strlcpy(init_file, optarg, MAXPATHLEN); 367 break; 368 case 'm': 369 case 's': 370 /* These pass through unmolested */ 371 (void) snprintf(outargs, BOOTARGS_MAX, 372 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 373 break; 374 case '?': 375 /* 376 * We warn about unknown arguments but pass them 377 * along anyway-- if someone wants to develop their 378 * own init replacement, they can pass it whatever 379 * args they want. 380 */ 381 err = Z_INVAL; 382 (void) snprintf(outargs, BOOTARGS_MAX, 383 "%s -%c", outargs, optopt); 384 (void) snprintf(badarg, BOOTARGS_MAX, 385 "%s -%c", badarg, optopt); 386 break; 387 } 388 } 389 390 /* 391 * For Solaris Zones we warn about and discard non-option arguments. 392 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 393 * to the kernel, we concat up all the other remaining boot args. 394 * and warn on them as a group. 395 */ 396 if (optind < argc) { 397 err = Z_INVAL; 398 while (optind < argc) { 399 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 400 badarg, strlen(badarg) > 0 ? " " : "", 401 argv[optind]); 402 optind++; 403 } 404 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 405 "arguments `%s'.", badarg); 406 } 407 408 done: 409 for (i = 0; i < argc_save; i++) { 410 if (argv_save[i] != NULL) 411 free(argv_save[i]); 412 } 413 free(argv_save); 414 return (err); 415 } 416 417 418 static int 419 mkzonedir(zlog_t *zlogp) 420 { 421 struct stat st; 422 /* 423 * We must create and lock everyone but root out of ZONES_TMPDIR 424 * since anyone can open any UNIX domain socket, regardless of 425 * its file system permissions. Sigh... 426 */ 427 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 428 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 429 return (-1); 430 } 431 /* paranoia */ 432 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 433 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 434 return (-1); 435 } 436 (void) chmod(ZONES_TMPDIR, S_IRWXU); 437 return (0); 438 } 439 440 /* 441 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 442 * 'true' if this is being invoked as part of the processing for the "mount" 443 * subcommand. 444 */ 445 static int 446 zone_ready(zlog_t *zlogp, boolean_t mount_cmd) 447 { 448 int err; 449 450 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 451 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 452 zonecfg_strerror(err)); 453 return (-1); 454 } 455 456 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 457 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 458 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 459 zonecfg_strerror(err)); 460 return (-1); 461 } 462 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 463 bringup_failure_recovery = B_TRUE; 464 (void) vplat_teardown(NULL, mount_cmd); 465 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 466 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 467 zonecfg_strerror(err)); 468 return (-1); 469 } 470 471 return (0); 472 } 473 474 int 475 init_template(void) 476 { 477 int fd; 478 int err = 0; 479 480 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 481 if (fd == -1) 482 return (-1); 483 484 /* 485 * For now, zoneadmd doesn't do anything with the contract. 486 * Deliver no events, don't inherit, and allow it to be orphaned. 487 */ 488 err |= ct_tmpl_set_critical(fd, 0); 489 err |= ct_tmpl_set_informative(fd, 0); 490 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 491 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 492 if (err || ct_tmpl_activate(fd)) { 493 (void) close(fd); 494 return (-1); 495 } 496 497 return (fd); 498 } 499 500 static int 501 mount_early_fs(zlog_t *zlogp, zoneid_t zoneid, const char *spec, 502 const char *dir, char *fstype) 503 { 504 pid_t child; 505 int child_status; 506 int tmpl_fd; 507 ctid_t ct; 508 509 if ((tmpl_fd = init_template()) == -1) { 510 zerror(zlogp, B_TRUE, "failed to create contract"); 511 return (-1); 512 } 513 514 if ((child = fork()) == -1) { 515 (void) ct_tmpl_clear(tmpl_fd); 516 (void) close(tmpl_fd); 517 zerror(zlogp, B_TRUE, "failed to fork"); 518 return (-1); 519 520 } else if (child == 0) { /* child */ 521 (void) ct_tmpl_clear(tmpl_fd); 522 /* 523 * Even though there are no procs running in the zone, we 524 * do this for paranoia's sake. 525 */ 526 (void) closefrom(0); 527 528 if (zone_enter(zoneid) == -1) { 529 _exit(errno); 530 } 531 if (mount(spec, dir, MS_DATA, fstype, NULL, 0, NULL, 0) != 0) 532 _exit(errno); 533 _exit(0); 534 } 535 536 /* parent */ 537 if (contract_latest(&ct) == -1) 538 ct = -1; 539 (void) ct_tmpl_clear(tmpl_fd); 540 (void) close(tmpl_fd); 541 if (waitpid(child, &child_status, 0) != child) { 542 /* unexpected: we must have been signalled */ 543 (void) contract_abandon_id(ct); 544 return (-1); 545 } 546 (void) contract_abandon_id(ct); 547 if (WEXITSTATUS(child_status) != 0) { 548 errno = WEXITSTATUS(child_status); 549 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 550 return (-1); 551 } 552 553 return (0); 554 } 555 556 static int 557 zone_mount_early(zlog_t *zlogp, zoneid_t zoneid) 558 { 559 if (mount_early_fs(zlogp, zoneid, "/proc", "/proc", "proc") != 0) 560 return (-1); 561 562 if (mount_early_fs(zlogp, zoneid, "ctfs", CTFS_ROOT, "ctfs") != 0) 563 return (-1); 564 565 if (mount_early_fs(zlogp, zoneid, "swap", "/etc/svc/volatile", 566 "tmpfs") != 0) 567 return (-1); 568 569 if (mount_early_fs(zlogp, zoneid, "mnttab", "/etc/mnttab", 570 "mntfs") != 0) 571 return (-1); 572 573 return (0); 574 } 575 576 static int 577 zone_bootup(zlog_t *zlogp, const char *bootargs) 578 { 579 zoneid_t zoneid; 580 struct stat st; 581 char zroot[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 582 char nbootargs[BOOTARGS_MAX]; 583 int err; 584 585 if (init_console_slave(zlogp) != 0) 586 return (-1); 587 reset_slave_terminal(zlogp); 588 589 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 590 zerror(zlogp, B_TRUE, "unable to get zoneid"); 591 return (-1); 592 } 593 594 if (zone_mount_early(zlogp, zoneid) != 0) 595 return (-1); 596 597 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 598 bad_boot_arg); 599 if (err == Z_INVAL) 600 eventstream_write(Z_EVT_ZONE_BADARGS); 601 else if (err != Z_OK) 602 return (-1); 603 604 assert(init_file[0] != '\0'); 605 606 /* 607 * Try to anticipate possible problems: Make sure whatever binary 608 * is supposed to be init is executable. 609 */ 610 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 611 zerror(zlogp, B_FALSE, "unable to determine zone root"); 612 return (-1); 613 } 614 (void) snprintf(initpath, sizeof (initpath), "%s%s", zroot, init_file); 615 616 if (stat(initpath, &st) == -1) { 617 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 618 return (-1); 619 } 620 621 if ((st.st_mode & S_IXUSR) == 0) { 622 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 623 return (-1); 624 } 625 626 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 627 zerror(zlogp, B_TRUE, "could not set zone boot file"); 628 return (-1); 629 } 630 631 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 632 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 633 return (-1); 634 } 635 636 if (zone_boot(zoneid) == -1) { 637 zerror(zlogp, B_TRUE, "unable to boot zone"); 638 return (-1); 639 } 640 641 return (0); 642 } 643 644 static int 645 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd) 646 { 647 int err; 648 649 if (vplat_teardown(zlogp, unmount_cmd) != 0) { 650 if (!bringup_failure_recovery) 651 zerror(zlogp, B_FALSE, "unable to destroy zone"); 652 return (-1); 653 } 654 655 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 656 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 657 zonecfg_strerror(err)); 658 659 return (0); 660 } 661 662 /* 663 * Generate AUE_zone_state for a command that boots a zone. 664 */ 665 static void 666 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 667 char *new_state) 668 { 669 adt_session_data_t *ah; 670 adt_event_data_t *event; 671 int pass_fail, fail_reason; 672 673 if (!adt_audit_enabled()) 674 return; 675 676 if (return_val == 0) { 677 pass_fail = ADT_SUCCESS; 678 fail_reason = ADT_SUCCESS; 679 } else { 680 pass_fail = ADT_FAILURE; 681 fail_reason = ADT_FAIL_VALUE_PROGRAM; 682 } 683 684 if (adt_start_session(&ah, NULL, 0)) { 685 zerror(zlogp, B_TRUE, gettext("audit failure.")); 686 return; 687 } 688 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 689 zerror(zlogp, B_TRUE, gettext("audit failure.")); 690 (void) adt_end_session(ah); 691 return; 692 } 693 694 event = adt_alloc_event(ah, ADT_zone_state); 695 if (event == NULL) { 696 zerror(zlogp, B_TRUE, gettext("audit failure.")); 697 (void) adt_end_session(ah); 698 return; 699 } 700 event->adt_zone_state.zonename = zone_name; 701 event->adt_zone_state.new_state = new_state; 702 703 if (adt_put_event(event, pass_fail, fail_reason)) 704 zerror(zlogp, B_TRUE, gettext("audit failure.")); 705 706 adt_free_event(event); 707 708 (void) adt_end_session(ah); 709 } 710 711 /* 712 * The main routine for the door server that deals with zone state transitions. 713 */ 714 /* ARGSUSED */ 715 static void 716 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 717 uint_t n_desc) 718 { 719 ucred_t *uc = NULL; 720 const priv_set_t *eset; 721 722 zone_state_t zstate; 723 zone_cmd_t cmd; 724 zone_cmd_arg_t *zargp; 725 726 boolean_t kernelcall; 727 728 int rval = -1; 729 uint64_t uniqid; 730 zoneid_t zoneid = -1; 731 zlog_t zlog; 732 zlog_t *zlogp; 733 zone_cmd_rval_t *rvalp; 734 size_t rlen = getpagesize(); /* conservative */ 735 736 /* LINTED E_BAD_PTR_CAST_ALIGN */ 737 zargp = (zone_cmd_arg_t *)args; 738 739 /* 740 * When we get the door unref message, we've fdetach'd the door, and 741 * it is time for us to shut down zoneadmd. 742 */ 743 if (zargp == DOOR_UNREF_DATA) { 744 /* 745 * See comment at end of main() for info on the last rites. 746 */ 747 exit(0); 748 } 749 750 if (zargp == NULL) { 751 (void) door_return(NULL, 0, 0, 0); 752 } 753 754 rvalp = alloca(rlen); 755 bzero(rvalp, rlen); 756 zlog.logfile = NULL; 757 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 758 zlog.buf = rvalp->errbuf; 759 zlog.log = zlog.buf; 760 /* defer initialization of zlog.locale until after credential check */ 761 zlogp = &zlog; 762 763 if (alen != sizeof (zone_cmd_arg_t)) { 764 /* 765 * This really shouldn't be happening. 766 */ 767 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 768 "unexpected (expected %d bytes)", alen, 769 sizeof (zone_cmd_arg_t)); 770 goto out; 771 } 772 cmd = zargp->cmd; 773 774 if (door_ucred(&uc) != 0) { 775 zerror(&logsys, B_TRUE, "door_ucred"); 776 goto out; 777 } 778 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 779 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 780 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 781 ucred_geteuid(uc) != 0)) { 782 zerror(&logsys, B_FALSE, "insufficient privileges"); 783 goto out; 784 } 785 786 kernelcall = ucred_getpid(uc) == 0; 787 788 /* 789 * This is safe because we only use a zlog_t throughout the 790 * duration of a door call; i.e., by the time the pointer 791 * might become invalid, the door call would be over. 792 */ 793 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 794 795 (void) mutex_lock(&lock); 796 797 /* 798 * Once we start to really die off, we don't want more connections. 799 */ 800 if (in_death_throes) { 801 (void) mutex_unlock(&lock); 802 ucred_free(uc); 803 (void) door_return(NULL, 0, 0, 0); 804 thr_exit(NULL); 805 } 806 807 /* 808 * Check for validity of command. 809 */ 810 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_REBOOT && 811 cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT && 812 cmd != Z_UNMOUNT) { 813 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 814 goto out; 815 } 816 817 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 818 /* 819 * Can't happen 820 */ 821 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 822 cmd); 823 goto out; 824 } 825 /* 826 * We ignore the possibility of someone calling zone_create(2) 827 * explicitly; all requests must come through zoneadmd. 828 */ 829 if (zone_get_state(zone_name, &zstate) != Z_OK) { 830 /* 831 * Something terribly wrong happened 832 */ 833 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 834 goto out; 835 } 836 837 if (kernelcall) { 838 /* 839 * Kernel-initiated requests may lose their validity if the 840 * zone_t the kernel was referring to has gone away. 841 */ 842 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 843 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 844 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 845 /* 846 * We're not talking about the same zone. The request 847 * must have arrived too late. Return error. 848 */ 849 rval = -1; 850 goto out; 851 } 852 zlogp = &logsys; /* Log errors to syslog */ 853 } 854 855 switch (zstate) { 856 case ZONE_STATE_CONFIGURED: 857 case ZONE_STATE_INCOMPLETE: 858 /* 859 * Not our area of expertise; we just print a nice message 860 * and die off. 861 */ 862 zerror(zlogp, B_FALSE, 863 "%s operation is invalid for zones in state '%s'", 864 z_cmd_name(cmd), zone_state_str(zstate)); 865 break; 866 867 case ZONE_STATE_INSTALLED: 868 switch (cmd) { 869 case Z_READY: 870 rval = zone_ready(zlogp, B_FALSE); 871 if (rval == 0) 872 eventstream_write(Z_EVT_ZONE_READIED); 873 break; 874 case Z_BOOT: 875 eventstream_write(Z_EVT_ZONE_BOOTING); 876 if ((rval = zone_ready(zlogp, B_FALSE)) == 0) 877 rval = zone_bootup(zlogp, zargp->bootbuf); 878 audit_put_record(zlogp, uc, rval, "boot"); 879 if (rval != 0) { 880 bringup_failure_recovery = B_TRUE; 881 (void) zone_halt(zlogp, B_FALSE); 882 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 883 } 884 break; 885 case Z_HALT: 886 if (kernelcall) /* Invalid; can't happen */ 887 abort(); 888 /* 889 * We could have two clients racing to halt this 890 * zone; the second client loses, but his request 891 * doesn't fail, since the zone is now in the desired 892 * state. 893 */ 894 zerror(zlogp, B_FALSE, "zone is already halted"); 895 rval = 0; 896 break; 897 case Z_REBOOT: 898 if (kernelcall) /* Invalid; can't happen */ 899 abort(); 900 zerror(zlogp, B_FALSE, "%s operation is invalid " 901 "for zones in state '%s'", z_cmd_name(cmd), 902 zone_state_str(zstate)); 903 rval = -1; 904 break; 905 case Z_NOTE_UNINSTALLING: 906 if (kernelcall) /* Invalid; can't happen */ 907 abort(); 908 /* 909 * Tell the console to print out a message about this. 910 * Once it does, we will be in_death_throes. 911 */ 912 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 913 break; 914 case Z_MOUNT: 915 if (kernelcall) /* Invalid; can't happen */ 916 abort(); 917 rval = zone_ready(zlogp, B_TRUE); 918 if (rval == 0) { 919 eventstream_write(Z_EVT_ZONE_READIED); 920 rval = zone_mount_early(zlogp, zone_id); 921 } 922 923 /* 924 * Ordinarily, /dev/fd would be mounted inside the zone 925 * by svc:/system/filesystem/usr:default, but since 926 * we're not booting the zone, we need to do this 927 * manually. 928 */ 929 if (rval == 0) 930 rval = mount_early_fs(zlogp, zone_id, "fd", 931 "/dev/fd", "fd"); 932 break; 933 case Z_UNMOUNT: 934 if (kernelcall) /* Invalid; can't happen */ 935 abort(); 936 zerror(zlogp, B_FALSE, "zone is already unmounted"); 937 rval = 0; 938 break; 939 } 940 break; 941 942 case ZONE_STATE_READY: 943 switch (cmd) { 944 case Z_READY: 945 /* 946 * We could have two clients racing to ready this 947 * zone; the second client loses, but his request 948 * doesn't fail, since the zone is now in the desired 949 * state. 950 */ 951 zerror(zlogp, B_FALSE, "zone is already ready"); 952 rval = 0; 953 break; 954 case Z_BOOT: 955 (void) strlcpy(boot_args, zargp->bootbuf, 956 sizeof (boot_args)); 957 eventstream_write(Z_EVT_ZONE_BOOTING); 958 rval = zone_bootup(zlogp, zargp->bootbuf); 959 audit_put_record(zlogp, uc, rval, "boot"); 960 if (rval != 0) { 961 bringup_failure_recovery = B_TRUE; 962 (void) zone_halt(zlogp, B_FALSE); 963 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 964 } 965 boot_args[0] = '\0'; 966 break; 967 case Z_HALT: 968 if (kernelcall) /* Invalid; can't happen */ 969 abort(); 970 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) 971 break; 972 eventstream_write(Z_EVT_ZONE_HALTED); 973 break; 974 case Z_REBOOT: 975 case Z_NOTE_UNINSTALLING: 976 case Z_MOUNT: 977 case Z_UNMOUNT: 978 if (kernelcall) /* Invalid; can't happen */ 979 abort(); 980 zerror(zlogp, B_FALSE, "%s operation is invalid " 981 "for zones in state '%s'", z_cmd_name(cmd), 982 zone_state_str(zstate)); 983 rval = -1; 984 break; 985 } 986 break; 987 988 case ZONE_STATE_MOUNTED: 989 switch (cmd) { 990 case Z_UNMOUNT: 991 if (kernelcall) /* Invalid; can't happen */ 992 abort(); 993 rval = zone_halt(zlogp, B_TRUE); 994 if (rval == 0) { 995 eventstream_write(Z_EVT_ZONE_HALTED); 996 (void) sema_post(&scratch_sem); 997 } 998 break; 999 default: 1000 if (kernelcall) /* Invalid; can't happen */ 1001 abort(); 1002 zerror(zlogp, B_FALSE, "%s operation is invalid " 1003 "for zones in state '%s'", z_cmd_name(cmd), 1004 zone_state_str(zstate)); 1005 rval = -1; 1006 break; 1007 } 1008 break; 1009 1010 case ZONE_STATE_RUNNING: 1011 case ZONE_STATE_SHUTTING_DOWN: 1012 case ZONE_STATE_DOWN: 1013 switch (cmd) { 1014 case Z_READY: 1015 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) 1016 break; 1017 if ((rval = zone_ready(zlogp, B_FALSE)) == 0) 1018 eventstream_write(Z_EVT_ZONE_READIED); 1019 else 1020 eventstream_write(Z_EVT_ZONE_HALTED); 1021 break; 1022 case Z_BOOT: 1023 /* 1024 * We could have two clients racing to boot this 1025 * zone; the second client loses, but his request 1026 * doesn't fail, since the zone is now in the desired 1027 * state. 1028 */ 1029 zerror(zlogp, B_FALSE, "zone is already booted"); 1030 rval = 0; 1031 break; 1032 case Z_HALT: 1033 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) 1034 break; 1035 eventstream_write(Z_EVT_ZONE_HALTED); 1036 break; 1037 case Z_REBOOT: 1038 (void) strlcpy(boot_args, zargp->bootbuf, 1039 sizeof (boot_args)); 1040 eventstream_write(Z_EVT_ZONE_REBOOTING); 1041 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) { 1042 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1043 boot_args[0] = '\0'; 1044 break; 1045 } 1046 if ((rval = zone_ready(zlogp, B_FALSE)) != 0) { 1047 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1048 boot_args[0] = '\0'; 1049 break; 1050 } 1051 rval = zone_bootup(zlogp, zargp->bootbuf); 1052 audit_put_record(zlogp, uc, rval, "reboot"); 1053 if (rval != 0) { 1054 (void) zone_halt(zlogp, B_FALSE); 1055 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1056 } 1057 boot_args[0] = '\0'; 1058 break; 1059 case Z_NOTE_UNINSTALLING: 1060 case Z_MOUNT: 1061 case Z_UNMOUNT: 1062 zerror(zlogp, B_FALSE, "%s operation is invalid " 1063 "for zones in state '%s'", z_cmd_name(cmd), 1064 zone_state_str(zstate)); 1065 rval = -1; 1066 break; 1067 } 1068 break; 1069 default: 1070 abort(); 1071 } 1072 1073 /* 1074 * Because the state of the zone may have changed, we make sure 1075 * to wake the console poller, which is in charge of initiating 1076 * the shutdown procedure as necessary. 1077 */ 1078 eventstream_write(Z_EVT_NULL); 1079 1080 out: 1081 (void) mutex_unlock(&lock); 1082 if (kernelcall) { 1083 rvalp = NULL; 1084 rlen = 0; 1085 } else { 1086 rvalp->rval = rval; 1087 } 1088 if (uc != NULL) 1089 ucred_free(uc); 1090 (void) door_return((char *)rvalp, rlen, NULL, 0); 1091 thr_exit(NULL); 1092 } 1093 1094 static int 1095 setup_door(zlog_t *zlogp) 1096 { 1097 if ((zone_door = door_create(server, NULL, 1098 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1099 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1100 return (-1); 1101 } 1102 (void) fdetach(zone_door_path); 1103 1104 if (fattach(zone_door, zone_door_path) != 0) { 1105 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1106 (void) door_revoke(zone_door); 1107 (void) fdetach(zone_door_path); 1108 zone_door = -1; 1109 return (-1); 1110 } 1111 return (0); 1112 } 1113 1114 /* 1115 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 1116 * is where zoneadmd itself will check to see that another instance of 1117 * zoneadmd isn't already controlling this zone. 1118 * 1119 * The idea here is that we want to open the path to which we will 1120 * attach our door, lock it, and then make sure that no-one has beat us 1121 * to fattach(3c)ing onto it. 1122 * 1123 * fattach(3c) is really a mount, so there are actually two possible 1124 * vnodes we could be dealing with. Our strategy is as follows: 1125 * 1126 * - If the file we opened is a regular file (common case): 1127 * There is no fattach(3c)ed door, so we have a chance of becoming 1128 * the managing zoneadmd. We attempt to lock the file: if it is 1129 * already locked, that means someone else raced us here, so we 1130 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 1131 * that beat us to it. 1132 * 1133 * - If the file we opened is a namefs file: 1134 * This means there is already an established door fattach(3c)'ed 1135 * to the rendezvous path. We've lost the race, so we give up. 1136 * Note that in this case we also try to grab the file lock, and 1137 * will succeed in acquiring it since the vnode locked by the 1138 * "winning" zoneadmd was a regular one, and the one we locked was 1139 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1140 * we just return to zoneadm(1m) which knows to retry. 1141 */ 1142 static int 1143 make_daemon_exclusive(zlog_t *zlogp) 1144 { 1145 int doorfd = -1; 1146 int err, ret = -1; 1147 struct stat st; 1148 struct flock flock; 1149 zone_state_t zstate; 1150 1151 top: 1152 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1153 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1154 zonecfg_strerror(err)); 1155 goto out; 1156 } 1157 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1158 S_IREAD|S_IWRITE)) < 0) { 1159 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1160 goto out; 1161 } 1162 if (fstat(doorfd, &st) < 0) { 1163 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1164 goto out; 1165 } 1166 /* 1167 * Lock the file to synchronize with other zoneadmd 1168 */ 1169 flock.l_type = F_WRLCK; 1170 flock.l_whence = SEEK_SET; 1171 flock.l_start = (off_t)0; 1172 flock.l_len = (off_t)0; 1173 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1174 /* 1175 * Someone else raced us here and grabbed the lock file 1176 * first. A warning here is inappropriate since nothing 1177 * went wrong. 1178 */ 1179 goto out; 1180 } 1181 1182 if (strcmp(st.st_fstype, "namefs") == 0) { 1183 struct door_info info; 1184 1185 /* 1186 * There is already something fattach()'ed to this file. 1187 * Lets see what the door is up to. 1188 */ 1189 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1190 /* 1191 * Another zoneadmd process seems to be in 1192 * control of the situation and we don't need to 1193 * be here. A warning here is inappropriate 1194 * since nothing went wrong. 1195 * 1196 * If the door has been revoked, the zoneadmd 1197 * process currently managing the zone is going 1198 * away. We'll return control to zoneadm(1m) 1199 * which will try again (by which time zoneadmd 1200 * will hopefully have exited). 1201 */ 1202 goto out; 1203 } 1204 1205 /* 1206 * If we got this far, there's a fattach(3c)'ed door 1207 * that belongs to a process that has exited, which can 1208 * happen if the previous zoneadmd died unexpectedly. 1209 * 1210 * Let user know that something is amiss, but that we can 1211 * recover; if the zone is in the installed state, then don't 1212 * message, since having a running zoneadmd isn't really 1213 * expected/needed. We want to keep occurences of this message 1214 * limited to times when zoneadmd is picking back up from a 1215 * zoneadmd that died while the zone was in some non-trivial 1216 * state. 1217 */ 1218 if (zstate > ZONE_STATE_INSTALLED) { 1219 zerror(zlogp, B_FALSE, 1220 "zone '%s': WARNING: zone is in state '%s', but " 1221 "zoneadmd does not appear to be available; " 1222 "restarted zoneadmd to recover.", 1223 zone_name, zone_state_str(zstate)); 1224 } 1225 1226 (void) fdetach(zone_door_path); 1227 (void) close(doorfd); 1228 goto top; 1229 } 1230 ret = 0; 1231 out: 1232 (void) close(doorfd); 1233 return (ret); 1234 } 1235 1236 int 1237 main(int argc, char *argv[]) 1238 { 1239 int opt; 1240 zoneid_t zid; 1241 priv_set_t *privset; 1242 zone_state_t zstate; 1243 char parents_locale[MAXPATHLEN]; 1244 int err; 1245 1246 pid_t pid; 1247 sigset_t blockset; 1248 sigset_t block_cld; 1249 1250 struct { 1251 sema_t sem; 1252 int status; 1253 zlog_t log; 1254 } *shstate; 1255 size_t shstatelen = getpagesize(); 1256 1257 zlog_t errlog; 1258 zlog_t *zlogp; 1259 1260 progname = get_execbasename(argv[0]); 1261 1262 /* 1263 * Make sure stderr is unbuffered 1264 */ 1265 (void) setbuffer(stderr, NULL, 0); 1266 1267 /* 1268 * Get out of the way of mounted filesystems, since we will daemonize 1269 * soon. 1270 */ 1271 (void) chdir("/"); 1272 1273 /* 1274 * Use the default system umask per PSARC 1998/110 rather than 1275 * anything that may have been set by the caller. 1276 */ 1277 (void) umask(CMASK); 1278 1279 /* 1280 * Initially we want to use our parent's locale. 1281 */ 1282 (void) setlocale(LC_ALL, ""); 1283 (void) textdomain(TEXT_DOMAIN); 1284 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1285 sizeof (parents_locale)); 1286 1287 /* 1288 * This zlog_t is used for writing to stderr 1289 */ 1290 errlog.logfile = stderr; 1291 errlog.buflen = errlog.loglen = 0; 1292 errlog.buf = errlog.log = NULL; 1293 errlog.locale = parents_locale; 1294 1295 /* 1296 * We start off writing to stderr until we're ready to daemonize. 1297 */ 1298 zlogp = &errlog; 1299 1300 /* 1301 * Process options. 1302 */ 1303 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1304 switch (opt) { 1305 case 'R': 1306 zonecfg_set_root(optarg); 1307 break; 1308 case 'z': 1309 zone_name = optarg; 1310 break; 1311 default: 1312 usage(); 1313 } 1314 } 1315 1316 if (zone_name == NULL) 1317 usage(); 1318 1319 /* 1320 * Because usage() prints directly to stderr, it has gettext() 1321 * wrapping, which depends on the locale. But since zerror() calls 1322 * localize() which tweaks the locale, it is not safe to call zerror() 1323 * until after the last call to usage(). Fortunately, the last call 1324 * to usage() is just above and the first call to zerror() is just 1325 * below. Don't mess this up. 1326 */ 1327 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1328 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1329 GLOBAL_ZONENAME); 1330 return (1); 1331 } 1332 1333 if (zone_get_id(zone_name, &zid) != 0) { 1334 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 1335 zonecfg_strerror(Z_NO_ZONE)); 1336 return (1); 1337 } 1338 1339 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1340 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1341 zonecfg_strerror(err)); 1342 return (1); 1343 } 1344 if (zstate < ZONE_STATE_INSTALLED) { 1345 zerror(zlogp, B_FALSE, 1346 "cannot manage a zone which is in state '%s'", 1347 zone_state_str(zstate)); 1348 return (1); 1349 } 1350 1351 /* 1352 * Check that we have all privileges. It would be nice to pare 1353 * this down, but this is at least a first cut. 1354 */ 1355 if ((privset = priv_allocset()) == NULL) { 1356 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1357 return (1); 1358 } 1359 1360 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 1361 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 1362 priv_freeset(privset); 1363 return (1); 1364 } 1365 1366 if (priv_isfullset(privset) == B_FALSE) { 1367 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 1368 "run this command (all privs required)"); 1369 priv_freeset(privset); 1370 return (1); 1371 } 1372 priv_freeset(privset); 1373 1374 if (mkzonedir(zlogp) != 0) 1375 return (1); 1376 1377 /* 1378 * Pre-fork: setup shared state 1379 */ 1380 if ((shstate = (void *)mmap(NULL, shstatelen, 1381 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 1382 MAP_FAILED) { 1383 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 1384 return (1); 1385 } 1386 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 1387 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 1388 (void) munmap((char *)shstate, shstatelen); 1389 return (1); 1390 } 1391 shstate->log.logfile = NULL; 1392 shstate->log.buflen = shstatelen - sizeof (*shstate); 1393 shstate->log.loglen = shstate->log.buflen; 1394 shstate->log.buf = (char *)shstate + sizeof (*shstate); 1395 shstate->log.log = shstate->log.buf; 1396 shstate->log.locale = parents_locale; 1397 shstate->status = -1; 1398 1399 /* 1400 * We need a SIGCHLD handler so the sema_wait() below will wake 1401 * up if the child dies without doing a sema_post(). 1402 */ 1403 (void) sigset(SIGCHLD, sigchld); 1404 /* 1405 * We must mask SIGCHLD until after we've coped with the fork 1406 * sufficiently to deal with it; otherwise we can race and 1407 * receive the signal before pid has been initialized 1408 * (yes, this really happens). 1409 */ 1410 (void) sigemptyset(&block_cld); 1411 (void) sigaddset(&block_cld, SIGCHLD); 1412 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 1413 1414 /* 1415 * Do not let another thread localize a message while we are forking. 1416 */ 1417 (void) mutex_lock(&msglock); 1418 pid = fork(); 1419 (void) mutex_unlock(&msglock); 1420 if (pid == -1) { 1421 zerror(zlogp, B_TRUE, "could not fork"); 1422 return (1); 1423 1424 } else if (pid > 0) { /* parent */ 1425 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1426 /* 1427 * This marks a window of vulnerability in which we receive 1428 * the SIGCLD before falling into sema_wait (normally we would 1429 * get woken up from sema_wait with EINTR upon receipt of 1430 * SIGCLD). So we may need to use some other scheme like 1431 * sema_posting in the sigcld handler. 1432 * blech 1433 */ 1434 (void) sema_wait(&shstate->sem); 1435 (void) sema_destroy(&shstate->sem); 1436 if (shstate->status != 0) 1437 (void) waitpid(pid, NULL, WNOHANG); 1438 /* 1439 * It's ok if we die with SIGPIPE. It's not like we could have 1440 * done anything about it. 1441 */ 1442 (void) fprintf(stderr, "%s", shstate->log.buf); 1443 _exit(shstate->status == 0 ? 0 : 1); 1444 } 1445 1446 /* 1447 * The child charges on. 1448 */ 1449 (void) sigset(SIGCHLD, SIG_DFL); 1450 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1451 1452 /* 1453 * SIGPIPE can be delivered if we write to a socket for which the 1454 * peer endpoint is gone. That can lead to too-early termination 1455 * of zoneadmd, and that's not good eats. 1456 */ 1457 (void) sigset(SIGPIPE, SIG_IGN); 1458 /* 1459 * Stop using stderr 1460 */ 1461 zlogp = &shstate->log; 1462 1463 /* 1464 * We don't need stdout/stderr from now on. 1465 */ 1466 closefrom(0); 1467 1468 /* 1469 * Initialize the syslog zlog_t. This needs to be done after 1470 * the call to closefrom(). 1471 */ 1472 logsys.buf = logsys.log = NULL; 1473 logsys.buflen = logsys.loglen = 0; 1474 logsys.logfile = NULL; 1475 logsys.locale = DEFAULT_LOCALE; 1476 1477 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 1478 1479 /* 1480 * The eventstream is used to publish state changes in the zone 1481 * from the door threads to the console I/O poller. 1482 */ 1483 if (eventstream_init() == -1) { 1484 zerror(zlogp, B_TRUE, "unable to create eventstream"); 1485 goto child_out; 1486 } 1487 1488 (void) snprintf(zone_door_path, sizeof (zone_door_path), 1489 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 1490 1491 /* 1492 * See if another zoneadmd is running for this zone. If not, then we 1493 * can now modify system state. 1494 */ 1495 if (make_daemon_exclusive(zlogp) == -1) 1496 goto child_out; 1497 1498 1499 /* 1500 * Create/join a new session; we need to be careful of what we do with 1501 * the console from now on so we don't end up being the session leader 1502 * for the terminal we're going to be handing out. 1503 */ 1504 (void) setsid(); 1505 1506 /* 1507 * This thread shouldn't be receiving any signals; in particular, 1508 * SIGCHLD should be received by the thread doing the fork(). 1509 */ 1510 (void) sigfillset(&blockset); 1511 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 1512 1513 /* 1514 * Setup the console device and get ready to serve the console; 1515 * once this has completed, we're ready to let console clients 1516 * make an attempt to connect (they will block until 1517 * serve_console_sock() below gets called, and any pending 1518 * connection is accept()ed). 1519 */ 1520 if (!zonecfg_in_alt_root() && init_console(zlogp) == -1) 1521 goto child_out; 1522 1523 /* 1524 * Take the lock now, so that when the door server gets going, we 1525 * are guaranteed that it won't take a request until we are sure 1526 * that everything is completely set up. See the child_out: label 1527 * below to see why this matters. 1528 */ 1529 (void) mutex_lock(&lock); 1530 1531 /* Init semaphore for scratch zones. */ 1532 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 1533 zerror(zlogp, B_TRUE, 1534 "failed to initialize semaphore for scratch zone"); 1535 goto child_out; 1536 } 1537 1538 /* 1539 * Note: door setup must occur *after* the console is setup. 1540 * This is so that as zlogin tests the door to see if zoneadmd 1541 * is ready yet, we know that the console will get serviced 1542 * once door_info() indicates that the door is "up". 1543 */ 1544 if (setup_door(zlogp) == -1) 1545 goto child_out; 1546 1547 /* 1548 * Things seem OK so far; tell the parent process that we're done 1549 * with setup tasks. This will cause the parent to exit, signalling 1550 * to zoneadm, zlogin, or whatever forked it that we are ready to 1551 * service requests. 1552 */ 1553 shstate->status = 0; 1554 (void) sema_post(&shstate->sem); 1555 (void) munmap((char *)shstate, shstatelen); 1556 shstate = NULL; 1557 1558 (void) mutex_unlock(&lock); 1559 1560 /* 1561 * zlogp is now invalid, so reset it to the syslog logger. 1562 */ 1563 zlogp = &logsys; 1564 1565 /* 1566 * Now that we are free of any parents, switch to the default locale. 1567 */ 1568 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 1569 1570 /* 1571 * At this point the setup portion of main() is basically done, so 1572 * we reuse this thread to manage the zone console. When 1573 * serve_console() has returned, we are past the point of no return 1574 * in the life of this zoneadmd. 1575 */ 1576 if (zonecfg_in_alt_root()) { 1577 /* 1578 * This is just awful, but mounted scratch zones don't (and 1579 * can't) have consoles. We just wait for unmount instead. 1580 */ 1581 while (sema_wait(&scratch_sem) == EINTR) 1582 ; 1583 } else { 1584 serve_console(zlogp); 1585 assert(in_death_throes); 1586 } 1587 1588 /* 1589 * This is the next-to-last part of the exit interlock. Upon calling 1590 * fdetach(), the door will go unreferenced; once any 1591 * outstanding requests (like the door thread doing Z_HALT) are 1592 * done, the door will get an UNREF notification; when it handles 1593 * the UNREF, the door server will cause the exit. 1594 */ 1595 assert(!MUTEX_HELD(&lock)); 1596 (void) fdetach(zone_door_path); 1597 for (;;) 1598 (void) pause(); 1599 1600 child_out: 1601 assert(pid == 0); 1602 if (shstate != NULL) { 1603 shstate->status = -1; 1604 (void) sema_post(&shstate->sem); 1605 (void) munmap((char *)shstate, shstatelen); 1606 } 1607 1608 /* 1609 * This might trigger an unref notification, but if so, 1610 * we are still holding the lock, so our call to exit will 1611 * ultimately win the race and will publish the right exit 1612 * code. 1613 */ 1614 if (zone_door != -1) { 1615 assert(MUTEX_HELD(&lock)); 1616 (void) door_revoke(zone_door); 1617 (void) fdetach(zone_door_path); 1618 } 1619 return (1); /* return from main() forcibly exits an MT process */ 1620 } 1621