1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * zoneadmd manages zones; one zoneadmd process is launched for each 31 * non-global zone on the system. This daemon juggles four jobs: 32 * 33 * - Implement setup and teardown of the zone "virtual platform": mount and 34 * unmount filesystems; create and destroy network interfaces; communicate 35 * with devfsadmd to lay out devices for the zone; instantiate the zone 36 * console device; configure process runtime attributes such as resource 37 * controls, pool bindings, fine-grained privileges. 38 * 39 * - Launch the zone's init(1M) process. 40 * 41 * - Implement a door server; clients (like zoneadm) connect to the door 42 * server and request zone state changes. The kernel is also a client of 43 * this door server. A request to halt or reboot the zone which originates 44 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 45 * 46 * One minor problem is that messages emitted by zoneadmd need to be passed 47 * back to the zoneadm process making the request. These messages need to 48 * be rendered in the client's locale; so, this is passed in as part of the 49 * request. The exception is the kernel upcall to zoneadmd, in which case 50 * messages are syslog'd. 51 * 52 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 53 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 54 * strings which do not need to be translated. 55 * 56 * - Act as a console server for zlogin -C processes; see comments in zcons.c 57 * for more information about the zone console architecture. 58 * 59 * DESIGN NOTES 60 * 61 * Restart: 62 * A chief design constraint of zoneadmd is that it should be restartable in 63 * the case that the administrator kills it off, or it suffers a fatal error, 64 * without the running zone being impacted; this is akin to being able to 65 * reboot the service processor of a server without affecting the OS instance. 66 */ 67 68 #include <sys/param.h> 69 #include <sys/mman.h> 70 #include <sys/types.h> 71 #include <sys/stat.h> 72 #include <sys/sysmacros.h> 73 74 #include <bsm/adt.h> 75 #include <bsm/adt_event.h> 76 77 #include <alloca.h> 78 #include <assert.h> 79 #include <errno.h> 80 #include <door.h> 81 #include <fcntl.h> 82 #include <locale.h> 83 #include <signal.h> 84 #include <stdarg.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 #include <strings.h> 89 #include <synch.h> 90 #include <syslog.h> 91 #include <thread.h> 92 #include <unistd.h> 93 #include <wait.h> 94 #include <limits.h> 95 #include <zone.h> 96 #include <libcontract.h> 97 #include <libcontract_priv.h> 98 #include <sys/contract/process.h> 99 #include <sys/ctfs.h> 100 #include <sys/objfs.h> 101 102 #include <libzonecfg.h> 103 #include "zoneadmd.h" 104 105 static char *progname; 106 char *zone_name; /* zone which we are managing */ 107 static zoneid_t zone_id; 108 109 static zlog_t logsys; 110 111 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 112 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 113 114 static sema_t scratch_sem; /* for scratch zones */ 115 116 static char zone_door_path[MAXPATHLEN]; 117 static int zone_door = -1; 118 119 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 120 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 121 122 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 123 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 124 #endif 125 126 #define PATH_TO_INIT "/sbin/init" 127 128 #define DEFAULT_LOCALE "C" 129 130 static const char * 131 z_cmd_name(zone_cmd_t zcmd) 132 { 133 /* This list needs to match the enum in sys/zone.h */ 134 static const char *zcmdstr[] = { 135 "ready", "boot", "reboot", "halt", "note_uninstalling", 136 "mount", "unmount" 137 }; 138 139 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 140 return ("unknown"); 141 else 142 return (zcmdstr[(int)zcmd]); 143 } 144 145 static char * 146 get_execbasename(char *execfullname) 147 { 148 char *last_slash, *execbasename; 149 150 /* guard against '/' at end of command invocation */ 151 for (;;) { 152 last_slash = strrchr(execfullname, '/'); 153 if (last_slash == NULL) { 154 execbasename = execfullname; 155 break; 156 } else { 157 execbasename = last_slash + 1; 158 if (*execbasename == '\0') { 159 *last_slash = '\0'; 160 continue; 161 } 162 break; 163 } 164 } 165 return (execbasename); 166 } 167 168 static void 169 usage(void) 170 { 171 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 172 (void) fprintf(stderr, 173 gettext("\tNote: %s should not be run directly.\n"), progname); 174 exit(2); 175 } 176 177 /* ARGSUSED */ 178 static void 179 sigchld(int sig) 180 { 181 } 182 183 char * 184 localize_msg(char *locale, const char *msg) 185 { 186 char *out; 187 188 (void) mutex_lock(&msglock); 189 (void) setlocale(LC_MESSAGES, locale); 190 out = gettext(msg); 191 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 192 (void) mutex_unlock(&msglock); 193 return (out); 194 } 195 196 /* PRINTFLIKE3 */ 197 void 198 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 199 { 200 va_list alist; 201 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 202 char *bp; 203 int saved_errno = errno; 204 205 if (zlogp == NULL) 206 return; 207 if (zlogp == &logsys) 208 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 209 zone_name); 210 else 211 buf[0] = '\0'; 212 bp = &(buf[strlen(buf)]); 213 214 /* 215 * In theory, the locale pointer should be set to either "C" or a 216 * char array, so it should never be NULL 217 */ 218 assert(zlogp->locale != NULL); 219 /* Locale is per process, but we are multi-threaded... */ 220 fmt = localize_msg(zlogp->locale, fmt); 221 222 va_start(alist, fmt); 223 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 224 va_end(alist); 225 bp = &(buf[strlen(buf)]); 226 if (use_strerror) 227 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 228 strerror(saved_errno)); 229 if (zlogp == &logsys) { 230 (void) syslog(LOG_ERR, "%s", buf); 231 } else if (zlogp->logfile != NULL) { 232 (void) fprintf(zlogp->logfile, "%s\n", buf); 233 } else { 234 size_t buflen; 235 size_t copylen; 236 237 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 238 copylen = MIN(buflen, zlogp->loglen); 239 zlogp->log += copylen; 240 zlogp->loglen -= copylen; 241 } 242 } 243 244 /* 245 * Emit a warning for any boot arguments which are unrecognized. Since 246 * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we 247 * put the arguments into an argv style array, use getopt to process them, 248 * and put the resultant argument string back into outargs. 249 * 250 * During the filtering, we pull out any arguments which are truly "boot" 251 * arguments, leaving only those which are to be passed intact to the 252 * progenitor process. The one we support at the moment is -i, which 253 * indicates to the kernel which program should be launched as 'init'. 254 * 255 * A return of Z_INVAL indicates specifically that the arguments are 256 * not valid; this is a non-fatal error. Except for Z_OK, all other return 257 * values are treated as fatal. 258 */ 259 static int 260 filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, 261 char *init_file, char *badarg) 262 { 263 int argc = 0, argc_save; 264 int i; 265 int err; 266 char *arg, *lasts, **argv = NULL, **argv_save; 267 char zonecfg_args[BOOTARGS_MAX]; 268 char scratchargs[BOOTARGS_MAX], *sargs; 269 char c; 270 271 bzero(outargs, BOOTARGS_MAX); 272 bzero(badarg, BOOTARGS_MAX); 273 274 (void) strlcpy(init_file, PATH_TO_INIT, MAXPATHLEN); 275 276 /* 277 * If the user didn't specify transient boot arguments, check 278 * to see if there were any specified in the zone configuration, 279 * and use them if applicable. 280 */ 281 if (inargs == NULL || inargs[0] == '\0') { 282 zone_dochandle_t handle; 283 if ((handle = zonecfg_init_handle()) == NULL) { 284 zerror(zlogp, B_TRUE, 285 "getting zone configuration handle"); 286 return (Z_BAD_HANDLE); 287 } 288 err = zonecfg_get_snapshot_handle(zone_name, handle); 289 if (err != Z_OK) { 290 zerror(zlogp, B_FALSE, 291 "invalid configuration snapshot"); 292 zonecfg_fini_handle(handle); 293 return (Z_BAD_HANDLE); 294 } 295 296 bzero(zonecfg_args, sizeof (zonecfg_args)); 297 (void) zonecfg_get_bootargs(handle, zonecfg_args, 298 sizeof (zonecfg_args)); 299 inargs = zonecfg_args; 300 zonecfg_fini_handle(handle); 301 } 302 303 if (strlen(inargs) >= BOOTARGS_MAX) { 304 zerror(zlogp, B_FALSE, "boot argument string too long"); 305 return (Z_INVAL); 306 } 307 308 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 309 sargs = scratchargs; 310 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 311 sargs = NULL; 312 argc++; 313 } 314 315 if ((argv = calloc(argc + 1, sizeof (char *))) == NULL) { 316 zerror(zlogp, B_FALSE, "memory allocation failed"); 317 return (Z_NOMEM); 318 } 319 320 argv_save = argv; 321 argc_save = argc; 322 323 (void) strlcpy(scratchargs, inargs, sizeof (scratchargs)); 324 sargs = scratchargs; 325 i = 0; 326 while ((arg = strtok_r(sargs, " \t", &lasts)) != NULL) { 327 sargs = NULL; 328 if ((argv[i] = strdup(arg)) == NULL) { 329 err = Z_NOMEM; 330 zerror(zlogp, B_FALSE, "memory allocation failed"); 331 goto done; 332 } 333 i++; 334 } 335 336 /* 337 * We preserve compatibility with the Solaris system boot behavior, 338 * which allows: 339 * 340 * # reboot kernel/unix -s -m verbose 341 * 342 * In this example, kernel/unix tells the booter what file to 343 * boot. We don't want reboot in a zone to be gratuitously different, 344 * so we silently ignore the boot file, if necessary. 345 */ 346 if (argv[0] == NULL) 347 goto done; 348 349 assert(argv[0][0] != ' '); 350 assert(argv[0][0] != '\t'); 351 352 if (argv[0][0] != '-' && argv[0][0] != '\0') { 353 argv = &argv[1]; 354 argc--; 355 } 356 357 optind = 0; 358 opterr = 0; 359 err = Z_OK; 360 while ((c = getopt(argc, argv, "i:m:s")) != -1) { 361 switch (c) { 362 case 'i': 363 /* 364 * -i is handled by the runtime and is not passed 365 * along to userland 366 */ 367 (void) strlcpy(init_file, optarg, MAXPATHLEN); 368 break; 369 case 'm': 370 case 's': 371 /* These pass through unmolested */ 372 (void) snprintf(outargs, BOOTARGS_MAX, 373 "%s -%c %s ", outargs, c, optarg ? optarg : ""); 374 break; 375 case '?': 376 /* 377 * We warn about unknown arguments but pass them 378 * along anyway-- if someone wants to develop their 379 * own init replacement, they can pass it whatever 380 * args they want. 381 */ 382 err = Z_INVAL; 383 (void) snprintf(outargs, BOOTARGS_MAX, 384 "%s -%c", outargs, optopt); 385 (void) snprintf(badarg, BOOTARGS_MAX, 386 "%s -%c", badarg, optopt); 387 break; 388 } 389 } 390 391 /* 392 * For Solaris Zones we warn about and discard non-option arguments. 393 * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar 394 * to the kernel, we concat up all the other remaining boot args. 395 * and warn on them as a group. 396 */ 397 if (optind < argc) { 398 err = Z_INVAL; 399 while (optind < argc) { 400 (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", 401 badarg, strlen(badarg) > 0 ? " " : "", 402 argv[optind]); 403 optind++; 404 } 405 zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " 406 "arguments `%s'.", badarg); 407 } 408 409 done: 410 for (i = 0; i < argc_save; i++) { 411 if (argv_save[i] != NULL) 412 free(argv_save[i]); 413 } 414 free(argv_save); 415 return (err); 416 } 417 418 419 static int 420 mkzonedir(zlog_t *zlogp) 421 { 422 struct stat st; 423 /* 424 * We must create and lock everyone but root out of ZONES_TMPDIR 425 * since anyone can open any UNIX domain socket, regardless of 426 * its file system permissions. Sigh... 427 */ 428 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 429 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 430 return (-1); 431 } 432 /* paranoia */ 433 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 434 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 435 return (-1); 436 } 437 (void) chmod(ZONES_TMPDIR, S_IRWXU); 438 return (0); 439 } 440 441 /* 442 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 443 * 'true' if this is being invoked as part of the processing for the "mount" 444 * subcommand. 445 */ 446 static int 447 zone_ready(zlog_t *zlogp, boolean_t mount_cmd) 448 { 449 int err; 450 451 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 452 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 453 zonecfg_strerror(err)); 454 return (-1); 455 } 456 457 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 458 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 459 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 460 zonecfg_strerror(err)); 461 return (-1); 462 } 463 if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { 464 bringup_failure_recovery = B_TRUE; 465 (void) vplat_teardown(NULL, mount_cmd); 466 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 467 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 468 zonecfg_strerror(err)); 469 return (-1); 470 } 471 472 return (0); 473 } 474 475 int 476 init_template(void) 477 { 478 int fd; 479 int err = 0; 480 481 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 482 if (fd == -1) 483 return (-1); 484 485 /* 486 * For now, zoneadmd doesn't do anything with the contract. 487 * Deliver no events, don't inherit, and allow it to be orphaned. 488 */ 489 err |= ct_tmpl_set_critical(fd, 0); 490 err |= ct_tmpl_set_informative(fd, 0); 491 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 492 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 493 if (err || ct_tmpl_activate(fd)) { 494 (void) close(fd); 495 return (-1); 496 } 497 498 return (fd); 499 } 500 501 static int 502 mount_early_fs(zlog_t *zlogp, zoneid_t zoneid, const char *spec, 503 const char *dir, char *fstype) 504 { 505 pid_t child; 506 int child_status; 507 int tmpl_fd; 508 ctid_t ct; 509 510 if ((tmpl_fd = init_template()) == -1) { 511 zerror(zlogp, B_TRUE, "failed to create contract"); 512 return (-1); 513 } 514 515 if ((child = fork()) == -1) { 516 (void) ct_tmpl_clear(tmpl_fd); 517 (void) close(tmpl_fd); 518 zerror(zlogp, B_TRUE, "failed to fork"); 519 return (-1); 520 521 } else if (child == 0) { /* child */ 522 (void) ct_tmpl_clear(tmpl_fd); 523 /* 524 * Even though there are no procs running in the zone, we 525 * do this for paranoia's sake. 526 */ 527 (void) closefrom(0); 528 529 if (zone_enter(zoneid) == -1) { 530 _exit(errno); 531 } 532 if (mount(spec, dir, MS_DATA, fstype, NULL, 0, NULL, 0) != 0) 533 _exit(errno); 534 _exit(0); 535 } 536 537 /* parent */ 538 if (contract_latest(&ct) == -1) 539 ct = -1; 540 (void) ct_tmpl_clear(tmpl_fd); 541 (void) close(tmpl_fd); 542 if (waitpid(child, &child_status, 0) != child) { 543 /* unexpected: we must have been signalled */ 544 (void) contract_abandon_id(ct); 545 return (-1); 546 } 547 (void) contract_abandon_id(ct); 548 if (WEXITSTATUS(child_status) != 0) { 549 errno = WEXITSTATUS(child_status); 550 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 551 return (-1); 552 } 553 554 return (0); 555 } 556 557 static int 558 zone_mount_early(zlog_t *zlogp, zoneid_t zoneid) 559 { 560 if (mount_early_fs(zlogp, zoneid, "/proc", "/proc", "proc") != 0) 561 return (-1); 562 563 if (mount_early_fs(zlogp, zoneid, "ctfs", CTFS_ROOT, "ctfs") != 0) 564 return (-1); 565 566 if (mount_early_fs(zlogp, zoneid, "objfs", OBJFS_ROOT, "objfs") != 0) 567 return (-1); 568 569 if (mount_early_fs(zlogp, zoneid, "swap", "/etc/svc/volatile", 570 "tmpfs") != 0) 571 return (-1); 572 573 if (mount_early_fs(zlogp, zoneid, "mnttab", "/etc/mnttab", 574 "mntfs") != 0) 575 return (-1); 576 577 return (0); 578 } 579 580 static int 581 zone_bootup(zlog_t *zlogp, const char *bootargs) 582 { 583 zoneid_t zoneid; 584 struct stat st; 585 char zroot[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; 586 char nbootargs[BOOTARGS_MAX]; 587 int err; 588 589 if (init_console_slave(zlogp) != 0) 590 return (-1); 591 reset_slave_terminal(zlogp); 592 593 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 594 zerror(zlogp, B_TRUE, "unable to get zoneid"); 595 return (-1); 596 } 597 598 if (zone_mount_early(zlogp, zoneid) != 0) 599 return (-1); 600 601 err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, 602 bad_boot_arg); 603 if (err == Z_INVAL) 604 eventstream_write(Z_EVT_ZONE_BADARGS); 605 else if (err != Z_OK) 606 return (-1); 607 608 assert(init_file[0] != '\0'); 609 610 /* 611 * Try to anticipate possible problems: Make sure whatever binary 612 * is supposed to be init is executable. 613 */ 614 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 615 zerror(zlogp, B_FALSE, "unable to determine zone root"); 616 return (-1); 617 } 618 (void) snprintf(initpath, sizeof (initpath), "%s%s", zroot, init_file); 619 620 if (stat(initpath, &st) == -1) { 621 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 622 return (-1); 623 } 624 625 if ((st.st_mode & S_IXUSR) == 0) { 626 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 627 return (-1); 628 } 629 630 if (zone_setattr(zoneid, ZONE_ATTR_INITNAME, init_file, 0) == -1) { 631 zerror(zlogp, B_TRUE, "could not set zone boot file"); 632 return (-1); 633 } 634 635 if (zone_setattr(zoneid, ZONE_ATTR_BOOTARGS, nbootargs, 0) == -1) { 636 zerror(zlogp, B_TRUE, "could not set zone boot arguments"); 637 return (-1); 638 } 639 640 if (zone_boot(zoneid) == -1) { 641 zerror(zlogp, B_TRUE, "unable to boot zone"); 642 return (-1); 643 } 644 645 return (0); 646 } 647 648 static int 649 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd) 650 { 651 int err; 652 653 if (vplat_teardown(zlogp, unmount_cmd) != 0) { 654 if (!bringup_failure_recovery) 655 zerror(zlogp, B_FALSE, "unable to destroy zone"); 656 return (-1); 657 } 658 659 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 660 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 661 zonecfg_strerror(err)); 662 663 return (0); 664 } 665 666 /* 667 * Generate AUE_zone_state for a command that boots a zone. 668 */ 669 static void 670 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 671 char *new_state) 672 { 673 adt_session_data_t *ah; 674 adt_event_data_t *event; 675 int pass_fail, fail_reason; 676 677 if (!adt_audit_enabled()) 678 return; 679 680 if (return_val == 0) { 681 pass_fail = ADT_SUCCESS; 682 fail_reason = ADT_SUCCESS; 683 } else { 684 pass_fail = ADT_FAILURE; 685 fail_reason = ADT_FAIL_VALUE_PROGRAM; 686 } 687 688 if (adt_start_session(&ah, NULL, 0)) { 689 zerror(zlogp, B_TRUE, gettext("audit failure.")); 690 return; 691 } 692 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 693 zerror(zlogp, B_TRUE, gettext("audit failure.")); 694 (void) adt_end_session(ah); 695 return; 696 } 697 698 event = adt_alloc_event(ah, ADT_zone_state); 699 if (event == NULL) { 700 zerror(zlogp, B_TRUE, gettext("audit failure.")); 701 (void) adt_end_session(ah); 702 return; 703 } 704 event->adt_zone_state.zonename = zone_name; 705 event->adt_zone_state.new_state = new_state; 706 707 if (adt_put_event(event, pass_fail, fail_reason)) 708 zerror(zlogp, B_TRUE, gettext("audit failure.")); 709 710 adt_free_event(event); 711 712 (void) adt_end_session(ah); 713 } 714 715 /* 716 * The main routine for the door server that deals with zone state transitions. 717 */ 718 /* ARGSUSED */ 719 static void 720 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 721 uint_t n_desc) 722 { 723 ucred_t *uc = NULL; 724 const priv_set_t *eset; 725 726 zone_state_t zstate; 727 zone_cmd_t cmd; 728 zone_cmd_arg_t *zargp; 729 730 boolean_t kernelcall; 731 732 int rval = -1; 733 uint64_t uniqid; 734 zoneid_t zoneid = -1; 735 zlog_t zlog; 736 zlog_t *zlogp; 737 zone_cmd_rval_t *rvalp; 738 size_t rlen = getpagesize(); /* conservative */ 739 740 /* LINTED E_BAD_PTR_CAST_ALIGN */ 741 zargp = (zone_cmd_arg_t *)args; 742 743 /* 744 * When we get the door unref message, we've fdetach'd the door, and 745 * it is time for us to shut down zoneadmd. 746 */ 747 if (zargp == DOOR_UNREF_DATA) { 748 /* 749 * See comment at end of main() for info on the last rites. 750 */ 751 exit(0); 752 } 753 754 if (zargp == NULL) { 755 (void) door_return(NULL, 0, 0, 0); 756 } 757 758 rvalp = alloca(rlen); 759 bzero(rvalp, rlen); 760 zlog.logfile = NULL; 761 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 762 zlog.buf = rvalp->errbuf; 763 zlog.log = zlog.buf; 764 /* defer initialization of zlog.locale until after credential check */ 765 zlogp = &zlog; 766 767 if (alen != sizeof (zone_cmd_arg_t)) { 768 /* 769 * This really shouldn't be happening. 770 */ 771 zerror(&logsys, B_FALSE, "argument size (%d bytes) " 772 "unexpected (expected %d bytes)", alen, 773 sizeof (zone_cmd_arg_t)); 774 goto out; 775 } 776 cmd = zargp->cmd; 777 778 if (door_ucred(&uc) != 0) { 779 zerror(&logsys, B_TRUE, "door_ucred"); 780 goto out; 781 } 782 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 783 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 784 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 785 ucred_geteuid(uc) != 0)) { 786 zerror(&logsys, B_FALSE, "insufficient privileges"); 787 goto out; 788 } 789 790 kernelcall = ucred_getpid(uc) == 0; 791 792 /* 793 * This is safe because we only use a zlog_t throughout the 794 * duration of a door call; i.e., by the time the pointer 795 * might become invalid, the door call would be over. 796 */ 797 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 798 799 (void) mutex_lock(&lock); 800 801 /* 802 * Once we start to really die off, we don't want more connections. 803 */ 804 if (in_death_throes) { 805 (void) mutex_unlock(&lock); 806 ucred_free(uc); 807 (void) door_return(NULL, 0, 0, 0); 808 thr_exit(NULL); 809 } 810 811 /* 812 * Check for validity of command. 813 */ 814 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_REBOOT && 815 cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT && 816 cmd != Z_UNMOUNT) { 817 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 818 goto out; 819 } 820 821 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 822 /* 823 * Can't happen 824 */ 825 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 826 cmd); 827 goto out; 828 } 829 /* 830 * We ignore the possibility of someone calling zone_create(2) 831 * explicitly; all requests must come through zoneadmd. 832 */ 833 if (zone_get_state(zone_name, &zstate) != Z_OK) { 834 /* 835 * Something terribly wrong happened 836 */ 837 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 838 goto out; 839 } 840 841 if (kernelcall) { 842 /* 843 * Kernel-initiated requests may lose their validity if the 844 * zone_t the kernel was referring to has gone away. 845 */ 846 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 847 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 848 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 849 /* 850 * We're not talking about the same zone. The request 851 * must have arrived too late. Return error. 852 */ 853 rval = -1; 854 goto out; 855 } 856 zlogp = &logsys; /* Log errors to syslog */ 857 } 858 859 switch (zstate) { 860 case ZONE_STATE_CONFIGURED: 861 case ZONE_STATE_INCOMPLETE: 862 /* 863 * Not our area of expertise; we just print a nice message 864 * and die off. 865 */ 866 zerror(zlogp, B_FALSE, 867 "%s operation is invalid for zones in state '%s'", 868 z_cmd_name(cmd), zone_state_str(zstate)); 869 break; 870 871 case ZONE_STATE_INSTALLED: 872 switch (cmd) { 873 case Z_READY: 874 rval = zone_ready(zlogp, B_FALSE); 875 if (rval == 0) 876 eventstream_write(Z_EVT_ZONE_READIED); 877 break; 878 case Z_BOOT: 879 eventstream_write(Z_EVT_ZONE_BOOTING); 880 if ((rval = zone_ready(zlogp, B_FALSE)) == 0) 881 rval = zone_bootup(zlogp, zargp->bootbuf); 882 audit_put_record(zlogp, uc, rval, "boot"); 883 if (rval != 0) { 884 bringup_failure_recovery = B_TRUE; 885 (void) zone_halt(zlogp, B_FALSE); 886 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 887 } 888 break; 889 case Z_HALT: 890 if (kernelcall) /* Invalid; can't happen */ 891 abort(); 892 /* 893 * We could have two clients racing to halt this 894 * zone; the second client loses, but his request 895 * doesn't fail, since the zone is now in the desired 896 * state. 897 */ 898 zerror(zlogp, B_FALSE, "zone is already halted"); 899 rval = 0; 900 break; 901 case Z_REBOOT: 902 if (kernelcall) /* Invalid; can't happen */ 903 abort(); 904 zerror(zlogp, B_FALSE, "%s operation is invalid " 905 "for zones in state '%s'", z_cmd_name(cmd), 906 zone_state_str(zstate)); 907 rval = -1; 908 break; 909 case Z_NOTE_UNINSTALLING: 910 if (kernelcall) /* Invalid; can't happen */ 911 abort(); 912 /* 913 * Tell the console to print out a message about this. 914 * Once it does, we will be in_death_throes. 915 */ 916 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 917 break; 918 case Z_MOUNT: 919 if (kernelcall) /* Invalid; can't happen */ 920 abort(); 921 rval = zone_ready(zlogp, B_TRUE); 922 if (rval == 0) { 923 eventstream_write(Z_EVT_ZONE_READIED); 924 rval = zone_mount_early(zlogp, zone_id); 925 } 926 927 /* 928 * Ordinarily, /dev/fd would be mounted inside the zone 929 * by svc:/system/filesystem/usr:default, but since 930 * we're not booting the zone, we need to do this 931 * manually. 932 */ 933 if (rval == 0) 934 rval = mount_early_fs(zlogp, zone_id, "fd", 935 "/dev/fd", "fd"); 936 break; 937 case Z_UNMOUNT: 938 if (kernelcall) /* Invalid; can't happen */ 939 abort(); 940 zerror(zlogp, B_FALSE, "zone is already unmounted"); 941 rval = 0; 942 break; 943 } 944 break; 945 946 case ZONE_STATE_READY: 947 switch (cmd) { 948 case Z_READY: 949 /* 950 * We could have two clients racing to ready this 951 * zone; the second client loses, but his request 952 * doesn't fail, since the zone is now in the desired 953 * state. 954 */ 955 zerror(zlogp, B_FALSE, "zone is already ready"); 956 rval = 0; 957 break; 958 case Z_BOOT: 959 (void) strlcpy(boot_args, zargp->bootbuf, 960 sizeof (boot_args)); 961 eventstream_write(Z_EVT_ZONE_BOOTING); 962 rval = zone_bootup(zlogp, zargp->bootbuf); 963 audit_put_record(zlogp, uc, rval, "boot"); 964 if (rval != 0) { 965 bringup_failure_recovery = B_TRUE; 966 (void) zone_halt(zlogp, B_FALSE); 967 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 968 } 969 boot_args[0] = '\0'; 970 break; 971 case Z_HALT: 972 if (kernelcall) /* Invalid; can't happen */ 973 abort(); 974 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) 975 break; 976 eventstream_write(Z_EVT_ZONE_HALTED); 977 break; 978 case Z_REBOOT: 979 case Z_NOTE_UNINSTALLING: 980 case Z_MOUNT: 981 case Z_UNMOUNT: 982 if (kernelcall) /* Invalid; can't happen */ 983 abort(); 984 zerror(zlogp, B_FALSE, "%s operation is invalid " 985 "for zones in state '%s'", z_cmd_name(cmd), 986 zone_state_str(zstate)); 987 rval = -1; 988 break; 989 } 990 break; 991 992 case ZONE_STATE_MOUNTED: 993 switch (cmd) { 994 case Z_UNMOUNT: 995 if (kernelcall) /* Invalid; can't happen */ 996 abort(); 997 rval = zone_halt(zlogp, B_TRUE); 998 if (rval == 0) { 999 eventstream_write(Z_EVT_ZONE_HALTED); 1000 (void) sema_post(&scratch_sem); 1001 } 1002 break; 1003 default: 1004 if (kernelcall) /* Invalid; can't happen */ 1005 abort(); 1006 zerror(zlogp, B_FALSE, "%s operation is invalid " 1007 "for zones in state '%s'", z_cmd_name(cmd), 1008 zone_state_str(zstate)); 1009 rval = -1; 1010 break; 1011 } 1012 break; 1013 1014 case ZONE_STATE_RUNNING: 1015 case ZONE_STATE_SHUTTING_DOWN: 1016 case ZONE_STATE_DOWN: 1017 switch (cmd) { 1018 case Z_READY: 1019 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) 1020 break; 1021 if ((rval = zone_ready(zlogp, B_FALSE)) == 0) 1022 eventstream_write(Z_EVT_ZONE_READIED); 1023 else 1024 eventstream_write(Z_EVT_ZONE_HALTED); 1025 break; 1026 case Z_BOOT: 1027 /* 1028 * We could have two clients racing to boot this 1029 * zone; the second client loses, but his request 1030 * doesn't fail, since the zone is now in the desired 1031 * state. 1032 */ 1033 zerror(zlogp, B_FALSE, "zone is already booted"); 1034 rval = 0; 1035 break; 1036 case Z_HALT: 1037 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) 1038 break; 1039 eventstream_write(Z_EVT_ZONE_HALTED); 1040 break; 1041 case Z_REBOOT: 1042 (void) strlcpy(boot_args, zargp->bootbuf, 1043 sizeof (boot_args)); 1044 eventstream_write(Z_EVT_ZONE_REBOOTING); 1045 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) { 1046 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1047 boot_args[0] = '\0'; 1048 break; 1049 } 1050 if ((rval = zone_ready(zlogp, B_FALSE)) != 0) { 1051 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1052 boot_args[0] = '\0'; 1053 break; 1054 } 1055 rval = zone_bootup(zlogp, zargp->bootbuf); 1056 audit_put_record(zlogp, uc, rval, "reboot"); 1057 if (rval != 0) { 1058 (void) zone_halt(zlogp, B_FALSE); 1059 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 1060 } 1061 boot_args[0] = '\0'; 1062 break; 1063 case Z_NOTE_UNINSTALLING: 1064 case Z_MOUNT: 1065 case Z_UNMOUNT: 1066 zerror(zlogp, B_FALSE, "%s operation is invalid " 1067 "for zones in state '%s'", z_cmd_name(cmd), 1068 zone_state_str(zstate)); 1069 rval = -1; 1070 break; 1071 } 1072 break; 1073 default: 1074 abort(); 1075 } 1076 1077 /* 1078 * Because the state of the zone may have changed, we make sure 1079 * to wake the console poller, which is in charge of initiating 1080 * the shutdown procedure as necessary. 1081 */ 1082 eventstream_write(Z_EVT_NULL); 1083 1084 out: 1085 (void) mutex_unlock(&lock); 1086 if (kernelcall) { 1087 rvalp = NULL; 1088 rlen = 0; 1089 } else { 1090 rvalp->rval = rval; 1091 } 1092 if (uc != NULL) 1093 ucred_free(uc); 1094 (void) door_return((char *)rvalp, rlen, NULL, 0); 1095 thr_exit(NULL); 1096 } 1097 1098 static int 1099 setup_door(zlog_t *zlogp) 1100 { 1101 if ((zone_door = door_create(server, NULL, 1102 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 1103 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 1104 return (-1); 1105 } 1106 (void) fdetach(zone_door_path); 1107 1108 if (fattach(zone_door, zone_door_path) != 0) { 1109 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 1110 (void) door_revoke(zone_door); 1111 (void) fdetach(zone_door_path); 1112 zone_door = -1; 1113 return (-1); 1114 } 1115 return (0); 1116 } 1117 1118 /* 1119 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 1120 * is where zoneadmd itself will check to see that another instance of 1121 * zoneadmd isn't already controlling this zone. 1122 * 1123 * The idea here is that we want to open the path to which we will 1124 * attach our door, lock it, and then make sure that no-one has beat us 1125 * to fattach(3c)ing onto it. 1126 * 1127 * fattach(3c) is really a mount, so there are actually two possible 1128 * vnodes we could be dealing with. Our strategy is as follows: 1129 * 1130 * - If the file we opened is a regular file (common case): 1131 * There is no fattach(3c)ed door, so we have a chance of becoming 1132 * the managing zoneadmd. We attempt to lock the file: if it is 1133 * already locked, that means someone else raced us here, so we 1134 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 1135 * that beat us to it. 1136 * 1137 * - If the file we opened is a namefs file: 1138 * This means there is already an established door fattach(3c)'ed 1139 * to the rendezvous path. We've lost the race, so we give up. 1140 * Note that in this case we also try to grab the file lock, and 1141 * will succeed in acquiring it since the vnode locked by the 1142 * "winning" zoneadmd was a regular one, and the one we locked was 1143 * the fattach(3c)'ed door node. At any rate, no harm is done, and 1144 * we just return to zoneadm(1m) which knows to retry. 1145 */ 1146 static int 1147 make_daemon_exclusive(zlog_t *zlogp) 1148 { 1149 int doorfd = -1; 1150 int err, ret = -1; 1151 struct stat st; 1152 struct flock flock; 1153 zone_state_t zstate; 1154 1155 top: 1156 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1157 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1158 zonecfg_strerror(err)); 1159 goto out; 1160 } 1161 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 1162 S_IREAD|S_IWRITE)) < 0) { 1163 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 1164 goto out; 1165 } 1166 if (fstat(doorfd, &st) < 0) { 1167 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 1168 goto out; 1169 } 1170 /* 1171 * Lock the file to synchronize with other zoneadmd 1172 */ 1173 flock.l_type = F_WRLCK; 1174 flock.l_whence = SEEK_SET; 1175 flock.l_start = (off_t)0; 1176 flock.l_len = (off_t)0; 1177 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 1178 /* 1179 * Someone else raced us here and grabbed the lock file 1180 * first. A warning here is inappropriate since nothing 1181 * went wrong. 1182 */ 1183 goto out; 1184 } 1185 1186 if (strcmp(st.st_fstype, "namefs") == 0) { 1187 struct door_info info; 1188 1189 /* 1190 * There is already something fattach()'ed to this file. 1191 * Lets see what the door is up to. 1192 */ 1193 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 1194 /* 1195 * Another zoneadmd process seems to be in 1196 * control of the situation and we don't need to 1197 * be here. A warning here is inappropriate 1198 * since nothing went wrong. 1199 * 1200 * If the door has been revoked, the zoneadmd 1201 * process currently managing the zone is going 1202 * away. We'll return control to zoneadm(1m) 1203 * which will try again (by which time zoneadmd 1204 * will hopefully have exited). 1205 */ 1206 goto out; 1207 } 1208 1209 /* 1210 * If we got this far, there's a fattach(3c)'ed door 1211 * that belongs to a process that has exited, which can 1212 * happen if the previous zoneadmd died unexpectedly. 1213 * 1214 * Let user know that something is amiss, but that we can 1215 * recover; if the zone is in the installed state, then don't 1216 * message, since having a running zoneadmd isn't really 1217 * expected/needed. We want to keep occurences of this message 1218 * limited to times when zoneadmd is picking back up from a 1219 * zoneadmd that died while the zone was in some non-trivial 1220 * state. 1221 */ 1222 if (zstate > ZONE_STATE_INSTALLED) { 1223 zerror(zlogp, B_FALSE, 1224 "zone '%s': WARNING: zone is in state '%s', but " 1225 "zoneadmd does not appear to be available; " 1226 "restarted zoneadmd to recover.", 1227 zone_name, zone_state_str(zstate)); 1228 } 1229 1230 (void) fdetach(zone_door_path); 1231 (void) close(doorfd); 1232 goto top; 1233 } 1234 ret = 0; 1235 out: 1236 (void) close(doorfd); 1237 return (ret); 1238 } 1239 1240 int 1241 main(int argc, char *argv[]) 1242 { 1243 int opt; 1244 zoneid_t zid; 1245 priv_set_t *privset; 1246 zone_state_t zstate; 1247 char parents_locale[MAXPATHLEN]; 1248 int err; 1249 1250 pid_t pid; 1251 sigset_t blockset; 1252 sigset_t block_cld; 1253 1254 struct { 1255 sema_t sem; 1256 int status; 1257 zlog_t log; 1258 } *shstate; 1259 size_t shstatelen = getpagesize(); 1260 1261 zlog_t errlog; 1262 zlog_t *zlogp; 1263 1264 int ctfd; 1265 1266 progname = get_execbasename(argv[0]); 1267 1268 /* 1269 * Make sure stderr is unbuffered 1270 */ 1271 (void) setbuffer(stderr, NULL, 0); 1272 1273 /* 1274 * Get out of the way of mounted filesystems, since we will daemonize 1275 * soon. 1276 */ 1277 (void) chdir("/"); 1278 1279 /* 1280 * Use the default system umask per PSARC 1998/110 rather than 1281 * anything that may have been set by the caller. 1282 */ 1283 (void) umask(CMASK); 1284 1285 /* 1286 * Initially we want to use our parent's locale. 1287 */ 1288 (void) setlocale(LC_ALL, ""); 1289 (void) textdomain(TEXT_DOMAIN); 1290 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1291 sizeof (parents_locale)); 1292 1293 /* 1294 * This zlog_t is used for writing to stderr 1295 */ 1296 errlog.logfile = stderr; 1297 errlog.buflen = errlog.loglen = 0; 1298 errlog.buf = errlog.log = NULL; 1299 errlog.locale = parents_locale; 1300 1301 /* 1302 * We start off writing to stderr until we're ready to daemonize. 1303 */ 1304 zlogp = &errlog; 1305 1306 /* 1307 * Process options. 1308 */ 1309 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1310 switch (opt) { 1311 case 'R': 1312 zonecfg_set_root(optarg); 1313 break; 1314 case 'z': 1315 zone_name = optarg; 1316 break; 1317 default: 1318 usage(); 1319 } 1320 } 1321 1322 if (zone_name == NULL) 1323 usage(); 1324 1325 /* 1326 * Because usage() prints directly to stderr, it has gettext() 1327 * wrapping, which depends on the locale. But since zerror() calls 1328 * localize() which tweaks the locale, it is not safe to call zerror() 1329 * until after the last call to usage(). Fortunately, the last call 1330 * to usage() is just above and the first call to zerror() is just 1331 * below. Don't mess this up. 1332 */ 1333 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1334 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1335 GLOBAL_ZONENAME); 1336 return (1); 1337 } 1338 1339 if (zone_get_id(zone_name, &zid) != 0) { 1340 zerror(zlogp, B_FALSE, "could not manage %s: %s", zone_name, 1341 zonecfg_strerror(Z_NO_ZONE)); 1342 return (1); 1343 } 1344 1345 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1346 zerror(zlogp, B_FALSE, "failed to get zone state: %s", 1347 zonecfg_strerror(err)); 1348 return (1); 1349 } 1350 if (zstate < ZONE_STATE_INSTALLED) { 1351 zerror(zlogp, B_FALSE, 1352 "cannot manage a zone which is in state '%s'", 1353 zone_state_str(zstate)); 1354 return (1); 1355 } 1356 1357 /* 1358 * Check that we have all privileges. It would be nice to pare 1359 * this down, but this is at least a first cut. 1360 */ 1361 if ((privset = priv_allocset()) == NULL) { 1362 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1363 return (1); 1364 } 1365 1366 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 1367 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 1368 priv_freeset(privset); 1369 return (1); 1370 } 1371 1372 if (priv_isfullset(privset) == B_FALSE) { 1373 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 1374 "run this command (all privs required)"); 1375 priv_freeset(privset); 1376 return (1); 1377 } 1378 priv_freeset(privset); 1379 1380 if (mkzonedir(zlogp) != 0) 1381 return (1); 1382 1383 /* 1384 * Pre-fork: setup shared state 1385 */ 1386 if ((shstate = (void *)mmap(NULL, shstatelen, 1387 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 1388 MAP_FAILED) { 1389 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 1390 return (1); 1391 } 1392 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 1393 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 1394 (void) munmap((char *)shstate, shstatelen); 1395 return (1); 1396 } 1397 shstate->log.logfile = NULL; 1398 shstate->log.buflen = shstatelen - sizeof (*shstate); 1399 shstate->log.loglen = shstate->log.buflen; 1400 shstate->log.buf = (char *)shstate + sizeof (*shstate); 1401 shstate->log.log = shstate->log.buf; 1402 shstate->log.locale = parents_locale; 1403 shstate->status = -1; 1404 1405 /* 1406 * We need a SIGCHLD handler so the sema_wait() below will wake 1407 * up if the child dies without doing a sema_post(). 1408 */ 1409 (void) sigset(SIGCHLD, sigchld); 1410 /* 1411 * We must mask SIGCHLD until after we've coped with the fork 1412 * sufficiently to deal with it; otherwise we can race and 1413 * receive the signal before pid has been initialized 1414 * (yes, this really happens). 1415 */ 1416 (void) sigemptyset(&block_cld); 1417 (void) sigaddset(&block_cld, SIGCHLD); 1418 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 1419 1420 if ((ctfd = init_template()) == -1) { 1421 zerror(zlogp, B_TRUE, "failed to create contract"); 1422 return (1); 1423 } 1424 1425 /* 1426 * Do not let another thread localize a message while we are forking. 1427 */ 1428 (void) mutex_lock(&msglock); 1429 pid = fork(); 1430 (void) mutex_unlock(&msglock); 1431 1432 /* 1433 * In all cases (parent, child, and in the event of an error) we 1434 * don't want to cause creation of contracts on subsequent fork()s. 1435 */ 1436 (void) ct_tmpl_clear(ctfd); 1437 (void) close(ctfd); 1438 1439 if (pid == -1) { 1440 zerror(zlogp, B_TRUE, "could not fork"); 1441 return (1); 1442 1443 } else if (pid > 0) { /* parent */ 1444 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1445 /* 1446 * This marks a window of vulnerability in which we receive 1447 * the SIGCLD before falling into sema_wait (normally we would 1448 * get woken up from sema_wait with EINTR upon receipt of 1449 * SIGCLD). So we may need to use some other scheme like 1450 * sema_posting in the sigcld handler. 1451 * blech 1452 */ 1453 (void) sema_wait(&shstate->sem); 1454 (void) sema_destroy(&shstate->sem); 1455 if (shstate->status != 0) 1456 (void) waitpid(pid, NULL, WNOHANG); 1457 /* 1458 * It's ok if we die with SIGPIPE. It's not like we could have 1459 * done anything about it. 1460 */ 1461 (void) fprintf(stderr, "%s", shstate->log.buf); 1462 _exit(shstate->status == 0 ? 0 : 1); 1463 } 1464 1465 /* 1466 * The child charges on. 1467 */ 1468 (void) sigset(SIGCHLD, SIG_DFL); 1469 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1470 1471 /* 1472 * SIGPIPE can be delivered if we write to a socket for which the 1473 * peer endpoint is gone. That can lead to too-early termination 1474 * of zoneadmd, and that's not good eats. 1475 */ 1476 (void) sigset(SIGPIPE, SIG_IGN); 1477 /* 1478 * Stop using stderr 1479 */ 1480 zlogp = &shstate->log; 1481 1482 /* 1483 * We don't need stdout/stderr from now on. 1484 */ 1485 closefrom(0); 1486 1487 /* 1488 * Initialize the syslog zlog_t. This needs to be done after 1489 * the call to closefrom(). 1490 */ 1491 logsys.buf = logsys.log = NULL; 1492 logsys.buflen = logsys.loglen = 0; 1493 logsys.logfile = NULL; 1494 logsys.locale = DEFAULT_LOCALE; 1495 1496 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 1497 1498 /* 1499 * The eventstream is used to publish state changes in the zone 1500 * from the door threads to the console I/O poller. 1501 */ 1502 if (eventstream_init() == -1) { 1503 zerror(zlogp, B_TRUE, "unable to create eventstream"); 1504 goto child_out; 1505 } 1506 1507 (void) snprintf(zone_door_path, sizeof (zone_door_path), 1508 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 1509 1510 /* 1511 * See if another zoneadmd is running for this zone. If not, then we 1512 * can now modify system state. 1513 */ 1514 if (make_daemon_exclusive(zlogp) == -1) 1515 goto child_out; 1516 1517 1518 /* 1519 * Create/join a new session; we need to be careful of what we do with 1520 * the console from now on so we don't end up being the session leader 1521 * for the terminal we're going to be handing out. 1522 */ 1523 (void) setsid(); 1524 1525 /* 1526 * This thread shouldn't be receiving any signals; in particular, 1527 * SIGCHLD should be received by the thread doing the fork(). 1528 */ 1529 (void) sigfillset(&blockset); 1530 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 1531 1532 /* 1533 * Setup the console device and get ready to serve the console; 1534 * once this has completed, we're ready to let console clients 1535 * make an attempt to connect (they will block until 1536 * serve_console_sock() below gets called, and any pending 1537 * connection is accept()ed). 1538 */ 1539 if (!zonecfg_in_alt_root() && init_console(zlogp) == -1) 1540 goto child_out; 1541 1542 /* 1543 * Take the lock now, so that when the door server gets going, we 1544 * are guaranteed that it won't take a request until we are sure 1545 * that everything is completely set up. See the child_out: label 1546 * below to see why this matters. 1547 */ 1548 (void) mutex_lock(&lock); 1549 1550 /* Init semaphore for scratch zones. */ 1551 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 1552 zerror(zlogp, B_TRUE, 1553 "failed to initialize semaphore for scratch zone"); 1554 goto child_out; 1555 } 1556 1557 /* 1558 * Note: door setup must occur *after* the console is setup. 1559 * This is so that as zlogin tests the door to see if zoneadmd 1560 * is ready yet, we know that the console will get serviced 1561 * once door_info() indicates that the door is "up". 1562 */ 1563 if (setup_door(zlogp) == -1) 1564 goto child_out; 1565 1566 /* 1567 * Things seem OK so far; tell the parent process that we're done 1568 * with setup tasks. This will cause the parent to exit, signalling 1569 * to zoneadm, zlogin, or whatever forked it that we are ready to 1570 * service requests. 1571 */ 1572 shstate->status = 0; 1573 (void) sema_post(&shstate->sem); 1574 (void) munmap((char *)shstate, shstatelen); 1575 shstate = NULL; 1576 1577 (void) mutex_unlock(&lock); 1578 1579 /* 1580 * zlogp is now invalid, so reset it to the syslog logger. 1581 */ 1582 zlogp = &logsys; 1583 1584 /* 1585 * Now that we are free of any parents, switch to the default locale. 1586 */ 1587 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 1588 1589 /* 1590 * At this point the setup portion of main() is basically done, so 1591 * we reuse this thread to manage the zone console. When 1592 * serve_console() has returned, we are past the point of no return 1593 * in the life of this zoneadmd. 1594 */ 1595 if (zonecfg_in_alt_root()) { 1596 /* 1597 * This is just awful, but mounted scratch zones don't (and 1598 * can't) have consoles. We just wait for unmount instead. 1599 */ 1600 while (sema_wait(&scratch_sem) == EINTR) 1601 ; 1602 } else { 1603 serve_console(zlogp); 1604 assert(in_death_throes); 1605 } 1606 1607 /* 1608 * This is the next-to-last part of the exit interlock. Upon calling 1609 * fdetach(), the door will go unreferenced; once any 1610 * outstanding requests (like the door thread doing Z_HALT) are 1611 * done, the door will get an UNREF notification; when it handles 1612 * the UNREF, the door server will cause the exit. 1613 */ 1614 assert(!MUTEX_HELD(&lock)); 1615 (void) fdetach(zone_door_path); 1616 for (;;) 1617 (void) pause(); 1618 1619 child_out: 1620 assert(pid == 0); 1621 if (shstate != NULL) { 1622 shstate->status = -1; 1623 (void) sema_post(&shstate->sem); 1624 (void) munmap((char *)shstate, shstatelen); 1625 } 1626 1627 /* 1628 * This might trigger an unref notification, but if so, 1629 * we are still holding the lock, so our call to exit will 1630 * ultimately win the race and will publish the right exit 1631 * code. 1632 */ 1633 if (zone_door != -1) { 1634 assert(MUTEX_HELD(&lock)); 1635 (void) door_revoke(zone_door); 1636 (void) fdetach(zone_door_path); 1637 } 1638 return (1); /* return from main() forcibly exits an MT process */ 1639 } 1640