1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * zoneadmd manages zones; one zoneadmd process is launched for each 31 * non-global zone on the system. This daemon juggles four jobs: 32 * 33 * - Implement setup and teardown of the zone "virtual platform": mount and 34 * unmount filesystems; create and destroy network interfaces; communicate 35 * with devfsadmd to lay out devices for the zone; instantiate the zone 36 * console device; configure process runtime attributes such as resource 37 * controls, pool bindings, fine-grained privileges. 38 * 39 * - Launch the zone's init(1M) process. 40 * 41 * - Implement a door server; clients (like zoneadm) connect to the door 42 * server and request zone state changes. The kernel is also a client of 43 * this door server. A request to halt or reboot the zone which originates 44 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 45 * 46 * One minor problem is that messages emitted by zoneadmd need to be passed 47 * back to the zoneadm process making the request. These messages need to 48 * be rendered in the client's locale; so, this is passed in as part of the 49 * request. The exception is the kernel upcall to zoneadmd, in which case 50 * messages are syslog'd. 51 * 52 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 53 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 54 * strings which do not need to be translated. 55 * 56 * - Act as a console server for zlogin -C processes; see comments in zcons.c 57 * for more information about the zone console architecture. 58 * 59 * DESIGN NOTES 60 * 61 * Restart: 62 * A chief design constraint of zoneadmd is that it should be restartable in 63 * the case that the administrator kills it off, or it suffers a fatal error, 64 * without the running zone being impacted; this is akin to being able to 65 * reboot the service processor of a server without affecting the OS instance. 66 */ 67 68 #include <sys/param.h> 69 #include <sys/mman.h> 70 #include <sys/types.h> 71 #include <sys/stat.h> 72 #include <sys/sysmacros.h> 73 74 #include <bsm/adt.h> 75 #include <bsm/adt_event.h> 76 77 #include <alloca.h> 78 #include <assert.h> 79 #include <errno.h> 80 #include <door.h> 81 #include <fcntl.h> 82 #include <locale.h> 83 #include <signal.h> 84 #include <stdarg.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 #include <strings.h> 89 #include <synch.h> 90 #include <syslog.h> 91 #include <thread.h> 92 #include <unistd.h> 93 #include <wait.h> 94 #include <limits.h> 95 #include <zone.h> 96 #include <libcontract.h> 97 #include <libcontract_priv.h> 98 #include <sys/contract/process.h> 99 #include <sys/ctfs.h> 100 101 #include <libzonecfg.h> 102 #include "zoneadmd.h" 103 104 static char *progname; 105 char *zone_name; /* zone which we are managing */ 106 static zoneid_t zone_id; 107 108 static zlog_t logsys; 109 110 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 111 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 112 113 static sema_t scratch_sem; /* for scratch zones */ 114 115 static char zone_door_path[MAXPATHLEN]; 116 static int zone_door = -1; 117 118 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 119 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 120 121 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 122 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 123 #endif 124 125 #define PATH_TO_INIT "/sbin/init" 126 127 #define DEFAULT_LOCALE "C" 128 129 static const char * 130 z_cmd_name(zone_cmd_t zcmd) 131 { 132 /* This list needs to match the enum in sys/zone.h */ 133 static const char *zcmdstr[] = { 134 "ready", "boot", "reboot", "halt", "note_uninstalling", 135 "mount", "unmount" 136 }; 137 138 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 139 return ("unknown"); 140 else 141 return (zcmdstr[(int)zcmd]); 142 } 143 144 static char * 145 get_execbasename(char *execfullname) 146 { 147 char *last_slash, *execbasename; 148 149 /* guard against '/' at end of command invocation */ 150 for (;;) { 151 last_slash = strrchr(execfullname, '/'); 152 if (last_slash == NULL) { 153 execbasename = execfullname; 154 break; 155 } else { 156 execbasename = last_slash + 1; 157 if (*execbasename == '\0') { 158 *last_slash = '\0'; 159 continue; 160 } 161 break; 162 } 163 } 164 return (execbasename); 165 } 166 167 static void 168 usage(void) 169 { 170 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 171 (void) fprintf(stderr, 172 gettext("\tNote: %s should not be run directly.\n"), progname); 173 exit(2); 174 } 175 176 /* ARGSUSED */ 177 static void 178 sigchld(int sig) 179 { 180 } 181 182 char * 183 localize_msg(char *locale, const char *msg) 184 { 185 char *out; 186 187 (void) mutex_lock(&msglock); 188 (void) setlocale(LC_MESSAGES, locale); 189 out = gettext(msg); 190 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 191 (void) mutex_unlock(&msglock); 192 return (out); 193 } 194 195 /* PRINTFLIKE3 */ 196 void 197 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 198 { 199 va_list alist; 200 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 201 char *bp; 202 int saved_errno = errno; 203 204 if (zlogp == NULL) 205 return; 206 if (zlogp == &logsys) 207 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 208 zone_name); 209 else 210 buf[0] = '\0'; 211 bp = &(buf[strlen(buf)]); 212 213 /* 214 * In theory, the locale pointer should be set to either "C" or a 215 * char array, so it should never be NULL 216 */ 217 assert(zlogp->locale != NULL); 218 /* Locale is per process, but we are multi-threaded... */ 219 fmt = localize_msg(zlogp->locale, fmt); 220 221 va_start(alist, fmt); 222 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 223 va_end(alist); 224 bp = &(buf[strlen(buf)]); 225 if (use_strerror) 226 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 227 strerror(saved_errno)); 228 if (zlogp == &logsys) { 229 (void) syslog(LOG_ERR, "%s", buf); 230 } else if (zlogp->logfile != NULL) { 231 (void) fprintf(zlogp->logfile, "%s\n", buf); 232 } else { 233 size_t buflen; 234 size_t copylen; 235 236 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 237 copylen = MIN(buflen, zlogp->loglen); 238 zlogp->log += copylen; 239 zlogp->loglen -= copylen; 240 } 241 } 242 243 static int 244 mkzonedir(zlog_t *zlogp) 245 { 246 struct stat st; 247 /* 248 * We must create and lock everyone but root out of ZONES_TMPDIR 249 * since anyone can open any UNIX domain socket, regardless of 250 * its file system permissions. Sigh... 251 */ 252 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 253 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 254 return (-1); 255 } 256 /* paranoia */ 257 if ((stat(ZONES_TMPDIR, &st) < 0) || !S_ISDIR(st.st_mode)) { 258 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 259 return (-1); 260 } 261 (void) chmod(ZONES_TMPDIR, S_IRWXU); 262 return (0); 263 } 264 265 /* 266 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 267 * 'true' if this is being invoked as part of the processing for the "mount" 268 * subcommand. 269 */ 270 static int 271 zone_ready(zlog_t *zlogp, boolean_t mount_cmd) 272 { 273 int err; 274 275 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 276 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 277 zonecfg_strerror(err)); 278 return (-1); 279 } 280 281 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 282 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 283 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 284 zonecfg_strerror(err)); 285 return (-1); 286 } 287 if (vplat_bringup(zlogp, mount_cmd) != 0) { 288 bringup_failure_recovery = B_TRUE; 289 (void) vplat_teardown(NULL, mount_cmd); 290 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 291 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 292 zonecfg_strerror(err)); 293 return (-1); 294 } 295 296 return (0); 297 } 298 299 static int 300 init_template() 301 { 302 int fd; 303 int err = 0; 304 305 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 306 if (fd == -1) 307 return (-1); 308 309 /* 310 * For now, zoneadmd doesn't do anything with the contract. 311 * Deliver no events, don't inherit, and allow it to be orphaned. 312 */ 313 err |= ct_tmpl_set_critical(fd, 0); 314 err |= ct_tmpl_set_informative(fd, 0); 315 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 316 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 317 if (err || ct_tmpl_activate(fd)) { 318 (void) close(fd); 319 return (-1); 320 } 321 322 return (fd); 323 } 324 325 static int 326 mount_early_fs(zlog_t *zlogp, zoneid_t zoneid, const char *spec, 327 const char *dir, char *fstype) 328 { 329 pid_t child; 330 int child_status; 331 int tmpl_fd; 332 ctid_t ct; 333 334 if ((tmpl_fd = init_template()) == -1) { 335 zerror(zlogp, B_TRUE, "failed to create contract"); 336 return (-1); 337 } 338 339 if ((child = fork()) == -1) { 340 (void) ct_tmpl_clear(tmpl_fd); 341 (void) close(tmpl_fd); 342 zerror(zlogp, B_TRUE, "failed to fork"); 343 return (-1); 344 345 } else if (child == 0) { /* child */ 346 (void) ct_tmpl_clear(tmpl_fd); 347 /* 348 * Even though there are no procs running in the zone, we 349 * do this for paranoia's sake. 350 */ 351 (void) closefrom(0); 352 353 if (zone_enter(zoneid) == -1) { 354 _exit(errno); 355 } 356 if (mount(spec, dir, MS_DATA, fstype, NULL, 0, NULL, 0) != 0) 357 _exit(errno); 358 _exit(0); 359 } 360 361 /* parent */ 362 if (contract_latest(&ct) == -1) 363 ct = -1; 364 (void) ct_tmpl_clear(tmpl_fd); 365 (void) close(tmpl_fd); 366 if (waitpid(child, &child_status, 0) != child) { 367 /* unexpected: we must have been signalled */ 368 (void) contract_abandon_id(ct); 369 return (-1); 370 } 371 (void) contract_abandon_id(ct); 372 if (WEXITSTATUS(child_status) != 0) { 373 errno = WEXITSTATUS(child_status); 374 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 375 return (-1); 376 } 377 378 return (0); 379 } 380 381 static int 382 zone_mount_early(zlog_t *zlogp, zoneid_t zoneid) 383 { 384 if (mount_early_fs(zlogp, zoneid, "/proc", "/proc", "proc") != 0) 385 return (-1); 386 387 if (mount_early_fs(zlogp, zoneid, "ctfs", CTFS_ROOT, "ctfs") != 0) 388 return (-1); 389 390 if (mount_early_fs(zlogp, zoneid, "swap", "/etc/svc/volatile", 391 "tmpfs") != 0) 392 return (-1); 393 394 if (mount_early_fs(zlogp, zoneid, "mnttab", "/etc/mnttab", 395 "mntfs") != 0) 396 return (-1); 397 398 return (0); 399 } 400 401 static int 402 zone_bootup(zlog_t *zlogp, const char *bootargs) 403 { 404 zoneid_t zoneid; 405 struct stat st; 406 char zroot[MAXPATHLEN], initpath[MAXPATHLEN]; 407 408 if (init_console_slave(zlogp) != 0) 409 return (-1); 410 reset_slave_terminal(zlogp); 411 412 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 413 zerror(zlogp, B_TRUE, "unable to get zoneid"); 414 return (-1); 415 } 416 417 if (zone_mount_early(zlogp, zoneid) != 0) 418 return (-1); 419 420 /* 421 * Try to anticipate possible problems: Make sure init is executable. 422 */ 423 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 424 zerror(zlogp, B_FALSE, "unable to determine zone root"); 425 return (-1); 426 } 427 (void) snprintf(initpath, sizeof (initpath), "%s%s", zroot, 428 PATH_TO_INIT); 429 430 if (stat(initpath, &st) == -1) { 431 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 432 return (-1); 433 } 434 435 if ((st.st_mode & S_IXUSR) == 0) { 436 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 437 return (-1); 438 } 439 440 if (zone_boot(zoneid, bootargs) == -1) { 441 zerror(zlogp, B_TRUE, "unable to boot zone"); 442 return (-1); 443 } 444 445 return (0); 446 } 447 448 static int 449 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd) 450 { 451 int err; 452 453 if (vplat_teardown(zlogp, unmount_cmd) != 0) { 454 if (!bringup_failure_recovery) 455 zerror(zlogp, B_FALSE, "unable to destroy zone"); 456 return (-1); 457 } 458 459 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 460 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 461 zonecfg_strerror(err)); 462 463 return (0); 464 } 465 466 /* 467 * Generate AUE_zone_state for a command that boots a zone. 468 */ 469 static void 470 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 471 char *new_state) 472 { 473 adt_session_data_t *ah; 474 adt_event_data_t *event; 475 int pass_fail, fail_reason; 476 477 if (!adt_audit_enabled()) 478 return; 479 480 if (return_val == 0) { 481 pass_fail = ADT_SUCCESS; 482 fail_reason = ADT_SUCCESS; 483 } else { 484 pass_fail = ADT_FAILURE; 485 fail_reason = ADT_FAIL_VALUE_PROGRAM; 486 } 487 488 if (adt_start_session(&ah, NULL, 0)) { 489 zerror(zlogp, B_TRUE, gettext("audit failure.")); 490 return; 491 } 492 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 493 zerror(zlogp, B_TRUE, gettext("audit failure.")); 494 (void) adt_end_session(ah); 495 return; 496 } 497 498 event = adt_alloc_event(ah, ADT_zone_state); 499 if (event == NULL) { 500 zerror(zlogp, B_TRUE, gettext("audit failure.")); 501 (void) adt_end_session(ah); 502 return; 503 } 504 event->adt_zone_state.zonename = zone_name; 505 event->adt_zone_state.new_state = new_state; 506 507 if (adt_put_event(event, pass_fail, fail_reason)) 508 zerror(zlogp, B_TRUE, gettext("audit failure.")); 509 510 adt_free_event(event); 511 512 (void) adt_end_session(ah); 513 } 514 515 /* 516 * The main routine for the door server that deals with zone state transitions. 517 */ 518 /* ARGSUSED */ 519 static void 520 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 521 uint_t n_desc) 522 { 523 ucred_t *uc = NULL; 524 const priv_set_t *eset; 525 526 zone_state_t zstate; 527 zone_cmd_t cmd; 528 zone_cmd_arg_t *zargp; 529 530 boolean_t kernelcall; 531 532 int rval = -1; 533 uint64_t uniqid; 534 zoneid_t zoneid = -1; 535 zlog_t zlog; 536 zlog_t *zlogp; 537 zone_cmd_rval_t *rvalp; 538 size_t rlen = getpagesize(); /* conservative */ 539 540 /* LINTED E_BAD_PTR_CAST_ALIGN */ 541 zargp = (zone_cmd_arg_t *)args; 542 543 /* 544 * When we get the door unref message, we've fdetach'd the door, and 545 * it is time for us to shut down zoneadmd. 546 */ 547 if (zargp == DOOR_UNREF_DATA) { 548 /* 549 * See comment at end of main() for info on the last rites. 550 */ 551 exit(0); 552 } 553 554 if (zargp == NULL) { 555 (void) door_return(NULL, 0, 0, 0); 556 } 557 558 rvalp = alloca(rlen); 559 bzero(rvalp, rlen); 560 zlog.logfile = NULL; 561 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 562 zlog.buf = rvalp->errbuf; 563 zlog.log = zlog.buf; 564 /* defer initialization of zlog.locale until after credential check */ 565 zlogp = &zlog; 566 567 if (alen != sizeof (zone_cmd_arg_t)) { 568 /* 569 * This really shouldn't be happening. 570 */ 571 zerror(&logsys, B_FALSE, "invalid argument"); 572 goto out; 573 } 574 cmd = zargp->cmd; 575 576 if (door_ucred(&uc) != 0) { 577 zerror(&logsys, B_TRUE, "door_ucred"); 578 goto out; 579 } 580 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 581 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 582 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 583 ucred_geteuid(uc) != 0)) { 584 zerror(&logsys, B_FALSE, "insufficient privileges"); 585 goto out; 586 } 587 588 kernelcall = ucred_getpid(uc) == 0; 589 590 /* 591 * This is safe because we only use a zlog_t throughout the 592 * duration of a door call; i.e., by the time the pointer 593 * might become invalid, the door call would be over. 594 */ 595 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 596 597 (void) mutex_lock(&lock); 598 599 /* 600 * Once we start to really die off, we don't want more connections. 601 */ 602 if (in_death_throes) { 603 (void) mutex_unlock(&lock); 604 ucred_free(uc); 605 (void) door_return(NULL, 0, 0, 0); 606 thr_exit(NULL); 607 } 608 609 /* 610 * Check for validity of command. 611 */ 612 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_REBOOT && 613 cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT && 614 cmd != Z_UNMOUNT) { 615 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 616 goto out; 617 } 618 619 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 620 /* 621 * Can't happen 622 */ 623 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 624 cmd); 625 goto out; 626 } 627 /* 628 * We ignore the possibility of someone calling zone_create(2) 629 * explicitly; all requests must come through zoneadmd. 630 */ 631 if (zone_get_state(zone_name, &zstate) != Z_OK) { 632 /* 633 * Something terribly wrong happened 634 */ 635 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 636 goto out; 637 } 638 639 if (kernelcall) { 640 /* 641 * Kernel-initiated requests may lose their validity if the 642 * zone_t the kernel was referring to has gone away. 643 */ 644 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 645 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 646 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 647 /* 648 * We're not talking about the same zone. The request 649 * must have arrived too late. Return error. 650 */ 651 rval = -1; 652 goto out; 653 } 654 zlogp = &logsys; /* Log errors to syslog */ 655 } 656 657 switch (zstate) { 658 case ZONE_STATE_CONFIGURED: 659 case ZONE_STATE_INCOMPLETE: 660 /* 661 * Not our area of expertise; we just print a nice message 662 * and die off. 663 */ 664 zerror(zlogp, B_FALSE, 665 "%s operation is invalid for zones in state '%s'", 666 z_cmd_name(cmd), zone_state_str(zstate)); 667 break; 668 669 case ZONE_STATE_INSTALLED: 670 switch (cmd) { 671 case Z_READY: 672 rval = zone_ready(zlogp, B_FALSE); 673 if (rval == 0) 674 eventstream_write(Z_EVT_ZONE_READIED); 675 break; 676 case Z_BOOT: 677 eventstream_write(Z_EVT_ZONE_BOOTING); 678 if ((rval = zone_ready(zlogp, B_FALSE)) == 0) 679 rval = zone_bootup(zlogp, zargp->bootbuf); 680 audit_put_record(zlogp, uc, rval, "boot"); 681 if (rval != 0) { 682 bringup_failure_recovery = B_TRUE; 683 (void) zone_halt(zlogp, B_FALSE); 684 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 685 } 686 break; 687 case Z_HALT: 688 if (kernelcall) /* Invalid; can't happen */ 689 abort(); 690 /* 691 * We could have two clients racing to halt this 692 * zone; the second client loses, but his request 693 * doesn't fail, since the zone is now in the desired 694 * state. 695 */ 696 zerror(zlogp, B_FALSE, "zone is already halted"); 697 rval = 0; 698 break; 699 case Z_REBOOT: 700 if (kernelcall) /* Invalid; can't happen */ 701 abort(); 702 zerror(zlogp, B_FALSE, "%s operation is invalid " 703 "for zones in state '%s'", z_cmd_name(cmd), 704 zone_state_str(zstate)); 705 rval = -1; 706 break; 707 case Z_NOTE_UNINSTALLING: 708 if (kernelcall) /* Invalid; can't happen */ 709 abort(); 710 /* 711 * Tell the console to print out a message about this. 712 * Once it does, we will be in_death_throes. 713 */ 714 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 715 break; 716 case Z_MOUNT: 717 if (kernelcall) /* Invalid; can't happen */ 718 abort(); 719 rval = zone_ready(zlogp, B_TRUE); 720 if (rval == 0) { 721 eventstream_write(Z_EVT_ZONE_READIED); 722 rval = zone_mount_early(zlogp, zone_id); 723 } 724 725 /* 726 * Ordinarily, /dev/fd would be mounted inside the zone 727 * by svc:/system/filesystem/usr:default, but since 728 * we're not booting the zone, we need to do this 729 * manually. 730 */ 731 if (rval == 0) 732 rval = mount_early_fs(zlogp, zone_id, "fd", 733 "/dev/fd", "fd"); 734 break; 735 case Z_UNMOUNT: 736 if (kernelcall) /* Invalid; can't happen */ 737 abort(); 738 zerror(zlogp, B_FALSE, "zone is already unmounted"); 739 rval = 0; 740 break; 741 } 742 break; 743 744 case ZONE_STATE_READY: 745 switch (cmd) { 746 case Z_READY: 747 /* 748 * We could have two clients racing to ready this 749 * zone; the second client loses, but his request 750 * doesn't fail, since the zone is now in the desired 751 * state. 752 */ 753 zerror(zlogp, B_FALSE, "zone is already ready"); 754 rval = 0; 755 break; 756 case Z_BOOT: 757 eventstream_write(Z_EVT_ZONE_BOOTING); 758 rval = zone_bootup(zlogp, zargp->bootbuf); 759 audit_put_record(zlogp, uc, rval, "boot"); 760 if (rval != 0) { 761 bringup_failure_recovery = B_TRUE; 762 (void) zone_halt(zlogp, B_FALSE); 763 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 764 } 765 break; 766 case Z_HALT: 767 if (kernelcall) /* Invalid; can't happen */ 768 abort(); 769 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) 770 break; 771 eventstream_write(Z_EVT_ZONE_HALTED); 772 break; 773 case Z_REBOOT: 774 case Z_NOTE_UNINSTALLING: 775 case Z_MOUNT: 776 case Z_UNMOUNT: 777 if (kernelcall) /* Invalid; can't happen */ 778 abort(); 779 zerror(zlogp, B_FALSE, "%s operation is invalid " 780 "for zones in state '%s'", z_cmd_name(cmd), 781 zone_state_str(zstate)); 782 rval = -1; 783 break; 784 } 785 break; 786 787 case ZONE_STATE_MOUNTED: 788 switch (cmd) { 789 case Z_UNMOUNT: 790 if (kernelcall) /* Invalid; can't happen */ 791 abort(); 792 rval = zone_halt(zlogp, B_TRUE); 793 if (rval == 0) { 794 eventstream_write(Z_EVT_ZONE_HALTED); 795 (void) sema_post(&scratch_sem); 796 } 797 break; 798 default: 799 if (kernelcall) /* Invalid; can't happen */ 800 abort(); 801 zerror(zlogp, B_FALSE, "%s operation is invalid " 802 "for zones in state '%s'", z_cmd_name(cmd), 803 zone_state_str(zstate)); 804 rval = -1; 805 break; 806 } 807 break; 808 809 case ZONE_STATE_RUNNING: 810 case ZONE_STATE_SHUTTING_DOWN: 811 case ZONE_STATE_DOWN: 812 switch (cmd) { 813 case Z_READY: 814 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) 815 break; 816 if ((rval = zone_ready(zlogp, B_FALSE)) == 0) 817 eventstream_write(Z_EVT_ZONE_READIED); 818 else 819 eventstream_write(Z_EVT_ZONE_HALTED); 820 break; 821 case Z_BOOT: 822 /* 823 * We could have two clients racing to boot this 824 * zone; the second client loses, but his request 825 * doesn't fail, since the zone is now in the desired 826 * state. 827 */ 828 zerror(zlogp, B_FALSE, "zone is already booted"); 829 rval = 0; 830 break; 831 case Z_HALT: 832 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) 833 break; 834 eventstream_write(Z_EVT_ZONE_HALTED); 835 break; 836 case Z_REBOOT: 837 eventstream_write(Z_EVT_ZONE_REBOOTING); 838 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) { 839 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 840 break; 841 } 842 if ((rval = zone_ready(zlogp, B_FALSE)) != 0) { 843 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 844 break; 845 } 846 rval = zone_bootup(zlogp, ""); 847 audit_put_record(zlogp, uc, rval, "reboot"); 848 if (rval != 0) { 849 (void) zone_halt(zlogp, B_FALSE); 850 eventstream_write(Z_EVT_ZONE_BOOTFAILED); 851 } 852 break; 853 case Z_NOTE_UNINSTALLING: 854 case Z_MOUNT: 855 case Z_UNMOUNT: 856 zerror(zlogp, B_FALSE, "%s operation is invalid " 857 "for zones in state '%s'", z_cmd_name(cmd), 858 zone_state_str(zstate)); 859 rval = -1; 860 break; 861 } 862 break; 863 default: 864 abort(); 865 } 866 867 /* 868 * Because the state of the zone may have changed, we make sure 869 * to wake the console poller, which is in charge of initiating 870 * the shutdown procedure as necessary. 871 */ 872 eventstream_write(Z_EVT_NULL); 873 874 out: 875 (void) mutex_unlock(&lock); 876 if (kernelcall) { 877 rvalp = NULL; 878 rlen = 0; 879 } else { 880 rvalp->rval = rval; 881 } 882 if (uc != NULL) 883 ucred_free(uc); 884 (void) door_return((char *)rvalp, rlen, NULL, 0); 885 thr_exit(NULL); 886 } 887 888 static int 889 setup_door(zlog_t *zlogp) 890 { 891 if ((zone_door = door_create(server, NULL, 892 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 893 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 894 return (-1); 895 } 896 (void) fdetach(zone_door_path); 897 898 if (fattach(zone_door, zone_door_path) != 0) { 899 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 900 (void) door_revoke(zone_door); 901 (void) fdetach(zone_door_path); 902 zone_door = -1; 903 return (-1); 904 } 905 return (0); 906 } 907 908 /* 909 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 910 * is where zoneadmd itself will check to see that another instance of 911 * zoneadmd isn't already controlling this zone. 912 * 913 * The idea here is that we want to open the path to which we will 914 * attach our door, lock it, and then make sure that no-one has beat us 915 * to fattach(3c)ing onto it. 916 * 917 * fattach(3c) is really a mount, so there are actually two possible 918 * vnodes we could be dealing with. Our strategy is as follows: 919 * 920 * - If the file we opened is a regular file (common case): 921 * There is no fattach(3c)ed door, so we have a chance of becoming 922 * the managing zoneadmd. We attempt to lock the file: if it is 923 * already locked, that means someone else raced us here, so we 924 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 925 * that beat us to it. 926 * 927 * - If the file we opened is a namefs file: 928 * This means there is already an established door fattach(3c)'ed 929 * to the rendezvous path. We've lost the race, so we give up. 930 * Note that in this case we also try to grab the file lock, and 931 * will succeed in acquiring it since the vnode locked by the 932 * "winning" zoneadmd was a regular one, and the one we locked was 933 * the fattach(3c)'ed door node. At any rate, no harm is done, and 934 * we just return to zoneadm(1m) which knows to retry. 935 */ 936 static int 937 make_daemon_exclusive(zlog_t *zlogp) 938 { 939 int doorfd = -1; 940 int err, ret = -1; 941 struct stat st; 942 struct flock flock; 943 zone_state_t zstate; 944 945 top: 946 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 947 zerror(zlogp, B_FALSE, "failed to get zone state: %s\n", 948 zonecfg_strerror(err)); 949 goto out; 950 } 951 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 952 S_IREAD|S_IWRITE)) < 0) { 953 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 954 goto out; 955 } 956 if (fstat(doorfd, &st) < 0) { 957 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 958 goto out; 959 } 960 /* 961 * Lock the file to synchronize with other zoneadmd 962 */ 963 flock.l_type = F_WRLCK; 964 flock.l_whence = SEEK_SET; 965 flock.l_start = (off_t)0; 966 flock.l_len = (off_t)0; 967 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 968 /* 969 * Someone else raced us here and grabbed the lock file 970 * first. A warning here is inappropriate since nothing 971 * went wrong. 972 */ 973 goto out; 974 } 975 976 if (strcmp(st.st_fstype, "namefs") == 0) { 977 struct door_info info; 978 979 /* 980 * There is already something fattach()'ed to this file. 981 * Lets see what the door is up to. 982 */ 983 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 984 /* 985 * Another zoneadmd process seems to be in 986 * control of the situation and we don't need to 987 * be here. A warning here is inappropriate 988 * since nothing went wrong. 989 * 990 * If the door has been revoked, the zoneadmd 991 * process currently managing the zone is going 992 * away. We'll return control to zoneadm(1m) 993 * which will try again (by which time zoneadmd 994 * will hopefully have exited). 995 */ 996 goto out; 997 } 998 999 /* 1000 * If we got this far, there's a fattach(3c)'ed door 1001 * that belongs to a process that has exited, which can 1002 * happen if the previous zoneadmd died unexpectedly. 1003 * 1004 * Let user know that something is amiss, but that we can 1005 * recover; if the zone is in the installed state, then don't 1006 * message, since having a running zoneadmd isn't really 1007 * expected/needed. We want to keep occurences of this message 1008 * limited to times when zoneadmd is picking back up from a 1009 * zoneadmd that died while the zone was in some non-trivial 1010 * state. 1011 */ 1012 if (zstate > ZONE_STATE_INSTALLED) { 1013 zerror(zlogp, B_FALSE, 1014 "zone '%s': WARNING: zone is in state '%s', but " 1015 "zoneadmd does not appear to be available; " 1016 "restarted zoneadmd to recover.", 1017 zone_name, zone_state_str(zstate)); 1018 } 1019 1020 (void) fdetach(zone_door_path); 1021 (void) close(doorfd); 1022 goto top; 1023 } 1024 ret = 0; 1025 out: 1026 (void) close(doorfd); 1027 return (ret); 1028 } 1029 1030 int 1031 main(int argc, char *argv[]) 1032 { 1033 int opt; 1034 zoneid_t zid; 1035 priv_set_t *privset; 1036 zone_state_t zstate; 1037 char parents_locale[MAXPATHLEN]; 1038 int err; 1039 1040 pid_t pid; 1041 sigset_t blockset; 1042 sigset_t block_cld; 1043 1044 struct { 1045 sema_t sem; 1046 int status; 1047 zlog_t log; 1048 } *shstate; 1049 size_t shstatelen = getpagesize(); 1050 1051 zlog_t errlog; 1052 zlog_t *zlogp; 1053 1054 progname = get_execbasename(argv[0]); 1055 1056 /* 1057 * Make sure stderr is unbuffered 1058 */ 1059 (void) setbuffer(stderr, NULL, 0); 1060 1061 /* 1062 * Get out of the way of mounted filesystems, since we will daemonize 1063 * soon. 1064 */ 1065 (void) chdir("/"); 1066 1067 /* 1068 * Use the default system umask per PSARC 1998/110 rather than 1069 * anything that may have been set by the caller. 1070 */ 1071 (void) umask(CMASK); 1072 1073 /* 1074 * Initially we want to use our parent's locale. 1075 */ 1076 (void) setlocale(LC_ALL, ""); 1077 (void) textdomain(TEXT_DOMAIN); 1078 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1079 sizeof (parents_locale)); 1080 1081 /* 1082 * This zlog_t is used for writing to stderr 1083 */ 1084 errlog.logfile = stderr; 1085 errlog.buflen = errlog.loglen = 0; 1086 errlog.buf = errlog.log = NULL; 1087 errlog.locale = parents_locale; 1088 1089 /* 1090 * We start off writing to stderr until we're ready to daemonize. 1091 */ 1092 zlogp = &errlog; 1093 1094 /* 1095 * Process options. 1096 */ 1097 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1098 switch (opt) { 1099 case 'R': 1100 zonecfg_set_root(optarg); 1101 break; 1102 case 'z': 1103 zone_name = optarg; 1104 break; 1105 default: 1106 usage(); 1107 } 1108 } 1109 1110 if (zone_name == NULL) 1111 usage(); 1112 1113 /* 1114 * Because usage() prints directly to stderr, it has gettext() 1115 * wrapping, which depends on the locale. But since zerror() calls 1116 * localize() which tweaks the locale, it is not safe to call zerror() 1117 * until after the last call to usage(). Fortunately, the last call 1118 * to usage() is just above and the first call to zerror() is just 1119 * below. Don't mess this up. 1120 */ 1121 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1122 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1123 GLOBAL_ZONENAME); 1124 return (1); 1125 } 1126 1127 if (zone_get_id(zone_name, &zid) != 0) { 1128 zerror(zlogp, B_FALSE, "could not manage %s: %s\n", zone_name, 1129 zonecfg_strerror(Z_NO_ZONE)); 1130 return (1); 1131 } 1132 1133 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1134 zerror(zlogp, B_FALSE, "failed to get zone state: %s\n", 1135 zonecfg_strerror(err)); 1136 return (1); 1137 } 1138 if (zstate < ZONE_STATE_INSTALLED) { 1139 zerror(zlogp, B_FALSE, 1140 "cannot manage a zone which is in state '%s'", 1141 zone_state_str(zstate)); 1142 return (1); 1143 } 1144 1145 /* 1146 * Check that we have all privileges. It would be nice to pare 1147 * this down, but this is at least a first cut. 1148 */ 1149 if ((privset = priv_allocset()) == NULL) { 1150 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1151 return (1); 1152 } 1153 1154 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 1155 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 1156 priv_freeset(privset); 1157 return (1); 1158 } 1159 1160 if (priv_isfullset(privset) == B_FALSE) { 1161 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 1162 "run this command (all privs required)\n"); 1163 priv_freeset(privset); 1164 return (1); 1165 } 1166 priv_freeset(privset); 1167 1168 if (mkzonedir(zlogp) != 0) 1169 return (1); 1170 1171 /* 1172 * Pre-fork: setup shared state 1173 */ 1174 if ((shstate = (void *)mmap(NULL, shstatelen, 1175 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 1176 MAP_FAILED) { 1177 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 1178 return (1); 1179 } 1180 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 1181 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 1182 (void) munmap((char *)shstate, shstatelen); 1183 return (1); 1184 } 1185 shstate->log.logfile = NULL; 1186 shstate->log.buflen = shstatelen - sizeof (*shstate); 1187 shstate->log.loglen = shstate->log.buflen; 1188 shstate->log.buf = (char *)shstate + sizeof (*shstate); 1189 shstate->log.log = shstate->log.buf; 1190 shstate->log.locale = parents_locale; 1191 shstate->status = -1; 1192 1193 /* 1194 * We need a SIGCHLD handler so the sema_wait() below will wake 1195 * up if the child dies without doing a sema_post(). 1196 */ 1197 (void) sigset(SIGCHLD, sigchld); 1198 /* 1199 * We must mask SIGCHLD until after we've coped with the fork 1200 * sufficiently to deal with it; otherwise we can race and 1201 * receive the signal before pid has been initialized 1202 * (yes, this really happens). 1203 */ 1204 (void) sigemptyset(&block_cld); 1205 (void) sigaddset(&block_cld, SIGCHLD); 1206 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 1207 1208 /* 1209 * Do not let another thread localize a message while we are forking. 1210 */ 1211 (void) mutex_lock(&msglock); 1212 pid = fork(); 1213 (void) mutex_unlock(&msglock); 1214 if (pid == -1) { 1215 zerror(zlogp, B_TRUE, "could not fork"); 1216 return (1); 1217 1218 } else if (pid > 0) { /* parent */ 1219 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1220 /* 1221 * This marks a window of vulnerability in which we receive 1222 * the SIGCLD before falling into sema_wait (normally we would 1223 * get woken up from sema_wait with EINTR upon receipt of 1224 * SIGCLD). So we may need to use some other scheme like 1225 * sema_posting in the sigcld handler. 1226 * blech 1227 */ 1228 (void) sema_wait(&shstate->sem); 1229 (void) sema_destroy(&shstate->sem); 1230 if (shstate->status != 0) 1231 (void) waitpid(pid, NULL, WNOHANG); 1232 /* 1233 * It's ok if we die with SIGPIPE. It's not like we could have 1234 * done anything about it. 1235 */ 1236 (void) fprintf(stderr, "%s", shstate->log.buf); 1237 _exit(shstate->status == 0 ? 0 : 1); 1238 } 1239 1240 /* 1241 * The child charges on. 1242 */ 1243 (void) sigset(SIGCHLD, SIG_DFL); 1244 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1245 1246 /* 1247 * SIGPIPE can be delivered if we write to a socket for which the 1248 * peer endpoint is gone. That can lead to too-early termination 1249 * of zoneadmd, and that's not good eats. 1250 */ 1251 (void) sigset(SIGPIPE, SIG_IGN); 1252 /* 1253 * Stop using stderr 1254 */ 1255 zlogp = &shstate->log; 1256 1257 /* 1258 * We don't need stdout/stderr from now on. 1259 */ 1260 closefrom(0); 1261 1262 /* 1263 * Initialize the syslog zlog_t. This needs to be done after 1264 * the call to closefrom(). 1265 */ 1266 logsys.buf = logsys.log = NULL; 1267 logsys.buflen = logsys.loglen = 0; 1268 logsys.logfile = NULL; 1269 logsys.locale = DEFAULT_LOCALE; 1270 1271 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 1272 1273 /* 1274 * The eventstream is used to publish state changes in the zone 1275 * from the door threads to the console I/O poller. 1276 */ 1277 if (eventstream_init() == -1) { 1278 zerror(zlogp, B_TRUE, "unable to create eventstream"); 1279 goto child_out; 1280 } 1281 1282 (void) snprintf(zone_door_path, sizeof (zone_door_path), 1283 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 1284 1285 /* 1286 * See if another zoneadmd is running for this zone. If not, then we 1287 * can now modify system state. 1288 */ 1289 if (make_daemon_exclusive(zlogp) == -1) 1290 goto child_out; 1291 1292 1293 /* 1294 * Create/join a new session; we need to be careful of what we do with 1295 * the console from now on so we don't end up being the session leader 1296 * for the terminal we're going to be handing out. 1297 */ 1298 (void) setsid(); 1299 1300 /* 1301 * This thread shouldn't be receiving any signals; in particular, 1302 * SIGCHLD should be received by the thread doing the fork(). 1303 */ 1304 (void) sigfillset(&blockset); 1305 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 1306 1307 /* 1308 * Setup the console device and get ready to serve the console; 1309 * once this has completed, we're ready to let console clients 1310 * make an attempt to connect (they will block until 1311 * serve_console_sock() below gets called, and any pending 1312 * connection is accept()ed). 1313 */ 1314 if (!zonecfg_in_alt_root() && init_console(zlogp) == -1) 1315 goto child_out; 1316 1317 /* 1318 * Take the lock now, so that when the door server gets going, we 1319 * are guaranteed that it won't take a request until we are sure 1320 * that everything is completely set up. See the child_out: label 1321 * below to see why this matters. 1322 */ 1323 (void) mutex_lock(&lock); 1324 1325 /* Init semaphore for scratch zones. */ 1326 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 1327 zerror(zlogp, B_TRUE, 1328 "failed to initialize semaphore for scratch zone"); 1329 goto child_out; 1330 } 1331 1332 /* 1333 * Note: door setup must occur *after* the console is setup. 1334 * This is so that as zlogin tests the door to see if zoneadmd 1335 * is ready yet, we know that the console will get serviced 1336 * once door_info() indicates that the door is "up". 1337 */ 1338 if (setup_door(zlogp) == -1) 1339 goto child_out; 1340 1341 /* 1342 * Things seem OK so far; tell the parent process that we're done 1343 * with setup tasks. This will cause the parent to exit, signalling 1344 * to zoneadm, zlogin, or whatever forked it that we are ready to 1345 * service requests. 1346 */ 1347 shstate->status = 0; 1348 (void) sema_post(&shstate->sem); 1349 (void) munmap((char *)shstate, shstatelen); 1350 shstate = NULL; 1351 1352 (void) mutex_unlock(&lock); 1353 1354 /* 1355 * zlogp is now invalid, so reset it to the syslog logger. 1356 */ 1357 zlogp = &logsys; 1358 1359 /* 1360 * Now that we are free of any parents, switch to the default locale. 1361 */ 1362 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 1363 1364 /* 1365 * At this point the setup portion of main() is basically done, so 1366 * we reuse this thread to manage the zone console. When 1367 * serve_console() has returned, we are past the point of no return 1368 * in the life of this zoneadmd. 1369 */ 1370 if (zonecfg_in_alt_root()) { 1371 /* 1372 * This is just awful, but mounted scratch zones don't (and 1373 * can't) have consoles. We just wait for unmount instead. 1374 */ 1375 while (sema_wait(&scratch_sem) == EINTR) 1376 ; 1377 } else { 1378 serve_console(zlogp); 1379 assert(in_death_throes); 1380 } 1381 1382 /* 1383 * This is the next-to-last part of the exit interlock. Upon calling 1384 * fdetach(), the door will go unreferenced; once any 1385 * outstanding requests (like the door thread doing Z_HALT) are 1386 * done, the door will get an UNREF notification; when it handles 1387 * the UNREF, the door server will cause the exit. 1388 */ 1389 assert(!MUTEX_HELD(&lock)); 1390 (void) fdetach(zone_door_path); 1391 for (;;) 1392 (void) pause(); 1393 1394 child_out: 1395 assert(pid == 0); 1396 if (shstate != NULL) { 1397 shstate->status = -1; 1398 (void) sema_post(&shstate->sem); 1399 (void) munmap((char *)shstate, shstatelen); 1400 } 1401 1402 /* 1403 * This might trigger an unref notification, but if so, 1404 * we are still holding the lock, so our call to exit will 1405 * ultimately win the race and will publish the right exit 1406 * code. 1407 */ 1408 if (zone_door != -1) { 1409 assert(MUTEX_HELD(&lock)); 1410 (void) door_revoke(zone_door); 1411 (void) fdetach(zone_door_path); 1412 } 1413 return (1); /* return from main() forcibly exits an MT process */ 1414 } 1415