1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * zoneadmd manages zones; one zoneadmd process is launched for each 31 * non-global zone on the system. This daemon juggles four jobs: 32 * 33 * - Implement setup and teardown of the zone "virtual platform": mount and 34 * unmount filesystems; create and destroy network interfaces; communicate 35 * with devfsadmd to lay out devices for the zone; instantiate the zone 36 * console device; configure process runtime attributes such as resource 37 * controls, pool bindings, fine-grained privileges. 38 * 39 * - Launch the zone's init(1M) process. 40 * 41 * - Implement a door server; clients (like zoneadm) connect to the door 42 * server and request zone state changes. The kernel is also a client of 43 * this door server. A request to halt or reboot the zone which originates 44 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 45 * 46 * One minor problem is that messages emitted by zoneadmd need to be passed 47 * back to the zoneadm process making the request. These messages need to 48 * be rendered in the client's locale; so, this is passed in as part of the 49 * request. The exception is the kernel upcall to zoneadmd, in which case 50 * messages are syslog'd. 51 * 52 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 53 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 54 * strings which do not need to be translated. 55 * 56 * - Act as a console server for zlogin -C processes; see comments in zcons.c 57 * for more information about the zone console architecture. 58 * 59 * DESIGN NOTES 60 * 61 * Restart: 62 * A chief design constraint of zoneadmd is that it should be restartable in 63 * the case that the administrator kills it off, or it suffers a fatal error, 64 * without the running zone being impacted; this is akin to being able to 65 * reboot the service processor of a server without affecting the OS instance. 66 */ 67 68 #include <sys/param.h> 69 #include <sys/mman.h> 70 #include <sys/types.h> 71 #include <sys/stat.h> 72 #include <sys/sysmacros.h> 73 74 #include <bsm/adt.h> 75 #include <bsm/adt_event.h> 76 77 #include <alloca.h> 78 #include <assert.h> 79 #include <errno.h> 80 #include <door.h> 81 #include <fcntl.h> 82 #include <locale.h> 83 #include <signal.h> 84 #include <stdarg.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 #include <strings.h> 89 #include <synch.h> 90 #include <syslog.h> 91 #include <thread.h> 92 #include <unistd.h> 93 #include <wait.h> 94 #include <limits.h> 95 #include <zone.h> 96 #include <libcontract.h> 97 #include <libcontract_priv.h> 98 #include <sys/contract/process.h> 99 #include <sys/ctfs.h> 100 101 #include <libzonecfg.h> 102 #include "zoneadmd.h" 103 104 static char *progname; 105 char *zone_name; /* zone which we are managing */ 106 static zoneid_t zone_id; 107 108 static zlog_t logsys; 109 110 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 111 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 112 113 static sema_t scratch_sem; /* for scratch zones */ 114 115 static char zone_door_path[MAXPATHLEN]; 116 static int zone_door = -1; 117 118 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 119 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 120 121 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 122 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 123 #endif 124 125 #define PATH_TO_INIT "/sbin/init" 126 127 #define DEFAULT_LOCALE "C" 128 129 static const char * 130 z_cmd_name(zone_cmd_t zcmd) 131 { 132 /* This list needs to match the enum in sys/zone.h */ 133 static const char *zcmdstr[] = { 134 "ready", "boot", "reboot", "halt", "note_uninstalling", 135 "mount", "unmount" 136 }; 137 138 if (zcmd >= sizeof (zcmdstr) / sizeof (*zcmdstr)) 139 return ("unknown"); 140 else 141 return (zcmdstr[(int)zcmd]); 142 } 143 144 static char * 145 get_execbasename(char *execfullname) 146 { 147 char *last_slash, *execbasename; 148 149 /* guard against '/' at end of command invocation */ 150 for (;;) { 151 last_slash = strrchr(execfullname, '/'); 152 if (last_slash == NULL) { 153 execbasename = execfullname; 154 break; 155 } else { 156 execbasename = last_slash + 1; 157 if (*execbasename == '\0') { 158 *last_slash = '\0'; 159 continue; 160 } 161 break; 162 } 163 } 164 return (execbasename); 165 } 166 167 static void 168 usage(void) 169 { 170 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 171 (void) fprintf(stderr, 172 gettext("\tNote: %s should not be run directly.\n"), progname); 173 exit(2); 174 } 175 176 /* ARGSUSED */ 177 static void 178 sigchld(int sig) 179 { 180 } 181 182 char * 183 localize_msg(char *locale, const char *msg) 184 { 185 char *out; 186 187 (void) mutex_lock(&msglock); 188 (void) setlocale(LC_MESSAGES, locale); 189 out = gettext(msg); 190 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 191 (void) mutex_unlock(&msglock); 192 return (out); 193 } 194 195 /* PRINTFLIKE3 */ 196 void 197 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 198 { 199 va_list alist; 200 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 201 char *bp; 202 int saved_errno = errno; 203 204 if (zlogp == NULL) 205 return; 206 if (zlogp == &logsys) 207 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 208 zone_name); 209 else 210 buf[0] = '\0'; 211 bp = &(buf[strlen(buf)]); 212 213 /* 214 * In theory, the locale pointer should be set to either "C" or a 215 * char array, so it should never be NULL 216 */ 217 assert(zlogp->locale != NULL); 218 /* Locale is per process, but we are multi-threaded... */ 219 fmt = localize_msg(zlogp->locale, fmt); 220 221 va_start(alist, fmt); 222 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 223 va_end(alist); 224 bp = &(buf[strlen(buf)]); 225 if (use_strerror) 226 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 227 strerror(saved_errno)); 228 if (zlogp == &logsys) { 229 (void) syslog(LOG_ERR, "%s", buf); 230 } else if (zlogp->logfile != NULL) { 231 (void) fprintf(zlogp->logfile, "%s\n", buf); 232 } else { 233 size_t buflen; 234 size_t copylen; 235 236 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 237 copylen = MIN(buflen, zlogp->loglen); 238 zlogp->log += copylen; 239 zlogp->loglen -= copylen; 240 } 241 } 242 243 static int 244 mkzonedir(zlog_t *zlogp) 245 { 246 struct stat st; 247 /* 248 * We must create and lock everyone but root out of ZONES_TMPDIR 249 * since anyone can open any UNIX domain socket, regardless of 250 * its file system permissions. Sigh... 251 */ 252 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 253 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 254 return (-1); 255 } 256 /* paranoia */ 257 if ((stat(ZONES_TMPDIR, &st) < 0) || ((st.st_mode & S_IFDIR) == 0)) { 258 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 259 return (-1); 260 } 261 (void) chmod(ZONES_TMPDIR, S_IRWXU); 262 return (0); 263 } 264 265 /* 266 * Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is 267 * 'true' if this is being invoked as part of the processing for the "mount" 268 * subcommand. 269 */ 270 static int 271 zone_ready(zlog_t *zlogp, boolean_t mount_cmd) 272 { 273 int err; 274 275 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 276 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 277 zonecfg_strerror(err)); 278 return (-1); 279 } 280 281 if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { 282 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 283 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 284 zonecfg_strerror(err)); 285 return (-1); 286 } 287 if (vplat_bringup(zlogp, mount_cmd) != 0) { 288 bringup_failure_recovery = B_TRUE; 289 (void) vplat_teardown(NULL, mount_cmd); 290 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 291 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 292 zonecfg_strerror(err)); 293 return (-1); 294 } 295 296 return (0); 297 } 298 299 static int 300 init_template() 301 { 302 int fd; 303 int err = 0; 304 305 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 306 if (fd == -1) 307 return (-1); 308 309 /* 310 * For now, zoneadmd doesn't do anything with the contract. 311 * Deliver no events, don't inherit, and allow it to be orphaned. 312 */ 313 err |= ct_tmpl_set_critical(fd, 0); 314 err |= ct_tmpl_set_informative(fd, 0); 315 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 316 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 317 if (err || ct_tmpl_activate(fd)) { 318 (void) close(fd); 319 return (-1); 320 } 321 322 return (fd); 323 } 324 325 static int 326 mount_early_fs(zlog_t *zlogp, zoneid_t zoneid, const char *spec, 327 const char *dir, char *fstype) 328 { 329 pid_t child; 330 int child_status; 331 int tmpl_fd; 332 ctid_t ct; 333 334 if ((tmpl_fd = init_template()) == -1) { 335 zerror(zlogp, B_TRUE, "failed to create contract"); 336 return (-1); 337 } 338 339 if ((child = fork()) == -1) { 340 (void) ct_tmpl_clear(tmpl_fd); 341 (void) close(tmpl_fd); 342 zerror(zlogp, B_TRUE, "failed to fork"); 343 return (-1); 344 345 } else if (child == 0) { /* child */ 346 (void) ct_tmpl_clear(tmpl_fd); 347 /* 348 * Even though there are no procs running in the zone, we 349 * do this for paranoia's sake. 350 */ 351 (void) closefrom(0); 352 353 if (zone_enter(zoneid) == -1) { 354 _exit(errno); 355 } 356 if (mount(spec, dir, MS_DATA, fstype, NULL, 0, NULL, 0) != 0) 357 _exit(errno); 358 _exit(0); 359 } 360 361 /* parent */ 362 if (contract_latest(&ct) == -1) 363 ct = -1; 364 (void) ct_tmpl_clear(tmpl_fd); 365 (void) close(tmpl_fd); 366 if (waitpid(child, &child_status, 0) != child) { 367 /* unexpected: we must have been signalled */ 368 (void) contract_abandon_id(ct); 369 return (-1); 370 } 371 (void) contract_abandon_id(ct); 372 if (WEXITSTATUS(child_status) != 0) { 373 errno = WEXITSTATUS(child_status); 374 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 375 return (-1); 376 } 377 378 return (0); 379 } 380 381 static int 382 zone_mount_early(zlog_t *zlogp, zoneid_t zoneid) 383 { 384 if (mount_early_fs(zlogp, zoneid, "/proc", "/proc", "proc") != 0) 385 return (-1); 386 387 if (mount_early_fs(zlogp, zoneid, "ctfs", CTFS_ROOT, "ctfs") != 0) 388 return (-1); 389 390 if (mount_early_fs(zlogp, zoneid, "swap", "/etc/svc/volatile", 391 "tmpfs") != 0) 392 return (-1); 393 394 if (mount_early_fs(zlogp, zoneid, "mnttab", "/etc/mnttab", 395 "mntfs") != 0) 396 return (-1); 397 398 return (0); 399 } 400 401 static int 402 zone_bootup(zlog_t *zlogp, const char *bootargs) 403 { 404 zoneid_t zoneid; 405 struct stat st; 406 char zroot[MAXPATHLEN], initpath[MAXPATHLEN]; 407 408 if (init_console_slave(zlogp) != 0) 409 return (-1); 410 reset_slave_terminal(zlogp); 411 412 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 413 zerror(zlogp, B_TRUE, "unable to get zoneid"); 414 return (-1); 415 } 416 417 if (zone_mount_early(zlogp, zoneid) != 0) 418 return (-1); 419 420 /* 421 * Try to anticipate possible problems: Make sure init is executable. 422 */ 423 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 424 zerror(zlogp, B_FALSE, "unable to determine zone root"); 425 return (-1); 426 } 427 (void) snprintf(initpath, sizeof (initpath), "%s%s", zroot, 428 PATH_TO_INIT); 429 430 if (stat(initpath, &st) == -1) { 431 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 432 return (-1); 433 } 434 435 if ((st.st_mode & S_IXUSR) == 0) { 436 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 437 return (-1); 438 } 439 440 if (zone_boot(zoneid, bootargs) == -1) { 441 zerror(zlogp, B_TRUE, "unable to boot zone"); 442 return (-1); 443 } 444 445 return (0); 446 } 447 448 static int 449 zone_halt(zlog_t *zlogp, boolean_t unmount_cmd) 450 { 451 int err; 452 453 if (vplat_teardown(zlogp, unmount_cmd) != 0) { 454 if (!bringup_failure_recovery) 455 zerror(zlogp, B_FALSE, "unable to destroy zone"); 456 return (-1); 457 } 458 459 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 460 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 461 zonecfg_strerror(err)); 462 463 return (0); 464 } 465 466 /* 467 * Generate AUE_zone_state for a command that boots a zone. 468 */ 469 static void 470 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 471 char *new_state) 472 { 473 adt_session_data_t *ah; 474 adt_event_data_t *event; 475 int pass_fail, fail_reason; 476 477 if (!adt_audit_enabled()) 478 return; 479 480 if (return_val == 0) { 481 pass_fail = ADT_SUCCESS; 482 fail_reason = ADT_SUCCESS; 483 } else { 484 pass_fail = ADT_FAILURE; 485 fail_reason = ADT_FAIL_VALUE_PROGRAM; 486 } 487 488 if (adt_start_session(&ah, NULL, 0)) { 489 zerror(zlogp, B_TRUE, gettext("audit failure.")); 490 return; 491 } 492 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 493 zerror(zlogp, B_TRUE, gettext("audit failure.")); 494 (void) adt_end_session(ah); 495 return; 496 } 497 498 event = adt_alloc_event(ah, ADT_zone_state); 499 if (event == NULL) { 500 zerror(zlogp, B_TRUE, gettext("audit failure.")); 501 (void) adt_end_session(ah); 502 return; 503 } 504 event->adt_zone_state.zonename = zone_name; 505 event->adt_zone_state.new_state = new_state; 506 507 if (adt_put_event(event, pass_fail, fail_reason)) 508 zerror(zlogp, B_TRUE, gettext("audit failure.")); 509 510 adt_free_event(event); 511 512 (void) adt_end_session(ah); 513 } 514 515 /* 516 * The main routine for the door server that deals with zone state transitions. 517 */ 518 /* ARGSUSED */ 519 static void 520 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 521 uint_t n_desc) 522 { 523 ucred_t *uc = NULL; 524 const priv_set_t *eset; 525 526 zone_state_t zstate; 527 zone_cmd_t cmd; 528 zone_cmd_arg_t *zargp; 529 530 boolean_t kernelcall; 531 532 int rval = -1; 533 uint64_t uniqid; 534 zoneid_t zoneid = -1; 535 zlog_t zlog; 536 zlog_t *zlogp; 537 zone_cmd_rval_t *rvalp; 538 size_t rlen = getpagesize(); /* conservative */ 539 540 /* LINTED E_BAD_PTR_CAST_ALIGN */ 541 zargp = (zone_cmd_arg_t *)args; 542 543 /* 544 * When we get the door unref message, we've fdetach'd the door, and 545 * it is time for us to shut down zoneadmd. 546 */ 547 if (zargp == DOOR_UNREF_DATA) { 548 /* 549 * See comment at end of main() for info on the last rites. 550 */ 551 exit(0); 552 } 553 554 if (zargp == NULL) { 555 (void) door_return(NULL, 0, 0, 0); 556 } 557 558 rvalp = alloca(rlen); 559 bzero(rvalp, rlen); 560 zlog.logfile = NULL; 561 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 562 zlog.buf = rvalp->errbuf; 563 zlog.log = zlog.buf; 564 /* defer initialization of zlog.locale until after credential check */ 565 zlogp = &zlog; 566 567 if (alen != sizeof (zone_cmd_arg_t)) { 568 /* 569 * This really shouldn't be happening. 570 */ 571 zerror(&logsys, B_FALSE, "invalid argument"); 572 goto out; 573 } 574 cmd = zargp->cmd; 575 576 if (door_ucred(&uc) != 0) { 577 zerror(&logsys, B_TRUE, "door_ucred"); 578 goto out; 579 } 580 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 581 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 582 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 583 ucred_geteuid(uc) != 0)) { 584 zerror(&logsys, B_FALSE, "insufficient privileges"); 585 goto out; 586 } 587 588 kernelcall = ucred_getpid(uc) == 0; 589 590 /* 591 * This is safe because we only use a zlog_t throughout the 592 * duration of a door call; i.e., by the time the pointer 593 * might become invalid, the door call would be over. 594 */ 595 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 596 597 (void) mutex_lock(&lock); 598 599 /* 600 * Once we start to really die off, we don't want more connections. 601 */ 602 if (in_death_throes) { 603 (void) mutex_unlock(&lock); 604 ucred_free(uc); 605 (void) door_return(NULL, 0, 0, 0); 606 thr_exit(NULL); 607 } 608 609 /* 610 * Check for validity of command. 611 */ 612 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_REBOOT && 613 cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING && cmd != Z_MOUNT && 614 cmd != Z_UNMOUNT) { 615 zerror(&logsys, B_FALSE, "invalid command %d", (int)cmd); 616 goto out; 617 } 618 619 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 620 /* 621 * Can't happen 622 */ 623 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 624 cmd); 625 goto out; 626 } 627 /* 628 * We ignore the possibility of someone calling zone_create(2) 629 * explicitly; all requests must come through zoneadmd. 630 */ 631 if (zone_get_state(zone_name, &zstate) != Z_OK) { 632 /* 633 * Something terribly wrong happened 634 */ 635 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 636 goto out; 637 } 638 639 if (kernelcall) { 640 /* 641 * Kernel-initiated requests may lose their validity if the 642 * zone_t the kernel was referring to has gone away. 643 */ 644 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 645 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 646 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 647 /* 648 * We're not talking about the same zone. The request 649 * must have arrived too late. Return error. 650 */ 651 rval = -1; 652 goto out; 653 } 654 zlogp = &logsys; /* Log errors to syslog */ 655 } 656 657 switch (zstate) { 658 case ZONE_STATE_CONFIGURED: 659 case ZONE_STATE_INCOMPLETE: 660 /* 661 * Not our area of expertise; we just print a nice message 662 * and die off. 663 */ 664 zerror(zlogp, B_FALSE, 665 "%s operation is invalid for zones in state '%s'", 666 z_cmd_name(cmd), zone_state_str(zstate)); 667 break; 668 669 case ZONE_STATE_INSTALLED: 670 switch (cmd) { 671 case Z_READY: 672 rval = zone_ready(zlogp, B_FALSE); 673 if (rval == 0) 674 eventstream_write(Z_EVT_ZONE_READIED); 675 break; 676 case Z_BOOT: 677 eventstream_write(Z_EVT_ZONE_BOOTING); 678 if ((rval = zone_ready(zlogp, B_FALSE)) == 0) 679 rval = zone_bootup(zlogp, zargp->bootbuf); 680 audit_put_record(zlogp, uc, rval, "boot"); 681 if (rval != 0) { 682 bringup_failure_recovery = B_TRUE; 683 (void) zone_halt(zlogp, B_FALSE); 684 } 685 break; 686 case Z_HALT: 687 if (kernelcall) /* Invalid; can't happen */ 688 abort(); 689 /* 690 * We could have two clients racing to halt this 691 * zone; the second client loses, but his request 692 * doesn't fail, since the zone is now in the desired 693 * state. 694 */ 695 zerror(zlogp, B_FALSE, "zone is already halted"); 696 rval = 0; 697 break; 698 case Z_REBOOT: 699 if (kernelcall) /* Invalid; can't happen */ 700 abort(); 701 zerror(zlogp, B_FALSE, "%s operation is invalid " 702 "for zones in state '%s'", z_cmd_name(cmd), 703 zone_state_str(zstate)); 704 rval = -1; 705 break; 706 case Z_NOTE_UNINSTALLING: 707 if (kernelcall) /* Invalid; can't happen */ 708 abort(); 709 /* 710 * Tell the console to print out a message about this. 711 * Once it does, we will be in_death_throes. 712 */ 713 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 714 break; 715 case Z_MOUNT: 716 if (kernelcall) /* Invalid; can't happen */ 717 abort(); 718 rval = zone_ready(zlogp, B_TRUE); 719 if (rval == 0) 720 rval = zone_mount_early(zlogp, zone_id); 721 /* 722 * Ordinarily, /dev/fd would be mounted inside the zone 723 * by svc:/system/filesystem/usr:default, but since 724 * we're not booting the zone, we need to do this 725 * manually. 726 */ 727 if (rval == 0) 728 rval = mount_early_fs(zlogp, zone_id, "fd", 729 "/dev/fd", "fd"); 730 break; 731 case Z_UNMOUNT: 732 if (kernelcall) /* Invalid; can't happen */ 733 abort(); 734 zerror(zlogp, B_FALSE, "zone is already unmounted"); 735 rval = 0; 736 break; 737 } 738 break; 739 740 case ZONE_STATE_READY: 741 switch (cmd) { 742 case Z_READY: 743 /* 744 * We could have two clients racing to ready this 745 * zone; the second client loses, but his request 746 * doesn't fail, since the zone is now in the desired 747 * state. 748 */ 749 zerror(zlogp, B_FALSE, "zone is already ready"); 750 rval = 0; 751 break; 752 case Z_BOOT: 753 eventstream_write(Z_EVT_ZONE_BOOTING); 754 rval = zone_bootup(zlogp, zargp->bootbuf); 755 audit_put_record(zlogp, uc, rval, "boot"); 756 if (rval != 0) { 757 bringup_failure_recovery = B_TRUE; 758 (void) zone_halt(zlogp, B_FALSE); 759 } 760 break; 761 case Z_HALT: 762 if (kernelcall) /* Invalid; can't happen */ 763 abort(); 764 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) 765 break; 766 eventstream_write(Z_EVT_ZONE_HALTED); 767 break; 768 case Z_REBOOT: 769 case Z_NOTE_UNINSTALLING: 770 case Z_MOUNT: 771 case Z_UNMOUNT: 772 if (kernelcall) /* Invalid; can't happen */ 773 abort(); 774 zerror(zlogp, B_FALSE, "%s operation is invalid " 775 "for zones in state '%s'", z_cmd_name(cmd), 776 zone_state_str(zstate)); 777 rval = -1; 778 break; 779 } 780 break; 781 782 case ZONE_STATE_MOUNTED: 783 switch (cmd) { 784 case Z_UNMOUNT: 785 if (kernelcall) /* Invalid; can't happen */ 786 abort(); 787 rval = zone_halt(zlogp, B_TRUE); 788 if (rval == 0) 789 (void) sema_post(&scratch_sem); 790 break; 791 default: 792 if (kernelcall) /* Invalid; can't happen */ 793 abort(); 794 zerror(zlogp, B_FALSE, "%s operation is invalid " 795 "for zones in state '%s'", z_cmd_name(cmd), 796 zone_state_str(zstate)); 797 rval = -1; 798 break; 799 } 800 break; 801 802 case ZONE_STATE_RUNNING: 803 case ZONE_STATE_SHUTTING_DOWN: 804 case ZONE_STATE_DOWN: 805 switch (cmd) { 806 case Z_READY: 807 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) 808 break; 809 if ((rval = zone_ready(zlogp, B_FALSE)) == 0) 810 eventstream_write(Z_EVT_ZONE_READIED); 811 break; 812 case Z_BOOT: 813 /* 814 * We could have two clients racing to boot this 815 * zone; the second client loses, but his request 816 * doesn't fail, since the zone is now in the desired 817 * state. 818 */ 819 zerror(zlogp, B_FALSE, "zone is already booted"); 820 rval = 0; 821 break; 822 case Z_HALT: 823 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) 824 break; 825 eventstream_write(Z_EVT_ZONE_HALTED); 826 break; 827 case Z_REBOOT: 828 eventstream_write(Z_EVT_ZONE_REBOOTING); 829 if ((rval = zone_halt(zlogp, B_FALSE)) != 0) 830 break; 831 if ((rval = zone_ready(zlogp, B_FALSE)) == 0) { 832 rval = zone_bootup(zlogp, ""); 833 audit_put_record(zlogp, uc, rval, "reboot"); 834 if (rval != 0) 835 (void) zone_halt(zlogp, B_FALSE); 836 } 837 break; 838 case Z_NOTE_UNINSTALLING: 839 case Z_MOUNT: 840 case Z_UNMOUNT: 841 zerror(zlogp, B_FALSE, "%s operation is invalid " 842 "for zones in state '%s'", z_cmd_name(cmd), 843 zone_state_str(zstate)); 844 rval = -1; 845 break; 846 } 847 break; 848 default: 849 abort(); 850 } 851 852 /* 853 * Because the state of the zone may have changed, we make sure 854 * to wake the console poller, which is in charge of initiating 855 * the shutdown procedure as necessary. 856 */ 857 eventstream_write(Z_EVT_NULL); 858 859 out: 860 (void) mutex_unlock(&lock); 861 if (kernelcall) { 862 rvalp = NULL; 863 rlen = 0; 864 } else { 865 rvalp->rval = rval; 866 } 867 if (uc != NULL) 868 ucred_free(uc); 869 (void) door_return((char *)rvalp, rlen, NULL, 0); 870 thr_exit(NULL); 871 } 872 873 static int 874 setup_door(zlog_t *zlogp) 875 { 876 if ((zone_door = door_create(server, NULL, 877 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 878 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 879 return (-1); 880 } 881 (void) fdetach(zone_door_path); 882 883 if (fattach(zone_door, zone_door_path) != 0) { 884 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 885 (void) door_revoke(zone_door); 886 (void) fdetach(zone_door_path); 887 zone_door = -1; 888 return (-1); 889 } 890 return (0); 891 } 892 893 /* 894 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 895 * is where zoneadmd itself will check to see that another instance of 896 * zoneadmd isn't already controlling this zone. 897 * 898 * The idea here is that we want to open the path to which we will 899 * attach our door, lock it, and then make sure that no-one has beat us 900 * to fattach(3c)ing onto it. 901 * 902 * fattach(3c) is really a mount, so there are actually two possible 903 * vnodes we could be dealing with. Our strategy is as follows: 904 * 905 * - If the file we opened is a regular file (common case): 906 * There is no fattach(3c)ed door, so we have a chance of becoming 907 * the managing zoneadmd. We attempt to lock the file: if it is 908 * already locked, that means someone else raced us here, so we 909 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 910 * that beat us to it. 911 * 912 * - If the file we opened is a namefs file: 913 * This means there is already an established door fattach(3c)'ed 914 * to the rendezvous path. We've lost the race, so we give up. 915 * Note that in this case we also try to grab the file lock, and 916 * will succeed in acquiring it since the vnode locked by the 917 * "winning" zoneadmd was a regular one, and the one we locked was 918 * the fattach(3c)'ed door node. At any rate, no harm is done, and 919 * we just return to zoneadm(1m) which knows to retry. 920 */ 921 static int 922 make_daemon_exclusive(zlog_t *zlogp) 923 { 924 int doorfd = -1; 925 int err, ret = -1; 926 struct stat st; 927 struct flock flock; 928 zone_state_t zstate; 929 930 top: 931 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 932 zerror(zlogp, B_FALSE, "failed to get zone state: %s\n", 933 zonecfg_strerror(err)); 934 goto out; 935 } 936 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 937 S_IREAD|S_IWRITE)) < 0) { 938 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 939 goto out; 940 } 941 if (fstat(doorfd, &st) < 0) { 942 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 943 goto out; 944 } 945 /* 946 * Lock the file to synchronize with other zoneadmd 947 */ 948 flock.l_type = F_WRLCK; 949 flock.l_whence = SEEK_SET; 950 flock.l_start = (off_t)0; 951 flock.l_len = (off_t)0; 952 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 953 /* 954 * Someone else raced us here and grabbed the lock file 955 * first. A warning here is inappropriate since nothing 956 * went wrong. 957 */ 958 goto out; 959 } 960 961 if (strcmp(st.st_fstype, "namefs") == 0) { 962 struct door_info info; 963 964 /* 965 * There is already something fattach()'ed to this file. 966 * Lets see what the door is up to. 967 */ 968 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 969 /* 970 * Another zoneadmd process seems to be in 971 * control of the situation and we don't need to 972 * be here. A warning here is inappropriate 973 * since nothing went wrong. 974 * 975 * If the door has been revoked, the zoneadmd 976 * process currently managing the zone is going 977 * away. We'll return control to zoneadm(1m) 978 * which will try again (by which time zoneadmd 979 * will hopefully have exited). 980 */ 981 goto out; 982 } 983 984 /* 985 * If we got this far, there's a fattach(3c)'ed door 986 * that belongs to a process that has exited, which can 987 * happen if the previous zoneadmd died unexpectedly. 988 * 989 * Let user know that something is amiss, but that we can 990 * recover; if the zone is in the installed state, then don't 991 * message, since having a running zoneadmd isn't really 992 * expected/needed. We want to keep occurences of this message 993 * limited to times when zoneadmd is picking back up from a 994 * zoneadmd that died while the zone was in some non-trivial 995 * state. 996 */ 997 if (zstate > ZONE_STATE_INSTALLED) { 998 zerror(zlogp, B_FALSE, 999 "zone '%s': WARNING: zone is in state '%s', but " 1000 "zoneadmd does not appear to be available; " 1001 "restarted zoneadmd to recover.", 1002 zone_name, zone_state_str(zstate)); 1003 } 1004 1005 (void) fdetach(zone_door_path); 1006 (void) close(doorfd); 1007 goto top; 1008 } 1009 ret = 0; 1010 out: 1011 (void) close(doorfd); 1012 return (ret); 1013 } 1014 1015 int 1016 main(int argc, char *argv[]) 1017 { 1018 int opt; 1019 zoneid_t zid; 1020 priv_set_t *privset; 1021 zone_state_t zstate; 1022 char parents_locale[MAXPATHLEN]; 1023 int err; 1024 1025 pid_t pid; 1026 sigset_t blockset; 1027 sigset_t block_cld; 1028 1029 struct { 1030 sema_t sem; 1031 int status; 1032 zlog_t log; 1033 } *shstate; 1034 size_t shstatelen = getpagesize(); 1035 1036 zlog_t errlog; 1037 zlog_t *zlogp; 1038 1039 progname = get_execbasename(argv[0]); 1040 1041 /* 1042 * Make sure stderr is unbuffered 1043 */ 1044 (void) setbuffer(stderr, NULL, 0); 1045 1046 /* 1047 * Get out of the way of mounted filesystems, since we will daemonize 1048 * soon. 1049 */ 1050 (void) chdir("/"); 1051 1052 /* 1053 * Use the default system umask per PSARC 1998/110 rather than 1054 * anything that may have been set by the caller. 1055 */ 1056 (void) umask(CMASK); 1057 1058 /* 1059 * Initially we want to use our parent's locale. 1060 */ 1061 (void) setlocale(LC_ALL, ""); 1062 (void) textdomain(TEXT_DOMAIN); 1063 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1064 sizeof (parents_locale)); 1065 1066 /* 1067 * This zlog_t is used for writing to stderr 1068 */ 1069 errlog.logfile = stderr; 1070 errlog.buflen = errlog.loglen = 0; 1071 errlog.buf = errlog.log = NULL; 1072 errlog.locale = parents_locale; 1073 1074 /* 1075 * We start off writing to stderr until we're ready to daemonize. 1076 */ 1077 zlogp = &errlog; 1078 1079 /* 1080 * Process options. 1081 */ 1082 while ((opt = getopt(argc, argv, "R:z:")) != EOF) { 1083 switch (opt) { 1084 case 'R': 1085 zonecfg_set_root(optarg); 1086 break; 1087 case 'z': 1088 zone_name = optarg; 1089 break; 1090 default: 1091 usage(); 1092 } 1093 } 1094 1095 if (zone_name == NULL) 1096 usage(); 1097 1098 /* 1099 * Because usage() prints directly to stderr, it has gettext() 1100 * wrapping, which depends on the locale. But since zerror() calls 1101 * localize() which tweaks the locale, it is not safe to call zerror() 1102 * until after the last call to usage(). Fortunately, the last call 1103 * to usage() is just above and the first call to zerror() is just 1104 * below. Don't mess this up. 1105 */ 1106 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1107 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1108 GLOBAL_ZONENAME); 1109 return (1); 1110 } 1111 1112 if (zone_get_id(zone_name, &zid) != 0) { 1113 zerror(zlogp, B_FALSE, "could not manage %s: %s\n", zone_name, 1114 zonecfg_strerror(Z_NO_ZONE)); 1115 return (1); 1116 } 1117 1118 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1119 zerror(zlogp, B_FALSE, "failed to get zone state: %s\n", 1120 zonecfg_strerror(err)); 1121 return (1); 1122 } 1123 if (zstate < ZONE_STATE_INSTALLED) { 1124 zerror(zlogp, B_FALSE, 1125 "cannot manage a zone which is in state '%s'", 1126 zone_state_str(zstate)); 1127 return (1); 1128 } 1129 1130 /* 1131 * Check that we have all privileges. It would be nice to pare 1132 * this down, but this is at least a first cut. 1133 */ 1134 if ((privset = priv_allocset()) == NULL) { 1135 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1136 return (1); 1137 } 1138 1139 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 1140 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 1141 priv_freeset(privset); 1142 return (1); 1143 } 1144 1145 if (priv_isfullset(privset) == B_FALSE) { 1146 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 1147 "run this command (all privs required)\n"); 1148 priv_freeset(privset); 1149 return (1); 1150 } 1151 priv_freeset(privset); 1152 1153 if (mkzonedir(zlogp) != 0) 1154 return (1); 1155 1156 /* 1157 * Pre-fork: setup shared state 1158 */ 1159 if ((shstate = (void *)mmap(NULL, shstatelen, 1160 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 1161 MAP_FAILED) { 1162 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 1163 return (1); 1164 } 1165 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 1166 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 1167 (void) munmap((char *)shstate, shstatelen); 1168 return (1); 1169 } 1170 shstate->log.logfile = NULL; 1171 shstate->log.buflen = shstatelen - sizeof (*shstate); 1172 shstate->log.loglen = shstate->log.buflen; 1173 shstate->log.buf = (char *)shstate + sizeof (*shstate); 1174 shstate->log.log = shstate->log.buf; 1175 shstate->log.locale = parents_locale; 1176 shstate->status = -1; 1177 1178 /* 1179 * We need a SIGCHLD handler so the sema_wait() below will wake 1180 * up if the child dies without doing a sema_post(). 1181 */ 1182 (void) sigset(SIGCHLD, sigchld); 1183 /* 1184 * We must mask SIGCHLD until after we've coped with the fork 1185 * sufficiently to deal with it; otherwise we can race and 1186 * receive the signal before pid has been initialized 1187 * (yes, this really happens). 1188 */ 1189 (void) sigemptyset(&block_cld); 1190 (void) sigaddset(&block_cld, SIGCHLD); 1191 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 1192 1193 /* 1194 * Do not let another thread localize a message while we are forking. 1195 */ 1196 (void) mutex_lock(&msglock); 1197 pid = fork(); 1198 (void) mutex_unlock(&msglock); 1199 if (pid == -1) { 1200 zerror(zlogp, B_TRUE, "could not fork"); 1201 return (1); 1202 1203 } else if (pid > 0) { /* parent */ 1204 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1205 /* 1206 * This marks a window of vulnerability in which we receive 1207 * the SIGCLD before falling into sema_wait (normally we would 1208 * get woken up from sema_wait with EINTR upon receipt of 1209 * SIGCLD). So we may need to use some other scheme like 1210 * sema_posting in the sigcld handler. 1211 * blech 1212 */ 1213 (void) sema_wait(&shstate->sem); 1214 (void) sema_destroy(&shstate->sem); 1215 if (shstate->status != 0) 1216 (void) waitpid(pid, NULL, WNOHANG); 1217 /* 1218 * It's ok if we die with SIGPIPE. It's not like we could have 1219 * done anything about it. 1220 */ 1221 (void) fprintf(stderr, "%s", shstate->log.buf); 1222 _exit(shstate->status == 0 ? 0 : 1); 1223 } 1224 1225 /* 1226 * The child charges on. 1227 */ 1228 (void) sigset(SIGCHLD, SIG_DFL); 1229 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1230 1231 /* 1232 * SIGPIPE can be delivered if we write to a socket for which the 1233 * peer endpoint is gone. That can lead to too-early termination 1234 * of zoneadmd, and that's not good eats. 1235 */ 1236 (void) sigset(SIGPIPE, SIG_IGN); 1237 /* 1238 * Stop using stderr 1239 */ 1240 zlogp = &shstate->log; 1241 1242 /* 1243 * We don't need stdout/stderr from now on. 1244 */ 1245 closefrom(0); 1246 1247 /* 1248 * Initialize the syslog zlog_t. This needs to be done after 1249 * the call to closefrom(). 1250 */ 1251 logsys.buf = logsys.log = NULL; 1252 logsys.buflen = logsys.loglen = 0; 1253 logsys.logfile = NULL; 1254 logsys.locale = DEFAULT_LOCALE; 1255 1256 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 1257 1258 /* 1259 * The eventstream is used to publish state changes in the zone 1260 * from the door threads to the console I/O poller. 1261 */ 1262 if (eventstream_init() == -1) { 1263 zerror(zlogp, B_TRUE, "unable to create eventstream"); 1264 goto child_out; 1265 } 1266 1267 (void) snprintf(zone_door_path, sizeof (zone_door_path), 1268 "%s" ZONE_DOOR_PATH, zonecfg_get_root(), zone_name); 1269 1270 /* 1271 * See if another zoneadmd is running for this zone. If not, then we 1272 * can now modify system state. 1273 */ 1274 if (make_daemon_exclusive(zlogp) == -1) 1275 goto child_out; 1276 1277 1278 /* 1279 * Create/join a new session; we need to be careful of what we do with 1280 * the console from now on so we don't end up being the session leader 1281 * for the terminal we're going to be handing out. 1282 */ 1283 (void) setsid(); 1284 1285 /* 1286 * This thread shouldn't be receiving any signals; in particular, 1287 * SIGCHLD should be received by the thread doing the fork(). 1288 */ 1289 (void) sigfillset(&blockset); 1290 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 1291 1292 /* 1293 * Setup the console device and get ready to serve the console; 1294 * once this has completed, we're ready to let console clients 1295 * make an attempt to connect (they will block until 1296 * serve_console_sock() below gets called, and any pending 1297 * connection is accept()ed). 1298 */ 1299 if (!zonecfg_in_alt_root() && init_console(zlogp) == -1) 1300 goto child_out; 1301 1302 /* 1303 * Take the lock now, so that when the door server gets going, we 1304 * are guaranteed that it won't take a request until we are sure 1305 * that everything is completely set up. See the child_out: label 1306 * below to see why this matters. 1307 */ 1308 (void) mutex_lock(&lock); 1309 1310 /* Init semaphore for scratch zones. */ 1311 if (sema_init(&scratch_sem, 0, USYNC_THREAD, NULL) == -1) { 1312 zerror(zlogp, B_TRUE, 1313 "failed to initialize semaphore for scratch zone"); 1314 goto child_out; 1315 } 1316 1317 /* 1318 * Note: door setup must occur *after* the console is setup. 1319 * This is so that as zlogin tests the door to see if zoneadmd 1320 * is ready yet, we know that the console will get serviced 1321 * once door_info() indicates that the door is "up". 1322 */ 1323 if (setup_door(zlogp) == -1) 1324 goto child_out; 1325 1326 /* 1327 * Things seem OK so far; tell the parent process that we're done 1328 * with setup tasks. This will cause the parent to exit, signalling 1329 * to zoneadm, zlogin, or whatever forked it that we are ready to 1330 * service requests. 1331 */ 1332 shstate->status = 0; 1333 (void) sema_post(&shstate->sem); 1334 (void) munmap((char *)shstate, shstatelen); 1335 shstate = NULL; 1336 1337 (void) mutex_unlock(&lock); 1338 1339 /* 1340 * zlogp is now invalid, so reset it to the syslog logger. 1341 */ 1342 zlogp = &logsys; 1343 1344 /* 1345 * Now that we are free of any parents, switch to the default locale. 1346 */ 1347 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 1348 1349 /* 1350 * At this point the setup portion of main() is basically done, so 1351 * we reuse this thread to manage the zone console. When 1352 * serve_console() has returned, we are past the point of no return 1353 * in the life of this zoneadmd. 1354 */ 1355 if (zonecfg_in_alt_root()) { 1356 /* 1357 * This is just awful, but mounted scratch zones don't (and 1358 * can't) have consoles. We just wait for unmount instead. 1359 */ 1360 while (sema_wait(&scratch_sem) == EINTR) 1361 ; 1362 } else { 1363 serve_console(zlogp); 1364 assert(in_death_throes); 1365 } 1366 1367 /* 1368 * This is the next-to-last part of the exit interlock. Upon calling 1369 * fdetach(), the door will go unreferenced; once any 1370 * outstanding requests (like the door thread doing Z_HALT) are 1371 * done, the door will get an UNREF notification; when it handles 1372 * the UNREF, the door server will cause the exit. 1373 */ 1374 assert(!MUTEX_HELD(&lock)); 1375 (void) fdetach(zone_door_path); 1376 for (;;) 1377 (void) pause(); 1378 1379 child_out: 1380 assert(pid == 0); 1381 if (shstate != NULL) { 1382 shstate->status = -1; 1383 (void) sema_post(&shstate->sem); 1384 (void) munmap((char *)shstate, shstatelen); 1385 } 1386 1387 /* 1388 * This might trigger an unref notification, but if so, 1389 * we are still holding the lock, so our call to exit will 1390 * ultimately win the race and will publish the right exit 1391 * code. 1392 */ 1393 if (zone_door != -1) { 1394 assert(MUTEX_HELD(&lock)); 1395 (void) door_revoke(zone_door); 1396 (void) fdetach(zone_door_path); 1397 } 1398 return (1); /* return from main() forcibly exits an MT process */ 1399 } 1400