1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * zoneadmd manages zones; one zoneadmd process is launched for each 31 * non-global zone on the system. This daemon juggles four jobs: 32 * 33 * - Implement setup and teardown of the zone "virtual platform": mount and 34 * unmount filesystems; create and destroy network interfaces; communicate 35 * with devfsadmd to lay out devices for the zone; instantiate the zone 36 * console device; configure process runtime attributes such as resource 37 * controls, pool bindings, fine-grained privileges. 38 * 39 * - Launch the zone's init(1M) process. 40 * 41 * - Implement a door server; clients (like zoneadm) connect to the door 42 * server and request zone state changes. The kernel is also a client of 43 * this door server. A request to halt or reboot the zone which originates 44 * *inside* the zone results in a door upcall from the kernel into zoneadmd. 45 * 46 * One minor problem is that messages emitted by zoneadmd need to be passed 47 * back to the zoneadm process making the request. These messages need to 48 * be rendered in the client's locale; so, this is passed in as part of the 49 * request. The exception is the kernel upcall to zoneadmd, in which case 50 * messages are syslog'd. 51 * 52 * To make all of this work, the Makefile adds -a to xgettext to extract *all* 53 * strings, and an exclusion file (zoneadmd.xcl) is used to exclude those 54 * strings which do not need to be translated. 55 * 56 * - Act as a console server for zlogin -C processes; see comments in zcons.c 57 * for more information about the zone console architecture. 58 * 59 * DESIGN NOTES 60 * 61 * Restart: 62 * A chief design constraint of zoneadmd is that it should be restartable in 63 * the case that the administrator kills it off, or it suffers a fatal error, 64 * without the running zone being impacted; this is akin to being able to 65 * reboot the service processor of a server without affecting the OS instance. 66 */ 67 68 #include <sys/param.h> 69 #include <sys/mman.h> 70 #include <sys/types.h> 71 #include <sys/stat.h> 72 #include <sys/sysmacros.h> 73 74 #include <bsm/adt.h> 75 #include <bsm/adt_event.h> 76 77 #include <alloca.h> 78 #include <assert.h> 79 #include <errno.h> 80 #include <door.h> 81 #include <fcntl.h> 82 #include <locale.h> 83 #include <signal.h> 84 #include <stdarg.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 #include <strings.h> 89 #include <synch.h> 90 #include <syslog.h> 91 #include <thread.h> 92 #include <unistd.h> 93 #include <wait.h> 94 #include <limits.h> 95 #include <zone.h> 96 #include <libcontract.h> 97 #include <libcontract_priv.h> 98 #include <sys/contract/process.h> 99 #include <sys/ctfs.h> 100 101 #include <libzonecfg.h> 102 #include "zoneadmd.h" 103 104 static char *progname; 105 char *zone_name; /* zone which we are managing */ 106 107 static zlog_t logsys; 108 109 mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ 110 mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ 111 112 static char zone_door_path[MAXPATHLEN]; 113 static int zone_door = -1; 114 115 boolean_t in_death_throes = B_FALSE; /* daemon is dying */ 116 boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ 117 118 #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ 119 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 120 #endif 121 122 #define PATH_TO_INIT "/sbin/init" 123 124 #define DEFAULT_LOCALE "C" 125 126 static char * 127 get_execbasename(char *execfullname) 128 { 129 char *last_slash, *execbasename; 130 131 /* guard against '/' at end of command invocation */ 132 for (;;) { 133 last_slash = strrchr(execfullname, '/'); 134 if (last_slash == NULL) { 135 execbasename = execfullname; 136 break; 137 } else { 138 execbasename = last_slash + 1; 139 if (*execbasename == '\0') { 140 *last_slash = '\0'; 141 continue; 142 } 143 break; 144 } 145 } 146 return (execbasename); 147 } 148 149 static void 150 usage(void) 151 { 152 (void) fprintf(stderr, gettext("Usage: %s -z zonename\n"), progname); 153 (void) fprintf(stderr, 154 gettext("\tNote: %s should not be run directly.\n"), progname); 155 exit(2); 156 } 157 158 /* ARGSUSED */ 159 static void 160 sigchld(int sig) 161 { 162 } 163 164 char * 165 localize_msg(char *locale, const char *msg) 166 { 167 char *out; 168 169 (void) mutex_lock(&msglock); 170 (void) setlocale(LC_MESSAGES, locale); 171 out = gettext(msg); 172 (void) setlocale(LC_MESSAGES, DEFAULT_LOCALE); 173 (void) mutex_unlock(&msglock); 174 return (out); 175 } 176 177 /* PRINTFLIKE3 */ 178 void 179 zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 180 { 181 va_list alist; 182 char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ 183 char *bp; 184 int saved_errno = errno; 185 186 if (zlogp == NULL) 187 return; 188 if (zlogp == &logsys) 189 (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", 190 zone_name); 191 else 192 buf[0] = '\0'; 193 bp = &(buf[strlen(buf)]); 194 195 /* 196 * In theory, the locale pointer should be set to either "C" or a 197 * char array, so it should never be NULL 198 */ 199 assert(zlogp->locale != NULL); 200 /* Locale is per process, but we are multi-threaded... */ 201 fmt = localize_msg(zlogp->locale, fmt); 202 203 va_start(alist, fmt); 204 (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); 205 va_end(alist); 206 bp = &(buf[strlen(buf)]); 207 if (use_strerror) 208 (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", 209 strerror(saved_errno)); 210 if (zlogp == &logsys) { 211 (void) syslog(LOG_ERR, "%s", buf); 212 } else if (zlogp->logfile != NULL) { 213 (void) fprintf(zlogp->logfile, "%s\n", buf); 214 } else { 215 size_t buflen; 216 size_t copylen; 217 218 buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); 219 copylen = MIN(buflen, zlogp->loglen); 220 zlogp->log += copylen; 221 zlogp->loglen -= copylen; 222 } 223 } 224 225 static int 226 mkzonedir(zlog_t *zlogp) 227 { 228 struct stat st; 229 /* 230 * We must create and lock everyone but root out of ZONES_TMPDIR 231 * since anyone can open any UNIX domain socket, regardless of 232 * its file system permissions. Sigh... 233 */ 234 if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) { 235 zerror(zlogp, B_TRUE, "could not mkdir '%s'", ZONES_TMPDIR); 236 return (-1); 237 } 238 /* paranoia */ 239 if ((stat(ZONES_TMPDIR, &st) < 0) || ((st.st_mode & S_IFDIR) == 0)) { 240 zerror(zlogp, B_TRUE, "'%s' is not a directory", ZONES_TMPDIR); 241 return (-1); 242 } 243 (void) chmod(ZONES_TMPDIR, S_IRWXU); 244 return (0); 245 } 246 247 static zoneid_t 248 zone_ready(zlog_t *zlogp) 249 { 250 int err; 251 252 if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { 253 zerror(zlogp, B_FALSE, "unable to create snapshot: %s", 254 zonecfg_strerror(err)); 255 return (-1); 256 } 257 258 if (vplat_create(zlogp) != 0) { 259 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 260 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 261 zonecfg_strerror(err)); 262 return (-1); 263 } 264 if (vplat_bringup(zlogp) != 0) { 265 bringup_failure_recovery = B_TRUE; 266 (void) vplat_teardown(NULL); 267 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 268 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 269 zonecfg_strerror(err)); 270 return (-1); 271 } 272 273 return (0); 274 } 275 276 static int 277 init_template() 278 { 279 int fd; 280 int err = 0; 281 282 fd = open64(CTFS_ROOT "/process/template", O_RDWR); 283 if (fd == -1) 284 return (-1); 285 286 /* 287 * For now, zoneadmd doesn't do anything with the contract. 288 * Deliver no events, don't inherit, and allow it to be orphaned. 289 */ 290 err |= ct_tmpl_set_critical(fd, 0); 291 err |= ct_tmpl_set_informative(fd, 0); 292 err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR); 293 err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT); 294 if (err || ct_tmpl_activate(fd)) { 295 (void) close(fd); 296 return (-1); 297 } 298 299 return (fd); 300 } 301 302 static int 303 mount_early_fs(zlog_t *zlogp, zoneid_t zoneid, const char *spec, 304 const char *dir, char *fstype) 305 { 306 pid_t child; 307 int child_status; 308 int tmpl_fd; 309 ctid_t ct; 310 311 if ((tmpl_fd = init_template()) == -1) { 312 zerror(zlogp, B_TRUE, "failed to create contract"); 313 return (-1); 314 } 315 316 if ((child = fork()) == -1) { 317 (void) ct_tmpl_clear(tmpl_fd); 318 (void) close(tmpl_fd); 319 zerror(zlogp, B_TRUE, "failed to fork"); 320 return (-1); 321 322 } else if (child == 0) { /* child */ 323 (void) ct_tmpl_clear(tmpl_fd); 324 /* 325 * Even though there are no procs running in the zone, we 326 * do this for paranoia's sake. 327 */ 328 (void) closefrom(0); 329 330 if (zone_enter(zoneid) == -1) { 331 _exit(errno); 332 } 333 if (mount(spec, dir, MS_DATA, fstype, NULL, 0, NULL, 0) != 0) 334 _exit(errno); 335 _exit(0); 336 } 337 338 /* parent */ 339 if (contract_latest(&ct) == -1) 340 ct = -1; 341 (void) ct_tmpl_clear(tmpl_fd); 342 (void) close(tmpl_fd); 343 if (waitpid(child, &child_status, 0) != child) { 344 /* unexpected: we must have been signalled */ 345 (void) contract_abandon_id(ct); 346 return (-1); 347 } 348 (void) contract_abandon_id(ct); 349 if (WEXITSTATUS(child_status) != 0) { 350 errno = WEXITSTATUS(child_status); 351 zerror(zlogp, B_TRUE, "mount of %s failed", dir); 352 return (-1); 353 } 354 355 return (0); 356 } 357 358 static int 359 zone_bootup(zlog_t *zlogp, const char *bootargs) 360 { 361 zoneid_t zoneid; 362 struct stat st; 363 char zroot[MAXPATHLEN], initpath[MAXPATHLEN]; 364 365 if (init_console_slave(zlogp) != 0) 366 return (-1); 367 reset_slave_terminal(zlogp); 368 369 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 370 zerror(zlogp, B_TRUE, "unable to get zoneid"); 371 return (-1); 372 } 373 374 if (mount_early_fs(zlogp, zoneid, "/proc", "/proc", "proc") != 0) 375 return (-1); 376 377 if (mount_early_fs(zlogp, zoneid, "ctfs", CTFS_ROOT, "ctfs") != 0) 378 return (-1); 379 380 if (mount_early_fs(zlogp, zoneid, "swap", "/etc/svc/volatile", 381 "tmpfs") != 0) 382 return (-1); 383 384 if (mount_early_fs(zlogp, zoneid, "mnttab", "/etc/mnttab", 385 "mntfs") != 0) 386 return (-1); 387 388 /* 389 * Try to anticipate possible problems: Make sure init is executable. 390 */ 391 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 392 zerror(zlogp, B_FALSE, "unable to determine zone root"); 393 return (-1); 394 } 395 (void) snprintf(initpath, sizeof (initpath), "%s%s", zroot, 396 PATH_TO_INIT); 397 398 if (stat(initpath, &st) == -1) { 399 zerror(zlogp, B_TRUE, "could not stat %s", initpath); 400 return (-1); 401 } 402 403 if ((st.st_mode & S_IXUSR) == 0) { 404 zerror(zlogp, B_FALSE, "%s is not executable", initpath); 405 return (-1); 406 } 407 408 if (zone_boot(zoneid, bootargs) == -1) { 409 zerror(zlogp, B_TRUE, "unable to boot zone"); 410 return (-1); 411 } 412 413 return (0); 414 } 415 416 static int 417 zone_halt(zlog_t *zlogp) 418 { 419 int err; 420 421 if (vplat_teardown(zlogp) != 0) { 422 if (!bringup_failure_recovery) 423 zerror(zlogp, B_FALSE, "unable to destroy zone"); 424 return (-1); 425 } 426 427 if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) 428 zerror(zlogp, B_FALSE, "destroying snapshot: %s", 429 zonecfg_strerror(err)); 430 431 return (0); 432 } 433 434 /* 435 * Generate AUE_zone_state for a command that boots a zone. 436 */ 437 static void 438 audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, 439 char *new_state) 440 { 441 adt_session_data_t *ah; 442 adt_event_data_t *event; 443 int pass_fail, fail_reason; 444 445 if (!adt_audit_enabled()) 446 return; 447 448 if (return_val == 0) { 449 pass_fail = ADT_SUCCESS; 450 fail_reason = ADT_SUCCESS; 451 } else { 452 pass_fail = ADT_FAILURE; 453 fail_reason = ADT_FAIL_VALUE_PROGRAM; 454 } 455 456 if (adt_start_session(&ah, NULL, 0)) { 457 zerror(zlogp, B_TRUE, gettext("audit failure.")); 458 return; 459 } 460 if (adt_set_from_ucred(ah, uc, ADT_NEW)) { 461 zerror(zlogp, B_TRUE, gettext("audit failure.")); 462 (void) adt_end_session(ah); 463 return; 464 } 465 466 event = adt_alloc_event(ah, ADT_zone_state); 467 if (event == NULL) { 468 zerror(zlogp, B_TRUE, gettext("audit failure.")); 469 (void) adt_end_session(ah); 470 return; 471 } 472 event->adt_zone_state.zonename = zone_name; 473 event->adt_zone_state.new_state = new_state; 474 475 if (adt_put_event(event, pass_fail, fail_reason)) 476 zerror(zlogp, B_TRUE, gettext("audit failure.")); 477 478 adt_free_event(event); 479 480 (void) adt_end_session(ah); 481 } 482 483 /* 484 * The main routine for the door server that deals with zone state transitions. 485 */ 486 /* ARGSUSED */ 487 static void 488 server(void *cookie, char *args, size_t alen, door_desc_t *dp, 489 uint_t n_desc) 490 { 491 ucred_t *uc = NULL; 492 const priv_set_t *eset; 493 494 zone_state_t zstate; 495 zone_cmd_t cmd; 496 zone_cmd_arg_t *zargp; 497 498 boolean_t kernelcall; 499 500 int rval = -1; 501 uint64_t uniqid; 502 zoneid_t zoneid = -1; 503 zlog_t zlog; 504 zlog_t *zlogp; 505 zone_cmd_rval_t *rvalp; 506 size_t rlen = getpagesize(); /* conservative */ 507 char *cmd_str = NULL; 508 509 /* LINTED E_BAD_PTR_CAST_ALIGN */ 510 zargp = (zone_cmd_arg_t *)args; 511 512 /* 513 * When we get the door unref message, we've fdetach'd the door, and 514 * it is time for us to shut down zoneadmd. 515 */ 516 if (zargp == DOOR_UNREF_DATA) { 517 /* 518 * See comment at end of main() for info on the last rites. 519 */ 520 exit(0); 521 } 522 523 if (zargp == NULL) { 524 (void) door_return(NULL, 0, 0, 0); 525 } 526 527 rvalp = alloca(rlen); 528 bzero(rvalp, rlen); 529 zlog.logfile = NULL; 530 zlog.buflen = zlog.loglen = rlen - sizeof (zone_cmd_rval_t) + 1; 531 zlog.buf = rvalp->errbuf; 532 zlog.log = zlog.buf; 533 /* defer initialization of zlog.locale until after credential check */ 534 zlogp = &zlog; 535 536 if (alen != sizeof (zone_cmd_arg_t)) { 537 /* 538 * This really shouldn't be happening. 539 */ 540 zerror(&logsys, B_FALSE, "invalid argument"); 541 goto out; 542 } 543 cmd = zargp->cmd; 544 545 if (door_ucred(&uc) != 0) { 546 zerror(&logsys, B_TRUE, "door_ucred"); 547 goto out; 548 } 549 eset = ucred_getprivset(uc, PRIV_EFFECTIVE); 550 if (ucred_getzoneid(uc) != GLOBAL_ZONEID || 551 (eset != NULL ? !priv_ismember(eset, PRIV_SYS_CONFIG) : 552 ucred_geteuid(uc) != 0)) { 553 zerror(&logsys, B_FALSE, "insufficient privileges"); 554 goto out; 555 } 556 557 kernelcall = ucred_getpid(uc) == 0; 558 559 /* 560 * This is safe because we only use a zlog_t throughout the 561 * duration of a door call; i.e., by the time the pointer 562 * might become invalid, the door call would be over. 563 */ 564 zlog.locale = kernelcall ? DEFAULT_LOCALE : zargp->locale; 565 566 (void) mutex_lock(&lock); 567 568 /* 569 * Once we start to really die off, we don't want more connections. 570 */ 571 if (in_death_throes) { 572 (void) mutex_unlock(&lock); 573 ucred_free(uc); 574 (void) door_return(NULL, 0, 0, 0); 575 thr_exit(NULL); 576 } 577 578 /* 579 * Check for validity of command. 580 */ 581 if (cmd != Z_READY && cmd != Z_BOOT && cmd != Z_REBOOT && 582 cmd != Z_HALT && cmd != Z_NOTE_UNINSTALLING) { 583 zerror(&logsys, B_FALSE, "invalid command"); 584 goto out; 585 } 586 587 if (kernelcall && (cmd != Z_HALT && cmd != Z_REBOOT)) { 588 /* 589 * Can't happen 590 */ 591 zerror(&logsys, B_FALSE, "received unexpected kernel upcall %d", 592 cmd); 593 goto out; 594 } 595 /* 596 * We ignore the possibility of someone calling zone_create(2) 597 * explicitly; all requests must come through zoneadmd. 598 */ 599 if (zone_get_state(zone_name, &zstate) != Z_OK) { 600 /* 601 * Something terribly wrong happened 602 */ 603 zerror(&logsys, B_FALSE, "unable to determine state of zone"); 604 goto out; 605 } 606 607 if (kernelcall) { 608 /* 609 * Kernel-initiated requests may lose their validity if the 610 * zone_t the kernel was referring to has gone away. 611 */ 612 if ((zoneid = getzoneidbyname(zone_name)) == -1 || 613 zone_getattr(zoneid, ZONE_ATTR_UNIQID, &uniqid, 614 sizeof (uniqid)) == -1 || uniqid != zargp->uniqid) { 615 /* 616 * We're not talking about the same zone. The request 617 * must have arrived too late. Return error. 618 */ 619 rval = -1; 620 goto out; 621 } 622 zlogp = &logsys; /* Log errors to syslog */ 623 } 624 625 switch (zstate) { 626 case ZONE_STATE_CONFIGURED: 627 case ZONE_STATE_INCOMPLETE: 628 /* 629 * Not our area of expertise; we just print a nice message 630 * and die off. 631 */ 632 switch (cmd) { 633 case Z_READY: 634 cmd_str = "ready"; 635 break; 636 case Z_BOOT: 637 cmd_str = "boot"; 638 break; 639 case Z_HALT: 640 cmd_str = "halt"; 641 break; 642 case Z_REBOOT: 643 cmd_str = "reboot"; 644 break; 645 } 646 assert(cmd_str != NULL); 647 zerror(zlogp, B_FALSE, 648 "%s operation is invalid for zones in state '%s'", 649 cmd_str, zone_state_str(zstate)); 650 break; 651 652 case ZONE_STATE_INSTALLED: 653 switch (cmd) { 654 case Z_READY: 655 rval = zone_ready(zlogp); 656 if (rval == 0) 657 eventstream_write(Z_EVT_ZONE_READIED); 658 break; 659 case Z_BOOT: 660 eventstream_write(Z_EVT_ZONE_BOOTING); 661 if ((rval = zone_ready(zlogp)) == 0) 662 rval = zone_bootup(zlogp, zargp->bootbuf); 663 audit_put_record(zlogp, uc, rval, "boot"); 664 if (rval != 0) { 665 bringup_failure_recovery = B_TRUE; 666 (void) zone_halt(zlogp); 667 } 668 break; 669 case Z_HALT: 670 if (kernelcall) /* Invalid; can't happen */ 671 abort(); 672 /* 673 * We could have two clients racing to halt this 674 * zone; the second client loses, but his request 675 * doesn't fail, since the zone is now in the desired 676 * state. 677 */ 678 zerror(zlogp, B_FALSE, "zone is already halted"); 679 rval = 0; 680 break; 681 case Z_REBOOT: 682 if (kernelcall) /* Invalid; can't happen */ 683 abort(); 684 zerror(zlogp, B_FALSE, "%s operation is invalid " 685 "for zones in state '%s'", "reboot", 686 zone_state_str(zstate)); 687 rval = -1; 688 break; 689 case Z_NOTE_UNINSTALLING: 690 if (kernelcall) /* Invalid; can't happen */ 691 abort(); 692 /* 693 * Tell the console to print out a message about this. 694 * Once it does, we will be in_death_throes. 695 */ 696 eventstream_write(Z_EVT_ZONE_UNINSTALLING); 697 break; 698 } 699 break; 700 701 case ZONE_STATE_READY: 702 switch (cmd) { 703 case Z_READY: 704 /* 705 * We could have two clients racing to ready this 706 * zone; the second client loses, but his request 707 * doesn't fail, since the zone is now in the desired 708 * state. 709 */ 710 zerror(zlogp, B_FALSE, "zone is already ready"); 711 rval = 0; 712 break; 713 case Z_BOOT: 714 eventstream_write(Z_EVT_ZONE_BOOTING); 715 rval = zone_bootup(zlogp, zargp->bootbuf); 716 audit_put_record(zlogp, uc, rval, "boot"); 717 if (rval != 0) { 718 bringup_failure_recovery = B_TRUE; 719 (void) zone_halt(zlogp); 720 } 721 break; 722 case Z_HALT: 723 if (kernelcall) /* Invalid; can't happen */ 724 abort(); 725 if ((rval = zone_halt(zlogp)) != 0) 726 break; 727 eventstream_write(Z_EVT_ZONE_HALTED); 728 break; 729 case Z_REBOOT: 730 if (kernelcall) /* Invalid; can't happen */ 731 abort(); 732 zerror(zlogp, B_FALSE, "%s operation is invalid " 733 "for zones in state '%s'", "reboot", 734 zone_state_str(zstate)); 735 rval = -1; 736 break; 737 case Z_NOTE_UNINSTALLING: 738 if (kernelcall) /* Invalid; can't happen */ 739 abort(); 740 zerror(zlogp, B_FALSE, "%s operation is " 741 "invalid for zones in state '%s'", 742 "note_uninstall", zone_state_str(zstate)); 743 rval = -1; 744 break; 745 } 746 break; 747 748 case ZONE_STATE_RUNNING: 749 case ZONE_STATE_SHUTTING_DOWN: 750 case ZONE_STATE_DOWN: 751 switch (cmd) { 752 case Z_READY: 753 if ((rval = zone_halt(zlogp)) != 0) 754 break; 755 if ((rval = zone_ready(zlogp)) == 0) 756 eventstream_write(Z_EVT_ZONE_READIED); 757 break; 758 case Z_BOOT: 759 /* 760 * We could have two clients racing to boot this 761 * zone; the second client loses, but his request 762 * doesn't fail, since the zone is now in the desired 763 * state. 764 */ 765 zerror(zlogp, B_FALSE, "zone is already booted"); 766 rval = 0; 767 break; 768 case Z_HALT: 769 if ((rval = zone_halt(zlogp)) != 0) 770 break; 771 eventstream_write(Z_EVT_ZONE_HALTED); 772 break; 773 case Z_REBOOT: 774 eventstream_write(Z_EVT_ZONE_REBOOTING); 775 if ((rval = zone_halt(zlogp)) != 0) 776 break; 777 if ((rval = zone_ready(zlogp)) == 0) { 778 rval = zone_bootup(zlogp, ""); 779 audit_put_record(zlogp, uc, rval, "reboot"); 780 if (rval != 0) 781 (void) zone_halt(zlogp); 782 } 783 break; 784 case Z_NOTE_UNINSTALLING: 785 zerror(zlogp, B_FALSE, "%s operation is " 786 "invalid for zones in state '%s'", 787 "note_uninstall", zone_state_str(zstate)); 788 rval = -1; 789 break; 790 } 791 break; 792 default: 793 abort(); 794 } 795 796 /* 797 * Because the state of the zone may have changed, we make sure 798 * to wake the console poller, which is in charge of initiating 799 * the shutdown procedure as necessary. 800 */ 801 eventstream_write(Z_EVT_NULL); 802 803 out: 804 (void) mutex_unlock(&lock); 805 if (kernelcall) { 806 rvalp = NULL; 807 rlen = 0; 808 } else { 809 rvalp->rval = rval; 810 } 811 if (uc != NULL) 812 ucred_free(uc); 813 (void) door_return((char *)rvalp, rlen, NULL, 0); 814 thr_exit(NULL); 815 } 816 817 static int 818 setup_door(zlog_t *zlogp) 819 { 820 if ((zone_door = door_create(server, NULL, 821 DOOR_UNREF | DOOR_REFUSE_DESC | DOOR_NO_CANCEL)) < 0) { 822 zerror(zlogp, B_TRUE, "%s failed", "door_create"); 823 return (-1); 824 } 825 (void) fdetach(zone_door_path); 826 827 if (fattach(zone_door, zone_door_path) != 0) { 828 zerror(zlogp, B_TRUE, "fattach to %s failed", zone_door_path); 829 (void) door_revoke(zone_door); 830 (void) fdetach(zone_door_path); 831 zone_door = -1; 832 return (-1); 833 } 834 return (0); 835 } 836 837 /* 838 * zoneadm(1m) will start zoneadmd if it thinks it isn't running; this 839 * is where zoneadmd itself will check to see that another instance of 840 * zoneadmd isn't already controlling this zone. 841 * 842 * The idea here is that we want to open the path to which we will 843 * attach our door, lock it, and then make sure that no-one has beat us 844 * to fattach(3c)ing onto it. 845 * 846 * fattach(3c) is really a mount, so there are actually two possible 847 * vnodes we could be dealing with. Our strategy is as follows: 848 * 849 * - If the file we opened is a regular file (common case): 850 * There is no fattach(3c)ed door, so we have a chance of becoming 851 * the managing zoneadmd. We attempt to lock the file: if it is 852 * already locked, that means someone else raced us here, so we 853 * lose and give up. zoneadm(1m) will try to contact the zoneadmd 854 * that beat us to it. 855 * 856 * - If the file we opened is a namefs file: 857 * This means there is already an established door fattach(3c)'ed 858 * to the rendezvous path. We've lost the race, so we give up. 859 * Note that in this case we also try to grab the file lock, and 860 * will succeed in acquiring it since the vnode locked by the 861 * "winning" zoneadmd was a regular one, and the one we locked was 862 * the fattach(3c)'ed door node. At any rate, no harm is done, and 863 * we just return to zoneadm(1m) which knows to retry. 864 */ 865 static int 866 make_daemon_exclusive(zlog_t *zlogp) 867 { 868 int doorfd = -1; 869 int err, ret = -1; 870 struct stat st; 871 struct flock flock; 872 zone_state_t zstate; 873 874 top: 875 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 876 zerror(zlogp, B_FALSE, "failed to get zone state: %s\n", 877 zonecfg_strerror(err)); 878 goto out; 879 } 880 if ((doorfd = open(zone_door_path, O_CREAT|O_RDWR, 881 S_IREAD|S_IWRITE)) < 0) { 882 zerror(zlogp, B_TRUE, "failed to open %s", zone_door_path); 883 goto out; 884 } 885 if (fstat(doorfd, &st) < 0) { 886 zerror(zlogp, B_TRUE, "failed to stat %s", zone_door_path); 887 goto out; 888 } 889 /* 890 * Lock the file to synchronize with other zoneadmd 891 */ 892 flock.l_type = F_WRLCK; 893 flock.l_whence = SEEK_SET; 894 flock.l_start = (off_t)0; 895 flock.l_len = (off_t)0; 896 if (fcntl(doorfd, F_SETLK, &flock) < 0) { 897 /* 898 * Someone else raced us here and grabbed the lock file 899 * first. A warning here is inappropriate since nothing 900 * went wrong. 901 */ 902 goto out; 903 } 904 905 if (strcmp(st.st_fstype, "namefs") == 0) { 906 struct door_info info; 907 908 /* 909 * There is already something fattach()'ed to this file. 910 * Lets see what the door is up to. 911 */ 912 if (door_info(doorfd, &info) == 0 && info.di_target != -1) { 913 /* 914 * Another zoneadmd process seems to be in 915 * control of the situation and we don't need to 916 * be here. A warning here is inappropriate 917 * since nothing went wrong. 918 * 919 * If the door has been revoked, the zoneadmd 920 * process currently managing the zone is going 921 * away. We'll return control to zoneadm(1m) 922 * which will try again (by which time zoneadmd 923 * will hopefully have exited). 924 */ 925 goto out; 926 } 927 928 /* 929 * If we got this far, there's a fattach(3c)'ed door 930 * that belongs to a process that has exited, which can 931 * happen if the previous zoneadmd died unexpectedly. 932 * 933 * Let user know that something is amiss, but that we can 934 * recover; if the zone is in the installed state, then don't 935 * message, since having a running zoneadmd isn't really 936 * expected/needed. We want to keep occurences of this message 937 * limited to times when zoneadmd is picking back up from a 938 * zoneadmd that died while the zone was in some non-trivial 939 * state. 940 */ 941 if (zstate > ZONE_STATE_INSTALLED) { 942 zerror(zlogp, B_FALSE, 943 "zone '%s': WARNING: zone is in state '%s', but " 944 "zoneadmd does not appear to be available; " 945 "restarted zoneadmd to recover.", 946 zone_name, zone_state_str(zstate)); 947 } 948 949 (void) fdetach(zone_door_path); 950 (void) close(doorfd); 951 goto top; 952 } 953 ret = 0; 954 out: 955 (void) close(doorfd); 956 return (ret); 957 } 958 959 int 960 main(int argc, char *argv[]) 961 { 962 int opt; 963 zoneid_t zid; 964 priv_set_t *privset; 965 zone_state_t zstate; 966 char parents_locale[MAXPATHLEN]; 967 int err; 968 969 pid_t pid; 970 sigset_t blockset; 971 sigset_t block_cld; 972 973 struct { 974 sema_t sem; 975 int status; 976 zlog_t log; 977 } *shstate; 978 size_t shstatelen = getpagesize(); 979 980 zlog_t errlog; 981 zlog_t *zlogp; 982 983 progname = get_execbasename(argv[0]); 984 985 /* 986 * Make sure stderr is unbuffered 987 */ 988 (void) setbuffer(stderr, NULL, 0); 989 990 /* 991 * Get out of the way of mounted filesystems, since we will daemonize 992 * soon. 993 */ 994 (void) chdir("/"); 995 996 /* 997 * Use the default system umask per PSARC 1998/110 rather than 998 * anything that may have been set by the caller. 999 */ 1000 (void) umask(CMASK); 1001 1002 /* 1003 * Initially we want to use our parent's locale. 1004 */ 1005 (void) setlocale(LC_ALL, ""); 1006 (void) textdomain(TEXT_DOMAIN); 1007 (void) strlcpy(parents_locale, setlocale(LC_MESSAGES, NULL), 1008 sizeof (parents_locale)); 1009 1010 /* 1011 * This zlog_t is used for writing to stderr 1012 */ 1013 errlog.logfile = stderr; 1014 errlog.buflen = errlog.loglen = 0; 1015 errlog.buf = errlog.log = NULL; 1016 errlog.locale = parents_locale; 1017 1018 /* 1019 * We start off writing to stderr until we're ready to daemonize. 1020 */ 1021 zlogp = &errlog; 1022 1023 /* 1024 * Process options. 1025 */ 1026 while ((opt = getopt(argc, argv, "z:")) != EOF) { 1027 switch (opt) { 1028 case 'z': 1029 zone_name = optarg; 1030 break; 1031 default: 1032 usage(); 1033 } 1034 } 1035 1036 if (zone_name == NULL) 1037 usage(); 1038 1039 /* 1040 * Because usage() prints directly to stderr, it has gettext() 1041 * wrapping, which depends on the locale. But since zerror() calls 1042 * localize() which tweaks the locale, it is not safe to call zerror() 1043 * until after the last call to usage(). Fortunately, the last call 1044 * to usage() is just above and the first call to zerror() is just 1045 * below. Don't mess this up. 1046 */ 1047 if (strcmp(zone_name, GLOBAL_ZONENAME) == 0) { 1048 zerror(zlogp, B_FALSE, "cannot manage the %s zone", 1049 GLOBAL_ZONENAME); 1050 return (1); 1051 } 1052 1053 if (zone_get_id(zone_name, &zid) != 0) { 1054 zerror(zlogp, B_FALSE, "could not manage %s: %s\n", zone_name, 1055 zonecfg_strerror(Z_NO_ZONE)); 1056 return (1); 1057 } 1058 1059 if ((err = zone_get_state(zone_name, &zstate)) != Z_OK) { 1060 zerror(zlogp, B_FALSE, "failed to get zone state: %s\n", 1061 zonecfg_strerror(err)); 1062 return (1); 1063 } 1064 if (zstate < ZONE_STATE_INSTALLED) { 1065 zerror(zlogp, B_FALSE, 1066 "cannot manage a zone which is in state '%s'", 1067 zone_state_str(zstate)); 1068 return (1); 1069 } 1070 1071 /* 1072 * Check that we have all privileges. It would be nice to pare 1073 * this down, but this is at least a first cut. 1074 */ 1075 if ((privset = priv_allocset()) == NULL) { 1076 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 1077 return (1); 1078 } 1079 1080 if (getppriv(PRIV_EFFECTIVE, privset) != 0) { 1081 zerror(zlogp, B_TRUE, "%s failed", "getppriv"); 1082 priv_freeset(privset); 1083 return (1); 1084 } 1085 1086 if (priv_isfullset(privset) == B_FALSE) { 1087 zerror(zlogp, B_FALSE, "You lack sufficient privilege to " 1088 "run this command (all privs required)\n"); 1089 priv_freeset(privset); 1090 return (1); 1091 } 1092 priv_freeset(privset); 1093 1094 if (mkzonedir(zlogp) != 0) 1095 return (1); 1096 1097 /* 1098 * Pre-fork: setup shared state 1099 */ 1100 if ((shstate = (void *)mmap(NULL, shstatelen, 1101 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, (off_t)0)) == 1102 MAP_FAILED) { 1103 zerror(zlogp, B_TRUE, "%s failed", "mmap"); 1104 return (1); 1105 } 1106 if (sema_init(&shstate->sem, 0, USYNC_PROCESS, NULL) != 0) { 1107 zerror(zlogp, B_TRUE, "%s failed", "sema_init()"); 1108 (void) munmap((char *)shstate, shstatelen); 1109 return (1); 1110 } 1111 shstate->log.logfile = NULL; 1112 shstate->log.buflen = shstatelen - sizeof (*shstate); 1113 shstate->log.loglen = shstate->log.buflen; 1114 shstate->log.buf = (char *)shstate + sizeof (*shstate); 1115 shstate->log.log = shstate->log.buf; 1116 shstate->log.locale = parents_locale; 1117 shstate->status = -1; 1118 1119 /* 1120 * We need a SIGCHLD handler so the sema_wait() below will wake 1121 * up if the child dies without doing a sema_post(). 1122 */ 1123 (void) sigset(SIGCHLD, sigchld); 1124 /* 1125 * We must mask SIGCHLD until after we've coped with the fork 1126 * sufficiently to deal with it; otherwise we can race and 1127 * receive the signal before pid has been initialized 1128 * (yes, this really happens). 1129 */ 1130 (void) sigemptyset(&block_cld); 1131 (void) sigaddset(&block_cld, SIGCHLD); 1132 (void) sigprocmask(SIG_BLOCK, &block_cld, NULL); 1133 1134 /* 1135 * Do not let another thread localize a message while we are forking. 1136 */ 1137 (void) mutex_lock(&msglock); 1138 pid = fork(); 1139 (void) mutex_unlock(&msglock); 1140 if (pid == -1) { 1141 zerror(zlogp, B_TRUE, "could not fork"); 1142 return (1); 1143 1144 } else if (pid > 0) { /* parent */ 1145 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1146 /* 1147 * This marks a window of vulnerability in which we receive 1148 * the SIGCLD before falling into sema_wait (normally we would 1149 * get woken up from sema_wait with EINTR upon receipt of 1150 * SIGCLD). So we may need to use some other scheme like 1151 * sema_posting in the sigcld handler. 1152 * blech 1153 */ 1154 (void) sema_wait(&shstate->sem); 1155 (void) sema_destroy(&shstate->sem); 1156 if (shstate->status != 0) 1157 (void) waitpid(pid, NULL, WNOHANG); 1158 /* 1159 * It's ok if we die with SIGPIPE. It's not like we could have 1160 * done anything about it. 1161 */ 1162 (void) fprintf(stderr, "%s", shstate->log.buf); 1163 _exit(shstate->status == 0 ? 0 : 1); 1164 } 1165 1166 /* 1167 * The child charges on. 1168 */ 1169 (void) sigset(SIGCHLD, SIG_DFL); 1170 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL); 1171 1172 /* 1173 * SIGPIPE can be delivered if we write to a socket for which the 1174 * peer endpoint is gone. That can lead to too-early termination 1175 * of zoneadmd, and that's not good eats. 1176 */ 1177 (void) sigset(SIGPIPE, SIG_IGN); 1178 /* 1179 * Stop using stderr 1180 */ 1181 zlogp = &shstate->log; 1182 1183 /* 1184 * We don't need stdout/stderr from now on. 1185 */ 1186 closefrom(0); 1187 1188 /* 1189 * Initialize the syslog zlog_t. This needs to be done after 1190 * the call to closefrom(). 1191 */ 1192 logsys.buf = logsys.log = NULL; 1193 logsys.buflen = logsys.loglen = 0; 1194 logsys.logfile = NULL; 1195 logsys.locale = DEFAULT_LOCALE; 1196 1197 openlog("zoneadmd", LOG_PID, LOG_DAEMON); 1198 1199 /* 1200 * The eventstream is used to publish state changes in the zone 1201 * from the door threads to the console I/O poller. 1202 */ 1203 if (eventstream_init() == -1) { 1204 zerror(zlogp, B_TRUE, "unable to create eventstream"); 1205 goto child_out; 1206 } 1207 1208 (void) snprintf(zone_door_path, sizeof (zone_door_path), 1209 ZONE_DOOR_PATH, zone_name); 1210 1211 /* 1212 * See if another zoneadmd is running for this zone. If not, then we 1213 * can now modify system state. 1214 */ 1215 if (make_daemon_exclusive(zlogp) == -1) 1216 goto child_out; 1217 1218 1219 /* 1220 * Create/join a new session; we need to be careful of what we do with 1221 * the console from now on so we don't end up being the session leader 1222 * for the terminal we're going to be handing out. 1223 */ 1224 (void) setsid(); 1225 1226 /* 1227 * This thread shouldn't be receiving any signals; in particular, 1228 * SIGCHLD should be received by the thread doing the fork(). 1229 */ 1230 (void) sigfillset(&blockset); 1231 (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); 1232 1233 /* 1234 * Setup the console device and get ready to serve the console; 1235 * once this has completed, we're ready to let console clients 1236 * make an attempt to connect (they will block until 1237 * serve_console_sock() below gets called, and any pending 1238 * connection is accept()ed). 1239 */ 1240 if (init_console(zlogp) == -1) 1241 goto child_out; 1242 1243 /* 1244 * Take the lock now, so that when the door server gets going, we 1245 * are guaranteed that it won't take a request until we are sure 1246 * that everything is completely set up. See the child_out: label 1247 * below to see why this matters. 1248 */ 1249 (void) mutex_lock(&lock); 1250 1251 /* 1252 * Note: door setup must occur *after* the console is setup. 1253 * This is so that as zlogin tests the door to see if zoneadmd 1254 * is ready yet, we know that the console will get serviced 1255 * once door_info() indicates that the door is "up". 1256 */ 1257 if (setup_door(zlogp) == -1) 1258 goto child_out; 1259 1260 /* 1261 * Things seem OK so far; tell the parent process that we're done 1262 * with setup tasks. This will cause the parent to exit, signalling 1263 * to zoneadm, zlogin, or whatever forked it that we are ready to 1264 * service requests. 1265 */ 1266 shstate->status = 0; 1267 (void) sema_post(&shstate->sem); 1268 (void) munmap((char *)shstate, shstatelen); 1269 shstate = NULL; 1270 1271 (void) mutex_unlock(&lock); 1272 1273 /* 1274 * zlogp is now invalid, so reset it to the syslog logger. 1275 */ 1276 zlogp = &logsys; 1277 1278 /* 1279 * Now that we are free of any parents, switch to the default locale. 1280 */ 1281 (void) setlocale(LC_ALL, DEFAULT_LOCALE); 1282 1283 /* 1284 * At this point the setup portion of main() is basically done, so 1285 * we reuse this thread to manage the zone console. When 1286 * serve_console() has returned, we are past the point of no return 1287 * in the life of this zoneadmd. 1288 */ 1289 serve_console(zlogp); 1290 assert(in_death_throes); 1291 1292 /* 1293 * This is the next-to-last part of the exit interlock. Upon calling 1294 * fdetach(), the door will go unreferenced; once any 1295 * outstanding requests (like the door thread doing Z_HALT) are 1296 * done, the door will get an UNREF notification; when it handles 1297 * the UNREF, the door server will cause the exit. 1298 */ 1299 assert(!MUTEX_HELD(&lock)); 1300 (void) fdetach(zone_door_path); 1301 for (;;) 1302 (void) pause(); 1303 1304 child_out: 1305 assert(pid == 0); 1306 if (shstate != NULL) { 1307 shstate->status = -1; 1308 (void) sema_post(&shstate->sem); 1309 (void) munmap((char *)shstate, shstatelen); 1310 } 1311 1312 /* 1313 * This might trigger an unref notification, but if so, 1314 * we are still holding the lock, so our call to exit will 1315 * ultimately win the race and will publish the right exit 1316 * code. 1317 */ 1318 if (zone_door != -1) { 1319 assert(MUTEX_HELD(&lock)); 1320 (void) door_revoke(zone_door); 1321 (void) fdetach(zone_door_path); 1322 } 1323 return (1); /* return from main() forcibly exits an MT process */ 1324 } 1325