1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * This module contains functions used to bring up and tear down the 31 * Virtual Platform: [un]mounting file-systems, [un]plumbing network 32 * interfaces, [un]configuring devices, establishing resource controls, 33 * and creating/destroying the zone in the kernel. These actions, on 34 * the way up, ready the zone; on the way down, they halt the zone. 35 * See the much longer block comment at the beginning of zoneadmd.c 36 * for a bigger picture of how the whole program functions. 37 * 38 * This module also has primary responsibility for the layout of "scratch 39 * zones." These are mounted, but inactive, zones that are used during 40 * operating system upgrade and potentially other administrative action. The 41 * scratch zone environment is similar to the miniroot environment. The zone's 42 * actual root is mounted read-write on /a, and the standard paths (/usr, 43 * /sbin, /lib) all lead to read-only copies of the running system's binaries. 
44 * This allows the administrative tools to manipulate the zone using "-R /a" 45 * without relying on any binaries in the zone itself. 46 * 47 * If the scratch zone is on an alternate root (Live Upgrade [LU] boot 48 * environment), then we must resolve the lofs mounts used there to uncover 49 * writable (unshared) resources. Shared resources, though, are always 50 * read-only. In addition, if the "same" zone with a different root path is 51 * currently running, then "/b" inside the zone points to the running zone's 52 * root. This allows LU to synchronize configuration files during the upgrade 53 * process. 54 * 55 * To construct this environment, this module creates a tmpfs mount on 56 * $ZONEPATH/lu. Inside this scratch area, the miniroot-like environment as 57 * described above is constructed on the fly. The zone is then created using 58 * $ZONEPATH/lu as the root. 59 * 60 * Note that scratch zones are inactive. The zone's bits are not running and 61 * likely cannot be run correctly until upgrade is done. Init is not running 62 * there, nor is SMF. Because of this, the "mounted" state of a scratch zone 63 * is not a part of the usual halt/ready/boot state machine. 
64 */ 65 66 #include <sys/param.h> 67 #include <sys/mount.h> 68 #include <sys/mntent.h> 69 #include <sys/socket.h> 70 #include <sys/utsname.h> 71 #include <sys/types.h> 72 #include <sys/stat.h> 73 #include <sys/sockio.h> 74 #include <sys/stropts.h> 75 #include <sys/conf.h> 76 77 #include <inet/tcp.h> 78 #include <arpa/inet.h> 79 #include <netinet/in.h> 80 #include <net/route.h> 81 82 #include <stdio.h> 83 #include <errno.h> 84 #include <fcntl.h> 85 #include <unistd.h> 86 #include <rctl.h> 87 #include <stdlib.h> 88 #include <string.h> 89 #include <strings.h> 90 #include <wait.h> 91 #include <limits.h> 92 #include <libgen.h> 93 #include <libzfs.h> 94 #include <libdevinfo.h> 95 #include <zone.h> 96 #include <assert.h> 97 #include <libcontract.h> 98 #include <libcontract_priv.h> 99 #include <uuid/uuid.h> 100 101 #include <sys/mntio.h> 102 #include <sys/mnttab.h> 103 #include <sys/fs/autofs.h> /* for _autofssys() */ 104 #include <sys/fs/lofs_info.h> 105 #include <sys/fs/zfs.h> 106 107 #include <pool.h> 108 #include <sys/pool.h> 109 110 #include <libzonecfg.h> 111 #include <synch.h> 112 113 #include "zoneadmd.h" 114 #include <tsol/label.h> 115 #include <libtsnet.h> 116 #include <sys/priv.h> 117 118 #define V4_ADDR_LEN 32 119 #define V6_ADDR_LEN 128 120 121 /* 0755 is the default directory mode. */ 122 #define DEFAULT_DIR_MODE \ 123 (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) 124 125 #define IPD_DEFAULT_OPTS \ 126 MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES 127 128 #define DFSTYPES "/etc/dfs/fstypes" 129 #define MAXTNZLEN 2048 130 131 /* 132 * This is the set of directories and devices (relative to <zone_root>/dev) 133 * which must be present in every zone. Users can augment this list with 134 * additional device rules in their zone configuration, but at present cannot 135 * remove any of the this set of standard devices. 
136 */ 137 static const char *standard_devs[] = { 138 "arp", 139 "conslog", 140 "cpu/self/cpuid", 141 "crypto", 142 "cryptoadm", 143 "dsk", 144 "dtrace/helper", 145 "fd", 146 "kstat", 147 "lo0", 148 "lo1", 149 "lo2", 150 "lo3", 151 "log", 152 "logindmux", 153 "null", 154 #ifdef __sparc 155 "openprom", 156 #endif 157 "poll", 158 "pool", 159 "ptmx", 160 "pts/*", 161 "random", 162 "rdsk", 163 "rmt", 164 "sad/user", 165 "swap", 166 "sysevent", 167 "tcp", 168 "tcp6", 169 "term", 170 "ticlts", 171 "ticots", 172 "ticotsord", 173 "tty", 174 "udp", 175 "udp6", 176 "urandom", 177 "zero", 178 "zfs", 179 NULL 180 }; 181 182 struct source_target { 183 const char *source; 184 const char *target; 185 }; 186 187 /* 188 * Set of symlinks (relative to <zone_root>/dev) which must be present in 189 * every zone. 190 */ 191 static struct source_target standard_devlinks[] = { 192 { "stderr", "./fd/2" }, 193 { "stdin", "./fd/0" }, 194 { "stdout", "./fd/1" }, 195 { "dtremote", "/dev/null" }, 196 { "console", "zconsole" }, 197 { "syscon", "zconsole" }, 198 { "sysmsg", "zconsole" }, 199 { "systty", "zconsole" }, 200 { "msglog", "zconsole" }, 201 { NULL, NULL } 202 }; 203 204 static int vplat_mount_dev(zlog_t *); 205 206 /* for routing socket */ 207 static int rts_seqno = 0; 208 209 /* mangled zone name when mounting in an alternate root environment */ 210 static char kernzone[ZONENAME_MAX]; 211 212 /* array of cached mount entries for resolve_lofs */ 213 static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max; 214 215 /* for Trusted Extensions */ 216 static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *); 217 static int tsol_mounts(zlog_t *, char *, char *); 218 static void tsol_unmounts(zlog_t *, char *); 219 static m_label_t *zlabel = NULL; 220 static m_label_t *zid_label = NULL; 221 static priv_set_t *zprivs = NULL; 222 223 /* from libsocket, not in any header file */ 224 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *); 225 226 /* 227 * An optimization for 
build_mnttable: reallocate (and potentially copy the 228 * data) only once every N times through the loop. 229 */ 230 #define MNTTAB_HUNK 32 231 232 /* 233 * Private autofs system call 234 */ 235 extern int _autofssys(int, void *); 236 237 static int 238 autofs_cleanup(zoneid_t zoneid) 239 { 240 /* 241 * Ask autofs to unmount all trigger nodes in the given zone. 242 */ 243 return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid)); 244 } 245 246 static void 247 free_mnttable(struct mnttab *mnt_array, uint_t nelem) 248 { 249 uint_t i; 250 251 if (mnt_array == NULL) 252 return; 253 for (i = 0; i < nelem; i++) { 254 free(mnt_array[i].mnt_mountp); 255 free(mnt_array[i].mnt_fstype); 256 free(mnt_array[i].mnt_special); 257 free(mnt_array[i].mnt_mntopts); 258 assert(mnt_array[i].mnt_time == NULL); 259 } 260 free(mnt_array); 261 } 262 263 /* 264 * Build the mount table for the zone rooted at "zroot", storing the resulting 265 * array of struct mnttabs in "mnt_arrayp" and the number of elements in the 266 * array in "nelemp". 267 */ 268 static int 269 build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab, 270 struct mnttab **mnt_arrayp, uint_t *nelemp) 271 { 272 struct mnttab mnt; 273 struct mnttab *mnts; 274 struct mnttab *mnp; 275 uint_t nmnt; 276 277 rewind(mnttab); 278 resetmnttab(mnttab); 279 nmnt = 0; 280 mnts = NULL; 281 while (getmntent(mnttab, &mnt) == 0) { 282 struct mnttab *tmp_array; 283 284 if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0) 285 continue; 286 if (nmnt % MNTTAB_HUNK == 0) { 287 tmp_array = realloc(mnts, 288 (nmnt + MNTTAB_HUNK) * sizeof (*mnts)); 289 if (tmp_array == NULL) { 290 free_mnttable(mnts, nmnt); 291 return (-1); 292 } 293 mnts = tmp_array; 294 } 295 mnp = &mnts[nmnt++]; 296 297 /* 298 * Zero out any fields we're not using. 
299 */ 300 (void) memset(mnp, 0, sizeof (*mnp)); 301 302 if (mnt.mnt_special != NULL) 303 mnp->mnt_special = strdup(mnt.mnt_special); 304 if (mnt.mnt_mntopts != NULL) 305 mnp->mnt_mntopts = strdup(mnt.mnt_mntopts); 306 mnp->mnt_mountp = strdup(mnt.mnt_mountp); 307 mnp->mnt_fstype = strdup(mnt.mnt_fstype); 308 if ((mnt.mnt_special != NULL && mnp->mnt_special == NULL) || 309 (mnt.mnt_mntopts != NULL && mnp->mnt_mntopts == NULL) || 310 mnp->mnt_mountp == NULL || mnp->mnt_fstype == NULL) { 311 zerror(zlogp, B_TRUE, "memory allocation failed"); 312 free_mnttable(mnts, nmnt); 313 return (-1); 314 } 315 } 316 *mnt_arrayp = mnts; 317 *nelemp = nmnt; 318 return (0); 319 } 320 321 /* 322 * This is an optimization. The resolve_lofs function is used quite frequently 323 * to manipulate file paths, and on a machine with a large number of zones, 324 * there will be a huge number of mounted file systems. Thus, we trigger a 325 * reread of the list of mount points 326 */ 327 static void 328 lofs_discard_mnttab(void) 329 { 330 free_mnttable(resolve_lofs_mnts, 331 resolve_lofs_mnt_max - resolve_lofs_mnts); 332 resolve_lofs_mnts = resolve_lofs_mnt_max = NULL; 333 } 334 335 static int 336 lofs_read_mnttab(zlog_t *zlogp) 337 { 338 FILE *mnttab; 339 uint_t nmnts; 340 341 if ((mnttab = fopen(MNTTAB, "r")) == NULL) 342 return (-1); 343 if (build_mnttable(zlogp, "", 0, mnttab, &resolve_lofs_mnts, 344 &nmnts) == -1) { 345 (void) fclose(mnttab); 346 return (-1); 347 } 348 (void) fclose(mnttab); 349 resolve_lofs_mnt_max = resolve_lofs_mnts + nmnts; 350 return (0); 351 } 352 353 /* 354 * This function loops over potential loopback mounts and symlinks in a given 355 * path and resolves them all down to an absolute path. 
356 */ 357 static void 358 resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen) 359 { 360 int len, arlen; 361 const char *altroot; 362 char tmppath[MAXPATHLEN]; 363 boolean_t outside_altroot; 364 365 if ((len = resolvepath(path, tmppath, sizeof (tmppath))) == -1) 366 return; 367 tmppath[len] = '\0'; 368 (void) strlcpy(path, tmppath, sizeof (tmppath)); 369 370 /* This happens once per zoneadmd operation. */ 371 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 372 return; 373 374 altroot = zonecfg_get_root(); 375 arlen = strlen(altroot); 376 outside_altroot = B_FALSE; 377 for (;;) { 378 struct mnttab *mnp; 379 380 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; 381 mnp++) { 382 if (mnp->mnt_fstype == NULL || 383 mnp->mnt_mountp == NULL || 384 mnp->mnt_special == NULL || 385 strcmp(mnp->mnt_fstype, MNTTYPE_LOFS) != 0) 386 continue; 387 len = strlen(mnp->mnt_mountp); 388 if (strncmp(mnp->mnt_mountp, path, len) == 0 && 389 (path[len] == '/' || path[len] == '\0')) 390 break; 391 } 392 if (mnp >= resolve_lofs_mnt_max) 393 break; 394 if (outside_altroot) { 395 char *cp; 396 int olen = sizeof (MNTOPT_RO) - 1; 397 398 /* 399 * If we run into a read-only mount outside of the 400 * alternate root environment, then the user doesn't 401 * want this path to be made read-write. 
402 */ 403 if (mnp->mnt_mntopts != NULL && 404 (cp = strstr(mnp->mnt_mntopts, MNTOPT_RO)) != 405 NULL && 406 (cp == mnp->mnt_mntopts || cp[-1] == ',') && 407 (cp[olen] == '\0' || cp[olen] == ',')) { 408 break; 409 } 410 } else if (arlen > 0 && 411 (strncmp(mnp->mnt_special, altroot, arlen) != 0 || 412 (mnp->mnt_special[arlen] != '\0' && 413 mnp->mnt_special[arlen] != '/'))) { 414 outside_altroot = B_TRUE; 415 } 416 /* use temporary buffer because new path might be longer */ 417 (void) snprintf(tmppath, sizeof (tmppath), "%s%s", 418 mnp->mnt_special, path + len); 419 if ((len = resolvepath(tmppath, path, pathlen)) == -1) 420 break; 421 path[len] = '\0'; 422 } 423 } 424 425 /* 426 * For a regular mount, check if a replacement lofs mount is needed because the 427 * referenced device is already mounted somewhere. 428 */ 429 static int 430 check_lofs_needed(zlog_t *zlogp, struct zone_fstab *fsptr) 431 { 432 struct mnttab *mnp; 433 zone_fsopt_t *optptr, *onext; 434 435 /* This happens once per zoneadmd operation. */ 436 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 437 return (-1); 438 439 /* 440 * If this special node isn't already in use, then it's ours alone; 441 * no need to worry about conflicting mounts. 442 */ 443 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; 444 mnp++) { 445 if (strcmp(mnp->mnt_special, fsptr->zone_fs_special) == 0) 446 break; 447 } 448 if (mnp >= resolve_lofs_mnt_max) 449 return (0); 450 451 /* 452 * Convert this duplicate mount into a lofs mount. 453 */ 454 (void) strlcpy(fsptr->zone_fs_special, mnp->mnt_mountp, 455 sizeof (fsptr->zone_fs_special)); 456 (void) strlcpy(fsptr->zone_fs_type, MNTTYPE_LOFS, 457 sizeof (fsptr->zone_fs_type)); 458 fsptr->zone_fs_raw[0] = '\0'; 459 460 /* 461 * Discard all but one of the original options and set that to be the 462 * same set of options used for inherit package directory resources. 
463 */ 464 optptr = fsptr->zone_fs_options; 465 if (optptr == NULL) { 466 optptr = malloc(sizeof (*optptr)); 467 if (optptr == NULL) { 468 zerror(zlogp, B_TRUE, "cannot mount %s", 469 fsptr->zone_fs_dir); 470 return (-1); 471 } 472 } else { 473 while ((onext = optptr->zone_fsopt_next) != NULL) { 474 optptr->zone_fsopt_next = onext->zone_fsopt_next; 475 free(onext); 476 } 477 } 478 (void) strcpy(optptr->zone_fsopt_opt, IPD_DEFAULT_OPTS); 479 optptr->zone_fsopt_next = NULL; 480 fsptr->zone_fs_options = optptr; 481 return (0); 482 } 483 484 static int 485 make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode) 486 { 487 char path[MAXPATHLEN]; 488 struct stat st; 489 490 if (snprintf(path, sizeof (path), "%s%s", prefix, subdir) > 491 sizeof (path)) { 492 zerror(zlogp, B_FALSE, "pathname %s%s is too long", prefix, 493 subdir); 494 return (-1); 495 } 496 497 if (lstat(path, &st) == 0) { 498 /* 499 * We don't check the file mode since presumably the zone 500 * administrator may have had good reason to change the mode, 501 * and we don't need to second guess him. 502 */ 503 if (!S_ISDIR(st.st_mode)) { 504 if (is_system_labeled() && 505 S_ISREG(st.st_mode)) { 506 /* 507 * The need to mount readonly copies of 508 * global zone /etc/ files is unique to 509 * Trusted Extensions. 
510 */ 511 if (strncmp(subdir, "/etc/", 512 strlen("/etc/")) != 0) { 513 zerror(zlogp, B_FALSE, 514 "%s is not in /etc", path); 515 return (-1); 516 } 517 } else { 518 zerror(zlogp, B_FALSE, 519 "%s is not a directory", path); 520 return (-1); 521 } 522 } 523 } else if (mkdirp(path, mode) != 0) { 524 if (errno == EROFS) 525 zerror(zlogp, B_FALSE, "Could not mkdir %s.\nIt is on " 526 "a read-only file system in this local zone.\nMake " 527 "sure %s exists in the global zone.", path, subdir); 528 else 529 zerror(zlogp, B_TRUE, "mkdirp of %s failed", path); 530 return (-1); 531 } 532 return (0); 533 } 534 535 static void 536 free_remote_fstypes(char **types) 537 { 538 uint_t i; 539 540 if (types == NULL) 541 return; 542 for (i = 0; types[i] != NULL; i++) 543 free(types[i]); 544 free(types); 545 } 546 547 static char ** 548 get_remote_fstypes(zlog_t *zlogp) 549 { 550 char **types = NULL; 551 FILE *fp; 552 char buf[MAXPATHLEN]; 553 char fstype[MAXPATHLEN]; 554 uint_t lines = 0; 555 uint_t i; 556 557 if ((fp = fopen(DFSTYPES, "r")) == NULL) { 558 zerror(zlogp, B_TRUE, "failed to open %s", DFSTYPES); 559 return (NULL); 560 } 561 /* 562 * Count the number of lines 563 */ 564 while (fgets(buf, sizeof (buf), fp) != NULL) 565 lines++; 566 if (lines == 0) /* didn't read anything; empty file */ 567 goto out; 568 rewind(fp); 569 /* 570 * Allocate enough space for a NULL-terminated array. 
571 */ 572 types = calloc(lines + 1, sizeof (char *)); 573 if (types == NULL) { 574 zerror(zlogp, B_TRUE, "memory allocation failed"); 575 goto out; 576 } 577 i = 0; 578 while (fgets(buf, sizeof (buf), fp) != NULL) { 579 /* LINTED - fstype is big enough to hold buf */ 580 if (sscanf(buf, "%s", fstype) == 0) { 581 zerror(zlogp, B_FALSE, "unable to parse %s", DFSTYPES); 582 free_remote_fstypes(types); 583 types = NULL; 584 goto out; 585 } 586 types[i] = strdup(fstype); 587 if (types[i] == NULL) { 588 zerror(zlogp, B_TRUE, "memory allocation failed"); 589 free_remote_fstypes(types); 590 types = NULL; 591 goto out; 592 } 593 i++; 594 } 595 out: 596 (void) fclose(fp); 597 return (types); 598 } 599 600 static boolean_t 601 is_remote_fstype(const char *fstype, char *const *remote_fstypes) 602 { 603 uint_t i; 604 605 if (remote_fstypes == NULL) 606 return (B_FALSE); 607 for (i = 0; remote_fstypes[i] != NULL; i++) { 608 if (strcmp(remote_fstypes[i], fstype) == 0) 609 return (B_TRUE); 610 } 611 return (B_FALSE); 612 } 613 614 /* 615 * This converts a zone root path (normally of the form .../root) to a Live 616 * Upgrade scratch zone root (of the form .../lu). 617 */ 618 static void 619 root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved) 620 { 621 if (!isresolved && zonecfg_in_alt_root()) 622 resolve_lofs(zlogp, zroot, zrootlen); 623 (void) strcpy(strrchr(zroot, '/') + 1, "lu"); 624 } 625 626 /* 627 * The general strategy for unmounting filesystems is as follows: 628 * 629 * - Remote filesystems may be dead, and attempting to contact them as 630 * part of a regular unmount may hang forever; we want to always try to 631 * forcibly unmount such filesystems and only fall back to regular 632 * unmounts if the filesystem doesn't support forced unmounts. 
633 * 634 * - We don't want to unnecessarily corrupt metadata on local 635 * filesystems (ie UFS), so we want to start off with graceful unmounts, 636 * and only escalate to doing forced unmounts if we get stuck. 637 * 638 * We start off walking backwards through the mount table. This doesn't 639 * give us strict ordering but ensures that we try to unmount submounts 640 * first. We thus limit the number of failed umount2(2) calls. 641 * 642 * The mechanism for determining if we're stuck is to count the number 643 * of failed unmounts each iteration through the mount table. This 644 * gives us an upper bound on the number of filesystems which remain 645 * mounted (autofs trigger nodes are dealt with separately). If at the 646 * end of one unmount+autofs_cleanup cycle we still have the same number 647 * of mounts that we started out with, we're stuck and try a forced 648 * unmount. If that fails (filesystem doesn't support forced unmounts) 649 * then we bail and are unable to teardown the zone. If it succeeds, 650 * we're no longer stuck so we continue with our policy of trying 651 * graceful unmounts first. 652 * 653 * Zone must be down (ie, no processes or threads active). 
654 */ 655 static int 656 unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd) 657 { 658 int error = 0; 659 FILE *mnttab; 660 struct mnttab *mnts; 661 uint_t nmnt; 662 char zroot[MAXPATHLEN + 1]; 663 size_t zrootlen; 664 uint_t oldcount = UINT_MAX; 665 boolean_t stuck = B_FALSE; 666 char **remote_fstypes = NULL; 667 668 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 669 zerror(zlogp, B_FALSE, "unable to determine zone root"); 670 return (-1); 671 } 672 if (unmount_cmd) 673 root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE); 674 675 (void) strcat(zroot, "/"); 676 zrootlen = strlen(zroot); 677 678 /* 679 * For Trusted Extensions unmount each higher level zone's mount 680 * of our zone's /export/home 681 */ 682 if (!unmount_cmd) 683 tsol_unmounts(zlogp, zone_name); 684 685 if ((mnttab = fopen(MNTTAB, "r")) == NULL) { 686 zerror(zlogp, B_TRUE, "failed to open %s", MNTTAB); 687 return (-1); 688 } 689 /* 690 * Use our hacky mntfs ioctl so we see everything, even mounts with 691 * MS_NOMNTTAB. 692 */ 693 if (ioctl(fileno(mnttab), MNTIOC_SHOWHIDDEN, NULL) < 0) { 694 zerror(zlogp, B_TRUE, "unable to configure %s", MNTTAB); 695 error++; 696 goto out; 697 } 698 699 /* 700 * Build the list of remote fstypes so we know which ones we 701 * should forcibly unmount. 702 */ 703 remote_fstypes = get_remote_fstypes(zlogp); 704 for (; /* ever */; ) { 705 uint_t newcount = 0; 706 boolean_t unmounted; 707 struct mnttab *mnp; 708 char *path; 709 uint_t i; 710 711 mnts = NULL; 712 nmnt = 0; 713 /* 714 * MNTTAB gives us a way to walk through mounted 715 * filesystems; we need to be able to walk them in 716 * reverse order, so we build a list of all mounted 717 * filesystems. 
718 */ 719 if (build_mnttable(zlogp, zroot, zrootlen, mnttab, &mnts, 720 &nmnt) != 0) { 721 error++; 722 goto out; 723 } 724 for (i = 0; i < nmnt; i++) { 725 mnp = &mnts[nmnt - i - 1]; /* access in reverse order */ 726 path = mnp->mnt_mountp; 727 unmounted = B_FALSE; 728 /* 729 * Try forced unmount first for remote filesystems. 730 * 731 * Not all remote filesystems support forced unmounts, 732 * so if this fails (ENOTSUP) we'll continue on 733 * and try a regular unmount. 734 */ 735 if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) { 736 if (umount2(path, MS_FORCE) == 0) 737 unmounted = B_TRUE; 738 } 739 /* 740 * Try forced unmount if we're stuck. 741 */ 742 if (stuck) { 743 if (umount2(path, MS_FORCE) == 0) { 744 unmounted = B_TRUE; 745 stuck = B_FALSE; 746 } else { 747 /* 748 * The first failure indicates a 749 * mount we won't be able to get 750 * rid of automatically, so we 751 * bail. 752 */ 753 error++; 754 zerror(zlogp, B_FALSE, 755 "unable to unmount '%s'", path); 756 free_mnttable(mnts, nmnt); 757 goto out; 758 } 759 } 760 /* 761 * Try regular unmounts for everything else. 762 */ 763 if (!unmounted && umount2(path, 0) != 0) 764 newcount++; 765 } 766 free_mnttable(mnts, nmnt); 767 768 if (newcount == 0) 769 break; 770 if (newcount >= oldcount) { 771 /* 772 * Last round didn't unmount anything; we're stuck and 773 * should start trying forced unmounts. 774 */ 775 stuck = B_TRUE; 776 } 777 oldcount = newcount; 778 779 /* 780 * Autofs doesn't let you unmount its trigger nodes from 781 * userland so we have to tell the kernel to cleanup for us. 782 */ 783 if (autofs_cleanup(zoneid) != 0) { 784 zerror(zlogp, B_TRUE, "unable to remove autofs nodes"); 785 error++; 786 goto out; 787 } 788 } 789 790 out: 791 free_remote_fstypes(remote_fstypes); 792 (void) fclose(mnttab); 793 return (error ? 
-1 : 0); 794 } 795 796 static int 797 fs_compare(const void *m1, const void *m2) 798 { 799 struct zone_fstab *i = (struct zone_fstab *)m1; 800 struct zone_fstab *j = (struct zone_fstab *)m2; 801 802 return (strcmp(i->zone_fs_dir, j->zone_fs_dir)); 803 } 804 805 /* 806 * Fork and exec (and wait for) the mentioned binary with the provided 807 * arguments. Returns (-1) if something went wrong with fork(2) or exec(2), 808 * returns the exit status otherwise. 809 * 810 * If we were unable to exec the provided pathname (for whatever 811 * reason), we return the special token ZEXIT_EXEC. The current value 812 * of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the 813 * consumers of this function; any future consumers must make sure this 814 * remains the case. 815 */ 816 static int 817 forkexec(zlog_t *zlogp, const char *path, char *const argv[]) 818 { 819 pid_t child_pid; 820 int child_status = 0; 821 822 /* 823 * Do not let another thread localize a message while we are forking. 824 */ 825 (void) mutex_lock(&msglock); 826 child_pid = fork(); 827 (void) mutex_unlock(&msglock); 828 if (child_pid == -1) { 829 zerror(zlogp, B_TRUE, "could not fork for %s", argv[0]); 830 return (-1); 831 } else if (child_pid == 0) { 832 closefrom(0); 833 /* redirect stdin, stdout & stderr to /dev/null */ 834 (void) open("/dev/null", O_RDONLY); /* stdin */ 835 (void) open("/dev/null", O_WRONLY); /* stdout */ 836 (void) open("/dev/null", O_WRONLY); /* stderr */ 837 (void) execv(path, argv); 838 /* 839 * Since we are in the child, there is no point calling zerror() 840 * since there is nobody waiting to consume it. So exit with a 841 * special code that the parent will recognize and call zerror() 842 * accordingly. 
843 */ 844 845 _exit(ZEXIT_EXEC); 846 } else { 847 (void) waitpid(child_pid, &child_status, 0); 848 } 849 850 if (WIFSIGNALED(child_status)) { 851 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 852 "signal %d", path, WTERMSIG(child_status)); 853 return (-1); 854 } 855 assert(WIFEXITED(child_status)); 856 if (WEXITSTATUS(child_status) == ZEXIT_EXEC) { 857 zerror(zlogp, B_FALSE, "failed to exec %s", path); 858 return (-1); 859 } 860 return (WEXITSTATUS(child_status)); 861 } 862 863 static int 864 dofsck(zlog_t *zlogp, const char *fstype, const char *rawdev) 865 { 866 char cmdbuf[MAXPATHLEN]; 867 char *argv[4]; 868 int status; 869 870 /* 871 * We could alternatively have called /usr/sbin/fsck -F <fstype>, but 872 * that would cost us an extra fork/exec without buying us anything. 873 */ 874 if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/fsck", fstype) 875 > sizeof (cmdbuf)) { 876 zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); 877 return (-1); 878 } 879 880 argv[0] = "fsck"; 881 argv[1] = "-m"; 882 argv[2] = (char *)rawdev; 883 argv[3] = NULL; 884 885 status = forkexec(zlogp, cmdbuf, argv); 886 if (status == 0 || status == -1) 887 return (status); 888 zerror(zlogp, B_FALSE, "fsck of '%s' failed with exit status %d; " 889 "run fsck manually", rawdev, status); 890 return (-1); 891 } 892 893 static int 894 domount(zlog_t *zlogp, const char *fstype, const char *opts, 895 const char *special, const char *directory) 896 { 897 char cmdbuf[MAXPATHLEN]; 898 char *argv[6]; 899 int status; 900 901 /* 902 * We could alternatively have called /usr/sbin/mount -F <fstype>, but 903 * that would cost us an extra fork/exec without buying us anything. 
904 */ 905 if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/mount", fstype) 906 > sizeof (cmdbuf)) { 907 zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); 908 return (-1); 909 } 910 argv[0] = "mount"; 911 if (opts[0] == '\0') { 912 argv[1] = (char *)special; 913 argv[2] = (char *)directory; 914 argv[3] = NULL; 915 } else { 916 argv[1] = "-o"; 917 argv[2] = (char *)opts; 918 argv[3] = (char *)special; 919 argv[4] = (char *)directory; 920 argv[5] = NULL; 921 } 922 923 status = forkexec(zlogp, cmdbuf, argv); 924 if (status == 0 || status == -1) 925 return (status); 926 if (opts[0] == '\0') 927 zerror(zlogp, B_FALSE, "\"%s %s %s\" " 928 "failed with exit code %d", 929 cmdbuf, special, directory, status); 930 else 931 zerror(zlogp, B_FALSE, "\"%s -o %s %s %s\" " 932 "failed with exit code %d", 933 cmdbuf, opts, special, directory, status); 934 return (-1); 935 } 936 937 /* 938 * Make sure if a given path exists, it is not a sym-link, and is a directory. 939 */ 940 static int 941 check_path(zlog_t *zlogp, const char *path) 942 { 943 struct stat statbuf; 944 char respath[MAXPATHLEN]; 945 int res; 946 947 if (lstat(path, &statbuf) != 0) { 948 if (errno == ENOENT) 949 return (0); 950 zerror(zlogp, B_TRUE, "can't stat %s", path); 951 return (-1); 952 } 953 if (S_ISLNK(statbuf.st_mode)) { 954 zerror(zlogp, B_FALSE, "%s is a symlink", path); 955 return (-1); 956 } 957 if (!S_ISDIR(statbuf.st_mode)) { 958 if (is_system_labeled() && S_ISREG(statbuf.st_mode)) { 959 /* 960 * The need to mount readonly copies of 961 * global zone /etc/ files is unique to 962 * Trusted Extensions. 
963 * The check for /etc/ via strstr() is to 964 * allow paths like $ZONEROOT/etc/passwd 965 */ 966 if (strstr(path, "/etc/") == NULL) { 967 zerror(zlogp, B_FALSE, 968 "%s is not in /etc", path); 969 return (-1); 970 } 971 } else { 972 zerror(zlogp, B_FALSE, "%s is not a directory", path); 973 return (-1); 974 } 975 } 976 if ((res = resolvepath(path, respath, sizeof (respath))) == -1) { 977 zerror(zlogp, B_TRUE, "unable to resolve path %s", path); 978 return (-1); 979 } 980 respath[res] = '\0'; 981 if (strcmp(path, respath) != 0) { 982 /* 983 * We don't like ".."s and "."s throwing us off 984 */ 985 zerror(zlogp, B_FALSE, "%s is not a canonical path", path); 986 return (-1); 987 } 988 return (0); 989 } 990 991 /* 992 * Check every component of rootpath/relpath. If any component fails (ie, 993 * exists but isn't the canonical path to a directory), it is returned in 994 * badpath, which is assumed to be at least of size MAXPATHLEN. 995 * 996 * Relpath must begin with '/'. 997 */ 998 static boolean_t 999 valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *relpath) 1000 { 1001 char abspath[MAXPATHLEN], *slashp; 1002 1003 /* 1004 * Make sure abspath has at least one '/' after its rootpath 1005 * component, and ends with '/'. 
1006 */ 1007 if (snprintf(abspath, sizeof (abspath), "%s%s/", rootpath, relpath) > 1008 sizeof (abspath)) { 1009 zerror(zlogp, B_FALSE, "pathname %s%s is too long", rootpath, 1010 relpath); 1011 return (B_FALSE); 1012 } 1013 1014 slashp = &abspath[strlen(rootpath)]; 1015 assert(*slashp == '/'); 1016 do { 1017 *slashp = '\0'; 1018 if (check_path(zlogp, abspath) != 0) 1019 return (B_FALSE); 1020 *slashp = '/'; 1021 slashp++; 1022 } while ((slashp = strchr(slashp, '/')) != NULL); 1023 return (B_TRUE); 1024 } 1025 1026 static int 1027 mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath) 1028 { 1029 char path[MAXPATHLEN]; 1030 char specpath[MAXPATHLEN]; 1031 char optstr[MAX_MNTOPT_STR]; 1032 zone_fsopt_t *optptr; 1033 1034 if (!valid_mount_path(zlogp, rootpath, fsptr->zone_fs_dir)) { 1035 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 1036 rootpath, fsptr->zone_fs_dir); 1037 return (-1); 1038 } 1039 1040 if (make_one_dir(zlogp, rootpath, fsptr->zone_fs_dir, 1041 DEFAULT_DIR_MODE) != 0) 1042 return (-1); 1043 1044 (void) snprintf(path, sizeof (path), "%s%s", rootpath, 1045 fsptr->zone_fs_dir); 1046 1047 if (strlen(fsptr->zone_fs_special) == 0) { 1048 /* 1049 * A zero-length special is how we distinguish IPDs from 1050 * general-purpose FSs. Make sure it mounts from a place that 1051 * can be seen via the alternate zone's root. 
1052 */ 1053 if (snprintf(specpath, sizeof (specpath), "%s%s", 1054 zonecfg_get_root(), fsptr->zone_fs_dir) >= 1055 sizeof (specpath)) { 1056 zerror(zlogp, B_FALSE, "cannot mount %s: path too " 1057 "long in alternate root", fsptr->zone_fs_dir); 1058 return (-1); 1059 } 1060 if (zonecfg_in_alt_root()) 1061 resolve_lofs(zlogp, specpath, sizeof (specpath)); 1062 if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, 1063 specpath, path) != 0) { 1064 zerror(zlogp, B_TRUE, "failed to loopback mount %s", 1065 specpath); 1066 return (-1); 1067 } 1068 return (0); 1069 } 1070 1071 /* 1072 * In general the strategy here is to do just as much verification as 1073 * necessary to avoid crashing or otherwise doing something bad; if the 1074 * administrator initiated the operation via zoneadm(1m), he'll get 1075 * auto-verification which will let him know what's wrong. If he 1076 * modifies the zone configuration of a running zone and doesn't attempt 1077 * to verify that it's OK we won't crash but won't bother trying to be 1078 * too helpful either. zoneadm verify is only a couple keystrokes away. 1079 */ 1080 if (!zonecfg_valid_fs_type(fsptr->zone_fs_type)) { 1081 zerror(zlogp, B_FALSE, "cannot mount %s on %s: " 1082 "invalid file-system type %s", fsptr->zone_fs_special, 1083 fsptr->zone_fs_dir, fsptr->zone_fs_type); 1084 return (-1); 1085 } 1086 1087 /* 1088 * If we're looking at an alternate root environment, then construct 1089 * read-only loopback mounts as necessary. For all lofs mounts, make 1090 * sure that the 'special' entry points inside the alternate root. (We 1091 * don't do this with other mounts, as devfs isn't in the alternate 1092 * root, and we need to assume the device environment is roughly the 1093 * same.) 
1094 */ 1095 if (zonecfg_in_alt_root()) { 1096 struct stat64 st; 1097 1098 if (stat64(fsptr->zone_fs_special, &st) != -1 && 1099 S_ISBLK(st.st_mode) && 1100 check_lofs_needed(zlogp, fsptr) == -1) 1101 return (-1); 1102 if (strcmp(fsptr->zone_fs_type, MNTTYPE_LOFS) == 0) { 1103 if (snprintf(specpath, sizeof (specpath), "%s%s", 1104 zonecfg_get_root(), fsptr->zone_fs_special) >= 1105 sizeof (specpath)) { 1106 zerror(zlogp, B_FALSE, "cannot mount %s: path " 1107 "too long in alternate root", 1108 fsptr->zone_fs_special); 1109 return (-1); 1110 } 1111 resolve_lofs(zlogp, specpath, sizeof (specpath)); 1112 (void) strlcpy(fsptr->zone_fs_special, specpath, 1113 sizeof (fsptr->zone_fs_special)); 1114 } 1115 } 1116 1117 /* 1118 * Run 'fsck -m' if there's a device to fsck. 1119 */ 1120 if (fsptr->zone_fs_raw[0] != '\0' && 1121 dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_raw) != 0) 1122 return (-1); 1123 1124 /* 1125 * Build up mount option string. 1126 */ 1127 optstr[0] = '\0'; 1128 if (fsptr->zone_fs_options != NULL) { 1129 (void) strlcpy(optstr, fsptr->zone_fs_options->zone_fsopt_opt, 1130 sizeof (optstr)); 1131 for (optptr = fsptr->zone_fs_options->zone_fsopt_next; 1132 optptr != NULL; optptr = optptr->zone_fsopt_next) { 1133 (void) strlcat(optstr, ",", sizeof (optstr)); 1134 (void) strlcat(optstr, optptr->zone_fsopt_opt, 1135 sizeof (optstr)); 1136 } 1137 } 1138 return (domount(zlogp, fsptr->zone_fs_type, optstr, 1139 fsptr->zone_fs_special, path)); 1140 } 1141 1142 static void 1143 free_fs_data(struct zone_fstab *fsarray, uint_t nelem) 1144 { 1145 uint_t i; 1146 1147 if (fsarray == NULL) 1148 return; 1149 for (i = 0; i < nelem; i++) 1150 zonecfg_free_fs_option_list(fsarray[i].zone_fs_options); 1151 free(fsarray); 1152 } 1153 1154 /* 1155 * This function constructs the miniroot-like "scratch zone" environment. If 1156 * it returns B_FALSE, then the error has already been logged. 
1157 */ 1158 static boolean_t 1159 build_mounted(zlog_t *zlogp, char *rootpath, size_t rootlen, 1160 const char *zonepath) 1161 { 1162 char tmp[MAXPATHLEN], fromdir[MAXPATHLEN]; 1163 char luroot[MAXPATHLEN]; 1164 const char **cpp; 1165 static const char *mkdirs[] = { 1166 "/system", "/system/contract", "/system/object", "/proc", 1167 "/dev", "/tmp", "/a", NULL 1168 }; 1169 static const char *localdirs[] = { 1170 "/etc", "/var", NULL 1171 }; 1172 static const char *loopdirs[] = { 1173 "/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform", 1174 "/usr", NULL 1175 }; 1176 static const char *tmpdirs[] = { 1177 "/tmp", "/var/run", NULL 1178 }; 1179 FILE *fp; 1180 struct stat st; 1181 char *altstr; 1182 uuid_t uuid; 1183 1184 /* 1185 * Construct a small Solaris environment, including the zone root 1186 * mounted on '/a' inside that environment. 1187 */ 1188 resolve_lofs(zlogp, rootpath, rootlen); 1189 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 1190 resolve_lofs(zlogp, luroot, sizeof (luroot)); 1191 (void) snprintf(tmp, sizeof (tmp), "%s/bin", luroot); 1192 (void) symlink("./usr/bin", tmp); 1193 1194 /* 1195 * These are mostly special mount points; not handled here. (See 1196 * zone_mount_early.) 1197 */ 1198 for (cpp = mkdirs; *cpp != NULL; cpp++) { 1199 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1200 if (mkdir(tmp, 0755) != 0) { 1201 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1202 return (B_FALSE); 1203 } 1204 } 1205 1206 /* 1207 * These are mounted read-write from the zone undergoing upgrade. We 1208 * must be careful not to 'leak' things from the main system into the 1209 * zone, and this accomplishes that goal. 
1210 */ 1211 for (cpp = localdirs; *cpp != NULL; cpp++) { 1212 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1213 (void) snprintf(fromdir, sizeof (fromdir), "%s%s", rootpath, 1214 *cpp); 1215 if (mkdir(tmp, 0755) != 0) { 1216 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1217 return (B_FALSE); 1218 } 1219 if (domount(zlogp, MNTTYPE_LOFS, "", fromdir, tmp) != 0) { 1220 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1221 *cpp); 1222 return (B_FALSE); 1223 } 1224 } 1225 1226 /* 1227 * These are things mounted read-only from the running system because 1228 * they contain binaries that must match system. 1229 */ 1230 for (cpp = loopdirs; *cpp != NULL; cpp++) { 1231 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1232 if (mkdir(tmp, 0755) != 0) { 1233 if (errno != EEXIST) { 1234 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1235 return (B_FALSE); 1236 } 1237 if (lstat(tmp, &st) != 0) { 1238 zerror(zlogp, B_TRUE, "cannot stat %s", tmp); 1239 return (B_FALSE); 1240 } 1241 /* 1242 * Ignore any non-directories encountered. These are 1243 * things that have been converted into symlinks 1244 * (/etc/fs and /etc/lib) and no longer need a lofs 1245 * fixup. 1246 */ 1247 if (!S_ISDIR(st.st_mode)) 1248 continue; 1249 } 1250 if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, *cpp, 1251 tmp) != 0) { 1252 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1253 *cpp); 1254 return (B_FALSE); 1255 } 1256 } 1257 1258 /* 1259 * These are things with tmpfs mounted inside. 1260 */ 1261 for (cpp = tmpdirs; *cpp != NULL; cpp++) { 1262 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1263 if (mkdir(tmp, 0755) != 0 && errno != EEXIST) { 1264 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1265 return (B_FALSE); 1266 } 1267 if (domount(zlogp, MNTTYPE_TMPFS, "", "swap", tmp) != 0) { 1268 zerror(zlogp, B_TRUE, "cannot mount swap on %s", *cpp); 1269 return (B_FALSE); 1270 } 1271 } 1272 1273 /* 1274 * This is here to support lucopy. 
If there's an instance of this same 1275 * zone on the current running system, then we mount its root up as 1276 * read-only inside the scratch zone. 1277 */ 1278 (void) zonecfg_get_uuid(zone_name, uuid); 1279 altstr = strdup(zonecfg_get_root()); 1280 if (altstr == NULL) { 1281 zerror(zlogp, B_TRUE, "memory allocation failed"); 1282 return (B_FALSE); 1283 } 1284 zonecfg_set_root(""); 1285 (void) strlcpy(tmp, zone_name, sizeof (tmp)); 1286 (void) zonecfg_get_name_by_uuid(uuid, tmp, sizeof (tmp)); 1287 if (zone_get_rootpath(tmp, fromdir, sizeof (fromdir)) == Z_OK && 1288 strcmp(fromdir, rootpath) != 0) { 1289 (void) snprintf(tmp, sizeof (tmp), "%s/b", luroot); 1290 if (mkdir(tmp, 0755) != 0) { 1291 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1292 return (B_FALSE); 1293 } 1294 if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, fromdir, 1295 tmp) != 0) { 1296 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1297 fromdir); 1298 return (B_FALSE); 1299 } 1300 } 1301 zonecfg_set_root(altstr); 1302 free(altstr); 1303 1304 if ((fp = zonecfg_open_scratch(luroot, B_TRUE)) == NULL) { 1305 zerror(zlogp, B_TRUE, "cannot open zone mapfile"); 1306 return (B_FALSE); 1307 } 1308 (void) ftruncate(fileno(fp), 0); 1309 if (zonecfg_add_scratch(fp, zone_name, kernzone, "/") == -1) { 1310 zerror(zlogp, B_TRUE, "cannot add zone mapfile entry"); 1311 } 1312 zonecfg_close_scratch(fp); 1313 (void) snprintf(tmp, sizeof (tmp), "%s/a", luroot); 1314 if (domount(zlogp, MNTTYPE_LOFS, "", rootpath, tmp) != 0) 1315 return (B_FALSE); 1316 (void) strlcpy(rootpath, tmp, rootlen); 1317 return (B_TRUE); 1318 } 1319 1320 static int 1321 mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd) 1322 { 1323 char rootpath[MAXPATHLEN]; 1324 char zonepath[MAXPATHLEN]; 1325 int num_fs = 0, i; 1326 struct zone_fstab fstab, *fs_ptr = NULL, *tmp_ptr; 1327 struct zone_fstab *fsp; 1328 zone_dochandle_t handle = NULL; 1329 zone_state_t zstate; 1330 1331 if (zone_get_state(zone_name, &zstate) != Z_OK || 1332 (zstate 
!= ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) { 1333 zerror(zlogp, B_FALSE, 1334 "zone must be in '%s' or '%s' state to mount file-systems", 1335 zone_state_str(ZONE_STATE_READY), 1336 zone_state_str(ZONE_STATE_MOUNTED)); 1337 goto bad; 1338 } 1339 1340 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { 1341 zerror(zlogp, B_TRUE, "unable to determine zone path"); 1342 goto bad; 1343 } 1344 1345 if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { 1346 zerror(zlogp, B_TRUE, "unable to determine zone root"); 1347 goto bad; 1348 } 1349 1350 if ((handle = zonecfg_init_handle()) == NULL) { 1351 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 1352 goto bad; 1353 } 1354 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK || 1355 zonecfg_setfsent(handle) != Z_OK) { 1356 zerror(zlogp, B_FALSE, "invalid configuration"); 1357 goto bad; 1358 } 1359 1360 /* 1361 * Iterate through the rest of the filesystems, first the IPDs, then 1362 * the general FSs. Sort them all, then mount them in sorted order. 1363 * This is to make sure the higher level directories (e.g., /usr) 1364 * get mounted before any beneath them (e.g., /usr/local). 1365 */ 1366 if (zonecfg_setipdent(handle) != Z_OK) { 1367 zerror(zlogp, B_FALSE, "invalid configuration"); 1368 goto bad; 1369 } 1370 while (zonecfg_getipdent(handle, &fstab) == Z_OK) { 1371 num_fs++; 1372 if ((tmp_ptr = realloc(fs_ptr, 1373 num_fs * sizeof (*tmp_ptr))) == NULL) { 1374 zerror(zlogp, B_TRUE, "memory allocation failed"); 1375 num_fs--; 1376 (void) zonecfg_endipdent(handle); 1377 goto bad; 1378 } 1379 fs_ptr = tmp_ptr; 1380 fsp = &fs_ptr[num_fs - 1]; 1381 /* 1382 * IPDs logically only have a mount point; all other properties 1383 * are implied. 
1384 */ 1385 (void) strlcpy(fsp->zone_fs_dir, 1386 fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir)); 1387 fsp->zone_fs_special[0] = '\0'; 1388 fsp->zone_fs_raw[0] = '\0'; 1389 fsp->zone_fs_type[0] = '\0'; 1390 fsp->zone_fs_options = NULL; 1391 } 1392 (void) zonecfg_endipdent(handle); 1393 1394 if (zonecfg_setfsent(handle) != Z_OK) { 1395 zerror(zlogp, B_FALSE, "invalid configuration"); 1396 goto bad; 1397 } 1398 while (zonecfg_getfsent(handle, &fstab) == Z_OK) { 1399 /* 1400 * ZFS filesystems will not be accessible under an alternate 1401 * root, since the pool will not be known. Ignore them in this 1402 * case. 1403 */ 1404 if (mount_cmd && strcmp(fstab.zone_fs_type, MNTTYPE_ZFS) == 0) 1405 continue; 1406 1407 num_fs++; 1408 if ((tmp_ptr = realloc(fs_ptr, 1409 num_fs * sizeof (*tmp_ptr))) == NULL) { 1410 zerror(zlogp, B_TRUE, "memory allocation failed"); 1411 num_fs--; 1412 (void) zonecfg_endfsent(handle); 1413 goto bad; 1414 } 1415 fs_ptr = tmp_ptr; 1416 fsp = &fs_ptr[num_fs - 1]; 1417 (void) strlcpy(fsp->zone_fs_dir, 1418 fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir)); 1419 (void) strlcpy(fsp->zone_fs_special, fstab.zone_fs_special, 1420 sizeof (fsp->zone_fs_special)); 1421 (void) strlcpy(fsp->zone_fs_raw, fstab.zone_fs_raw, 1422 sizeof (fsp->zone_fs_raw)); 1423 (void) strlcpy(fsp->zone_fs_type, fstab.zone_fs_type, 1424 sizeof (fsp->zone_fs_type)); 1425 fsp->zone_fs_options = fstab.zone_fs_options; 1426 } 1427 (void) zonecfg_endfsent(handle); 1428 zonecfg_fini_handle(handle); 1429 handle = NULL; 1430 1431 /* 1432 * When we're mounting a zone for administration, / is the 1433 * scratch zone and dev is mounted at /dev. The to-be-upgraded 1434 * zone is mounted at /a, and we set up that environment so that 1435 * process can access both the running system's utilities 1436 * and the to-be-modified zone's files. 
The only exception 1437 * is the zone's /dev which isn't mounted at all, which is 1438 * the same as global zone installation where /a/dev and 1439 * /a/devices are not mounted. 1440 */ 1441 if (mount_cmd && 1442 !build_mounted(zlogp, rootpath, sizeof (rootpath), zonepath)) 1443 goto bad; 1444 1445 qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare); 1446 for (i = 0; i < num_fs; i++) { 1447 if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0) 1448 goto bad; 1449 } 1450 1451 /* 1452 * For Trusted Extensions cross-mount each lower level /export/home 1453 */ 1454 if (!mount_cmd && tsol_mounts(zlogp, zone_name, rootpath) != 0) 1455 goto bad; 1456 1457 free_fs_data(fs_ptr, num_fs); 1458 1459 /* 1460 * Everything looks fine. 1461 */ 1462 return (0); 1463 1464 bad: 1465 if (handle != NULL) 1466 zonecfg_fini_handle(handle); 1467 free_fs_data(fs_ptr, num_fs); 1468 return (-1); 1469 } 1470 1471 /* caller makes sure neither parameter is NULL */ 1472 static int 1473 addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr) 1474 { 1475 int prefixlen; 1476 1477 prefixlen = atoi(prefixstr); 1478 if (prefixlen < 0 || prefixlen > maxprefixlen) 1479 return (1); 1480 while (prefixlen > 0) { 1481 if (prefixlen >= 8) { 1482 *maskstr++ = 0xFF; 1483 prefixlen -= 8; 1484 continue; 1485 } 1486 *maskstr |= 1 << (8 - prefixlen); 1487 prefixlen--; 1488 } 1489 return (0); 1490 } 1491 1492 /* 1493 * Tear down all interfaces belonging to the given zone. This should 1494 * be called with the zone in a state other than "running", so that 1495 * interfaces can't be assigned to the zone after this returns. 1496 * 1497 * If anything goes wrong, log an error message and return an error. 
1498 */ 1499 static int 1500 unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id) 1501 { 1502 struct lifnum lifn; 1503 struct lifconf lifc; 1504 struct lifreq *lifrp, lifrl; 1505 int64_t lifc_flags = LIFC_NOXMIT | LIFC_ALLZONES; 1506 int num_ifs, s, i, ret_code = 0; 1507 uint_t bufsize; 1508 char *buf = NULL; 1509 1510 if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { 1511 zerror(zlogp, B_TRUE, "could not get socket"); 1512 ret_code = -1; 1513 goto bad; 1514 } 1515 lifn.lifn_family = AF_UNSPEC; 1516 lifn.lifn_flags = (int)lifc_flags; 1517 if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) { 1518 zerror(zlogp, B_TRUE, 1519 "could not determine number of interfaces"); 1520 ret_code = -1; 1521 goto bad; 1522 } 1523 num_ifs = lifn.lifn_count; 1524 bufsize = num_ifs * sizeof (struct lifreq); 1525 if ((buf = malloc(bufsize)) == NULL) { 1526 zerror(zlogp, B_TRUE, "memory allocation failed"); 1527 ret_code = -1; 1528 goto bad; 1529 } 1530 lifc.lifc_family = AF_UNSPEC; 1531 lifc.lifc_flags = (int)lifc_flags; 1532 lifc.lifc_len = bufsize; 1533 lifc.lifc_buf = buf; 1534 if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) { 1535 zerror(zlogp, B_TRUE, "could not get configured interfaces"); 1536 ret_code = -1; 1537 goto bad; 1538 } 1539 lifrp = lifc.lifc_req; 1540 for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--, lifrp++) { 1541 (void) close(s); 1542 if ((s = socket(lifrp->lifr_addr.ss_family, SOCK_DGRAM, 0)) < 1543 0) { 1544 zerror(zlogp, B_TRUE, "%s: could not get socket", 1545 lifrl.lifr_name); 1546 ret_code = -1; 1547 continue; 1548 } 1549 (void) memset(&lifrl, 0, sizeof (lifrl)); 1550 (void) strncpy(lifrl.lifr_name, lifrp->lifr_name, 1551 sizeof (lifrl.lifr_name)); 1552 if (ioctl(s, SIOCGLIFZONE, (caddr_t)&lifrl) < 0) { 1553 zerror(zlogp, B_TRUE, 1554 "%s: could not determine zone interface belongs to", 1555 lifrl.lifr_name); 1556 ret_code = -1; 1557 continue; 1558 } 1559 if (lifrl.lifr_zoneid == zone_id) { 1560 if (ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifrl) < 0) 
{ 1561 zerror(zlogp, B_TRUE, 1562 "%s: could not remove interface", 1563 lifrl.lifr_name); 1564 ret_code = -1; 1565 continue; 1566 } 1567 } 1568 } 1569 bad: 1570 if (s > 0) 1571 (void) close(s); 1572 if (buf) 1573 free(buf); 1574 return (ret_code); 1575 } 1576 1577 static union sockunion { 1578 struct sockaddr sa; 1579 struct sockaddr_in sin; 1580 struct sockaddr_dl sdl; 1581 struct sockaddr_in6 sin6; 1582 } so_dst, so_ifp; 1583 1584 static struct { 1585 struct rt_msghdr hdr; 1586 char space[512]; 1587 } rtmsg; 1588 1589 static int 1590 salen(struct sockaddr *sa) 1591 { 1592 switch (sa->sa_family) { 1593 case AF_INET: 1594 return (sizeof (struct sockaddr_in)); 1595 case AF_LINK: 1596 return (sizeof (struct sockaddr_dl)); 1597 case AF_INET6: 1598 return (sizeof (struct sockaddr_in6)); 1599 default: 1600 return (sizeof (struct sockaddr)); 1601 } 1602 } 1603 1604 #define ROUNDUP_LONG(a) \ 1605 ((a) > 0 ? (1 + (((a) - 1) | (sizeof (long) - 1))) : sizeof (long)) 1606 1607 /* 1608 * Look up which zone is using a given IP address. The address in question 1609 * is expected to have been stuffed into the structure to which lifr points 1610 * via a previous SIOCGLIFADDR ioctl(). 1611 * 1612 * This is done using black router socket magic. 1613 * 1614 * Return the name of the zone on success or NULL on failure. 1615 * 1616 * This is a lot of code for a simple task; a new ioctl request to take care 1617 * of this might be a useful RFE. 
1618 */ 1619 1620 static char * 1621 who_is_using(zlog_t *zlogp, struct lifreq *lifr) 1622 { 1623 static char answer[ZONENAME_MAX]; 1624 pid_t pid; 1625 int s, rlen, l, i; 1626 char *cp = rtmsg.space; 1627 struct sockaddr_dl *ifp = NULL; 1628 struct sockaddr *sa; 1629 char save_if_name[LIFNAMSIZ]; 1630 1631 answer[0] = '\0'; 1632 1633 pid = getpid(); 1634 if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) < 0) { 1635 zerror(zlogp, B_TRUE, "could not get routing socket"); 1636 return (NULL); 1637 } 1638 1639 if (lifr->lifr_addr.ss_family == AF_INET) { 1640 struct sockaddr_in *sin4; 1641 1642 so_dst.sa.sa_family = AF_INET; 1643 sin4 = (struct sockaddr_in *)&lifr->lifr_addr; 1644 so_dst.sin.sin_addr = sin4->sin_addr; 1645 } else { 1646 struct sockaddr_in6 *sin6; 1647 1648 so_dst.sa.sa_family = AF_INET6; 1649 sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr; 1650 so_dst.sin6.sin6_addr = sin6->sin6_addr; 1651 } 1652 1653 so_ifp.sa.sa_family = AF_LINK; 1654 1655 (void) memset(&rtmsg, 0, sizeof (rtmsg)); 1656 rtmsg.hdr.rtm_type = RTM_GET; 1657 rtmsg.hdr.rtm_flags = RTF_UP | RTF_HOST; 1658 rtmsg.hdr.rtm_version = RTM_VERSION; 1659 rtmsg.hdr.rtm_seq = ++rts_seqno; 1660 rtmsg.hdr.rtm_addrs = RTA_IFP | RTA_DST; 1661 1662 l = ROUNDUP_LONG(salen(&so_dst.sa)); 1663 (void) memmove(cp, &(so_dst), l); 1664 cp += l; 1665 l = ROUNDUP_LONG(salen(&so_ifp.sa)); 1666 (void) memmove(cp, &(so_ifp), l); 1667 cp += l; 1668 1669 rtmsg.hdr.rtm_msglen = l = cp - (char *)&rtmsg; 1670 1671 if ((rlen = write(s, &rtmsg, l)) < 0) { 1672 zerror(zlogp, B_TRUE, "writing to routing socket"); 1673 return (NULL); 1674 } else if (rlen < (int)rtmsg.hdr.rtm_msglen) { 1675 zerror(zlogp, B_TRUE, 1676 "write to routing socket got only %d for len\n", rlen); 1677 return (NULL); 1678 } 1679 do { 1680 l = read(s, &rtmsg, sizeof (rtmsg)); 1681 } while (l > 0 && (rtmsg.hdr.rtm_seq != rts_seqno || 1682 rtmsg.hdr.rtm_pid != pid)); 1683 if (l < 0) { 1684 zerror(zlogp, B_TRUE, "reading from routing socket"); 1685 return (NULL); 1686 
} 1687 1688 if (rtmsg.hdr.rtm_version != RTM_VERSION) { 1689 zerror(zlogp, B_FALSE, 1690 "routing message version %d not understood", 1691 rtmsg.hdr.rtm_version); 1692 return (NULL); 1693 } 1694 if (rtmsg.hdr.rtm_msglen != (ushort_t)l) { 1695 zerror(zlogp, B_FALSE, "message length mismatch, " 1696 "expected %d bytes, returned %d bytes", 1697 rtmsg.hdr.rtm_msglen, l); 1698 return (NULL); 1699 } 1700 if (rtmsg.hdr.rtm_errno != 0) { 1701 errno = rtmsg.hdr.rtm_errno; 1702 zerror(zlogp, B_TRUE, "RTM_GET routing socket message"); 1703 return (NULL); 1704 } 1705 if ((rtmsg.hdr.rtm_addrs & RTA_IFP) == 0) { 1706 zerror(zlogp, B_FALSE, "interface not found"); 1707 return (NULL); 1708 } 1709 cp = ((char *)(&rtmsg.hdr + 1)); 1710 for (i = 1; i != 0; i <<= 1) { 1711 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1712 sa = (struct sockaddr *)cp; 1713 if (i != RTA_IFP) { 1714 if ((i & rtmsg.hdr.rtm_addrs) != 0) 1715 cp += ROUNDUP_LONG(salen(sa)); 1716 continue; 1717 } 1718 if (sa->sa_family == AF_LINK && 1719 ((struct sockaddr_dl *)sa)->sdl_nlen != 0) 1720 ifp = (struct sockaddr_dl *)sa; 1721 break; 1722 } 1723 if (ifp == NULL) { 1724 zerror(zlogp, B_FALSE, "interface could not be determined"); 1725 return (NULL); 1726 } 1727 1728 /* 1729 * We need to set the I/F name to what we got above, then do the 1730 * appropriate ioctl to get its zone name. But lifr->lifr_name is 1731 * used by the calling function to do a REMOVEIF, so if we leave the 1732 * "good" zone's I/F name in place, *that* I/F will be removed instead 1733 * of the bad one. So we save the old (bad) I/F name before over- 1734 * writing it and doing the ioctl, then restore it after the ioctl. 
1735 */ 1736 (void) strlcpy(save_if_name, lifr->lifr_name, sizeof (save_if_name)); 1737 (void) strncpy(lifr->lifr_name, ifp->sdl_data, ifp->sdl_nlen); 1738 lifr->lifr_name[ifp->sdl_nlen] = '\0'; 1739 i = ioctl(s, SIOCGLIFZONE, lifr); 1740 (void) strlcpy(lifr->lifr_name, save_if_name, sizeof (save_if_name)); 1741 if (i < 0) { 1742 zerror(zlogp, B_TRUE, 1743 "%s: could not determine the zone interface belongs to", 1744 lifr->lifr_name); 1745 return (NULL); 1746 } 1747 if (getzonenamebyid(lifr->lifr_zoneid, answer, sizeof (answer)) < 0) 1748 (void) snprintf(answer, sizeof (answer), "%d", 1749 lifr->lifr_zoneid); 1750 1751 if (strlen(answer) > 0) 1752 return (answer); 1753 return (NULL); 1754 } 1755 1756 typedef struct mcast_rtmsg_s { 1757 struct rt_msghdr m_rtm; 1758 union { 1759 struct { 1760 struct sockaddr_in m_dst; 1761 struct sockaddr_in m_gw; 1762 struct sockaddr_in m_netmask; 1763 } m_v4; 1764 struct { 1765 struct sockaddr_in6 m_dst; 1766 struct sockaddr_in6 m_gw; 1767 struct sockaddr_in6 m_netmask; 1768 } m_v6; 1769 } m_u; 1770 } mcast_rtmsg_t; 1771 #define m_dst4 m_u.m_v4.m_dst 1772 #define m_dst6 m_u.m_v6.m_dst 1773 #define m_gw4 m_u.m_v4.m_gw 1774 #define m_gw6 m_u.m_v6.m_gw 1775 #define m_netmask4 m_u.m_v4.m_netmask 1776 #define m_netmask6 m_u.m_v6.m_netmask 1777 1778 /* 1779 * Configures a single interface: a new virtual interface is added, based on 1780 * the physical interface nwiftabptr->zone_nwif_physical, with the address 1781 * specified in nwiftabptr->zone_nwif_address, for zone zone_id. Note that 1782 * the "address" can be an IPv6 address (with a /prefixlength required), an 1783 * IPv4 address (with a /prefixlength optional), or a name; for the latter, 1784 * an IPv4 name-to-address resolution will be attempted. 1785 * 1786 * A default interface route for multicast is created on the first IPv4 and 1787 * IPv6 interfaces (that have the IFF_MULTICAST flag set), respectively. 
1788 * This should really be done in the init scripts if we ever allow zones to 1789 * modify the routing tables. 1790 * 1791 * If anything goes wrong, we log an detailed error message, attempt to tear 1792 * down whatever we set up and return an error. 1793 */ 1794 static int 1795 configure_one_interface(zlog_t *zlogp, zoneid_t zone_id, 1796 struct zone_nwiftab *nwiftabptr, boolean_t *mcast_rt_v4_setp, 1797 boolean_t *mcast_rt_v6_setp) 1798 { 1799 struct lifreq lifr; 1800 struct sockaddr_in netmask4; 1801 struct sockaddr_in6 netmask6; 1802 struct in_addr in4; 1803 struct in6_addr in6; 1804 sa_family_t af; 1805 char *slashp = strchr(nwiftabptr->zone_nwif_address, '/'); 1806 mcast_rtmsg_t mcast_rtmsg; 1807 int s; 1808 int rs; 1809 int rlen; 1810 boolean_t got_netmask = B_FALSE; 1811 char addrstr4[INET_ADDRSTRLEN]; 1812 int res; 1813 1814 res = zonecfg_valid_net_address(nwiftabptr->zone_nwif_address, &lifr); 1815 if (res != Z_OK) { 1816 zerror(zlogp, B_FALSE, "%s: %s", zonecfg_strerror(res), 1817 nwiftabptr->zone_nwif_address); 1818 return (-1); 1819 } 1820 af = lifr.lifr_addr.ss_family; 1821 if (af == AF_INET) 1822 in4 = ((struct sockaddr_in *)(&lifr.lifr_addr))->sin_addr; 1823 else 1824 in6 = ((struct sockaddr_in6 *)(&lifr.lifr_addr))->sin6_addr; 1825 1826 if ((s = socket(af, SOCK_DGRAM, 0)) < 0) { 1827 zerror(zlogp, B_TRUE, "could not get socket"); 1828 return (-1); 1829 } 1830 1831 (void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical, 1832 sizeof (lifr.lifr_name)); 1833 if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) { 1834 /* 1835 * Here, we know that the interface can't be brought up. 1836 * A similar warning message was already printed out to 1837 * the console by zoneadm(1M) so instead we log the 1838 * message to syslog and continue. 
1839 */ 1840 zerror(&logsys, B_TRUE, "WARNING: skipping interface " 1841 "'%s' which may not be present/plumbed in the " 1842 "global zone.", lifr.lifr_name); 1843 (void) close(s); 1844 return (Z_OK); 1845 } 1846 1847 if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) { 1848 zerror(zlogp, B_TRUE, 1849 "%s: could not set IP address to %s", 1850 lifr.lifr_name, nwiftabptr->zone_nwif_address); 1851 goto bad; 1852 } 1853 1854 /* Preserve literal IPv4 address for later potential printing. */ 1855 if (af == AF_INET) 1856 (void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN); 1857 1858 lifr.lifr_zoneid = zone_id; 1859 if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) { 1860 zerror(zlogp, B_TRUE, "%s: could not place interface into zone", 1861 lifr.lifr_name); 1862 goto bad; 1863 } 1864 1865 if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) { 1866 got_netmask = B_TRUE; /* default setting will be correct */ 1867 } else { 1868 if (af == AF_INET) { 1869 /* 1870 * The IPv4 netmask can be determined either 1871 * directly if a prefix length was supplied with 1872 * the address or via the netmasks database. Not 1873 * being able to determine it is a common failure, 1874 * but it often is not fatal to operation of the 1875 * interface. In that case, a warning will be 1876 * printed after the rest of the interface's 1877 * parameters have been configured. 
1878 */ 1879 (void) memset(&netmask4, 0, sizeof (netmask4)); 1880 if (slashp != NULL) { 1881 if (addr2netmask(slashp + 1, V4_ADDR_LEN, 1882 (uchar_t *)&netmask4.sin_addr) != 0) { 1883 *slashp = '/'; 1884 zerror(zlogp, B_FALSE, 1885 "%s: invalid prefix length in %s", 1886 lifr.lifr_name, 1887 nwiftabptr->zone_nwif_address); 1888 goto bad; 1889 } 1890 got_netmask = B_TRUE; 1891 } else if (getnetmaskbyaddr(in4, 1892 &netmask4.sin_addr) == 0) { 1893 got_netmask = B_TRUE; 1894 } 1895 if (got_netmask) { 1896 netmask4.sin_family = af; 1897 (void) memcpy(&lifr.lifr_addr, &netmask4, 1898 sizeof (netmask4)); 1899 } 1900 } else { 1901 (void) memset(&netmask6, 0, sizeof (netmask6)); 1902 if (addr2netmask(slashp + 1, V6_ADDR_LEN, 1903 (uchar_t *)&netmask6.sin6_addr) != 0) { 1904 *slashp = '/'; 1905 zerror(zlogp, B_FALSE, 1906 "%s: invalid prefix length in %s", 1907 lifr.lifr_name, 1908 nwiftabptr->zone_nwif_address); 1909 goto bad; 1910 } 1911 got_netmask = B_TRUE; 1912 netmask6.sin6_family = af; 1913 (void) memcpy(&lifr.lifr_addr, &netmask6, 1914 sizeof (netmask6)); 1915 } 1916 if (got_netmask && 1917 ioctl(s, SIOCSLIFNETMASK, (caddr_t)&lifr) < 0) { 1918 zerror(zlogp, B_TRUE, "%s: could not set netmask", 1919 lifr.lifr_name); 1920 goto bad; 1921 } 1922 1923 /* 1924 * This doesn't set the broadcast address at all. Rather, it 1925 * gets, then sets the interface's address, relying on the fact 1926 * that resetting the address will reset the broadcast address. 
1927 */ 1928 if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { 1929 zerror(zlogp, B_TRUE, "%s: could not get address", 1930 lifr.lifr_name); 1931 goto bad; 1932 } 1933 if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) { 1934 zerror(zlogp, B_TRUE, 1935 "%s: could not reset broadcast address", 1936 lifr.lifr_name); 1937 goto bad; 1938 } 1939 } 1940 1941 if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { 1942 zerror(zlogp, B_TRUE, "%s: could not get flags", 1943 lifr.lifr_name); 1944 goto bad; 1945 } 1946 lifr.lifr_flags |= IFF_UP; 1947 if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { 1948 int save_errno = errno; 1949 char *zone_using; 1950 1951 /* 1952 * If we failed with something other than EADDRNOTAVAIL, 1953 * then skip to the end. Otherwise, look up our address, 1954 * then call a function to determine which zone is already 1955 * using that address. 1956 */ 1957 if (errno != EADDRNOTAVAIL) { 1958 zerror(zlogp, B_TRUE, 1959 "%s: could not bring interface up", lifr.lifr_name); 1960 goto bad; 1961 } 1962 if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { 1963 zerror(zlogp, B_TRUE, "%s: could not get address", 1964 lifr.lifr_name); 1965 goto bad; 1966 } 1967 zone_using = who_is_using(zlogp, &lifr); 1968 errno = save_errno; 1969 if (zone_using == NULL) 1970 zerror(zlogp, B_TRUE, 1971 "%s: could not bring interface up", lifr.lifr_name); 1972 else 1973 zerror(zlogp, B_TRUE, "%s: could not bring interface " 1974 "up: address in use by zone '%s'", lifr.lifr_name, 1975 zone_using); 1976 goto bad; 1977 } 1978 if ((lifr.lifr_flags & IFF_MULTICAST) && ((af == AF_INET && 1979 mcast_rt_v4_setp != NULL && *mcast_rt_v4_setp == B_FALSE) || 1980 (af == AF_INET6 && 1981 mcast_rt_v6_setp != NULL && *mcast_rt_v6_setp == B_FALSE))) { 1982 rs = socket(PF_ROUTE, SOCK_RAW, 0); 1983 if (rs < 0) { 1984 zerror(zlogp, B_TRUE, "%s: could not create " 1985 "routing socket", lifr.lifr_name); 1986 goto bad; 1987 } 1988 (void) shutdown(rs, 0); 1989 (void) memset((void *)&mcast_rtmsg, 0, sizeof 
(mcast_rtmsg_t)); 1990 mcast_rtmsg.m_rtm.rtm_msglen = sizeof (struct rt_msghdr) + 1991 3 * (af == AF_INET ? sizeof (struct sockaddr_in) : 1992 sizeof (struct sockaddr_in6)); 1993 mcast_rtmsg.m_rtm.rtm_version = RTM_VERSION; 1994 mcast_rtmsg.m_rtm.rtm_type = RTM_ADD; 1995 mcast_rtmsg.m_rtm.rtm_flags = RTF_UP; 1996 mcast_rtmsg.m_rtm.rtm_addrs = 1997 RTA_DST | RTA_GATEWAY | RTA_NETMASK; 1998 mcast_rtmsg.m_rtm.rtm_seq = ++rts_seqno; 1999 if (af == AF_INET) { 2000 mcast_rtmsg.m_dst4.sin_family = AF_INET; 2001 mcast_rtmsg.m_dst4.sin_addr.s_addr = 2002 htonl(INADDR_UNSPEC_GROUP); 2003 mcast_rtmsg.m_gw4.sin_family = AF_INET; 2004 mcast_rtmsg.m_gw4.sin_addr = in4; 2005 mcast_rtmsg.m_netmask4.sin_family = AF_INET; 2006 mcast_rtmsg.m_netmask4.sin_addr.s_addr = 2007 htonl(IN_CLASSD_NET); 2008 } else { 2009 mcast_rtmsg.m_dst6.sin6_family = AF_INET6; 2010 mcast_rtmsg.m_dst6.sin6_addr.s6_addr[0] = 0xffU; 2011 mcast_rtmsg.m_gw6.sin6_family = AF_INET6; 2012 mcast_rtmsg.m_gw6.sin6_addr = in6; 2013 mcast_rtmsg.m_netmask6.sin6_family = AF_INET6; 2014 mcast_rtmsg.m_netmask6.sin6_addr.s6_addr[0] = 0xffU; 2015 } 2016 rlen = write(rs, (char *)&mcast_rtmsg, 2017 mcast_rtmsg.m_rtm.rtm_msglen); 2018 /* 2019 * The write to the multicast socket will fail if the 2020 * interface belongs to a failed IPMP group. This is a 2021 * non-fatal error and the zone will continue booting. 2022 * While the zone is running, if any interface in the 2023 * failed IPMP group recovers, the zone will fallback to 2024 * using that interface. 
2025 */ 2026 if (rlen < mcast_rtmsg.m_rtm.rtm_msglen) { 2027 if (rlen < 0) { 2028 zerror(zlogp, B_TRUE, "WARNING: interface " 2029 "'%s' not available as default for " 2030 "multicast.", lifr.lifr_name); 2031 } else { 2032 zerror(zlogp, B_FALSE, "WARNING: interface " 2033 "'%s' not available as default for " 2034 "multicast; routing socket returned " 2035 "unexpected %d bytes.", 2036 lifr.lifr_name, rlen); 2037 } 2038 } else { 2039 2040 if (af == AF_INET) { 2041 *mcast_rt_v4_setp = B_TRUE; 2042 } else { 2043 *mcast_rt_v6_setp = B_TRUE; 2044 } 2045 } 2046 (void) close(rs); 2047 } 2048 2049 if (!got_netmask) { 2050 /* 2051 * A common, but often non-fatal problem, is that the system 2052 * cannot find the netmask for an interface address. This is 2053 * often caused by it being only in /etc/inet/netmasks, but 2054 * /etc/nsswitch.conf says to use NIS or NIS+ and it's not 2055 * in that. This doesn't show up at boot because the netmask 2056 * is obtained from /etc/inet/netmasks when no network 2057 * interfaces are up, but isn't consulted when NIS/NIS+ is 2058 * available. We warn the user here that something like this 2059 * has happened and we're just running with a default and 2060 * possible incorrect netmask. 
2061 */ 2062 char buffer[INET6_ADDRSTRLEN]; 2063 void *addr; 2064 2065 if (af == AF_INET) 2066 addr = &((struct sockaddr_in *) 2067 (&lifr.lifr_addr))->sin_addr; 2068 else 2069 addr = &((struct sockaddr_in6 *) 2070 (&lifr.lifr_addr))->sin6_addr; 2071 2072 /* Find out what netmask interface is going to be using */ 2073 if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 || 2074 inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL) 2075 goto bad; 2076 zerror(zlogp, B_FALSE, 2077 "WARNING: %s: no matching subnet found in netmasks(4) for " 2078 "%s; using default of %s.", 2079 lifr.lifr_name, addrstr4, buffer); 2080 } 2081 2082 (void) close(s); 2083 return (Z_OK); 2084 bad: 2085 (void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr); 2086 (void) close(s); 2087 return (-1); 2088 } 2089 2090 /* 2091 * Sets up network interfaces based on information from the zone configuration. 2092 * An IPv4 loopback interface is set up "for free", modeling the global system. 2093 * If any of the configuration interfaces were IPv6, then an IPv6 loopback 2094 * address is set up as well. 2095 * 2096 * If anything goes wrong, we log a general error message, attempt to tear down 2097 * whatever we set up, and return an error. 
2098 */ 2099 static int 2100 configure_network_interfaces(zlog_t *zlogp) 2101 { 2102 zone_dochandle_t handle; 2103 struct zone_nwiftab nwiftab, loopback_iftab; 2104 boolean_t saw_v6 = B_FALSE; 2105 boolean_t mcast_rt_v4_set = B_FALSE; 2106 boolean_t mcast_rt_v6_set = B_FALSE; 2107 zoneid_t zoneid; 2108 2109 if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) { 2110 zerror(zlogp, B_TRUE, "unable to get zoneid"); 2111 return (-1); 2112 } 2113 2114 if ((handle = zonecfg_init_handle()) == NULL) { 2115 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2116 return (-1); 2117 } 2118 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2119 zerror(zlogp, B_FALSE, "invalid configuration"); 2120 zonecfg_fini_handle(handle); 2121 return (-1); 2122 } 2123 if (zonecfg_setnwifent(handle) == Z_OK) { 2124 for (;;) { 2125 struct in6_addr in6; 2126 2127 if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK) 2128 break; 2129 if (configure_one_interface(zlogp, zoneid, 2130 &nwiftab, &mcast_rt_v4_set, &mcast_rt_v6_set) != 2131 Z_OK) { 2132 (void) zonecfg_endnwifent(handle); 2133 zonecfg_fini_handle(handle); 2134 return (-1); 2135 } 2136 if (inet_pton(AF_INET6, nwiftab.zone_nwif_address, 2137 &in6) == 1) 2138 saw_v6 = B_TRUE; 2139 } 2140 (void) zonecfg_endnwifent(handle); 2141 } 2142 zonecfg_fini_handle(handle); 2143 (void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0", 2144 sizeof (loopback_iftab.zone_nwif_physical)); 2145 (void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1", 2146 sizeof (loopback_iftab.zone_nwif_address)); 2147 if (configure_one_interface(zlogp, zoneid, &loopback_iftab, NULL, NULL) 2148 != Z_OK) { 2149 return (-1); 2150 } 2151 if (saw_v6) { 2152 (void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128", 2153 sizeof (loopback_iftab.zone_nwif_address)); 2154 if (configure_one_interface(zlogp, zoneid, 2155 &loopback_iftab, NULL, NULL) != Z_OK) { 2156 return (-1); 2157 } 2158 } 2159 return (0); 2160 } 2161 2162 static int 2163 
tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid, 2164 const struct sockaddr_storage *local, const struct sockaddr_storage *remote) 2165 { 2166 int fd; 2167 struct strioctl ioc; 2168 tcp_ioc_abort_conn_t conn; 2169 int error; 2170 2171 conn.ac_local = *local; 2172 conn.ac_remote = *remote; 2173 conn.ac_start = TCPS_SYN_SENT; 2174 conn.ac_end = TCPS_TIME_WAIT; 2175 conn.ac_zoneid = zoneid; 2176 2177 ioc.ic_cmd = TCP_IOC_ABORT_CONN; 2178 ioc.ic_timout = -1; /* infinite timeout */ 2179 ioc.ic_len = sizeof (conn); 2180 ioc.ic_dp = (char *)&conn; 2181 2182 if ((fd = open("/dev/tcp", O_RDONLY)) < 0) { 2183 zerror(zlogp, B_TRUE, "unable to open %s", "/dev/tcp"); 2184 return (-1); 2185 } 2186 2187 error = ioctl(fd, I_STR, &ioc); 2188 (void) close(fd); 2189 if (error == 0 || errno == ENOENT) /* ENOENT is not an error */ 2190 return (0); 2191 return (-1); 2192 } 2193 2194 static int 2195 tcp_abort_connections(zlog_t *zlogp, zoneid_t zoneid) 2196 { 2197 struct sockaddr_storage l, r; 2198 struct sockaddr_in *local, *remote; 2199 struct sockaddr_in6 *local6, *remote6; 2200 int error; 2201 2202 /* 2203 * Abort IPv4 connections. 2204 */ 2205 bzero(&l, sizeof (*local)); 2206 local = (struct sockaddr_in *)&l; 2207 local->sin_family = AF_INET; 2208 local->sin_addr.s_addr = INADDR_ANY; 2209 local->sin_port = 0; 2210 2211 bzero(&r, sizeof (*remote)); 2212 remote = (struct sockaddr_in *)&r; 2213 remote->sin_family = AF_INET; 2214 remote->sin_addr.s_addr = INADDR_ANY; 2215 remote->sin_port = 0; 2216 2217 if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0) 2218 return (error); 2219 2220 /* 2221 * Abort IPv6 connections. 
2222 */ 2223 bzero(&l, sizeof (*local6)); 2224 local6 = (struct sockaddr_in6 *)&l; 2225 local6->sin6_family = AF_INET6; 2226 local6->sin6_port = 0; 2227 local6->sin6_addr = in6addr_any; 2228 2229 bzero(&r, sizeof (*remote6)); 2230 remote6 = (struct sockaddr_in6 *)&r; 2231 remote6->sin6_family = AF_INET6; 2232 remote6->sin6_port = 0; 2233 remote6->sin6_addr = in6addr_any; 2234 2235 if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0) 2236 return (error); 2237 return (0); 2238 } 2239 2240 static int 2241 get_privset(zlog_t *zlogp, priv_set_t *privs, boolean_t mount_cmd) 2242 { 2243 int error = -1; 2244 zone_dochandle_t handle; 2245 char *privname = NULL; 2246 2247 if (mount_cmd) { 2248 if (zonecfg_default_privset(privs) == Z_OK) 2249 return (0); 2250 zerror(zlogp, B_FALSE, 2251 "failed to determine the zone's default privilege set"); 2252 return (-1); 2253 } 2254 2255 if ((handle = zonecfg_init_handle()) == NULL) { 2256 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2257 return (-1); 2258 } 2259 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2260 zerror(zlogp, B_FALSE, "invalid configuration"); 2261 zonecfg_fini_handle(handle); 2262 return (-1); 2263 } 2264 2265 switch (zonecfg_get_privset(handle, privs, &privname)) { 2266 case Z_OK: 2267 error = 0; 2268 break; 2269 case Z_PRIV_PROHIBITED: 2270 zerror(zlogp, B_FALSE, "privilege \"%s\" is not permitted " 2271 "within the zone's privilege set", privname); 2272 break; 2273 case Z_PRIV_REQUIRED: 2274 zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing " 2275 "from the zone's privilege set", privname); 2276 break; 2277 case Z_PRIV_UNKNOWN: 2278 zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified " 2279 "in the zone's privilege set", privname); 2280 break; 2281 default: 2282 zerror(zlogp, B_FALSE, "failed to determine the zone's " 2283 "privilege set"); 2284 break; 2285 } 2286 2287 free(privname); 2288 zonecfg_fini_handle(handle); 2289 return (error); 2290 } 2291 2292 static 
int 2293 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) 2294 { 2295 nvlist_t *nvl = NULL; 2296 char *nvl_packed = NULL; 2297 size_t nvl_size = 0; 2298 nvlist_t **nvlv = NULL; 2299 int rctlcount = 0; 2300 int error = -1; 2301 zone_dochandle_t handle; 2302 struct zone_rctltab rctltab; 2303 rctlblk_t *rctlblk = NULL; 2304 2305 *bufp = NULL; 2306 *bufsizep = 0; 2307 2308 if ((handle = zonecfg_init_handle()) == NULL) { 2309 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2310 return (-1); 2311 } 2312 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2313 zerror(zlogp, B_FALSE, "invalid configuration"); 2314 zonecfg_fini_handle(handle); 2315 return (-1); 2316 } 2317 2318 rctltab.zone_rctl_valptr = NULL; 2319 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) { 2320 zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc"); 2321 goto out; 2322 } 2323 2324 if (zonecfg_setrctlent(handle) != Z_OK) { 2325 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent"); 2326 goto out; 2327 } 2328 2329 if ((rctlblk = malloc(rctlblk_size())) == NULL) { 2330 zerror(zlogp, B_TRUE, "memory allocation failed"); 2331 goto out; 2332 } 2333 while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) { 2334 struct zone_rctlvaltab *rctlval; 2335 uint_t i, count; 2336 const char *name = rctltab.zone_rctl_name; 2337 2338 /* zoneadm should have already warned about unknown rctls. 
*/ 2339 if (!zonecfg_is_rctl(name)) { 2340 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 2341 rctltab.zone_rctl_valptr = NULL; 2342 continue; 2343 } 2344 count = 0; 2345 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL; 2346 rctlval = rctlval->zone_rctlval_next) { 2347 count++; 2348 } 2349 if (count == 0) { /* ignore */ 2350 continue; /* Nothing to free */ 2351 } 2352 if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL) 2353 goto out; 2354 i = 0; 2355 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL; 2356 rctlval = rctlval->zone_rctlval_next, i++) { 2357 if (nvlist_alloc(&nvlv[i], NV_UNIQUE_NAME, 0) != 0) { 2358 zerror(zlogp, B_TRUE, "%s failed", 2359 "nvlist_alloc"); 2360 goto out; 2361 } 2362 if (zonecfg_construct_rctlblk(rctlval, rctlblk) 2363 != Z_OK) { 2364 zerror(zlogp, B_FALSE, "invalid rctl value: " 2365 "(priv=%s,limit=%s,action=%s)", 2366 rctlval->zone_rctlval_priv, 2367 rctlval->zone_rctlval_limit, 2368 rctlval->zone_rctlval_action); 2369 goto out; 2370 } 2371 if (!zonecfg_valid_rctl(name, rctlblk)) { 2372 zerror(zlogp, B_FALSE, 2373 "(priv=%s,limit=%s,action=%s) is not a " 2374 "valid value for rctl '%s'", 2375 rctlval->zone_rctlval_priv, 2376 rctlval->zone_rctlval_limit, 2377 rctlval->zone_rctlval_action, 2378 name); 2379 goto out; 2380 } 2381 if (nvlist_add_uint64(nvlv[i], "privilege", 2382 rctlblk_get_privilege(rctlblk)) != 0) { 2383 zerror(zlogp, B_FALSE, "%s failed", 2384 "nvlist_add_uint64"); 2385 goto out; 2386 } 2387 if (nvlist_add_uint64(nvlv[i], "limit", 2388 rctlblk_get_value(rctlblk)) != 0) { 2389 zerror(zlogp, B_FALSE, "%s failed", 2390 "nvlist_add_uint64"); 2391 goto out; 2392 } 2393 if (nvlist_add_uint64(nvlv[i], "action", 2394 (uint_t)rctlblk_get_local_action(rctlblk, NULL)) 2395 != 0) { 2396 zerror(zlogp, B_FALSE, "%s failed", 2397 "nvlist_add_uint64"); 2398 goto out; 2399 } 2400 } 2401 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 2402 rctltab.zone_rctl_valptr = NULL; 2403 if 
(nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count) 2404 != 0) { 2405 zerror(zlogp, B_FALSE, "%s failed", 2406 "nvlist_add_nvlist_array"); 2407 goto out; 2408 } 2409 for (i = 0; i < count; i++) 2410 nvlist_free(nvlv[i]); 2411 free(nvlv); 2412 nvlv = NULL; 2413 rctlcount++; 2414 } 2415 (void) zonecfg_endrctlent(handle); 2416 2417 if (rctlcount == 0) { 2418 error = 0; 2419 goto out; 2420 } 2421 if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0) 2422 != 0) { 2423 zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack"); 2424 goto out; 2425 } 2426 2427 error = 0; 2428 *bufp = nvl_packed; 2429 *bufsizep = nvl_size; 2430 2431 out: 2432 free(rctlblk); 2433 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 2434 if (error && nvl_packed != NULL) 2435 free(nvl_packed); 2436 if (nvl != NULL) 2437 nvlist_free(nvl); 2438 if (nvlv != NULL) 2439 free(nvlv); 2440 if (handle != NULL) 2441 zonecfg_fini_handle(handle); 2442 return (error); 2443 } 2444 2445 static int 2446 get_zone_pool(zlog_t *zlogp, char *poolbuf, size_t bufsz) 2447 { 2448 zone_dochandle_t handle; 2449 int error; 2450 2451 if ((handle = zonecfg_init_handle()) == NULL) { 2452 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2453 return (Z_NOMEM); 2454 } 2455 error = zonecfg_get_snapshot_handle(zone_name, handle); 2456 if (error != Z_OK) { 2457 zerror(zlogp, B_FALSE, "invalid configuration"); 2458 zonecfg_fini_handle(handle); 2459 return (error); 2460 } 2461 error = zonecfg_get_pool(handle, poolbuf, bufsz); 2462 zonecfg_fini_handle(handle); 2463 return (error); 2464 } 2465 2466 static int 2467 get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep) 2468 { 2469 zone_dochandle_t handle; 2470 struct zone_dstab dstab; 2471 size_t total, offset, len; 2472 int error = -1; 2473 char *str; 2474 2475 *bufp = NULL; 2476 *bufsizep = 0; 2477 2478 if ((handle = zonecfg_init_handle()) == NULL) { 2479 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2480 return (-1); 2481 } 2482 if 
(zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2483 zerror(zlogp, B_FALSE, "invalid configuration"); 2484 zonecfg_fini_handle(handle); 2485 return (-1); 2486 } 2487 2488 if (zonecfg_setdsent(handle) != Z_OK) { 2489 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent"); 2490 goto out; 2491 } 2492 2493 total = 0; 2494 while (zonecfg_getdsent(handle, &dstab) == Z_OK) 2495 total += strlen(dstab.zone_dataset_name) + 1; 2496 (void) zonecfg_enddsent(handle); 2497 2498 if (total == 0) { 2499 error = 0; 2500 goto out; 2501 } 2502 2503 if ((str = malloc(total)) == NULL) { 2504 zerror(zlogp, B_TRUE, "memory allocation failed"); 2505 goto out; 2506 } 2507 2508 if (zonecfg_setdsent(handle) != Z_OK) { 2509 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent"); 2510 goto out; 2511 } 2512 offset = 0; 2513 while (zonecfg_getdsent(handle, &dstab) == Z_OK) { 2514 len = strlen(dstab.zone_dataset_name); 2515 (void) strlcpy(str + offset, dstab.zone_dataset_name, 2516 sizeof (dstab.zone_dataset_name) - offset); 2517 offset += len; 2518 if (offset != total - 1) 2519 str[offset++] = ','; 2520 } 2521 (void) zonecfg_enddsent(handle); 2522 2523 error = 0; 2524 *bufp = str; 2525 *bufsizep = total; 2526 2527 out: 2528 if (error != 0 && str != NULL) 2529 free(str); 2530 if (handle != NULL) 2531 zonecfg_fini_handle(handle); 2532 2533 return (error); 2534 } 2535 2536 static int 2537 validate_datasets(zlog_t *zlogp) 2538 { 2539 zone_dochandle_t handle; 2540 struct zone_dstab dstab; 2541 zfs_handle_t *zhp; 2542 libzfs_handle_t *hdl; 2543 2544 if ((handle = zonecfg_init_handle()) == NULL) { 2545 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2546 return (-1); 2547 } 2548 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2549 zerror(zlogp, B_FALSE, "invalid configuration"); 2550 zonecfg_fini_handle(handle); 2551 return (-1); 2552 } 2553 2554 if (zonecfg_setdsent(handle) != Z_OK) { 2555 zerror(zlogp, B_FALSE, "invalid configuration"); 2556 
zonecfg_fini_handle(handle); 2557 return (-1); 2558 } 2559 2560 if ((hdl = libzfs_init()) == NULL) { 2561 zerror(zlogp, B_FALSE, "opening ZFS library"); 2562 zonecfg_fini_handle(handle); 2563 return (-1); 2564 } 2565 2566 while (zonecfg_getdsent(handle, &dstab) == Z_OK) { 2567 2568 if ((zhp = zfs_open(hdl, dstab.zone_dataset_name, 2569 ZFS_TYPE_FILESYSTEM)) == NULL) { 2570 zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'", 2571 dstab.zone_dataset_name); 2572 zonecfg_fini_handle(handle); 2573 libzfs_fini(hdl); 2574 return (-1); 2575 } 2576 2577 /* 2578 * Automatically set the 'zoned' property. We check the value 2579 * first because we'll get EPERM if it is already set. 2580 */ 2581 if (!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) && 2582 zfs_prop_set(zhp, ZFS_PROP_ZONED, "on") != 0) { 2583 zerror(zlogp, B_FALSE, "cannot set 'zoned' " 2584 "property for ZFS dataset '%s'\n", 2585 dstab.zone_dataset_name); 2586 zonecfg_fini_handle(handle); 2587 zfs_close(zhp); 2588 libzfs_fini(hdl); 2589 return (-1); 2590 } 2591 2592 zfs_close(zhp); 2593 } 2594 (void) zonecfg_enddsent(handle); 2595 2596 zonecfg_fini_handle(handle); 2597 libzfs_fini(hdl); 2598 2599 return (0); 2600 } 2601 2602 static int 2603 bind_to_pool(zlog_t *zlogp, zoneid_t zoneid) 2604 { 2605 pool_conf_t *poolconf; 2606 pool_t *pool; 2607 char poolname[MAXPATHLEN]; 2608 int status; 2609 int error; 2610 2611 /* 2612 * Find the pool mentioned in the zone configuration, and bind to it. 2613 */ 2614 error = get_zone_pool(zlogp, poolname, sizeof (poolname)); 2615 if (error == Z_NO_ENTRY || (error == Z_OK && strlen(poolname) == 0)) { 2616 /* 2617 * The property is not set on the zone, so the pool 2618 * should be bound to the default pool. But that's 2619 * already done by the kernel, so we can just return. 2620 */ 2621 return (0); 2622 } 2623 if (error != Z_OK) { 2624 /* 2625 * Not an error, even though it shouldn't be happening. 
2626 */ 2627 zerror(zlogp, B_FALSE, 2628 "WARNING: unable to retrieve default pool."); 2629 return (0); 2630 } 2631 /* 2632 * Don't do anything if pools aren't enabled. 2633 */ 2634 if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED) { 2635 zerror(zlogp, B_FALSE, "WARNING: pools facility not active; " 2636 "zone will not be bound to pool '%s'.", poolname); 2637 return (0); 2638 } 2639 /* 2640 * Try to provide a sane error message if the requested pool doesn't 2641 * exist. 2642 */ 2643 if ((poolconf = pool_conf_alloc()) == NULL) { 2644 zerror(zlogp, B_FALSE, "%s failed", "pool_conf_alloc"); 2645 return (-1); 2646 } 2647 if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) != 2648 PO_SUCCESS) { 2649 zerror(zlogp, B_FALSE, "%s failed", "pool_conf_open"); 2650 pool_conf_free(poolconf); 2651 return (-1); 2652 } 2653 pool = pool_get_pool(poolconf, poolname); 2654 (void) pool_conf_close(poolconf); 2655 pool_conf_free(poolconf); 2656 if (pool == NULL) { 2657 zerror(zlogp, B_FALSE, "WARNING: pool '%s' not found; " 2658 "using default pool.", poolname); 2659 return (0); 2660 } 2661 /* 2662 * Bind the zone to the pool. 
2663 */ 2664 if (pool_set_binding(poolname, P_ZONEID, zoneid) != PO_SUCCESS) { 2665 zerror(zlogp, B_FALSE, "WARNING: unable to bind to pool '%s'; " 2666 "using default pool.", poolname); 2667 } 2668 return (0); 2669 } 2670 2671 /* 2672 * Mount lower level home directories into/from current zone 2673 * Share exported directories specified in dfstab for zone 2674 */ 2675 static int 2676 tsol_mounts(zlog_t *zlogp, char *zone_name, char *rootpath) 2677 { 2678 zoneid_t *zids = NULL; 2679 priv_set_t *zid_privs; 2680 const priv_impl_info_t *ip = NULL; 2681 uint_t nzents_saved; 2682 uint_t nzents; 2683 int i; 2684 char readonly[] = "ro"; 2685 struct zone_fstab lower_fstab; 2686 char *argv[4]; 2687 2688 if (!is_system_labeled()) 2689 return (0); 2690 2691 if (zid_label == NULL) { 2692 zid_label = m_label_alloc(MAC_LABEL); 2693 if (zid_label == NULL) 2694 return (-1); 2695 } 2696 2697 /* Make sure our zone has an /export/home dir */ 2698 (void) make_one_dir(zlogp, rootpath, "/export/home", 2699 DEFAULT_DIR_MODE); 2700 2701 lower_fstab.zone_fs_raw[0] = '\0'; 2702 (void) strlcpy(lower_fstab.zone_fs_type, MNTTYPE_LOFS, 2703 sizeof (lower_fstab.zone_fs_type)); 2704 lower_fstab.zone_fs_options = NULL; 2705 (void) zonecfg_add_fs_option(&lower_fstab, readonly); 2706 2707 /* 2708 * Get the list of zones from the kernel 2709 */ 2710 if (zone_list(NULL, &nzents) != 0) { 2711 zerror(zlogp, B_TRUE, "unable to list zones"); 2712 zonecfg_free_fs_option_list(lower_fstab.zone_fs_options); 2713 return (-1); 2714 } 2715 again: 2716 if (nzents == 0) { 2717 zonecfg_free_fs_option_list(lower_fstab.zone_fs_options); 2718 return (-1); 2719 } 2720 2721 zids = malloc(nzents * sizeof (zoneid_t)); 2722 if (zids == NULL) { 2723 zerror(zlogp, B_TRUE, "memory allocation failed"); 2724 return (-1); 2725 } 2726 nzents_saved = nzents; 2727 2728 if (zone_list(zids, &nzents) != 0) { 2729 zerror(zlogp, B_TRUE, "unable to list zones"); 2730 zonecfg_free_fs_option_list(lower_fstab.zone_fs_options); 2731 
free(zids); 2732 return (-1); 2733 } 2734 if (nzents != nzents_saved) { 2735 /* list changed, try again */ 2736 free(zids); 2737 goto again; 2738 } 2739 2740 ip = getprivimplinfo(); 2741 if ((zid_privs = priv_allocset()) == NULL) { 2742 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 2743 zonecfg_free_fs_option_list( 2744 lower_fstab.zone_fs_options); 2745 free(zids); 2746 return (-1); 2747 } 2748 2749 for (i = 0; i < nzents; i++) { 2750 char zid_name[ZONENAME_MAX]; 2751 zone_state_t zid_state; 2752 char zid_rpath[MAXPATHLEN]; 2753 struct stat stat_buf; 2754 2755 if (zids[i] == GLOBAL_ZONEID) 2756 continue; 2757 2758 if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1) 2759 continue; 2760 2761 /* 2762 * Do special setup for the zone we are booting 2763 */ 2764 if (strcmp(zid_name, zone_name) == 0) { 2765 struct zone_fstab autofs_fstab; 2766 char map_path[MAXPATHLEN]; 2767 int fd; 2768 2769 /* 2770 * Create auto_home_<zone> map for this zone 2771 * in the global zone. The local zone entry 2772 * will be created by automount when the zone 2773 * is booted. 2774 */ 2775 2776 (void) snprintf(autofs_fstab.zone_fs_special, 2777 MAXPATHLEN, "auto_home_%s", zid_name); 2778 2779 (void) snprintf(autofs_fstab.zone_fs_dir, MAXPATHLEN, 2780 "/zone/%s/home", zid_name); 2781 2782 (void) snprintf(map_path, sizeof (map_path), 2783 "/etc/%s", autofs_fstab.zone_fs_special); 2784 /* 2785 * If the map file doesn't exist create a template 2786 */ 2787 if ((fd = open(map_path, O_RDWR | O_CREAT | O_EXCL, 2788 S_IRUSR | S_IWUSR | S_IRGRP| S_IROTH)) != -1) { 2789 int len; 2790 char map_rec[MAXPATHLEN]; 2791 2792 len = snprintf(map_rec, sizeof (map_rec), 2793 "+%s\n*\t-fstype=lofs\t:%s/export/home/&\n", 2794 autofs_fstab.zone_fs_special, rootpath); 2795 (void) write(fd, map_rec, len); 2796 (void) close(fd); 2797 } 2798 2799 /* 2800 * Mount auto_home_<zone> in the global zone if absent. 2801 * If it's already of type autofs, then 2802 * don't mount it again. 
2803 */ 2804 if ((stat(autofs_fstab.zone_fs_dir, &stat_buf) == -1) || 2805 strcmp(stat_buf.st_fstype, MNTTYPE_AUTOFS) != 0) { 2806 char optstr[] = "indirect,ignore,nobrowse"; 2807 2808 (void) make_one_dir(zlogp, "", 2809 autofs_fstab.zone_fs_dir, DEFAULT_DIR_MODE); 2810 2811 /* 2812 * Mount will fail if automounter has already 2813 * processed the auto_home_<zonename> map 2814 */ 2815 (void) domount(zlogp, MNTTYPE_AUTOFS, optstr, 2816 autofs_fstab.zone_fs_special, 2817 autofs_fstab.zone_fs_dir); 2818 } 2819 continue; 2820 } 2821 2822 2823 if (zone_get_state(zid_name, &zid_state) != Z_OK || 2824 (zid_state != ZONE_STATE_READY && 2825 zid_state != ZONE_STATE_RUNNING)) 2826 /* Skip over zones without mounted filesystems */ 2827 continue; 2828 2829 if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label, 2830 sizeof (m_label_t)) < 0) 2831 /* Skip over zones with unspecified label */ 2832 continue; 2833 2834 if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath, 2835 sizeof (zid_rpath)) == -1) 2836 /* Skip over zones with bad path */ 2837 continue; 2838 2839 if (zone_getattr(zids[i], ZONE_ATTR_PRIVSET, zid_privs, 2840 sizeof (priv_chunk_t) * ip->priv_setsize) == -1) 2841 /* Skip over zones with bad privs */ 2842 continue; 2843 2844 /* 2845 * Reading down is valid according to our label model 2846 * but some customers want to disable it because it 2847 * allows execute down and other possible attacks. 2848 * Therefore, we restrict this feature to zones that 2849 * have the NET_MAC_AWARE privilege which is required 2850 * for NFS read-down semantics. 2851 */ 2852 if ((bldominates(zlabel, zid_label)) && 2853 (priv_ismember(zprivs, PRIV_NET_MAC_AWARE))) { 2854 /* 2855 * Our zone dominates this one. 2856 * Create a lofs mount from lower zone's /export/home 2857 */ 2858 (void) snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN, 2859 "%s/zone/%s/export/home", rootpath, zid_name); 2860 2861 /* 2862 * If the target is already an LOFS mount 2863 * then don't do it again. 
2864 */ 2865 if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) || 2866 strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) { 2867 2868 if (snprintf(lower_fstab.zone_fs_special, 2869 MAXPATHLEN, "%s/export", 2870 zid_rpath) > MAXPATHLEN) 2871 continue; 2872 2873 /* 2874 * Make sure the lower-level home exists 2875 */ 2876 if (make_one_dir(zlogp, 2877 lower_fstab.zone_fs_special, 2878 "/home", DEFAULT_DIR_MODE) != 0) 2879 continue; 2880 2881 (void) strlcat(lower_fstab.zone_fs_special, 2882 "/home", MAXPATHLEN); 2883 2884 /* 2885 * Mount can fail because the lower-level 2886 * zone may have already done a mount up. 2887 */ 2888 (void) mount_one(zlogp, &lower_fstab, ""); 2889 } 2890 } else if ((bldominates(zid_label, zlabel)) && 2891 (priv_ismember(zid_privs, PRIV_NET_MAC_AWARE))) { 2892 /* 2893 * This zone dominates our zone. 2894 * Create a lofs mount from our zone's /export/home 2895 */ 2896 if (snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN, 2897 "%s/zone/%s/export/home", zid_rpath, 2898 zone_name) > MAXPATHLEN) 2899 continue; 2900 2901 /* 2902 * If the target is already an LOFS mount 2903 * then don't do it again. 2904 */ 2905 if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) || 2906 strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) { 2907 2908 (void) snprintf(lower_fstab.zone_fs_special, 2909 MAXPATHLEN, "%s/export/home", rootpath); 2910 2911 /* 2912 * Mount can fail because the higher-level 2913 * zone may have already done a mount down. 2914 */ 2915 (void) mount_one(zlogp, &lower_fstab, ""); 2916 } 2917 } 2918 } 2919 zonecfg_free_fs_option_list(lower_fstab.zone_fs_options); 2920 priv_freeset(zid_privs); 2921 free(zids); 2922 2923 /* 2924 * Now share any exported directories from this zone. 2925 * Each zone can have its own dfstab. 
2926 */ 2927 2928 argv[0] = "zoneshare"; 2929 argv[1] = "-z"; 2930 argv[2] = zone_name; 2931 argv[3] = NULL; 2932 2933 (void) forkexec(zlogp, "/usr/lib/zones/zoneshare", argv); 2934 /* Don't check for errors since they don't affect the zone */ 2935 2936 return (0); 2937 } 2938 2939 /* 2940 * Unmount lofs mounts from higher level zones 2941 * Unshare nfs exported directories 2942 */ 2943 static void 2944 tsol_unmounts(zlog_t *zlogp, char *zone_name) 2945 { 2946 zoneid_t *zids = NULL; 2947 uint_t nzents_saved; 2948 uint_t nzents; 2949 int i; 2950 char *argv[4]; 2951 char path[MAXPATHLEN]; 2952 2953 if (!is_system_labeled()) 2954 return; 2955 2956 /* 2957 * Get the list of zones from the kernel 2958 */ 2959 if (zone_list(NULL, &nzents) != 0) { 2960 return; 2961 } 2962 2963 if (zid_label == NULL) { 2964 zid_label = m_label_alloc(MAC_LABEL); 2965 if (zid_label == NULL) 2966 return; 2967 } 2968 2969 again: 2970 if (nzents == 0) 2971 return; 2972 2973 zids = malloc(nzents * sizeof (zoneid_t)); 2974 if (zids == NULL) { 2975 zerror(zlogp, B_TRUE, "memory allocation failed"); 2976 return; 2977 } 2978 nzents_saved = nzents; 2979 2980 if (zone_list(zids, &nzents) != 0) { 2981 free(zids); 2982 return; 2983 } 2984 if (nzents != nzents_saved) { 2985 /* list changed, try again */ 2986 free(zids); 2987 goto again; 2988 } 2989 2990 for (i = 0; i < nzents; i++) { 2991 char zid_name[ZONENAME_MAX]; 2992 zone_state_t zid_state; 2993 char zid_rpath[MAXPATHLEN]; 2994 2995 if (zids[i] == GLOBAL_ZONEID) 2996 continue; 2997 2998 if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1) 2999 continue; 3000 3001 /* 3002 * Skip the zone we are halting 3003 */ 3004 if (strcmp(zid_name, zone_name) == 0) 3005 continue; 3006 3007 if ((zone_getattr(zids[i], ZONE_ATTR_STATUS, &zid_state, 3008 sizeof (zid_state)) < 0) || 3009 (zid_state < ZONE_IS_READY)) 3010 /* Skip over zones without mounted filesystems */ 3011 continue; 3012 3013 if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label, 3014 sizeof 
(m_label_t)) < 0) 3015 /* Skip over zones with unspecified label */ 3016 continue; 3017 3018 if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath, 3019 sizeof (zid_rpath)) == -1) 3020 /* Skip over zones with bad path */ 3021 continue; 3022 3023 if (zlabel != NULL && bldominates(zid_label, zlabel)) { 3024 /* 3025 * This zone dominates our zone. 3026 * Unmount the lofs mount of our zone's /export/home 3027 */ 3028 3029 if (snprintf(path, MAXPATHLEN, 3030 "%s/zone/%s/export/home", zid_rpath, 3031 zone_name) > MAXPATHLEN) 3032 continue; 3033 3034 /* Skip over mount failures */ 3035 (void) umount(path); 3036 } 3037 } 3038 free(zids); 3039 3040 /* 3041 * Unmount global zone autofs trigger for this zone 3042 */ 3043 (void) snprintf(path, MAXPATHLEN, "/zone/%s/home", zone_name); 3044 /* Skip over mount failures */ 3045 (void) umount(path); 3046 3047 /* 3048 * Next unshare any exported directories from this zone. 3049 */ 3050 3051 argv[0] = "zoneunshare"; 3052 argv[1] = "-z"; 3053 argv[2] = zone_name; 3054 argv[3] = NULL; 3055 3056 (void) forkexec(zlogp, "/usr/lib/zones/zoneunshare", argv); 3057 /* Don't check for errors since they don't affect the zone */ 3058 3059 /* 3060 * Finally, deallocate any devices in the zone. 3061 */ 3062 3063 argv[0] = "deallocate"; 3064 argv[1] = "-Isz"; 3065 argv[2] = zone_name; 3066 argv[3] = NULL; 3067 3068 (void) forkexec(zlogp, "/usr/sbin/deallocate", argv); 3069 /* Don't check for errors since they don't affect the zone */ 3070 } 3071 3072 /* 3073 * Fetch the Trusted Extensions label and multi-level ports (MLPs) for 3074 * this zone. 
3075 */ 3076 static tsol_zcent_t * 3077 get_zone_label(zlog_t *zlogp, priv_set_t *privs) 3078 { 3079 FILE *fp; 3080 tsol_zcent_t *zcent = NULL; 3081 char line[MAXTNZLEN]; 3082 3083 if ((fp = fopen(TNZONECFG_PATH, "r")) == NULL) { 3084 zerror(zlogp, B_TRUE, "%s", TNZONECFG_PATH); 3085 return (NULL); 3086 } 3087 3088 while (fgets(line, sizeof (line), fp) != NULL) { 3089 /* 3090 * Check for malformed database 3091 */ 3092 if (strlen(line) == MAXTNZLEN - 1) 3093 break; 3094 if ((zcent = tsol_sgetzcent(line, NULL, NULL)) == NULL) 3095 continue; 3096 if (strcmp(zcent->zc_name, zone_name) == 0) 3097 break; 3098 tsol_freezcent(zcent); 3099 zcent = NULL; 3100 } 3101 (void) fclose(fp); 3102 3103 if (zcent == NULL) { 3104 zerror(zlogp, B_FALSE, "zone requires a label assignment. " 3105 "See tnzonecfg(4)"); 3106 } else { 3107 if (zlabel == NULL) 3108 zlabel = m_label_alloc(MAC_LABEL); 3109 /* 3110 * Save this zone's privileges for later read-down processing 3111 */ 3112 if ((zprivs = priv_allocset()) == NULL) { 3113 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 3114 return (NULL); 3115 } else { 3116 priv_copyset(privs, zprivs); 3117 } 3118 } 3119 return (zcent); 3120 } 3121 3122 /* 3123 * Add the Trusted Extensions multi-level ports for this zone. 
3124 */ 3125 static void 3126 set_mlps(zlog_t *zlogp, zoneid_t zoneid, tsol_zcent_t *zcent) 3127 { 3128 tsol_mlp_t *mlp; 3129 tsol_mlpent_t tsme; 3130 3131 if (!is_system_labeled()) 3132 return; 3133 3134 tsme.tsme_zoneid = zoneid; 3135 tsme.tsme_flags = 0; 3136 for (mlp = zcent->zc_private_mlp; !TSOL_MLP_END(mlp); mlp++) { 3137 tsme.tsme_mlp = *mlp; 3138 if (tnmlp(TNDB_LOAD, &tsme) != 0) { 3139 zerror(zlogp, B_TRUE, "cannot set zone-specific MLP " 3140 "on %d-%d/%d", mlp->mlp_port, 3141 mlp->mlp_port_upper, mlp->mlp_ipp); 3142 } 3143 } 3144 3145 tsme.tsme_flags = TSOL_MEF_SHARED; 3146 for (mlp = zcent->zc_shared_mlp; !TSOL_MLP_END(mlp); mlp++) { 3147 tsme.tsme_mlp = *mlp; 3148 if (tnmlp(TNDB_LOAD, &tsme) != 0) { 3149 zerror(zlogp, B_TRUE, "cannot set shared MLP " 3150 "on %d-%d/%d", mlp->mlp_port, 3151 mlp->mlp_port_upper, mlp->mlp_ipp); 3152 } 3153 } 3154 } 3155 3156 static void 3157 remove_mlps(zlog_t *zlogp, zoneid_t zoneid) 3158 { 3159 tsol_mlpent_t tsme; 3160 3161 if (!is_system_labeled()) 3162 return; 3163 3164 (void) memset(&tsme, 0, sizeof (tsme)); 3165 tsme.tsme_zoneid = zoneid; 3166 if (tnmlp(TNDB_FLUSH, &tsme) != 0) 3167 zerror(zlogp, B_TRUE, "cannot flush MLPs"); 3168 } 3169 3170 int 3171 prtmount(const char *fs, void *x) { 3172 zerror((zlog_t *)x, B_FALSE, " %s", fs); 3173 return (0); 3174 } 3175 3176 /* 3177 * Look for zones running on the main system that are using this root (or any 3178 * subdirectory of it). Return B_TRUE and print an error if a conflicting zone 3179 * is found or if we can't tell. 
3180 */ 3181 static boolean_t 3182 duplicate_zone_root(zlog_t *zlogp, const char *rootpath) 3183 { 3184 zoneid_t *zids = NULL; 3185 uint_t nzids = 0; 3186 boolean_t retv; 3187 int rlen, zlen; 3188 char zroot[MAXPATHLEN]; 3189 char zonename[ZONENAME_MAX]; 3190 3191 for (;;) { 3192 nzids += 10; 3193 zids = malloc(nzids * sizeof (*zids)); 3194 if (zids == NULL) { 3195 zerror(zlogp, B_TRUE, "memory allocation failed"); 3196 return (B_TRUE); 3197 } 3198 if (zone_list(zids, &nzids) == 0) 3199 break; 3200 free(zids); 3201 } 3202 retv = B_FALSE; 3203 rlen = strlen(rootpath); 3204 while (nzids > 0) { 3205 /* 3206 * Ignore errors; they just mean that the zone has disappeared 3207 * while we were busy. 3208 */ 3209 if (zone_getattr(zids[--nzids], ZONE_ATTR_ROOT, zroot, 3210 sizeof (zroot)) == -1) 3211 continue; 3212 zlen = strlen(zroot); 3213 if (zlen > rlen) 3214 zlen = rlen; 3215 if (strncmp(rootpath, zroot, zlen) == 0 && 3216 (zroot[zlen] == '\0' || zroot[zlen] == '/') && 3217 (rootpath[zlen] == '\0' || rootpath[zlen] == '/')) { 3218 if (getzonenamebyid(zids[nzids], zonename, 3219 sizeof (zonename)) == -1) 3220 (void) snprintf(zonename, sizeof (zonename), 3221 "id %d", (int)zids[nzids]); 3222 zerror(zlogp, B_FALSE, 3223 "zone root %s already in use by zone %s", 3224 rootpath, zonename); 3225 retv = B_TRUE; 3226 break; 3227 } 3228 } 3229 free(zids); 3230 return (retv); 3231 } 3232 3233 /* 3234 * Search for loopback mounts that use this same source node (same device and 3235 * inode). Return B_TRUE if there is one or if we can't tell. 
 */
static boolean_t
duplicate_reachable_path(zlog_t *zlogp, const char *rootpath)
{
	struct stat64 rst, zst;
	struct mnttab *mnp;

	/* If the root itself can't be examined, report "can't tell". */
	if (stat64(rootpath, &rst) == -1) {
		zerror(zlogp, B_TRUE, "can't stat %s", rootpath);
		return (B_TRUE);
	}
	/* Load the cached mount table on first use. */
	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
		return (B_TRUE);
	for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) {
		/* Only lofs entries are of interest. */
		if (mnp->mnt_fstype == NULL ||
		    strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0)
			continue;
		/*
		 * We're looking at a loopback mount.  Stat its source and
		 * compare device and inode with the zone root.
		 */
		if (mnp->mnt_special != NULL &&
		    stat64(mnp->mnt_special, &zst) != -1 &&
		    rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) {
			zerror(zlogp, B_FALSE,
			    "zone root %s is reachable through %s",
			    rootpath, mnp->mnt_mountp);
			return (B_TRUE);
		}
	}
	return (B_FALSE);
}

zoneid_t
vplat_create(zlog_t *zlogp, boolean_t mount_cmd)
{
	zoneid_t rval = -1;
	priv_set_t *privs;
	char rootpath[MAXPATHLEN];
	char *rctlbuf = NULL;
	size_t rctlbufsz = 0;
	char *zfsbuf = NULL;
	size_t zfsbufsz = 0;
	zoneid_t zoneid = -1;
	int xerr;
	char *kzone;
	FILE *fp = NULL;
	tsol_zcent_t *zcent = NULL;
	int match = 0;
	int doi = 0;

	if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
		zerror(zlogp, B_TRUE, "unable to determine zone root");
		return (-1);
	}
	if (zonecfg_in_alt_root())
		resolve_lofs(zlogp, rootpath, sizeof (rootpath));

	if ((privs = priv_allocset()) == NULL) {
		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
		return (-1);
	}
	priv_emptyset(privs);
	if (get_privset(zlogp, privs, mount_cmd) != 0)
		goto error;

	/* Resource controls are not gathered for a mount command. */
	if (!mount_cmd && get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) {
		zerror(zlogp, B_FALSE, "Unable to get list of rctls");
		goto error;
	}

	if
(get_datasets(zlogp, &zfsbuf, &zfsbufsz) != 0) { 3305 zerror(zlogp, B_FALSE, "Unable to get list of ZFS datasets"); 3306 goto error; 3307 } 3308 3309 if (!mount_cmd && is_system_labeled()) { 3310 zcent = get_zone_label(zlogp, privs); 3311 if (zcent != NULL) { 3312 match = zcent->zc_match; 3313 doi = zcent->zc_doi; 3314 *zlabel = zcent->zc_label; 3315 } else { 3316 goto error; 3317 } 3318 } 3319 3320 kzone = zone_name; 3321 3322 /* 3323 * We must do this scan twice. First, we look for zones running on the 3324 * main system that are using this root (or any subdirectory of it). 3325 * Next, we reduce to the shortest path and search for loopback mounts 3326 * that use this same source node (same device and inode). 3327 */ 3328 if (duplicate_zone_root(zlogp, rootpath)) 3329 goto error; 3330 if (duplicate_reachable_path(zlogp, rootpath)) 3331 goto error; 3332 3333 if (mount_cmd) { 3334 root_to_lu(zlogp, rootpath, sizeof (rootpath), B_TRUE); 3335 3336 /* 3337 * Forge up a special root for this zone. When a zone is 3338 * mounted, we can't let the zone have its own root because the 3339 * tools that will be used in this "scratch zone" need access 3340 * to both the zone's resources and the running machine's 3341 * executables. 3342 * 3343 * Note that the mkdir here also catches read-only filesystems. 3344 */ 3345 if (mkdir(rootpath, 0755) != 0 && errno != EEXIST) { 3346 zerror(zlogp, B_TRUE, "cannot create %s", rootpath); 3347 goto error; 3348 } 3349 if (domount(zlogp, "tmpfs", "", "swap", rootpath) != 0) 3350 goto error; 3351 } 3352 3353 if (zonecfg_in_alt_root()) { 3354 /* 3355 * If we are mounting up a zone in an alternate root partition, 3356 * then we have some additional work to do before starting the 3357 * zone. First, resolve the root path down so that we're not 3358 * fooled by duplicates. Then forge up an internal name for 3359 * the zone. 
3360 */ 3361 if ((fp = zonecfg_open_scratch("", B_TRUE)) == NULL) { 3362 zerror(zlogp, B_TRUE, "cannot open mapfile"); 3363 goto error; 3364 } 3365 if (zonecfg_lock_scratch(fp) != 0) { 3366 zerror(zlogp, B_TRUE, "cannot lock mapfile"); 3367 goto error; 3368 } 3369 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(), 3370 NULL, 0) == 0) { 3371 zerror(zlogp, B_FALSE, "scratch zone already running"); 3372 goto error; 3373 } 3374 /* This is the preferred name */ 3375 (void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s", 3376 zone_name); 3377 srandom(getpid()); 3378 while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL, 3379 0) == 0) { 3380 /* This is just an arbitrary name; note "." usage */ 3381 (void) snprintf(kernzone, sizeof (kernzone), 3382 "SUNWlu.%08lX%08lX", random(), random()); 3383 } 3384 kzone = kernzone; 3385 } 3386 3387 xerr = 0; 3388 if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf, 3389 rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel)) == -1) { 3390 if (xerr == ZE_AREMOUNTS) { 3391 if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) { 3392 zerror(zlogp, B_FALSE, 3393 "An unknown file-system is mounted on " 3394 "a subdirectory of %s", rootpath); 3395 } else { 3396 3397 zerror(zlogp, B_FALSE, 3398 "These file-systems are mounted on " 3399 "subdirectories of %s:", rootpath); 3400 (void) zonecfg_find_mounts(rootpath, 3401 prtmount, zlogp); 3402 } 3403 } else if (xerr == ZE_CHROOTED) { 3404 zerror(zlogp, B_FALSE, "%s: " 3405 "cannot create a zone from a chrooted " 3406 "environment", "zone_create"); 3407 } else { 3408 zerror(zlogp, B_TRUE, "%s failed", "zone_create"); 3409 } 3410 goto error; 3411 } 3412 3413 if (zonecfg_in_alt_root() && 3414 zonecfg_add_scratch(fp, zone_name, kernzone, 3415 zonecfg_get_root()) == -1) { 3416 zerror(zlogp, B_TRUE, "cannot add mapfile entry"); 3417 goto error; 3418 } 3419 3420 /* 3421 * The following is a warning, not an error, and is not performed when 3422 * merely mounting a zone for 
administrative use. 3423 */ 3424 if (!mount_cmd && bind_to_pool(zlogp, zoneid) != 0) 3425 zerror(zlogp, B_FALSE, "WARNING: unable to bind zone to " 3426 "requested pool; using default pool."); 3427 if (!mount_cmd) 3428 set_mlps(zlogp, zoneid, zcent); 3429 rval = zoneid; 3430 zoneid = -1; 3431 3432 error: 3433 if (zoneid != -1) 3434 (void) zone_destroy(zoneid); 3435 if (rctlbuf != NULL) 3436 free(rctlbuf); 3437 priv_freeset(privs); 3438 if (fp != NULL) 3439 zonecfg_close_scratch(fp); 3440 lofs_discard_mnttab(); 3441 if (zcent != NULL) 3442 tsol_freezcent(zcent); 3443 return (rval); 3444 } 3445 3446 /* 3447 * Enter the zone and write a /etc/zones/index file there. This allows 3448 * libzonecfg (and thus zoneadm) to report the UUID and potentially other zone 3449 * details from inside the zone. 3450 */ 3451 static void 3452 write_index_file(zoneid_t zoneid) 3453 { 3454 FILE *zef; 3455 FILE *zet; 3456 struct zoneent *zep; 3457 pid_t child; 3458 int tmpl_fd; 3459 ctid_t ct; 3460 int fd; 3461 char uuidstr[UUID_PRINTABLE_STRING_LENGTH]; 3462 3463 /* Locate the zone entry in the global zone's index file */ 3464 if ((zef = setzoneent()) == NULL) 3465 return; 3466 while ((zep = getzoneent_private(zef)) != NULL) { 3467 if (strcmp(zep->zone_name, zone_name) == 0) 3468 break; 3469 free(zep); 3470 } 3471 endzoneent(zef); 3472 if (zep == NULL) 3473 return; 3474 3475 if ((tmpl_fd = init_template()) == -1) { 3476 free(zep); 3477 return; 3478 } 3479 3480 if ((child = fork()) == -1) { 3481 (void) ct_tmpl_clear(tmpl_fd); 3482 (void) close(tmpl_fd); 3483 free(zep); 3484 return; 3485 } 3486 3487 /* parent waits for child to finish */ 3488 if (child != 0) { 3489 free(zep); 3490 if (contract_latest(&ct) == -1) 3491 ct = -1; 3492 (void) ct_tmpl_clear(tmpl_fd); 3493 (void) close(tmpl_fd); 3494 (void) waitpid(child, NULL, 0); 3495 (void) contract_abandon_id(ct); 3496 return; 3497 } 3498 3499 /* child enters zone and sets up index file */ 3500 (void) ct_tmpl_clear(tmpl_fd); 3501 if 
(zone_enter(zoneid) != -1) { 3502 (void) mkdir(ZONE_CONFIG_ROOT, ZONE_CONFIG_MODE); 3503 (void) chown(ZONE_CONFIG_ROOT, ZONE_CONFIG_UID, 3504 ZONE_CONFIG_GID); 3505 fd = open(ZONE_INDEX_FILE, O_WRONLY|O_CREAT|O_TRUNC, 3506 ZONE_INDEX_MODE); 3507 if (fd != -1 && (zet = fdopen(fd, "w")) != NULL) { 3508 (void) fchown(fd, ZONE_INDEX_UID, ZONE_INDEX_GID); 3509 if (uuid_is_null(zep->zone_uuid)) 3510 uuidstr[0] = '\0'; 3511 else 3512 uuid_unparse(zep->zone_uuid, uuidstr); 3513 (void) fprintf(zet, "%s:%s:/:%s\n", zep->zone_name, 3514 zone_state_str(zep->zone_state), 3515 uuidstr); 3516 (void) fclose(zet); 3517 } 3518 } 3519 _exit(0); 3520 } 3521 3522 int 3523 vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd, zoneid_t zoneid) 3524 { 3525 3526 if (!mount_cmd && validate_datasets(zlogp) != 0) { 3527 lofs_discard_mnttab(); 3528 return (-1); 3529 } 3530 3531 if (mount_filesystems(zlogp, mount_cmd) != 0) { 3532 lofs_discard_mnttab(); 3533 return (-1); 3534 } 3535 3536 /* mount /dev for zone (both normal and scratch zone) */ 3537 if (vplat_mount_dev(zlogp) != 0) { 3538 lofs_discard_mnttab(); 3539 return (-1); 3540 } 3541 3542 if (!mount_cmd && configure_network_interfaces(zlogp) != 0) { 3543 lofs_discard_mnttab(); 3544 return (-1); 3545 } 3546 3547 write_index_file(zoneid); 3548 3549 lofs_discard_mnttab(); 3550 return (0); 3551 } 3552 3553 static int 3554 lu_root_teardown(zlog_t *zlogp) 3555 { 3556 char zroot[MAXPATHLEN]; 3557 3558 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 3559 zerror(zlogp, B_FALSE, "unable to determine zone root"); 3560 return (-1); 3561 } 3562 root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE); 3563 3564 /* 3565 * At this point, the processes are gone, the filesystems (save the 3566 * root) are unmounted, and the zone is on death row. But there may 3567 * still be creds floating about in the system that reference the 3568 * zone_t, and which pin down zone_rootvp causing this call to fail 3569 * with EBUSY. 
Thus, we try for a little while before just giving up. 3570 * (How I wish this were not true, and umount2 just did the right 3571 * thing, or tmpfs supported MS_FORCE This is a gross hack.) 3572 */ 3573 if (umount2(zroot, MS_FORCE) != 0) { 3574 if (errno == ENOTSUP && umount2(zroot, 0) == 0) 3575 goto unmounted; 3576 if (errno == EBUSY) { 3577 int tries = 10; 3578 3579 while (--tries >= 0) { 3580 (void) sleep(1); 3581 if (umount2(zroot, 0) == 0) 3582 goto unmounted; 3583 if (errno != EBUSY) 3584 break; 3585 } 3586 } 3587 zerror(zlogp, B_TRUE, "unable to unmount '%s'", zroot); 3588 return (-1); 3589 } 3590 unmounted: 3591 3592 /* 3593 * Only zones in an alternate root environment have scratch zone 3594 * entries. 3595 */ 3596 if (zonecfg_in_alt_root()) { 3597 FILE *fp; 3598 int retv; 3599 3600 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) { 3601 zerror(zlogp, B_TRUE, "cannot open mapfile"); 3602 return (-1); 3603 } 3604 retv = -1; 3605 if (zonecfg_lock_scratch(fp) != 0) 3606 zerror(zlogp, B_TRUE, "cannot lock mapfile"); 3607 else if (zonecfg_delete_scratch(fp, kernzone) != 0) 3608 zerror(zlogp, B_TRUE, "cannot delete map entry"); 3609 else 3610 retv = 0; 3611 zonecfg_close_scratch(fp); 3612 return (retv); 3613 } else { 3614 return (0); 3615 } 3616 } 3617 3618 int 3619 vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd) 3620 { 3621 char *kzone; 3622 zoneid_t zoneid; 3623 3624 kzone = zone_name; 3625 if (zonecfg_in_alt_root()) { 3626 FILE *fp; 3627 3628 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) { 3629 zerror(zlogp, B_TRUE, "unable to open map file"); 3630 goto error; 3631 } 3632 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(), 3633 kernzone, sizeof (kernzone)) != 0) { 3634 zerror(zlogp, B_FALSE, "unable to find scratch zone"); 3635 zonecfg_close_scratch(fp); 3636 goto error; 3637 } 3638 zonecfg_close_scratch(fp); 3639 kzone = kernzone; 3640 } 3641 3642 if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) { 3643 if 
(!bringup_failure_recovery) 3644 zerror(zlogp, B_TRUE, "unable to get zoneid"); 3645 if (unmount_cmd) 3646 (void) lu_root_teardown(zlogp); 3647 goto error; 3648 } 3649 3650 if (zone_shutdown(zoneid) != 0) { 3651 zerror(zlogp, B_TRUE, "unable to shutdown zone"); 3652 goto error; 3653 } 3654 3655 if (!unmount_cmd && 3656 unconfigure_network_interfaces(zlogp, zoneid) != 0) { 3657 zerror(zlogp, B_FALSE, 3658 "unable to unconfigure network interfaces in zone"); 3659 goto error; 3660 } 3661 3662 if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) { 3663 zerror(zlogp, B_TRUE, "unable to abort TCP connections"); 3664 goto error; 3665 } 3666 3667 /* destroy zconsole before umount /dev */ 3668 if (!unmount_cmd) 3669 destroy_console_slave(); 3670 3671 if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) { 3672 zerror(zlogp, B_FALSE, 3673 "unable to unmount file systems in zone"); 3674 goto error; 3675 } 3676 3677 remove_mlps(zlogp, zoneid); 3678 3679 if (zone_destroy(zoneid) != 0) { 3680 zerror(zlogp, B_TRUE, "unable to destroy zone"); 3681 goto error; 3682 } 3683 3684 /* 3685 * Special teardown for alternate boot environments: remove the tmpfs 3686 * root for the zone and then remove it from the map file. 3687 */ 3688 if (unmount_cmd && lu_root_teardown(zlogp) != 0) 3689 goto error; 3690 3691 lofs_discard_mnttab(); 3692 return (0); 3693 3694 error: 3695 lofs_discard_mnttab(); 3696 return (-1); 3697 } 3698 3699 /* 3700 * Apply the standard lists of devices/symlinks/mappings and the user-specified 3701 * list of devices (via zonecfg) to the /dev filesystem. The filesystem will 3702 * use these as a profile/filter to determine what exists in /dev. 
3703 */ 3704 static int 3705 vplat_mount_dev(zlog_t *zlogp) 3706 { 3707 char zonedevpath[MAXPATHLEN]; 3708 zone_dochandle_t handle = NULL; 3709 struct zone_devtab ztab; 3710 zone_fsopt_t opt_attr; 3711 di_prof_t prof = NULL; 3712 int i, err, len; 3713 int retval = -1; 3714 3715 struct zone_fstab devtab = { 3716 "/dev", 3717 "/dev", 3718 MNTTYPE_DEV, 3719 NULL, 3720 "" 3721 }; 3722 3723 if (err = zone_get_devroot(zone_name, zonedevpath, 3724 sizeof (zonedevpath))) { 3725 zerror(zlogp, B_FALSE, "can't get zone dev: %s", 3726 zonecfg_strerror(err)); 3727 return (-1); 3728 } 3729 3730 /* 3731 * The old /dev was a lofs mount from <zonepath>/dev, with 3732 * dev fs, that becomes a mount on <zonepath>/root/dev. 3733 * However, we need to preserve device permission bits during 3734 * upgrade. What we should do is migrate the attribute directory 3735 * on upgrade, but for now, preserve it at <zonepath>/dev. 3736 */ 3737 (void) strcpy(opt_attr.zone_fsopt_opt, "attrdir="); 3738 len = strlen(opt_attr.zone_fsopt_opt); 3739 if (err = zone_get_zonepath(zone_name, 3740 opt_attr.zone_fsopt_opt + len, MAX_MNTOPT_STR - len)) { 3741 zerror(zlogp, B_FALSE, "can't get zone path: %s", 3742 zonecfg_strerror(err)); 3743 return (-1); 3744 } 3745 3746 if (make_one_dir(zlogp, opt_attr.zone_fsopt_opt + len, "/dev", 3747 DEFAULT_DIR_MODE) != 0) 3748 return (-1); 3749 3750 (void) strlcat(opt_attr.zone_fsopt_opt, "/dev", MAX_MNTOPT_STR); 3751 devtab.zone_fs_options = &opt_attr; 3752 opt_attr.zone_fsopt_next = NULL; 3753 3754 /* mount /dev inside the zone */ 3755 i = strlen(zonedevpath); 3756 if (mount_one(zlogp, &devtab, zonedevpath)) 3757 return (-1); 3758 3759 (void) strlcat(zonedevpath, "/dev", sizeof (zonedevpath)); 3760 if (di_prof_init(zonedevpath, &prof)) { 3761 zerror(zlogp, B_TRUE, "failed to initialize profile"); 3762 goto cleanup; 3763 } 3764 3765 /* Add the standard devices and directories */ 3766 for (i = 0; standard_devs[i] != NULL; ++i) { 3767 if (di_prof_add_dev(prof, 
standard_devs[i])) { 3768 zerror(zlogp, B_TRUE, "failed to add " 3769 "standard device"); 3770 goto cleanup; 3771 } 3772 } 3773 3774 /* Add the standard symlinks */ 3775 for (i = 0; standard_devlinks[i].source != NULL; ++i) { 3776 if (di_prof_add_symlink(prof, 3777 standard_devlinks[i].source, 3778 standard_devlinks[i].target)) { 3779 zerror(zlogp, B_TRUE, "failed to add " 3780 "standard symlink"); 3781 goto cleanup; 3782 } 3783 } 3784 3785 /* Add user-specified devices and directories */ 3786 if ((handle = zonecfg_init_handle()) == NULL) { 3787 zerror(zlogp, B_FALSE, "can't initialize zone handle"); 3788 goto cleanup; 3789 } 3790 if (err = zonecfg_get_handle(zone_name, handle)) { 3791 zerror(zlogp, B_FALSE, "can't get handle for zone " 3792 "%s: %s", zone_name, zonecfg_strerror(err)); 3793 goto cleanup; 3794 } 3795 if (err = zonecfg_setdevent(handle)) { 3796 zerror(zlogp, B_FALSE, "%s: %s", zone_name, 3797 zonecfg_strerror(err)); 3798 goto cleanup; 3799 } 3800 while (zonecfg_getdevent(handle, &ztab) == Z_OK) { 3801 if (di_prof_add_dev(prof, ztab.zone_dev_match)) { 3802 zerror(zlogp, B_TRUE, "failed to add " 3803 "user-specified device"); 3804 goto cleanup; 3805 } 3806 } 3807 (void) zonecfg_enddevent(handle); 3808 3809 /* Send profile to kernel */ 3810 if (di_prof_commit(prof)) { 3811 zerror(zlogp, B_TRUE, "failed to commit profile"); 3812 goto cleanup; 3813 } 3814 3815 retval = 0; 3816 3817 cleanup: 3818 if (handle) 3819 zonecfg_fini_handle(handle); 3820 if (prof) 3821 di_prof_fini(prof); 3822 return (retval); 3823 } 3824