1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * This module contains functions used to bring up and tear down the 31 * Virtual Platform: [un]mounting file-systems, [un]plumbing network 32 * interfaces, [un]configuring devices, establishing resource controls, 33 * and creating/destroying the zone in the kernel. These actions, on 34 * the way up, ready the zone; on the way down, they halt the zone. 35 * See the much longer block comment at the beginning of zoneadmd.c 36 * for a bigger picture of how the whole program functions. 37 * 38 * This module also has primary responsibility for the layout of "scratch 39 * zones." These are mounted, but inactive, zones that are used during 40 * operating system upgrade and potentially other administrative action. The 41 * scratch zone environment is similar to the miniroot environment. The zone's 42 * actual root is mounted read-write on /a, and the standard paths (/usr, 43 * /sbin, /lib) all lead to read-only copies of the running system's binaries. 
44 * This allows the administrative tools to manipulate the zone using "-R /a" 45 * without relying on any binaries in the zone itself. 46 * 47 * If the scratch zone is on an alternate root (Live Upgrade [LU] boot 48 * environment), then we must resolve the lofs mounts used there to uncover 49 * writable (unshared) resources. Shared resources, though, are always 50 * read-only. In addition, if the "same" zone with a different root path is 51 * currently running, then "/b" inside the zone points to the running zone's 52 * root. This allows LU to synchronize configuration files during the upgrade 53 * process. 54 * 55 * To construct this environment, this module creates a tmpfs mount on 56 * $ZONEPATH/lu. Inside this scratch area, the miniroot-like environment as 57 * described above is constructed on the fly. The zone is then created using 58 * $ZONEPATH/lu as the root. 59 * 60 * Note that scratch zones are inactive. The zone's bits are not running and 61 * likely cannot be run correctly until upgrade is done. Init is not running 62 * there, nor is SMF. Because of this, the "mounted" state of a scratch zone 63 * is not a part of the usual halt/ready/boot state machine. 
64 */ 65 66 #include <sys/param.h> 67 #include <sys/mount.h> 68 #include <sys/mntent.h> 69 #include <sys/socket.h> 70 #include <sys/utsname.h> 71 #include <sys/types.h> 72 #include <sys/stat.h> 73 #include <sys/sockio.h> 74 #include <sys/stropts.h> 75 #include <sys/conf.h> 76 77 #include <inet/tcp.h> 78 #include <arpa/inet.h> 79 #include <netinet/in.h> 80 #include <net/route.h> 81 82 #include <stdio.h> 83 #include <errno.h> 84 #include <fcntl.h> 85 #include <unistd.h> 86 #include <rctl.h> 87 #include <stdlib.h> 88 #include <string.h> 89 #include <strings.h> 90 #include <wait.h> 91 #include <limits.h> 92 #include <libgen.h> 93 #include <libzfs.h> 94 #include <zone.h> 95 #include <assert.h> 96 #include <libcontract.h> 97 #include <libcontract_priv.h> 98 #include <uuid/uuid.h> 99 100 #include <sys/mntio.h> 101 #include <sys/mnttab.h> 102 #include <sys/fs/autofs.h> /* for _autofssys() */ 103 #include <sys/fs/lofs_info.h> 104 #include <sys/fs/zfs.h> 105 106 #include <pool.h> 107 #include <sys/pool.h> 108 109 #include <libzonecfg.h> 110 #include <synch.h> 111 #include "zoneadmd.h" 112 #include <tsol/label.h> 113 #include <libtsnet.h> 114 #include <sys/priv.h> 115 116 #define V4_ADDR_LEN 32 117 #define V6_ADDR_LEN 128 118 119 /* 0755 is the default directory mode. */ 120 #define DEFAULT_DIR_MODE \ 121 (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) 122 123 #define IPD_DEFAULT_OPTS \ 124 MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES 125 126 #define DFSTYPES "/etc/dfs/fstypes" 127 #define MAXTNZLEN 2048 128 129 /* 130 * A list of directories which should be created. 
131 */ 132 133 struct dir_info { 134 char *dir_name; 135 mode_t dir_mode; 136 }; 137 138 /* 139 * The pathnames below are relative to the zonepath 140 */ 141 static struct dir_info dev_dirs[] = { 142 { "/dev", 0755 }, 143 { "/dev/dsk", 0755 }, 144 { "/dev/fd", 0555 }, 145 { "/dev/pts", 0755 }, 146 { "/dev/rdsk", 0755 }, 147 { "/dev/rmt", 0755 }, 148 { "/dev/sad", 0755 }, 149 { "/dev/swap", 0755 }, 150 { "/dev/term", 0755 }, 151 }; 152 153 /* 154 * A list of devices which should be symlinked to /dev/zconsole. 155 */ 156 157 struct symlink_info { 158 char *sl_source; 159 char *sl_target; 160 }; 161 162 /* 163 * The "source" paths are relative to the zonepath 164 */ 165 static struct symlink_info dev_symlinks[] = { 166 { "/dev/stderr", "./fd/2" }, 167 { "/dev/stdin", "./fd/0" }, 168 { "/dev/stdout", "./fd/1" }, 169 { "/dev/dtremote", "/dev/null" }, 170 { "/dev/console", "zconsole" }, 171 { "/dev/syscon", "zconsole" }, 172 { "/dev/sysmsg", "zconsole" }, 173 { "/dev/systty", "zconsole" }, 174 { "/dev/msglog", "zconsole" }, 175 }; 176 177 /* for routing socket */ 178 static int rts_seqno = 0; 179 180 /* mangled zone name when mounting in an alternate root environment */ 181 static char kernzone[ZONENAME_MAX]; 182 183 /* array of cached mount entries for resolve_lofs */ 184 static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max; 185 186 /* for Trusted Extensions */ 187 static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *); 188 static int tsol_mounts(zlog_t *, char *, char *); 189 static void tsol_unmounts(zlog_t *, char *); 190 static m_label_t *zlabel = NULL; 191 static m_label_t *zid_label = NULL; 192 static priv_set_t *zprivs = NULL; 193 194 /* from libsocket, not in any header file */ 195 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *); 196 197 /* 198 * An optimization for build_mnttable: reallocate (and potentially copy the 199 * data) only once every N times through the loop. 
200 */ 201 #define MNTTAB_HUNK 32 202 203 /* 204 * Private autofs system call 205 */ 206 extern int _autofssys(int, void *); 207 208 static int 209 autofs_cleanup(zoneid_t zoneid) 210 { 211 /* 212 * Ask autofs to unmount all trigger nodes in the given zone. 213 */ 214 return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid)); 215 } 216 217 static void 218 free_mnttable(struct mnttab *mnt_array, uint_t nelem) 219 { 220 uint_t i; 221 222 if (mnt_array == NULL) 223 return; 224 for (i = 0; i < nelem; i++) { 225 free(mnt_array[i].mnt_mountp); 226 free(mnt_array[i].mnt_fstype); 227 free(mnt_array[i].mnt_special); 228 free(mnt_array[i].mnt_mntopts); 229 assert(mnt_array[i].mnt_time == NULL); 230 } 231 free(mnt_array); 232 } 233 234 /* 235 * Build the mount table for the zone rooted at "zroot", storing the resulting 236 * array of struct mnttabs in "mnt_arrayp" and the number of elements in the 237 * array in "nelemp". 238 */ 239 static int 240 build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab, 241 struct mnttab **mnt_arrayp, uint_t *nelemp) 242 { 243 struct mnttab mnt; 244 struct mnttab *mnts; 245 struct mnttab *mnp; 246 uint_t nmnt; 247 248 rewind(mnttab); 249 resetmnttab(mnttab); 250 nmnt = 0; 251 mnts = NULL; 252 while (getmntent(mnttab, &mnt) == 0) { 253 struct mnttab *tmp_array; 254 255 if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0) 256 continue; 257 if (nmnt % MNTTAB_HUNK == 0) { 258 tmp_array = realloc(mnts, 259 (nmnt + MNTTAB_HUNK) * sizeof (*mnts)); 260 if (tmp_array == NULL) { 261 free_mnttable(mnts, nmnt); 262 return (-1); 263 } 264 mnts = tmp_array; 265 } 266 mnp = &mnts[nmnt++]; 267 268 /* 269 * Zero out any fields we're not using. 
270 */ 271 (void) memset(mnp, 0, sizeof (*mnp)); 272 273 if (mnt.mnt_special != NULL) 274 mnp->mnt_special = strdup(mnt.mnt_special); 275 if (mnt.mnt_mntopts != NULL) 276 mnp->mnt_mntopts = strdup(mnt.mnt_mntopts); 277 mnp->mnt_mountp = strdup(mnt.mnt_mountp); 278 mnp->mnt_fstype = strdup(mnt.mnt_fstype); 279 if ((mnt.mnt_special != NULL && mnp->mnt_special == NULL) || 280 (mnt.mnt_mntopts != NULL && mnp->mnt_mntopts == NULL) || 281 mnp->mnt_mountp == NULL || mnp->mnt_fstype == NULL) { 282 zerror(zlogp, B_TRUE, "memory allocation failed"); 283 free_mnttable(mnts, nmnt); 284 return (-1); 285 } 286 } 287 *mnt_arrayp = mnts; 288 *nelemp = nmnt; 289 return (0); 290 } 291 292 /* 293 * This is an optimization. The resolve_lofs function is used quite frequently 294 * to manipulate file paths, and on a machine with a large number of zones, 295 * there will be a huge number of mounted file systems. Thus, we trigger a 296 * reread of the list of mount points 297 */ 298 static void 299 lofs_discard_mnttab(void) 300 { 301 free_mnttable(resolve_lofs_mnts, 302 resolve_lofs_mnt_max - resolve_lofs_mnts); 303 resolve_lofs_mnts = resolve_lofs_mnt_max = NULL; 304 } 305 306 static int 307 lofs_read_mnttab(zlog_t *zlogp) 308 { 309 FILE *mnttab; 310 uint_t nmnts; 311 312 if ((mnttab = fopen(MNTTAB, "r")) == NULL) 313 return (-1); 314 if (build_mnttable(zlogp, "", 0, mnttab, &resolve_lofs_mnts, 315 &nmnts) == -1) { 316 (void) fclose(mnttab); 317 return (-1); 318 } 319 (void) fclose(mnttab); 320 resolve_lofs_mnt_max = resolve_lofs_mnts + nmnts; 321 return (0); 322 } 323 324 /* 325 * This function loops over potential loopback mounts and symlinks in a given 326 * path and resolves them all down to an absolute path. 
327 */ 328 static void 329 resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen) 330 { 331 int len, arlen; 332 const char *altroot; 333 char tmppath[MAXPATHLEN]; 334 boolean_t outside_altroot; 335 336 if ((len = resolvepath(path, tmppath, sizeof (tmppath))) == -1) 337 return; 338 tmppath[len] = '\0'; 339 (void) strlcpy(path, tmppath, sizeof (tmppath)); 340 341 /* This happens once per zoneadmd operation. */ 342 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 343 return; 344 345 altroot = zonecfg_get_root(); 346 arlen = strlen(altroot); 347 outside_altroot = B_FALSE; 348 for (;;) { 349 struct mnttab *mnp; 350 351 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; 352 mnp++) { 353 if (mnp->mnt_fstype == NULL || 354 mnp->mnt_mountp == NULL || 355 mnp->mnt_special == NULL || 356 strcmp(mnp->mnt_fstype, MNTTYPE_LOFS) != 0) 357 continue; 358 len = strlen(mnp->mnt_mountp); 359 if (strncmp(mnp->mnt_mountp, path, len) == 0 && 360 (path[len] == '/' || path[len] == '\0')) 361 break; 362 } 363 if (mnp >= resolve_lofs_mnt_max) 364 break; 365 if (outside_altroot) { 366 char *cp; 367 int olen = sizeof (MNTOPT_RO) - 1; 368 369 /* 370 * If we run into a read-only mount outside of the 371 * alternate root environment, then the user doesn't 372 * want this path to be made read-write. 
373 */ 374 if (mnp->mnt_mntopts != NULL && 375 (cp = strstr(mnp->mnt_mntopts, MNTOPT_RO)) != 376 NULL && 377 (cp == mnp->mnt_mntopts || cp[-1] == ',') && 378 (cp[olen] == '\0' || cp[olen] == ',')) { 379 break; 380 } 381 } else if (arlen > 0 && 382 (strncmp(mnp->mnt_special, altroot, arlen) != 0 || 383 (mnp->mnt_special[arlen] != '\0' && 384 mnp->mnt_special[arlen] != '/'))) { 385 outside_altroot = B_TRUE; 386 } 387 /* use temporary buffer because new path might be longer */ 388 (void) snprintf(tmppath, sizeof (tmppath), "%s%s", 389 mnp->mnt_special, path + len); 390 if ((len = resolvepath(tmppath, path, pathlen)) == -1) 391 break; 392 path[len] = '\0'; 393 } 394 } 395 396 /* 397 * For a regular mount, check if a replacement lofs mount is needed because the 398 * referenced device is already mounted somewhere. 399 */ 400 static int 401 check_lofs_needed(zlog_t *zlogp, struct zone_fstab *fsptr) 402 { 403 struct mnttab *mnp; 404 zone_fsopt_t *optptr, *onext; 405 406 /* This happens once per zoneadmd operation. */ 407 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 408 return (-1); 409 410 /* 411 * If this special node isn't already in use, then it's ours alone; 412 * no need to worry about conflicting mounts. 413 */ 414 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; 415 mnp++) { 416 if (strcmp(mnp->mnt_special, fsptr->zone_fs_special) == 0) 417 break; 418 } 419 if (mnp >= resolve_lofs_mnt_max) 420 return (0); 421 422 /* 423 * Convert this duplicate mount into a lofs mount. 424 */ 425 (void) strlcpy(fsptr->zone_fs_special, mnp->mnt_mountp, 426 sizeof (fsptr->zone_fs_special)); 427 (void) strlcpy(fsptr->zone_fs_type, MNTTYPE_LOFS, 428 sizeof (fsptr->zone_fs_type)); 429 fsptr->zone_fs_raw[0] = '\0'; 430 431 /* 432 * Discard all but one of the original options and set that to be the 433 * same set of options used for inherit package directory resources. 
434 */ 435 optptr = fsptr->zone_fs_options; 436 if (optptr == NULL) { 437 optptr = malloc(sizeof (*optptr)); 438 if (optptr == NULL) { 439 zerror(zlogp, B_TRUE, "cannot mount %s", 440 fsptr->zone_fs_dir); 441 return (-1); 442 } 443 } else { 444 while ((onext = optptr->zone_fsopt_next) != NULL) { 445 optptr->zone_fsopt_next = onext->zone_fsopt_next; 446 free(onext); 447 } 448 } 449 (void) strcpy(optptr->zone_fsopt_opt, IPD_DEFAULT_OPTS); 450 optptr->zone_fsopt_next = NULL; 451 fsptr->zone_fs_options = optptr; 452 return (0); 453 } 454 455 static int 456 make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode) 457 { 458 char path[MAXPATHLEN]; 459 struct stat st; 460 461 if (snprintf(path, sizeof (path), "%s%s", prefix, subdir) > 462 sizeof (path)) { 463 zerror(zlogp, B_FALSE, "pathname %s%s is too long", prefix, 464 subdir); 465 return (-1); 466 } 467 468 if (lstat(path, &st) == 0) { 469 /* 470 * We don't check the file mode since presumably the zone 471 * administrator may have had good reason to change the mode, 472 * and we don't need to second guess him. 473 */ 474 if (!S_ISDIR(st.st_mode)) { 475 if (is_system_labeled() && 476 S_ISREG(st.st_mode)) { 477 /* 478 * The need to mount readonly copies of 479 * global zone /etc/ files is unique to 480 * Trusted Extensions. 481 */ 482 if (strncmp(subdir, "/etc/", 483 strlen("/etc/")) != 0) { 484 zerror(zlogp, B_FALSE, 485 "%s is not in /etc", path); 486 return (-1); 487 } 488 } else { 489 zerror(zlogp, B_FALSE, 490 "%s is not a directory", path); 491 return (-1); 492 } 493 } 494 } else if (mkdirp(path, mode) != 0) { 495 if (errno == EROFS) 496 zerror(zlogp, B_FALSE, "Could not mkdir %s.\nIt is on " 497 "a read-only file system in this local zone.\nMake " 498 "sure %s exists in the global zone.", path, subdir); 499 else 500 zerror(zlogp, B_TRUE, "mkdirp of %s failed", path); 501 return (-1); 502 } 503 return (0); 504 } 505 506 /* 507 * Make /dev and various directories underneath it. 
508 */ 509 static int 510 make_dev_dirs(zlog_t *zlogp, const char *zonepath) 511 { 512 int i; 513 514 for (i = 0; i < sizeof (dev_dirs) / sizeof (struct dir_info); i++) { 515 if (make_one_dir(zlogp, zonepath, dev_dirs[i].dir_name, 516 dev_dirs[i].dir_mode) != 0) 517 return (-1); 518 } 519 return (0); 520 } 521 522 /* 523 * Make various sym-links underneath /dev. 524 */ 525 static int 526 make_dev_links(zlog_t *zlogp, char *zonepath) 527 { 528 int i; 529 530 for (i = 0; i < sizeof (dev_symlinks) / sizeof (struct symlink_info); 531 i++) { 532 char dev[MAXPATHLEN]; 533 struct stat st; 534 535 (void) snprintf(dev, sizeof (dev), "%s%s", zonepath, 536 dev_symlinks[i].sl_source); 537 if (lstat(dev, &st) == 0) { 538 /* 539 * Try not to call unlink(2) on directories, since that 540 * makes UFS unhappy. 541 */ 542 if (S_ISDIR(st.st_mode)) { 543 zerror(zlogp, B_FALSE, "symlink path %s is a " 544 "directory", dev_symlinks[i].sl_source); 545 return (-1); 546 } 547 (void) unlink(dev); 548 } 549 if (symlink(dev_symlinks[i].sl_target, dev) != 0) { 550 zerror(zlogp, B_TRUE, "could not setup %s->%s symlink", 551 dev_symlinks[i].sl_source, 552 dev_symlinks[i].sl_target); 553 return (-1); 554 } 555 } 556 return (0); 557 } 558 559 /* 560 * Create various directories and sym-links under /dev. 
561 */ 562 static int 563 create_dev_files(zlog_t *zlogp) 564 { 565 char zonepath[MAXPATHLEN]; 566 567 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { 568 zerror(zlogp, B_TRUE, "unable to determine zone root"); 569 return (-1); 570 } 571 if (zonecfg_in_alt_root()) 572 resolve_lofs(zlogp, zonepath, sizeof (zonepath)); 573 574 if (make_dev_dirs(zlogp, zonepath) != 0) 575 return (-1); 576 if (make_dev_links(zlogp, zonepath) != 0) 577 return (-1); 578 return (0); 579 } 580 581 static void 582 free_remote_fstypes(char **types) 583 { 584 uint_t i; 585 586 if (types == NULL) 587 return; 588 for (i = 0; types[i] != NULL; i++) 589 free(types[i]); 590 free(types); 591 } 592 593 static char ** 594 get_remote_fstypes(zlog_t *zlogp) 595 { 596 char **types = NULL; 597 FILE *fp; 598 char buf[MAXPATHLEN]; 599 char fstype[MAXPATHLEN]; 600 uint_t lines = 0; 601 uint_t i; 602 603 if ((fp = fopen(DFSTYPES, "r")) == NULL) { 604 zerror(zlogp, B_TRUE, "failed to open %s", DFSTYPES); 605 return (NULL); 606 } 607 /* 608 * Count the number of lines 609 */ 610 while (fgets(buf, sizeof (buf), fp) != NULL) 611 lines++; 612 if (lines == 0) /* didn't read anything; empty file */ 613 goto out; 614 rewind(fp); 615 /* 616 * Allocate enough space for a NULL-terminated array. 
617 */ 618 types = calloc(lines + 1, sizeof (char *)); 619 if (types == NULL) { 620 zerror(zlogp, B_TRUE, "memory allocation failed"); 621 goto out; 622 } 623 i = 0; 624 while (fgets(buf, sizeof (buf), fp) != NULL) { 625 /* LINTED - fstype is big enough to hold buf */ 626 if (sscanf(buf, "%s", fstype) == 0) { 627 zerror(zlogp, B_FALSE, "unable to parse %s", DFSTYPES); 628 free_remote_fstypes(types); 629 types = NULL; 630 goto out; 631 } 632 types[i] = strdup(fstype); 633 if (types[i] == NULL) { 634 zerror(zlogp, B_TRUE, "memory allocation failed"); 635 free_remote_fstypes(types); 636 types = NULL; 637 goto out; 638 } 639 i++; 640 } 641 out: 642 (void) fclose(fp); 643 return (types); 644 } 645 646 static boolean_t 647 is_remote_fstype(const char *fstype, char *const *remote_fstypes) 648 { 649 uint_t i; 650 651 if (remote_fstypes == NULL) 652 return (B_FALSE); 653 for (i = 0; remote_fstypes[i] != NULL; i++) { 654 if (strcmp(remote_fstypes[i], fstype) == 0) 655 return (B_TRUE); 656 } 657 return (B_FALSE); 658 } 659 660 /* 661 * This converts a zone root path (normally of the form .../root) to a Live 662 * Upgrade scratch zone root (of the form .../lu). 663 */ 664 static void 665 root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved) 666 { 667 if (!isresolved && zonecfg_in_alt_root()) 668 resolve_lofs(zlogp, zroot, zrootlen); 669 (void) strcpy(strrchr(zroot, '/') + 1, "lu"); 670 } 671 672 /* 673 * The general strategy for unmounting filesystems is as follows: 674 * 675 * - Remote filesystems may be dead, and attempting to contact them as 676 * part of a regular unmount may hang forever; we want to always try to 677 * forcibly unmount such filesystems and only fall back to regular 678 * unmounts if the filesystem doesn't support forced unmounts. 
 *
 * - We don't want to unnecessarily corrupt metadata on local
 * filesystems (ie UFS), so we want to start off with graceful unmounts,
 * and only escalate to doing forced unmounts if we get stuck.
 *
 * We start off walking backwards through the mount table.  This doesn't
 * give us strict ordering but ensures that we try to unmount submounts
 * first.  We thus limit the number of failed umount2(2) calls.
 *
 * The mechanism for determining if we're stuck is to count the number
 * of failed unmounts each iteration through the mount table.  This
 * gives us an upper bound on the number of filesystems which remain
 * mounted (autofs trigger nodes are dealt with separately).  If at the
 * end of one unmount+autofs_cleanup cycle we still have the same number
 * of mounts that we started out with, we're stuck and try a forced
 * unmount.  If that fails (filesystem doesn't support forced unmounts)
 * then we bail and are unable to teardown the zone.  If it succeeds,
 * we're no longer stuck so we continue with our policy of trying
 * graceful unmounts first.
 *
 * Zone must be down (ie, no processes or threads active).
700 */ 701 static int 702 unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd) 703 { 704 int error = 0; 705 FILE *mnttab; 706 struct mnttab *mnts; 707 uint_t nmnt; 708 char zroot[MAXPATHLEN + 1]; 709 size_t zrootlen; 710 uint_t oldcount = UINT_MAX; 711 boolean_t stuck = B_FALSE; 712 char **remote_fstypes = NULL; 713 714 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 715 zerror(zlogp, B_FALSE, "unable to determine zone root"); 716 return (-1); 717 } 718 if (unmount_cmd) 719 root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE); 720 721 (void) strcat(zroot, "/"); 722 zrootlen = strlen(zroot); 723 724 /* 725 * For Trusted Extensions unmount each higher level zone's mount 726 * of our zone's /export/home 727 */ 728 if (!unmount_cmd) 729 tsol_unmounts(zlogp, zone_name); 730 731 if ((mnttab = fopen(MNTTAB, "r")) == NULL) { 732 zerror(zlogp, B_TRUE, "failed to open %s", MNTTAB); 733 return (-1); 734 } 735 /* 736 * Use our hacky mntfs ioctl so we see everything, even mounts with 737 * MS_NOMNTTAB. 738 */ 739 if (ioctl(fileno(mnttab), MNTIOC_SHOWHIDDEN, NULL) < 0) { 740 zerror(zlogp, B_TRUE, "unable to configure %s", MNTTAB); 741 error++; 742 goto out; 743 } 744 745 /* 746 * Build the list of remote fstypes so we know which ones we 747 * should forcibly unmount. 748 */ 749 remote_fstypes = get_remote_fstypes(zlogp); 750 for (; /* ever */; ) { 751 uint_t newcount = 0; 752 boolean_t unmounted; 753 struct mnttab *mnp; 754 char *path; 755 uint_t i; 756 757 mnts = NULL; 758 nmnt = 0; 759 /* 760 * MNTTAB gives us a way to walk through mounted 761 * filesystems; we need to be able to walk them in 762 * reverse order, so we build a list of all mounted 763 * filesystems. 
764 */ 765 if (build_mnttable(zlogp, zroot, zrootlen, mnttab, &mnts, 766 &nmnt) != 0) { 767 error++; 768 goto out; 769 } 770 for (i = 0; i < nmnt; i++) { 771 mnp = &mnts[nmnt - i - 1]; /* access in reverse order */ 772 path = mnp->mnt_mountp; 773 unmounted = B_FALSE; 774 /* 775 * Try forced unmount first for remote filesystems. 776 * 777 * Not all remote filesystems support forced unmounts, 778 * so if this fails (ENOTSUP) we'll continue on 779 * and try a regular unmount. 780 */ 781 if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) { 782 if (umount2(path, MS_FORCE) == 0) 783 unmounted = B_TRUE; 784 } 785 /* 786 * Try forced unmount if we're stuck. 787 */ 788 if (stuck) { 789 if (umount2(path, MS_FORCE) == 0) { 790 unmounted = B_TRUE; 791 stuck = B_FALSE; 792 } else { 793 /* 794 * The first failure indicates a 795 * mount we won't be able to get 796 * rid of automatically, so we 797 * bail. 798 */ 799 error++; 800 zerror(zlogp, B_FALSE, 801 "unable to unmount '%s'", path); 802 free_mnttable(mnts, nmnt); 803 goto out; 804 } 805 } 806 /* 807 * Try regular unmounts for everything else. 808 */ 809 if (!unmounted && umount2(path, 0) != 0) 810 newcount++; 811 } 812 free_mnttable(mnts, nmnt); 813 814 if (newcount == 0) 815 break; 816 if (newcount >= oldcount) { 817 /* 818 * Last round didn't unmount anything; we're stuck and 819 * should start trying forced unmounts. 820 */ 821 stuck = B_TRUE; 822 } 823 oldcount = newcount; 824 825 /* 826 * Autofs doesn't let you unmount its trigger nodes from 827 * userland so we have to tell the kernel to cleanup for us. 828 */ 829 if (autofs_cleanup(zoneid) != 0) { 830 zerror(zlogp, B_TRUE, "unable to remove autofs nodes"); 831 error++; 832 goto out; 833 } 834 } 835 836 out: 837 free_remote_fstypes(remote_fstypes); 838 (void) fclose(mnttab); 839 return (error ? 
-1 : 0); 840 } 841 842 static int 843 fs_compare(const void *m1, const void *m2) 844 { 845 struct zone_fstab *i = (struct zone_fstab *)m1; 846 struct zone_fstab *j = (struct zone_fstab *)m2; 847 848 return (strcmp(i->zone_fs_dir, j->zone_fs_dir)); 849 } 850 851 /* 852 * Fork and exec (and wait for) the mentioned binary with the provided 853 * arguments. Returns (-1) if something went wrong with fork(2) or exec(2), 854 * returns the exit status otherwise. 855 * 856 * If we were unable to exec the provided pathname (for whatever 857 * reason), we return the special token ZEXIT_EXEC. The current value 858 * of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the 859 * consumers of this function; any future consumers must make sure this 860 * remains the case. 861 */ 862 static int 863 forkexec(zlog_t *zlogp, const char *path, char *const argv[]) 864 { 865 pid_t child_pid; 866 int child_status = 0; 867 868 /* 869 * Do not let another thread localize a message while we are forking. 870 */ 871 (void) mutex_lock(&msglock); 872 child_pid = fork(); 873 (void) mutex_unlock(&msglock); 874 if (child_pid == -1) { 875 zerror(zlogp, B_TRUE, "could not fork for %s", argv[0]); 876 return (-1); 877 } else if (child_pid == 0) { 878 closefrom(0); 879 /* redirect stdin, stdout & stderr to /dev/null */ 880 (void) open("/dev/null", O_RDONLY); /* stdin */ 881 (void) open("/dev/null", O_WRONLY); /* stdout */ 882 (void) open("/dev/null", O_WRONLY); /* stderr */ 883 (void) execv(path, argv); 884 /* 885 * Since we are in the child, there is no point calling zerror() 886 * since there is nobody waiting to consume it. So exit with a 887 * special code that the parent will recognize and call zerror() 888 * accordingly. 
889 */ 890 891 _exit(ZEXIT_EXEC); 892 } else { 893 (void) waitpid(child_pid, &child_status, 0); 894 } 895 896 if (WIFSIGNALED(child_status)) { 897 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 898 "signal %d", path, WTERMSIG(child_status)); 899 return (-1); 900 } 901 assert(WIFEXITED(child_status)); 902 if (WEXITSTATUS(child_status) == ZEXIT_EXEC) { 903 zerror(zlogp, B_FALSE, "failed to exec %s", path); 904 return (-1); 905 } 906 return (WEXITSTATUS(child_status)); 907 } 908 909 static int 910 dofsck(zlog_t *zlogp, const char *fstype, const char *rawdev) 911 { 912 char cmdbuf[MAXPATHLEN]; 913 char *argv[4]; 914 int status; 915 916 /* 917 * We could alternatively have called /usr/sbin/fsck -F <fstype>, but 918 * that would cost us an extra fork/exec without buying us anything. 919 */ 920 if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/fsck", fstype) 921 > sizeof (cmdbuf)) { 922 zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); 923 return (-1); 924 } 925 926 argv[0] = "fsck"; 927 argv[1] = "-m"; 928 argv[2] = (char *)rawdev; 929 argv[3] = NULL; 930 931 status = forkexec(zlogp, cmdbuf, argv); 932 if (status == 0 || status == -1) 933 return (status); 934 zerror(zlogp, B_FALSE, "fsck of '%s' failed with exit status %d; " 935 "run fsck manually", rawdev, status); 936 return (-1); 937 } 938 939 static int 940 domount(zlog_t *zlogp, const char *fstype, const char *opts, 941 const char *special, const char *directory) 942 { 943 char cmdbuf[MAXPATHLEN]; 944 char *argv[6]; 945 int status; 946 947 /* 948 * We could alternatively have called /usr/sbin/mount -F <fstype>, but 949 * that would cost us an extra fork/exec without buying us anything. 
950 */ 951 if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/mount", fstype) 952 > sizeof (cmdbuf)) { 953 zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); 954 return (-1); 955 } 956 argv[0] = "mount"; 957 if (opts[0] == '\0') { 958 argv[1] = (char *)special; 959 argv[2] = (char *)directory; 960 argv[3] = NULL; 961 } else { 962 argv[1] = "-o"; 963 argv[2] = (char *)opts; 964 argv[3] = (char *)special; 965 argv[4] = (char *)directory; 966 argv[5] = NULL; 967 } 968 969 status = forkexec(zlogp, cmdbuf, argv); 970 if (status == 0 || status == -1) 971 return (status); 972 if (opts[0] == '\0') 973 zerror(zlogp, B_FALSE, "\"%s %s %s\" " 974 "failed with exit code %d", 975 cmdbuf, special, directory, status); 976 else 977 zerror(zlogp, B_FALSE, "\"%s -o %s %s %s\" " 978 "failed with exit code %d", 979 cmdbuf, opts, special, directory, status); 980 return (-1); 981 } 982 983 /* 984 * Make sure if a given path exists, it is not a sym-link, and is a directory. 985 */ 986 static int 987 check_path(zlog_t *zlogp, const char *path) 988 { 989 struct stat statbuf; 990 char respath[MAXPATHLEN]; 991 int res; 992 993 if (lstat(path, &statbuf) != 0) { 994 if (errno == ENOENT) 995 return (0); 996 zerror(zlogp, B_TRUE, "can't stat %s", path); 997 return (-1); 998 } 999 if (S_ISLNK(statbuf.st_mode)) { 1000 zerror(zlogp, B_FALSE, "%s is a symlink", path); 1001 return (-1); 1002 } 1003 if (!S_ISDIR(statbuf.st_mode)) { 1004 if (is_system_labeled() && S_ISREG(statbuf.st_mode)) { 1005 /* 1006 * The need to mount readonly copies of 1007 * global zone /etc/ files is unique to 1008 * Trusted Extensions. 
1009 * The check for /etc/ via strstr() is to 1010 * allow paths like $ZONEROOT/etc/passwd 1011 */ 1012 if (strstr(path, "/etc/") == NULL) { 1013 zerror(zlogp, B_FALSE, 1014 "%s is not in /etc", path); 1015 return (-1); 1016 } 1017 } else { 1018 zerror(zlogp, B_FALSE, "%s is not a directory", path); 1019 return (-1); 1020 } 1021 } 1022 if ((res = resolvepath(path, respath, sizeof (respath))) == -1) { 1023 zerror(zlogp, B_TRUE, "unable to resolve path %s", path); 1024 return (-1); 1025 } 1026 respath[res] = '\0'; 1027 if (strcmp(path, respath) != 0) { 1028 /* 1029 * We don't like ".."s and "."s throwing us off 1030 */ 1031 zerror(zlogp, B_FALSE, "%s is not a canonical path", path); 1032 return (-1); 1033 } 1034 return (0); 1035 } 1036 1037 /* 1038 * Check every component of rootpath/relpath. If any component fails (ie, 1039 * exists but isn't the canonical path to a directory), it is returned in 1040 * badpath, which is assumed to be at least of size MAXPATHLEN. 1041 * 1042 * Relpath must begin with '/'. 1043 */ 1044 static boolean_t 1045 valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *relpath) 1046 { 1047 char abspath[MAXPATHLEN], *slashp; 1048 1049 /* 1050 * Make sure abspath has at least one '/' after its rootpath 1051 * component, and ends with '/'. 
1052 */ 1053 if (snprintf(abspath, sizeof (abspath), "%s%s/", rootpath, relpath) > 1054 sizeof (abspath)) { 1055 zerror(zlogp, B_FALSE, "pathname %s%s is too long", rootpath, 1056 relpath); 1057 return (B_FALSE); 1058 } 1059 1060 slashp = &abspath[strlen(rootpath)]; 1061 assert(*slashp == '/'); 1062 do { 1063 *slashp = '\0'; 1064 if (check_path(zlogp, abspath) != 0) 1065 return (B_FALSE); 1066 *slashp = '/'; 1067 slashp++; 1068 } while ((slashp = strchr(slashp, '/')) != NULL); 1069 return (B_TRUE); 1070 } 1071 1072 static int 1073 mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath) 1074 { 1075 char path[MAXPATHLEN]; 1076 char specpath[MAXPATHLEN]; 1077 char optstr[MAX_MNTOPT_STR]; 1078 zone_fsopt_t *optptr; 1079 1080 if (!valid_mount_path(zlogp, rootpath, fsptr->zone_fs_dir)) { 1081 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 1082 rootpath, fsptr->zone_fs_dir); 1083 return (-1); 1084 } 1085 1086 if (make_one_dir(zlogp, rootpath, fsptr->zone_fs_dir, 1087 DEFAULT_DIR_MODE) != 0) 1088 return (-1); 1089 1090 (void) snprintf(path, sizeof (path), "%s%s", rootpath, 1091 fsptr->zone_fs_dir); 1092 1093 if (strlen(fsptr->zone_fs_special) == 0) { 1094 /* 1095 * A zero-length special is how we distinguish IPDs from 1096 * general-purpose FSs. Make sure it mounts from a place that 1097 * can be seen via the alternate zone's root. 
1098 */ 1099 if (snprintf(specpath, sizeof (specpath), "%s%s", 1100 zonecfg_get_root(), fsptr->zone_fs_dir) >= 1101 sizeof (specpath)) { 1102 zerror(zlogp, B_FALSE, "cannot mount %s: path too " 1103 "long in alternate root", fsptr->zone_fs_dir); 1104 return (-1); 1105 } 1106 if (zonecfg_in_alt_root()) 1107 resolve_lofs(zlogp, specpath, sizeof (specpath)); 1108 if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, 1109 specpath, path) != 0) { 1110 zerror(zlogp, B_TRUE, "failed to loopback mount %s", 1111 specpath); 1112 return (-1); 1113 } 1114 return (0); 1115 } 1116 1117 /* 1118 * In general the strategy here is to do just as much verification as 1119 * necessary to avoid crashing or otherwise doing something bad; if the 1120 * administrator initiated the operation via zoneadm(1m), he'll get 1121 * auto-verification which will let him know what's wrong. If he 1122 * modifies the zone configuration of a running zone and doesn't attempt 1123 * to verify that it's OK we won't crash but won't bother trying to be 1124 * too helpful either. zoneadm verify is only a couple keystrokes away. 1125 */ 1126 if (!zonecfg_valid_fs_type(fsptr->zone_fs_type)) { 1127 zerror(zlogp, B_FALSE, "cannot mount %s on %s: " 1128 "invalid file-system type %s", fsptr->zone_fs_special, 1129 fsptr->zone_fs_dir, fsptr->zone_fs_type); 1130 return (-1); 1131 } 1132 1133 /* 1134 * If we're looking at an alternate root environment, then construct 1135 * read-only loopback mounts as necessary. For all lofs mounts, make 1136 * sure that the 'special' entry points inside the alternate root. (We 1137 * don't do this with other mounts, as devfs isn't in the alternate 1138 * root, and we need to assume the device environment is roughly the 1139 * same.) 
1140 */ 1141 if (zonecfg_in_alt_root()) { 1142 struct stat64 st; 1143 1144 if (stat64(fsptr->zone_fs_special, &st) != -1 && 1145 S_ISBLK(st.st_mode) && 1146 check_lofs_needed(zlogp, fsptr) == -1) 1147 return (-1); 1148 if (strcmp(fsptr->zone_fs_type, MNTTYPE_LOFS) == 0) { 1149 if (snprintf(specpath, sizeof (specpath), "%s%s", 1150 zonecfg_get_root(), fsptr->zone_fs_special) >= 1151 sizeof (specpath)) { 1152 zerror(zlogp, B_FALSE, "cannot mount %s: path " 1153 "too long in alternate root", 1154 fsptr->zone_fs_special); 1155 return (-1); 1156 } 1157 resolve_lofs(zlogp, specpath, sizeof (specpath)); 1158 (void) strlcpy(fsptr->zone_fs_special, specpath, 1159 sizeof (fsptr->zone_fs_special)); 1160 } 1161 } 1162 1163 /* 1164 * Run 'fsck -m' if there's a device to fsck. 1165 */ 1166 if (fsptr->zone_fs_raw[0] != '\0' && 1167 dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_raw) != 0) 1168 return (-1); 1169 1170 /* 1171 * Build up mount option string. 1172 */ 1173 optstr[0] = '\0'; 1174 if (fsptr->zone_fs_options != NULL) { 1175 (void) strlcpy(optstr, fsptr->zone_fs_options->zone_fsopt_opt, 1176 sizeof (optstr)); 1177 for (optptr = fsptr->zone_fs_options->zone_fsopt_next; 1178 optptr != NULL; optptr = optptr->zone_fsopt_next) { 1179 (void) strlcat(optstr, ",", sizeof (optstr)); 1180 (void) strlcat(optstr, optptr->zone_fsopt_opt, 1181 sizeof (optstr)); 1182 } 1183 } 1184 return (domount(zlogp, fsptr->zone_fs_type, optstr, 1185 fsptr->zone_fs_special, path)); 1186 } 1187 1188 static void 1189 free_fs_data(struct zone_fstab *fsarray, uint_t nelem) 1190 { 1191 uint_t i; 1192 1193 if (fsarray == NULL) 1194 return; 1195 for (i = 0; i < nelem; i++) 1196 zonecfg_free_fs_option_list(fsarray[i].zone_fs_options); 1197 free(fsarray); 1198 } 1199 1200 /* 1201 * This function constructs the miniroot-like "scratch zone" environment. If 1202 * it returns B_FALSE, then the error has already been logged. 
1203 */ 1204 static boolean_t 1205 build_mounted(zlog_t *zlogp, char *rootpath, size_t rootlen, 1206 const char *zonepath) 1207 { 1208 char tmp[MAXPATHLEN], fromdir[MAXPATHLEN]; 1209 char luroot[MAXPATHLEN]; 1210 const char **cpp; 1211 static const char *mkdirs[] = { 1212 "/system", "/system/contract", "/proc", "/dev", "/tmp", 1213 "/a", NULL 1214 }; 1215 static const char *localdirs[] = { 1216 "/etc", "/var", NULL 1217 }; 1218 static const char *loopdirs[] = { 1219 "/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform", 1220 "/usr", NULL 1221 }; 1222 static const char *tmpdirs[] = { 1223 "/tmp", "/var/run", NULL 1224 }; 1225 FILE *fp; 1226 struct stat st; 1227 char *altstr; 1228 uuid_t uuid; 1229 1230 /* 1231 * Construct a small Solaris environment, including the zone root 1232 * mounted on '/a' inside that environment. 1233 */ 1234 resolve_lofs(zlogp, rootpath, rootlen); 1235 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 1236 resolve_lofs(zlogp, luroot, sizeof (luroot)); 1237 (void) snprintf(tmp, sizeof (tmp), "%s/bin", luroot); 1238 (void) symlink("./usr/bin", tmp); 1239 1240 /* 1241 * These are mostly special mount points; not handled here. (See 1242 * zone_mount_early.) 1243 */ 1244 for (cpp = mkdirs; *cpp != NULL; cpp++) { 1245 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1246 if (mkdir(tmp, 0755) != 0) { 1247 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1248 return (B_FALSE); 1249 } 1250 } 1251 1252 /* 1253 * These are mounted read-write from the zone undergoing upgrade. We 1254 * must be careful not to 'leak' things from the main system into the 1255 * zone, and this accomplishes that goal. 
1256 */ 1257 for (cpp = localdirs; *cpp != NULL; cpp++) { 1258 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1259 (void) snprintf(fromdir, sizeof (fromdir), "%s%s", rootpath, 1260 *cpp); 1261 if (mkdir(tmp, 0755) != 0) { 1262 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1263 return (B_FALSE); 1264 } 1265 if (domount(zlogp, MNTTYPE_LOFS, "", fromdir, tmp) != 0) { 1266 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1267 *cpp); 1268 return (B_FALSE); 1269 } 1270 } 1271 1272 /* 1273 * These are things mounted read-only from the running system because 1274 * they contain binaries that must match system. 1275 */ 1276 for (cpp = loopdirs; *cpp != NULL; cpp++) { 1277 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1278 if (mkdir(tmp, 0755) != 0) { 1279 if (errno != EEXIST) { 1280 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1281 return (B_FALSE); 1282 } 1283 if (lstat(tmp, &st) != 0) { 1284 zerror(zlogp, B_TRUE, "cannot stat %s", tmp); 1285 return (B_FALSE); 1286 } 1287 /* 1288 * Ignore any non-directories encountered. These are 1289 * things that have been converted into symlinks 1290 * (/etc/fs and /etc/lib) and no longer need a lofs 1291 * fixup. 1292 */ 1293 if (!S_ISDIR(st.st_mode)) 1294 continue; 1295 } 1296 if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, *cpp, 1297 tmp) != 0) { 1298 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1299 *cpp); 1300 return (B_FALSE); 1301 } 1302 } 1303 1304 /* 1305 * These are things with tmpfs mounted inside. 1306 */ 1307 for (cpp = tmpdirs; *cpp != NULL; cpp++) { 1308 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1309 if (mkdir(tmp, 0755) != 0 && errno != EEXIST) { 1310 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1311 return (B_FALSE); 1312 } 1313 if (domount(zlogp, MNTTYPE_TMPFS, "", "swap", tmp) != 0) { 1314 zerror(zlogp, B_TRUE, "cannot mount swap on %s", *cpp); 1315 return (B_FALSE); 1316 } 1317 } 1318 1319 /* 1320 * This is here to support lucopy. 
If there's an instance of this same 1321 * zone on the current running system, then we mount its root up as 1322 * read-only inside the scratch zone. 1323 */ 1324 (void) zonecfg_get_uuid(zone_name, uuid); 1325 altstr = strdup(zonecfg_get_root()); 1326 if (altstr == NULL) { 1327 zerror(zlogp, B_TRUE, "memory allocation failed"); 1328 return (B_FALSE); 1329 } 1330 zonecfg_set_root(""); 1331 (void) strlcpy(tmp, zone_name, sizeof (tmp)); 1332 (void) zonecfg_get_name_by_uuid(uuid, tmp, sizeof (tmp)); 1333 if (zone_get_rootpath(tmp, fromdir, sizeof (fromdir)) == Z_OK && 1334 strcmp(fromdir, rootpath) != 0) { 1335 (void) snprintf(tmp, sizeof (tmp), "%s/b", luroot); 1336 if (mkdir(tmp, 0755) != 0) { 1337 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1338 return (B_FALSE); 1339 } 1340 if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, fromdir, 1341 tmp) != 0) { 1342 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1343 fromdir); 1344 return (B_FALSE); 1345 } 1346 } 1347 zonecfg_set_root(altstr); 1348 free(altstr); 1349 1350 if ((fp = zonecfg_open_scratch(luroot, B_TRUE)) == NULL) { 1351 zerror(zlogp, B_TRUE, "cannot open zone mapfile"); 1352 return (B_FALSE); 1353 } 1354 (void) ftruncate(fileno(fp), 0); 1355 if (zonecfg_add_scratch(fp, zone_name, kernzone, "/") == -1) { 1356 zerror(zlogp, B_TRUE, "cannot add zone mapfile entry"); 1357 } 1358 zonecfg_close_scratch(fp); 1359 (void) snprintf(tmp, sizeof (tmp), "%s/a", luroot); 1360 if (domount(zlogp, MNTTYPE_LOFS, "", rootpath, tmp) != 0) 1361 return (B_FALSE); 1362 (void) strlcpy(rootpath, tmp, rootlen); 1363 return (B_TRUE); 1364 } 1365 1366 static int 1367 mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd) 1368 { 1369 char rootpath[MAXPATHLEN]; 1370 char zonepath[MAXPATHLEN]; 1371 int num_fs = 0, i; 1372 struct zone_fstab fstab, *fs_ptr = NULL, *tmp_ptr; 1373 struct zone_fstab *fsp; 1374 zone_dochandle_t handle = NULL; 1375 zone_state_t zstate; 1376 1377 if (zone_get_state(zone_name, &zstate) != Z_OK || 1378 (zstate 
!= ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) { 1379 zerror(zlogp, B_FALSE, 1380 "zone must be in '%s' or '%s' state to mount file-systems", 1381 zone_state_str(ZONE_STATE_READY), 1382 zone_state_str(ZONE_STATE_MOUNTED)); 1383 goto bad; 1384 } 1385 1386 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { 1387 zerror(zlogp, B_TRUE, "unable to determine zone path"); 1388 goto bad; 1389 } 1390 1391 if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { 1392 zerror(zlogp, B_TRUE, "unable to determine zone root"); 1393 goto bad; 1394 } 1395 1396 if ((handle = zonecfg_init_handle()) == NULL) { 1397 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 1398 goto bad; 1399 } 1400 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK || 1401 zonecfg_setfsent(handle) != Z_OK) { 1402 zerror(zlogp, B_FALSE, "invalid configuration"); 1403 goto bad; 1404 } 1405 1406 /* 1407 * /dev in the zone is loopback'd from the external /dev repository, 1408 * in order to provide a largely read-only semantic. But because 1409 * processes in the zone need to be able to chown, chmod, etc. zone 1410 * /dev files, we can't use a 'ro' lofs mount. Instead we use a 1411 * special mode just for zones, "zonedevfs". 1412 * 1413 * In the future we should front /dev with a full-fledged filesystem. 1414 */ 1415 num_fs++; 1416 if ((tmp_ptr = realloc(fs_ptr, num_fs * sizeof (*tmp_ptr))) == NULL) { 1417 zerror(zlogp, B_TRUE, "memory allocation failed"); 1418 num_fs--; 1419 goto bad; 1420 } 1421 fs_ptr = tmp_ptr; 1422 fsp = &fs_ptr[num_fs - 1]; 1423 /* 1424 * Note that mount_one will prepend the alternate root to 1425 * zone_fs_special and do the necessary resolution, so all that is 1426 * needed here is to strip the root added by zone_get_zonepath. 
1427 */ 1428 (void) strlcpy(fsp->zone_fs_dir, "/dev", sizeof (fsp->zone_fs_dir)); 1429 (void) snprintf(fsp->zone_fs_special, sizeof (fsp->zone_fs_special), 1430 "%s/dev", zonepath + strlen(zonecfg_get_root())); 1431 fsp->zone_fs_raw[0] = '\0'; 1432 (void) strlcpy(fsp->zone_fs_type, MNTTYPE_LOFS, 1433 sizeof (fsp->zone_fs_type)); 1434 fsp->zone_fs_options = NULL; 1435 if (zonecfg_add_fs_option(fsp, MNTOPT_LOFS_ZONEDEVFS) != Z_OK) { 1436 zerror(zlogp, B_FALSE, "error adding property"); 1437 goto bad; 1438 } 1439 1440 /* 1441 * Iterate through the rest of the filesystems, first the IPDs, then 1442 * the general FSs. Sort them all, then mount them in sorted order. 1443 * This is to make sure the higher level directories (e.g., /usr) 1444 * get mounted before any beneath them (e.g., /usr/local). 1445 */ 1446 if (zonecfg_setipdent(handle) != Z_OK) { 1447 zerror(zlogp, B_FALSE, "invalid configuration"); 1448 goto bad; 1449 } 1450 while (zonecfg_getipdent(handle, &fstab) == Z_OK) { 1451 num_fs++; 1452 if ((tmp_ptr = realloc(fs_ptr, 1453 num_fs * sizeof (*tmp_ptr))) == NULL) { 1454 zerror(zlogp, B_TRUE, "memory allocation failed"); 1455 num_fs--; 1456 (void) zonecfg_endipdent(handle); 1457 goto bad; 1458 } 1459 fs_ptr = tmp_ptr; 1460 fsp = &fs_ptr[num_fs - 1]; 1461 /* 1462 * IPDs logically only have a mount point; all other properties 1463 * are implied. 1464 */ 1465 (void) strlcpy(fsp->zone_fs_dir, 1466 fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir)); 1467 fsp->zone_fs_special[0] = '\0'; 1468 fsp->zone_fs_raw[0] = '\0'; 1469 fsp->zone_fs_type[0] = '\0'; 1470 fsp->zone_fs_options = NULL; 1471 } 1472 (void) zonecfg_endipdent(handle); 1473 1474 if (zonecfg_setfsent(handle) != Z_OK) { 1475 zerror(zlogp, B_FALSE, "invalid configuration"); 1476 goto bad; 1477 } 1478 while (zonecfg_getfsent(handle, &fstab) == Z_OK) { 1479 /* 1480 * ZFS filesystems will not be accessible under an alternate 1481 * root, since the pool will not be known. Ignore them in this 1482 * case. 
1483 */ 1484 if (mount_cmd && strcmp(fstab.zone_fs_type, MNTTYPE_ZFS) == 0) 1485 continue; 1486 1487 num_fs++; 1488 if ((tmp_ptr = realloc(fs_ptr, 1489 num_fs * sizeof (*tmp_ptr))) == NULL) { 1490 zerror(zlogp, B_TRUE, "memory allocation failed"); 1491 num_fs--; 1492 (void) zonecfg_endfsent(handle); 1493 goto bad; 1494 } 1495 fs_ptr = tmp_ptr; 1496 fsp = &fs_ptr[num_fs - 1]; 1497 (void) strlcpy(fsp->zone_fs_dir, 1498 fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir)); 1499 (void) strlcpy(fsp->zone_fs_special, fstab.zone_fs_special, 1500 sizeof (fsp->zone_fs_special)); 1501 (void) strlcpy(fsp->zone_fs_raw, fstab.zone_fs_raw, 1502 sizeof (fsp->zone_fs_raw)); 1503 (void) strlcpy(fsp->zone_fs_type, fstab.zone_fs_type, 1504 sizeof (fsp->zone_fs_type)); 1505 fsp->zone_fs_options = fstab.zone_fs_options; 1506 } 1507 (void) zonecfg_endfsent(handle); 1508 zonecfg_fini_handle(handle); 1509 handle = NULL; 1510 1511 /* 1512 * If we're mounting a zone for administration, then we need to set up 1513 * the "/a" environment inside the zone so that the commands that run 1514 * in there have access to both the running system's utilities and the 1515 * to-be-modified zone's files. 
1516 */ 1517 if (mount_cmd && 1518 !build_mounted(zlogp, rootpath, sizeof (rootpath), zonepath)) 1519 goto bad; 1520 1521 qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare); 1522 for (i = 0; i < num_fs; i++) { 1523 if (mount_cmd && strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) { 1524 size_t slen = strlen(rootpath) - 2; 1525 1526 /* /dev is special and always goes at the top */ 1527 rootpath[slen] = '\0'; 1528 if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0) 1529 goto bad; 1530 rootpath[slen] = '/'; 1531 continue; 1532 } 1533 if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0) 1534 goto bad; 1535 } 1536 1537 /* 1538 * For Trusted Extensions cross-mount each lower level /export/home 1539 */ 1540 if (!mount_cmd && tsol_mounts(zlogp, zone_name, rootpath) != 0) 1541 goto bad; 1542 1543 free_fs_data(fs_ptr, num_fs); 1544 1545 /* 1546 * Everything looks fine. 1547 */ 1548 return (0); 1549 1550 bad: 1551 if (handle != NULL) 1552 zonecfg_fini_handle(handle); 1553 free_fs_data(fs_ptr, num_fs); 1554 return (-1); 1555 } 1556 1557 /* caller makes sure neither parameter is NULL */ 1558 static int 1559 addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr) 1560 { 1561 int prefixlen; 1562 1563 prefixlen = atoi(prefixstr); 1564 if (prefixlen < 0 || prefixlen > maxprefixlen) 1565 return (1); 1566 while (prefixlen > 0) { 1567 if (prefixlen >= 8) { 1568 *maskstr++ = 0xFF; 1569 prefixlen -= 8; 1570 continue; 1571 } 1572 *maskstr |= 1 << (8 - prefixlen); 1573 prefixlen--; 1574 } 1575 return (0); 1576 } 1577 1578 /* 1579 * Tear down all interfaces belonging to the given zone. This should 1580 * be called with the zone in a state other than "running", so that 1581 * interfaces can't be assigned to the zone after this returns. 1582 * 1583 * If anything goes wrong, log an error message and return an error. 
1584 */ 1585 static int 1586 unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id) 1587 { 1588 struct lifnum lifn; 1589 struct lifconf lifc; 1590 struct lifreq *lifrp, lifrl; 1591 int64_t lifc_flags = LIFC_NOXMIT | LIFC_ALLZONES; 1592 int num_ifs, s, i, ret_code = 0; 1593 uint_t bufsize; 1594 char *buf = NULL; 1595 1596 if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { 1597 zerror(zlogp, B_TRUE, "could not get socket"); 1598 ret_code = -1; 1599 goto bad; 1600 } 1601 lifn.lifn_family = AF_UNSPEC; 1602 lifn.lifn_flags = (int)lifc_flags; 1603 if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) { 1604 zerror(zlogp, B_TRUE, 1605 "could not determine number of interfaces"); 1606 ret_code = -1; 1607 goto bad; 1608 } 1609 num_ifs = lifn.lifn_count; 1610 bufsize = num_ifs * sizeof (struct lifreq); 1611 if ((buf = malloc(bufsize)) == NULL) { 1612 zerror(zlogp, B_TRUE, "memory allocation failed"); 1613 ret_code = -1; 1614 goto bad; 1615 } 1616 lifc.lifc_family = AF_UNSPEC; 1617 lifc.lifc_flags = (int)lifc_flags; 1618 lifc.lifc_len = bufsize; 1619 lifc.lifc_buf = buf; 1620 if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) { 1621 zerror(zlogp, B_TRUE, "could not get configured interfaces"); 1622 ret_code = -1; 1623 goto bad; 1624 } 1625 lifrp = lifc.lifc_req; 1626 for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--, lifrp++) { 1627 (void) close(s); 1628 if ((s = socket(lifrp->lifr_addr.ss_family, SOCK_DGRAM, 0)) < 1629 0) { 1630 zerror(zlogp, B_TRUE, "%s: could not get socket", 1631 lifrl.lifr_name); 1632 ret_code = -1; 1633 continue; 1634 } 1635 (void) memset(&lifrl, 0, sizeof (lifrl)); 1636 (void) strncpy(lifrl.lifr_name, lifrp->lifr_name, 1637 sizeof (lifrl.lifr_name)); 1638 if (ioctl(s, SIOCGLIFZONE, (caddr_t)&lifrl) < 0) { 1639 zerror(zlogp, B_TRUE, 1640 "%s: could not determine zone interface belongs to", 1641 lifrl.lifr_name); 1642 ret_code = -1; 1643 continue; 1644 } 1645 if (lifrl.lifr_zoneid == zone_id) { 1646 if (ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifrl) < 0) 
{ 1647 zerror(zlogp, B_TRUE, 1648 "%s: could not remove interface", 1649 lifrl.lifr_name); 1650 ret_code = -1; 1651 continue; 1652 } 1653 } 1654 } 1655 bad: 1656 if (s > 0) 1657 (void) close(s); 1658 if (buf) 1659 free(buf); 1660 return (ret_code); 1661 } 1662 1663 static union sockunion { 1664 struct sockaddr sa; 1665 struct sockaddr_in sin; 1666 struct sockaddr_dl sdl; 1667 struct sockaddr_in6 sin6; 1668 } so_dst, so_ifp; 1669 1670 static struct { 1671 struct rt_msghdr hdr; 1672 char space[512]; 1673 } rtmsg; 1674 1675 static int 1676 salen(struct sockaddr *sa) 1677 { 1678 switch (sa->sa_family) { 1679 case AF_INET: 1680 return (sizeof (struct sockaddr_in)); 1681 case AF_LINK: 1682 return (sizeof (struct sockaddr_dl)); 1683 case AF_INET6: 1684 return (sizeof (struct sockaddr_in6)); 1685 default: 1686 return (sizeof (struct sockaddr)); 1687 } 1688 } 1689 1690 #define ROUNDUP_LONG(a) \ 1691 ((a) > 0 ? (1 + (((a) - 1) | (sizeof (long) - 1))) : sizeof (long)) 1692 1693 /* 1694 * Look up which zone is using a given IP address. The address in question 1695 * is expected to have been stuffed into the structure to which lifr points 1696 * via a previous SIOCGLIFADDR ioctl(). 1697 * 1698 * This is done using black router socket magic. 1699 * 1700 * Return the name of the zone on success or NULL on failure. 1701 * 1702 * This is a lot of code for a simple task; a new ioctl request to take care 1703 * of this might be a useful RFE. 
1704 */ 1705 1706 static char * 1707 who_is_using(zlog_t *zlogp, struct lifreq *lifr) 1708 { 1709 static char answer[ZONENAME_MAX]; 1710 pid_t pid; 1711 int s, rlen, l, i; 1712 char *cp = rtmsg.space; 1713 struct sockaddr_dl *ifp = NULL; 1714 struct sockaddr *sa; 1715 char save_if_name[LIFNAMSIZ]; 1716 1717 answer[0] = '\0'; 1718 1719 pid = getpid(); 1720 if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) < 0) { 1721 zerror(zlogp, B_TRUE, "could not get routing socket"); 1722 return (NULL); 1723 } 1724 1725 if (lifr->lifr_addr.ss_family == AF_INET) { 1726 struct sockaddr_in *sin4; 1727 1728 so_dst.sa.sa_family = AF_INET; 1729 sin4 = (struct sockaddr_in *)&lifr->lifr_addr; 1730 so_dst.sin.sin_addr = sin4->sin_addr; 1731 } else { 1732 struct sockaddr_in6 *sin6; 1733 1734 so_dst.sa.sa_family = AF_INET6; 1735 sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr; 1736 so_dst.sin6.sin6_addr = sin6->sin6_addr; 1737 } 1738 1739 so_ifp.sa.sa_family = AF_LINK; 1740 1741 (void) memset(&rtmsg, 0, sizeof (rtmsg)); 1742 rtmsg.hdr.rtm_type = RTM_GET; 1743 rtmsg.hdr.rtm_flags = RTF_UP | RTF_HOST; 1744 rtmsg.hdr.rtm_version = RTM_VERSION; 1745 rtmsg.hdr.rtm_seq = ++rts_seqno; 1746 rtmsg.hdr.rtm_addrs = RTA_IFP | RTA_DST; 1747 1748 l = ROUNDUP_LONG(salen(&so_dst.sa)); 1749 (void) memmove(cp, &(so_dst), l); 1750 cp += l; 1751 l = ROUNDUP_LONG(salen(&so_ifp.sa)); 1752 (void) memmove(cp, &(so_ifp), l); 1753 cp += l; 1754 1755 rtmsg.hdr.rtm_msglen = l = cp - (char *)&rtmsg; 1756 1757 if ((rlen = write(s, &rtmsg, l)) < 0) { 1758 zerror(zlogp, B_TRUE, "writing to routing socket"); 1759 return (NULL); 1760 } else if (rlen < (int)rtmsg.hdr.rtm_msglen) { 1761 zerror(zlogp, B_TRUE, 1762 "write to routing socket got only %d for len\n", rlen); 1763 return (NULL); 1764 } 1765 do { 1766 l = read(s, &rtmsg, sizeof (rtmsg)); 1767 } while (l > 0 && (rtmsg.hdr.rtm_seq != rts_seqno || 1768 rtmsg.hdr.rtm_pid != pid)); 1769 if (l < 0) { 1770 zerror(zlogp, B_TRUE, "reading from routing socket"); 1771 return (NULL); 1772 
} 1773 1774 if (rtmsg.hdr.rtm_version != RTM_VERSION) { 1775 zerror(zlogp, B_FALSE, 1776 "routing message version %d not understood", 1777 rtmsg.hdr.rtm_version); 1778 return (NULL); 1779 } 1780 if (rtmsg.hdr.rtm_msglen != (ushort_t)l) { 1781 zerror(zlogp, B_FALSE, "message length mismatch, " 1782 "expected %d bytes, returned %d bytes", 1783 rtmsg.hdr.rtm_msglen, l); 1784 return (NULL); 1785 } 1786 if (rtmsg.hdr.rtm_errno != 0) { 1787 errno = rtmsg.hdr.rtm_errno; 1788 zerror(zlogp, B_TRUE, "RTM_GET routing socket message"); 1789 return (NULL); 1790 } 1791 if ((rtmsg.hdr.rtm_addrs & RTA_IFP) == 0) { 1792 zerror(zlogp, B_FALSE, "interface not found"); 1793 return (NULL); 1794 } 1795 cp = ((char *)(&rtmsg.hdr + 1)); 1796 for (i = 1; i != 0; i <<= 1) { 1797 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1798 sa = (struct sockaddr *)cp; 1799 if (i != RTA_IFP) { 1800 if ((i & rtmsg.hdr.rtm_addrs) != 0) 1801 cp += ROUNDUP_LONG(salen(sa)); 1802 continue; 1803 } 1804 if (sa->sa_family == AF_LINK && 1805 ((struct sockaddr_dl *)sa)->sdl_nlen != 0) 1806 ifp = (struct sockaddr_dl *)sa; 1807 break; 1808 } 1809 if (ifp == NULL) { 1810 zerror(zlogp, B_FALSE, "interface could not be determined"); 1811 return (NULL); 1812 } 1813 1814 /* 1815 * We need to set the I/F name to what we got above, then do the 1816 * appropriate ioctl to get its zone name. But lifr->lifr_name is 1817 * used by the calling function to do a REMOVEIF, so if we leave the 1818 * "good" zone's I/F name in place, *that* I/F will be removed instead 1819 * of the bad one. So we save the old (bad) I/F name before over- 1820 * writing it and doing the ioctl, then restore it after the ioctl. 
1821 */ 1822 (void) strlcpy(save_if_name, lifr->lifr_name, sizeof (save_if_name)); 1823 (void) strncpy(lifr->lifr_name, ifp->sdl_data, ifp->sdl_nlen); 1824 lifr->lifr_name[ifp->sdl_nlen] = '\0'; 1825 i = ioctl(s, SIOCGLIFZONE, lifr); 1826 (void) strlcpy(lifr->lifr_name, save_if_name, sizeof (save_if_name)); 1827 if (i < 0) { 1828 zerror(zlogp, B_TRUE, 1829 "%s: could not determine the zone interface belongs to", 1830 lifr->lifr_name); 1831 return (NULL); 1832 } 1833 if (getzonenamebyid(lifr->lifr_zoneid, answer, sizeof (answer)) < 0) 1834 (void) snprintf(answer, sizeof (answer), "%d", 1835 lifr->lifr_zoneid); 1836 1837 if (strlen(answer) > 0) 1838 return (answer); 1839 return (NULL); 1840 } 1841 1842 typedef struct mcast_rtmsg_s { 1843 struct rt_msghdr m_rtm; 1844 union { 1845 struct { 1846 struct sockaddr_in m_dst; 1847 struct sockaddr_in m_gw; 1848 struct sockaddr_in m_netmask; 1849 } m_v4; 1850 struct { 1851 struct sockaddr_in6 m_dst; 1852 struct sockaddr_in6 m_gw; 1853 struct sockaddr_in6 m_netmask; 1854 } m_v6; 1855 } m_u; 1856 } mcast_rtmsg_t; 1857 #define m_dst4 m_u.m_v4.m_dst 1858 #define m_dst6 m_u.m_v6.m_dst 1859 #define m_gw4 m_u.m_v4.m_gw 1860 #define m_gw6 m_u.m_v6.m_gw 1861 #define m_netmask4 m_u.m_v4.m_netmask 1862 #define m_netmask6 m_u.m_v6.m_netmask 1863 1864 /* 1865 * Configures a single interface: a new virtual interface is added, based on 1866 * the physical interface nwiftabptr->zone_nwif_physical, with the address 1867 * specified in nwiftabptr->zone_nwif_address, for zone zone_id. Note that 1868 * the "address" can be an IPv6 address (with a /prefixlength required), an 1869 * IPv4 address (with a /prefixlength optional), or a name; for the latter, 1870 * an IPv4 name-to-address resolution will be attempted. 1871 * 1872 * A default interface route for multicast is created on the first IPv4 and 1873 * IPv6 interfaces (that have the IFF_MULTICAST flag set), respectively. 
1874 * This should really be done in the init scripts if we ever allow zones to 1875 * modify the routing tables. 1876 * 1877 * If anything goes wrong, we log an detailed error message, attempt to tear 1878 * down whatever we set up and return an error. 1879 */ 1880 static int 1881 configure_one_interface(zlog_t *zlogp, zoneid_t zone_id, 1882 struct zone_nwiftab *nwiftabptr, boolean_t *mcast_rt_v4_setp, 1883 boolean_t *mcast_rt_v6_setp) 1884 { 1885 struct lifreq lifr; 1886 struct sockaddr_in netmask4; 1887 struct sockaddr_in6 netmask6; 1888 struct in_addr in4; 1889 struct in6_addr in6; 1890 sa_family_t af; 1891 char *slashp = strchr(nwiftabptr->zone_nwif_address, '/'); 1892 mcast_rtmsg_t mcast_rtmsg; 1893 int s; 1894 int rs; 1895 int rlen; 1896 boolean_t got_netmask = B_FALSE; 1897 char addrstr4[INET_ADDRSTRLEN]; 1898 int res; 1899 1900 res = zonecfg_valid_net_address(nwiftabptr->zone_nwif_address, &lifr); 1901 if (res != Z_OK) { 1902 zerror(zlogp, B_FALSE, "%s: %s", zonecfg_strerror(res), 1903 nwiftabptr->zone_nwif_address); 1904 return (-1); 1905 } 1906 af = lifr.lifr_addr.ss_family; 1907 if (af == AF_INET) 1908 in4 = ((struct sockaddr_in *)(&lifr.lifr_addr))->sin_addr; 1909 else 1910 in6 = ((struct sockaddr_in6 *)(&lifr.lifr_addr))->sin6_addr; 1911 1912 if ((s = socket(af, SOCK_DGRAM, 0)) < 0) { 1913 zerror(zlogp, B_TRUE, "could not get socket"); 1914 return (-1); 1915 } 1916 1917 (void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical, 1918 sizeof (lifr.lifr_name)); 1919 if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) { 1920 zerror(zlogp, B_TRUE, "%s: could not add interface", 1921 lifr.lifr_name); 1922 (void) close(s); 1923 return (-1); 1924 } 1925 1926 if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) { 1927 zerror(zlogp, B_TRUE, 1928 "%s: could not set IP address to %s", 1929 lifr.lifr_name, nwiftabptr->zone_nwif_address); 1930 goto bad; 1931 } 1932 1933 /* Preserve literal IPv4 address for later potential printing. 
*/ 1934 if (af == AF_INET) 1935 (void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN); 1936 1937 lifr.lifr_zoneid = zone_id; 1938 if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) { 1939 zerror(zlogp, B_TRUE, "%s: could not place interface into zone", 1940 lifr.lifr_name); 1941 goto bad; 1942 } 1943 1944 if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) { 1945 got_netmask = B_TRUE; /* default setting will be correct */ 1946 } else { 1947 if (af == AF_INET) { 1948 /* 1949 * The IPv4 netmask can be determined either 1950 * directly if a prefix length was supplied with 1951 * the address or via the netmasks database. Not 1952 * being able to determine it is a common failure, 1953 * but it often is not fatal to operation of the 1954 * interface. In that case, a warning will be 1955 * printed after the rest of the interface's 1956 * parameters have been configured. 1957 */ 1958 (void) memset(&netmask4, 0, sizeof (netmask4)); 1959 if (slashp != NULL) { 1960 if (addr2netmask(slashp + 1, V4_ADDR_LEN, 1961 (uchar_t *)&netmask4.sin_addr) != 0) { 1962 *slashp = '/'; 1963 zerror(zlogp, B_FALSE, 1964 "%s: invalid prefix length in %s", 1965 lifr.lifr_name, 1966 nwiftabptr->zone_nwif_address); 1967 goto bad; 1968 } 1969 got_netmask = B_TRUE; 1970 } else if (getnetmaskbyaddr(in4, 1971 &netmask4.sin_addr) == 0) { 1972 got_netmask = B_TRUE; 1973 } 1974 if (got_netmask) { 1975 netmask4.sin_family = af; 1976 (void) memcpy(&lifr.lifr_addr, &netmask4, 1977 sizeof (netmask4)); 1978 } 1979 } else { 1980 (void) memset(&netmask6, 0, sizeof (netmask6)); 1981 if (addr2netmask(slashp + 1, V6_ADDR_LEN, 1982 (uchar_t *)&netmask6.sin6_addr) != 0) { 1983 *slashp = '/'; 1984 zerror(zlogp, B_FALSE, 1985 "%s: invalid prefix length in %s", 1986 lifr.lifr_name, 1987 nwiftabptr->zone_nwif_address); 1988 goto bad; 1989 } 1990 got_netmask = B_TRUE; 1991 netmask6.sin6_family = af; 1992 (void) memcpy(&lifr.lifr_addr, &netmask6, 1993 sizeof (netmask6)); 1994 } 1995 if (got_netmask && 1996 ioctl(s, 
SIOCSLIFNETMASK, (caddr_t)&lifr) < 0) { 1997 zerror(zlogp, B_TRUE, "%s: could not set netmask", 1998 lifr.lifr_name); 1999 goto bad; 2000 } 2001 2002 /* 2003 * This doesn't set the broadcast address at all. Rather, it 2004 * gets, then sets the interface's address, relying on the fact 2005 * that resetting the address will reset the broadcast address. 2006 */ 2007 if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { 2008 zerror(zlogp, B_TRUE, "%s: could not get address", 2009 lifr.lifr_name); 2010 goto bad; 2011 } 2012 if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) { 2013 zerror(zlogp, B_TRUE, 2014 "%s: could not reset broadcast address", 2015 lifr.lifr_name); 2016 goto bad; 2017 } 2018 } 2019 2020 if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { 2021 zerror(zlogp, B_TRUE, "%s: could not get flags", 2022 lifr.lifr_name); 2023 goto bad; 2024 } 2025 lifr.lifr_flags |= IFF_UP; 2026 if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { 2027 int save_errno = errno; 2028 char *zone_using; 2029 2030 /* 2031 * If we failed with something other than EADDRNOTAVAIL, 2032 * then skip to the end. Otherwise, look up our address, 2033 * then call a function to determine which zone is already 2034 * using that address. 
2035 */ 2036 if (errno != EADDRNOTAVAIL) { 2037 zerror(zlogp, B_TRUE, 2038 "%s: could not bring interface up", lifr.lifr_name); 2039 goto bad; 2040 } 2041 if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { 2042 zerror(zlogp, B_TRUE, "%s: could not get address", 2043 lifr.lifr_name); 2044 goto bad; 2045 } 2046 zone_using = who_is_using(zlogp, &lifr); 2047 errno = save_errno; 2048 if (zone_using == NULL) 2049 zerror(zlogp, B_TRUE, 2050 "%s: could not bring interface up", lifr.lifr_name); 2051 else 2052 zerror(zlogp, B_TRUE, "%s: could not bring interface " 2053 "up: address in use by zone '%s'", lifr.lifr_name, 2054 zone_using); 2055 goto bad; 2056 } 2057 if ((lifr.lifr_flags & IFF_MULTICAST) && ((af == AF_INET && 2058 mcast_rt_v4_setp != NULL && *mcast_rt_v4_setp == B_FALSE) || 2059 (af == AF_INET6 && 2060 mcast_rt_v6_setp != NULL && *mcast_rt_v6_setp == B_FALSE))) { 2061 rs = socket(PF_ROUTE, SOCK_RAW, 0); 2062 if (rs < 0) { 2063 zerror(zlogp, B_TRUE, "%s: could not create " 2064 "routing socket", lifr.lifr_name); 2065 goto bad; 2066 } 2067 (void) shutdown(rs, 0); 2068 (void) memset((void *)&mcast_rtmsg, 0, sizeof (mcast_rtmsg_t)); 2069 mcast_rtmsg.m_rtm.rtm_msglen = sizeof (struct rt_msghdr) + 2070 3 * (af == AF_INET ? 
sizeof (struct sockaddr_in) : 2071 sizeof (struct sockaddr_in6)); 2072 mcast_rtmsg.m_rtm.rtm_version = RTM_VERSION; 2073 mcast_rtmsg.m_rtm.rtm_type = RTM_ADD; 2074 mcast_rtmsg.m_rtm.rtm_flags = RTF_UP; 2075 mcast_rtmsg.m_rtm.rtm_addrs = 2076 RTA_DST | RTA_GATEWAY | RTA_NETMASK; 2077 mcast_rtmsg.m_rtm.rtm_seq = ++rts_seqno; 2078 if (af == AF_INET) { 2079 mcast_rtmsg.m_dst4.sin_family = AF_INET; 2080 mcast_rtmsg.m_dst4.sin_addr.s_addr = 2081 htonl(INADDR_UNSPEC_GROUP); 2082 mcast_rtmsg.m_gw4.sin_family = AF_INET; 2083 mcast_rtmsg.m_gw4.sin_addr = in4; 2084 mcast_rtmsg.m_netmask4.sin_family = AF_INET; 2085 mcast_rtmsg.m_netmask4.sin_addr.s_addr = 2086 htonl(IN_CLASSD_NET); 2087 } else { 2088 mcast_rtmsg.m_dst6.sin6_family = AF_INET6; 2089 mcast_rtmsg.m_dst6.sin6_addr.s6_addr[0] = 0xffU; 2090 mcast_rtmsg.m_gw6.sin6_family = AF_INET6; 2091 mcast_rtmsg.m_gw6.sin6_addr = in6; 2092 mcast_rtmsg.m_netmask6.sin6_family = AF_INET6; 2093 mcast_rtmsg.m_netmask6.sin6_addr.s6_addr[0] = 0xffU; 2094 } 2095 rlen = write(rs, (char *)&mcast_rtmsg, 2096 mcast_rtmsg.m_rtm.rtm_msglen); 2097 if (rlen < mcast_rtmsg.m_rtm.rtm_msglen) { 2098 if (rlen < 0) { 2099 zerror(zlogp, B_TRUE, "%s: could not set " 2100 "default interface for multicast", 2101 lifr.lifr_name); 2102 } else { 2103 zerror(zlogp, B_FALSE, "%s: write to routing " 2104 "socket returned %d", lifr.lifr_name, rlen); 2105 } 2106 (void) close(rs); 2107 goto bad; 2108 } 2109 if (af == AF_INET) { 2110 *mcast_rt_v4_setp = B_TRUE; 2111 } else { 2112 *mcast_rt_v6_setp = B_TRUE; 2113 } 2114 (void) close(rs); 2115 } 2116 2117 if (!got_netmask) { 2118 /* 2119 * A common, but often non-fatal problem, is that the system 2120 * cannot find the netmask for an interface address. This is 2121 * often caused by it being only in /etc/inet/netmasks, but 2122 * /etc/nsswitch.conf says to use NIS or NIS+ and it's not 2123 * in that. 
This doesn't show up at boot because the netmask 2124 * is obtained from /etc/inet/netmasks when no network 2125 * interfaces are up, but isn't consulted when NIS/NIS+ is 2126 * available. We warn the user here that something like this 2127 * has happened and we're just running with a default and 2128 * possible incorrect netmask. 2129 */ 2130 char buffer[INET6_ADDRSTRLEN]; 2131 void *addr; 2132 2133 if (af == AF_INET) 2134 addr = &((struct sockaddr_in *) 2135 (&lifr.lifr_addr))->sin_addr; 2136 else 2137 addr = &((struct sockaddr_in6 *) 2138 (&lifr.lifr_addr))->sin6_addr; 2139 2140 /* Find out what netmask interface is going to be using */ 2141 if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 || 2142 inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL) 2143 goto bad; 2144 zerror(zlogp, B_FALSE, 2145 "WARNING: %s: no matching subnet found in netmasks(4) for " 2146 "%s; using default of %s.", 2147 lifr.lifr_name, addrstr4, buffer); 2148 } 2149 2150 (void) close(s); 2151 return (Z_OK); 2152 bad: 2153 (void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr); 2154 (void) close(s); 2155 return (-1); 2156 } 2157 2158 /* 2159 * Sets up network interfaces based on information from the zone configuration. 2160 * An IPv4 loopback interface is set up "for free", modeling the global system. 2161 * If any of the configuration interfaces were IPv6, then an IPv6 loopback 2162 * address is set up as well. 2163 * 2164 * If anything goes wrong, we log a general error message, attempt to tear down 2165 * whatever we set up, and return an error. 
2166 */ 2167 static int 2168 configure_network_interfaces(zlog_t *zlogp) 2169 { 2170 zone_dochandle_t handle; 2171 struct zone_nwiftab nwiftab, loopback_iftab; 2172 boolean_t saw_v6 = B_FALSE; 2173 boolean_t mcast_rt_v4_set = B_FALSE; 2174 boolean_t mcast_rt_v6_set = B_FALSE; 2175 zoneid_t zoneid; 2176 2177 if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) { 2178 zerror(zlogp, B_TRUE, "unable to get zoneid"); 2179 return (-1); 2180 } 2181 2182 if ((handle = zonecfg_init_handle()) == NULL) { 2183 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2184 return (-1); 2185 } 2186 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2187 zerror(zlogp, B_FALSE, "invalid configuration"); 2188 zonecfg_fini_handle(handle); 2189 return (-1); 2190 } 2191 if (zonecfg_setnwifent(handle) == Z_OK) { 2192 for (;;) { 2193 struct in6_addr in6; 2194 2195 if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK) 2196 break; 2197 if (configure_one_interface(zlogp, zoneid, 2198 &nwiftab, &mcast_rt_v4_set, &mcast_rt_v6_set) != 2199 Z_OK) { 2200 (void) zonecfg_endnwifent(handle); 2201 zonecfg_fini_handle(handle); 2202 return (-1); 2203 } 2204 if (inet_pton(AF_INET6, nwiftab.zone_nwif_address, 2205 &in6) == 1) 2206 saw_v6 = B_TRUE; 2207 } 2208 (void) zonecfg_endnwifent(handle); 2209 } 2210 zonecfg_fini_handle(handle); 2211 (void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0", 2212 sizeof (loopback_iftab.zone_nwif_physical)); 2213 (void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1", 2214 sizeof (loopback_iftab.zone_nwif_address)); 2215 if (configure_one_interface(zlogp, zoneid, &loopback_iftab, NULL, NULL) 2216 != Z_OK) { 2217 return (-1); 2218 } 2219 if (saw_v6) { 2220 (void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128", 2221 sizeof (loopback_iftab.zone_nwif_address)); 2222 if (configure_one_interface(zlogp, zoneid, 2223 &loopback_iftab, NULL, NULL) != Z_OK) { 2224 return (-1); 2225 } 2226 } 2227 return (0); 2228 } 2229 2230 static int 2231 
tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid, 2232 const struct sockaddr_storage *local, const struct sockaddr_storage *remote) 2233 { 2234 int fd; 2235 struct strioctl ioc; 2236 tcp_ioc_abort_conn_t conn; 2237 int error; 2238 2239 conn.ac_local = *local; 2240 conn.ac_remote = *remote; 2241 conn.ac_start = TCPS_SYN_SENT; 2242 conn.ac_end = TCPS_TIME_WAIT; 2243 conn.ac_zoneid = zoneid; 2244 2245 ioc.ic_cmd = TCP_IOC_ABORT_CONN; 2246 ioc.ic_timout = -1; /* infinite timeout */ 2247 ioc.ic_len = sizeof (conn); 2248 ioc.ic_dp = (char *)&conn; 2249 2250 if ((fd = open("/dev/tcp", O_RDONLY)) < 0) { 2251 zerror(zlogp, B_TRUE, "unable to open %s", "/dev/tcp"); 2252 return (-1); 2253 } 2254 2255 error = ioctl(fd, I_STR, &ioc); 2256 (void) close(fd); 2257 if (error == 0 || errno == ENOENT) /* ENOENT is not an error */ 2258 return (0); 2259 return (-1); 2260 } 2261 2262 static int 2263 tcp_abort_connections(zlog_t *zlogp, zoneid_t zoneid) 2264 { 2265 struct sockaddr_storage l, r; 2266 struct sockaddr_in *local, *remote; 2267 struct sockaddr_in6 *local6, *remote6; 2268 int error; 2269 2270 /* 2271 * Abort IPv4 connections. 2272 */ 2273 bzero(&l, sizeof (*local)); 2274 local = (struct sockaddr_in *)&l; 2275 local->sin_family = AF_INET; 2276 local->sin_addr.s_addr = INADDR_ANY; 2277 local->sin_port = 0; 2278 2279 bzero(&r, sizeof (*remote)); 2280 remote = (struct sockaddr_in *)&r; 2281 remote->sin_family = AF_INET; 2282 remote->sin_addr.s_addr = INADDR_ANY; 2283 remote->sin_port = 0; 2284 2285 if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0) 2286 return (error); 2287 2288 /* 2289 * Abort IPv6 connections. 
2290 */ 2291 bzero(&l, sizeof (*local6)); 2292 local6 = (struct sockaddr_in6 *)&l; 2293 local6->sin6_family = AF_INET6; 2294 local6->sin6_port = 0; 2295 local6->sin6_addr = in6addr_any; 2296 2297 bzero(&r, sizeof (*remote6)); 2298 remote6 = (struct sockaddr_in6 *)&r; 2299 remote6->sin6_family = AF_INET6; 2300 remote6->sin6_port = 0; 2301 remote6->sin6_addr = in6addr_any; 2302 2303 if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0) 2304 return (error); 2305 return (0); 2306 } 2307 2308 static int 2309 devfsadm_call(zlog_t *zlogp, const char *arg) 2310 { 2311 char *argv[4]; 2312 int status; 2313 2314 argv[0] = DEVFSADM; 2315 argv[1] = (char *)arg; 2316 argv[2] = zone_name; 2317 argv[3] = NULL; 2318 status = forkexec(zlogp, DEVFSADM_PATH, argv); 2319 if (status == 0 || status == -1) 2320 return (status); 2321 zerror(zlogp, B_FALSE, "%s call (%s %s %s) unexpectedly returned %d", 2322 DEVFSADM, DEVFSADM_PATH, arg, zone_name, status); 2323 return (-1); 2324 } 2325 2326 static int 2327 devfsadm_register(zlog_t *zlogp) 2328 { 2329 /* 2330 * Ready the zone's devices. 
2331 */ 2332 return (devfsadm_call(zlogp, "-z")); 2333 } 2334 2335 static int 2336 devfsadm_unregister(zlog_t *zlogp) 2337 { 2338 return (devfsadm_call(zlogp, "-Z")); 2339 } 2340 2341 static int 2342 get_privset(zlog_t *zlogp, priv_set_t *privs, boolean_t mount_cmd) 2343 { 2344 int error = -1; 2345 zone_dochandle_t handle; 2346 char *privname = NULL; 2347 2348 if (mount_cmd) { 2349 if (zonecfg_default_privset(privs) == Z_OK) 2350 return (0); 2351 zerror(zlogp, B_FALSE, 2352 "failed to determine the zone's default privilege set"); 2353 return (-1); 2354 } 2355 2356 if ((handle = zonecfg_init_handle()) == NULL) { 2357 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2358 return (-1); 2359 } 2360 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2361 zerror(zlogp, B_FALSE, "invalid configuration"); 2362 zonecfg_fini_handle(handle); 2363 return (-1); 2364 } 2365 2366 switch (zonecfg_get_privset(handle, privs, &privname)) { 2367 case Z_OK: 2368 error = 0; 2369 break; 2370 case Z_PRIV_PROHIBITED: 2371 zerror(zlogp, B_FALSE, "privilege \"%s\" is not permitted " 2372 "within the zone's privilege set", privname); 2373 break; 2374 case Z_PRIV_REQUIRED: 2375 zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing " 2376 "from the zone's privilege set", privname); 2377 break; 2378 case Z_PRIV_UNKNOWN: 2379 zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified " 2380 "in the zone's privilege set", privname); 2381 break; 2382 default: 2383 zerror(zlogp, B_FALSE, "failed to determine the zone's " 2384 "privilege set"); 2385 break; 2386 } 2387 2388 free(privname); 2389 zonecfg_fini_handle(handle); 2390 return (error); 2391 } 2392 2393 static int 2394 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) 2395 { 2396 nvlist_t *nvl = NULL; 2397 char *nvl_packed = NULL; 2398 size_t nvl_size = 0; 2399 nvlist_t **nvlv = NULL; 2400 int rctlcount = 0; 2401 int error = -1; 2402 zone_dochandle_t handle; 2403 struct zone_rctltab rctltab; 2404 rctlblk_t 
*rctlblk = NULL; 2405 2406 *bufp = NULL; 2407 *bufsizep = 0; 2408 2409 if ((handle = zonecfg_init_handle()) == NULL) { 2410 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2411 return (-1); 2412 } 2413 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2414 zerror(zlogp, B_FALSE, "invalid configuration"); 2415 zonecfg_fini_handle(handle); 2416 return (-1); 2417 } 2418 2419 rctltab.zone_rctl_valptr = NULL; 2420 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) { 2421 zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc"); 2422 goto out; 2423 } 2424 2425 if (zonecfg_setrctlent(handle) != Z_OK) { 2426 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent"); 2427 goto out; 2428 } 2429 2430 if ((rctlblk = malloc(rctlblk_size())) == NULL) { 2431 zerror(zlogp, B_TRUE, "memory allocation failed"); 2432 goto out; 2433 } 2434 while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) { 2435 struct zone_rctlvaltab *rctlval; 2436 uint_t i, count; 2437 const char *name = rctltab.zone_rctl_name; 2438 2439 /* zoneadm should have already warned about unknown rctls. 
*/ 2440 if (!zonecfg_is_rctl(name)) { 2441 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 2442 rctltab.zone_rctl_valptr = NULL; 2443 continue; 2444 } 2445 count = 0; 2446 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL; 2447 rctlval = rctlval->zone_rctlval_next) { 2448 count++; 2449 } 2450 if (count == 0) { /* ignore */ 2451 continue; /* Nothing to free */ 2452 } 2453 if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL) 2454 goto out; 2455 i = 0; 2456 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL; 2457 rctlval = rctlval->zone_rctlval_next, i++) { 2458 if (nvlist_alloc(&nvlv[i], NV_UNIQUE_NAME, 0) != 0) { 2459 zerror(zlogp, B_TRUE, "%s failed", 2460 "nvlist_alloc"); 2461 goto out; 2462 } 2463 if (zonecfg_construct_rctlblk(rctlval, rctlblk) 2464 != Z_OK) { 2465 zerror(zlogp, B_FALSE, "invalid rctl value: " 2466 "(priv=%s,limit=%s,action=%s)", 2467 rctlval->zone_rctlval_priv, 2468 rctlval->zone_rctlval_limit, 2469 rctlval->zone_rctlval_action); 2470 goto out; 2471 } 2472 if (!zonecfg_valid_rctl(name, rctlblk)) { 2473 zerror(zlogp, B_FALSE, 2474 "(priv=%s,limit=%s,action=%s) is not a " 2475 "valid value for rctl '%s'", 2476 rctlval->zone_rctlval_priv, 2477 rctlval->zone_rctlval_limit, 2478 rctlval->zone_rctlval_action, 2479 name); 2480 goto out; 2481 } 2482 if (nvlist_add_uint64(nvlv[i], "privilege", 2483 rctlblk_get_privilege(rctlblk)) != 0) { 2484 zerror(zlogp, B_FALSE, "%s failed", 2485 "nvlist_add_uint64"); 2486 goto out; 2487 } 2488 if (nvlist_add_uint64(nvlv[i], "limit", 2489 rctlblk_get_value(rctlblk)) != 0) { 2490 zerror(zlogp, B_FALSE, "%s failed", 2491 "nvlist_add_uint64"); 2492 goto out; 2493 } 2494 if (nvlist_add_uint64(nvlv[i], "action", 2495 (uint_t)rctlblk_get_local_action(rctlblk, NULL)) 2496 != 0) { 2497 zerror(zlogp, B_FALSE, "%s failed", 2498 "nvlist_add_uint64"); 2499 goto out; 2500 } 2501 } 2502 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 2503 rctltab.zone_rctl_valptr = NULL; 2504 if 
(nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count) 2505 != 0) { 2506 zerror(zlogp, B_FALSE, "%s failed", 2507 "nvlist_add_nvlist_array"); 2508 goto out; 2509 } 2510 for (i = 0; i < count; i++) 2511 nvlist_free(nvlv[i]); 2512 free(nvlv); 2513 nvlv = NULL; 2514 rctlcount++; 2515 } 2516 (void) zonecfg_endrctlent(handle); 2517 2518 if (rctlcount == 0) { 2519 error = 0; 2520 goto out; 2521 } 2522 if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0) 2523 != 0) { 2524 zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack"); 2525 goto out; 2526 } 2527 2528 error = 0; 2529 *bufp = nvl_packed; 2530 *bufsizep = nvl_size; 2531 2532 out: 2533 free(rctlblk); 2534 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 2535 if (error && nvl_packed != NULL) 2536 free(nvl_packed); 2537 if (nvl != NULL) 2538 nvlist_free(nvl); 2539 if (nvlv != NULL) 2540 free(nvlv); 2541 if (handle != NULL) 2542 zonecfg_fini_handle(handle); 2543 return (error); 2544 } 2545 2546 static int 2547 get_zone_pool(zlog_t *zlogp, char *poolbuf, size_t bufsz) 2548 { 2549 zone_dochandle_t handle; 2550 int error; 2551 2552 if ((handle = zonecfg_init_handle()) == NULL) { 2553 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2554 return (Z_NOMEM); 2555 } 2556 error = zonecfg_get_snapshot_handle(zone_name, handle); 2557 if (error != Z_OK) { 2558 zerror(zlogp, B_FALSE, "invalid configuration"); 2559 zonecfg_fini_handle(handle); 2560 return (error); 2561 } 2562 error = zonecfg_get_pool(handle, poolbuf, bufsz); 2563 zonecfg_fini_handle(handle); 2564 return (error); 2565 } 2566 2567 static int 2568 get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep) 2569 { 2570 zone_dochandle_t handle; 2571 struct zone_dstab dstab; 2572 size_t total, offset, len; 2573 int error = -1; 2574 char *str; 2575 2576 *bufp = NULL; 2577 *bufsizep = 0; 2578 2579 if ((handle = zonecfg_init_handle()) == NULL) { 2580 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2581 return (-1); 2582 } 2583 if 
(zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2584 zerror(zlogp, B_FALSE, "invalid configuration"); 2585 zonecfg_fini_handle(handle); 2586 return (-1); 2587 } 2588 2589 if (zonecfg_setdsent(handle) != Z_OK) { 2590 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent"); 2591 goto out; 2592 } 2593 2594 total = 0; 2595 while (zonecfg_getdsent(handle, &dstab) == Z_OK) 2596 total += strlen(dstab.zone_dataset_name) + 1; 2597 (void) zonecfg_enddsent(handle); 2598 2599 if (total == 0) { 2600 error = 0; 2601 goto out; 2602 } 2603 2604 if ((str = malloc(total)) == NULL) { 2605 zerror(zlogp, B_TRUE, "memory allocation failed"); 2606 goto out; 2607 } 2608 2609 if (zonecfg_setdsent(handle) != Z_OK) { 2610 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent"); 2611 goto out; 2612 } 2613 offset = 0; 2614 while (zonecfg_getdsent(handle, &dstab) == Z_OK) { 2615 len = strlen(dstab.zone_dataset_name); 2616 (void) strlcpy(str + offset, dstab.zone_dataset_name, 2617 sizeof (dstab.zone_dataset_name) - offset); 2618 offset += len; 2619 if (offset != total - 1) 2620 str[offset++] = ','; 2621 } 2622 (void) zonecfg_enddsent(handle); 2623 2624 error = 0; 2625 *bufp = str; 2626 *bufsizep = total; 2627 2628 out: 2629 if (error != 0 && str != NULL) 2630 free(str); 2631 if (handle != NULL) 2632 zonecfg_fini_handle(handle); 2633 2634 return (error); 2635 } 2636 2637 static int 2638 validate_datasets(zlog_t *zlogp) 2639 { 2640 zone_dochandle_t handle; 2641 struct zone_dstab dstab; 2642 zfs_handle_t *zhp; 2643 libzfs_handle_t *hdl; 2644 2645 if ((handle = zonecfg_init_handle()) == NULL) { 2646 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2647 return (-1); 2648 } 2649 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2650 zerror(zlogp, B_FALSE, "invalid configuration"); 2651 zonecfg_fini_handle(handle); 2652 return (-1); 2653 } 2654 2655 if (zonecfg_setdsent(handle) != Z_OK) { 2656 zerror(zlogp, B_FALSE, "invalid configuration"); 2657 
zonecfg_fini_handle(handle); 2658 return (-1); 2659 } 2660 2661 if ((hdl = libzfs_init()) == NULL) { 2662 zerror(zlogp, B_FALSE, "opening ZFS library"); 2663 zonecfg_fini_handle(handle); 2664 return (-1); 2665 } 2666 2667 while (zonecfg_getdsent(handle, &dstab) == Z_OK) { 2668 2669 if ((zhp = zfs_open(hdl, dstab.zone_dataset_name, 2670 ZFS_TYPE_FILESYSTEM)) == NULL) { 2671 zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'", 2672 dstab.zone_dataset_name); 2673 zonecfg_fini_handle(handle); 2674 libzfs_fini(hdl); 2675 return (-1); 2676 } 2677 2678 /* 2679 * Automatically set the 'zoned' property. We check the value 2680 * first because we'll get EPERM if it is already set. 2681 */ 2682 if (!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) && 2683 zfs_prop_set(zhp, ZFS_PROP_ZONED, "on") != 0) { 2684 zerror(zlogp, B_FALSE, "cannot set 'zoned' " 2685 "property for ZFS dataset '%s'\n", 2686 dstab.zone_dataset_name); 2687 zonecfg_fini_handle(handle); 2688 zfs_close(zhp); 2689 libzfs_fini(hdl); 2690 return (-1); 2691 } 2692 2693 zfs_close(zhp); 2694 } 2695 (void) zonecfg_enddsent(handle); 2696 2697 zonecfg_fini_handle(handle); 2698 libzfs_fini(hdl); 2699 2700 return (0); 2701 } 2702 2703 static int 2704 bind_to_pool(zlog_t *zlogp, zoneid_t zoneid) 2705 { 2706 pool_conf_t *poolconf; 2707 pool_t *pool; 2708 char poolname[MAXPATHLEN]; 2709 int status; 2710 int error; 2711 2712 /* 2713 * Find the pool mentioned in the zone configuration, and bind to it. 2714 */ 2715 error = get_zone_pool(zlogp, poolname, sizeof (poolname)); 2716 if (error == Z_NO_ENTRY || (error == Z_OK && strlen(poolname) == 0)) { 2717 /* 2718 * The property is not set on the zone, so the pool 2719 * should be bound to the default pool. But that's 2720 * already done by the kernel, so we can just return. 2721 */ 2722 return (0); 2723 } 2724 if (error != Z_OK) { 2725 /* 2726 * Not an error, even though it shouldn't be happening. 
2727 */ 2728 zerror(zlogp, B_FALSE, 2729 "WARNING: unable to retrieve default pool."); 2730 return (0); 2731 } 2732 /* 2733 * Don't do anything if pools aren't enabled. 2734 */ 2735 if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED) { 2736 zerror(zlogp, B_FALSE, "WARNING: pools facility not active; " 2737 "zone will not be bound to pool '%s'.", poolname); 2738 return (0); 2739 } 2740 /* 2741 * Try to provide a sane error message if the requested pool doesn't 2742 * exist. 2743 */ 2744 if ((poolconf = pool_conf_alloc()) == NULL) { 2745 zerror(zlogp, B_FALSE, "%s failed", "pool_conf_alloc"); 2746 return (-1); 2747 } 2748 if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) != 2749 PO_SUCCESS) { 2750 zerror(zlogp, B_FALSE, "%s failed", "pool_conf_open"); 2751 pool_conf_free(poolconf); 2752 return (-1); 2753 } 2754 pool = pool_get_pool(poolconf, poolname); 2755 (void) pool_conf_close(poolconf); 2756 pool_conf_free(poolconf); 2757 if (pool == NULL) { 2758 zerror(zlogp, B_FALSE, "WARNING: pool '%s' not found; " 2759 "using default pool.", poolname); 2760 return (0); 2761 } 2762 /* 2763 * Bind the zone to the pool. 
2764 */ 2765 if (pool_set_binding(poolname, P_ZONEID, zoneid) != PO_SUCCESS) { 2766 zerror(zlogp, B_FALSE, "WARNING: unable to bind to pool '%s'; " 2767 "using default pool.", poolname); 2768 } 2769 return (0); 2770 } 2771 2772 /* 2773 * Mount lower level home directories into/from current zone 2774 * Share exported directories specified in dfstab for zone 2775 */ 2776 static int 2777 tsol_mounts(zlog_t *zlogp, char *zone_name, char *rootpath) 2778 { 2779 zoneid_t *zids = NULL; 2780 priv_set_t *zid_privs; 2781 const priv_impl_info_t *ip = NULL; 2782 uint_t nzents_saved; 2783 uint_t nzents; 2784 int i; 2785 char readonly[] = "ro"; 2786 struct zone_fstab lower_fstab; 2787 char *argv[4]; 2788 2789 if (!is_system_labeled()) 2790 return (0); 2791 2792 if (zid_label == NULL) { 2793 zid_label = m_label_alloc(MAC_LABEL); 2794 if (zid_label == NULL) 2795 return (-1); 2796 } 2797 2798 /* Make sure our zone has an /export/home dir */ 2799 (void) make_one_dir(zlogp, rootpath, "/export/home", 2800 DEFAULT_DIR_MODE); 2801 2802 lower_fstab.zone_fs_raw[0] = '\0'; 2803 (void) strlcpy(lower_fstab.zone_fs_type, MNTTYPE_LOFS, 2804 sizeof (lower_fstab.zone_fs_type)); 2805 lower_fstab.zone_fs_options = NULL; 2806 (void) zonecfg_add_fs_option(&lower_fstab, readonly); 2807 2808 /* 2809 * Get the list of zones from the kernel 2810 */ 2811 if (zone_list(NULL, &nzents) != 0) { 2812 zerror(zlogp, B_TRUE, "unable to list zones"); 2813 zonecfg_free_fs_option_list(lower_fstab.zone_fs_options); 2814 return (-1); 2815 } 2816 again: 2817 if (nzents == 0) { 2818 zonecfg_free_fs_option_list(lower_fstab.zone_fs_options); 2819 return (-1); 2820 } 2821 2822 zids = malloc(nzents * sizeof (zoneid_t)); 2823 if (zids == NULL) { 2824 zerror(zlogp, B_TRUE, "memory allocation failed"); 2825 return (-1); 2826 } 2827 nzents_saved = nzents; 2828 2829 if (zone_list(zids, &nzents) != 0) { 2830 zerror(zlogp, B_TRUE, "unable to list zones"); 2831 zonecfg_free_fs_option_list(lower_fstab.zone_fs_options); 2832 
free(zids); 2833 return (-1); 2834 } 2835 if (nzents != nzents_saved) { 2836 /* list changed, try again */ 2837 free(zids); 2838 goto again; 2839 } 2840 2841 ip = getprivimplinfo(); 2842 if ((zid_privs = priv_allocset()) == NULL) { 2843 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 2844 zonecfg_free_fs_option_list( 2845 lower_fstab.zone_fs_options); 2846 free(zids); 2847 return (-1); 2848 } 2849 2850 for (i = 0; i < nzents; i++) { 2851 char zid_name[ZONENAME_MAX]; 2852 zone_state_t zid_state; 2853 char zid_rpath[MAXPATHLEN]; 2854 struct stat stat_buf; 2855 2856 if (zids[i] == GLOBAL_ZONEID) 2857 continue; 2858 2859 if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1) 2860 continue; 2861 2862 /* 2863 * Do special setup for the zone we are booting 2864 */ 2865 if (strcmp(zid_name, zone_name) == 0) { 2866 struct zone_fstab autofs_fstab; 2867 char map_path[MAXPATHLEN]; 2868 int fd; 2869 2870 /* 2871 * Create auto_home_<zone> map for this zone 2872 * in the global zone. The local zone entry 2873 * will be created by automount when the zone 2874 * is booted. 2875 */ 2876 2877 (void) snprintf(autofs_fstab.zone_fs_special, 2878 MAXPATHLEN, "auto_home_%s", zid_name); 2879 2880 (void) snprintf(autofs_fstab.zone_fs_dir, MAXPATHLEN, 2881 "/zone/%s/home", zid_name); 2882 2883 (void) snprintf(map_path, sizeof (map_path), 2884 "/etc/%s", autofs_fstab.zone_fs_special); 2885 /* 2886 * If the map file doesn't exist create a template 2887 */ 2888 if ((fd = open(map_path, O_RDWR | O_CREAT | O_EXCL, 2889 S_IRUSR | S_IWUSR | S_IRGRP| S_IROTH)) != -1) { 2890 int len; 2891 char map_rec[MAXPATHLEN]; 2892 2893 len = snprintf(map_rec, sizeof (map_rec), 2894 "+%s\n*\t-fstype=lofs\t:%s/export/home/&\n", 2895 autofs_fstab.zone_fs_special, rootpath); 2896 (void) write(fd, map_rec, len); 2897 (void) close(fd); 2898 } 2899 2900 /* 2901 * Mount auto_home_<zone> in the global zone if absent. 2902 * If it's already of type autofs, then 2903 * don't mount it again. 
2904 */ 2905 if ((stat(autofs_fstab.zone_fs_dir, &stat_buf) == -1) || 2906 strcmp(stat_buf.st_fstype, MNTTYPE_AUTOFS) != 0) { 2907 char optstr[] = "indirect,ignore,nobrowse"; 2908 2909 (void) make_one_dir(zlogp, "", 2910 autofs_fstab.zone_fs_dir, DEFAULT_DIR_MODE); 2911 2912 /* 2913 * Mount will fail if automounter has already 2914 * processed the auto_home_<zonename> map 2915 */ 2916 (void) domount(zlogp, MNTTYPE_AUTOFS, optstr, 2917 autofs_fstab.zone_fs_special, 2918 autofs_fstab.zone_fs_dir); 2919 } 2920 continue; 2921 } 2922 2923 2924 if (zone_get_state(zid_name, &zid_state) != Z_OK || 2925 (zid_state != ZONE_STATE_READY && 2926 zid_state != ZONE_STATE_RUNNING)) 2927 /* Skip over zones without mounted filesystems */ 2928 continue; 2929 2930 if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label, 2931 sizeof (m_label_t)) < 0) 2932 /* Skip over zones with unspecified label */ 2933 continue; 2934 2935 if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath, 2936 sizeof (zid_rpath)) == -1) 2937 /* Skip over zones with bad path */ 2938 continue; 2939 2940 if (zone_getattr(zids[i], ZONE_ATTR_PRIVSET, zid_privs, 2941 sizeof (priv_chunk_t) * ip->priv_setsize) == -1) 2942 /* Skip over zones with bad privs */ 2943 continue; 2944 2945 /* 2946 * Reading down is valid according to our label model 2947 * but some customers want to disable it because it 2948 * allows execute down and other possible attacks. 2949 * Therefore, we restrict this feature to zones that 2950 * have the NET_MAC_AWARE privilege which is required 2951 * for NFS read-down semantics. 2952 */ 2953 if ((bldominates(zlabel, zid_label)) && 2954 (priv_ismember(zprivs, PRIV_NET_MAC_AWARE))) { 2955 /* 2956 * Our zone dominates this one. 2957 * Create a lofs mount from lower zone's /export/home 2958 */ 2959 (void) snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN, 2960 "%s/zone/%s/export/home", rootpath, zid_name); 2961 2962 /* 2963 * If the target is already an LOFS mount 2964 * then don't do it again. 
2965 */ 2966 if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) || 2967 strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) { 2968 2969 if (snprintf(lower_fstab.zone_fs_special, 2970 MAXPATHLEN, "%s/export", 2971 zid_rpath) > MAXPATHLEN) 2972 continue; 2973 2974 /* 2975 * Make sure the lower-level home exists 2976 */ 2977 if (make_one_dir(zlogp, 2978 lower_fstab.zone_fs_special, 2979 "/home", DEFAULT_DIR_MODE) != 0) 2980 continue; 2981 2982 (void) strlcat(lower_fstab.zone_fs_special, 2983 "/home", MAXPATHLEN); 2984 2985 /* 2986 * Mount can fail because the lower-level 2987 * zone may have already done a mount up. 2988 */ 2989 (void) mount_one(zlogp, &lower_fstab, ""); 2990 } 2991 } else if ((bldominates(zid_label, zlabel)) && 2992 (priv_ismember(zid_privs, PRIV_NET_MAC_AWARE))) { 2993 /* 2994 * This zone dominates our zone. 2995 * Create a lofs mount from our zone's /export/home 2996 */ 2997 if (snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN, 2998 "%s/zone/%s/export/home", zid_rpath, 2999 zone_name) > MAXPATHLEN) 3000 continue; 3001 3002 /* 3003 * If the target is already an LOFS mount 3004 * then don't do it again. 3005 */ 3006 if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) || 3007 strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) { 3008 3009 (void) snprintf(lower_fstab.zone_fs_special, 3010 MAXPATHLEN, "%s/export/home", rootpath); 3011 3012 /* 3013 * Mount can fail because the higher-level 3014 * zone may have already done a mount down. 3015 */ 3016 (void) mount_one(zlogp, &lower_fstab, ""); 3017 } 3018 } 3019 } 3020 zonecfg_free_fs_option_list(lower_fstab.zone_fs_options); 3021 priv_freeset(zid_privs); 3022 free(zids); 3023 3024 /* 3025 * Now share any exported directories from this zone. 3026 * Each zone can have its own dfstab. 
3027 */ 3028 3029 argv[0] = "zoneshare"; 3030 argv[1] = "-z"; 3031 argv[2] = zone_name; 3032 argv[3] = NULL; 3033 3034 (void) forkexec(zlogp, "/usr/lib/zones/zoneshare", argv); 3035 /* Don't check for errors since they don't affect the zone */ 3036 3037 return (0); 3038 } 3039 3040 /* 3041 * Unmount lofs mounts from higher level zones 3042 * Unshare nfs exported directories 3043 */ 3044 static void 3045 tsol_unmounts(zlog_t *zlogp, char *zone_name) 3046 { 3047 zoneid_t *zids = NULL; 3048 uint_t nzents_saved; 3049 uint_t nzents; 3050 int i; 3051 char *argv[4]; 3052 char path[MAXPATHLEN]; 3053 3054 if (!is_system_labeled()) 3055 return; 3056 3057 /* 3058 * Get the list of zones from the kernel 3059 */ 3060 if (zone_list(NULL, &nzents) != 0) { 3061 return; 3062 } 3063 3064 if (zid_label == NULL) { 3065 zid_label = m_label_alloc(MAC_LABEL); 3066 if (zid_label == NULL) 3067 return; 3068 } 3069 3070 again: 3071 if (nzents == 0) 3072 return; 3073 3074 zids = malloc(nzents * sizeof (zoneid_t)); 3075 if (zids == NULL) { 3076 zerror(zlogp, B_TRUE, "memory allocation failed"); 3077 return; 3078 } 3079 nzents_saved = nzents; 3080 3081 if (zone_list(zids, &nzents) != 0) { 3082 free(zids); 3083 return; 3084 } 3085 if (nzents != nzents_saved) { 3086 /* list changed, try again */ 3087 free(zids); 3088 goto again; 3089 } 3090 3091 for (i = 0; i < nzents; i++) { 3092 char zid_name[ZONENAME_MAX]; 3093 zone_state_t zid_state; 3094 char zid_rpath[MAXPATHLEN]; 3095 3096 if (zids[i] == GLOBAL_ZONEID) 3097 continue; 3098 3099 if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1) 3100 continue; 3101 3102 /* 3103 * Skip the zone we are halting 3104 */ 3105 if (strcmp(zid_name, zone_name) == 0) 3106 continue; 3107 3108 if ((zone_getattr(zids[i], ZONE_ATTR_STATUS, &zid_state, 3109 sizeof (zid_state)) < 0) || 3110 (zid_state < ZONE_IS_READY)) 3111 /* Skip over zones without mounted filesystems */ 3112 continue; 3113 3114 if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label, 3115 sizeof 
(m_label_t)) < 0) 3116 /* Skip over zones with unspecified label */ 3117 continue; 3118 3119 if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath, 3120 sizeof (zid_rpath)) == -1) 3121 /* Skip over zones with bad path */ 3122 continue; 3123 3124 if (zlabel != NULL && bldominates(zid_label, zlabel)) { 3125 /* 3126 * This zone dominates our zone. 3127 * Unmount the lofs mount of our zone's /export/home 3128 */ 3129 3130 if (snprintf(path, MAXPATHLEN, 3131 "%s/zone/%s/export/home", zid_rpath, 3132 zone_name) > MAXPATHLEN) 3133 continue; 3134 3135 /* Skip over mount failures */ 3136 (void) umount(path); 3137 } 3138 } 3139 free(zids); 3140 3141 /* 3142 * Unmount global zone autofs trigger for this zone 3143 */ 3144 (void) snprintf(path, MAXPATHLEN, "/zone/%s/home", zone_name); 3145 /* Skip over mount failures */ 3146 (void) umount(path); 3147 3148 /* 3149 * Next unshare any exported directories from this zone. 3150 */ 3151 3152 argv[0] = "zoneunshare"; 3153 argv[1] = "-z"; 3154 argv[2] = zone_name; 3155 argv[3] = NULL; 3156 3157 (void) forkexec(zlogp, "/usr/lib/zones/zoneunshare", argv); 3158 /* Don't check for errors since they don't affect the zone */ 3159 3160 /* 3161 * Finally, deallocate any devices in the zone. 3162 */ 3163 3164 argv[0] = "deallocate"; 3165 argv[1] = "-Isz"; 3166 argv[2] = zone_name; 3167 argv[3] = NULL; 3168 3169 (void) forkexec(zlogp, "/usr/sbin/deallocate", argv); 3170 /* Don't check for errors since they don't affect the zone */ 3171 } 3172 3173 /* 3174 * Fetch the Trusted Extensions label and multi-level ports (MLPs) for 3175 * this zone. 
3176 */ 3177 static tsol_zcent_t * 3178 get_zone_label(zlog_t *zlogp, priv_set_t *privs) 3179 { 3180 FILE *fp; 3181 tsol_zcent_t *zcent = NULL; 3182 char line[MAXTNZLEN]; 3183 3184 if ((fp = fopen(TNZONECFG_PATH, "r")) == NULL) { 3185 zerror(zlogp, B_TRUE, "%s", TNZONECFG_PATH); 3186 return (NULL); 3187 } 3188 3189 while (fgets(line, sizeof (line), fp) != NULL) { 3190 /* 3191 * Check for malformed database 3192 */ 3193 if (strlen(line) == MAXTNZLEN - 1) 3194 break; 3195 if ((zcent = tsol_sgetzcent(line, NULL, NULL)) == NULL) 3196 continue; 3197 if (strcmp(zcent->zc_name, zone_name) == 0) 3198 break; 3199 tsol_freezcent(zcent); 3200 zcent = NULL; 3201 } 3202 (void) fclose(fp); 3203 3204 if (zcent == NULL) { 3205 zerror(zlogp, B_FALSE, "zone requires a label assignment. " 3206 "See tnzonecfg(4)"); 3207 } else { 3208 if (zlabel == NULL) 3209 zlabel = m_label_alloc(MAC_LABEL); 3210 /* 3211 * Save this zone's privileges for later read-down processing 3212 */ 3213 if ((zprivs = priv_allocset()) == NULL) { 3214 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 3215 return (NULL); 3216 } else { 3217 priv_copyset(privs, zprivs); 3218 } 3219 } 3220 return (zcent); 3221 } 3222 3223 /* 3224 * Add the Trusted Extensions multi-level ports for this zone. 
3225 */ 3226 static void 3227 set_mlps(zlog_t *zlogp, zoneid_t zoneid, tsol_zcent_t *zcent) 3228 { 3229 tsol_mlp_t *mlp; 3230 tsol_mlpent_t tsme; 3231 3232 if (!is_system_labeled()) 3233 return; 3234 3235 tsme.tsme_zoneid = zoneid; 3236 tsme.tsme_flags = 0; 3237 for (mlp = zcent->zc_private_mlp; !TSOL_MLP_END(mlp); mlp++) { 3238 tsme.tsme_mlp = *mlp; 3239 if (tnmlp(TNDB_LOAD, &tsme) != 0) { 3240 zerror(zlogp, B_TRUE, "cannot set zone-specific MLP " 3241 "on %d-%d/%d", mlp->mlp_port, 3242 mlp->mlp_port_upper, mlp->mlp_ipp); 3243 } 3244 } 3245 3246 tsme.tsme_flags = TSOL_MEF_SHARED; 3247 for (mlp = zcent->zc_shared_mlp; !TSOL_MLP_END(mlp); mlp++) { 3248 tsme.tsme_mlp = *mlp; 3249 if (tnmlp(TNDB_LOAD, &tsme) != 0) { 3250 zerror(zlogp, B_TRUE, "cannot set shared MLP " 3251 "on %d-%d/%d", mlp->mlp_port, 3252 mlp->mlp_port_upper, mlp->mlp_ipp); 3253 } 3254 } 3255 } 3256 3257 static void 3258 remove_mlps(zlog_t *zlogp, zoneid_t zoneid) 3259 { 3260 tsol_mlpent_t tsme; 3261 3262 if (!is_system_labeled()) 3263 return; 3264 3265 (void) memset(&tsme, 0, sizeof (tsme)); 3266 tsme.tsme_zoneid = zoneid; 3267 if (tnmlp(TNDB_FLUSH, &tsme) != 0) 3268 zerror(zlogp, B_TRUE, "cannot flush MLPs"); 3269 } 3270 3271 int 3272 prtmount(const char *fs, void *x) { 3273 zerror((zlog_t *)x, B_FALSE, " %s", fs); 3274 return (0); 3275 } 3276 3277 /* 3278 * Look for zones running on the main system that are using this root (or any 3279 * subdirectory of it). Return B_TRUE and print an error if a conflicting zone 3280 * is found or if we can't tell. 
3281 */ 3282 static boolean_t 3283 duplicate_zone_root(zlog_t *zlogp, const char *rootpath) 3284 { 3285 zoneid_t *zids = NULL; 3286 uint_t nzids = 0; 3287 boolean_t retv; 3288 int rlen, zlen; 3289 char zroot[MAXPATHLEN]; 3290 char zonename[ZONENAME_MAX]; 3291 3292 for (;;) { 3293 nzids += 10; 3294 zids = malloc(nzids * sizeof (*zids)); 3295 if (zids == NULL) { 3296 zerror(zlogp, B_TRUE, "memory allocation failed"); 3297 return (B_TRUE); 3298 } 3299 if (zone_list(zids, &nzids) == 0) 3300 break; 3301 free(zids); 3302 } 3303 retv = B_FALSE; 3304 rlen = strlen(rootpath); 3305 while (nzids > 0) { 3306 /* 3307 * Ignore errors; they just mean that the zone has disappeared 3308 * while we were busy. 3309 */ 3310 if (zone_getattr(zids[--nzids], ZONE_ATTR_ROOT, zroot, 3311 sizeof (zroot)) == -1) 3312 continue; 3313 zlen = strlen(zroot); 3314 if (zlen > rlen) 3315 zlen = rlen; 3316 if (strncmp(rootpath, zroot, zlen) == 0 && 3317 (zroot[zlen] == '\0' || zroot[zlen] == '/') && 3318 (rootpath[zlen] == '\0' || rootpath[zlen] == '/')) { 3319 if (getzonenamebyid(zids[nzids], zonename, 3320 sizeof (zonename)) == -1) 3321 (void) snprintf(zonename, sizeof (zonename), 3322 "id %d", (int)zids[nzids]); 3323 zerror(zlogp, B_FALSE, 3324 "zone root %s already in use by zone %s", 3325 rootpath, zonename); 3326 retv = B_TRUE; 3327 break; 3328 } 3329 } 3330 free(zids); 3331 return (retv); 3332 } 3333 3334 /* 3335 * Search for loopback mounts that use this same source node (same device and 3336 * inode). Return B_TRUE if there is one or if we can't tell. 
3337 */ 3338 static boolean_t 3339 duplicate_reachable_path(zlog_t *zlogp, const char *rootpath) 3340 { 3341 struct stat64 rst, zst; 3342 struct mnttab *mnp; 3343 3344 if (stat64(rootpath, &rst) == -1) { 3345 zerror(zlogp, B_TRUE, "can't stat %s", rootpath); 3346 return (B_TRUE); 3347 } 3348 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 3349 return (B_TRUE); 3350 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) { 3351 if (mnp->mnt_fstype == NULL || 3352 strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0) 3353 continue; 3354 /* We're looking at a loopback mount. Stat it. */ 3355 if (mnp->mnt_special != NULL && 3356 stat64(mnp->mnt_special, &zst) != -1 && 3357 rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) { 3358 zerror(zlogp, B_FALSE, 3359 "zone root %s is reachable through %s", 3360 rootpath, mnp->mnt_mountp); 3361 return (B_TRUE); 3362 } 3363 } 3364 return (B_FALSE); 3365 } 3366 3367 zoneid_t 3368 vplat_create(zlog_t *zlogp, boolean_t mount_cmd) 3369 { 3370 zoneid_t rval = -1; 3371 priv_set_t *privs; 3372 char rootpath[MAXPATHLEN]; 3373 char *rctlbuf = NULL; 3374 size_t rctlbufsz = 0; 3375 char *zfsbuf = NULL; 3376 size_t zfsbufsz = 0; 3377 zoneid_t zoneid = -1; 3378 int xerr; 3379 char *kzone; 3380 FILE *fp = NULL; 3381 tsol_zcent_t *zcent = NULL; 3382 int match = 0; 3383 int doi = 0; 3384 3385 if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { 3386 zerror(zlogp, B_TRUE, "unable to determine zone root"); 3387 return (-1); 3388 } 3389 if (zonecfg_in_alt_root()) 3390 resolve_lofs(zlogp, rootpath, sizeof (rootpath)); 3391 3392 if ((privs = priv_allocset()) == NULL) { 3393 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 3394 return (-1); 3395 } 3396 priv_emptyset(privs); 3397 if (get_privset(zlogp, privs, mount_cmd) != 0) 3398 goto error; 3399 3400 if (!mount_cmd && get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) { 3401 zerror(zlogp, B_FALSE, "Unable to get list of rctls"); 3402 goto error; 3403 } 3404 3405 if 
(get_datasets(zlogp, &zfsbuf, &zfsbufsz) != 0) { 3406 zerror(zlogp, B_FALSE, "Unable to get list of ZFS datasets"); 3407 goto error; 3408 } 3409 3410 if (!mount_cmd && is_system_labeled()) { 3411 zcent = get_zone_label(zlogp, privs); 3412 if (zcent != NULL) { 3413 match = zcent->zc_match; 3414 doi = zcent->zc_doi; 3415 *zlabel = zcent->zc_label; 3416 } else { 3417 goto error; 3418 } 3419 } 3420 3421 kzone = zone_name; 3422 3423 /* 3424 * We must do this scan twice. First, we look for zones running on the 3425 * main system that are using this root (or any subdirectory of it). 3426 * Next, we reduce to the shortest path and search for loopback mounts 3427 * that use this same source node (same device and inode). 3428 */ 3429 if (duplicate_zone_root(zlogp, rootpath)) 3430 goto error; 3431 if (duplicate_reachable_path(zlogp, rootpath)) 3432 goto error; 3433 3434 if (mount_cmd) { 3435 root_to_lu(zlogp, rootpath, sizeof (rootpath), B_TRUE); 3436 3437 /* 3438 * Forge up a special root for this zone. When a zone is 3439 * mounted, we can't let the zone have its own root because the 3440 * tools that will be used in this "scratch zone" need access 3441 * to both the zone's resources and the running machine's 3442 * executables. 3443 * 3444 * Note that the mkdir here also catches read-only filesystems. 3445 */ 3446 if (mkdir(rootpath, 0755) != 0 && errno != EEXIST) { 3447 zerror(zlogp, B_TRUE, "cannot create %s", rootpath); 3448 goto error; 3449 } 3450 if (domount(zlogp, "tmpfs", "", "swap", rootpath) != 0) 3451 goto error; 3452 } 3453 3454 if (zonecfg_in_alt_root()) { 3455 /* 3456 * If we are mounting up a zone in an alternate root partition, 3457 * then we have some additional work to do before starting the 3458 * zone. First, resolve the root path down so that we're not 3459 * fooled by duplicates. Then forge up an internal name for 3460 * the zone. 
3461 */ 3462 if ((fp = zonecfg_open_scratch("", B_TRUE)) == NULL) { 3463 zerror(zlogp, B_TRUE, "cannot open mapfile"); 3464 goto error; 3465 } 3466 if (zonecfg_lock_scratch(fp) != 0) { 3467 zerror(zlogp, B_TRUE, "cannot lock mapfile"); 3468 goto error; 3469 } 3470 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(), 3471 NULL, 0) == 0) { 3472 zerror(zlogp, B_FALSE, "scratch zone already running"); 3473 goto error; 3474 } 3475 /* This is the preferred name */ 3476 (void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s", 3477 zone_name); 3478 srandom(getpid()); 3479 while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL, 3480 0) == 0) { 3481 /* This is just an arbitrary name; note "." usage */ 3482 (void) snprintf(kernzone, sizeof (kernzone), 3483 "SUNWlu.%08lX%08lX", random(), random()); 3484 } 3485 kzone = kernzone; 3486 } 3487 3488 xerr = 0; 3489 if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf, 3490 rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel)) == -1) { 3491 if (xerr == ZE_AREMOUNTS) { 3492 if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) { 3493 zerror(zlogp, B_FALSE, 3494 "An unknown file-system is mounted on " 3495 "a subdirectory of %s", rootpath); 3496 } else { 3497 3498 zerror(zlogp, B_FALSE, 3499 "These file-systems are mounted on " 3500 "subdirectories of %s:", rootpath); 3501 (void) zonecfg_find_mounts(rootpath, 3502 prtmount, zlogp); 3503 } 3504 } else if (xerr == ZE_CHROOTED) { 3505 zerror(zlogp, B_FALSE, "%s: " 3506 "cannot create a zone from a chrooted " 3507 "environment", "zone_create"); 3508 } else { 3509 zerror(zlogp, B_TRUE, "%s failed", "zone_create"); 3510 } 3511 goto error; 3512 } 3513 3514 if (zonecfg_in_alt_root() && 3515 zonecfg_add_scratch(fp, zone_name, kernzone, 3516 zonecfg_get_root()) == -1) { 3517 zerror(zlogp, B_TRUE, "cannot add mapfile entry"); 3518 goto error; 3519 } 3520 3521 /* 3522 * The following is a warning, not an error, and is not performed when 3523 * merely mounting a zone for 
administrative use. 3524 */ 3525 if (!mount_cmd && bind_to_pool(zlogp, zoneid) != 0) 3526 zerror(zlogp, B_FALSE, "WARNING: unable to bind zone to " 3527 "requested pool; using default pool."); 3528 if (!mount_cmd) 3529 set_mlps(zlogp, zoneid, zcent); 3530 rval = zoneid; 3531 zoneid = -1; 3532 3533 error: 3534 if (zoneid != -1) 3535 (void) zone_destroy(zoneid); 3536 if (rctlbuf != NULL) 3537 free(rctlbuf); 3538 priv_freeset(privs); 3539 if (fp != NULL) 3540 zonecfg_close_scratch(fp); 3541 lofs_discard_mnttab(); 3542 if (zcent != NULL) 3543 tsol_freezcent(zcent); 3544 return (rval); 3545 } 3546 3547 /* 3548 * Enter the zone and write a /etc/zones/index file there. This allows 3549 * libzonecfg (and thus zoneadm) to report the UUID and potentially other zone 3550 * details from inside the zone. 3551 */ 3552 static void 3553 write_index_file(zoneid_t zoneid) 3554 { 3555 FILE *zef; 3556 FILE *zet; 3557 struct zoneent *zep; 3558 pid_t child; 3559 int tmpl_fd; 3560 ctid_t ct; 3561 int fd; 3562 char uuidstr[UUID_PRINTABLE_STRING_LENGTH]; 3563 3564 /* Locate the zone entry in the global zone's index file */ 3565 if ((zef = setzoneent()) == NULL) 3566 return; 3567 while ((zep = getzoneent_private(zef)) != NULL) { 3568 if (strcmp(zep->zone_name, zone_name) == 0) 3569 break; 3570 free(zep); 3571 } 3572 endzoneent(zef); 3573 if (zep == NULL) 3574 return; 3575 3576 if ((tmpl_fd = init_template()) == -1) { 3577 free(zep); 3578 return; 3579 } 3580 3581 if ((child = fork()) == -1) { 3582 (void) ct_tmpl_clear(tmpl_fd); 3583 (void) close(tmpl_fd); 3584 free(zep); 3585 return; 3586 } 3587 3588 /* parent waits for child to finish */ 3589 if (child != 0) { 3590 free(zep); 3591 if (contract_latest(&ct) == -1) 3592 ct = -1; 3593 (void) ct_tmpl_clear(tmpl_fd); 3594 (void) close(tmpl_fd); 3595 (void) waitpid(child, NULL, 0); 3596 (void) contract_abandon_id(ct); 3597 return; 3598 } 3599 3600 /* child enters zone and sets up index file */ 3601 (void) ct_tmpl_clear(tmpl_fd); 3602 if 
(zone_enter(zoneid) != -1) { 3603 (void) mkdir(ZONE_CONFIG_ROOT, ZONE_CONFIG_MODE); 3604 (void) chown(ZONE_CONFIG_ROOT, ZONE_CONFIG_UID, 3605 ZONE_CONFIG_GID); 3606 fd = open(ZONE_INDEX_FILE, O_WRONLY|O_CREAT|O_TRUNC, 3607 ZONE_INDEX_MODE); 3608 if (fd != -1 && (zet = fdopen(fd, "w")) != NULL) { 3609 (void) fchown(fd, ZONE_INDEX_UID, ZONE_INDEX_GID); 3610 if (uuid_is_null(zep->zone_uuid)) 3611 uuidstr[0] = '\0'; 3612 else 3613 uuid_unparse(zep->zone_uuid, uuidstr); 3614 (void) fprintf(zet, "%s:%s:/:%s\n", zep->zone_name, 3615 zone_state_str(zep->zone_state), 3616 uuidstr); 3617 (void) fclose(zet); 3618 } 3619 } 3620 _exit(0); 3621 } 3622 3623 int 3624 vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd, zoneid_t zoneid) 3625 { 3626 if (!mount_cmd && validate_datasets(zlogp) != 0) { 3627 lofs_discard_mnttab(); 3628 return (-1); 3629 } 3630 3631 if (create_dev_files(zlogp) != 0 || 3632 mount_filesystems(zlogp, mount_cmd) != 0) { 3633 lofs_discard_mnttab(); 3634 return (-1); 3635 } 3636 if (!mount_cmd && (devfsadm_register(zlogp) != 0 || 3637 configure_network_interfaces(zlogp) != 0)) { 3638 lofs_discard_mnttab(); 3639 return (-1); 3640 } 3641 3642 write_index_file(zoneid); 3643 3644 lofs_discard_mnttab(); 3645 return (0); 3646 } 3647 3648 static int 3649 lu_root_teardown(zlog_t *zlogp) 3650 { 3651 char zroot[MAXPATHLEN]; 3652 3653 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 3654 zerror(zlogp, B_FALSE, "unable to determine zone root"); 3655 return (-1); 3656 } 3657 root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE); 3658 3659 /* 3660 * At this point, the processes are gone, the filesystems (save the 3661 * root) are unmounted, and the zone is on death row. But there may 3662 * still be creds floating about in the system that reference the 3663 * zone_t, and which pin down zone_rootvp causing this call to fail 3664 * with EBUSY. Thus, we try for a little while before just giving up. 
3665 * (How I wish this were not true, and umount2 just did the right 3666 * thing, or tmpfs supported MS_FORCE This is a gross hack.) 3667 */ 3668 if (umount2(zroot, MS_FORCE) != 0) { 3669 if (errno == ENOTSUP && umount2(zroot, 0) == 0) 3670 goto unmounted; 3671 if (errno == EBUSY) { 3672 int tries = 10; 3673 3674 while (--tries >= 0) { 3675 (void) sleep(1); 3676 if (umount2(zroot, 0) == 0) 3677 goto unmounted; 3678 if (errno != EBUSY) 3679 break; 3680 } 3681 } 3682 zerror(zlogp, B_TRUE, "unable to unmount '%s'", zroot); 3683 return (-1); 3684 } 3685 unmounted: 3686 3687 /* 3688 * Only zones in an alternate root environment have scratch zone 3689 * entries. 3690 */ 3691 if (zonecfg_in_alt_root()) { 3692 FILE *fp; 3693 int retv; 3694 3695 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) { 3696 zerror(zlogp, B_TRUE, "cannot open mapfile"); 3697 return (-1); 3698 } 3699 retv = -1; 3700 if (zonecfg_lock_scratch(fp) != 0) 3701 zerror(zlogp, B_TRUE, "cannot lock mapfile"); 3702 else if (zonecfg_delete_scratch(fp, kernzone) != 0) 3703 zerror(zlogp, B_TRUE, "cannot delete map entry"); 3704 else 3705 retv = 0; 3706 zonecfg_close_scratch(fp); 3707 return (retv); 3708 } else { 3709 return (0); 3710 } 3711 } 3712 3713 int 3714 vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd) 3715 { 3716 char *kzone; 3717 zoneid_t zoneid; 3718 3719 kzone = zone_name; 3720 if (zonecfg_in_alt_root()) { 3721 FILE *fp; 3722 3723 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) { 3724 zerror(zlogp, B_TRUE, "unable to open map file"); 3725 goto error; 3726 } 3727 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(), 3728 kernzone, sizeof (kernzone)) != 0) { 3729 zerror(zlogp, B_FALSE, "unable to find scratch zone"); 3730 zonecfg_close_scratch(fp); 3731 goto error; 3732 } 3733 zonecfg_close_scratch(fp); 3734 kzone = kernzone; 3735 } 3736 3737 if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) { 3738 if (!bringup_failure_recovery) 3739 zerror(zlogp, B_TRUE, "unable to get 
zoneid"); 3740 if (unmount_cmd) 3741 (void) lu_root_teardown(zlogp); 3742 goto error; 3743 } 3744 3745 if (zone_shutdown(zoneid) != 0) { 3746 zerror(zlogp, B_TRUE, "unable to shutdown zone"); 3747 goto error; 3748 } 3749 3750 if (!unmount_cmd && devfsadm_unregister(zlogp) != 0) 3751 goto error; 3752 3753 if (!unmount_cmd && 3754 unconfigure_network_interfaces(zlogp, zoneid) != 0) { 3755 zerror(zlogp, B_FALSE, 3756 "unable to unconfigure network interfaces in zone"); 3757 goto error; 3758 } 3759 3760 if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) { 3761 zerror(zlogp, B_TRUE, "unable to abort TCP connections"); 3762 goto error; 3763 } 3764 3765 if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) { 3766 zerror(zlogp, B_FALSE, 3767 "unable to unmount file systems in zone"); 3768 goto error; 3769 } 3770 3771 remove_mlps(zlogp, zoneid); 3772 3773 if (zone_destroy(zoneid) != 0) { 3774 zerror(zlogp, B_TRUE, "unable to destroy zone"); 3775 goto error; 3776 } 3777 3778 /* 3779 * Special teardown for alternate boot environments: remove the tmpfs 3780 * root for the zone and then remove it from the map file. 3781 */ 3782 if (unmount_cmd && lu_root_teardown(zlogp) != 0) 3783 goto error; 3784 3785 if (!unmount_cmd) 3786 destroy_console_slave(); 3787 3788 lofs_discard_mnttab(); 3789 return (0); 3790 3791 error: 3792 lofs_discard_mnttab(); 3793 return (-1); 3794 } 3795