1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * This module contains functions used to bring up and tear down the 30 * Virtual Platform: [un]mounting file-systems, [un]plumbing network 31 * interfaces, [un]configuring devices, establishing resource controls, 32 * and creating/destroying the zone in the kernel. These actions, on 33 * the way up, ready the zone; on the way down, they halt the zone. 34 * See the much longer block comment at the beginning of zoneadmd.c 35 * for a bigger picture of how the whole program functions. 36 * 37 * This module also has primary responsibility for the layout of "scratch 38 * zones." These are mounted, but inactive, zones that are used during 39 * operating system upgrade and potentially other administrative action. The 40 * scratch zone environment is similar to the miniroot environment. The zone's 41 * actual root is mounted read-write on /a, and the standard paths (/usr, 42 * /sbin, /lib) all lead to read-only copies of the running system's binaries. 43 * This allows the administrative tools to manipulate the zone using "-R /a" 44 * without relying on any binaries in the zone itself. 45 * 46 * If the scratch zone is on an alternate root (Live Upgrade [LU] boot 47 * environment), then we must resolve the lofs mounts used there to uncover 48 * writable (unshared) resources. Shared resources, though, are always 49 * read-only. In addition, if the "same" zone with a different root path is 50 * currently running, then "/b" inside the zone points to the running zone's 51 * root. This allows LU to synchronize configuration files during the upgrade 52 * process. 53 * 54 * To construct this environment, this module creates a tmpfs mount on 55 * $ZONEPATH/lu. Inside this scratch area, the miniroot-like environment as 56 * described above is constructed on the fly. The zone is then created using 57 * $ZONEPATH/lu as the root. 58 * 59 * Note that scratch zones are inactive. The zone's bits are not running and 60 * likely cannot be run correctly until upgrade is done. Init is not running 61 * there, nor is SMF. Because of this, the "mounted" state of a scratch zone 62 * is not a part of the usual halt/ready/boot state machine. 63 */ 64 65 #include <sys/param.h> 66 #include <sys/mount.h> 67 #include <sys/mntent.h> 68 #include <sys/socket.h> 69 #include <sys/utsname.h> 70 #include <sys/types.h> 71 #include <sys/stat.h> 72 #include <sys/sockio.h> 73 #include <sys/stropts.h> 74 #include <sys/conf.h> 75 76 #include <inet/tcp.h> 77 #include <arpa/inet.h> 78 #include <netinet/in.h> 79 #include <net/route.h> 80 #include <netdb.h> 81 82 #include <stdio.h> 83 #include <errno.h> 84 #include <fcntl.h> 85 #include <unistd.h> 86 #include <rctl.h> 87 #include <stdlib.h> 88 #include <string.h> 89 #include <strings.h> 90 #include <wait.h> 91 #include <limits.h> 92 #include <libgen.h> 93 #include <libzfs.h> 94 #include <zone.h> 95 #include <assert.h> 96 97 #include <sys/mntio.h> 98 #include <sys/mnttab.h> 99 #include <sys/fs/autofs.h> /* for _autofssys() */ 100 #include <sys/fs/lofs_info.h> 101 #include <sys/fs/zfs.h> 102 103 #include <pool.h> 104 #include <sys/pool.h> 105 106 #include <libzonecfg.h> 107 #include "zoneadmd.h" 108 109 #define V4_ADDR_LEN 32 110 #define V6_ADDR_LEN 128 111 112 /* 0755 is the default directory mode. */ 113 #define DEFAULT_DIR_MODE \ 114 (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) 115 116 #define IPD_DEFAULT_OPTS \ 117 MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES 118 119 #define DFSTYPES "/etc/dfs/fstypes" 120 121 /* 122 * A list of directories which should be created. 123 */ 124 125 struct dir_info { 126 char *dir_name; 127 mode_t dir_mode; 128 }; 129 130 /* 131 * The pathnames below are relative to the zonepath 132 */ 133 static struct dir_info dev_dirs[] = { 134 { "/dev", 0755 }, 135 { "/dev/dsk", 0755 }, 136 { "/dev/fd", 0555 }, 137 { "/dev/pts", 0755 }, 138 { "/dev/rdsk", 0755 }, 139 { "/dev/rmt", 0755 }, 140 { "/dev/sad", 0755 }, 141 { "/dev/swap", 0755 }, 142 { "/dev/term", 0755 }, 143 }; 144 145 /* 146 * A list of devices which should be symlinked to /dev/zconsole. 147 */ 148 149 struct symlink_info { 150 char *sl_source; 151 char *sl_target; 152 }; 153 154 /* 155 * The "source" paths are relative to the zonepath 156 */ 157 static struct symlink_info dev_symlinks[] = { 158 { "/dev/stderr", "./fd/2" }, 159 { "/dev/stdin", "./fd/0" }, 160 { "/dev/stdout", "./fd/1" }, 161 { "/dev/dtremote", "/dev/null" }, 162 { "/dev/console", "zconsole" }, 163 { "/dev/syscon", "zconsole" }, 164 { "/dev/sysmsg", "zconsole" }, 165 { "/dev/systty", "zconsole" }, 166 { "/dev/msglog", "zconsole" }, 167 }; 168 169 /* for routing socket */ 170 static int rts_seqno = 0; 171 172 /* mangled zone name when mounting in an alternate root environment */ 173 static char kernzone[ZONENAME_MAX]; 174 175 /* array of cached mount entries for resolve_lofs */ 176 static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max; 177 178 /* from libsocket, not in any header file */ 179 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *); 180 181 /* 182 * An optimization for build_mnttable: reallocate (and potentially copy the 183 * data) only once every N times through the loop. 184 */ 185 #define MNTTAB_HUNK 32 186 187 /* 188 * Private autofs system call 189 */ 190 extern int _autofssys(int, void *); 191 192 static int 193 autofs_cleanup(zoneid_t zoneid) 194 { 195 /* 196 * Ask autofs to unmount all trigger nodes in the given zone. 197 */ 198 return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid)); 199 } 200 201 static void 202 free_mnttable(struct mnttab *mnt_array, uint_t nelem) 203 { 204 uint_t i; 205 206 if (mnt_array == NULL) 207 return; 208 for (i = 0; i < nelem; i++) { 209 free(mnt_array[i].mnt_mountp); 210 free(mnt_array[i].mnt_fstype); 211 free(mnt_array[i].mnt_special); 212 free(mnt_array[i].mnt_mntopts); 213 assert(mnt_array[i].mnt_time == NULL); 214 } 215 free(mnt_array); 216 } 217 218 /* 219 * Build the mount table for the zone rooted at "zroot", storing the resulting 220 * array of struct mnttabs in "mnt_arrayp" and the number of elements in the 221 * array in "nelemp". 222 */ 223 static int 224 build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab, 225 struct mnttab **mnt_arrayp, uint_t *nelemp) 226 { 227 struct mnttab mnt; 228 struct mnttab *mnts; 229 struct mnttab *mnp; 230 uint_t nmnt; 231 232 rewind(mnttab); 233 resetmnttab(mnttab); 234 nmnt = 0; 235 mnts = NULL; 236 while (getmntent(mnttab, &mnt) == 0) { 237 struct mnttab *tmp_array; 238 239 if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0) 240 continue; 241 if (nmnt % MNTTAB_HUNK == 0) { 242 tmp_array = realloc(mnts, 243 (nmnt + MNTTAB_HUNK) * sizeof (*mnts)); 244 if (tmp_array == NULL) { 245 free_mnttable(mnts, nmnt); 246 return (-1); 247 } 248 mnts = tmp_array; 249 } 250 mnp = &mnts[nmnt++]; 251 252 /* 253 * Zero out any fields we're not using. 254 */ 255 (void) memset(mnp, 0, sizeof (*mnp)); 256 257 if (mnt.mnt_special != NULL) 258 mnp->mnt_special = strdup(mnt.mnt_special); 259 if (mnt.mnt_mntopts != NULL) 260 mnp->mnt_mntopts = strdup(mnt.mnt_mntopts); 261 mnp->mnt_mountp = strdup(mnt.mnt_mountp); 262 mnp->mnt_fstype = strdup(mnt.mnt_fstype); 263 if ((mnt.mnt_special != NULL && mnp->mnt_special == NULL) || 264 (mnt.mnt_mntopts != NULL && mnp->mnt_mntopts == NULL) || 265 mnp->mnt_mountp == NULL || mnp->mnt_fstype == NULL) { 266 zerror(zlogp, B_TRUE, "memory allocation failed"); 267 free_mnttable(mnts, nmnt); 268 return (-1); 269 } 270 } 271 *mnt_arrayp = mnts; 272 *nelemp = nmnt; 273 return (0); 274 } 275 276 /* 277 * This is an optimization. The resolve_lofs function is used quite frequently 278 * to manipulate file paths, and on a machine with a large number of zones, 279 * there will be a huge number of mounted file systems. Thus, we trigger a 280 * reread of the list of mount points 281 */ 282 static void 283 lofs_discard_mnttab(void) 284 { 285 free_mnttable(resolve_lofs_mnts, 286 resolve_lofs_mnt_max - resolve_lofs_mnts); 287 resolve_lofs_mnts = resolve_lofs_mnt_max = NULL; 288 } 289 290 static int 291 lofs_read_mnttab(zlog_t *zlogp) 292 { 293 FILE *mnttab; 294 uint_t nmnts; 295 296 if ((mnttab = fopen(MNTTAB, "r")) == NULL) 297 return (-1); 298 if (build_mnttable(zlogp, "", 0, mnttab, &resolve_lofs_mnts, 299 &nmnts) == -1) { 300 (void) fclose(mnttab); 301 return (-1); 302 } 303 (void) fclose(mnttab); 304 resolve_lofs_mnt_max = resolve_lofs_mnts + nmnts; 305 return (0); 306 } 307 308 /* 309 * This function loops over potential loopback mounts and symlinks in a given 310 * path and resolves them all down to an absolute path. 311 */ 312 static void 313 resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen) 314 { 315 int len, arlen; 316 const char *altroot; 317 char tmppath[MAXPATHLEN]; 318 boolean_t outside_altroot; 319 320 if ((len = resolvepath(path, tmppath, sizeof (tmppath))) == -1) 321 return; 322 tmppath[len] = '\0'; 323 (void) strlcpy(path, tmppath, sizeof (tmppath)); 324 325 /* This happens once per zoneadmd operation. */ 326 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 327 return; 328 329 altroot = zonecfg_get_root(); 330 arlen = strlen(altroot); 331 outside_altroot = B_FALSE; 332 for (;;) { 333 struct mnttab *mnp; 334 335 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; 336 mnp++) { 337 if (mnp->mnt_fstype == NULL || 338 mnp->mnt_mountp == NULL || 339 mnp->mnt_special == NULL || 340 strcmp(mnp->mnt_fstype, MNTTYPE_LOFS) != 0) 341 continue; 342 len = strlen(mnp->mnt_mountp); 343 if (strncmp(mnp->mnt_mountp, path, len) == 0 && 344 (path[len] == '/' || path[len] == '\0')) 345 break; 346 } 347 if (mnp >= resolve_lofs_mnt_max) 348 break; 349 if (outside_altroot) { 350 char *cp; 351 int olen = sizeof (MNTOPT_RO) - 1; 352 353 /* 354 * If we run into a read-only mount outside of the 355 * alternate root environment, then the user doesn't 356 * want this path to be made read-write. 357 */ 358 if (mnp->mnt_mntopts != NULL && 359 (cp = strstr(mnp->mnt_mntopts, MNTOPT_RO)) != 360 NULL && 361 (cp == mnp->mnt_mntopts || cp[-1] == ',') && 362 (cp[olen] == '\0' || cp[olen] == ',')) { 363 break; 364 } 365 } else if (arlen > 0 && 366 (strncmp(mnp->mnt_special, altroot, arlen) != 0 || 367 (mnp->mnt_special[arlen] != '\0' && 368 mnp->mnt_special[arlen] != '/'))) { 369 outside_altroot = B_TRUE; 370 } 371 /* use temporary buffer because new path might be longer */ 372 (void) snprintf(tmppath, sizeof (tmppath), "%s%s", 373 mnp->mnt_special, path + len); 374 if ((len = resolvepath(tmppath, path, pathlen)) == -1) 375 break; 376 path[len] = '\0'; 377 } 378 } 379 380 /* 381 * For a regular mount, check if a replacement lofs mount is needed because the 382 * referenced device is already mounted somewhere. 383 */ 384 static int 385 check_lofs_needed(zlog_t *zlogp, struct zone_fstab *fsptr) 386 { 387 struct mnttab *mnp; 388 zone_fsopt_t *optptr, *onext; 389 390 /* This happens once per zoneadmd operation. */ 391 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 392 return (-1); 393 394 /* 395 * If this special node isn't already in use, then it's ours alone; 396 * no need to worry about conflicting mounts. 397 */ 398 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; 399 mnp++) { 400 if (strcmp(mnp->mnt_special, fsptr->zone_fs_special) == 0) 401 break; 402 } 403 if (mnp >= resolve_lofs_mnt_max) 404 return (0); 405 406 /* 407 * Convert this duplicate mount into a lofs mount. 408 */ 409 (void) strlcpy(fsptr->zone_fs_special, mnp->mnt_mountp, 410 sizeof (fsptr->zone_fs_special)); 411 (void) strlcpy(fsptr->zone_fs_type, MNTTYPE_LOFS, 412 sizeof (fsptr->zone_fs_type)); 413 fsptr->zone_fs_raw[0] = '\0'; 414 415 /* 416 * Discard all but one of the original options and set that to be the 417 * same set of options used for inherit package directory resources. 418 */ 419 optptr = fsptr->zone_fs_options; 420 if (optptr == NULL) { 421 optptr = malloc(sizeof (*optptr)); 422 if (optptr == NULL) { 423 zerror(zlogp, B_TRUE, "cannot mount %s", 424 fsptr->zone_fs_dir); 425 return (-1); 426 } 427 } else { 428 while ((onext = optptr->zone_fsopt_next) != NULL) { 429 optptr->zone_fsopt_next = onext->zone_fsopt_next; 430 free(onext); 431 } 432 } 433 (void) strcpy(optptr->zone_fsopt_opt, IPD_DEFAULT_OPTS); 434 optptr->zone_fsopt_next = NULL; 435 fsptr->zone_fs_options = optptr; 436 return (0); 437 } 438 439 static int 440 make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode) 441 { 442 char path[MAXPATHLEN]; 443 struct stat st; 444 445 if (snprintf(path, sizeof (path), "%s%s", prefix, subdir) > 446 sizeof (path)) { 447 zerror(zlogp, B_FALSE, "pathname %s%s is too long", prefix, 448 subdir); 449 return (-1); 450 } 451 452 if (lstat(path, &st) == 0) { 453 /* 454 * We don't check the file mode since presumably the zone 455 * administrator may have had good reason to change the mode, 456 * and we don't need to second guess him. 457 */ 458 if (!S_ISDIR(st.st_mode)) { 459 zerror(zlogp, B_FALSE, "%s is not a directory", path); 460 return (-1); 461 } 462 } else if (mkdirp(path, mode) != 0) { 463 if (errno == EROFS) 464 zerror(zlogp, B_FALSE, "Could not mkdir %s.\nIt is on " 465 "a read-only file system in this local zone.\nMake " 466 "sure %s exists in the global zone.", path, subdir); 467 else 468 zerror(zlogp, B_TRUE, "mkdirp of %s failed", path); 469 return (-1); 470 } 471 return (0); 472 } 473 474 /* 475 * Make /dev and various directories underneath it. 476 */ 477 static int 478 make_dev_dirs(zlog_t *zlogp, const char *zonepath) 479 { 480 int i; 481 482 for (i = 0; i < sizeof (dev_dirs) / sizeof (struct dir_info); i++) { 483 if (make_one_dir(zlogp, zonepath, dev_dirs[i].dir_name, 484 dev_dirs[i].dir_mode) != 0) 485 return (-1); 486 } 487 return (0); 488 } 489 490 /* 491 * Make various sym-links underneath /dev. 492 */ 493 static int 494 make_dev_links(zlog_t *zlogp, char *zonepath) 495 { 496 int i; 497 498 for (i = 0; i < sizeof (dev_symlinks) / sizeof (struct symlink_info); 499 i++) { 500 char dev[MAXPATHLEN]; 501 struct stat st; 502 503 (void) snprintf(dev, sizeof (dev), "%s%s", zonepath, 504 dev_symlinks[i].sl_source); 505 if (lstat(dev, &st) == 0) { 506 /* 507 * Try not to call unlink(2) on directories, since that 508 * makes UFS unhappy. 509 */ 510 if (S_ISDIR(st.st_mode)) { 511 zerror(zlogp, B_FALSE, "symlink path %s is a " 512 "directory", dev_symlinks[i].sl_source); 513 return (-1); 514 } 515 (void) unlink(dev); 516 } 517 if (symlink(dev_symlinks[i].sl_target, dev) != 0) { 518 zerror(zlogp, B_TRUE, "could not setup %s->%s symlink", 519 dev_symlinks[i].sl_source, 520 dev_symlinks[i].sl_target); 521 return (-1); 522 } 523 } 524 return (0); 525 } 526 527 /* 528 * Create various directories and sym-links under /dev. 529 */ 530 static int 531 create_dev_files(zlog_t *zlogp) 532 { 533 char zonepath[MAXPATHLEN]; 534 535 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { 536 zerror(zlogp, B_TRUE, "unable to determine zone root"); 537 return (-1); 538 } 539 if (zonecfg_in_alt_root()) 540 resolve_lofs(zlogp, zonepath, sizeof (zonepath)); 541 542 if (make_dev_dirs(zlogp, zonepath) != 0) 543 return (-1); 544 if (make_dev_links(zlogp, zonepath) != 0) 545 return (-1); 546 return (0); 547 } 548 549 static void 550 free_remote_fstypes(char **types) 551 { 552 uint_t i; 553 554 if (types == NULL) 555 return; 556 for (i = 0; types[i] != NULL; i++) 557 free(types[i]); 558 free(types); 559 } 560 561 static char ** 562 get_remote_fstypes(zlog_t *zlogp) 563 { 564 char **types = NULL; 565 FILE *fp; 566 char buf[MAXPATHLEN]; 567 char fstype[MAXPATHLEN]; 568 uint_t lines = 0; 569 uint_t i; 570 571 if ((fp = fopen(DFSTYPES, "r")) == NULL) { 572 zerror(zlogp, B_TRUE, "failed to open %s", DFSTYPES); 573 return (NULL); 574 } 575 /* 576 * Count the number of lines 577 */ 578 while (fgets(buf, sizeof (buf), fp) != NULL) 579 lines++; 580 if (lines == 0) /* didn't read anything; empty file */ 581 goto out; 582 rewind(fp); 583 /* 584 * Allocate enough space for a NULL-terminated array. 585 */ 586 types = calloc(lines + 1, sizeof (char *)); 587 if (types == NULL) { 588 zerror(zlogp, B_TRUE, "memory allocation failed"); 589 goto out; 590 } 591 i = 0; 592 while (fgets(buf, sizeof (buf), fp) != NULL) { 593 /* LINTED - fstype is big enough to hold buf */ 594 if (sscanf(buf, "%s", fstype) == 0) { 595 zerror(zlogp, B_FALSE, "unable to parse %s", DFSTYPES); 596 free_remote_fstypes(types); 597 types = NULL; 598 goto out; 599 } 600 types[i] = strdup(fstype); 601 if (types[i] == NULL) { 602 zerror(zlogp, B_TRUE, "memory allocation failed"); 603 free_remote_fstypes(types); 604 types = NULL; 605 goto out; 606 } 607 i++; 608 } 609 out: 610 (void) fclose(fp); 611 return (types); 612 } 613 614 static boolean_t 615 is_remote_fstype(const char *fstype, char *const *remote_fstypes) 616 { 617 uint_t i; 618 619 if (remote_fstypes == NULL) 620 return (B_FALSE); 621 for (i = 0; remote_fstypes[i] != NULL; i++) { 622 if (strcmp(remote_fstypes[i], fstype) == 0) 623 return (B_TRUE); 624 } 625 return (B_FALSE); 626 } 627 628 /* 629 * This converts a zone root path (normally of the form .../root) to a Live 630 * Upgrade scratch zone root (of the form .../lu). 631 */ 632 static void 633 root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved) 634 { 635 if (!isresolved && zonecfg_in_alt_root()) 636 resolve_lofs(zlogp, zroot, zrootlen); 637 (void) strcpy(strrchr(zroot, '/') + 1, "lu"); 638 } 639 640 /* 641 * The general strategy for unmounting filesystems is as follows: 642 * 643 * - Remote filesystems may be dead, and attempting to contact them as 644 * part of a regular unmount may hang forever; we want to always try to 645 * forcibly unmount such filesystems and only fall back to regular 646 * unmounts if the filesystem doesn't support forced unmounts. 647 * 648 * - We don't want to unnecessarily corrupt metadata on local 649 * filesystems (ie UFS), so we want to start off with graceful unmounts, 650 * and only escalate to doing forced unmounts if we get stuck. 651 * 652 * We start off walking backwards through the mount table. This doesn't 653 * give us strict ordering but ensures that we try to unmount submounts 654 * first. We thus limit the number of failed umount2(2) calls. 655 * 656 * The mechanism for determining if we're stuck is to count the number 657 * of failed unmounts each iteration through the mount table. This 658 * gives us an upper bound on the number of filesystems which remain 659 * mounted (autofs trigger nodes are dealt with separately). If at the 660 * end of one unmount+autofs_cleanup cycle we still have the same number 661 * of mounts that we started out with, we're stuck and try a forced 662 * unmount. If that fails (filesystem doesn't support forced unmounts) 663 * then we bail and are unable to teardown the zone. If it succeeds, 664 * we're no longer stuck so we continue with our policy of trying 665 * graceful mounts first. 666 * 667 * Zone must be down (ie, no processes or threads active). 668 */ 669 static int 670 unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd) 671 { 672 int error = 0; 673 FILE *mnttab; 674 struct mnttab *mnts; 675 uint_t nmnt; 676 char zroot[MAXPATHLEN + 1]; 677 size_t zrootlen; 678 uint_t oldcount = UINT_MAX; 679 boolean_t stuck = B_FALSE; 680 char **remote_fstypes = NULL; 681 682 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 683 zerror(zlogp, B_FALSE, "unable to determine zone root"); 684 return (-1); 685 } 686 if (unmount_cmd) 687 root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE); 688 689 (void) strcat(zroot, "/"); 690 zrootlen = strlen(zroot); 691 692 if ((mnttab = fopen(MNTTAB, "r")) == NULL) { 693 zerror(zlogp, B_TRUE, "failed to open %s", MNTTAB); 694 return (-1); 695 } 696 /* 697 * Use our hacky mntfs ioctl so we see everything, even mounts with 698 * MS_NOMNTTAB. 699 */ 700 if (ioctl(fileno(mnttab), MNTIOC_SHOWHIDDEN, NULL) < 0) { 701 zerror(zlogp, B_TRUE, "unable to configure %s", MNTTAB); 702 error++; 703 goto out; 704 } 705 706 /* 707 * Build the list of remote fstypes so we know which ones we 708 * should forcibly unmount. 709 */ 710 remote_fstypes = get_remote_fstypes(zlogp); 711 for (; /* ever */; ) { 712 uint_t newcount = 0; 713 boolean_t unmounted; 714 struct mnttab *mnp; 715 char *path; 716 uint_t i; 717 718 mnts = NULL; 719 nmnt = 0; 720 /* 721 * MNTTAB gives us a way to walk through mounted 722 * filesystems; we need to be able to walk them in 723 * reverse order, so we build a list of all mounted 724 * filesystems. 725 */ 726 if (build_mnttable(zlogp, zroot, zrootlen, mnttab, &mnts, 727 &nmnt) != 0) { 728 error++; 729 goto out; 730 } 731 for (i = 0; i < nmnt; i++) { 732 mnp = &mnts[nmnt - i - 1]; /* access in reverse order */ 733 path = mnp->mnt_mountp; 734 unmounted = B_FALSE; 735 /* 736 * Try forced unmount first for remote filesystems. 737 * 738 * Not all remote filesystems support forced unmounts, 739 * so if this fails (ENOTSUP) we'll continue on 740 * and try a regular unmount. 741 */ 742 if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) { 743 if (umount2(path, MS_FORCE) == 0) 744 unmounted = B_TRUE; 745 } 746 /* 747 * Try forced unmount if we're stuck. 748 */ 749 if (stuck) { 750 if (umount2(path, MS_FORCE) == 0) { 751 unmounted = B_TRUE; 752 stuck = B_FALSE; 753 } else { 754 /* 755 * The first failure indicates a 756 * mount we won't be able to get 757 * rid of automatically, so we 758 * bail. 759 */ 760 error++; 761 zerror(zlogp, B_FALSE, 762 "unable to unmount '%s'", path); 763 free_mnttable(mnts, nmnt); 764 goto out; 765 } 766 } 767 /* 768 * Try regular unmounts for everything else. 769 */ 770 if (!unmounted && umount2(path, 0) != 0) 771 newcount++; 772 } 773 free_mnttable(mnts, nmnt); 774 775 if (newcount == 0) 776 break; 777 if (newcount >= oldcount) { 778 /* 779 * Last round didn't unmount anything; we're stuck and 780 * should start trying forced unmounts. 781 */ 782 stuck = B_TRUE; 783 } 784 oldcount = newcount; 785 786 /* 787 * Autofs doesn't let you unmount its trigger nodes from 788 * userland so we have to tell the kernel to cleanup for us. 789 */ 790 if (autofs_cleanup(zoneid) != 0) { 791 zerror(zlogp, B_TRUE, "unable to remove autofs nodes"); 792 error++; 793 goto out; 794 } 795 } 796 797 out: 798 free_remote_fstypes(remote_fstypes); 799 (void) fclose(mnttab); 800 return (error ? -1 : 0); 801 } 802 803 static int 804 fs_compare(const void *m1, const void *m2) 805 { 806 struct zone_fstab *i = (struct zone_fstab *)m1; 807 struct zone_fstab *j = (struct zone_fstab *)m2; 808 809 return (strcmp(i->zone_fs_dir, j->zone_fs_dir)); 810 } 811 812 /* 813 * Fork and exec (and wait for) the mentioned binary with the provided 814 * arguments. Returns (-1) if something went wrong with fork(2) or exec(2), 815 * returns the exit status otherwise. 816 * 817 * If we were unable to exec the provided pathname (for whatever 818 * reason), we return the special token ZEXIT_EXEC. The current value 819 * of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the 820 * consumers of this function; any future consumers must make sure this 821 * remains the case. 822 */ 823 static int 824 forkexec(zlog_t *zlogp, const char *path, char *const argv[]) 825 { 826 pid_t child_pid; 827 int child_status = 0; 828 829 /* 830 * Do not let another thread localize a message while we are forking. 831 */ 832 (void) mutex_lock(&msglock); 833 child_pid = fork(); 834 (void) mutex_unlock(&msglock); 835 if (child_pid == -1) { 836 zerror(zlogp, B_TRUE, "could not fork for %s", argv[0]); 837 return (-1); 838 } else if (child_pid == 0) { 839 closefrom(0); 840 (void) execv(path, argv); 841 /* 842 * Since we are in the child, there is no point calling zerror() 843 * since there is nobody waiting to consume it. So exit with a 844 * special code that the parent will recognize and call zerror() 845 * accordingly. 846 */ 847 848 _exit(ZEXIT_EXEC); 849 } else { 850 (void) waitpid(child_pid, &child_status, 0); 851 } 852 853 if (WIFSIGNALED(child_status)) { 854 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 855 "signal %d", path, WTERMSIG(child_status)); 856 return (-1); 857 } 858 assert(WIFEXITED(child_status)); 859 if (WEXITSTATUS(child_status) == ZEXIT_EXEC) { 860 zerror(zlogp, B_FALSE, "failed to exec %s", path); 861 return (-1); 862 } 863 return (WEXITSTATUS(child_status)); 864 } 865 866 static int 867 dofsck(zlog_t *zlogp, const char *fstype, const char *rawdev) 868 { 869 char cmdbuf[MAXPATHLEN]; 870 char *argv[4]; 871 int status; 872 873 /* 874 * We could alternatively have called /usr/sbin/fsck -F <fstype>, but 875 * that would cost us an extra fork/exec without buying us anything. 876 */ 877 if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/fsck", fstype) 878 > sizeof (cmdbuf)) { 879 zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); 880 return (-1); 881 } 882 883 argv[0] = "fsck"; 884 argv[1] = "-m"; 885 argv[2] = (char *)rawdev; 886 argv[3] = NULL; 887 888 status = forkexec(zlogp, cmdbuf, argv); 889 if (status == 0 || status == -1) 890 return (status); 891 zerror(zlogp, B_FALSE, "fsck of '%s' failed with exit status %d; " 892 "run fsck manually", rawdev, status); 893 return (-1); 894 } 895 896 static int 897 domount(zlog_t *zlogp, const char *fstype, const char *opts, 898 const char *special, const char *directory) 899 { 900 char cmdbuf[MAXPATHLEN]; 901 char *argv[6]; 902 int status; 903 904 /* 905 * We could alternatively have called /usr/sbin/mount -F <fstype>, but 906 * that would cost us an extra fork/exec without buying us anything. 907 */ 908 if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/mount", fstype) 909 > sizeof (cmdbuf)) { 910 zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); 911 return (-1); 912 } 913 argv[0] = "mount"; 914 if (opts[0] == '\0') { 915 argv[1] = (char *)special; 916 argv[2] = (char *)directory; 917 argv[3] = NULL; 918 } else { 919 argv[1] = "-o"; 920 argv[2] = (char *)opts; 921 argv[3] = (char *)special; 922 argv[4] = (char *)directory; 923 argv[5] = NULL; 924 } 925 926 status = forkexec(zlogp, cmdbuf, argv); 927 if (status == 0 || status == -1) 928 return (status); 929 if (opts[0] == '\0') 930 zerror(zlogp, B_FALSE, "\"%s %s %s\" " 931 "failed with exit code %d", 932 cmdbuf, special, directory, status); 933 else 934 zerror(zlogp, B_FALSE, "\"%s -o %s %s %s\" " 935 "failed with exit code %d", 936 cmdbuf, opts, special, directory, status); 937 return (-1); 938 } 939 940 /* 941 * Make sure if a given path exists, it is not a sym-link, and is a directory. 942 */ 943 static int 944 check_path(zlog_t *zlogp, const char *path) 945 { 946 struct stat statbuf; 947 char respath[MAXPATHLEN]; 948 int res; 949 950 if (lstat(path, &statbuf) != 0) { 951 if (errno == ENOENT) 952 return (0); 953 zerror(zlogp, B_TRUE, "can't stat %s", path); 954 return (-1); 955 } 956 if (S_ISLNK(statbuf.st_mode)) { 957 zerror(zlogp, B_FALSE, "%s is a symlink", path); 958 return (-1); 959 } 960 if (!S_ISDIR(statbuf.st_mode)) { 961 zerror(zlogp, B_FALSE, "%s is not a directory", path); 962 return (-1); 963 } 964 if ((res = resolvepath(path, respath, sizeof (respath))) == -1) { 965 zerror(zlogp, B_TRUE, "unable to resolve path %s", path); 966 return (-1); 967 } 968 respath[res] = '\0'; 969 if (strcmp(path, respath) != 0) { 970 /* 971 * We don't like ".."s and "."s throwing us off 972 */ 973 zerror(zlogp, B_FALSE, "%s is not a canonical path", path); 974 return (-1); 975 } 976 return (0); 977 } 978 979 /* 980 * Check every component of rootpath/relpath. If any component fails (ie, 981 * exists but isn't the canonical path to a directory), it is returned in 982 * badpath, which is assumed to be at least of size MAXPATHLEN. 983 * 984 * Relpath must begin with '/'. 985 */ 986 static boolean_t 987 valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *relpath) 988 { 989 char abspath[MAXPATHLEN], *slashp; 990 991 /* 992 * Make sure abspath has at least one '/' after its rootpath 993 * component, and ends with '/'. 994 */ 995 if (snprintf(abspath, sizeof (abspath), "%s%s/", rootpath, relpath) > 996 sizeof (abspath)) { 997 zerror(zlogp, B_FALSE, "pathname %s%s is too long", rootpath, 998 relpath); 999 return (B_FALSE); 1000 } 1001 1002 slashp = &abspath[strlen(rootpath)]; 1003 assert(*slashp == '/'); 1004 do { 1005 *slashp = '\0'; 1006 if (check_path(zlogp, abspath) != 0) 1007 return (B_FALSE); 1008 *slashp = '/'; 1009 slashp++; 1010 } while ((slashp = strchr(slashp, '/')) != NULL); 1011 return (B_TRUE); 1012 } 1013 1014 static int 1015 mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath) 1016 { 1017 char path[MAXPATHLEN]; 1018 char specpath[MAXPATHLEN]; 1019 char optstr[MAX_MNTOPT_STR]; 1020 zone_fsopt_t *optptr; 1021 1022 if (!valid_mount_path(zlogp, rootpath, fsptr->zone_fs_dir)) { 1023 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 1024 rootpath, fsptr->zone_fs_dir); 1025 return (-1); 1026 } 1027 1028 if (make_one_dir(zlogp, rootpath, fsptr->zone_fs_dir, 1029 DEFAULT_DIR_MODE) != 0) 1030 return (-1); 1031 1032 (void) snprintf(path, sizeof (path), "%s%s", rootpath, 1033 fsptr->zone_fs_dir); 1034 1035 if (strlen(fsptr->zone_fs_special) == 0) { 1036 /* 1037 * A zero-length special is how we distinguish IPDs from 1038 * general-purpose FSs. Make sure it mounts from a place that 1039 * can be seen via the alternate zone's root. 1040 */ 1041 if (snprintf(specpath, sizeof (specpath), "%s%s", 1042 zonecfg_get_root(), fsptr->zone_fs_dir) >= 1043 sizeof (specpath)) { 1044 zerror(zlogp, B_FALSE, "cannot mount %s: path too " 1045 "long in alternate root", fsptr->zone_fs_dir); 1046 return (-1); 1047 } 1048 if (zonecfg_in_alt_root()) 1049 resolve_lofs(zlogp, specpath, sizeof (specpath)); 1050 if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, 1051 specpath, path) != 0) { 1052 zerror(zlogp, B_TRUE, "failed to loopback mount %s", 1053 specpath); 1054 return (-1); 1055 } 1056 return (0); 1057 } 1058 1059 /* 1060 * In general the strategy here is to do just as much verification as 1061 * necessary to avoid crashing or otherwise doing something bad; if the 1062 * administrator initiated the operation via zoneadm(1m), he'll get 1063 * auto-verification which will let him know what's wrong. If he 1064 * modifies the zone configuration of a running zone and doesn't attempt 1065 * to verify that it's OK we won't crash but won't bother trying to be 1066 * too helpful either. zoneadm verify is only a couple keystrokes away. 1067 */ 1068 if (!zonecfg_valid_fs_type(fsptr->zone_fs_type)) { 1069 zerror(zlogp, B_FALSE, "cannot mount %s on %s: " 1070 "invalid file-system type %s", fsptr->zone_fs_special, 1071 fsptr->zone_fs_dir, fsptr->zone_fs_type); 1072 return (-1); 1073 } 1074 1075 /* 1076 * If we're looking at an alternate root environment, then construct 1077 * read-only loopback mounts as necessary. For all lofs mounts, make 1078 * sure that the 'special' entry points inside the alternate root. (We 1079 * don't do this with other mounts, as devfs isn't in the alternate 1080 * root, and we need to assume the device environment is roughly the 1081 * same.) 1082 */ 1083 if (zonecfg_in_alt_root()) { 1084 struct stat64 st; 1085 1086 if (stat64(fsptr->zone_fs_special, &st) != -1 && 1087 S_ISBLK(st.st_mode) && 1088 check_lofs_needed(zlogp, fsptr) == -1) 1089 return (-1); 1090 if (strcmp(fsptr->zone_fs_type, MNTTYPE_LOFS) == 0) { 1091 if (snprintf(specpath, sizeof (specpath), "%s%s", 1092 zonecfg_get_root(), fsptr->zone_fs_special) >= 1093 sizeof (specpath)) { 1094 zerror(zlogp, B_FALSE, "cannot mount %s: path " 1095 "too long in alternate root", 1096 fsptr->zone_fs_special); 1097 return (-1); 1098 } 1099 resolve_lofs(zlogp, specpath, sizeof (specpath)); 1100 (void) strlcpy(fsptr->zone_fs_special, specpath, 1101 sizeof (fsptr->zone_fs_special)); 1102 } 1103 } 1104 1105 /* 1106 * Run 'fsck -m' if there's a device to fsck. 1107 */ 1108 if (fsptr->zone_fs_raw[0] != '\0' && 1109 dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_raw) != 0) 1110 return (-1); 1111 1112 /* 1113 * Build up mount option string. 1114 */ 1115 optstr[0] = '\0'; 1116 if (fsptr->zone_fs_options != NULL) { 1117 (void) strlcpy(optstr, fsptr->zone_fs_options->zone_fsopt_opt, 1118 sizeof (optstr)); 1119 for (optptr = fsptr->zone_fs_options->zone_fsopt_next; 1120 optptr != NULL; optptr = optptr->zone_fsopt_next) { 1121 (void) strlcat(optstr, ",", sizeof (optstr)); 1122 (void) strlcat(optstr, optptr->zone_fsopt_opt, 1123 sizeof (optstr)); 1124 } 1125 } 1126 return (domount(zlogp, fsptr->zone_fs_type, optstr, 1127 fsptr->zone_fs_special, path)); 1128 } 1129 1130 static void 1131 free_fs_data(struct zone_fstab *fsarray, uint_t nelem) 1132 { 1133 uint_t i; 1134 1135 if (fsarray == NULL) 1136 return; 1137 for (i = 0; i < nelem; i++) 1138 zonecfg_free_fs_option_list(fsarray[i].zone_fs_options); 1139 free(fsarray); 1140 } 1141 1142 /* 1143 * This function constructs the miniroot-like "scratch zone" environment. If 1144 * it returns B_FALSE, then the error has already been logged. 1145 */ 1146 static boolean_t 1147 build_mounted(zlog_t *zlogp, char *rootpath, size_t rootlen, 1148 const char *zonepath) 1149 { 1150 char tmp[MAXPATHLEN], fromdir[MAXPATHLEN]; 1151 char luroot[MAXPATHLEN]; 1152 const char **cpp; 1153 static const char *mkdirs[] = { 1154 "/system", "/system/contract", "/proc", "/dev", "/tmp", 1155 "/a", NULL 1156 }; 1157 static const char *localdirs[] = { 1158 "/etc", "/var", NULL 1159 }; 1160 static const char *loopdirs[] = { 1161 "/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform", 1162 "/usr", NULL 1163 }; 1164 static const char *tmpdirs[] = { 1165 "/tmp", "/var/run", NULL 1166 }; 1167 FILE *fp; 1168 struct stat st; 1169 char *altstr; 1170 uuid_t uuid; 1171 1172 /* 1173 * Construct a small Solaris environment, including the zone root 1174 * mounted on '/a' inside that environment. 1175 */ 1176 resolve_lofs(zlogp, rootpath, rootlen); 1177 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 1178 resolve_lofs(zlogp, luroot, sizeof (luroot)); 1179 (void) snprintf(tmp, sizeof (tmp), "%s/bin", luroot); 1180 (void) symlink("./usr/bin", tmp); 1181 1182 /* 1183 * These are mostly special mount points; not handled here. (See 1184 * zone_mount_early.) 1185 */ 1186 for (cpp = mkdirs; *cpp != NULL; cpp++) { 1187 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1188 if (mkdir(tmp, 0755) != 0) { 1189 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1190 return (B_FALSE); 1191 } 1192 } 1193 1194 /* 1195 * These are mounted read-write from the zone undergoing upgrade. We 1196 * must be careful not to 'leak' things from the main system into the 1197 * zone, and this accomplishes that goal. 1198 */ 1199 for (cpp = localdirs; *cpp != NULL; cpp++) { 1200 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1201 (void) snprintf(fromdir, sizeof (fromdir), "%s%s", rootpath, 1202 *cpp); 1203 if (mkdir(tmp, 0755) != 0) { 1204 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1205 return (B_FALSE); 1206 } 1207 if (domount(zlogp, MNTTYPE_LOFS, "", fromdir, tmp) != 0) { 1208 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1209 *cpp); 1210 return (B_FALSE); 1211 } 1212 } 1213 1214 /* 1215 * These are things mounted read-only from the running system because 1216 * they contain binaries that must match system. 1217 */ 1218 for (cpp = loopdirs; *cpp != NULL; cpp++) { 1219 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1220 if (mkdir(tmp, 0755) != 0) { 1221 if (errno != EEXIST) { 1222 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1223 return (B_FALSE); 1224 } 1225 if (lstat(tmp, &st) != 0) { 1226 zerror(zlogp, B_TRUE, "cannot stat %s", tmp); 1227 return (B_FALSE); 1228 } 1229 /* 1230 * Ignore any non-directories encountered. These are 1231 * things that have been converted into symlinks 1232 * (/etc/fs and /etc/lib) and no longer need a lofs 1233 * fixup. 1234 */ 1235 if (!S_ISDIR(st.st_mode)) 1236 continue; 1237 } 1238 if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, *cpp, 1239 tmp) != 0) { 1240 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1241 *cpp); 1242 return (B_FALSE); 1243 } 1244 } 1245 1246 /* 1247 * These are things with tmpfs mounted inside. 1248 */ 1249 for (cpp = tmpdirs; *cpp != NULL; cpp++) { 1250 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1251 if (mkdir(tmp, 0755) != 0 && errno != EEXIST) { 1252 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1253 return (B_FALSE); 1254 } 1255 if (domount(zlogp, MNTTYPE_TMPFS, "", "swap", tmp) != 0) { 1256 zerror(zlogp, B_TRUE, "cannot mount swap on %s", *cpp); 1257 return (B_FALSE); 1258 } 1259 } 1260 1261 /* 1262 * This is here to support lucopy. If there's an instance of this same 1263 * zone on the current running system, then we mount its root up as 1264 * read-only inside the scratch zone. 1265 */ 1266 (void) zonecfg_get_uuid(zone_name, uuid); 1267 altstr = strdup(zonecfg_get_root()); 1268 if (altstr == NULL) { 1269 zerror(zlogp, B_TRUE, "out of memory"); 1270 return (B_FALSE); 1271 } 1272 zonecfg_set_root(""); 1273 (void) strlcpy(tmp, zone_name, sizeof (tmp)); 1274 (void) zonecfg_get_name_by_uuid(uuid, tmp, sizeof (tmp)); 1275 if (zone_get_rootpath(tmp, fromdir, sizeof (fromdir)) == Z_OK && 1276 strcmp(fromdir, rootpath) != 0) { 1277 (void) snprintf(tmp, sizeof (tmp), "%s/b", luroot); 1278 if (mkdir(tmp, 0755) != 0) { 1279 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1280 return (B_FALSE); 1281 } 1282 if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, fromdir, 1283 tmp) != 0) { 1284 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1285 fromdir); 1286 return (B_FALSE); 1287 } 1288 } 1289 zonecfg_set_root(altstr); 1290 free(altstr); 1291 1292 if ((fp = zonecfg_open_scratch(luroot, B_TRUE)) == NULL) { 1293 zerror(zlogp, B_TRUE, "cannot open zone mapfile"); 1294 return (B_FALSE); 1295 } 1296 (void) ftruncate(fileno(fp), 0); 1297 if (zonecfg_add_scratch(fp, zone_name, kernzone, "/") == -1) { 1298 zerror(zlogp, B_TRUE, "cannot add zone mapfile entry"); 1299 } 1300 zonecfg_close_scratch(fp); 1301 (void) snprintf(tmp, sizeof (tmp), "%s/a", luroot); 1302 if (domount(zlogp, MNTTYPE_LOFS, "", rootpath, tmp) != 0) 1303 return (B_FALSE); 1304 (void) strlcpy(rootpath, tmp, rootlen); 1305 return (B_TRUE); 1306 } 1307 1308 static int 1309 mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd) 1310 { 1311 char rootpath[MAXPATHLEN]; 1312 char zonepath[MAXPATHLEN]; 1313 int num_fs = 0, i; 1314 struct zone_fstab fstab, *fs_ptr = NULL, *tmp_ptr; 1315 struct zone_fstab *fsp; 1316 zone_dochandle_t handle = NULL; 1317 zone_state_t zstate; 1318 1319 if (zone_get_state(zone_name, &zstate) != Z_OK || 1320 (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) { 1321 zerror(zlogp, B_FALSE, 1322 "zone must be in '%s' or '%s' state to mount file-systems", 1323 zone_state_str(ZONE_STATE_READY), 1324 zone_state_str(ZONE_STATE_MOUNTED)); 1325 goto bad; 1326 } 1327 1328 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { 1329 zerror(zlogp, B_TRUE, "unable to determine zone path"); 1330 goto bad; 1331 } 1332 1333 if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { 1334 zerror(zlogp, B_TRUE, "unable to determine zone root"); 1335 goto bad; 1336 } 1337 1338 if ((handle = zonecfg_init_handle()) == NULL) { 1339 zerror(zlogp, B_TRUE, 1340 "could not get zone configuration handle"); 1341 goto bad; 1342 } 1343 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK || 1344 zonecfg_setfsent(handle) != Z_OK) { 1345 zerror(zlogp, B_FALSE, "invalid configuration"); 1346 goto bad; 1347 } 1348 1349 /* 1350 * /dev in the zone is loopback'd from the external /dev repository, 1351 * in order to provide a largely read-only semantic. But because 1352 * processes in the zone need to be able to chown, chmod, etc. zone 1353 * /dev files, we can't use a 'ro' lofs mount. Instead we use a 1354 * special mode just for zones, "zonedevfs". 1355 * 1356 * In the future we should front /dev with a full-fledged filesystem. 1357 */ 1358 num_fs++; 1359 if ((tmp_ptr = realloc(fs_ptr, num_fs * sizeof (*tmp_ptr))) == NULL) { 1360 zerror(zlogp, B_TRUE, "memory allocation failed"); 1361 num_fs--; 1362 goto bad; 1363 } 1364 fs_ptr = tmp_ptr; 1365 fsp = &fs_ptr[num_fs - 1]; 1366 /* 1367 * Note that mount_one will prepend the alternate root to 1368 * zone_fs_special and do the necessary resolution, so all that is 1369 * needed here is to strip the root added by zone_get_zonepath. 1370 */ 1371 (void) strlcpy(fsp->zone_fs_dir, "/dev", sizeof (fsp->zone_fs_dir)); 1372 (void) snprintf(fsp->zone_fs_special, sizeof (fsp->zone_fs_special), 1373 "%s/dev", zonepath + strlen(zonecfg_get_root())); 1374 fsp->zone_fs_raw[0] = '\0'; 1375 (void) strlcpy(fsp->zone_fs_type, MNTTYPE_LOFS, 1376 sizeof (fsp->zone_fs_type)); 1377 fsp->zone_fs_options = NULL; 1378 if (zonecfg_add_fs_option(fsp, MNTOPT_LOFS_ZONEDEVFS) != Z_OK) { 1379 zerror(zlogp, B_FALSE, "error adding property"); 1380 goto bad; 1381 } 1382 1383 /* 1384 * Iterate through the rest of the filesystems, first the IPDs, then 1385 * the general FSs. Sort them all, then mount them in sorted order. 1386 * This is to make sure the higher level directories (e.g., /usr) 1387 * get mounted before any beneath them (e.g., /usr/local). 1388 */ 1389 if (zonecfg_setipdent(handle) != Z_OK) { 1390 zerror(zlogp, B_FALSE, "invalid configuration"); 1391 goto bad; 1392 } 1393 while (zonecfg_getipdent(handle, &fstab) == Z_OK) { 1394 num_fs++; 1395 if ((tmp_ptr = realloc(fs_ptr, 1396 num_fs * sizeof (*tmp_ptr))) == NULL) { 1397 zerror(zlogp, B_TRUE, "memory allocation failed"); 1398 num_fs--; 1399 (void) zonecfg_endipdent(handle); 1400 goto bad; 1401 } 1402 fs_ptr = tmp_ptr; 1403 fsp = &fs_ptr[num_fs - 1]; 1404 /* 1405 * IPDs logically only have a mount point; all other properties 1406 * are implied. 1407 */ 1408 (void) strlcpy(fsp->zone_fs_dir, 1409 fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir)); 1410 fsp->zone_fs_special[0] = '\0'; 1411 fsp->zone_fs_raw[0] = '\0'; 1412 fsp->zone_fs_type[0] = '\0'; 1413 fsp->zone_fs_options = NULL; 1414 } 1415 (void) zonecfg_endipdent(handle); 1416 1417 if (zonecfg_setfsent(handle) != Z_OK) { 1418 zerror(zlogp, B_FALSE, "invalid configuration"); 1419 goto bad; 1420 } 1421 while (zonecfg_getfsent(handle, &fstab) == Z_OK) { 1422 /* 1423 * ZFS filesystems will not be accessible under an alternate 1424 * root, since the pool will not be known. Ignore them in this 1425 * case. 1426 */ 1427 if (mount_cmd && strcmp(fstab.zone_fs_type, MNTTYPE_ZFS) == 0) 1428 continue; 1429 1430 num_fs++; 1431 if ((tmp_ptr = realloc(fs_ptr, 1432 num_fs * sizeof (*tmp_ptr))) == NULL) { 1433 zerror(zlogp, B_TRUE, "memory allocation failed"); 1434 num_fs--; 1435 (void) zonecfg_endfsent(handle); 1436 goto bad; 1437 } 1438 fs_ptr = tmp_ptr; 1439 fsp = &fs_ptr[num_fs - 1]; 1440 (void) strlcpy(fsp->zone_fs_dir, 1441 fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir)); 1442 (void) strlcpy(fsp->zone_fs_special, fstab.zone_fs_special, 1443 sizeof (fsp->zone_fs_special)); 1444 (void) strlcpy(fsp->zone_fs_raw, fstab.zone_fs_raw, 1445 sizeof (fsp->zone_fs_raw)); 1446 (void) strlcpy(fsp->zone_fs_type, fstab.zone_fs_type, 1447 sizeof (fsp->zone_fs_type)); 1448 fsp->zone_fs_options = fstab.zone_fs_options; 1449 } 1450 (void) zonecfg_endfsent(handle); 1451 zonecfg_fini_handle(handle); 1452 handle = NULL; 1453 1454 /* 1455 * If we're mounting a zone for administration, then we need to set up 1456 * the "/a" environment inside the zone so that the commands that run 1457 * in there have access to both the running system's utilities and the 1458 * to-be-modified zone's files. 1459 */ 1460 if (mount_cmd && 1461 !build_mounted(zlogp, rootpath, sizeof (rootpath), zonepath)) 1462 goto bad; 1463 1464 qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare); 1465 for (i = 0; i < num_fs; i++) { 1466 if (mount_cmd && strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) { 1467 size_t slen = strlen(rootpath) - 2; 1468 1469 /* /dev is special and always goes at the top */ 1470 rootpath[slen] = '\0'; 1471 if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0) 1472 goto bad; 1473 rootpath[slen] = '/'; 1474 continue; 1475 } 1476 if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0) 1477 goto bad; 1478 } 1479 free_fs_data(fs_ptr, num_fs); 1480 1481 /* 1482 * Everything looks fine. 1483 */ 1484 return (0); 1485 1486 bad: 1487 if (handle != NULL) 1488 zonecfg_fini_handle(handle); 1489 free_fs_data(fs_ptr, num_fs); 1490 return (-1); 1491 } 1492 1493 /* caller makes sure neither parameter is NULL */ 1494 static int 1495 addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr) 1496 { 1497 int prefixlen; 1498 1499 prefixlen = atoi(prefixstr); 1500 if (prefixlen < 0 || prefixlen > maxprefixlen) 1501 return (1); 1502 while (prefixlen > 0) { 1503 if (prefixlen >= 8) { 1504 *maskstr++ = 0xFF; 1505 prefixlen -= 8; 1506 continue; 1507 } 1508 *maskstr |= 1 << (8 - prefixlen); 1509 prefixlen--; 1510 } 1511 return (0); 1512 } 1513 1514 /* 1515 * Tear down all interfaces belonging to the given zone. This should 1516 * be called with the zone in a state other than "running", so that 1517 * interfaces can't be assigned to the zone after this returns. 1518 * 1519 * If anything goes wrong, log an error message and return an error. 1520 */ 1521 static int 1522 unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id) 1523 { 1524 struct lifnum lifn; 1525 struct lifconf lifc; 1526 struct lifreq *lifrp, lifrl; 1527 int64_t lifc_flags = LIFC_NOXMIT | LIFC_ALLZONES; 1528 int num_ifs, s, i, ret_code = 0; 1529 uint_t bufsize; 1530 char *buf = NULL; 1531 1532 if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { 1533 zerror(zlogp, B_TRUE, "could not get socket"); 1534 ret_code = -1; 1535 goto bad; 1536 } 1537 lifn.lifn_family = AF_UNSPEC; 1538 lifn.lifn_flags = (int)lifc_flags; 1539 if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) { 1540 zerror(zlogp, B_TRUE, 1541 "could not determine number of interfaces"); 1542 ret_code = -1; 1543 goto bad; 1544 } 1545 num_ifs = lifn.lifn_count; 1546 bufsize = num_ifs * sizeof (struct lifreq); 1547 if ((buf = malloc(bufsize)) == NULL) { 1548 zerror(zlogp, B_TRUE, "memory allocation failed"); 1549 ret_code = -1; 1550 goto bad; 1551 } 1552 lifc.lifc_family = AF_UNSPEC; 1553 lifc.lifc_flags = (int)lifc_flags; 1554 lifc.lifc_len = bufsize; 1555 lifc.lifc_buf = buf; 1556 if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) { 1557 zerror(zlogp, B_TRUE, "could not get configured interfaces"); 1558 ret_code = -1; 1559 goto bad; 1560 } 1561 lifrp = lifc.lifc_req; 1562 for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--, lifrp++) { 1563 (void) close(s); 1564 if ((s = socket(lifrp->lifr_addr.ss_family, SOCK_DGRAM, 0)) < 1565 0) { 1566 zerror(zlogp, B_TRUE, "%s: could not get socket", 1567 lifrl.lifr_name); 1568 ret_code = -1; 1569 continue; 1570 } 1571 (void) memset(&lifrl, 0, sizeof (lifrl)); 1572 (void) strncpy(lifrl.lifr_name, lifrp->lifr_name, 1573 sizeof (lifrl.lifr_name)); 1574 if (ioctl(s, SIOCGLIFZONE, (caddr_t)&lifrl) < 0) { 1575 zerror(zlogp, B_TRUE, 1576 "%s: could not determine zone interface belongs to", 1577 lifrl.lifr_name); 1578 ret_code = -1; 1579 continue; 1580 } 1581 if (lifrl.lifr_zoneid == zone_id) { 1582 if (ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifrl) < 0) { 1583 zerror(zlogp, B_TRUE, 1584 "%s: could not remove interface", 1585 lifrl.lifr_name); 1586 ret_code = -1; 1587 continue; 1588 } 1589 } 1590 } 1591 bad: 1592 if (s > 0) 1593 (void) close(s); 1594 if (buf) 1595 free(buf); 1596 return (ret_code); 1597 } 1598 1599 static union sockunion { 1600 struct sockaddr sa; 1601 struct sockaddr_in sin; 1602 struct sockaddr_dl sdl; 1603 struct sockaddr_in6 sin6; 1604 } so_dst, so_ifp; 1605 1606 static struct { 1607 struct rt_msghdr hdr; 1608 char space[512]; 1609 } rtmsg; 1610 1611 static int 1612 salen(struct sockaddr *sa) 1613 { 1614 switch (sa->sa_family) { 1615 case AF_INET: 1616 return (sizeof (struct sockaddr_in)); 1617 case AF_LINK: 1618 return (sizeof (struct sockaddr_dl)); 1619 case AF_INET6: 1620 return (sizeof (struct sockaddr_in6)); 1621 default: 1622 return (sizeof (struct sockaddr)); 1623 } 1624 } 1625 1626 #define ROUNDUP_LONG(a) \ 1627 ((a) > 0 ? (1 + (((a) - 1) | (sizeof (long) - 1))) : sizeof (long)) 1628 1629 /* 1630 * Look up which zone is using a given IP address. The address in question 1631 * is expected to have been stuffed into the structure to which lifr points 1632 * via a previous SIOCGLIFADDR ioctl(). 1633 * 1634 * This is done using black router socket magic. 1635 * 1636 * Return the name of the zone on success or NULL on failure. 1637 * 1638 * This is a lot of code for a simple task; a new ioctl request to take care 1639 * of this might be a useful RFE. 1640 */ 1641 1642 static char * 1643 who_is_using(zlog_t *zlogp, struct lifreq *lifr) 1644 { 1645 static char answer[ZONENAME_MAX]; 1646 pid_t pid; 1647 int s, rlen, l, i; 1648 char *cp = rtmsg.space; 1649 struct sockaddr_dl *ifp = NULL; 1650 struct sockaddr *sa; 1651 char save_if_name[LIFNAMSIZ]; 1652 1653 answer[0] = '\0'; 1654 1655 pid = getpid(); 1656 if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) < 0) { 1657 zerror(zlogp, B_TRUE, "could not get routing socket"); 1658 return (NULL); 1659 } 1660 1661 if (lifr->lifr_addr.ss_family == AF_INET) { 1662 struct sockaddr_in *sin4; 1663 1664 so_dst.sa.sa_family = AF_INET; 1665 sin4 = (struct sockaddr_in *)&lifr->lifr_addr; 1666 so_dst.sin.sin_addr = sin4->sin_addr; 1667 } else { 1668 struct sockaddr_in6 *sin6; 1669 1670 so_dst.sa.sa_family = AF_INET6; 1671 sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr; 1672 so_dst.sin6.sin6_addr = sin6->sin6_addr; 1673 } 1674 1675 so_ifp.sa.sa_family = AF_LINK; 1676 1677 (void) memset(&rtmsg, 0, sizeof (rtmsg)); 1678 rtmsg.hdr.rtm_type = RTM_GET; 1679 rtmsg.hdr.rtm_flags = RTF_UP | RTF_HOST; 1680 rtmsg.hdr.rtm_version = RTM_VERSION; 1681 rtmsg.hdr.rtm_seq = ++rts_seqno; 1682 rtmsg.hdr.rtm_addrs = RTA_IFP | RTA_DST; 1683 1684 l = ROUNDUP_LONG(salen(&so_dst.sa)); 1685 (void) memmove(cp, &(so_dst), l); 1686 cp += l; 1687 l = ROUNDUP_LONG(salen(&so_ifp.sa)); 1688 (void) memmove(cp, &(so_ifp), l); 1689 cp += l; 1690 1691 rtmsg.hdr.rtm_msglen = l = cp - (char *)&rtmsg; 1692 1693 if ((rlen = write(s, &rtmsg, l)) < 0) { 1694 zerror(zlogp, B_TRUE, "writing to routing socket"); 1695 return (NULL); 1696 } else if (rlen < (int)rtmsg.hdr.rtm_msglen) { 1697 zerror(zlogp, B_TRUE, 1698 "write to routing socket got only %d for len\n", rlen); 1699 return (NULL); 1700 } 1701 do { 1702 l = read(s, &rtmsg, sizeof (rtmsg)); 1703 } while (l > 0 && (rtmsg.hdr.rtm_seq != rts_seqno || 1704 rtmsg.hdr.rtm_pid != pid)); 1705 if (l < 0) { 1706 zerror(zlogp, B_TRUE, "reading from routing socket"); 1707 return (NULL); 1708 } 1709 1710 if (rtmsg.hdr.rtm_version != RTM_VERSION) { 1711 zerror(zlogp, B_FALSE, 1712 "routing message version %d not understood", 1713 rtmsg.hdr.rtm_version); 1714 return (NULL); 1715 } 1716 if (rtmsg.hdr.rtm_msglen != (ushort_t)l) { 1717 zerror(zlogp, B_FALSE, "message length mismatch, " 1718 "expected %d bytes, returned %d bytes", 1719 rtmsg.hdr.rtm_msglen, l); 1720 return (NULL); 1721 } 1722 if (rtmsg.hdr.rtm_errno != 0) { 1723 errno = rtmsg.hdr.rtm_errno; 1724 zerror(zlogp, B_TRUE, "RTM_GET routing socket message"); 1725 return (NULL); 1726 } 1727 if ((rtmsg.hdr.rtm_addrs & RTA_IFP) == 0) { 1728 zerror(zlogp, B_FALSE, "interface not found"); 1729 return (NULL); 1730 } 1731 cp = ((char *)(&rtmsg.hdr + 1)); 1732 for (i = 1; i != 0; i <<= 1) { 1733 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1734 sa = (struct sockaddr *)cp; 1735 if (i != RTA_IFP) { 1736 if ((i & rtmsg.hdr.rtm_addrs) != 0) 1737 cp += ROUNDUP_LONG(salen(sa)); 1738 continue; 1739 } 1740 if (sa->sa_family == AF_LINK && 1741 ((struct sockaddr_dl *)sa)->sdl_nlen != 0) 1742 ifp = (struct sockaddr_dl *)sa; 1743 break; 1744 } 1745 if (ifp == NULL) { 1746 zerror(zlogp, B_FALSE, "interface could not be determined"); 1747 return (NULL); 1748 } 1749 1750 /* 1751 * We need to set the I/F name to what we got above, then do the 1752 * appropriate ioctl to get its zone name. But lifr->lifr_name is 1753 * used by the calling function to do a REMOVEIF, so if we leave the 1754 * "good" zone's I/F name in place, *that* I/F will be removed instead 1755 * of the bad one. So we save the old (bad) I/F name before over- 1756 * writing it and doing the ioctl, then restore it after the ioctl. 1757 */ 1758 (void) strlcpy(save_if_name, lifr->lifr_name, sizeof (save_if_name)); 1759 (void) strncpy(lifr->lifr_name, ifp->sdl_data, ifp->sdl_nlen); 1760 lifr->lifr_name[ifp->sdl_nlen] = '\0'; 1761 i = ioctl(s, SIOCGLIFZONE, lifr); 1762 (void) strlcpy(lifr->lifr_name, save_if_name, sizeof (save_if_name)); 1763 if (i < 0) { 1764 zerror(zlogp, B_TRUE, 1765 "%s: could not determine the zone interface belongs to", 1766 lifr->lifr_name); 1767 return (NULL); 1768 } 1769 if (getzonenamebyid(lifr->lifr_zoneid, answer, sizeof (answer)) < 0) 1770 (void) snprintf(answer, sizeof (answer), "%d", 1771 lifr->lifr_zoneid); 1772 1773 if (strlen(answer) > 0) 1774 return (answer); 1775 return (NULL); 1776 } 1777 1778 typedef struct mcast_rtmsg_s { 1779 struct rt_msghdr m_rtm; 1780 union { 1781 struct { 1782 struct sockaddr_in m_dst; 1783 struct sockaddr_in m_gw; 1784 struct sockaddr_in m_netmask; 1785 } m_v4; 1786 struct { 1787 struct sockaddr_in6 m_dst; 1788 struct sockaddr_in6 m_gw; 1789 struct sockaddr_in6 m_netmask; 1790 } m_v6; 1791 } m_u; 1792 } mcast_rtmsg_t; 1793 #define m_dst4 m_u.m_v4.m_dst 1794 #define m_dst6 m_u.m_v6.m_dst 1795 #define m_gw4 m_u.m_v4.m_gw 1796 #define m_gw6 m_u.m_v6.m_gw 1797 #define m_netmask4 m_u.m_v4.m_netmask 1798 #define m_netmask6 m_u.m_v6.m_netmask 1799 1800 /* 1801 * Configures a single interface: a new virtual interface is added, based on 1802 * the physical interface nwiftabptr->zone_nwif_physical, with the address 1803 * specified in nwiftabptr->zone_nwif_address, for zone zone_id. Note that 1804 * the "address" can be an IPv6 address (with a /prefixlength required), an 1805 * IPv4 address (with a /prefixlength optional), or a name; for the latter, 1806 * an IPv4 name-to-address resolution will be attempted. 1807 * 1808 * A default interface route for multicast is created on the first IPv4 and 1809 * IPv6 interfaces (that have the IFF_MULTICAST flag set), respectively. 1810 * This should really be done in the init scripts if we ever allow zones to 1811 * modify the routing tables. 1812 * 1813 * If anything goes wrong, we log an detailed error message, attempt to tear 1814 * down whatever we set up and return an error. 1815 */ 1816 static int 1817 configure_one_interface(zlog_t *zlogp, zoneid_t zone_id, 1818 struct zone_nwiftab *nwiftabptr, boolean_t *mcast_rt_v4_setp, 1819 boolean_t *mcast_rt_v6_setp) 1820 { 1821 struct lifreq lifr; 1822 struct sockaddr_in netmask4; 1823 struct sockaddr_in6 netmask6; 1824 struct in_addr in4; 1825 struct in6_addr in6; 1826 sa_family_t af; 1827 char *slashp = strchr(nwiftabptr->zone_nwif_address, '/'); 1828 mcast_rtmsg_t mcast_rtmsg; 1829 int s; 1830 int rs; 1831 int rlen; 1832 boolean_t got_netmask = B_FALSE; 1833 char addrstr4[INET_ADDRSTRLEN]; 1834 int res; 1835 1836 res = zonecfg_valid_net_address(nwiftabptr->zone_nwif_address, &lifr); 1837 if (res != Z_OK) { 1838 zerror(zlogp, B_FALSE, "%s: %s", zonecfg_strerror(res), 1839 nwiftabptr->zone_nwif_address); 1840 return (-1); 1841 } 1842 af = lifr.lifr_addr.ss_family; 1843 if (af == AF_INET) 1844 in4 = ((struct sockaddr_in *)(&lifr.lifr_addr))->sin_addr; 1845 else 1846 in6 = ((struct sockaddr_in6 *)(&lifr.lifr_addr))->sin6_addr; 1847 1848 if ((s = socket(af, SOCK_DGRAM, 0)) < 0) { 1849 zerror(zlogp, B_TRUE, "could not get socket"); 1850 return (-1); 1851 } 1852 1853 (void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical, 1854 sizeof (lifr.lifr_name)); 1855 if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) { 1856 zerror(zlogp, B_TRUE, "%s: could not add interface", 1857 lifr.lifr_name); 1858 (void) close(s); 1859 return (-1); 1860 } 1861 1862 if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) { 1863 zerror(zlogp, B_TRUE, 1864 "%s: could not set IP address to %s", 1865 lifr.lifr_name, nwiftabptr->zone_nwif_address); 1866 goto bad; 1867 } 1868 1869 /* Preserve literal IPv4 address for later potential printing. */ 1870 if (af == AF_INET) 1871 (void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN); 1872 1873 lifr.lifr_zoneid = zone_id; 1874 if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) { 1875 zerror(zlogp, B_TRUE, "%s: could not place interface into zone", 1876 lifr.lifr_name); 1877 goto bad; 1878 } 1879 1880 if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) { 1881 got_netmask = B_TRUE; /* default setting will be correct */ 1882 } else { 1883 if (af == AF_INET) { 1884 /* 1885 * The IPv4 netmask can be determined either 1886 * directly if a prefix length was supplied with 1887 * the address or via the netmasks database. Not 1888 * being able to determine it is a common failure, 1889 * but it often is not fatal to operation of the 1890 * interface. In that case, a warning will be 1891 * printed after the rest of the interface's 1892 * parameters have been configured. 1893 */ 1894 (void) memset(&netmask4, 0, sizeof (netmask4)); 1895 if (slashp != NULL) { 1896 if (addr2netmask(slashp + 1, V4_ADDR_LEN, 1897 (uchar_t *)&netmask4.sin_addr) != 0) { 1898 *slashp = '/'; 1899 zerror(zlogp, B_FALSE, 1900 "%s: invalid prefix length in %s", 1901 lifr.lifr_name, 1902 nwiftabptr->zone_nwif_address); 1903 goto bad; 1904 } 1905 got_netmask = B_TRUE; 1906 } else if (getnetmaskbyaddr(in4, 1907 &netmask4.sin_addr) == 0) { 1908 got_netmask = B_TRUE; 1909 } 1910 if (got_netmask) { 1911 netmask4.sin_family = af; 1912 (void) memcpy(&lifr.lifr_addr, &netmask4, 1913 sizeof (netmask4)); 1914 } 1915 } else { 1916 (void) memset(&netmask6, 0, sizeof (netmask6)); 1917 if (addr2netmask(slashp + 1, V6_ADDR_LEN, 1918 (uchar_t *)&netmask6.sin6_addr) != 0) { 1919 *slashp = '/'; 1920 zerror(zlogp, B_FALSE, 1921 "%s: invalid prefix length in %s", 1922 lifr.lifr_name, 1923 nwiftabptr->zone_nwif_address); 1924 goto bad; 1925 } 1926 got_netmask = B_TRUE; 1927 netmask6.sin6_family = af; 1928 (void) memcpy(&lifr.lifr_addr, &netmask6, 1929 sizeof (netmask6)); 1930 } 1931 if (got_netmask && 1932 ioctl(s, SIOCSLIFNETMASK, (caddr_t)&lifr) < 0) { 1933 zerror(zlogp, B_TRUE, "%s: could not set netmask", 1934 lifr.lifr_name); 1935 goto bad; 1936 } 1937 1938 /* 1939 * This doesn't set the broadcast address at all. Rather, it 1940 * gets, then sets the interface's address, relying on the fact 1941 * that resetting the address will reset the broadcast address. 1942 */ 1943 if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { 1944 zerror(zlogp, B_TRUE, "%s: could not get address", 1945 lifr.lifr_name); 1946 goto bad; 1947 } 1948 if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) { 1949 zerror(zlogp, B_TRUE, 1950 "%s: could not reset broadcast address", 1951 lifr.lifr_name); 1952 goto bad; 1953 } 1954 } 1955 1956 if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { 1957 zerror(zlogp, B_TRUE, "%s: could not get flags", 1958 lifr.lifr_name); 1959 goto bad; 1960 } 1961 lifr.lifr_flags |= IFF_UP; 1962 if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { 1963 int save_errno = errno; 1964 char *zone_using; 1965 1966 /* 1967 * If we failed with something other than EADDRNOTAVAIL, 1968 * then skip to the end. Otherwise, look up our address, 1969 * then call a function to determine which zone is already 1970 * using that address. 1971 */ 1972 if (errno != EADDRNOTAVAIL) { 1973 zerror(zlogp, B_TRUE, 1974 "%s: could not bring interface up", lifr.lifr_name); 1975 goto bad; 1976 } 1977 if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { 1978 zerror(zlogp, B_TRUE, "%s: could not get address", 1979 lifr.lifr_name); 1980 goto bad; 1981 } 1982 zone_using = who_is_using(zlogp, &lifr); 1983 errno = save_errno; 1984 if (zone_using == NULL) 1985 zerror(zlogp, B_TRUE, 1986 "%s: could not bring interface up", lifr.lifr_name); 1987 else 1988 zerror(zlogp, B_TRUE, "%s: could not bring interface " 1989 "up: address in use by zone '%s'", lifr.lifr_name, 1990 zone_using); 1991 goto bad; 1992 } 1993 if ((lifr.lifr_flags & IFF_MULTICAST) && ((af == AF_INET && 1994 mcast_rt_v4_setp != NULL && *mcast_rt_v4_setp == B_FALSE) || 1995 (af == AF_INET6 && 1996 mcast_rt_v6_setp != NULL && *mcast_rt_v6_setp == B_FALSE))) { 1997 rs = socket(PF_ROUTE, SOCK_RAW, 0); 1998 if (rs < 0) { 1999 zerror(zlogp, B_TRUE, "%s: could not create " 2000 "routing socket", lifr.lifr_name); 2001 goto bad; 2002 } 2003 (void) shutdown(rs, 0); 2004 (void) memset((void *)&mcast_rtmsg, 0, sizeof (mcast_rtmsg_t)); 2005 mcast_rtmsg.m_rtm.rtm_msglen = sizeof (struct rt_msghdr) + 2006 3 * (af == AF_INET ? sizeof (struct sockaddr_in) : 2007 sizeof (struct sockaddr_in6)); 2008 mcast_rtmsg.m_rtm.rtm_version = RTM_VERSION; 2009 mcast_rtmsg.m_rtm.rtm_type = RTM_ADD; 2010 mcast_rtmsg.m_rtm.rtm_flags = RTF_UP; 2011 mcast_rtmsg.m_rtm.rtm_addrs = 2012 RTA_DST | RTA_GATEWAY | RTA_NETMASK; 2013 mcast_rtmsg.m_rtm.rtm_seq = ++rts_seqno; 2014 if (af == AF_INET) { 2015 mcast_rtmsg.m_dst4.sin_family = AF_INET; 2016 mcast_rtmsg.m_dst4.sin_addr.s_addr = 2017 htonl(INADDR_UNSPEC_GROUP); 2018 mcast_rtmsg.m_gw4.sin_family = AF_INET; 2019 mcast_rtmsg.m_gw4.sin_addr = in4; 2020 mcast_rtmsg.m_netmask4.sin_family = AF_INET; 2021 mcast_rtmsg.m_netmask4.sin_addr.s_addr = 2022 htonl(IN_CLASSD_NET); 2023 } else { 2024 mcast_rtmsg.m_dst6.sin6_family = AF_INET6; 2025 mcast_rtmsg.m_dst6.sin6_addr.s6_addr[0] = 0xffU; 2026 mcast_rtmsg.m_gw6.sin6_family = AF_INET6; 2027 mcast_rtmsg.m_gw6.sin6_addr = in6; 2028 mcast_rtmsg.m_netmask6.sin6_family = AF_INET6; 2029 mcast_rtmsg.m_netmask6.sin6_addr.s6_addr[0] = 0xffU; 2030 } 2031 rlen = write(rs, (char *)&mcast_rtmsg, 2032 mcast_rtmsg.m_rtm.rtm_msglen); 2033 if (rlen < mcast_rtmsg.m_rtm.rtm_msglen) { 2034 if (rlen < 0) { 2035 zerror(zlogp, B_TRUE, "%s: could not set " 2036 "default interface for multicast", 2037 lifr.lifr_name); 2038 } else { 2039 zerror(zlogp, B_FALSE, "%s: write to routing " 2040 "socket returned %d", lifr.lifr_name, rlen); 2041 } 2042 (void) close(rs); 2043 goto bad; 2044 } 2045 if (af == AF_INET) { 2046 *mcast_rt_v4_setp = B_TRUE; 2047 } else { 2048 *mcast_rt_v6_setp = B_TRUE; 2049 } 2050 (void) close(rs); 2051 } 2052 2053 if (!got_netmask) { 2054 /* 2055 * A common, but often non-fatal problem, is that the system 2056 * cannot find the netmask for an interface address. This is 2057 * often caused by it being only in /etc/inet/netmasks, but 2058 * /etc/nsswitch.conf says to use NIS or NIS+ and it's not 2059 * in that. This doesn't show up at boot because the netmask 2060 * is obtained from /etc/inet/netmasks when no network 2061 * interfaces are up, but isn't consulted when NIS/NIS+ is 2062 * available. We warn the user here that something like this 2063 * has happened and we're just running with a default and 2064 * possible incorrect netmask. 2065 */ 2066 char buffer[INET6_ADDRSTRLEN]; 2067 void *addr; 2068 2069 if (af == AF_INET) 2070 addr = &((struct sockaddr_in *) 2071 (&lifr.lifr_addr))->sin_addr; 2072 else 2073 addr = &((struct sockaddr_in6 *) 2074 (&lifr.lifr_addr))->sin6_addr; 2075 2076 /* Find out what netmask interface is going to be using */ 2077 if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 || 2078 inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL) 2079 goto bad; 2080 zerror(zlogp, B_FALSE, 2081 "WARNING: %s: no matching subnet found in netmasks(4) for " 2082 "%s; using default of %s.", 2083 lifr.lifr_name, addrstr4, buffer); 2084 } 2085 2086 (void) close(s); 2087 return (Z_OK); 2088 bad: 2089 (void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr); 2090 (void) close(s); 2091 return (-1); 2092 } 2093 2094 /* 2095 * Sets up network interfaces based on information from the zone configuration. 2096 * An IPv4 loopback interface is set up "for free", modeling the global system. 2097 * If any of the configuration interfaces were IPv6, then an IPv6 loopback 2098 * address is set up as well. 2099 * 2100 * If anything goes wrong, we log a general error message, attempt to tear down 2101 * whatever we set up, and return an error. 2102 */ 2103 static int 2104 configure_network_interfaces(zlog_t *zlogp) 2105 { 2106 zone_dochandle_t handle; 2107 struct zone_nwiftab nwiftab, loopback_iftab; 2108 boolean_t saw_v6 = B_FALSE; 2109 boolean_t mcast_rt_v4_set = B_FALSE; 2110 boolean_t mcast_rt_v6_set = B_FALSE; 2111 zoneid_t zoneid; 2112 2113 if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) { 2114 zerror(zlogp, B_TRUE, "unable to get zoneid"); 2115 return (-1); 2116 } 2117 2118 if ((handle = zonecfg_init_handle()) == NULL) { 2119 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2120 return (-1); 2121 } 2122 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2123 zerror(zlogp, B_FALSE, "invalid configuration"); 2124 zonecfg_fini_handle(handle); 2125 return (-1); 2126 } 2127 if (zonecfg_setnwifent(handle) == Z_OK) { 2128 for (;;) { 2129 struct in6_addr in6; 2130 2131 if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK) 2132 break; 2133 if (configure_one_interface(zlogp, zoneid, 2134 &nwiftab, &mcast_rt_v4_set, &mcast_rt_v6_set) != 2135 Z_OK) { 2136 (void) zonecfg_endnwifent(handle); 2137 zonecfg_fini_handle(handle); 2138 return (-1); 2139 } 2140 if (inet_pton(AF_INET6, nwiftab.zone_nwif_address, 2141 &in6) == 1) 2142 saw_v6 = B_TRUE; 2143 } 2144 (void) zonecfg_endnwifent(handle); 2145 } 2146 zonecfg_fini_handle(handle); 2147 (void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0", 2148 sizeof (loopback_iftab.zone_nwif_physical)); 2149 (void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1", 2150 sizeof (loopback_iftab.zone_nwif_address)); 2151 if (configure_one_interface(zlogp, zoneid, &loopback_iftab, NULL, NULL) 2152 != Z_OK) { 2153 return (-1); 2154 } 2155 if (saw_v6) { 2156 (void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128", 2157 sizeof (loopback_iftab.zone_nwif_address)); 2158 if (configure_one_interface(zlogp, zoneid, 2159 &loopback_iftab, NULL, NULL) != Z_OK) { 2160 return (-1); 2161 } 2162 } 2163 return (0); 2164 } 2165 2166 static int 2167 tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid, 2168 const struct sockaddr_storage *local, const struct sockaddr_storage *remote) 2169 { 2170 int fd; 2171 struct strioctl ioc; 2172 tcp_ioc_abort_conn_t conn; 2173 int error; 2174 2175 conn.ac_local = *local; 2176 conn.ac_remote = *remote; 2177 conn.ac_start = TCPS_SYN_SENT; 2178 conn.ac_end = TCPS_TIME_WAIT; 2179 conn.ac_zoneid = zoneid; 2180 2181 ioc.ic_cmd = TCP_IOC_ABORT_CONN; 2182 ioc.ic_timout = -1; /* infinite timeout */ 2183 ioc.ic_len = sizeof (conn); 2184 ioc.ic_dp = (char *)&conn; 2185 2186 if ((fd = open("/dev/tcp", O_RDONLY)) < 0) { 2187 zerror(zlogp, B_TRUE, "unable to open %s", "/dev/tcp"); 2188 return (-1); 2189 } 2190 2191 error = ioctl(fd, I_STR, &ioc); 2192 (void) close(fd); 2193 if (error == 0 || errno == ENOENT) /* ENOENT is not an error */ 2194 return (0); 2195 return (-1); 2196 } 2197 2198 static int 2199 tcp_abort_connections(zlog_t *zlogp, zoneid_t zoneid) 2200 { 2201 struct sockaddr_storage l, r; 2202 struct sockaddr_in *local, *remote; 2203 struct sockaddr_in6 *local6, *remote6; 2204 int error; 2205 2206 /* 2207 * Abort IPv4 connections. 2208 */ 2209 bzero(&l, sizeof (*local)); 2210 local = (struct sockaddr_in *)&l; 2211 local->sin_family = AF_INET; 2212 local->sin_addr.s_addr = INADDR_ANY; 2213 local->sin_port = 0; 2214 2215 bzero(&r, sizeof (*remote)); 2216 remote = (struct sockaddr_in *)&r; 2217 remote->sin_family = AF_INET; 2218 remote->sin_addr.s_addr = INADDR_ANY; 2219 remote->sin_port = 0; 2220 2221 if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0) 2222 return (error); 2223 2224 /* 2225 * Abort IPv6 connections. 2226 */ 2227 bzero(&l, sizeof (*local6)); 2228 local6 = (struct sockaddr_in6 *)&l; 2229 local6->sin6_family = AF_INET6; 2230 local6->sin6_port = 0; 2231 local6->sin6_addr = in6addr_any; 2232 2233 bzero(&r, sizeof (*remote6)); 2234 remote6 = (struct sockaddr_in6 *)&r; 2235 remote6->sin6_family = AF_INET6; 2236 remote6->sin6_port = 0; 2237 remote6->sin6_addr = in6addr_any; 2238 2239 if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0) 2240 return (error); 2241 return (0); 2242 } 2243 2244 static int 2245 devfsadm_call(zlog_t *zlogp, const char *arg) 2246 { 2247 char *argv[4]; 2248 int status; 2249 2250 argv[0] = DEVFSADM; 2251 argv[1] = (char *)arg; 2252 argv[2] = zone_name; 2253 argv[3] = NULL; 2254 status = forkexec(zlogp, DEVFSADM_PATH, argv); 2255 if (status == 0 || status == -1) 2256 return (status); 2257 zerror(zlogp, B_FALSE, "%s call (%s %s %s) unexpectedly returned %d", 2258 DEVFSADM, DEVFSADM_PATH, arg, zone_name, status); 2259 return (-1); 2260 } 2261 2262 static int 2263 devfsadm_register(zlog_t *zlogp) 2264 { 2265 /* 2266 * Ready the zone's devices. 2267 */ 2268 return (devfsadm_call(zlogp, "-z")); 2269 } 2270 2271 static int 2272 devfsadm_unregister(zlog_t *zlogp) 2273 { 2274 return (devfsadm_call(zlogp, "-Z")); 2275 } 2276 2277 static int 2278 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) 2279 { 2280 nvlist_t *nvl = NULL; 2281 char *nvl_packed = NULL; 2282 size_t nvl_size = 0; 2283 nvlist_t **nvlv = NULL; 2284 int rctlcount = 0; 2285 int error = -1; 2286 zone_dochandle_t handle; 2287 struct zone_rctltab rctltab; 2288 rctlblk_t *rctlblk = NULL; 2289 2290 *bufp = NULL; 2291 *bufsizep = 0; 2292 2293 if ((handle = zonecfg_init_handle()) == NULL) { 2294 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2295 return (-1); 2296 } 2297 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2298 zerror(zlogp, B_FALSE, "invalid configuration"); 2299 zonecfg_fini_handle(handle); 2300 return (-1); 2301 } 2302 2303 rctltab.zone_rctl_valptr = NULL; 2304 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) { 2305 zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc"); 2306 goto out; 2307 } 2308 2309 if (zonecfg_setrctlent(handle) != Z_OK) { 2310 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent"); 2311 goto out; 2312 } 2313 2314 if ((rctlblk = malloc(rctlblk_size())) == NULL) { 2315 zerror(zlogp, B_TRUE, "memory allocation failed"); 2316 goto out; 2317 } 2318 while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) { 2319 struct zone_rctlvaltab *rctlval; 2320 uint_t i, count; 2321 const char *name = rctltab.zone_rctl_name; 2322 2323 /* zoneadm should have already warned about unknown rctls. */ 2324 if (!zonecfg_is_rctl(name)) { 2325 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 2326 rctltab.zone_rctl_valptr = NULL; 2327 continue; 2328 } 2329 count = 0; 2330 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL; 2331 rctlval = rctlval->zone_rctlval_next) { 2332 count++; 2333 } 2334 if (count == 0) { /* ignore */ 2335 continue; /* Nothing to free */ 2336 } 2337 if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL) 2338 goto out; 2339 i = 0; 2340 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL; 2341 rctlval = rctlval->zone_rctlval_next, i++) { 2342 if (nvlist_alloc(&nvlv[i], NV_UNIQUE_NAME, 0) != 0) { 2343 zerror(zlogp, B_TRUE, "%s failed", 2344 "nvlist_alloc"); 2345 goto out; 2346 } 2347 if (zonecfg_construct_rctlblk(rctlval, rctlblk) 2348 != Z_OK) { 2349 zerror(zlogp, B_FALSE, "invalid rctl value: " 2350 "(priv=%s,limit=%s,action=%s)", 2351 rctlval->zone_rctlval_priv, 2352 rctlval->zone_rctlval_limit, 2353 rctlval->zone_rctlval_action); 2354 goto out; 2355 } 2356 if (!zonecfg_valid_rctl(name, rctlblk)) { 2357 zerror(zlogp, B_FALSE, 2358 "(priv=%s,limit=%s,action=%s) is not a " 2359 "valid value for rctl '%s'", 2360 rctlval->zone_rctlval_priv, 2361 rctlval->zone_rctlval_limit, 2362 rctlval->zone_rctlval_action, 2363 name); 2364 goto out; 2365 } 2366 if (nvlist_add_uint64(nvlv[i], "privilege", 2367 rctlblk_get_privilege(rctlblk)) != 0) { 2368 zerror(zlogp, B_FALSE, "%s failed", 2369 "nvlist_add_uint64"); 2370 goto out; 2371 } 2372 if (nvlist_add_uint64(nvlv[i], "limit", 2373 rctlblk_get_value(rctlblk)) != 0) { 2374 zerror(zlogp, B_FALSE, "%s failed", 2375 "nvlist_add_uint64"); 2376 goto out; 2377 } 2378 if (nvlist_add_uint64(nvlv[i], "action", 2379 (uint_t)rctlblk_get_local_action(rctlblk, NULL)) 2380 != 0) { 2381 zerror(zlogp, B_FALSE, "%s failed", 2382 "nvlist_add_uint64"); 2383 goto out; 2384 } 2385 } 2386 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 2387 rctltab.zone_rctl_valptr = NULL; 2388 if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count) 2389 != 0) { 2390 zerror(zlogp, B_FALSE, "%s failed", 2391 "nvlist_add_nvlist_array"); 2392 goto out; 2393 } 2394 for (i = 0; i < count; i++) 2395 nvlist_free(nvlv[i]); 2396 free(nvlv); 2397 nvlv = NULL; 2398 rctlcount++; 2399 } 2400 (void) zonecfg_endrctlent(handle); 2401 2402 if (rctlcount == 0) { 2403 error = 0; 2404 goto out; 2405 } 2406 if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0) 2407 != 0) { 2408 zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack"); 2409 goto out; 2410 } 2411 2412 error = 0; 2413 *bufp = nvl_packed; 2414 *bufsizep = nvl_size; 2415 2416 out: 2417 free(rctlblk); 2418 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 2419 if (error && nvl_packed != NULL) 2420 free(nvl_packed); 2421 if (nvl != NULL) 2422 nvlist_free(nvl); 2423 if (nvlv != NULL) 2424 free(nvlv); 2425 if (handle != NULL) 2426 zonecfg_fini_handle(handle); 2427 return (error); 2428 } 2429 2430 static int 2431 get_zone_pool(zlog_t *zlogp, char *poolbuf, size_t bufsz) 2432 { 2433 zone_dochandle_t handle; 2434 int error; 2435 2436 if ((handle = zonecfg_init_handle()) == NULL) { 2437 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2438 return (-1); 2439 } 2440 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2441 zerror(zlogp, B_FALSE, "invalid configuration"); 2442 zonecfg_fini_handle(handle); 2443 return (-1); 2444 } 2445 error = zonecfg_get_pool(handle, poolbuf, bufsz); 2446 zonecfg_fini_handle(handle); 2447 return (error); 2448 } 2449 2450 static int 2451 get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep) 2452 { 2453 zone_dochandle_t handle; 2454 struct zone_dstab dstab; 2455 size_t total, offset, len; 2456 int error = -1; 2457 char *str; 2458 2459 *bufp = NULL; 2460 *bufsizep = 0; 2461 2462 if ((handle = zonecfg_init_handle()) == NULL) { 2463 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2464 return (-1); 2465 } 2466 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2467 zerror(zlogp, B_FALSE, "invalid configuration"); 2468 zonecfg_fini_handle(handle); 2469 return (-1); 2470 } 2471 2472 if (zonecfg_setdsent(handle) != Z_OK) { 2473 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent"); 2474 goto out; 2475 } 2476 2477 total = 0; 2478 while (zonecfg_getdsent(handle, &dstab) == Z_OK) 2479 total += strlen(dstab.zone_dataset_name) + 1; 2480 (void) zonecfg_enddsent(handle); 2481 2482 if (total == 0) { 2483 error = 0; 2484 goto out; 2485 } 2486 2487 if ((str = malloc(total)) == NULL) { 2488 zerror(zlogp, B_TRUE, "memory allocation failed"); 2489 goto out; 2490 } 2491 2492 if (zonecfg_setdsent(handle) != Z_OK) { 2493 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent"); 2494 goto out; 2495 } 2496 offset = 0; 2497 while (zonecfg_getdsent(handle, &dstab) == Z_OK) { 2498 len = strlen(dstab.zone_dataset_name); 2499 (void) strlcpy(str + offset, dstab.zone_dataset_name, 2500 sizeof (dstab.zone_dataset_name) - offset); 2501 offset += len; 2502 if (offset != total - 1) 2503 str[offset++] = ','; 2504 } 2505 (void) zonecfg_enddsent(handle); 2506 2507 error = 0; 2508 *bufp = str; 2509 *bufsizep = total; 2510 2511 out: 2512 if (error != 0 && str != NULL) 2513 free(str); 2514 if (handle != NULL) 2515 zonecfg_fini_handle(handle); 2516 2517 return (error); 2518 } 2519 2520 /* ARGSUSED */ 2521 static void 2522 zfs_error_handler(const char *fmt, va_list ap) 2523 { 2524 /* 2525 * Do nothing - we interpret the failures from each libzfs call below. 2526 */ 2527 } 2528 2529 static int 2530 validate_datasets(zlog_t *zlogp) 2531 { 2532 zone_dochandle_t handle; 2533 struct zone_dstab dstab; 2534 zfs_handle_t *zhp; 2535 2536 if ((handle = zonecfg_init_handle()) == NULL) { 2537 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2538 return (-1); 2539 } 2540 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2541 zerror(zlogp, B_FALSE, "invalid configuration"); 2542 zonecfg_fini_handle(handle); 2543 return (-1); 2544 } 2545 2546 if (zonecfg_setdsent(handle) != Z_OK) { 2547 zerror(zlogp, B_FALSE, "invalid configuration"); 2548 zonecfg_fini_handle(handle); 2549 return (-1); 2550 } 2551 2552 zfs_set_error_handler(zfs_error_handler); 2553 2554 while (zonecfg_getdsent(handle, &dstab) == Z_OK) { 2555 2556 if ((zhp = zfs_open(dstab.zone_dataset_name, 2557 ZFS_TYPE_FILESYSTEM)) == NULL) { 2558 zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'", 2559 dstab.zone_dataset_name); 2560 zonecfg_fini_handle(handle); 2561 return (-1); 2562 } 2563 2564 /* 2565 * Automatically set the 'zoned' property. We check the value 2566 * first because we'll get EPERM if it is already set. 2567 */ 2568 if (!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) && 2569 zfs_prop_set(zhp, ZFS_PROP_ZONED, "on") != 0) { 2570 zerror(zlogp, B_FALSE, "cannot set 'zoned' " 2571 "property for ZFS dataset '%s'\n", 2572 dstab.zone_dataset_name); 2573 zonecfg_fini_handle(handle); 2574 zfs_close(zhp); 2575 return (-1); 2576 } 2577 2578 zfs_close(zhp); 2579 } 2580 (void) zonecfg_enddsent(handle); 2581 2582 zonecfg_fini_handle(handle); 2583 2584 return (0); 2585 } 2586 2587 static int 2588 bind_to_pool(zlog_t *zlogp, zoneid_t zoneid) 2589 { 2590 pool_conf_t *poolconf; 2591 pool_t *pool; 2592 char poolname[MAXPATHLEN]; 2593 int status; 2594 int error; 2595 2596 /* 2597 * Find the pool mentioned in the zone configuration, and bind to it. 2598 */ 2599 error = get_zone_pool(zlogp, poolname, sizeof (poolname)); 2600 if (error == Z_NO_ENTRY || (error == Z_OK && strlen(poolname) == 0)) { 2601 /* 2602 * The property is not set on the zone, so the pool 2603 * should be bound to the default pool. But that's 2604 * already done by the kernel, so we can just return. 2605 */ 2606 return (0); 2607 } 2608 if (error != Z_OK) { 2609 /* 2610 * Not an error, even though it shouldn't be happening. 2611 */ 2612 zerror(zlogp, B_FALSE, 2613 "WARNING: unable to retrieve default pool."); 2614 return (0); 2615 } 2616 /* 2617 * Don't do anything if pools aren't enabled. 2618 */ 2619 if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED) { 2620 zerror(zlogp, B_FALSE, "WARNING: pools facility not active; " 2621 "zone will not be bound to pool '%s'.", poolname); 2622 return (0); 2623 } 2624 /* 2625 * Try to provide a sane error message if the requested pool doesn't 2626 * exist. 2627 */ 2628 if ((poolconf = pool_conf_alloc()) == NULL) { 2629 zerror(zlogp, B_FALSE, "%s failed", "pool_conf_alloc"); 2630 return (-1); 2631 } 2632 if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) != 2633 PO_SUCCESS) { 2634 zerror(zlogp, B_FALSE, "%s failed", "pool_conf_open"); 2635 pool_conf_free(poolconf); 2636 return (-1); 2637 } 2638 pool = pool_get_pool(poolconf, poolname); 2639 (void) pool_conf_close(poolconf); 2640 pool_conf_free(poolconf); 2641 if (pool == NULL) { 2642 zerror(zlogp, B_FALSE, "WARNING: pool '%s' not found; " 2643 "using default pool.", poolname); 2644 return (0); 2645 } 2646 /* 2647 * Bind the zone to the pool. 2648 */ 2649 if (pool_set_binding(poolname, P_ZONEID, zoneid) != PO_SUCCESS) { 2650 zerror(zlogp, B_FALSE, "WARNING: unable to bind to pool '%s'; " 2651 "using default pool.", poolname); 2652 } 2653 return (0); 2654 } 2655 2656 int 2657 prtmount(const char *fs, void *x) { 2658 zerror((zlog_t *)x, B_FALSE, " %s", fs); 2659 return (0); 2660 } 2661 2662 /* 2663 * Look for zones running on the main system that are using this root (or any 2664 * subdirectory of it). Return B_TRUE and print an error if a conflicting zone 2665 * is found or if we can't tell. 2666 */ 2667 static boolean_t 2668 duplicate_zone_root(zlog_t *zlogp, const char *rootpath) 2669 { 2670 zoneid_t *zids = NULL; 2671 uint_t nzids = 0; 2672 boolean_t retv; 2673 int rlen, zlen; 2674 char zroot[MAXPATHLEN]; 2675 char zonename[ZONENAME_MAX]; 2676 2677 for (;;) { 2678 nzids += 10; 2679 zids = malloc(nzids * sizeof (*zids)); 2680 if (zids == NULL) { 2681 zerror(zlogp, B_TRUE, "unable to allocate memory"); 2682 return (B_TRUE); 2683 } 2684 if (zone_list(zids, &nzids) == 0) 2685 break; 2686 free(zids); 2687 } 2688 retv = B_FALSE; 2689 rlen = strlen(rootpath); 2690 while (nzids > 0) { 2691 /* 2692 * Ignore errors; they just mean that the zone has disappeared 2693 * while we were busy. 2694 */ 2695 if (zone_getattr(zids[--nzids], ZONE_ATTR_ROOT, zroot, 2696 sizeof (zroot)) == -1) 2697 continue; 2698 zlen = strlen(zroot); 2699 if (zlen > rlen) 2700 zlen = rlen; 2701 if (strncmp(rootpath, zroot, zlen) == 0 && 2702 (zroot[zlen] == '\0' || zroot[zlen] == '/') && 2703 (rootpath[zlen] == '\0' || rootpath[zlen] == '/')) { 2704 if (getzonenamebyid(zids[nzids], zonename, 2705 sizeof (zonename)) == -1) 2706 (void) snprintf(zonename, sizeof (zonename), 2707 "id %d", (int)zids[nzids]); 2708 zerror(zlogp, B_FALSE, 2709 "zone root %s already in use by zone %s", 2710 rootpath, zonename); 2711 retv = B_TRUE; 2712 break; 2713 } 2714 } 2715 free(zids); 2716 return (retv); 2717 } 2718 2719 /* 2720 * Search for loopback mounts that use this same source node (same device and 2721 * inode). Return B_TRUE if there is one or if we can't tell. 2722 */ 2723 static boolean_t 2724 duplicate_reachable_path(zlog_t *zlogp, const char *rootpath) 2725 { 2726 struct stat64 rst, zst; 2727 struct mnttab *mnp; 2728 2729 if (stat64(rootpath, &rst) == -1) { 2730 zerror(zlogp, B_TRUE, "can't stat %s", rootpath); 2731 return (B_TRUE); 2732 } 2733 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 2734 return (B_TRUE); 2735 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) { 2736 if (mnp->mnt_fstype == NULL || 2737 strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0) 2738 continue; 2739 /* We're looking at a loopback mount. Stat it. */ 2740 if (mnp->mnt_special != NULL && 2741 stat64(mnp->mnt_special, &zst) != -1 && 2742 rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) { 2743 zerror(zlogp, B_FALSE, 2744 "zone root %s is reachable through %s", 2745 rootpath, mnp->mnt_mountp); 2746 return (B_TRUE); 2747 } 2748 } 2749 return (B_FALSE); 2750 } 2751 2752 zoneid_t 2753 vplat_create(zlog_t *zlogp, boolean_t mount_cmd) 2754 { 2755 zoneid_t rval = -1; 2756 priv_set_t *privs; 2757 char rootpath[MAXPATHLEN]; 2758 char *rctlbuf = NULL; 2759 size_t rctlbufsz = 0; 2760 char *zfsbuf = NULL; 2761 size_t zfsbufsz = 0; 2762 zoneid_t zoneid = -1; 2763 int xerr; 2764 char *kzone; 2765 FILE *fp = NULL; 2766 2767 if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { 2768 zerror(zlogp, B_TRUE, "unable to determine zone root"); 2769 return (-1); 2770 } 2771 if (zonecfg_in_alt_root()) 2772 resolve_lofs(zlogp, rootpath, sizeof (rootpath)); 2773 2774 if ((privs = priv_allocset()) == NULL) { 2775 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 2776 return (-1); 2777 } 2778 priv_emptyset(privs); 2779 if (zonecfg_get_privset(privs) != Z_OK) { 2780 zerror(zlogp, B_TRUE, "Failed to initialize privileges"); 2781 goto error; 2782 } 2783 if (!mount_cmd && get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) { 2784 zerror(zlogp, B_FALSE, "Unable to get list of rctls"); 2785 goto error; 2786 } 2787 if (get_datasets(zlogp, &zfsbuf, &zfsbufsz) != 0) { 2788 zerror(zlogp, B_FALSE, "Unable to get list of ZFS datasets"); 2789 goto error; 2790 } 2791 2792 kzone = zone_name; 2793 2794 /* 2795 * We must do this scan twice. First, we look for zones running on the 2796 * main system that are using this root (or any subdirectory of it). 2797 * Next, we reduce to the shortest path and search for loopback mounts 2798 * that use this same source node (same device and inode). 2799 */ 2800 if (duplicate_zone_root(zlogp, rootpath)) 2801 goto error; 2802 if (duplicate_reachable_path(zlogp, rootpath)) 2803 goto error; 2804 2805 if (mount_cmd) { 2806 root_to_lu(zlogp, rootpath, sizeof (rootpath), B_TRUE); 2807 2808 /* 2809 * Forge up a special root for this zone. When a zone is 2810 * mounted, we can't let the zone have its own root because the 2811 * tools that will be used in this "scratch zone" need access 2812 * to both the zone's resources and the running machine's 2813 * executables. 2814 * 2815 * Note that the mkdir here also catches read-only filesystems. 2816 */ 2817 if (mkdir(rootpath, 0755) != 0 && errno != EEXIST) { 2818 zerror(zlogp, B_TRUE, "cannot create %s", rootpath); 2819 goto error; 2820 } 2821 if (domount(zlogp, "tmpfs", "", "swap", rootpath) != 0) 2822 goto error; 2823 } 2824 2825 if (zonecfg_in_alt_root()) { 2826 /* 2827 * If we are mounting up a zone in an alternate root partition, 2828 * then we have some additional work to do before starting the 2829 * zone. First, resolve the root path down so that we're not 2830 * fooled by duplicates. Then forge up an internal name for 2831 * the zone. 2832 */ 2833 if ((fp = zonecfg_open_scratch("", B_TRUE)) == NULL) { 2834 zerror(zlogp, B_TRUE, "cannot open mapfile"); 2835 goto error; 2836 } 2837 if (zonecfg_lock_scratch(fp) != 0) { 2838 zerror(zlogp, B_TRUE, "cannot lock mapfile"); 2839 goto error; 2840 } 2841 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(), 2842 NULL, 0) == 0) { 2843 zerror(zlogp, B_FALSE, "scratch zone already running"); 2844 goto error; 2845 } 2846 /* This is the preferred name */ 2847 (void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s", 2848 zone_name); 2849 srandom(getpid()); 2850 while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL, 2851 0) == 0) { 2852 /* This is just an arbitrary name; note "." usage */ 2853 (void) snprintf(kernzone, sizeof (kernzone), 2854 "SUNWlu.%08lX%08lX", random(), random()); 2855 } 2856 kzone = kernzone; 2857 } 2858 2859 xerr = 0; 2860 if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf, 2861 rctlbufsz, zfsbuf, zfsbufsz, &xerr)) == -1) { 2862 if (xerr == ZE_AREMOUNTS) { 2863 if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) { 2864 zerror(zlogp, B_FALSE, 2865 "An unknown file-system is mounted on " 2866 "a subdirectory of %s", rootpath); 2867 } else { 2868 2869 zerror(zlogp, B_FALSE, 2870 "These file-systems are mounted on " 2871 "subdirectories of %s:", rootpath); 2872 (void) zonecfg_find_mounts(rootpath, 2873 prtmount, zlogp); 2874 } 2875 } else if (xerr == ZE_CHROOTED) { 2876 zerror(zlogp, B_FALSE, "%s: " 2877 "cannot create a zone from a chrooted " 2878 "environment", "zone_create"); 2879 } else { 2880 zerror(zlogp, B_TRUE, "%s failed", "zone_create"); 2881 } 2882 goto error; 2883 } 2884 2885 if (zonecfg_in_alt_root() && 2886 zonecfg_add_scratch(fp, zone_name, kernzone, 2887 zonecfg_get_root()) == -1) { 2888 zerror(zlogp, B_TRUE, "cannot add mapfile entry"); 2889 goto error; 2890 } 2891 2892 /* 2893 * The following is a warning, not an error, and is not performed when 2894 * merely mounting a zone for administrative use. 2895 */ 2896 if (!mount_cmd && bind_to_pool(zlogp, zoneid) != 0) 2897 zerror(zlogp, B_FALSE, "WARNING: unable to bind zone to " 2898 "requested pool; using default pool."); 2899 rval = zoneid; 2900 zoneid = -1; 2901 2902 error: 2903 if (zoneid != -1) 2904 (void) zone_destroy(zoneid); 2905 if (rctlbuf != NULL) 2906 free(rctlbuf); 2907 priv_freeset(privs); 2908 if (fp != NULL) 2909 zonecfg_close_scratch(fp); 2910 lofs_discard_mnttab(); 2911 return (rval); 2912 } 2913 2914 int 2915 vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd) 2916 { 2917 if (!mount_cmd && validate_datasets(zlogp) != 0) { 2918 lofs_discard_mnttab(); 2919 return (-1); 2920 } 2921 2922 if (create_dev_files(zlogp) != 0 || 2923 mount_filesystems(zlogp, mount_cmd) != 0) { 2924 lofs_discard_mnttab(); 2925 return (-1); 2926 } 2927 if (!mount_cmd && (devfsadm_register(zlogp) != 0 || 2928 configure_network_interfaces(zlogp) != 0)) { 2929 lofs_discard_mnttab(); 2930 return (-1); 2931 } 2932 lofs_discard_mnttab(); 2933 return (0); 2934 } 2935 2936 static int 2937 lu_root_teardown(zlog_t *zlogp) 2938 { 2939 char zroot[MAXPATHLEN]; 2940 2941 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 2942 zerror(zlogp, B_FALSE, "unable to determine zone root"); 2943 return (-1); 2944 } 2945 root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE); 2946 2947 /* 2948 * At this point, the processes are gone, the filesystems (save the 2949 * root) are unmounted, and the zone is on death row. But there may 2950 * still be creds floating about in the system that reference the 2951 * zone_t, and which pin down zone_rootvp causing this call to fail 2952 * with EBUSY. Thus, we try for a little while before just giving up. 2953 * (How I wish this were not true, and umount2 just did the right 2954 * thing, or tmpfs supported MS_FORCE This is a gross hack.) 2955 */ 2956 if (umount2(zroot, MS_FORCE) != 0) { 2957 if (errno == ENOTSUP && umount2(zroot, 0) == 0) 2958 goto unmounted; 2959 if (errno == EBUSY) { 2960 int tries = 10; 2961 2962 while (--tries >= 0) { 2963 (void) sleep(1); 2964 if (umount2(zroot, 0) == 0) 2965 goto unmounted; 2966 if (errno != EBUSY) 2967 break; 2968 } 2969 } 2970 zerror(zlogp, B_TRUE, "unable to unmount '%s'", zroot); 2971 return (-1); 2972 } 2973 unmounted: 2974 2975 /* 2976 * Only zones in an alternate root environment have scratch zone 2977 * entries. 2978 */ 2979 if (zonecfg_in_alt_root()) { 2980 FILE *fp; 2981 int retv; 2982 2983 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) { 2984 zerror(zlogp, B_TRUE, "cannot open mapfile"); 2985 return (-1); 2986 } 2987 retv = -1; 2988 if (zonecfg_lock_scratch(fp) != 0) 2989 zerror(zlogp, B_TRUE, "cannot lock mapfile"); 2990 else if (zonecfg_delete_scratch(fp, kernzone) != 0) 2991 zerror(zlogp, B_TRUE, "cannot delete map entry"); 2992 else 2993 retv = 0; 2994 zonecfg_close_scratch(fp); 2995 return (retv); 2996 } else { 2997 return (0); 2998 } 2999 } 3000 3001 int 3002 vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd) 3003 { 3004 char *kzone; 3005 zoneid_t zoneid; 3006 3007 kzone = zone_name; 3008 if (zonecfg_in_alt_root()) { 3009 FILE *fp; 3010 3011 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) { 3012 zerror(zlogp, B_TRUE, "unable to open map file"); 3013 goto error; 3014 } 3015 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(), 3016 kernzone, sizeof (kernzone)) != 0) { 3017 zerror(zlogp, B_FALSE, "unable to find scratch zone"); 3018 zonecfg_close_scratch(fp); 3019 goto error; 3020 } 3021 zonecfg_close_scratch(fp); 3022 kzone = kernzone; 3023 } 3024 3025 if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) { 3026 if (!bringup_failure_recovery) 3027 zerror(zlogp, B_TRUE, "unable to get zoneid"); 3028 if (unmount_cmd) 3029 (void) lu_root_teardown(zlogp); 3030 goto error; 3031 } 3032 3033 if (zone_shutdown(zoneid) != 0) { 3034 zerror(zlogp, B_TRUE, "unable to shutdown zone"); 3035 goto error; 3036 } 3037 3038 if (!unmount_cmd && devfsadm_unregister(zlogp) != 0) 3039 goto error; 3040 3041 if (!unmount_cmd && 3042 unconfigure_network_interfaces(zlogp, zoneid) != 0) { 3043 zerror(zlogp, B_FALSE, 3044 "unable to unconfigure network interfaces in zone"); 3045 goto error; 3046 } 3047 3048 if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) { 3049 zerror(zlogp, B_TRUE, "unable to abort TCP connections"); 3050 goto error; 3051 } 3052 3053 if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) { 3054 zerror(zlogp, B_FALSE, 3055 "unable to unmount file systems in zone"); 3056 goto error; 3057 } 3058 3059 if (zone_destroy(zoneid) != 0) { 3060 zerror(zlogp, B_TRUE, "unable to destroy zone"); 3061 goto error; 3062 } 3063 3064 /* 3065 * Special teardown for alternate boot environments: remove the tmpfs 3066 * root for the zone and then remove it from the map file. 3067 */ 3068 if (unmount_cmd && lu_root_teardown(zlogp) != 0) 3069 goto error; 3070 3071 if (!unmount_cmd) 3072 destroy_console_slave(); 3073 3074 lofs_discard_mnttab(); 3075 return (0); 3076 3077 error: 3078 lofs_discard_mnttab(); 3079 return (-1); 3080 } 3081