1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * This module contains functions used to bring up and tear down the 31 * Virtual Platform: [un]mounting file-systems, [un]plumbing network 32 * interfaces, [un]configuring devices, establishing resource controls, 33 * and creating/destroying the zone in the kernel. These actions, on 34 * the way up, ready the zone; on the way down, they halt the zone. 35 * See the much longer block comment at the beginning of zoneadmd.c 36 * for a bigger picture of how the whole program functions. 37 * 38 * This module also has primary responsibility for the layout of "scratch 39 * zones." These are mounted, but inactive, zones that are used during 40 * operating system upgrade and potentially other administrative action. The 41 * scratch zone environment is similar to the miniroot environment. The zone's 42 * actual root is mounted read-write on /a, and the standard paths (/usr, 43 * /sbin, /lib) all lead to read-only copies of the running system's binaries. 44 * This allows the administrative tools to manipulate the zone using "-R /a" 45 * without relying on any binaries in the zone itself. 46 * 47 * If the scratch zone is on an alternate root (Live Upgrade [LU] boot 48 * environment), then we must resolve the lofs mounts used there to uncover 49 * writable (unshared) resources. Shared resources, though, are always 50 * read-only. In addition, if the "same" zone with a different root path is 51 * currently running, then "/b" inside the zone points to the running zone's 52 * root. This allows LU to synchronize configuration files during the upgrade 53 * process. 54 * 55 * To construct this environment, this module creates a tmpfs mount on 56 * $ZONEPATH/lu. Inside this scratch area, the miniroot-like environment as 57 * described above is constructed on the fly. The zone is then created using 58 * $ZONEPATH/lu as the root. 59 * 60 * Note that scratch zones are inactive. The zone's bits are not running and 61 * likely cannot be run correctly until upgrade is done. Init is not running 62 * there, nor is SMF. Because of this, the "mounted" state of a scratch zone 63 * is not a part of the usual halt/ready/boot state machine. 64 */ 65 66 #include <sys/param.h> 67 #include <sys/mount.h> 68 #include <sys/mntent.h> 69 #include <sys/socket.h> 70 #include <sys/utsname.h> 71 #include <sys/types.h> 72 #include <sys/stat.h> 73 #include <sys/sockio.h> 74 #include <sys/stropts.h> 75 #include <sys/conf.h> 76 77 #include <inet/tcp.h> 78 #include <arpa/inet.h> 79 #include <netinet/in.h> 80 #include <net/route.h> 81 #include <netdb.h> 82 83 #include <stdio.h> 84 #include <errno.h> 85 #include <fcntl.h> 86 #include <unistd.h> 87 #include <rctl.h> 88 #include <stdlib.h> 89 #include <string.h> 90 #include <strings.h> 91 #include <wait.h> 92 #include <limits.h> 93 #include <libgen.h> 94 #include <zone.h> 95 #include <assert.h> 96 97 #include <sys/mntio.h> 98 #include <sys/mnttab.h> 99 #include <sys/fs/autofs.h> /* for _autofssys() */ 100 #include <sys/fs/lofs_info.h> 101 102 #include <pool.h> 103 #include <sys/pool.h> 104 105 #include <libzonecfg.h> 106 #include "zoneadmd.h" 107 108 #define V4_ADDR_LEN 32 109 #define V6_ADDR_LEN 128 110 111 /* 0755 is the default directory mode. */ 112 #define DEFAULT_DIR_MODE \ 113 (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) 114 115 #define IPD_DEFAULT_OPTS \ 116 MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES 117 118 #define DFSTYPES "/etc/dfs/fstypes" 119 120 /* 121 * A list of directories which should be created. 122 */ 123 124 struct dir_info { 125 char *dir_name; 126 mode_t dir_mode; 127 }; 128 129 /* 130 * The pathnames below are relative to the zonepath 131 */ 132 static struct dir_info dev_dirs[] = { 133 { "/dev", 0755 }, 134 { "/dev/dsk", 0755 }, 135 { "/dev/fd", 0555 }, 136 { "/dev/pts", 0755 }, 137 { "/dev/rdsk", 0755 }, 138 { "/dev/rmt", 0755 }, 139 { "/dev/sad", 0755 }, 140 { "/dev/swap", 0755 }, 141 { "/dev/term", 0755 }, 142 }; 143 144 /* 145 * A list of devices which should be symlinked to /dev/zconsole. 146 */ 147 148 struct symlink_info { 149 char *sl_source; 150 char *sl_target; 151 }; 152 153 /* 154 * The "source" paths are relative to the zonepath 155 */ 156 static struct symlink_info dev_symlinks[] = { 157 { "/dev/stderr", "./fd/2" }, 158 { "/dev/stdin", "./fd/0" }, 159 { "/dev/stdout", "./fd/1" }, 160 { "/dev/dtremote", "/dev/null" }, 161 { "/dev/console", "zconsole" }, 162 { "/dev/syscon", "zconsole" }, 163 { "/dev/sysmsg", "zconsole" }, 164 { "/dev/systty", "zconsole" }, 165 { "/dev/msglog", "zconsole" }, 166 }; 167 168 /* for routing socket */ 169 static int rts_seqno = 0; 170 171 /* mangled zone name when mounting in an alternate root environment */ 172 static char kernzone[ZONENAME_MAX]; 173 174 /* array of cached mount entries for resolve_lofs */ 175 static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max; 176 177 /* from libsocket, not in any header file */ 178 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *); 179 180 /* 181 * An optimization for build_mnttable: reallocate (and potentially copy the 182 * data) only once every N times through the loop. 183 */ 184 #define MNTTAB_HUNK 32 185 186 /* 187 * Private autofs system call 188 */ 189 extern int _autofssys(int, void *); 190 191 static int 192 autofs_cleanup(zoneid_t zoneid) 193 { 194 /* 195 * Ask autofs to unmount all trigger nodes in the given zone. 196 */ 197 return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid)); 198 } 199 200 static void 201 free_mnttable(struct mnttab *mnt_array, uint_t nelem) 202 { 203 uint_t i; 204 205 if (mnt_array == NULL) 206 return; 207 for (i = 0; i < nelem; i++) { 208 free(mnt_array[i].mnt_mountp); 209 free(mnt_array[i].mnt_fstype); 210 free(mnt_array[i].mnt_special); 211 free(mnt_array[i].mnt_mntopts); 212 assert(mnt_array[i].mnt_time == NULL); 213 } 214 free(mnt_array); 215 } 216 217 /* 218 * Build the mount table for the zone rooted at "zroot", storing the resulting 219 * array of struct mnttabs in "mnt_arrayp" and the number of elements in the 220 * array in "nelemp". 221 */ 222 static int 223 build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab, 224 struct mnttab **mnt_arrayp, uint_t *nelemp) 225 { 226 struct mnttab mnt; 227 struct mnttab *mnts; 228 struct mnttab *mnp; 229 uint_t nmnt; 230 231 rewind(mnttab); 232 resetmnttab(mnttab); 233 nmnt = 0; 234 mnts = NULL; 235 while (getmntent(mnttab, &mnt) == 0) { 236 struct mnttab *tmp_array; 237 238 if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0) 239 continue; 240 if (nmnt % MNTTAB_HUNK == 0) { 241 tmp_array = realloc(mnts, 242 (nmnt + MNTTAB_HUNK) * sizeof (*mnts)); 243 if (tmp_array == NULL) { 244 free_mnttable(mnts, nmnt); 245 return (-1); 246 } 247 mnts = tmp_array; 248 } 249 mnp = &mnts[nmnt++]; 250 251 /* 252 * Zero out any fields we're not using. 253 */ 254 (void) memset(mnp, 0, sizeof (*mnp)); 255 256 if (mnt.mnt_special != NULL) 257 mnp->mnt_special = strdup(mnt.mnt_special); 258 if (mnt.mnt_mntopts != NULL) 259 mnp->mnt_mntopts = strdup(mnt.mnt_mntopts); 260 mnp->mnt_mountp = strdup(mnt.mnt_mountp); 261 mnp->mnt_fstype = strdup(mnt.mnt_fstype); 262 if ((mnt.mnt_special != NULL && mnp->mnt_special == NULL) || 263 (mnt.mnt_mntopts != NULL && mnp->mnt_mntopts == NULL) || 264 mnp->mnt_mountp == NULL || mnp->mnt_fstype == NULL) { 265 zerror(zlogp, B_TRUE, "memory allocation failed"); 266 free_mnttable(mnts, nmnt); 267 return (-1); 268 } 269 } 270 *mnt_arrayp = mnts; 271 *nelemp = nmnt; 272 return (0); 273 } 274 275 /* 276 * This is an optimization. The resolve_lofs function is used quite frequently 277 * to manipulate file paths, and on a machine with a large number of zones, 278 * there will be a huge number of mounted file systems. Thus, we trigger a 279 * reread of the list of mount points 280 */ 281 static void 282 lofs_discard_mnttab(void) 283 { 284 free_mnttable(resolve_lofs_mnts, 285 resolve_lofs_mnt_max - resolve_lofs_mnts); 286 resolve_lofs_mnts = resolve_lofs_mnt_max = NULL; 287 } 288 289 static int 290 lofs_read_mnttab(zlog_t *zlogp) 291 { 292 FILE *mnttab; 293 uint_t nmnts; 294 295 if ((mnttab = fopen(MNTTAB, "r")) == NULL) 296 return (-1); 297 if (build_mnttable(zlogp, "", 0, mnttab, &resolve_lofs_mnts, 298 &nmnts) == -1) { 299 (void) fclose(mnttab); 300 return (-1); 301 } 302 (void) fclose(mnttab); 303 resolve_lofs_mnt_max = resolve_lofs_mnts + nmnts; 304 return (0); 305 } 306 307 /* 308 * This function loops over potential loopback mounts and symlinks in a given 309 * path and resolves them all down to an absolute path. 310 */ 311 static void 312 resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen) 313 { 314 int len, arlen; 315 const char *altroot; 316 char tmppath[MAXPATHLEN]; 317 boolean_t outside_altroot; 318 319 if ((len = resolvepath(path, tmppath, sizeof (tmppath))) == -1) 320 return; 321 tmppath[len] = '\0'; 322 (void) strlcpy(path, tmppath, sizeof (tmppath)); 323 324 /* This happens once per zoneadmd operation. */ 325 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 326 return; 327 328 altroot = zonecfg_get_root(); 329 arlen = strlen(altroot); 330 outside_altroot = B_FALSE; 331 for (;;) { 332 struct mnttab *mnp; 333 334 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; 335 mnp++) { 336 if (mnp->mnt_fstype == NULL || 337 mnp->mnt_mountp == NULL || 338 mnp->mnt_special == NULL || 339 strcmp(mnp->mnt_fstype, MNTTYPE_LOFS) != 0) 340 continue; 341 len = strlen(mnp->mnt_mountp); 342 if (strncmp(mnp->mnt_mountp, path, len) == 0 && 343 (path[len] == '/' || path[len] == '\0')) 344 break; 345 } 346 if (mnp >= resolve_lofs_mnt_max) 347 break; 348 if (outside_altroot) { 349 char *cp; 350 int olen = sizeof (MNTOPT_RO) - 1; 351 352 /* 353 * If we run into a read-only mount outside of the 354 * alternate root environment, then the user doesn't 355 * want this path to be made read-write. 356 */ 357 if (mnp->mnt_mntopts != NULL && 358 (cp = strstr(mnp->mnt_mntopts, MNTOPT_RO)) != 359 NULL && 360 (cp == mnp->mnt_mntopts || cp[-1] == ',') && 361 (cp[olen] == '\0' || cp[olen] == ',')) { 362 break; 363 } 364 } else if (arlen > 0 && 365 (strncmp(mnp->mnt_special, altroot, arlen) != 0 || 366 (mnp->mnt_special[arlen] != '\0' && 367 mnp->mnt_special[arlen] != '/'))) { 368 outside_altroot = B_TRUE; 369 } 370 /* use temporary buffer because new path might be longer */ 371 (void) snprintf(tmppath, sizeof (tmppath), "%s%s", 372 mnp->mnt_special, path + len); 373 if ((len = resolvepath(tmppath, path, pathlen)) == -1) 374 break; 375 path[len] = '\0'; 376 } 377 } 378 379 /* 380 * For a regular mount, check if a replacement lofs mount is needed because the 381 * referenced device is already mounted somewhere. 382 */ 383 static int 384 check_lofs_needed(zlog_t *zlogp, struct zone_fstab *fsptr) 385 { 386 struct mnttab *mnp; 387 zone_fsopt_t *optptr, *onext; 388 389 /* This happens once per zoneadmd operation. */ 390 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 391 return (-1); 392 393 /* 394 * If this special node isn't already in use, then it's ours alone; 395 * no need to worry about conflicting mounts. 396 */ 397 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; 398 mnp++) { 399 if (strcmp(mnp->mnt_special, fsptr->zone_fs_special) == 0) 400 break; 401 } 402 if (mnp >= resolve_lofs_mnt_max) 403 return (0); 404 405 /* 406 * Convert this duplicate mount into a lofs mount. 407 */ 408 (void) strlcpy(fsptr->zone_fs_special, mnp->mnt_mountp, 409 sizeof (fsptr->zone_fs_special)); 410 (void) strlcpy(fsptr->zone_fs_type, MNTTYPE_LOFS, 411 sizeof (fsptr->zone_fs_type)); 412 fsptr->zone_fs_raw[0] = '\0'; 413 414 /* 415 * Discard all but one of the original options and set that to be the 416 * same set of options used for inherit package directory resources. 417 */ 418 optptr = fsptr->zone_fs_options; 419 if (optptr == NULL) { 420 optptr = malloc(sizeof (*optptr)); 421 if (optptr == NULL) { 422 zerror(zlogp, B_TRUE, "cannot mount %s", 423 fsptr->zone_fs_dir); 424 return (-1); 425 } 426 } else { 427 while ((onext = optptr->zone_fsopt_next) != NULL) { 428 optptr->zone_fsopt_next = onext->zone_fsopt_next; 429 free(onext); 430 } 431 } 432 (void) strcpy(optptr->zone_fsopt_opt, IPD_DEFAULT_OPTS); 433 optptr->zone_fsopt_next = NULL; 434 fsptr->zone_fs_options = optptr; 435 return (0); 436 } 437 438 static int 439 make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode) 440 { 441 char path[MAXPATHLEN]; 442 struct stat st; 443 444 if (snprintf(path, sizeof (path), "%s%s", prefix, subdir) > 445 sizeof (path)) { 446 zerror(zlogp, B_FALSE, "pathname %s%s is too long", prefix, 447 subdir); 448 return (-1); 449 } 450 451 if (lstat(path, &st) == 0) { 452 /* 453 * We don't check the file mode since presumably the zone 454 * administrator may have had good reason to change the mode, 455 * and we don't need to second guess him. 456 */ 457 if (!S_ISDIR(st.st_mode)) { 458 zerror(zlogp, B_FALSE, "%s is not a directory", path); 459 return (-1); 460 } 461 } else if (mkdirp(path, mode) != 0) { 462 if (errno == EROFS) 463 zerror(zlogp, B_FALSE, "Could not mkdir %s.\nIt is on " 464 "a read-only file system in this local zone.\nMake " 465 "sure %s exists in the global zone.", path, subdir); 466 else 467 zerror(zlogp, B_TRUE, "mkdirp of %s failed", path); 468 return (-1); 469 } 470 return (0); 471 } 472 473 /* 474 * Make /dev and various directories underneath it. 475 */ 476 static int 477 make_dev_dirs(zlog_t *zlogp, const char *zonepath) 478 { 479 int i; 480 481 for (i = 0; i < sizeof (dev_dirs) / sizeof (struct dir_info); i++) { 482 if (make_one_dir(zlogp, zonepath, dev_dirs[i].dir_name, 483 dev_dirs[i].dir_mode) != 0) 484 return (-1); 485 } 486 return (0); 487 } 488 489 /* 490 * Make various sym-links underneath /dev. 491 */ 492 static int 493 make_dev_links(zlog_t *zlogp, char *zonepath) 494 { 495 int i; 496 497 for (i = 0; i < sizeof (dev_symlinks) / sizeof (struct symlink_info); 498 i++) { 499 char dev[MAXPATHLEN]; 500 struct stat st; 501 502 (void) snprintf(dev, sizeof (dev), "%s%s", zonepath, 503 dev_symlinks[i].sl_source); 504 if (lstat(dev, &st) == 0) { 505 /* 506 * Try not to call unlink(2) on directories, since that 507 * makes UFS unhappy. 508 */ 509 if (S_ISDIR(st.st_mode)) { 510 zerror(zlogp, B_FALSE, "symlink path %s is a " 511 "directory", dev_symlinks[i].sl_source); 512 return (-1); 513 } 514 (void) unlink(dev); 515 } 516 if (symlink(dev_symlinks[i].sl_target, dev) != 0) { 517 zerror(zlogp, B_TRUE, "could not setup %s->%s symlink", 518 dev_symlinks[i].sl_source, 519 dev_symlinks[i].sl_target); 520 return (-1); 521 } 522 } 523 return (0); 524 } 525 526 /* 527 * Create various directories and sym-links under /dev. 528 */ 529 static int 530 create_dev_files(zlog_t *zlogp) 531 { 532 char zonepath[MAXPATHLEN]; 533 534 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { 535 zerror(zlogp, B_TRUE, "unable to determine zone root"); 536 return (-1); 537 } 538 if (zonecfg_in_alt_root()) 539 resolve_lofs(zlogp, zonepath, sizeof (zonepath)); 540 541 if (make_dev_dirs(zlogp, zonepath) != 0) 542 return (-1); 543 if (make_dev_links(zlogp, zonepath) != 0) 544 return (-1); 545 return (0); 546 } 547 548 static void 549 free_remote_fstypes(char **types) 550 { 551 uint_t i; 552 553 if (types == NULL) 554 return; 555 for (i = 0; types[i] != NULL; i++) 556 free(types[i]); 557 free(types); 558 } 559 560 static char ** 561 get_remote_fstypes(zlog_t *zlogp) 562 { 563 char **types = NULL; 564 FILE *fp; 565 char buf[MAXPATHLEN]; 566 char fstype[MAXPATHLEN]; 567 uint_t lines = 0; 568 uint_t i; 569 570 if ((fp = fopen(DFSTYPES, "r")) == NULL) { 571 zerror(zlogp, B_TRUE, "failed to open %s", DFSTYPES); 572 return (NULL); 573 } 574 /* 575 * Count the number of lines 576 */ 577 while (fgets(buf, sizeof (buf), fp) != NULL) 578 lines++; 579 if (lines == 0) /* didn't read anything; empty file */ 580 goto out; 581 rewind(fp); 582 /* 583 * Allocate enough space for a NULL-terminated array. 584 */ 585 types = calloc(lines + 1, sizeof (char *)); 586 if (types == NULL) { 587 zerror(zlogp, B_TRUE, "memory allocation failed"); 588 goto out; 589 } 590 i = 0; 591 while (fgets(buf, sizeof (buf), fp) != NULL) { 592 /* LINTED - fstype is big enough to hold buf */ 593 if (sscanf(buf, "%s", fstype) == 0) { 594 zerror(zlogp, B_FALSE, "unable to parse %s", DFSTYPES); 595 free_remote_fstypes(types); 596 types = NULL; 597 goto out; 598 } 599 types[i] = strdup(fstype); 600 if (types[i] == NULL) { 601 zerror(zlogp, B_TRUE, "memory allocation failed"); 602 free_remote_fstypes(types); 603 types = NULL; 604 goto out; 605 } 606 i++; 607 } 608 out: 609 (void) fclose(fp); 610 return (types); 611 } 612 613 static boolean_t 614 is_remote_fstype(const char *fstype, char *const *remote_fstypes) 615 { 616 uint_t i; 617 618 if (remote_fstypes == NULL) 619 return (B_FALSE); 620 for (i = 0; remote_fstypes[i] != NULL; i++) { 621 if (strcmp(remote_fstypes[i], fstype) == 0) 622 return (B_TRUE); 623 } 624 return (B_FALSE); 625 } 626 627 /* 628 * This converts a zone root path (normally of the form .../root) to a Live 629 * Upgrade scratch zone root (of the form .../lu). 630 */ 631 static void 632 root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved) 633 { 634 if (!isresolved && zonecfg_in_alt_root()) 635 resolve_lofs(zlogp, zroot, zrootlen); 636 (void) strcpy(strrchr(zroot, '/') + 1, "lu"); 637 } 638 639 /* 640 * The general strategy for unmounting filesystems is as follows: 641 * 642 * - Remote filesystems may be dead, and attempting to contact them as 643 * part of a regular unmount may hang forever; we want to always try to 644 * forcibly unmount such filesystems and only fall back to regular 645 * unmounts if the filesystem doesn't support forced unmounts. 646 * 647 * - We don't want to unnecessarily corrupt metadata on local 648 * filesystems (ie UFS), so we want to start off with graceful unmounts, 649 * and only escalate to doing forced unmounts if we get stuck. 650 * 651 * We start off walking backwards through the mount table. This doesn't 652 * give us strict ordering but ensures that we try to unmount submounts 653 * first. We thus limit the number of failed umount2(2) calls. 654 * 655 * The mechanism for determining if we're stuck is to count the number 656 * of failed unmounts each iteration through the mount table. This 657 * gives us an upper bound on the number of filesystems which remain 658 * mounted (autofs trigger nodes are dealt with separately). If at the 659 * end of one unmount+autofs_cleanup cycle we still have the same number 660 * of mounts that we started out with, we're stuck and try a forced 661 * unmount. If that fails (filesystem doesn't support forced unmounts) 662 * then we bail and are unable to teardown the zone. If it succeeds, 663 * we're no longer stuck so we continue with our policy of trying 664 * graceful mounts first. 665 * 666 * Zone must be down (ie, no processes or threads active). 667 */ 668 static int 669 unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd) 670 { 671 int error = 0; 672 FILE *mnttab; 673 struct mnttab *mnts; 674 uint_t nmnt; 675 char zroot[MAXPATHLEN + 1]; 676 size_t zrootlen; 677 uint_t oldcount = UINT_MAX; 678 boolean_t stuck = B_FALSE; 679 char **remote_fstypes = NULL; 680 681 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 682 zerror(zlogp, B_FALSE, "unable to determine zone root"); 683 return (-1); 684 } 685 if (unmount_cmd) 686 root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE); 687 688 (void) strcat(zroot, "/"); 689 zrootlen = strlen(zroot); 690 691 if ((mnttab = fopen(MNTTAB, "r")) == NULL) { 692 zerror(zlogp, B_TRUE, "failed to open %s", MNTTAB); 693 return (-1); 694 } 695 /* 696 * Use our hacky mntfs ioctl so we see everything, even mounts with 697 * MS_NOMNTTAB. 698 */ 699 if (ioctl(fileno(mnttab), MNTIOC_SHOWHIDDEN, NULL) < 0) { 700 zerror(zlogp, B_TRUE, "unable to configure %s", MNTTAB); 701 error++; 702 goto out; 703 } 704 705 /* 706 * Build the list of remote fstypes so we know which ones we 707 * should forcibly unmount. 708 */ 709 remote_fstypes = get_remote_fstypes(zlogp); 710 for (; /* ever */; ) { 711 uint_t newcount = 0; 712 boolean_t unmounted; 713 struct mnttab *mnp; 714 char *path; 715 uint_t i; 716 717 mnts = NULL; 718 nmnt = 0; 719 /* 720 * MNTTAB gives us a way to walk through mounted 721 * filesystems; we need to be able to walk them in 722 * reverse order, so we build a list of all mounted 723 * filesystems. 724 */ 725 if (build_mnttable(zlogp, zroot, zrootlen, mnttab, &mnts, 726 &nmnt) != 0) { 727 error++; 728 goto out; 729 } 730 for (i = 0; i < nmnt; i++) { 731 mnp = &mnts[nmnt - i - 1]; /* access in reverse order */ 732 path = mnp->mnt_mountp; 733 unmounted = B_FALSE; 734 /* 735 * Try forced unmount first for remote filesystems. 736 * 737 * Not all remote filesystems support forced unmounts, 738 * so if this fails (ENOTSUP) we'll continue on 739 * and try a regular unmount. 740 */ 741 if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) { 742 if (umount2(path, MS_FORCE) == 0) 743 unmounted = B_TRUE; 744 } 745 /* 746 * Try forced unmount if we're stuck. 747 */ 748 if (stuck) { 749 if (umount2(path, MS_FORCE) == 0) { 750 unmounted = B_TRUE; 751 stuck = B_FALSE; 752 } else { 753 /* 754 * The first failure indicates a 755 * mount we won't be able to get 756 * rid of automatically, so we 757 * bail. 758 */ 759 error++; 760 zerror(zlogp, B_FALSE, 761 "unable to unmount '%s'", path); 762 free_mnttable(mnts, nmnt); 763 goto out; 764 } 765 } 766 /* 767 * Try regular unmounts for everything else. 768 */ 769 if (!unmounted && umount2(path, 0) != 0) 770 newcount++; 771 } 772 free_mnttable(mnts, nmnt); 773 774 if (newcount == 0) 775 break; 776 if (newcount >= oldcount) { 777 /* 778 * Last round didn't unmount anything; we're stuck and 779 * should start trying forced unmounts. 780 */ 781 stuck = B_TRUE; 782 } 783 oldcount = newcount; 784 785 /* 786 * Autofs doesn't let you unmount its trigger nodes from 787 * userland so we have to tell the kernel to cleanup for us. 788 */ 789 if (autofs_cleanup(zoneid) != 0) { 790 zerror(zlogp, B_TRUE, "unable to remove autofs nodes"); 791 error++; 792 goto out; 793 } 794 } 795 796 out: 797 free_remote_fstypes(remote_fstypes); 798 (void) fclose(mnttab); 799 return (error ? -1 : 0); 800 } 801 802 static int 803 fs_compare(const void *m1, const void *m2) 804 { 805 struct zone_fstab *i = (struct zone_fstab *)m1; 806 struct zone_fstab *j = (struct zone_fstab *)m2; 807 808 return (strcmp(i->zone_fs_dir, j->zone_fs_dir)); 809 } 810 811 /* 812 * Fork and exec (and wait for) the mentioned binary with the provided 813 * arguments. Returns (-1) if something went wrong with fork(2) or exec(2), 814 * returns the exit status otherwise. 815 * 816 * If we were unable to exec the provided pathname (for whatever 817 * reason), we return the special token ZEXIT_EXEC. The current value 818 * of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the 819 * consumers of this function; any future consumers must make sure this 820 * remains the case. 821 */ 822 static int 823 forkexec(zlog_t *zlogp, const char *path, char *const argv[]) 824 { 825 pid_t child_pid; 826 int child_status = 0; 827 828 /* 829 * Do not let another thread localize a message while we are forking. 830 */ 831 (void) mutex_lock(&msglock); 832 child_pid = fork(); 833 (void) mutex_unlock(&msglock); 834 if (child_pid == -1) { 835 zerror(zlogp, B_TRUE, "could not fork for %s", argv[0]); 836 return (-1); 837 } else if (child_pid == 0) { 838 closefrom(0); 839 (void) execv(path, argv); 840 /* 841 * Since we are in the child, there is no point calling zerror() 842 * since there is nobody waiting to consume it. So exit with a 843 * special code that the parent will recognize and call zerror() 844 * accordingly. 845 */ 846 847 _exit(ZEXIT_EXEC); 848 } else { 849 (void) waitpid(child_pid, &child_status, 0); 850 } 851 852 if (WIFSIGNALED(child_status)) { 853 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 854 "signal %d", path, WTERMSIG(child_status)); 855 return (-1); 856 } 857 assert(WIFEXITED(child_status)); 858 if (WEXITSTATUS(child_status) == ZEXIT_EXEC) { 859 zerror(zlogp, B_FALSE, "failed to exec %s", path); 860 return (-1); 861 } 862 return (WEXITSTATUS(child_status)); 863 } 864 865 static int 866 dofsck(zlog_t *zlogp, const char *fstype, const char *rawdev) 867 { 868 char cmdbuf[MAXPATHLEN]; 869 char *argv[4]; 870 int status; 871 872 /* 873 * We could alternatively have called /usr/sbin/fsck -F <fstype>, but 874 * that would cost us an extra fork/exec without buying us anything. 875 */ 876 if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/fsck", fstype) 877 > sizeof (cmdbuf)) { 878 zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); 879 return (-1); 880 } 881 882 argv[0] = "fsck"; 883 argv[1] = "-m"; 884 argv[2] = (char *)rawdev; 885 argv[3] = NULL; 886 887 status = forkexec(zlogp, cmdbuf, argv); 888 if (status == 0 || status == -1) 889 return (status); 890 zerror(zlogp, B_FALSE, "fsck of '%s' failed with exit status %d; " 891 "run fsck manually", rawdev, status); 892 return (-1); 893 } 894 895 static int 896 domount(zlog_t *zlogp, const char *fstype, const char *opts, 897 const char *special, const char *directory) 898 { 899 char cmdbuf[MAXPATHLEN]; 900 char *argv[6]; 901 int status; 902 903 /* 904 * We could alternatively have called /usr/sbin/mount -F <fstype>, but 905 * that would cost us an extra fork/exec without buying us anything. 906 */ 907 if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/mount", fstype) 908 > sizeof (cmdbuf)) { 909 zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); 910 return (-1); 911 } 912 argv[0] = "mount"; 913 if (opts[0] == '\0') { 914 argv[1] = (char *)special; 915 argv[2] = (char *)directory; 916 argv[3] = NULL; 917 } else { 918 argv[1] = "-o"; 919 argv[2] = (char *)opts; 920 argv[3] = (char *)special; 921 argv[4] = (char *)directory; 922 argv[5] = NULL; 923 } 924 925 status = forkexec(zlogp, cmdbuf, argv); 926 if (status == 0 || status == -1) 927 return (status); 928 if (opts[0] == '\0') 929 zerror(zlogp, B_FALSE, "\"%s %s %s\" " 930 "failed with exit code %d", 931 cmdbuf, special, directory, status); 932 else 933 zerror(zlogp, B_FALSE, "\"%s -o %s %s %s\" " 934 "failed with exit code %d", 935 cmdbuf, opts, special, directory, status); 936 return (-1); 937 } 938 939 /* 940 * Make sure if a given path exists, it is not a sym-link, and is a directory. 941 */ 942 static int 943 check_path(zlog_t *zlogp, const char *path) 944 { 945 struct stat statbuf; 946 char respath[MAXPATHLEN]; 947 int res; 948 949 if (lstat(path, &statbuf) != 0) { 950 if (errno == ENOENT) 951 return (0); 952 zerror(zlogp, B_TRUE, "can't stat %s", path); 953 return (-1); 954 } 955 if (S_ISLNK(statbuf.st_mode)) { 956 zerror(zlogp, B_FALSE, "%s is a symlink", path); 957 return (-1); 958 } 959 if (!S_ISDIR(statbuf.st_mode)) { 960 zerror(zlogp, B_FALSE, "%s is not a directory", path); 961 return (-1); 962 } 963 if ((res = resolvepath(path, respath, sizeof (respath))) == -1) { 964 zerror(zlogp, B_TRUE, "unable to resolve path %s", path); 965 return (-1); 966 } 967 respath[res] = '\0'; 968 if (strcmp(path, respath) != 0) { 969 /* 970 * We don't like ".."s and "."s throwing us off 971 */ 972 zerror(zlogp, B_FALSE, "%s is not a canonical path", path); 973 return (-1); 974 } 975 return (0); 976 } 977 978 /* 979 * Check every component of rootpath/relpath. If any component fails (ie, 980 * exists but isn't the canonical path to a directory), it is returned in 981 * badpath, which is assumed to be at least of size MAXPATHLEN. 982 * 983 * Relpath must begin with '/'. 984 */ 985 static boolean_t 986 valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *relpath) 987 { 988 char abspath[MAXPATHLEN], *slashp; 989 990 /* 991 * Make sure abspath has at least one '/' after its rootpath 992 * component, and ends with '/'. 993 */ 994 if (snprintf(abspath, sizeof (abspath), "%s%s/", rootpath, relpath) > 995 sizeof (abspath)) { 996 zerror(zlogp, B_FALSE, "pathname %s%s is too long", rootpath, 997 relpath); 998 return (B_FALSE); 999 } 1000 1001 slashp = &abspath[strlen(rootpath)]; 1002 assert(*slashp == '/'); 1003 do { 1004 *slashp = '\0'; 1005 if (check_path(zlogp, abspath) != 0) 1006 return (B_FALSE); 1007 *slashp = '/'; 1008 slashp++; 1009 } while ((slashp = strchr(slashp, '/')) != NULL); 1010 return (B_TRUE); 1011 } 1012 1013 static int 1014 mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath) 1015 { 1016 char path[MAXPATHLEN]; 1017 char specpath[MAXPATHLEN]; 1018 char optstr[MAX_MNTOPT_STR]; 1019 zone_fsopt_t *optptr; 1020 1021 if (!valid_mount_path(zlogp, rootpath, fsptr->zone_fs_dir)) { 1022 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 1023 rootpath, fsptr->zone_fs_dir); 1024 return (-1); 1025 } 1026 1027 if (make_one_dir(zlogp, rootpath, fsptr->zone_fs_dir, 1028 DEFAULT_DIR_MODE) != 0) 1029 return (-1); 1030 1031 (void) snprintf(path, sizeof (path), "%s%s", rootpath, 1032 fsptr->zone_fs_dir); 1033 1034 if (strlen(fsptr->zone_fs_special) == 0) { 1035 /* 1036 * A zero-length special is how we distinguish IPDs from 1037 * general-purpose FSs. Make sure it mounts from a place that 1038 * can be seen via the alternate zone's root. 1039 */ 1040 if (snprintf(specpath, sizeof (specpath), "%s%s", 1041 zonecfg_get_root(), fsptr->zone_fs_dir) >= 1042 sizeof (specpath)) { 1043 zerror(zlogp, B_FALSE, "cannot mount %s: path too " 1044 "long in alternate root", fsptr->zone_fs_dir); 1045 return (-1); 1046 } 1047 if (zonecfg_in_alt_root()) 1048 resolve_lofs(zlogp, specpath, sizeof (specpath)); 1049 if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, 1050 specpath, path) != 0) { 1051 zerror(zlogp, B_TRUE, "failed to loopback mount %s", 1052 specpath); 1053 return (-1); 1054 } 1055 return (0); 1056 } 1057 1058 /* 1059 * In general the strategy here is to do just as much verification as 1060 * necessary to avoid crashing or otherwise doing something bad; if the 1061 * administrator initiated the operation via zoneadm(1m), he'll get 1062 * auto-verification which will let him know what's wrong. If he 1063 * modifies the zone configuration of a running zone and doesn't attempt 1064 * to verify that it's OK we won't crash but won't bother trying to be 1065 * too helpful either. zoneadm verify is only a couple keystrokes away. 1066 */ 1067 if (!zonecfg_valid_fs_type(fsptr->zone_fs_type)) { 1068 zerror(zlogp, B_FALSE, "cannot mount %s on %s: " 1069 "invalid file-system type %s", fsptr->zone_fs_special, 1070 fsptr->zone_fs_dir, fsptr->zone_fs_type); 1071 return (-1); 1072 } 1073 1074 /* 1075 * If we're looking at an alternate root environment, then construct 1076 * read-only loopback mounts as necessary. For all lofs mounts, make 1077 * sure that the 'special' entry points inside the alternate root. (We 1078 * don't do this with other mounts, as devfs isn't in the alternate 1079 * root, and we need to assume the device environment is roughly the 1080 * same.) 1081 */ 1082 if (zonecfg_in_alt_root()) { 1083 struct stat64 st; 1084 1085 if (stat64(fsptr->zone_fs_special, &st) != -1 && 1086 S_ISBLK(st.st_mode) && 1087 check_lofs_needed(zlogp, fsptr) == -1) 1088 return (-1); 1089 if (strcmp(fsptr->zone_fs_type, MNTTYPE_LOFS) == 0) { 1090 if (snprintf(specpath, sizeof (specpath), "%s%s", 1091 zonecfg_get_root(), fsptr->zone_fs_special) >= 1092 sizeof (specpath)) { 1093 zerror(zlogp, B_FALSE, "cannot mount %s: path " 1094 "too long in alternate root", 1095 fsptr->zone_fs_special); 1096 return (-1); 1097 } 1098 resolve_lofs(zlogp, specpath, sizeof (specpath)); 1099 (void) strlcpy(fsptr->zone_fs_special, specpath, 1100 sizeof (fsptr->zone_fs_special)); 1101 } 1102 } 1103 1104 /* 1105 * Run 'fsck -m' if there's a device to fsck. 1106 */ 1107 if (fsptr->zone_fs_raw[0] != '\0' && 1108 dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_raw) != 0) 1109 return (-1); 1110 1111 /* 1112 * Build up mount option string. 1113 */ 1114 optstr[0] = '\0'; 1115 if (fsptr->zone_fs_options != NULL) { 1116 (void) strlcpy(optstr, fsptr->zone_fs_options->zone_fsopt_opt, 1117 sizeof (optstr)); 1118 for (optptr = fsptr->zone_fs_options->zone_fsopt_next; 1119 optptr != NULL; optptr = optptr->zone_fsopt_next) { 1120 (void) strlcat(optstr, ",", sizeof (optstr)); 1121 (void) strlcat(optstr, optptr->zone_fsopt_opt, 1122 sizeof (optstr)); 1123 } 1124 } 1125 return (domount(zlogp, fsptr->zone_fs_type, optstr, 1126 fsptr->zone_fs_special, path)); 1127 } 1128 1129 static void 1130 free_fs_data(struct zone_fstab *fsarray, uint_t nelem) 1131 { 1132 uint_t i; 1133 1134 if (fsarray == NULL) 1135 return; 1136 for (i = 0; i < nelem; i++) 1137 zonecfg_free_fs_option_list(fsarray[i].zone_fs_options); 1138 free(fsarray); 1139 } 1140 1141 /* 1142 * This function constructs the miniroot-like "scratch zone" environment. If 1143 * it returns B_FALSE, then the error has already been logged. 1144 */ 1145 static boolean_t 1146 build_mounted(zlog_t *zlogp, char *rootpath, size_t rootlen, 1147 const char *zonepath) 1148 { 1149 char tmp[MAXPATHLEN], fromdir[MAXPATHLEN]; 1150 char luroot[MAXPATHLEN]; 1151 const char **cpp; 1152 static const char *mkdirs[] = { 1153 "/system", "/system/contract", "/proc", "/dev", "/tmp", 1154 "/a", NULL 1155 }; 1156 static const char *localdirs[] = { 1157 "/etc", "/var", NULL 1158 }; 1159 static const char *loopdirs[] = { 1160 "/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform", 1161 "/usr", NULL 1162 }; 1163 static const char *tmpdirs[] = { 1164 "/tmp", "/var/run", NULL 1165 }; 1166 FILE *fp; 1167 struct stat st; 1168 char *altstr; 1169 uuid_t uuid; 1170 1171 /* 1172 * Construct a small Solaris environment, including the zone root 1173 * mounted on '/a' inside that environment. 1174 */ 1175 resolve_lofs(zlogp, rootpath, rootlen); 1176 (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); 1177 resolve_lofs(zlogp, luroot, sizeof (luroot)); 1178 (void) snprintf(tmp, sizeof (tmp), "%s/bin", luroot); 1179 (void) symlink("./usr/bin", tmp); 1180 1181 /* 1182 * These are mostly special mount points; not handled here. (See 1183 * zone_mount_early.) 1184 */ 1185 for (cpp = mkdirs; *cpp != NULL; cpp++) { 1186 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1187 if (mkdir(tmp, 0755) != 0) { 1188 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1189 return (B_FALSE); 1190 } 1191 } 1192 1193 /* 1194 * These are mounted read-write from the zone undergoing upgrade. We 1195 * must be careful not to 'leak' things from the main system into the 1196 * zone, and this accomplishes that goal. 1197 */ 1198 for (cpp = localdirs; *cpp != NULL; cpp++) { 1199 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1200 (void) snprintf(fromdir, sizeof (fromdir), "%s%s", rootpath, 1201 *cpp); 1202 if (mkdir(tmp, 0755) != 0) { 1203 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1204 return (B_FALSE); 1205 } 1206 if (domount(zlogp, MNTTYPE_LOFS, "", fromdir, tmp) != 0) { 1207 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1208 *cpp); 1209 return (B_FALSE); 1210 } 1211 } 1212 1213 /* 1214 * These are things mounted read-only from the running system because 1215 * they contain binaries that must match system. 1216 */ 1217 for (cpp = loopdirs; *cpp != NULL; cpp++) { 1218 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1219 if (mkdir(tmp, 0755) != 0) { 1220 if (errno != EEXIST) { 1221 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1222 return (B_FALSE); 1223 } 1224 if (lstat(tmp, &st) != 0) { 1225 zerror(zlogp, B_TRUE, "cannot stat %s", tmp); 1226 return (B_FALSE); 1227 } 1228 /* 1229 * Ignore any non-directories encountered. These are 1230 * things that have been converted into symlinks 1231 * (/etc/fs and /etc/lib) and no longer need a lofs 1232 * fixup. 1233 */ 1234 if (!S_ISDIR(st.st_mode)) 1235 continue; 1236 } 1237 if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, *cpp, 1238 tmp) != 0) { 1239 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1240 *cpp); 1241 return (B_FALSE); 1242 } 1243 } 1244 1245 /* 1246 * These are things with tmpfs mounted inside. 1247 */ 1248 for (cpp = tmpdirs; *cpp != NULL; cpp++) { 1249 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1250 if (mkdir(tmp, 0755) != 0 && errno != EEXIST) { 1251 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1252 return (B_FALSE); 1253 } 1254 if (domount(zlogp, MNTTYPE_TMPFS, "", "swap", tmp) != 0) { 1255 zerror(zlogp, B_TRUE, "cannot mount swap on %s", *cpp); 1256 return (B_FALSE); 1257 } 1258 } 1259 1260 /* 1261 * This is here to support lucopy. If there's an instance of this same 1262 * zone on the current running system, then we mount its root up as 1263 * read-only inside the scratch zone. 1264 */ 1265 (void) zonecfg_get_uuid(zone_name, uuid); 1266 altstr = strdup(zonecfg_get_root()); 1267 if (altstr == NULL) { 1268 zerror(zlogp, B_TRUE, "out of memory"); 1269 return (B_FALSE); 1270 } 1271 zonecfg_set_root(""); 1272 (void) strlcpy(tmp, zone_name, sizeof (tmp)); 1273 (void) zonecfg_get_name_by_uuid(uuid, tmp, sizeof (tmp)); 1274 if (zone_get_rootpath(tmp, fromdir, sizeof (fromdir)) == Z_OK && 1275 strcmp(fromdir, rootpath) != 0) { 1276 (void) snprintf(tmp, sizeof (tmp), "%s/b", luroot); 1277 if (mkdir(tmp, 0755) != 0) { 1278 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1279 return (B_FALSE); 1280 } 1281 if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, fromdir, 1282 tmp) != 0) { 1283 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1284 fromdir); 1285 return (B_FALSE); 1286 } 1287 } 1288 zonecfg_set_root(altstr); 1289 free(altstr); 1290 1291 if ((fp = zonecfg_open_scratch(luroot, B_TRUE)) == NULL) { 1292 zerror(zlogp, B_TRUE, "cannot open zone mapfile"); 1293 return (B_FALSE); 1294 } 1295 (void) ftruncate(fileno(fp), 0); 1296 if (zonecfg_add_scratch(fp, zone_name, kernzone, "/") == -1) { 1297 zerror(zlogp, B_TRUE, "cannot add zone mapfile entry"); 1298 } 1299 zonecfg_close_scratch(fp); 1300 (void) snprintf(tmp, sizeof (tmp), "%s/a", luroot); 1301 if (domount(zlogp, MNTTYPE_LOFS, "", rootpath, tmp) != 0) 1302 return (B_FALSE); 1303 (void) strlcpy(rootpath, tmp, rootlen); 1304 return (B_TRUE); 1305 } 1306 1307 static int 1308 mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd) 1309 { 1310 char rootpath[MAXPATHLEN]; 1311 char zonepath[MAXPATHLEN]; 1312 int num_fs = 0, i; 1313 struct zone_fstab fstab, *fs_ptr = NULL, *tmp_ptr; 1314 struct zone_fstab *fsp; 1315 zone_dochandle_t handle = NULL; 1316 zone_state_t zstate; 1317 1318 if (zone_get_state(zone_name, &zstate) != Z_OK || 1319 (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) { 1320 zerror(zlogp, B_FALSE, 1321 "zone must be in '%s' or '%s' state to mount file-systems", 1322 zone_state_str(ZONE_STATE_READY), 1323 zone_state_str(ZONE_STATE_MOUNTED)); 1324 goto bad; 1325 } 1326 1327 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { 1328 zerror(zlogp, B_TRUE, "unable to determine zone path"); 1329 goto bad; 1330 } 1331 1332 if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { 1333 zerror(zlogp, B_TRUE, "unable to determine zone root"); 1334 goto bad; 1335 } 1336 1337 if ((handle = zonecfg_init_handle()) == NULL) { 1338 zerror(zlogp, B_TRUE, 1339 "could not get zone configuration handle"); 1340 goto bad; 1341 } 1342 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK || 1343 zonecfg_setfsent(handle) != Z_OK) { 1344 zerror(zlogp, B_FALSE, "invalid configuration"); 1345 goto bad; 1346 } 1347 1348 /* 1349 * /dev in the zone is loopback'd from the external /dev repository, 1350 * in order to provide a largely read-only semantic. But because 1351 * processes in the zone need to be able to chown, chmod, etc. zone 1352 * /dev files, we can't use a 'ro' lofs mount. Instead we use a 1353 * special mode just for zones, "zonedevfs". 1354 * 1355 * In the future we should front /dev with a full-fledged filesystem. 1356 */ 1357 num_fs++; 1358 if ((tmp_ptr = realloc(fs_ptr, num_fs * sizeof (*tmp_ptr))) == NULL) { 1359 zerror(zlogp, B_TRUE, "memory allocation failed"); 1360 num_fs--; 1361 goto bad; 1362 } 1363 fs_ptr = tmp_ptr; 1364 fsp = &fs_ptr[num_fs - 1]; 1365 /* 1366 * Note that mount_one will prepend the alternate root to 1367 * zone_fs_special and do the necessary resolution, so all that is 1368 * needed here is to strip the root added by zone_get_zonepath. 1369 */ 1370 (void) strlcpy(fsp->zone_fs_dir, "/dev", sizeof (fsp->zone_fs_dir)); 1371 (void) snprintf(fsp->zone_fs_special, sizeof (fsp->zone_fs_special), 1372 "%s/dev", zonepath + strlen(zonecfg_get_root())); 1373 fsp->zone_fs_raw[0] = '\0'; 1374 (void) strlcpy(fsp->zone_fs_type, MNTTYPE_LOFS, 1375 sizeof (fsp->zone_fs_type)); 1376 fsp->zone_fs_options = NULL; 1377 if (zonecfg_add_fs_option(fsp, MNTOPT_LOFS_ZONEDEVFS) != Z_OK) { 1378 zerror(zlogp, B_FALSE, "error adding property"); 1379 goto bad; 1380 } 1381 1382 /* 1383 * Iterate through the rest of the filesystems, first the IPDs, then 1384 * the general FSs. Sort them all, then mount them in sorted order. 1385 * This is to make sure the higher level directories (e.g., /usr) 1386 * get mounted before any beneath them (e.g., /usr/local). 1387 */ 1388 if (zonecfg_setipdent(handle) != Z_OK) { 1389 zerror(zlogp, B_FALSE, "invalid configuration"); 1390 goto bad; 1391 } 1392 while (zonecfg_getipdent(handle, &fstab) == Z_OK) { 1393 num_fs++; 1394 if ((tmp_ptr = realloc(fs_ptr, 1395 num_fs * sizeof (*tmp_ptr))) == NULL) { 1396 zerror(zlogp, B_TRUE, "memory allocation failed"); 1397 num_fs--; 1398 (void) zonecfg_endipdent(handle); 1399 goto bad; 1400 } 1401 fs_ptr = tmp_ptr; 1402 fsp = &fs_ptr[num_fs - 1]; 1403 /* 1404 * IPDs logically only have a mount point; all other properties 1405 * are implied. 1406 */ 1407 (void) strlcpy(fsp->zone_fs_dir, 1408 fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir)); 1409 fsp->zone_fs_special[0] = '\0'; 1410 fsp->zone_fs_raw[0] = '\0'; 1411 fsp->zone_fs_type[0] = '\0'; 1412 fsp->zone_fs_options = NULL; 1413 } 1414 (void) zonecfg_endipdent(handle); 1415 1416 if (zonecfg_setfsent(handle) != Z_OK) { 1417 zerror(zlogp, B_FALSE, "invalid configuration"); 1418 goto bad; 1419 } 1420 while (zonecfg_getfsent(handle, &fstab) == Z_OK) { 1421 num_fs++; 1422 if ((tmp_ptr = realloc(fs_ptr, 1423 num_fs * sizeof (*tmp_ptr))) == NULL) { 1424 zerror(zlogp, B_TRUE, "memory allocation failed"); 1425 num_fs--; 1426 (void) zonecfg_endfsent(handle); 1427 goto bad; 1428 } 1429 fs_ptr = tmp_ptr; 1430 fsp = &fs_ptr[num_fs - 1]; 1431 (void) strlcpy(fsp->zone_fs_dir, 1432 fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir)); 1433 (void) strlcpy(fsp->zone_fs_special, fstab.zone_fs_special, 1434 sizeof (fsp->zone_fs_special)); 1435 (void) strlcpy(fsp->zone_fs_raw, fstab.zone_fs_raw, 1436 sizeof (fsp->zone_fs_raw)); 1437 (void) strlcpy(fsp->zone_fs_type, fstab.zone_fs_type, 1438 sizeof (fsp->zone_fs_type)); 1439 fsp->zone_fs_options = fstab.zone_fs_options; 1440 } 1441 (void) zonecfg_endfsent(handle); 1442 zonecfg_fini_handle(handle); 1443 handle = NULL; 1444 1445 /* 1446 * If we're mounting a zone for administration, then we need to set up 1447 * the "/a" environment inside the zone so that the commands that run 1448 * in there have access to both the running system's utilities and the 1449 * to-be-modified zone's files. 1450 */ 1451 if (mount_cmd && 1452 !build_mounted(zlogp, rootpath, sizeof (rootpath), zonepath)) 1453 goto bad; 1454 1455 qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare); 1456 for (i = 0; i < num_fs; i++) { 1457 if (mount_cmd && strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) { 1458 size_t slen = strlen(rootpath) - 2; 1459 1460 /* /dev is special and always goes at the top */ 1461 rootpath[slen] = '\0'; 1462 if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0) 1463 goto bad; 1464 rootpath[slen] = '/'; 1465 continue; 1466 } 1467 if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0) 1468 goto bad; 1469 } 1470 free_fs_data(fs_ptr, num_fs); 1471 1472 /* 1473 * Everything looks fine. 1474 */ 1475 return (0); 1476 1477 bad: 1478 if (handle != NULL) 1479 zonecfg_fini_handle(handle); 1480 free_fs_data(fs_ptr, num_fs); 1481 return (-1); 1482 } 1483 1484 /* caller makes sure neither parameter is NULL */ 1485 static int 1486 addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr) 1487 { 1488 int prefixlen; 1489 1490 prefixlen = atoi(prefixstr); 1491 if (prefixlen < 0 || prefixlen > maxprefixlen) 1492 return (1); 1493 while (prefixlen > 0) { 1494 if (prefixlen >= 8) { 1495 *maskstr++ = 0xFF; 1496 prefixlen -= 8; 1497 continue; 1498 } 1499 *maskstr |= 1 << (8 - prefixlen); 1500 prefixlen--; 1501 } 1502 return (0); 1503 } 1504 1505 /* 1506 * Tear down all interfaces belonging to the given zone. This should 1507 * be called with the zone in a state other than "running", so that 1508 * interfaces can't be assigned to the zone after this returns. 1509 * 1510 * If anything goes wrong, log an error message and return an error. 1511 */ 1512 static int 1513 unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id) 1514 { 1515 struct lifnum lifn; 1516 struct lifconf lifc; 1517 struct lifreq *lifrp, lifrl; 1518 int64_t lifc_flags = LIFC_NOXMIT | LIFC_ALLZONES; 1519 int num_ifs, s, i, ret_code = 0; 1520 uint_t bufsize; 1521 char *buf = NULL; 1522 1523 if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { 1524 zerror(zlogp, B_TRUE, "could not get socket"); 1525 ret_code = -1; 1526 goto bad; 1527 } 1528 lifn.lifn_family = AF_UNSPEC; 1529 lifn.lifn_flags = (int)lifc_flags; 1530 if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) { 1531 zerror(zlogp, B_TRUE, 1532 "could not determine number of interfaces"); 1533 ret_code = -1; 1534 goto bad; 1535 } 1536 num_ifs = lifn.lifn_count; 1537 bufsize = num_ifs * sizeof (struct lifreq); 1538 if ((buf = malloc(bufsize)) == NULL) { 1539 zerror(zlogp, B_TRUE, "memory allocation failed"); 1540 ret_code = -1; 1541 goto bad; 1542 } 1543 lifc.lifc_family = AF_UNSPEC; 1544 lifc.lifc_flags = (int)lifc_flags; 1545 lifc.lifc_len = bufsize; 1546 lifc.lifc_buf = buf; 1547 if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) { 1548 zerror(zlogp, B_TRUE, "could not get configured interfaces"); 1549 ret_code = -1; 1550 goto bad; 1551 } 1552 lifrp = lifc.lifc_req; 1553 for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--, lifrp++) { 1554 (void) close(s); 1555 if ((s = socket(lifrp->lifr_addr.ss_family, SOCK_DGRAM, 0)) < 1556 0) { 1557 zerror(zlogp, B_TRUE, "%s: could not get socket", 1558 lifrl.lifr_name); 1559 ret_code = -1; 1560 continue; 1561 } 1562 (void) memset(&lifrl, 0, sizeof (lifrl)); 1563 (void) strncpy(lifrl.lifr_name, lifrp->lifr_name, 1564 sizeof (lifrl.lifr_name)); 1565 if (ioctl(s, SIOCGLIFZONE, (caddr_t)&lifrl) < 0) { 1566 zerror(zlogp, B_TRUE, 1567 "%s: could not determine zone interface belongs to", 1568 lifrl.lifr_name); 1569 ret_code = -1; 1570 continue; 1571 } 1572 if (lifrl.lifr_zoneid == zone_id) { 1573 if (ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifrl) < 0) { 1574 zerror(zlogp, B_TRUE, 1575 "%s: could not remove interface", 1576 lifrl.lifr_name); 1577 ret_code = -1; 1578 continue; 1579 } 1580 } 1581 } 1582 bad: 1583 if (s > 0) 1584 (void) close(s); 1585 if (buf) 1586 free(buf); 1587 return (ret_code); 1588 } 1589 1590 static union sockunion { 1591 struct sockaddr sa; 1592 struct sockaddr_in sin; 1593 struct sockaddr_dl sdl; 1594 struct sockaddr_in6 sin6; 1595 } so_dst, so_ifp; 1596 1597 static struct { 1598 struct rt_msghdr hdr; 1599 char space[512]; 1600 } rtmsg; 1601 1602 static int 1603 salen(struct sockaddr *sa) 1604 { 1605 switch (sa->sa_family) { 1606 case AF_INET: 1607 return (sizeof (struct sockaddr_in)); 1608 case AF_LINK: 1609 return (sizeof (struct sockaddr_dl)); 1610 case AF_INET6: 1611 return (sizeof (struct sockaddr_in6)); 1612 default: 1613 return (sizeof (struct sockaddr)); 1614 } 1615 } 1616 1617 #define ROUNDUP_LONG(a) \ 1618 ((a) > 0 ? (1 + (((a) - 1) | (sizeof (long) - 1))) : sizeof (long)) 1619 1620 /* 1621 * Look up which zone is using a given IP address. The address in question 1622 * is expected to have been stuffed into the structure to which lifr points 1623 * via a previous SIOCGLIFADDR ioctl(). 1624 * 1625 * This is done using black router socket magic. 1626 * 1627 * Return the name of the zone on success or NULL on failure. 1628 * 1629 * This is a lot of code for a simple task; a new ioctl request to take care 1630 * of this might be a useful RFE. 1631 */ 1632 1633 static char * 1634 who_is_using(zlog_t *zlogp, struct lifreq *lifr) 1635 { 1636 static char answer[ZONENAME_MAX]; 1637 pid_t pid; 1638 int s, rlen, l, i; 1639 char *cp = rtmsg.space; 1640 struct sockaddr_dl *ifp = NULL; 1641 struct sockaddr *sa; 1642 char save_if_name[LIFNAMSIZ]; 1643 1644 answer[0] = '\0'; 1645 1646 pid = getpid(); 1647 if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) < 0) { 1648 zerror(zlogp, B_TRUE, "could not get routing socket"); 1649 return (NULL); 1650 } 1651 1652 if (lifr->lifr_addr.ss_family == AF_INET) { 1653 struct sockaddr_in *sin4; 1654 1655 so_dst.sa.sa_family = AF_INET; 1656 sin4 = (struct sockaddr_in *)&lifr->lifr_addr; 1657 so_dst.sin.sin_addr = sin4->sin_addr; 1658 } else { 1659 struct sockaddr_in6 *sin6; 1660 1661 so_dst.sa.sa_family = AF_INET6; 1662 sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr; 1663 so_dst.sin6.sin6_addr = sin6->sin6_addr; 1664 } 1665 1666 so_ifp.sa.sa_family = AF_LINK; 1667 1668 (void) memset(&rtmsg, 0, sizeof (rtmsg)); 1669 rtmsg.hdr.rtm_type = RTM_GET; 1670 rtmsg.hdr.rtm_flags = RTF_UP | RTF_HOST; 1671 rtmsg.hdr.rtm_version = RTM_VERSION; 1672 rtmsg.hdr.rtm_seq = ++rts_seqno; 1673 rtmsg.hdr.rtm_addrs = RTA_IFP | RTA_DST; 1674 1675 l = ROUNDUP_LONG(salen(&so_dst.sa)); 1676 (void) memmove(cp, &(so_dst), l); 1677 cp += l; 1678 l = ROUNDUP_LONG(salen(&so_ifp.sa)); 1679 (void) memmove(cp, &(so_ifp), l); 1680 cp += l; 1681 1682 rtmsg.hdr.rtm_msglen = l = cp - (char *)&rtmsg; 1683 1684 if ((rlen = write(s, &rtmsg, l)) < 0) { 1685 zerror(zlogp, B_TRUE, "writing to routing socket"); 1686 return (NULL); 1687 } else if (rlen < (int)rtmsg.hdr.rtm_msglen) { 1688 zerror(zlogp, B_TRUE, 1689 "write to routing socket got only %d for len\n", rlen); 1690 return (NULL); 1691 } 1692 do { 1693 l = read(s, &rtmsg, sizeof (rtmsg)); 1694 } while (l > 0 && (rtmsg.hdr.rtm_seq != rts_seqno || 1695 rtmsg.hdr.rtm_pid != pid)); 1696 if (l < 0) { 1697 zerror(zlogp, B_TRUE, "reading from routing socket"); 1698 return (NULL); 1699 } 1700 1701 if (rtmsg.hdr.rtm_version != RTM_VERSION) { 1702 zerror(zlogp, B_FALSE, 1703 "routing message version %d not understood", 1704 rtmsg.hdr.rtm_version); 1705 return (NULL); 1706 } 1707 if (rtmsg.hdr.rtm_msglen != (ushort_t)l) { 1708 zerror(zlogp, B_FALSE, "message length mismatch, " 1709 "expected %d bytes, returned %d bytes", 1710 rtmsg.hdr.rtm_msglen, l); 1711 return (NULL); 1712 } 1713 if (rtmsg.hdr.rtm_errno != 0) { 1714 errno = rtmsg.hdr.rtm_errno; 1715 zerror(zlogp, B_TRUE, "RTM_GET routing socket message"); 1716 return (NULL); 1717 } 1718 if ((rtmsg.hdr.rtm_addrs & RTA_IFP) == 0) { 1719 zerror(zlogp, B_FALSE, "interface not found"); 1720 return (NULL); 1721 } 1722 cp = ((char *)(&rtmsg.hdr + 1)); 1723 for (i = 1; i != 0; i <<= 1) { 1724 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1725 sa = (struct sockaddr *)cp; 1726 if (i != RTA_IFP) { 1727 if ((i & rtmsg.hdr.rtm_addrs) != 0) 1728 cp += ROUNDUP_LONG(salen(sa)); 1729 continue; 1730 } 1731 if (sa->sa_family == AF_LINK && 1732 ((struct sockaddr_dl *)sa)->sdl_nlen != 0) 1733 ifp = (struct sockaddr_dl *)sa; 1734 break; 1735 } 1736 if (ifp == NULL) { 1737 zerror(zlogp, B_FALSE, "interface could not be determined"); 1738 return (NULL); 1739 } 1740 1741 /* 1742 * We need to set the I/F name to what we got above, then do the 1743 * appropriate ioctl to get its zone name. But lifr->lifr_name is 1744 * used by the calling function to do a REMOVEIF, so if we leave the 1745 * "good" zone's I/F name in place, *that* I/F will be removed instead 1746 * of the bad one. So we save the old (bad) I/F name before over- 1747 * writing it and doing the ioctl, then restore it after the ioctl. 1748 */ 1749 (void) strlcpy(save_if_name, lifr->lifr_name, sizeof (save_if_name)); 1750 (void) strncpy(lifr->lifr_name, ifp->sdl_data, ifp->sdl_nlen); 1751 lifr->lifr_name[ifp->sdl_nlen] = '\0'; 1752 i = ioctl(s, SIOCGLIFZONE, lifr); 1753 (void) strlcpy(lifr->lifr_name, save_if_name, sizeof (save_if_name)); 1754 if (i < 0) { 1755 zerror(zlogp, B_TRUE, 1756 "%s: could not determine the zone interface belongs to", 1757 lifr->lifr_name); 1758 return (NULL); 1759 } 1760 if (getzonenamebyid(lifr->lifr_zoneid, answer, sizeof (answer)) < 0) 1761 (void) snprintf(answer, sizeof (answer), "%d", 1762 lifr->lifr_zoneid); 1763 1764 if (strlen(answer) > 0) 1765 return (answer); 1766 return (NULL); 1767 } 1768 1769 typedef struct mcast_rtmsg_s { 1770 struct rt_msghdr m_rtm; 1771 union { 1772 struct { 1773 struct sockaddr_in m_dst; 1774 struct sockaddr_in m_gw; 1775 struct sockaddr_in m_netmask; 1776 } m_v4; 1777 struct { 1778 struct sockaddr_in6 m_dst; 1779 struct sockaddr_in6 m_gw; 1780 struct sockaddr_in6 m_netmask; 1781 } m_v6; 1782 } m_u; 1783 } mcast_rtmsg_t; 1784 #define m_dst4 m_u.m_v4.m_dst 1785 #define m_dst6 m_u.m_v6.m_dst 1786 #define m_gw4 m_u.m_v4.m_gw 1787 #define m_gw6 m_u.m_v6.m_gw 1788 #define m_netmask4 m_u.m_v4.m_netmask 1789 #define m_netmask6 m_u.m_v6.m_netmask 1790 1791 /* 1792 * Configures a single interface: a new virtual interface is added, based on 1793 * the physical interface nwiftabptr->zone_nwif_physical, with the address 1794 * specified in nwiftabptr->zone_nwif_address, for zone zone_id. Note that 1795 * the "address" can be an IPv6 address (with a /prefixlength required), an 1796 * IPv4 address (with a /prefixlength optional), or a name; for the latter, 1797 * an IPv4 name-to-address resolution will be attempted. 1798 * 1799 * A default interface route for multicast is created on the first IPv4 and 1800 * IPv6 interfaces (that have the IFF_MULTICAST flag set), respectively. 1801 * This should really be done in the init scripts if we ever allow zones to 1802 * modify the routing tables. 1803 * 1804 * If anything goes wrong, we log an detailed error message, attempt to tear 1805 * down whatever we set up and return an error. 1806 */ 1807 static int 1808 configure_one_interface(zlog_t *zlogp, zoneid_t zone_id, 1809 struct zone_nwiftab *nwiftabptr, boolean_t *mcast_rt_v4_setp, 1810 boolean_t *mcast_rt_v6_setp) 1811 { 1812 struct lifreq lifr; 1813 struct sockaddr_in netmask4; 1814 struct sockaddr_in6 netmask6; 1815 struct in_addr in4; 1816 struct in6_addr in6; 1817 sa_family_t af; 1818 char *slashp = strchr(nwiftabptr->zone_nwif_address, '/'); 1819 mcast_rtmsg_t mcast_rtmsg; 1820 int s; 1821 int rs; 1822 int rlen; 1823 boolean_t got_netmask = B_FALSE; 1824 char addrstr4[INET_ADDRSTRLEN]; 1825 int res; 1826 1827 res = zonecfg_valid_net_address(nwiftabptr->zone_nwif_address, &lifr); 1828 if (res != Z_OK) { 1829 zerror(zlogp, B_FALSE, "%s: %s", zonecfg_strerror(res), 1830 nwiftabptr->zone_nwif_address); 1831 return (-1); 1832 } 1833 af = lifr.lifr_addr.ss_family; 1834 if (af == AF_INET) 1835 in4 = ((struct sockaddr_in *)(&lifr.lifr_addr))->sin_addr; 1836 else 1837 in6 = ((struct sockaddr_in6 *)(&lifr.lifr_addr))->sin6_addr; 1838 1839 if ((s = socket(af, SOCK_DGRAM, 0)) < 0) { 1840 zerror(zlogp, B_TRUE, "could not get socket"); 1841 return (-1); 1842 } 1843 1844 (void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical, 1845 sizeof (lifr.lifr_name)); 1846 if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) { 1847 zerror(zlogp, B_TRUE, "%s: could not add interface", 1848 lifr.lifr_name); 1849 (void) close(s); 1850 return (-1); 1851 } 1852 1853 if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) { 1854 zerror(zlogp, B_TRUE, 1855 "%s: could not set IP address to %s", 1856 lifr.lifr_name, nwiftabptr->zone_nwif_address); 1857 goto bad; 1858 } 1859 1860 /* Preserve literal IPv4 address for later potential printing. */ 1861 if (af == AF_INET) 1862 (void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN); 1863 1864 lifr.lifr_zoneid = zone_id; 1865 if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) { 1866 zerror(zlogp, B_TRUE, "%s: could not place interface into zone", 1867 lifr.lifr_name); 1868 goto bad; 1869 } 1870 1871 if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) { 1872 got_netmask = B_TRUE; /* default setting will be correct */ 1873 } else { 1874 if (af == AF_INET) { 1875 /* 1876 * The IPv4 netmask can be determined either 1877 * directly if a prefix length was supplied with 1878 * the address or via the netmasks database. Not 1879 * being able to determine it is a common failure, 1880 * but it often is not fatal to operation of the 1881 * interface. In that case, a warning will be 1882 * printed after the rest of the interface's 1883 * parameters have been configured. 1884 */ 1885 (void) memset(&netmask4, 0, sizeof (netmask4)); 1886 if (slashp != NULL) { 1887 if (addr2netmask(slashp + 1, V4_ADDR_LEN, 1888 (uchar_t *)&netmask4.sin_addr) != 0) { 1889 *slashp = '/'; 1890 zerror(zlogp, B_FALSE, 1891 "%s: invalid prefix length in %s", 1892 lifr.lifr_name, 1893 nwiftabptr->zone_nwif_address); 1894 goto bad; 1895 } 1896 got_netmask = B_TRUE; 1897 } else if (getnetmaskbyaddr(in4, 1898 &netmask4.sin_addr) == 0) { 1899 got_netmask = B_TRUE; 1900 } 1901 if (got_netmask) { 1902 netmask4.sin_family = af; 1903 (void) memcpy(&lifr.lifr_addr, &netmask4, 1904 sizeof (netmask4)); 1905 } 1906 } else { 1907 (void) memset(&netmask6, 0, sizeof (netmask6)); 1908 if (addr2netmask(slashp + 1, V6_ADDR_LEN, 1909 (uchar_t *)&netmask6.sin6_addr) != 0) { 1910 *slashp = '/'; 1911 zerror(zlogp, B_FALSE, 1912 "%s: invalid prefix length in %s", 1913 lifr.lifr_name, 1914 nwiftabptr->zone_nwif_address); 1915 goto bad; 1916 } 1917 got_netmask = B_TRUE; 1918 netmask6.sin6_family = af; 1919 (void) memcpy(&lifr.lifr_addr, &netmask6, 1920 sizeof (netmask6)); 1921 } 1922 if (got_netmask && 1923 ioctl(s, SIOCSLIFNETMASK, (caddr_t)&lifr) < 0) { 1924 zerror(zlogp, B_TRUE, "%s: could not set netmask", 1925 lifr.lifr_name); 1926 goto bad; 1927 } 1928 1929 /* 1930 * This doesn't set the broadcast address at all. Rather, it 1931 * gets, then sets the interface's address, relying on the fact 1932 * that resetting the address will reset the broadcast address. 1933 */ 1934 if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { 1935 zerror(zlogp, B_TRUE, "%s: could not get address", 1936 lifr.lifr_name); 1937 goto bad; 1938 } 1939 if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) { 1940 zerror(zlogp, B_TRUE, 1941 "%s: could not reset broadcast address", 1942 lifr.lifr_name); 1943 goto bad; 1944 } 1945 } 1946 1947 if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { 1948 zerror(zlogp, B_TRUE, "%s: could not get flags", 1949 lifr.lifr_name); 1950 goto bad; 1951 } 1952 lifr.lifr_flags |= IFF_UP; 1953 if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { 1954 int save_errno = errno; 1955 char *zone_using; 1956 1957 /* 1958 * If we failed with something other than EADDRNOTAVAIL, 1959 * then skip to the end. Otherwise, look up our address, 1960 * then call a function to determine which zone is already 1961 * using that address. 1962 */ 1963 if (errno != EADDRNOTAVAIL) { 1964 zerror(zlogp, B_TRUE, 1965 "%s: could not bring interface up", lifr.lifr_name); 1966 goto bad; 1967 } 1968 if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { 1969 zerror(zlogp, B_TRUE, "%s: could not get address", 1970 lifr.lifr_name); 1971 goto bad; 1972 } 1973 zone_using = who_is_using(zlogp, &lifr); 1974 errno = save_errno; 1975 if (zone_using == NULL) 1976 zerror(zlogp, B_TRUE, 1977 "%s: could not bring interface up", lifr.lifr_name); 1978 else 1979 zerror(zlogp, B_TRUE, "%s: could not bring interface " 1980 "up: address in use by zone '%s'", lifr.lifr_name, 1981 zone_using); 1982 goto bad; 1983 } 1984 if ((lifr.lifr_flags & IFF_MULTICAST) && ((af == AF_INET && 1985 mcast_rt_v4_setp != NULL && *mcast_rt_v4_setp == B_FALSE) || 1986 (af == AF_INET6 && 1987 mcast_rt_v6_setp != NULL && *mcast_rt_v6_setp == B_FALSE))) { 1988 rs = socket(PF_ROUTE, SOCK_RAW, 0); 1989 if (rs < 0) { 1990 zerror(zlogp, B_TRUE, "%s: could not create " 1991 "routing socket", lifr.lifr_name); 1992 goto bad; 1993 } 1994 (void) shutdown(rs, 0); 1995 (void) memset((void *)&mcast_rtmsg, 0, sizeof (mcast_rtmsg_t)); 1996 mcast_rtmsg.m_rtm.rtm_msglen = sizeof (struct rt_msghdr) + 1997 3 * (af == AF_INET ? sizeof (struct sockaddr_in) : 1998 sizeof (struct sockaddr_in6)); 1999 mcast_rtmsg.m_rtm.rtm_version = RTM_VERSION; 2000 mcast_rtmsg.m_rtm.rtm_type = RTM_ADD; 2001 mcast_rtmsg.m_rtm.rtm_flags = RTF_UP; 2002 mcast_rtmsg.m_rtm.rtm_addrs = 2003 RTA_DST | RTA_GATEWAY | RTA_NETMASK; 2004 mcast_rtmsg.m_rtm.rtm_seq = ++rts_seqno; 2005 if (af == AF_INET) { 2006 mcast_rtmsg.m_dst4.sin_family = AF_INET; 2007 mcast_rtmsg.m_dst4.sin_addr.s_addr = 2008 htonl(INADDR_UNSPEC_GROUP); 2009 mcast_rtmsg.m_gw4.sin_family = AF_INET; 2010 mcast_rtmsg.m_gw4.sin_addr = in4; 2011 mcast_rtmsg.m_netmask4.sin_family = AF_INET; 2012 mcast_rtmsg.m_netmask4.sin_addr.s_addr = 2013 htonl(IN_CLASSD_NET); 2014 } else { 2015 mcast_rtmsg.m_dst6.sin6_family = AF_INET6; 2016 mcast_rtmsg.m_dst6.sin6_addr.s6_addr[0] = 0xffU; 2017 mcast_rtmsg.m_gw6.sin6_family = AF_INET6; 2018 mcast_rtmsg.m_gw6.sin6_addr = in6; 2019 mcast_rtmsg.m_netmask6.sin6_family = AF_INET6; 2020 mcast_rtmsg.m_netmask6.sin6_addr.s6_addr[0] = 0xffU; 2021 } 2022 rlen = write(rs, (char *)&mcast_rtmsg, 2023 mcast_rtmsg.m_rtm.rtm_msglen); 2024 if (rlen < mcast_rtmsg.m_rtm.rtm_msglen) { 2025 if (rlen < 0) { 2026 zerror(zlogp, B_TRUE, "%s: could not set " 2027 "default interface for multicast", 2028 lifr.lifr_name); 2029 } else { 2030 zerror(zlogp, B_FALSE, "%s: write to routing " 2031 "socket returned %d", lifr.lifr_name, rlen); 2032 } 2033 (void) close(rs); 2034 goto bad; 2035 } 2036 if (af == AF_INET) { 2037 *mcast_rt_v4_setp = B_TRUE; 2038 } else { 2039 *mcast_rt_v6_setp = B_TRUE; 2040 } 2041 (void) close(rs); 2042 } 2043 2044 if (!got_netmask) { 2045 /* 2046 * A common, but often non-fatal problem, is that the system 2047 * cannot find the netmask for an interface address. This is 2048 * often caused by it being only in /etc/inet/netmasks, but 2049 * /etc/nsswitch.conf says to use NIS or NIS+ and it's not 2050 * in that. This doesn't show up at boot because the netmask 2051 * is obtained from /etc/inet/netmasks when no network 2052 * interfaces are up, but isn't consulted when NIS/NIS+ is 2053 * available. We warn the user here that something like this 2054 * has happened and we're just running with a default and 2055 * possible incorrect netmask. 2056 */ 2057 char buffer[INET6_ADDRSTRLEN]; 2058 void *addr; 2059 2060 if (af == AF_INET) 2061 addr = &((struct sockaddr_in *) 2062 (&lifr.lifr_addr))->sin_addr; 2063 else 2064 addr = &((struct sockaddr_in6 *) 2065 (&lifr.lifr_addr))->sin6_addr; 2066 2067 /* Find out what netmask interface is going to be using */ 2068 if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 || 2069 inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL) 2070 goto bad; 2071 zerror(zlogp, B_FALSE, 2072 "WARNING: %s: no matching subnet found in netmasks(4) for " 2073 "%s; using default of %s.", 2074 lifr.lifr_name, addrstr4, buffer); 2075 } 2076 2077 (void) close(s); 2078 return (Z_OK); 2079 bad: 2080 (void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr); 2081 (void) close(s); 2082 return (-1); 2083 } 2084 2085 /* 2086 * Sets up network interfaces based on information from the zone configuration. 2087 * An IPv4 loopback interface is set up "for free", modeling the global system. 2088 * If any of the configuration interfaces were IPv6, then an IPv6 loopback 2089 * address is set up as well. 2090 * 2091 * If anything goes wrong, we log a general error message, attempt to tear down 2092 * whatever we set up, and return an error. 2093 */ 2094 static int 2095 configure_network_interfaces(zlog_t *zlogp) 2096 { 2097 zone_dochandle_t handle; 2098 struct zone_nwiftab nwiftab, loopback_iftab; 2099 boolean_t saw_v6 = B_FALSE; 2100 boolean_t mcast_rt_v4_set = B_FALSE; 2101 boolean_t mcast_rt_v6_set = B_FALSE; 2102 zoneid_t zoneid; 2103 2104 if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) { 2105 zerror(zlogp, B_TRUE, "unable to get zoneid"); 2106 return (-1); 2107 } 2108 2109 if ((handle = zonecfg_init_handle()) == NULL) { 2110 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2111 return (-1); 2112 } 2113 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2114 zerror(zlogp, B_FALSE, "invalid configuration"); 2115 zonecfg_fini_handle(handle); 2116 return (-1); 2117 } 2118 if (zonecfg_setnwifent(handle) == Z_OK) { 2119 for (;;) { 2120 struct in6_addr in6; 2121 2122 if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK) 2123 break; 2124 if (configure_one_interface(zlogp, zoneid, 2125 &nwiftab, &mcast_rt_v4_set, &mcast_rt_v6_set) != 2126 Z_OK) { 2127 (void) zonecfg_endnwifent(handle); 2128 zonecfg_fini_handle(handle); 2129 return (-1); 2130 } 2131 if (inet_pton(AF_INET6, nwiftab.zone_nwif_address, 2132 &in6) == 1) 2133 saw_v6 = B_TRUE; 2134 } 2135 (void) zonecfg_endnwifent(handle); 2136 } 2137 zonecfg_fini_handle(handle); 2138 (void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0", 2139 sizeof (loopback_iftab.zone_nwif_physical)); 2140 (void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1", 2141 sizeof (loopback_iftab.zone_nwif_address)); 2142 if (configure_one_interface(zlogp, zoneid, &loopback_iftab, NULL, NULL) 2143 != Z_OK) { 2144 return (-1); 2145 } 2146 if (saw_v6) { 2147 (void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128", 2148 sizeof (loopback_iftab.zone_nwif_address)); 2149 if (configure_one_interface(zlogp, zoneid, 2150 &loopback_iftab, NULL, NULL) != Z_OK) { 2151 return (-1); 2152 } 2153 } 2154 return (0); 2155 } 2156 2157 static int 2158 tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid, 2159 const struct sockaddr_storage *local, const struct sockaddr_storage *remote) 2160 { 2161 int fd; 2162 struct strioctl ioc; 2163 tcp_ioc_abort_conn_t conn; 2164 int error; 2165 2166 conn.ac_local = *local; 2167 conn.ac_remote = *remote; 2168 conn.ac_start = TCPS_SYN_SENT; 2169 conn.ac_end = TCPS_TIME_WAIT; 2170 conn.ac_zoneid = zoneid; 2171 2172 ioc.ic_cmd = TCP_IOC_ABORT_CONN; 2173 ioc.ic_timout = -1; /* infinite timeout */ 2174 ioc.ic_len = sizeof (conn); 2175 ioc.ic_dp = (char *)&conn; 2176 2177 if ((fd = open("/dev/tcp", O_RDONLY)) < 0) { 2178 zerror(zlogp, B_TRUE, "unable to open %s", "/dev/tcp"); 2179 return (-1); 2180 } 2181 2182 error = ioctl(fd, I_STR, &ioc); 2183 (void) close(fd); 2184 if (error == 0 || errno == ENOENT) /* ENOENT is not an error */ 2185 return (0); 2186 return (-1); 2187 } 2188 2189 static int 2190 tcp_abort_connections(zlog_t *zlogp, zoneid_t zoneid) 2191 { 2192 struct sockaddr_storage l, r; 2193 struct sockaddr_in *local, *remote; 2194 struct sockaddr_in6 *local6, *remote6; 2195 int error; 2196 2197 /* 2198 * Abort IPv4 connections. 2199 */ 2200 bzero(&l, sizeof (*local)); 2201 local = (struct sockaddr_in *)&l; 2202 local->sin_family = AF_INET; 2203 local->sin_addr.s_addr = INADDR_ANY; 2204 local->sin_port = 0; 2205 2206 bzero(&r, sizeof (*remote)); 2207 remote = (struct sockaddr_in *)&r; 2208 remote->sin_family = AF_INET; 2209 remote->sin_addr.s_addr = INADDR_ANY; 2210 remote->sin_port = 0; 2211 2212 if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0) 2213 return (error); 2214 2215 /* 2216 * Abort IPv6 connections. 2217 */ 2218 bzero(&l, sizeof (*local6)); 2219 local6 = (struct sockaddr_in6 *)&l; 2220 local6->sin6_family = AF_INET6; 2221 local6->sin6_port = 0; 2222 local6->sin6_addr = in6addr_any; 2223 2224 bzero(&r, sizeof (*remote6)); 2225 remote6 = (struct sockaddr_in6 *)&r; 2226 remote6->sin6_family = AF_INET6; 2227 remote6->sin6_port = 0; 2228 remote6->sin6_addr = in6addr_any; 2229 2230 if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0) 2231 return (error); 2232 return (0); 2233 } 2234 2235 static int 2236 devfsadm_call(zlog_t *zlogp, const char *arg) 2237 { 2238 char *argv[4]; 2239 int status; 2240 2241 argv[0] = DEVFSADM; 2242 argv[1] = (char *)arg; 2243 argv[2] = zone_name; 2244 argv[3] = NULL; 2245 status = forkexec(zlogp, DEVFSADM_PATH, argv); 2246 if (status == 0 || status == -1) 2247 return (status); 2248 zerror(zlogp, B_FALSE, "%s call (%s %s %s) unexpectedly returned %d", 2249 DEVFSADM, DEVFSADM_PATH, arg, zone_name, status); 2250 return (-1); 2251 } 2252 2253 static int 2254 devfsadm_register(zlog_t *zlogp) 2255 { 2256 /* 2257 * Ready the zone's devices. 2258 */ 2259 return (devfsadm_call(zlogp, "-z")); 2260 } 2261 2262 static int 2263 devfsadm_unregister(zlog_t *zlogp) 2264 { 2265 return (devfsadm_call(zlogp, "-Z")); 2266 } 2267 2268 static int 2269 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) 2270 { 2271 nvlist_t *nvl = NULL; 2272 char *nvl_packed = NULL; 2273 size_t nvl_size = 0; 2274 nvlist_t **nvlv = NULL; 2275 int rctlcount = 0; 2276 int error = -1; 2277 zone_dochandle_t handle; 2278 struct zone_rctltab rctltab; 2279 rctlblk_t *rctlblk = NULL; 2280 2281 *bufp = NULL; 2282 *bufsizep = 0; 2283 2284 if ((handle = zonecfg_init_handle()) == NULL) { 2285 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2286 return (-1); 2287 } 2288 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2289 zerror(zlogp, B_FALSE, "invalid configuration"); 2290 zonecfg_fini_handle(handle); 2291 return (-1); 2292 } 2293 2294 rctltab.zone_rctl_valptr = NULL; 2295 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) { 2296 zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc"); 2297 goto out; 2298 } 2299 2300 if (zonecfg_setrctlent(handle) != Z_OK) { 2301 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent"); 2302 goto out; 2303 } 2304 2305 if ((rctlblk = malloc(rctlblk_size())) == NULL) { 2306 zerror(zlogp, B_TRUE, "memory allocation failed"); 2307 goto out; 2308 } 2309 while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) { 2310 struct zone_rctlvaltab *rctlval; 2311 uint_t i, count; 2312 const char *name = rctltab.zone_rctl_name; 2313 2314 /* zoneadm should have already warned about unknown rctls. */ 2315 if (!zonecfg_is_rctl(name)) { 2316 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 2317 rctltab.zone_rctl_valptr = NULL; 2318 continue; 2319 } 2320 count = 0; 2321 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL; 2322 rctlval = rctlval->zone_rctlval_next) { 2323 count++; 2324 } 2325 if (count == 0) { /* ignore */ 2326 continue; /* Nothing to free */ 2327 } 2328 if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL) 2329 goto out; 2330 i = 0; 2331 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL; 2332 rctlval = rctlval->zone_rctlval_next, i++) { 2333 if (nvlist_alloc(&nvlv[i], NV_UNIQUE_NAME, 0) != 0) { 2334 zerror(zlogp, B_TRUE, "%s failed", 2335 "nvlist_alloc"); 2336 goto out; 2337 } 2338 if (zonecfg_construct_rctlblk(rctlval, rctlblk) 2339 != Z_OK) { 2340 zerror(zlogp, B_FALSE, "invalid rctl value: " 2341 "(priv=%s,limit=%s,action=%s)", 2342 rctlval->zone_rctlval_priv, 2343 rctlval->zone_rctlval_limit, 2344 rctlval->zone_rctlval_action); 2345 goto out; 2346 } 2347 if (!zonecfg_valid_rctl(name, rctlblk)) { 2348 zerror(zlogp, B_FALSE, 2349 "(priv=%s,limit=%s,action=%s) is not a " 2350 "valid value for rctl '%s'", 2351 rctlval->zone_rctlval_priv, 2352 rctlval->zone_rctlval_limit, 2353 rctlval->zone_rctlval_action, 2354 name); 2355 goto out; 2356 } 2357 if (nvlist_add_uint64(nvlv[i], "privilege", 2358 rctlblk_get_privilege(rctlblk)) != 0) { 2359 zerror(zlogp, B_FALSE, "%s failed", 2360 "nvlist_add_uint64"); 2361 goto out; 2362 } 2363 if (nvlist_add_uint64(nvlv[i], "limit", 2364 rctlblk_get_value(rctlblk)) != 0) { 2365 zerror(zlogp, B_FALSE, "%s failed", 2366 "nvlist_add_uint64"); 2367 goto out; 2368 } 2369 if (nvlist_add_uint64(nvlv[i], "action", 2370 (uint_t)rctlblk_get_local_action(rctlblk, NULL)) 2371 != 0) { 2372 zerror(zlogp, B_FALSE, "%s failed", 2373 "nvlist_add_uint64"); 2374 goto out; 2375 } 2376 } 2377 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 2378 rctltab.zone_rctl_valptr = NULL; 2379 if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count) 2380 != 0) { 2381 zerror(zlogp, B_FALSE, "%s failed", 2382 "nvlist_add_nvlist_array"); 2383 goto out; 2384 } 2385 for (i = 0; i < count; i++) 2386 nvlist_free(nvlv[i]); 2387 free(nvlv); 2388 nvlv = NULL; 2389 rctlcount++; 2390 } 2391 (void) zonecfg_endrctlent(handle); 2392 2393 if (rctlcount == 0) { 2394 error = 0; 2395 goto out; 2396 } 2397 if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0) 2398 != 0) { 2399 zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack"); 2400 goto out; 2401 } 2402 2403 error = 0; 2404 *bufp = nvl_packed; 2405 *bufsizep = nvl_size; 2406 2407 out: 2408 free(rctlblk); 2409 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 2410 if (error && nvl_packed != NULL) 2411 free(nvl_packed); 2412 if (nvl != NULL) 2413 nvlist_free(nvl); 2414 if (nvlv != NULL) 2415 free(nvlv); 2416 if (handle != NULL) 2417 zonecfg_fini_handle(handle); 2418 return (error); 2419 } 2420 2421 static int 2422 get_zone_pool(zlog_t *zlogp, char *poolbuf, size_t bufsz) 2423 { 2424 zone_dochandle_t handle; 2425 int error; 2426 2427 if ((handle = zonecfg_init_handle()) == NULL) { 2428 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2429 return (-1); 2430 } 2431 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2432 zerror(zlogp, B_FALSE, "invalid configuration"); 2433 zonecfg_fini_handle(handle); 2434 return (-1); 2435 } 2436 error = zonecfg_get_pool(handle, poolbuf, bufsz); 2437 zonecfg_fini_handle(handle); 2438 return (error); 2439 } 2440 2441 static int 2442 bind_to_pool(zlog_t *zlogp, zoneid_t zoneid) 2443 { 2444 pool_conf_t *poolconf; 2445 pool_t *pool; 2446 char poolname[MAXPATHLEN]; 2447 int status; 2448 int error; 2449 2450 /* 2451 * Find the pool mentioned in the zone configuration, and bind to it. 2452 */ 2453 error = get_zone_pool(zlogp, poolname, sizeof (poolname)); 2454 if (error == Z_NO_ENTRY || (error == Z_OK && strlen(poolname) == 0)) { 2455 /* 2456 * The property is not set on the zone, so the pool 2457 * should be bound to the default pool. But that's 2458 * already done by the kernel, so we can just return. 2459 */ 2460 return (0); 2461 } 2462 if (error != Z_OK) { 2463 /* 2464 * Not an error, even though it shouldn't be happening. 2465 */ 2466 zerror(zlogp, B_FALSE, 2467 "WARNING: unable to retrieve default pool."); 2468 return (0); 2469 } 2470 /* 2471 * Don't do anything if pools aren't enabled. 2472 */ 2473 if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED) { 2474 zerror(zlogp, B_FALSE, "WARNING: pools facility not active; " 2475 "zone will not be bound to pool '%s'.", poolname); 2476 return (0); 2477 } 2478 /* 2479 * Try to provide a sane error message if the requested pool doesn't 2480 * exist. 2481 */ 2482 if ((poolconf = pool_conf_alloc()) == NULL) { 2483 zerror(zlogp, B_FALSE, "%s failed", "pool_conf_alloc"); 2484 return (-1); 2485 } 2486 if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) != 2487 PO_SUCCESS) { 2488 zerror(zlogp, B_FALSE, "%s failed", "pool_conf_open"); 2489 pool_conf_free(poolconf); 2490 return (-1); 2491 } 2492 pool = pool_get_pool(poolconf, poolname); 2493 (void) pool_conf_close(poolconf); 2494 pool_conf_free(poolconf); 2495 if (pool == NULL) { 2496 zerror(zlogp, B_FALSE, "WARNING: pool '%s' not found; " 2497 "using default pool.", poolname); 2498 return (0); 2499 } 2500 /* 2501 * Bind the zone to the pool. 2502 */ 2503 if (pool_set_binding(poolname, P_ZONEID, zoneid) != PO_SUCCESS) { 2504 zerror(zlogp, B_FALSE, "WARNING: unable to bind to pool '%s'; " 2505 "using default pool.", poolname); 2506 } 2507 return (0); 2508 } 2509 2510 int 2511 prtmount(const char *fs, void *x) { 2512 zerror((zlog_t *)x, B_FALSE, " %s", fs); 2513 return (0); 2514 } 2515 2516 /* 2517 * Look for zones running on the main system that are using this root (or any 2518 * subdirectory of it). Return B_TRUE and print an error if a conflicting zone 2519 * is found or if we can't tell. 2520 */ 2521 static boolean_t 2522 duplicate_zone_root(zlog_t *zlogp, const char *rootpath) 2523 { 2524 zoneid_t *zids = NULL; 2525 uint_t nzids = 0; 2526 boolean_t retv; 2527 int rlen, zlen; 2528 char zroot[MAXPATHLEN]; 2529 char zonename[ZONENAME_MAX]; 2530 2531 for (;;) { 2532 nzids += 10; 2533 zids = malloc(nzids * sizeof (*zids)); 2534 if (zids == NULL) { 2535 zerror(zlogp, B_TRUE, "unable to allocate memory"); 2536 return (B_TRUE); 2537 } 2538 if (zone_list(zids, &nzids) == 0) 2539 break; 2540 free(zids); 2541 } 2542 retv = B_FALSE; 2543 rlen = strlen(rootpath); 2544 while (nzids > 0) { 2545 /* 2546 * Ignore errors; they just mean that the zone has disappeared 2547 * while we were busy. 2548 */ 2549 if (zone_getattr(zids[--nzids], ZONE_ATTR_ROOT, zroot, 2550 sizeof (zroot)) == -1) 2551 continue; 2552 zlen = strlen(zroot); 2553 if (zlen > rlen) 2554 zlen = rlen; 2555 if (strncmp(rootpath, zroot, zlen) == 0 && 2556 (zroot[zlen] == '\0' || zroot[zlen] == '/') && 2557 (rootpath[zlen] == '\0' || rootpath[zlen] == '/')) { 2558 if (getzonenamebyid(zids[nzids], zonename, 2559 sizeof (zonename)) == -1) 2560 (void) snprintf(zonename, sizeof (zonename), 2561 "id %d", (int)zids[nzids]); 2562 zerror(zlogp, B_FALSE, 2563 "zone root %s already in use by zone %s", 2564 rootpath, zonename); 2565 retv = B_TRUE; 2566 break; 2567 } 2568 } 2569 free(zids); 2570 return (retv); 2571 } 2572 2573 /* 2574 * Search for loopback mounts that use this same source node (same device and 2575 * inode). Return B_TRUE if there is one or if we can't tell. 2576 */ 2577 static boolean_t 2578 duplicate_reachable_path(zlog_t *zlogp, const char *rootpath) 2579 { 2580 struct stat64 rst, zst; 2581 struct mnttab *mnp; 2582 2583 if (stat64(rootpath, &rst) == -1) { 2584 zerror(zlogp, B_TRUE, "can't stat %s", rootpath); 2585 return (B_TRUE); 2586 } 2587 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 2588 return (B_TRUE); 2589 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) { 2590 if (mnp->mnt_fstype == NULL || 2591 strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0) 2592 continue; 2593 /* We're looking at a loopback mount. Stat it. */ 2594 if (mnp->mnt_special != NULL && 2595 stat64(mnp->mnt_special, &zst) != -1 && 2596 rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) { 2597 zerror(zlogp, B_FALSE, 2598 "zone root %s is reachable through %s", 2599 rootpath, mnp->mnt_mountp); 2600 return (B_TRUE); 2601 } 2602 } 2603 return (B_FALSE); 2604 } 2605 2606 zoneid_t 2607 vplat_create(zlog_t *zlogp, boolean_t mount_cmd) 2608 { 2609 zoneid_t rval = -1; 2610 priv_set_t *privs; 2611 char rootpath[MAXPATHLEN]; 2612 char *rctlbuf = NULL; 2613 size_t rctlbufsz = 0; 2614 zoneid_t zoneid = -1; 2615 int xerr; 2616 char *kzone; 2617 FILE *fp = NULL; 2618 2619 if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { 2620 zerror(zlogp, B_TRUE, "unable to determine zone root"); 2621 return (-1); 2622 } 2623 if (zonecfg_in_alt_root()) 2624 resolve_lofs(zlogp, rootpath, sizeof (rootpath)); 2625 2626 if ((privs = priv_allocset()) == NULL) { 2627 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 2628 return (-1); 2629 } 2630 priv_emptyset(privs); 2631 if (zonecfg_get_privset(privs) != Z_OK) { 2632 zerror(zlogp, B_TRUE, "Failed to initialize privileges"); 2633 goto error; 2634 } 2635 if (!mount_cmd && get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) { 2636 zerror(zlogp, B_FALSE, "Unable to get list of rctls"); 2637 goto error; 2638 } 2639 2640 kzone = zone_name; 2641 2642 /* 2643 * We must do this scan twice. First, we look for zones running on the 2644 * main system that are using this root (or any subdirectory of it). 2645 * Next, we reduce to the shortest path and search for loopback mounts 2646 * that use this same source node (same device and inode). 2647 */ 2648 if (duplicate_zone_root(zlogp, rootpath)) 2649 goto error; 2650 if (duplicate_reachable_path(zlogp, rootpath)) 2651 goto error; 2652 2653 if (mount_cmd) { 2654 root_to_lu(zlogp, rootpath, sizeof (rootpath), B_TRUE); 2655 2656 /* 2657 * Forge up a special root for this zone. When a zone is 2658 * mounted, we can't let the zone have its own root because the 2659 * tools that will be used in this "scratch zone" need access 2660 * to both the zone's resources and the running machine's 2661 * executables. 2662 * 2663 * Note that the mkdir here also catches read-only filesystems. 2664 */ 2665 if (mkdir(rootpath, 0755) != 0 && errno != EEXIST) { 2666 zerror(zlogp, B_TRUE, "cannot create %s", rootpath); 2667 goto error; 2668 } 2669 if (domount(zlogp, "tmpfs", "", "swap", rootpath) != 0) 2670 goto error; 2671 } 2672 2673 if (zonecfg_in_alt_root()) { 2674 /* 2675 * If we are mounting up a zone in an alternate root partition, 2676 * then we have some additional work to do before starting the 2677 * zone. First, resolve the root path down so that we're not 2678 * fooled by duplicates. Then forge up an internal name for 2679 * the zone. 2680 */ 2681 if ((fp = zonecfg_open_scratch("", B_TRUE)) == NULL) { 2682 zerror(zlogp, B_TRUE, "cannot open mapfile"); 2683 goto error; 2684 } 2685 if (zonecfg_lock_scratch(fp) != 0) { 2686 zerror(zlogp, B_TRUE, "cannot lock mapfile"); 2687 goto error; 2688 } 2689 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(), 2690 NULL, 0) == 0) { 2691 zerror(zlogp, B_FALSE, "scratch zone already running"); 2692 goto error; 2693 } 2694 /* This is the preferred name */ 2695 (void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s", 2696 zone_name); 2697 srandom(getpid()); 2698 while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL, 2699 0) == 0) { 2700 /* This is just an arbitrary name; note "." usage */ 2701 (void) snprintf(kernzone, sizeof (kernzone), 2702 "SUNWlu.%08lX%08lX", random(), random()); 2703 } 2704 kzone = kernzone; 2705 } 2706 2707 xerr = 0; 2708 if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf, 2709 rctlbufsz, &xerr)) == -1) { 2710 if (xerr == ZE_AREMOUNTS) { 2711 if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) { 2712 zerror(zlogp, B_FALSE, 2713 "An unknown file-system is mounted on " 2714 "a subdirectory of %s", rootpath); 2715 } else { 2716 2717 zerror(zlogp, B_FALSE, 2718 "These file-systems are mounted on " 2719 "subdirectories of %s:", rootpath); 2720 (void) zonecfg_find_mounts(rootpath, 2721 prtmount, zlogp); 2722 } 2723 } else if (xerr == ZE_CHROOTED) { 2724 zerror(zlogp, B_FALSE, "%s: " 2725 "cannot create a zone from a chrooted " 2726 "environment", "zone_create"); 2727 } else { 2728 zerror(zlogp, B_TRUE, "%s failed", "zone_create"); 2729 } 2730 goto error; 2731 } 2732 2733 if (zonecfg_in_alt_root() && 2734 zonecfg_add_scratch(fp, zone_name, kernzone, 2735 zonecfg_get_root()) == -1) { 2736 zerror(zlogp, B_TRUE, "cannot add mapfile entry"); 2737 goto error; 2738 } 2739 2740 /* 2741 * The following is a warning, not an error, and is not performed when 2742 * merely mounting a zone for administrative use. 2743 */ 2744 if (!mount_cmd && bind_to_pool(zlogp, zoneid) != 0) 2745 zerror(zlogp, B_FALSE, "WARNING: unable to bind zone to " 2746 "requested pool; using default pool."); 2747 rval = zoneid; 2748 zoneid = -1; 2749 2750 error: 2751 if (zoneid != -1) 2752 (void) zone_destroy(zoneid); 2753 if (rctlbuf != NULL) 2754 free(rctlbuf); 2755 priv_freeset(privs); 2756 if (fp != NULL) 2757 zonecfg_close_scratch(fp); 2758 lofs_discard_mnttab(); 2759 return (rval); 2760 } 2761 2762 int 2763 vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd) 2764 { 2765 if (create_dev_files(zlogp) != 0 || 2766 mount_filesystems(zlogp, mount_cmd) != 0) { 2767 lofs_discard_mnttab(); 2768 return (-1); 2769 } 2770 if (!mount_cmd && (devfsadm_register(zlogp) != 0 || 2771 configure_network_interfaces(zlogp) != 0)) { 2772 lofs_discard_mnttab(); 2773 return (-1); 2774 } 2775 lofs_discard_mnttab(); 2776 return (0); 2777 } 2778 2779 static int 2780 lu_root_teardown(zlog_t *zlogp) 2781 { 2782 char zroot[MAXPATHLEN]; 2783 2784 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 2785 zerror(zlogp, B_FALSE, "unable to determine zone root"); 2786 return (-1); 2787 } 2788 root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE); 2789 2790 /* 2791 * At this point, the processes are gone, the filesystems (save the 2792 * root) are unmounted, and the zone is on death row. But there may 2793 * still be creds floating about in the system that reference the 2794 * zone_t, and which pin down zone_rootvp causing this call to fail 2795 * with EBUSY. Thus, we try for a little while before just giving up. 2796 * (How I wish this were not true, and umount2 just did the right 2797 * thing, or tmpfs supported MS_FORCE This is a gross hack.) 2798 */ 2799 if (umount2(zroot, MS_FORCE) != 0) { 2800 if (errno == ENOTSUP && umount2(zroot, 0) == 0) 2801 goto unmounted; 2802 if (errno == EBUSY) { 2803 int tries = 10; 2804 2805 while (--tries >= 0) { 2806 (void) sleep(1); 2807 if (umount2(zroot, 0) == 0) 2808 goto unmounted; 2809 if (errno != EBUSY) 2810 break; 2811 } 2812 } 2813 zerror(zlogp, B_TRUE, "unable to unmount '%s'", zroot); 2814 return (-1); 2815 } 2816 unmounted: 2817 2818 /* 2819 * Only zones in an alternate root environment have scratch zone 2820 * entries. 2821 */ 2822 if (zonecfg_in_alt_root()) { 2823 FILE *fp; 2824 int retv; 2825 2826 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) { 2827 zerror(zlogp, B_TRUE, "cannot open mapfile"); 2828 return (-1); 2829 } 2830 retv = -1; 2831 if (zonecfg_lock_scratch(fp) != 0) 2832 zerror(zlogp, B_TRUE, "cannot lock mapfile"); 2833 else if (zonecfg_delete_scratch(fp, kernzone) != 0) 2834 zerror(zlogp, B_TRUE, "cannot delete map entry"); 2835 else 2836 retv = 0; 2837 zonecfg_close_scratch(fp); 2838 return (retv); 2839 } else { 2840 return (0); 2841 } 2842 } 2843 2844 int 2845 vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd) 2846 { 2847 char *kzone; 2848 zoneid_t zoneid; 2849 2850 kzone = zone_name; 2851 if (zonecfg_in_alt_root()) { 2852 FILE *fp; 2853 2854 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) { 2855 zerror(zlogp, B_TRUE, "unable to open map file"); 2856 goto error; 2857 } 2858 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(), 2859 kernzone, sizeof (kernzone)) != 0) { 2860 zerror(zlogp, B_FALSE, "unable to find scratch zone"); 2861 zonecfg_close_scratch(fp); 2862 goto error; 2863 } 2864 zonecfg_close_scratch(fp); 2865 kzone = kernzone; 2866 } 2867 2868 if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) { 2869 if (!bringup_failure_recovery) 2870 zerror(zlogp, B_TRUE, "unable to get zoneid"); 2871 if (unmount_cmd) 2872 (void) lu_root_teardown(zlogp); 2873 goto error; 2874 } 2875 2876 if (zone_shutdown(zoneid) != 0) { 2877 zerror(zlogp, B_TRUE, "unable to shutdown zone"); 2878 goto error; 2879 } 2880 2881 if (!unmount_cmd && devfsadm_unregister(zlogp) != 0) 2882 goto error; 2883 2884 if (!unmount_cmd && 2885 unconfigure_network_interfaces(zlogp, zoneid) != 0) { 2886 zerror(zlogp, B_FALSE, 2887 "unable to unconfigure network interfaces in zone"); 2888 goto error; 2889 } 2890 2891 if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) { 2892 zerror(zlogp, B_TRUE, "unable to abort TCP connections"); 2893 goto error; 2894 } 2895 2896 if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) { 2897 zerror(zlogp, B_FALSE, 2898 "unable to unmount file systems in zone"); 2899 goto error; 2900 } 2901 2902 if (zone_destroy(zoneid) != 0) { 2903 zerror(zlogp, B_TRUE, "unable to destroy zone"); 2904 goto error; 2905 } 2906 2907 /* 2908 * Special teardown for alternate boot environments: remove the tmpfs 2909 * root for the zone and then remove it from the map file. 2910 */ 2911 if (unmount_cmd && lu_root_teardown(zlogp) != 0) 2912 goto error; 2913 2914 if (!unmount_cmd) 2915 destroy_console_slave(); 2916 2917 lofs_discard_mnttab(); 2918 return (0); 2919 2920 error: 2921 lofs_discard_mnttab(); 2922 return (-1); 2923 } 2924