1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * This module contains functions used to bring up and tear down the 31 * Virtual Platform: [un]mounting file-systems, [un]plumbing network 32 * interfaces, [un]configuring devices, establishing resource controls, 33 * and creating/destroying the zone in the kernel. These actions, on 34 * the way up, ready the zone; on the way down, they halt the zone. 35 * See the much longer block comment at the beginning of zoneadmd.c 36 * for a bigger picture of how the whole program functions. 37 */ 38 39 #include <sys/param.h> 40 #include <sys/mount.h> 41 #include <sys/mntent.h> 42 #include <sys/socket.h> 43 #include <sys/utsname.h> 44 #include <sys/types.h> 45 #include <sys/stat.h> 46 #include <sys/sockio.h> 47 #include <sys/stropts.h> 48 #include <sys/conf.h> 49 50 #include <inet/tcp.h> 51 #include <arpa/inet.h> 52 #include <netinet/in.h> 53 #include <net/route.h> 54 #include <netdb.h> 55 56 #include <stdio.h> 57 #include <errno.h> 58 #include <fcntl.h> 59 #include <unistd.h> 60 #include <rctl.h> 61 #include <stdlib.h> 62 #include <string.h> 63 #include <strings.h> 64 #include <wait.h> 65 #include <limits.h> 66 #include <libgen.h> 67 #include <zone.h> 68 #include <assert.h> 69 70 #include <sys/mntio.h> 71 #include <sys/mnttab.h> 72 #include <sys/fs/autofs.h> /* for _autofssys() */ 73 #include <sys/fs/lofs_info.h> 74 75 #include <pool.h> 76 #include <sys/pool.h> 77 78 #include <libzonecfg.h> 79 #include "zoneadmd.h" 80 81 #define V4_ADDR_LEN 32 82 #define V6_ADDR_LEN 128 83 84 /* 0755 is the default directory mode. */ 85 #define DEFAULT_DIR_MODE \ 86 (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) 87 88 #define IPD_DEFAULT_OPTS \ 89 MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES 90 91 #define DFSTYPES "/etc/dfs/fstypes" 92 93 /* 94 * A list of directories which should be created. 95 */ 96 97 struct dir_info { 98 char *dir_name; 99 mode_t dir_mode; 100 }; 101 102 /* 103 * The pathnames below are relative to the zonepath 104 */ 105 static struct dir_info dev_dirs[] = { 106 { "/dev", 0755 }, 107 { "/dev/dsk", 0755 }, 108 { "/dev/fd", 0555 }, 109 { "/dev/pts", 0755 }, 110 { "/dev/rdsk", 0755 }, 111 { "/dev/rmt", 0755 }, 112 { "/dev/sad", 0755 }, 113 { "/dev/swap", 0755 }, 114 { "/dev/term", 0755 }, 115 }; 116 117 /* 118 * A list of devices which should be symlinked to /dev/zconsole. 119 */ 120 121 struct symlink_info { 122 char *sl_source; 123 char *sl_target; 124 }; 125 126 /* 127 * The "source" paths are relative to the zonepath 128 */ 129 static struct symlink_info dev_symlinks[] = { 130 { "/dev/stderr", "./fd/2" }, 131 { "/dev/stdin", "./fd/0" }, 132 { "/dev/stdout", "./fd/1" }, 133 { "/dev/dtremote", "/dev/null" }, 134 { "/dev/console", "zconsole" }, 135 { "/dev/syscon", "zconsole" }, 136 { "/dev/sysmsg", "zconsole" }, 137 { "/dev/systty", "zconsole" }, 138 { "/dev/msglog", "zconsole" }, 139 }; 140 141 /* for routing socket */ 142 static int rts_seqno = 0; 143 144 /* from libsocket, not in any header file */ 145 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *); 146 147 /* 148 * Private autofs system call 149 */ 150 extern int _autofssys(int, void *); 151 152 static int 153 autofs_cleanup(zoneid_t zoneid) 154 { 155 /* 156 * Ask autofs to unmount all trigger nodes in the given zone. 157 */ 158 return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid)); 159 } 160 161 static int 162 make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode) 163 { 164 char path[MAXPATHLEN]; 165 struct stat st; 166 167 if (snprintf(path, sizeof (path), "%s%s", prefix, subdir) > 168 sizeof (path)) { 169 zerror(zlogp, B_FALSE, "pathname %s%s is too long", prefix, 170 subdir); 171 return (-1); 172 } 173 174 if (lstat(path, &st) == 0) { 175 /* 176 * We don't check the file mode since presumably the zone 177 * administrator may have had good reason to change the mode, 178 * and we don't need to second guess him. 179 */ 180 if (!S_ISDIR(st.st_mode)) { 181 zerror(zlogp, B_FALSE, "%s is not a directory", path); 182 return (-1); 183 } 184 } else if (mkdirp(path, mode) != 0) { 185 if (errno == EROFS) 186 zerror(zlogp, B_FALSE, "Could not mkdir %s.\nIt is on " 187 "a read-only file system in this local zone.\nMake " 188 "sure %s exists in the global zone.", path, subdir); 189 else 190 zerror(zlogp, B_TRUE, "mkdirp of %s failed", path); 191 return (-1); 192 } 193 return (0); 194 } 195 196 /* 197 * Make /dev and various directories underneath it. 198 */ 199 static int 200 make_dev_dirs(zlog_t *zlogp, const char *zonepath) 201 { 202 int i; 203 204 for (i = 0; i < sizeof (dev_dirs) / sizeof (struct dir_info); i++) { 205 if (make_one_dir(zlogp, zonepath, dev_dirs[i].dir_name, 206 dev_dirs[i].dir_mode) != 0) 207 return (-1); 208 } 209 return (0); 210 } 211 212 /* 213 * Make various sym-links underneath /dev. 214 */ 215 static int 216 make_dev_links(zlog_t *zlogp, char *zonepath) 217 { 218 int i; 219 220 for (i = 0; i < sizeof (dev_symlinks) / sizeof (struct symlink_info); 221 i++) { 222 char dev[MAXPATHLEN]; 223 struct stat st; 224 225 (void) snprintf(dev, sizeof (dev), "%s%s", zonepath, 226 dev_symlinks[i].sl_source); 227 if (lstat(dev, &st) == 0) { 228 /* 229 * Try not to call unlink(2) on directories, since that 230 * makes UFS unhappy. 231 */ 232 if (S_ISDIR(st.st_mode)) { 233 zerror(zlogp, B_FALSE, "symlink path %s is a " 234 "directory", dev_symlinks[i].sl_source); 235 return (-1); 236 } 237 (void) unlink(dev); 238 } 239 if (symlink(dev_symlinks[i].sl_target, dev) != 0) { 240 zerror(zlogp, B_TRUE, "could not setup %s symlink", 241 dev_symlinks[i].sl_source); 242 return (-1); 243 } 244 } 245 return (0); 246 } 247 248 /* 249 * Create various directories and sym-links under /dev. 250 */ 251 static int 252 create_dev_files(zlog_t *zlogp) 253 { 254 char zonepath[MAXPATHLEN]; 255 256 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { 257 zerror(zlogp, B_TRUE, "unable to determine zone root"); 258 return (-1); 259 } 260 261 if (make_dev_dirs(zlogp, zonepath) != 0) 262 return (-1); 263 if (make_dev_links(zlogp, zonepath) != 0) 264 return (-1); 265 return (0); 266 } 267 268 static void 269 free_remote_fstypes(char **types) 270 { 271 uint_t i; 272 273 if (types == NULL) 274 return; 275 for (i = 0; types[i] != NULL; i++) 276 free(types[i]); 277 free(types); 278 } 279 280 static char ** 281 get_remote_fstypes(zlog_t *zlogp) 282 { 283 char **types = NULL; 284 FILE *fp; 285 char buf[MAXPATHLEN]; 286 char fstype[MAXPATHLEN]; 287 uint_t lines = 0; 288 uint_t i; 289 290 if ((fp = fopen(DFSTYPES, "r")) == NULL) { 291 zerror(zlogp, B_TRUE, "failed to open %s", DFSTYPES); 292 return (NULL); 293 } 294 /* 295 * Count the number of lines 296 */ 297 while (fgets(buf, sizeof (buf), fp) != NULL) 298 lines++; 299 if (lines == 0) /* didn't read anything; empty file */ 300 goto out; 301 rewind(fp); 302 /* 303 * Allocate enough space for a NULL-terminated array. 304 */ 305 types = calloc(lines + 1, sizeof (char *)); 306 if (types == NULL) { 307 zerror(zlogp, B_TRUE, "memory allocation failed"); 308 goto out; 309 } 310 i = 0; 311 while (fgets(buf, sizeof (buf), fp) != NULL) { 312 /* LINTED - fstype is big enough to hold buf */ 313 if (sscanf(buf, "%s", fstype) == 0) { 314 zerror(zlogp, B_FALSE, "unable to parse %s", DFSTYPES); 315 free_remote_fstypes(types); 316 types = NULL; 317 goto out; 318 } 319 types[i] = strdup(fstype); 320 if (types[i] == NULL) { 321 zerror(zlogp, B_TRUE, "memory allocation failed"); 322 free_remote_fstypes(types); 323 types = NULL; 324 goto out; 325 } 326 i++; 327 } 328 out: 329 (void) fclose(fp); 330 return (types); 331 } 332 333 static boolean_t 334 is_remote_fstype(const char *fstype, char *const *remote_fstypes) 335 { 336 uint_t i; 337 338 if (remote_fstypes == NULL) 339 return (B_FALSE); 340 for (i = 0; remote_fstypes[i] != NULL; i++) { 341 if (strcmp(remote_fstypes[i], fstype) == 0) 342 return (B_TRUE); 343 } 344 return (B_FALSE); 345 } 346 347 static void 348 free_mnttable(struct mnttab *mnt_array, uint_t nelem) 349 { 350 uint_t i; 351 352 if (mnt_array == NULL) 353 return; 354 for (i = 0; i < nelem; i++) { 355 free(mnt_array[i].mnt_mountp); 356 free(mnt_array[i].mnt_fstype); 357 assert(mnt_array[i].mnt_special == NULL); 358 assert(mnt_array[i].mnt_mntopts == NULL); 359 assert(mnt_array[i].mnt_time == NULL); 360 } 361 free(mnt_array); 362 } 363 364 /* 365 * Build the mount table for the zone rooted at "zroot", storing the resulting 366 * array of struct mnttabs in "mnt_arrayp" and the number of elements in the 367 * array in "nelemp". 368 */ 369 static int 370 build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab, 371 struct mnttab **mnt_arrayp, uint_t *nelemp) 372 { 373 struct mnttab mnt; 374 struct mnttab *mnts; 375 struct mnttab *mnp; 376 uint_t nmnt; 377 378 rewind(mnttab); 379 resetmnttab(mnttab); 380 nmnt = 0; 381 mnts = NULL; 382 while (getmntent(mnttab, &mnt) == 0) { 383 struct mnttab *tmp_array; 384 385 if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0) 386 continue; 387 nmnt++; 388 tmp_array = realloc(mnts, nmnt * sizeof (*mnts)); 389 if (tmp_array == NULL) { 390 nmnt--; 391 free_mnttable(mnts, nmnt); 392 return (-1); 393 } 394 mnts = tmp_array; 395 mnp = &mnts[nmnt - 1]; 396 /* 397 * Zero out the fields we won't be using. 398 */ 399 mnp->mnt_special = NULL; 400 mnp->mnt_mntopts = NULL; 401 mnp->mnt_time = NULL; 402 403 mnp->mnt_mountp = strdup(mnt.mnt_mountp); 404 mnp->mnt_fstype = strdup(mnt.mnt_fstype); 405 if (mnp->mnt_mountp == NULL || 406 mnp->mnt_fstype == NULL) { 407 zerror(zlogp, B_TRUE, "memory allocation failed"); 408 free_mnttable(mnts, nmnt); 409 return (-1); 410 } 411 } 412 *mnt_arrayp = mnts; 413 *nelemp = nmnt; 414 return (0); 415 } 416 417 /* 418 * The general strategy for unmounting filesystems is as follows: 419 * 420 * - Remote filesystems may be dead, and attempting to contact them as 421 * part of a regular unmount may hang forever; we want to always try to 422 * forcibly unmount such filesystems and only fall back to regular 423 * unmounts if the filesystem doesn't support forced unmounts. 424 * 425 * - We don't want to unnecessarily corrupt metadata on local 426 * filesystems (ie UFS), so we want to start off with graceful unmounts, 427 * and only escalate to doing forced unmounts if we get stuck. 428 * 429 * We start off walking backwards through the mount table. This doesn't 430 * give us strict ordering but ensures that we try to unmount submounts 431 * first. We thus limit the number of failed umount2(2) calls. 432 * 433 * The mechanism for determining if we're stuck is to count the number 434 * of failed unmounts each iteration through the mount table. This 435 * gives us an upper bound on the number of filesystems which remain 436 * mounted (autofs trigger nodes are dealt with separately). If at the 437 * end of one unmount+autofs_cleanup cycle we still have the same number 438 * of mounts that we started out with, we're stuck and try a forced 439 * unmount. If that fails (filesystem doesn't support forced unmounts) 440 * then we bail and are unable to teardown the zone. If it succeeds, 441 * we're no longer stuck so we continue with our policy of trying 442 * graceful mounts first. 443 * 444 * Zone must be down (ie, no processes or threads active). 445 */ 446 static int 447 unmount_filesystems(zlog_t *zlogp) 448 { 449 zoneid_t zoneid; 450 int error = 0; 451 FILE *mnttab; 452 struct mnttab *mnts; 453 uint_t nmnt; 454 char zroot[MAXPATHLEN + 1]; 455 size_t zrootlen; 456 uint_t oldcount = UINT_MAX; 457 boolean_t stuck = B_FALSE; 458 char **remote_fstypes = NULL; 459 460 if ((zoneid = getzoneidbyname(zone_name)) == -1) { 461 zerror(zlogp, B_TRUE, "unable to find zoneid"); 462 return (-1); 463 } 464 465 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 466 zerror(zlogp, B_FALSE, "unable to determine zone root"); 467 return (-1); 468 } 469 470 (void) strcat(zroot, "/"); 471 zrootlen = strlen(zroot); 472 473 if ((mnttab = fopen(MNTTAB, "r")) == NULL) { 474 zerror(zlogp, B_TRUE, "failed to open %s", MNTTAB); 475 return (-1); 476 } 477 /* 478 * Use our hacky mntfs ioctl so we see everything, even mounts with 479 * MS_NOMNTTAB. 480 */ 481 if (ioctl(fileno(mnttab), MNTIOC_SHOWHIDDEN, NULL) < 0) { 482 zerror(zlogp, B_TRUE, "unable to configure %s", MNTTAB); 483 error++; 484 goto out; 485 } 486 487 /* 488 * Build the list of remote fstypes so we know which ones we 489 * should forcibly unmount. 490 */ 491 remote_fstypes = get_remote_fstypes(zlogp); 492 for (; /* ever */; ) { 493 uint_t newcount = 0; 494 boolean_t unmounted; 495 struct mnttab *mnp; 496 char *path; 497 uint_t i; 498 499 mnts = NULL; 500 nmnt = 0; 501 /* 502 * MNTTAB gives us a way to walk through mounted 503 * filesystems; we need to be able to walk them in 504 * reverse order, so we build a list of all mounted 505 * filesystems. 506 */ 507 if (build_mnttable(zlogp, zroot, zrootlen, mnttab, &mnts, 508 &nmnt) != 0) { 509 error++; 510 goto out; 511 } 512 for (i = 0; i < nmnt; i++) { 513 mnp = &mnts[nmnt - i - 1]; /* access in reverse order */ 514 path = mnp->mnt_mountp; 515 unmounted = B_FALSE; 516 /* 517 * Try forced unmount first for remote filesystems. 518 * 519 * Not all remote filesystems support forced unmounts, 520 * so if this fails (ENOTSUP) we'll continue on 521 * and try a regular unmount. 522 */ 523 if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) { 524 if (umount2(path, MS_FORCE) == 0) 525 unmounted = B_TRUE; 526 } 527 /* 528 * Try forced unmount if we're stuck. 529 */ 530 if (stuck) { 531 if (umount2(path, MS_FORCE) == 0) { 532 unmounted = B_TRUE; 533 stuck = B_FALSE; 534 } else { 535 /* 536 * The first failure indicates a 537 * mount we won't be able to get 538 * rid of automatically, so we 539 * bail. 540 */ 541 error++; 542 zerror(zlogp, B_FALSE, 543 "unable to unmount '%s'", path); 544 free_mnttable(mnts, nmnt); 545 goto out; 546 } 547 } 548 /* 549 * Try regular unmounts for everything else. 550 */ 551 if (!unmounted && umount2(path, 0) != 0) 552 newcount++; 553 } 554 free_mnttable(mnts, nmnt); 555 556 if (newcount == 0) 557 break; 558 if (newcount >= oldcount) { 559 /* 560 * Last round didn't unmount anything; we're stuck and 561 * should start trying forced unmounts. 562 */ 563 stuck = B_TRUE; 564 } 565 oldcount = newcount; 566 567 /* 568 * Autofs doesn't let you unmount its trigger nodes from 569 * userland so we have to tell the kernel to cleanup for us. 570 */ 571 if (autofs_cleanup(zoneid) != 0) { 572 zerror(zlogp, B_TRUE, "unable to remove autofs nodes"); 573 error++; 574 goto out; 575 } 576 } 577 578 out: 579 free_remote_fstypes(remote_fstypes); 580 (void) fclose(mnttab); 581 return (error ? -1 : 0); 582 } 583 584 static int 585 fs_compare(const void *m1, const void *m2) 586 { 587 struct zone_fstab *i = (struct zone_fstab *)m1; 588 struct zone_fstab *j = (struct zone_fstab *)m2; 589 590 return (strcmp(i->zone_fs_dir, j->zone_fs_dir)); 591 } 592 593 /* 594 * Fork and exec (and wait for) the mentioned binary with the provided 595 * arguments. Returns (-1) if something went wrong with fork(2) or exec(2), 596 * returns the exit status otherwise. 597 * 598 * If we were unable to exec the provided pathname (for whatever 599 * reason), we return the special token ZEXIT_EXEC. The current value 600 * of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the 601 * consumers of this function; any future consumers must make sure this 602 * remains the case. 603 */ 604 static int 605 forkexec(zlog_t *zlogp, const char *path, char *const argv[]) 606 { 607 pid_t child_pid; 608 int child_status = 0; 609 610 /* 611 * Do not let another thread localize a message while we are forking. 612 */ 613 (void) mutex_lock(&msglock); 614 child_pid = fork(); 615 (void) mutex_unlock(&msglock); 616 if (child_pid == -1) { 617 zerror(zlogp, B_TRUE, "could not fork for %s", argv[0]); 618 return (-1); 619 } else if (child_pid == 0) { 620 closefrom(0); 621 (void) execv(path, argv); 622 /* 623 * Since we are in the child, there is no point calling zerror() 624 * since there is nobody waiting to consume it. So exit with a 625 * special code that the parent will recognize and call zerror() 626 * accordingly. 627 */ 628 629 _exit(ZEXIT_EXEC); 630 } else { 631 (void) waitpid(child_pid, &child_status, 0); 632 } 633 634 if (WIFSIGNALED(child_status)) { 635 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 636 "signal %d", path, WTERMSIG(child_status)); 637 return (-1); 638 } 639 assert(WIFEXITED(child_status)); 640 if (WEXITSTATUS(child_status) == ZEXIT_EXEC) { 641 zerror(zlogp, B_FALSE, "failed to exec %s", path); 642 return (-1); 643 } 644 return (WEXITSTATUS(child_status)); 645 } 646 647 static int 648 dofsck(zlog_t *zlogp, const char *fstype, const char *rawdev) 649 { 650 char cmdbuf[MAXPATHLEN]; 651 char *argv[4]; 652 int status; 653 654 /* 655 * We could alternatively have called /usr/sbin/fsck -F <fstype>, but 656 * that would cost us an extra fork/exec without buying us anything. 657 */ 658 if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/fsck", fstype) 659 > sizeof (cmdbuf)) { 660 zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); 661 return (-1); 662 } 663 664 argv[0] = "fsck"; 665 argv[1] = "-m"; 666 argv[2] = (char *)rawdev; 667 argv[3] = NULL; 668 669 status = forkexec(zlogp, cmdbuf, argv); 670 if (status == 0 || status == -1) 671 return (status); 672 zerror(zlogp, B_FALSE, "fsck of '%s' failed with exit status %d; " 673 "run fsck manually", rawdev, status); 674 return (-1); 675 } 676 677 static int 678 domount(zlog_t *zlogp, const char *fstype, const char *opts, 679 const char *special, const char *directory) 680 { 681 char cmdbuf[MAXPATHLEN]; 682 char *argv[6]; 683 int status; 684 685 /* 686 * We could alternatively have called /usr/sbin/mount -F <fstype>, but 687 * that would cost us an extra fork/exec without buying us anything. 688 */ 689 if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/mount", fstype) 690 > sizeof (cmdbuf)) { 691 zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); 692 return (-1); 693 } 694 argv[0] = "mount"; 695 if (opts[0] == '\0') { 696 argv[1] = (char *)special; 697 argv[2] = (char *)directory; 698 argv[3] = NULL; 699 } else { 700 argv[1] = "-o"; 701 argv[2] = (char *)opts; 702 argv[3] = (char *)special; 703 argv[4] = (char *)directory; 704 argv[5] = NULL; 705 } 706 707 status = forkexec(zlogp, cmdbuf, argv); 708 if (status == 0 || status == -1) 709 return (status); 710 if (opts[0] == '\0') 711 zerror(zlogp, B_FALSE, "\"%s %s %s\" " 712 "failed with exit code %d", 713 cmdbuf, special, directory, status); 714 else 715 zerror(zlogp, B_FALSE, "\"%s -o %s %s %s\" " 716 "failed with exit code %d", 717 cmdbuf, opts, special, directory, status); 718 return (-1); 719 } 720 721 /* 722 * Make sure if a given path exists, it is not a sym-link, and is a directory. 723 */ 724 static int 725 check_path(zlog_t *zlogp, const char *path) 726 { 727 struct stat statbuf; 728 char respath[MAXPATHLEN]; 729 int res; 730 731 if (lstat(path, &statbuf) != 0) { 732 if (errno == ENOENT) 733 return (0); 734 zerror(zlogp, B_TRUE, "can't stat %s", path); 735 return (-1); 736 } 737 if (S_ISLNK(statbuf.st_mode)) { 738 zerror(zlogp, B_FALSE, "%s is a symlink", path); 739 return (-1); 740 } 741 if (!S_ISDIR(statbuf.st_mode)) { 742 zerror(zlogp, B_FALSE, "%s is not a directory", path); 743 return (-1); 744 } 745 if ((res = resolvepath(path, respath, sizeof (respath))) == -1) { 746 zerror(zlogp, B_TRUE, "unable to resolve path %s", path); 747 return (-1); 748 } 749 respath[res] = '\0'; 750 if (strcmp(path, respath) != 0) { 751 /* 752 * We don't like ".."s and "."s throwing us off 753 */ 754 zerror(zlogp, B_FALSE, "%s is not a canonical path", path); 755 return (-1); 756 } 757 return (0); 758 } 759 760 /* 761 * Check every component of rootpath/relpath. If any component fails (ie, 762 * exists but isn't the canonical path to a directory), it is returned in 763 * badpath, which is assumed to be at least of size MAXPATHLEN. 764 * 765 * Relpath must begin with '/'. 766 */ 767 static boolean_t 768 valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *relpath) 769 { 770 char abspath[MAXPATHLEN], *slashp; 771 772 /* 773 * Make sure abspath has at least one '/' after its rootpath 774 * component, and ends with '/'. 775 */ 776 if (snprintf(abspath, sizeof (abspath), "%s%s/", rootpath, relpath) > 777 sizeof (abspath)) { 778 zerror(zlogp, B_FALSE, "pathname %s%s is too long", rootpath, 779 relpath); 780 return (B_FALSE); 781 } 782 783 slashp = &abspath[strlen(rootpath)]; 784 assert(*slashp == '/'); 785 do { 786 *slashp = '\0'; 787 if (check_path(zlogp, abspath) != 0) 788 return (B_FALSE); 789 *slashp = '/'; 790 slashp++; 791 } while ((slashp = strchr(slashp, '/')) != NULL); 792 return (B_TRUE); 793 } 794 795 static int 796 mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath) 797 { 798 char path[MAXPATHLEN]; 799 char optstr[MAX_MNTOPT_STR]; 800 zone_fsopt_t *optptr; 801 802 if (!valid_mount_path(zlogp, rootpath, fsptr->zone_fs_dir)) { 803 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 804 rootpath, fsptr->zone_fs_dir); 805 return (-1); 806 } 807 808 if (make_one_dir(zlogp, rootpath, fsptr->zone_fs_dir, 809 DEFAULT_DIR_MODE) != 0) 810 return (-1); 811 812 (void) snprintf(path, sizeof (path), "%s%s", rootpath, 813 fsptr->zone_fs_dir); 814 815 if (strlen(fsptr->zone_fs_special) == 0) { 816 /* 817 * A zero-length special is how we distinguish IPDs from 818 * general-purpose FSs. 819 */ 820 if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, 821 fsptr->zone_fs_dir, path) != 0) { 822 zerror(zlogp, B_TRUE, "failed to loopback mount %s", 823 fsptr->zone_fs_dir); 824 return (-1); 825 } 826 return (0); 827 } 828 829 /* 830 * In general the strategy here is to do just as much verification as 831 * necessary to avoid crashing or otherwise doing something bad; if the 832 * administrator initiated the operation via zoneadm(1m), he'll get 833 * auto-verification which will let him know what's wrong. If he 834 * modifies the zone configuration of a running zone and doesn't attempt 835 * to verify that it's OK we won't crash but won't bother trying to be 836 * too helpful either. zoneadm verify is only a couple keystrokes away. 837 */ 838 if (!zonecfg_valid_fs_type(fsptr->zone_fs_type)) { 839 zerror(zlogp, B_FALSE, "cannot mount %s on %s: " 840 "invalid file-system type %s", fsptr->zone_fs_special, 841 fsptr->zone_fs_dir, fsptr->zone_fs_type); 842 return (-1); 843 } 844 845 /* 846 * Run 'fsck -m' if there's a device to fsck. 847 */ 848 if (fsptr->zone_fs_raw[0] != '\0' && 849 dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_raw) != 0) 850 return (-1); 851 852 /* 853 * Build up mount option string. 854 */ 855 optstr[0] = '\0'; 856 if (fsptr->zone_fs_options != NULL) { 857 (void) strlcpy(optstr, fsptr->zone_fs_options->zone_fsopt_opt, 858 sizeof (optstr)); 859 for (optptr = fsptr->zone_fs_options->zone_fsopt_next; 860 optptr != NULL; optptr = optptr->zone_fsopt_next) { 861 (void) strlcat(optstr, ",", sizeof (optstr)); 862 (void) strlcat(optstr, optptr->zone_fsopt_opt, 863 sizeof (optstr)); 864 } 865 } 866 return (domount(zlogp, fsptr->zone_fs_type, optstr, 867 fsptr->zone_fs_special, path)); 868 } 869 870 static void 871 free_fs_data(struct zone_fstab *fsarray, uint_t nelem) 872 { 873 uint_t i; 874 875 if (fsarray == NULL) 876 return; 877 for (i = 0; i < nelem; i++) 878 zonecfg_free_fs_option_list(fsarray[i].zone_fs_options); 879 free(fsarray); 880 } 881 882 static int 883 mount_filesystems(zlog_t *zlogp) 884 { 885 char rootpath[MAXPATHLEN]; 886 char zonepath[MAXPATHLEN]; 887 int num_fs = 0, i; 888 struct zone_fstab fstab, *fs_ptr = NULL, *tmp_ptr; 889 struct zone_fstab *fsp; 890 zone_dochandle_t handle = NULL; 891 zone_state_t zstate; 892 893 if (zone_get_state(zone_name, &zstate) != Z_OK || 894 zstate != ZONE_STATE_READY) { 895 zerror(zlogp, B_FALSE, 896 "zone must be in '%s' state to mount file-systems", 897 zone_state_str(ZONE_STATE_READY)); 898 goto bad; 899 } 900 901 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { 902 zerror(zlogp, B_TRUE, "unable to determine zone path"); 903 goto bad; 904 } 905 906 if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { 907 zerror(zlogp, B_TRUE, "unable to determine zone root"); 908 goto bad; 909 } 910 911 if ((handle = zonecfg_init_handle()) == NULL) { 912 zerror(zlogp, B_TRUE, 913 "could not get zone configuration handle"); 914 goto bad; 915 } 916 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK || 917 zonecfg_setfsent(handle) != Z_OK) { 918 zerror(zlogp, B_FALSE, "invalid configuration"); 919 goto bad; 920 } 921 922 /* 923 * /dev in the zone is loopback'd from the external /dev repository, 924 * in order to provide a largely read-only semantic. But because 925 * processes in the zone need to be able to chown, chmod, etc. zone 926 * /dev files, we can't use a 'ro' lofs mount. Instead we use a 927 * special mode just for zones, "zonedevfs". 928 * 929 * In the future we should front /dev with a full-fledged filesystem. 930 */ 931 num_fs++; 932 if ((tmp_ptr = realloc(fs_ptr, num_fs * sizeof (*tmp_ptr))) == NULL) { 933 zerror(zlogp, B_TRUE, "memory allocation failed"); 934 num_fs--; 935 goto bad; 936 } 937 fs_ptr = tmp_ptr; 938 fsp = &fs_ptr[num_fs - 1]; 939 (void) strlcpy(fsp->zone_fs_dir, "/dev", sizeof (fsp->zone_fs_dir)); 940 (void) snprintf(fsp->zone_fs_special, sizeof (fsp->zone_fs_special), 941 "%s/dev", zonepath); 942 fsp->zone_fs_raw[0] = '\0'; 943 (void) strlcpy(fsp->zone_fs_type, MNTTYPE_LOFS, 944 sizeof (fsp->zone_fs_type)); 945 fsp->zone_fs_options = NULL; 946 if (zonecfg_add_fs_option(fsp, MNTOPT_LOFS_ZONEDEVFS) != Z_OK) { 947 zerror(zlogp, B_FALSE, "error adding property"); 948 goto bad; 949 } 950 951 /* 952 * Iterate through the rest of the filesystems, first the IPDs, then 953 * the general FSs. Sort them all, then mount them in sorted order. 954 * This is to make sure the higher level directories (e.g., /usr) 955 * get mounted before any beneath them (e.g., /usr/local). 956 */ 957 if (zonecfg_setipdent(handle) != Z_OK) { 958 zerror(zlogp, B_FALSE, "invalid configuration"); 959 goto bad; 960 } 961 while (zonecfg_getipdent(handle, &fstab) == Z_OK) { 962 num_fs++; 963 if ((tmp_ptr = realloc(fs_ptr, 964 num_fs * sizeof (*tmp_ptr))) == NULL) { 965 zerror(zlogp, B_TRUE, "memory allocation failed"); 966 num_fs--; 967 (void) zonecfg_endipdent(handle); 968 goto bad; 969 } 970 fs_ptr = tmp_ptr; 971 fsp = &fs_ptr[num_fs - 1]; 972 /* 973 * IPDs logically only have a mount point; all other properties 974 * are implied. 975 */ 976 (void) strlcpy(fsp->zone_fs_dir, 977 fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir)); 978 fsp->zone_fs_special[0] = '\0'; 979 fsp->zone_fs_raw[0] = '\0'; 980 fsp->zone_fs_type[0] = '\0'; 981 fsp->zone_fs_options = NULL; 982 } 983 (void) zonecfg_endipdent(handle); 984 985 if (zonecfg_setfsent(handle) != Z_OK) { 986 zerror(zlogp, B_FALSE, "invalid configuration"); 987 goto bad; 988 } 989 while (zonecfg_getfsent(handle, &fstab) == Z_OK) { 990 num_fs++; 991 if ((tmp_ptr = realloc(fs_ptr, 992 num_fs * sizeof (*tmp_ptr))) == NULL) { 993 zerror(zlogp, B_TRUE, "memory allocation failed"); 994 num_fs--; 995 (void) zonecfg_endfsent(handle); 996 goto bad; 997 } 998 fs_ptr = tmp_ptr; 999 fsp = &fs_ptr[num_fs - 1]; 1000 (void) strlcpy(fsp->zone_fs_dir, 1001 fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir)); 1002 (void) strlcpy(fsp->zone_fs_special, fstab.zone_fs_special, 1003 sizeof (fsp->zone_fs_special)); 1004 (void) strlcpy(fsp->zone_fs_raw, fstab.zone_fs_raw, 1005 sizeof (fsp->zone_fs_raw)); 1006 (void) strlcpy(fsp->zone_fs_type, fstab.zone_fs_type, 1007 sizeof (fsp->zone_fs_type)); 1008 fsp->zone_fs_options = fstab.zone_fs_options; 1009 } 1010 (void) zonecfg_endfsent(handle); 1011 zonecfg_fini_handle(handle); 1012 handle = NULL; 1013 1014 qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare); 1015 for (i = 0; i < num_fs; i++) { 1016 if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0) 1017 goto bad; 1018 } 1019 free_fs_data(fs_ptr, num_fs); 1020 1021 /* 1022 * Everything looks fine. 1023 */ 1024 return (0); 1025 1026 bad: 1027 if (handle != NULL) 1028 zonecfg_fini_handle(handle); 1029 free_fs_data(fs_ptr, num_fs); 1030 return (-1); 1031 } 1032 1033 /* caller makes sure neither parameter is NULL */ 1034 static int 1035 addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr) 1036 { 1037 int prefixlen; 1038 1039 prefixlen = atoi(prefixstr); 1040 if (prefixlen < 0 || prefixlen > maxprefixlen) 1041 return (1); 1042 while (prefixlen > 0) { 1043 if (prefixlen >= 8) { 1044 *maskstr++ = 0xFF; 1045 prefixlen -= 8; 1046 continue; 1047 } 1048 *maskstr |= 1 << (8 - prefixlen); 1049 prefixlen--; 1050 } 1051 return (0); 1052 } 1053 1054 /* 1055 * Tear down all interfaces belonging to the given zone. This should 1056 * be called with the zone in a state other than "running", so that 1057 * interfaces can't be assigned to the zone after this returns. 1058 * 1059 * If anything goes wrong, log an error message and return an error. 1060 */ 1061 static int 1062 unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id) 1063 { 1064 struct lifnum lifn; 1065 struct lifconf lifc; 1066 struct lifreq *lifrp, lifrl; 1067 int64_t lifc_flags = LIFC_NOXMIT | LIFC_ALLZONES; 1068 int num_ifs, s, i, ret_code = 0; 1069 uint_t bufsize; 1070 char *buf = NULL; 1071 1072 if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { 1073 zerror(zlogp, B_TRUE, "could not get socket"); 1074 ret_code = -1; 1075 goto bad; 1076 } 1077 lifn.lifn_family = AF_UNSPEC; 1078 lifn.lifn_flags = (int)lifc_flags; 1079 if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) { 1080 zerror(zlogp, B_TRUE, 1081 "could not determine number of interfaces"); 1082 ret_code = -1; 1083 goto bad; 1084 } 1085 num_ifs = lifn.lifn_count; 1086 bufsize = num_ifs * sizeof (struct lifreq); 1087 if ((buf = malloc(bufsize)) == NULL) { 1088 zerror(zlogp, B_TRUE, "memory allocation failed"); 1089 ret_code = -1; 1090 goto bad; 1091 } 1092 lifc.lifc_family = AF_UNSPEC; 1093 lifc.lifc_flags = (int)lifc_flags; 1094 lifc.lifc_len = bufsize; 1095 lifc.lifc_buf = buf; 1096 if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) { 1097 zerror(zlogp, B_TRUE, "could not get configured interfaces"); 1098 ret_code = -1; 1099 goto bad; 1100 } 1101 lifrp = lifc.lifc_req; 1102 for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--, lifrp++) { 1103 (void) close(s); 1104 if ((s = socket(lifrp->lifr_addr.ss_family, SOCK_DGRAM, 0)) < 1105 0) { 1106 zerror(zlogp, B_TRUE, "%s: could not get socket", 1107 lifrl.lifr_name); 1108 ret_code = -1; 1109 continue; 1110 } 1111 (void) memset(&lifrl, 0, sizeof (lifrl)); 1112 (void) strncpy(lifrl.lifr_name, lifrp->lifr_name, 1113 sizeof (lifrl.lifr_name)); 1114 if (ioctl(s, SIOCGLIFZONE, (caddr_t)&lifrl) < 0) { 1115 zerror(zlogp, B_TRUE, 1116 "%s: could not determine zone interface belongs to", 1117 lifrl.lifr_name); 1118 ret_code = -1; 1119 continue; 1120 } 1121 if (lifrl.lifr_zoneid == zone_id) { 1122 if (ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifrl) < 0) { 1123 zerror(zlogp, B_TRUE, 1124 "%s: could not remove interface", 1125 lifrl.lifr_name); 1126 ret_code = -1; 1127 continue; 1128 } 1129 } 1130 } 1131 bad: 1132 if (s > 0) 1133 (void) close(s); 1134 if (buf) 1135 free(buf); 1136 return (ret_code); 1137 } 1138 1139 static union sockunion { 1140 struct sockaddr sa; 1141 struct sockaddr_in sin; 1142 struct sockaddr_dl sdl; 1143 struct sockaddr_in6 sin6; 1144 } so_dst, so_ifp; 1145 1146 static struct { 1147 struct rt_msghdr hdr; 1148 char space[512]; 1149 } rtmsg; 1150 1151 static int 1152 salen(struct sockaddr *sa) 1153 { 1154 switch (sa->sa_family) { 1155 case AF_INET: 1156 return (sizeof (struct sockaddr_in)); 1157 case AF_LINK: 1158 return (sizeof (struct sockaddr_dl)); 1159 case AF_INET6: 1160 return (sizeof (struct sockaddr_in6)); 1161 default: 1162 return (sizeof (struct sockaddr)); 1163 } 1164 } 1165 1166 #define ROUNDUP_LONG(a) \ 1167 ((a) > 0 ? (1 + (((a) - 1) | (sizeof (long) - 1))) : sizeof (long)) 1168 1169 /* 1170 * Look up which zone is using a given IP address. The address in question 1171 * is expected to have been stuffed into the structure to which lifr points 1172 * via a previous SIOCGLIFADDR ioctl(). 1173 * 1174 * This is done using black router socket magic. 1175 * 1176 * Return the name of the zone on success or NULL on failure. 1177 * 1178 * This is a lot of code for a simple task; a new ioctl request to take care 1179 * of this might be a useful RFE. 1180 */ 1181 1182 static char * 1183 who_is_using(zlog_t *zlogp, struct lifreq *lifr) 1184 { 1185 static char answer[ZONENAME_MAX]; 1186 pid_t pid; 1187 int s, rlen, l, i; 1188 char *cp = rtmsg.space; 1189 struct sockaddr_dl *ifp = NULL; 1190 struct sockaddr *sa; 1191 char save_if_name[LIFNAMSIZ]; 1192 1193 answer[0] = '\0'; 1194 1195 pid = getpid(); 1196 if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) < 0) { 1197 zerror(zlogp, B_TRUE, "could not get routing socket"); 1198 return (NULL); 1199 } 1200 1201 if (lifr->lifr_addr.ss_family == AF_INET) { 1202 struct sockaddr_in *sin4; 1203 1204 so_dst.sa.sa_family = AF_INET; 1205 sin4 = (struct sockaddr_in *)&lifr->lifr_addr; 1206 so_dst.sin.sin_addr = sin4->sin_addr; 1207 } else { 1208 struct sockaddr_in6 *sin6; 1209 1210 so_dst.sa.sa_family = AF_INET6; 1211 sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr; 1212 so_dst.sin6.sin6_addr = sin6->sin6_addr; 1213 } 1214 1215 so_ifp.sa.sa_family = AF_LINK; 1216 1217 (void) memset(&rtmsg, 0, sizeof (rtmsg)); 1218 rtmsg.hdr.rtm_type = RTM_GET; 1219 rtmsg.hdr.rtm_flags = RTF_UP | RTF_HOST; 1220 rtmsg.hdr.rtm_version = RTM_VERSION; 1221 rtmsg.hdr.rtm_seq = ++rts_seqno; 1222 rtmsg.hdr.rtm_addrs = RTA_IFP | RTA_DST; 1223 1224 l = ROUNDUP_LONG(salen(&so_dst.sa)); 1225 (void) memmove(cp, &(so_dst), l); 1226 cp += l; 1227 l = ROUNDUP_LONG(salen(&so_ifp.sa)); 1228 (void) memmove(cp, &(so_ifp), l); 1229 cp += l; 1230 1231 rtmsg.hdr.rtm_msglen = l = cp - (char *)&rtmsg; 1232 1233 if ((rlen = write(s, &rtmsg, l)) < 0) { 1234 zerror(zlogp, B_TRUE, "writing to routing socket"); 1235 return (NULL); 1236 } else if (rlen < (int)rtmsg.hdr.rtm_msglen) { 1237 zerror(zlogp, B_TRUE, 1238 "write to routing socket got only %d for len\n", rlen); 1239 return (NULL); 1240 } 1241 do { 1242 l = read(s, &rtmsg, sizeof (rtmsg)); 1243 } while (l > 0 && (rtmsg.hdr.rtm_seq != rts_seqno || 1244 rtmsg.hdr.rtm_pid != pid)); 1245 if (l < 0) { 1246 zerror(zlogp, B_TRUE, "reading from routing socket"); 1247 return (NULL); 1248 } 1249 1250 if (rtmsg.hdr.rtm_version != RTM_VERSION) { 1251 zerror(zlogp, B_FALSE, 1252 "routing message version %d not understood", 1253 rtmsg.hdr.rtm_version); 1254 return (NULL); 1255 } 1256 if (rtmsg.hdr.rtm_msglen != (ushort_t)l) { 1257 zerror(zlogp, B_FALSE, "message length mismatch, " 1258 "expected %d bytes, returned %d bytes", 1259 rtmsg.hdr.rtm_msglen, l); 1260 return (NULL); 1261 } 1262 if (rtmsg.hdr.rtm_errno != 0) { 1263 errno = rtmsg.hdr.rtm_errno; 1264 zerror(zlogp, B_TRUE, "RTM_GET routing socket message"); 1265 return (NULL); 1266 } 1267 if ((rtmsg.hdr.rtm_addrs & RTA_IFP) == 0) { 1268 zerror(zlogp, B_FALSE, "interface not found"); 1269 return (NULL); 1270 } 1271 cp = ((char *)(&rtmsg.hdr + 1)); 1272 for (i = 1; i != 0; i <<= 1) { 1273 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1274 sa = (struct sockaddr *)cp; 1275 if (i != RTA_IFP) { 1276 if ((i & rtmsg.hdr.rtm_addrs) != 0) 1277 cp += ROUNDUP_LONG(salen(sa)); 1278 continue; 1279 } 1280 if (sa->sa_family == AF_LINK && 1281 ((struct sockaddr_dl *)sa)->sdl_nlen != 0) 1282 ifp = (struct sockaddr_dl *)sa; 1283 break; 1284 } 1285 if (ifp == NULL) { 1286 zerror(zlogp, B_FALSE, "interface could not be determined"); 1287 return (NULL); 1288 } 1289 1290 /* 1291 * We need to set the I/F name to what we got above, then do the 1292 * appropriate ioctl to get its zone name. But lifr->lifr_name is 1293 * used by the calling function to do a REMOVEIF, so if we leave the 1294 * "good" zone's I/F name in place, *that* I/F will be removed instead 1295 * of the bad one. So we save the old (bad) I/F name before over- 1296 * writing it and doing the ioctl, then restore it after the ioctl. 1297 */ 1298 (void) strlcpy(save_if_name, lifr->lifr_name, sizeof (save_if_name)); 1299 (void) strncpy(lifr->lifr_name, ifp->sdl_data, ifp->sdl_nlen); 1300 lifr->lifr_name[ifp->sdl_nlen] = '\0'; 1301 i = ioctl(s, SIOCGLIFZONE, lifr); 1302 (void) strlcpy(lifr->lifr_name, save_if_name, sizeof (save_if_name)); 1303 if (i < 0) { 1304 zerror(zlogp, B_TRUE, 1305 "%s: could not determine the zone interface belongs to", 1306 lifr->lifr_name); 1307 return (NULL); 1308 } 1309 if (getzonenamebyid(lifr->lifr_zoneid, answer, sizeof (answer)) < 0) 1310 (void) snprintf(answer, sizeof (answer), "%d", 1311 lifr->lifr_zoneid); 1312 1313 if (strlen(answer) > 0) 1314 return (answer); 1315 return (NULL); 1316 } 1317 1318 typedef struct mcast_rtmsg_s { 1319 struct rt_msghdr m_rtm; 1320 union { 1321 struct { 1322 struct sockaddr_in m_dst; 1323 struct sockaddr_in m_gw; 1324 struct sockaddr_in m_netmask; 1325 } m_v4; 1326 struct { 1327 struct sockaddr_in6 m_dst; 1328 struct sockaddr_in6 m_gw; 1329 struct sockaddr_in6 m_netmask; 1330 } m_v6; 1331 } m_u; 1332 } mcast_rtmsg_t; 1333 #define m_dst4 m_u.m_v4.m_dst 1334 #define m_dst6 m_u.m_v6.m_dst 1335 #define m_gw4 m_u.m_v4.m_gw 1336 #define m_gw6 m_u.m_v6.m_gw 1337 #define m_netmask4 m_u.m_v4.m_netmask 1338 #define m_netmask6 m_u.m_v6.m_netmask 1339 1340 /* 1341 * Configures a single interface: a new virtual interface is added, based on 1342 * the physical interface nwiftabptr->zone_nwif_physical, with the address 1343 * specified in nwiftabptr->zone_nwif_address, for zone zone_id. Note that 1344 * the "address" can be an IPv6 address (with a /prefixlength required), an 1345 * IPv4 address (with a /prefixlength optional), or a name; for the latter, 1346 * an IPv4 name-to-address resolution will be attempted. 1347 * 1348 * A default interface route for multicast is created on the first IPv4 and 1349 * IPv6 interfaces (that have the IFF_MULTICAST flag set), respectively. 1350 * This should really be done in the init scripts if we ever allow zones to 1351 * modify the routing tables. 1352 * 1353 * If anything goes wrong, we log an detailed error message, attempt to tear 1354 * down whatever we set up and return an error. 1355 */ 1356 static int 1357 configure_one_interface(zlog_t *zlogp, zoneid_t zone_id, 1358 struct zone_nwiftab *nwiftabptr, boolean_t *mcast_rt_v4_setp, 1359 boolean_t *mcast_rt_v6_setp) 1360 { 1361 struct lifreq lifr; 1362 struct sockaddr_in netmask4; 1363 struct sockaddr_in6 netmask6; 1364 struct in_addr in4; 1365 struct in6_addr in6; 1366 sa_family_t af; 1367 char *slashp = strchr(nwiftabptr->zone_nwif_address, '/'); 1368 mcast_rtmsg_t mcast_rtmsg; 1369 int s; 1370 int rs; 1371 int rlen; 1372 boolean_t got_netmask = B_FALSE; 1373 char addrstr4[INET_ADDRSTRLEN]; 1374 int res; 1375 1376 res = zonecfg_valid_net_address(nwiftabptr->zone_nwif_address, &lifr); 1377 if (res != Z_OK) { 1378 zerror(zlogp, B_FALSE, "%s: %s", zonecfg_strerror(res), 1379 nwiftabptr->zone_nwif_address); 1380 return (-1); 1381 } 1382 af = lifr.lifr_addr.ss_family; 1383 if (af == AF_INET) 1384 in4 = ((struct sockaddr_in *)(&lifr.lifr_addr))->sin_addr; 1385 else 1386 in6 = ((struct sockaddr_in6 *)(&lifr.lifr_addr))->sin6_addr; 1387 1388 if ((s = socket(af, SOCK_DGRAM, 0)) < 0) { 1389 zerror(zlogp, B_TRUE, "could not get socket"); 1390 return (-1); 1391 } 1392 1393 (void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical, 1394 sizeof (lifr.lifr_name)); 1395 if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) { 1396 zerror(zlogp, B_TRUE, "%s: could not add interface", 1397 lifr.lifr_name); 1398 (void) close(s); 1399 return (-1); 1400 } 1401 1402 if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) { 1403 zerror(zlogp, B_TRUE, 1404 "%s: could not set IP address to %s", 1405 lifr.lifr_name, nwiftabptr->zone_nwif_address); 1406 goto bad; 1407 } 1408 1409 /* Preserve literal IPv4 address for later potential printing. */ 1410 if (af == AF_INET) 1411 (void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN); 1412 1413 lifr.lifr_zoneid = zone_id; 1414 if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) { 1415 zerror(zlogp, B_TRUE, "%s: could not place interface into zone", 1416 lifr.lifr_name); 1417 goto bad; 1418 } 1419 1420 if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) { 1421 got_netmask = B_TRUE; /* default setting will be correct */ 1422 } else { 1423 if (af == AF_INET) { 1424 /* 1425 * The IPv4 netmask can be determined either 1426 * directly if a prefix length was supplied with 1427 * the address or via the netmasks database. Not 1428 * being able to determine it is a common failure, 1429 * but it often is not fatal to operation of the 1430 * interface. In that case, a warning will be 1431 * printed after the rest of the interface's 1432 * parameters have been configured. 1433 */ 1434 (void) memset(&netmask4, 0, sizeof (netmask4)); 1435 if (slashp != NULL) { 1436 if (addr2netmask(slashp + 1, V4_ADDR_LEN, 1437 (uchar_t *)&netmask4.sin_addr) != 0) { 1438 *slashp = '/'; 1439 zerror(zlogp, B_FALSE, 1440 "%s: invalid prefix length in %s", 1441 lifr.lifr_name, 1442 nwiftabptr->zone_nwif_address); 1443 goto bad; 1444 } 1445 got_netmask = B_TRUE; 1446 } else if (getnetmaskbyaddr(in4, 1447 &netmask4.sin_addr) == 0) { 1448 got_netmask = B_TRUE; 1449 } 1450 if (got_netmask) { 1451 netmask4.sin_family = af; 1452 (void) memcpy(&lifr.lifr_addr, &netmask4, 1453 sizeof (netmask4)); 1454 } 1455 } else { 1456 (void) memset(&netmask6, 0, sizeof (netmask6)); 1457 if (addr2netmask(slashp + 1, V6_ADDR_LEN, 1458 (uchar_t *)&netmask6.sin6_addr) != 0) { 1459 *slashp = '/'; 1460 zerror(zlogp, B_FALSE, 1461 "%s: invalid prefix length in %s", 1462 lifr.lifr_name, 1463 nwiftabptr->zone_nwif_address); 1464 goto bad; 1465 } 1466 got_netmask = B_TRUE; 1467 netmask6.sin6_family = af; 1468 (void) memcpy(&lifr.lifr_addr, &netmask6, 1469 sizeof (netmask6)); 1470 } 1471 if (got_netmask && 1472 ioctl(s, SIOCSLIFNETMASK, (caddr_t)&lifr) < 0) { 1473 zerror(zlogp, B_TRUE, "%s: could not set netmask", 1474 lifr.lifr_name); 1475 goto bad; 1476 } 1477 1478 /* 1479 * This doesn't set the broadcast address at all. Rather, it 1480 * gets, then sets the interface's address, relying on the fact 1481 * that resetting the address will reset the broadcast address. 1482 */ 1483 if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { 1484 zerror(zlogp, B_TRUE, "%s: could not get address", 1485 lifr.lifr_name); 1486 goto bad; 1487 } 1488 if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) { 1489 zerror(zlogp, B_TRUE, 1490 "%s: could not reset broadcast address", 1491 lifr.lifr_name); 1492 goto bad; 1493 } 1494 } 1495 1496 if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { 1497 zerror(zlogp, B_TRUE, "%s: could not get flags", 1498 lifr.lifr_name); 1499 goto bad; 1500 } 1501 lifr.lifr_flags |= IFF_UP; 1502 if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { 1503 int save_errno = errno; 1504 char *zone_using; 1505 1506 /* 1507 * If we failed with something other than EADDRNOTAVAIL, 1508 * then skip to the end. Otherwise, look up our address, 1509 * then call a function to determine which zone is already 1510 * using that address. 1511 */ 1512 if (errno != EADDRNOTAVAIL) { 1513 zerror(zlogp, B_TRUE, 1514 "%s: could not bring interface up", lifr.lifr_name); 1515 goto bad; 1516 } 1517 if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { 1518 zerror(zlogp, B_TRUE, "%s: could not get address", 1519 lifr.lifr_name); 1520 goto bad; 1521 } 1522 zone_using = who_is_using(zlogp, &lifr); 1523 errno = save_errno; 1524 if (zone_using == NULL) 1525 zerror(zlogp, B_TRUE, 1526 "%s: could not bring interface up", lifr.lifr_name); 1527 else 1528 zerror(zlogp, B_TRUE, "%s: could not bring interface " 1529 "up: address in use by zone '%s'", lifr.lifr_name, 1530 zone_using); 1531 goto bad; 1532 } 1533 if ((lifr.lifr_flags & IFF_MULTICAST) && ((af == AF_INET && 1534 mcast_rt_v4_setp != NULL && *mcast_rt_v4_setp == B_FALSE) || 1535 (af == AF_INET6 && 1536 mcast_rt_v6_setp != NULL && *mcast_rt_v6_setp == B_FALSE))) { 1537 rs = socket(PF_ROUTE, SOCK_RAW, 0); 1538 if (rs < 0) { 1539 zerror(zlogp, B_TRUE, "%s: could not create " 1540 "routing socket", lifr.lifr_name); 1541 goto bad; 1542 } 1543 (void) shutdown(rs, 0); 1544 (void) memset((void *)&mcast_rtmsg, 0, sizeof (mcast_rtmsg_t)); 1545 mcast_rtmsg.m_rtm.rtm_msglen = sizeof (struct rt_msghdr) + 1546 3 * (af == AF_INET ? sizeof (struct sockaddr_in) : 1547 sizeof (struct sockaddr_in6)); 1548 mcast_rtmsg.m_rtm.rtm_version = RTM_VERSION; 1549 mcast_rtmsg.m_rtm.rtm_type = RTM_ADD; 1550 mcast_rtmsg.m_rtm.rtm_flags = RTF_UP; 1551 mcast_rtmsg.m_rtm.rtm_addrs = 1552 RTA_DST | RTA_GATEWAY | RTA_NETMASK; 1553 mcast_rtmsg.m_rtm.rtm_seq = ++rts_seqno; 1554 if (af == AF_INET) { 1555 mcast_rtmsg.m_dst4.sin_family = AF_INET; 1556 mcast_rtmsg.m_dst4.sin_addr.s_addr = 1557 htonl(INADDR_UNSPEC_GROUP); 1558 mcast_rtmsg.m_gw4.sin_family = AF_INET; 1559 mcast_rtmsg.m_gw4.sin_addr = in4; 1560 mcast_rtmsg.m_netmask4.sin_family = AF_INET; 1561 mcast_rtmsg.m_netmask4.sin_addr.s_addr = 1562 htonl(IN_CLASSD_NET); 1563 } else { 1564 mcast_rtmsg.m_dst6.sin6_family = AF_INET6; 1565 mcast_rtmsg.m_dst6.sin6_addr.s6_addr[0] = 0xffU; 1566 mcast_rtmsg.m_gw6.sin6_family = AF_INET6; 1567 mcast_rtmsg.m_gw6.sin6_addr = in6; 1568 mcast_rtmsg.m_netmask6.sin6_family = AF_INET6; 1569 mcast_rtmsg.m_netmask6.sin6_addr.s6_addr[0] = 0xffU; 1570 } 1571 rlen = write(rs, (char *)&mcast_rtmsg, 1572 mcast_rtmsg.m_rtm.rtm_msglen); 1573 if (rlen < mcast_rtmsg.m_rtm.rtm_msglen) { 1574 if (rlen < 0) { 1575 zerror(zlogp, B_TRUE, "%s: could not set " 1576 "default interface for multicast", 1577 lifr.lifr_name); 1578 } else { 1579 zerror(zlogp, B_FALSE, "%s: write to routing " 1580 "socket returned %d", lifr.lifr_name, rlen); 1581 } 1582 (void) close(rs); 1583 goto bad; 1584 } 1585 if (af == AF_INET) { 1586 *mcast_rt_v4_setp = B_TRUE; 1587 } else { 1588 *mcast_rt_v6_setp = B_TRUE; 1589 } 1590 (void) close(rs); 1591 } 1592 1593 if (!got_netmask) { 1594 /* 1595 * A common, but often non-fatal problem, is that the system 1596 * cannot find the netmask for an interface address. This is 1597 * often caused by it being only in /etc/inet/netmasks, but 1598 * /etc/nsswitch.conf says to use NIS or NIS+ and it's not 1599 * in that. This doesn't show up at boot because the netmask 1600 * is obtained from /etc/inet/netmasks when no network 1601 * interfaces are up, but isn't consulted when NIS/NIS+ is 1602 * available. We warn the user here that something like this 1603 * has happened and we're just running with a default and 1604 * possible incorrect netmask. 1605 */ 1606 char buffer[INET6_ADDRSTRLEN]; 1607 void *addr; 1608 1609 if (af == AF_INET) 1610 addr = &((struct sockaddr_in *) 1611 (&lifr.lifr_addr))->sin_addr; 1612 else 1613 addr = &((struct sockaddr_in6 *) 1614 (&lifr.lifr_addr))->sin6_addr; 1615 1616 /* Find out what netmask interface is going to be using */ 1617 if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 || 1618 inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL) 1619 goto bad; 1620 zerror(zlogp, B_FALSE, 1621 "WARNING: %s: no matching subnet found in netmasks(4) for " 1622 "%s; using default of %s.", 1623 lifr.lifr_name, addrstr4, buffer); 1624 } 1625 1626 (void) close(s); 1627 return (Z_OK); 1628 bad: 1629 (void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr); 1630 (void) close(s); 1631 return (-1); 1632 } 1633 1634 /* 1635 * Sets up network interfaces based on information from the zone configuration. 1636 * An IPv4 loopback interface is set up "for free", modeling the global system. 1637 * If any of the configuration interfaces were IPv6, then an IPv6 loopback 1638 * address is set up as well. 1639 * 1640 * If anything goes wrong, we log a general error message, attempt to tear down 1641 * whatever we set up, and return an error. 1642 */ 1643 static int 1644 configure_network_interfaces(zlog_t *zlogp) 1645 { 1646 zone_dochandle_t handle; 1647 struct zone_nwiftab nwiftab, loopback_iftab; 1648 boolean_t saw_v6 = B_FALSE; 1649 boolean_t mcast_rt_v4_set = B_FALSE; 1650 boolean_t mcast_rt_v6_set = B_FALSE; 1651 zoneid_t zoneid; 1652 1653 if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) { 1654 zerror(zlogp, B_TRUE, "unable to get zoneid"); 1655 return (-1); 1656 } 1657 1658 if ((handle = zonecfg_init_handle()) == NULL) { 1659 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 1660 return (-1); 1661 } 1662 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 1663 zerror(zlogp, B_FALSE, "invalid configuration"); 1664 zonecfg_fini_handle(handle); 1665 return (-1); 1666 } 1667 if (zonecfg_setnwifent(handle) == Z_OK) { 1668 for (;;) { 1669 struct in6_addr in6; 1670 1671 if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK) 1672 break; 1673 if (configure_one_interface(zlogp, zoneid, 1674 &nwiftab, &mcast_rt_v4_set, &mcast_rt_v6_set) != 1675 Z_OK) { 1676 (void) zonecfg_endnwifent(handle); 1677 zonecfg_fini_handle(handle); 1678 return (-1); 1679 } 1680 if (inet_pton(AF_INET6, nwiftab.zone_nwif_address, 1681 &in6) == 1) 1682 saw_v6 = B_TRUE; 1683 } 1684 (void) zonecfg_endnwifent(handle); 1685 } 1686 zonecfg_fini_handle(handle); 1687 (void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0", 1688 sizeof (loopback_iftab.zone_nwif_physical)); 1689 (void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1", 1690 sizeof (loopback_iftab.zone_nwif_address)); 1691 if (configure_one_interface(zlogp, zoneid, &loopback_iftab, NULL, NULL) 1692 != Z_OK) { 1693 return (-1); 1694 } 1695 if (saw_v6) { 1696 (void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128", 1697 sizeof (loopback_iftab.zone_nwif_address)); 1698 if (configure_one_interface(zlogp, zoneid, 1699 &loopback_iftab, NULL, NULL) != Z_OK) { 1700 return (-1); 1701 } 1702 } 1703 return (0); 1704 } 1705 1706 static int 1707 tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid, 1708 const struct sockaddr_storage *local, const struct sockaddr_storage *remote) 1709 { 1710 int fd; 1711 struct strioctl ioc; 1712 tcp_ioc_abort_conn_t conn; 1713 int error; 1714 1715 conn.ac_local = *local; 1716 conn.ac_remote = *remote; 1717 conn.ac_start = TCPS_SYN_SENT; 1718 conn.ac_end = TCPS_TIME_WAIT; 1719 conn.ac_zoneid = zoneid; 1720 1721 ioc.ic_cmd = TCP_IOC_ABORT_CONN; 1722 ioc.ic_timout = -1; /* infinite timeout */ 1723 ioc.ic_len = sizeof (conn); 1724 ioc.ic_dp = (char *)&conn; 1725 1726 if ((fd = open("/dev/tcp", O_RDONLY)) < 0) { 1727 zerror(zlogp, B_TRUE, "unable to open %s", "/dev/tcp"); 1728 return (-1); 1729 } 1730 1731 error = ioctl(fd, I_STR, &ioc); 1732 (void) close(fd); 1733 if (error == 0 || errno == ENOENT) /* ENOENT is not an error */ 1734 return (0); 1735 return (-1); 1736 } 1737 1738 static int 1739 tcp_abort_connections(zlog_t *zlogp, zoneid_t zoneid) 1740 { 1741 struct sockaddr_storage l, r; 1742 struct sockaddr_in *local, *remote; 1743 struct sockaddr_in6 *local6, *remote6; 1744 int error; 1745 1746 /* 1747 * Abort IPv4 connections. 1748 */ 1749 bzero(&l, sizeof (*local)); 1750 local = (struct sockaddr_in *)&l; 1751 local->sin_family = AF_INET; 1752 local->sin_addr.s_addr = INADDR_ANY; 1753 local->sin_port = 0; 1754 1755 bzero(&r, sizeof (*remote)); 1756 remote = (struct sockaddr_in *)&r; 1757 remote->sin_family = AF_INET; 1758 remote->sin_addr.s_addr = INADDR_ANY; 1759 remote->sin_port = 0; 1760 1761 if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0) 1762 return (error); 1763 1764 /* 1765 * Abort IPv6 connections. 1766 */ 1767 bzero(&l, sizeof (*local6)); 1768 local6 = (struct sockaddr_in6 *)&l; 1769 local6->sin6_family = AF_INET6; 1770 local6->sin6_port = 0; 1771 local6->sin6_addr = in6addr_any; 1772 1773 bzero(&r, sizeof (*remote6)); 1774 remote6 = (struct sockaddr_in6 *)&r; 1775 remote6->sin6_family = AF_INET6; 1776 remote6->sin6_port = 0; 1777 remote6->sin6_addr = in6addr_any; 1778 1779 if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0) 1780 return (error); 1781 return (0); 1782 } 1783 1784 static int 1785 devfsadm_call(zlog_t *zlogp, const char *arg) 1786 { 1787 char *argv[4]; 1788 int status; 1789 1790 argv[0] = DEVFSADM; 1791 argv[1] = (char *)arg; 1792 argv[2] = zone_name; 1793 argv[3] = NULL; 1794 status = forkexec(zlogp, DEVFSADM_PATH, argv); 1795 if (status == 0 || status == -1) 1796 return (status); 1797 zerror(zlogp, B_FALSE, "%s call (%s %s %s) unexpectedly returned %d", 1798 DEVFSADM, DEVFSADM_PATH, arg, zone_name, status); 1799 return (-1); 1800 } 1801 1802 static int 1803 devfsadm_register(zlog_t *zlogp) 1804 { 1805 /* 1806 * Ready the zone's devices. 1807 */ 1808 return (devfsadm_call(zlogp, "-z")); 1809 } 1810 1811 static int 1812 devfsadm_unregister(zlog_t *zlogp) 1813 { 1814 return (devfsadm_call(zlogp, "-Z")); 1815 } 1816 1817 static int 1818 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) 1819 { 1820 nvlist_t *nvl = NULL; 1821 char *nvl_packed = NULL; 1822 size_t nvl_size = 0; 1823 nvlist_t **nvlv = NULL; 1824 int rctlcount = 0; 1825 int error = -1; 1826 zone_dochandle_t handle; 1827 struct zone_rctltab rctltab; 1828 rctlblk_t *rctlblk = NULL; 1829 1830 *bufp = NULL; 1831 *bufsizep = 0; 1832 1833 if ((handle = zonecfg_init_handle()) == NULL) { 1834 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 1835 return (-1); 1836 } 1837 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 1838 zerror(zlogp, B_FALSE, "invalid configuration"); 1839 zonecfg_fini_handle(handle); 1840 return (-1); 1841 } 1842 1843 rctltab.zone_rctl_valptr = NULL; 1844 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) { 1845 zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc"); 1846 goto out; 1847 } 1848 1849 if (zonecfg_setrctlent(handle) != Z_OK) { 1850 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent"); 1851 goto out; 1852 } 1853 1854 if ((rctlblk = malloc(rctlblk_size())) == NULL) { 1855 zerror(zlogp, B_TRUE, "memory allocation failed"); 1856 goto out; 1857 } 1858 while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) { 1859 struct zone_rctlvaltab *rctlval; 1860 uint_t i, count; 1861 const char *name = rctltab.zone_rctl_name; 1862 1863 /* zoneadm should have already warned about unknown rctls. */ 1864 if (!zonecfg_is_rctl(name)) { 1865 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 1866 rctltab.zone_rctl_valptr = NULL; 1867 continue; 1868 } 1869 count = 0; 1870 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL; 1871 rctlval = rctlval->zone_rctlval_next) { 1872 count++; 1873 } 1874 if (count == 0) { /* ignore */ 1875 continue; /* Nothing to free */ 1876 } 1877 if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL) 1878 goto out; 1879 i = 0; 1880 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL; 1881 rctlval = rctlval->zone_rctlval_next, i++) { 1882 if (nvlist_alloc(&nvlv[i], NV_UNIQUE_NAME, 0) != 0) { 1883 zerror(zlogp, B_TRUE, "%s failed", 1884 "nvlist_alloc"); 1885 goto out; 1886 } 1887 if (zonecfg_construct_rctlblk(rctlval, rctlblk) 1888 != Z_OK) { 1889 zerror(zlogp, B_FALSE, "invalid rctl value: " 1890 "(priv=%s,limit=%s,action=%s)", 1891 rctlval->zone_rctlval_priv, 1892 rctlval->zone_rctlval_limit, 1893 rctlval->zone_rctlval_action); 1894 goto out; 1895 } 1896 if (!zonecfg_valid_rctl(name, rctlblk)) { 1897 zerror(zlogp, B_FALSE, 1898 "(priv=%s,limit=%s,action=%s) is not a " 1899 "valid value for rctl '%s'", 1900 rctlval->zone_rctlval_priv, 1901 rctlval->zone_rctlval_limit, 1902 rctlval->zone_rctlval_action, 1903 name); 1904 goto out; 1905 } 1906 if (nvlist_add_uint64(nvlv[i], "privilege", 1907 rctlblk_get_privilege(rctlblk)) != 0) { 1908 zerror(zlogp, B_FALSE, "%s failed", 1909 "nvlist_add_uint64"); 1910 goto out; 1911 } 1912 if (nvlist_add_uint64(nvlv[i], "limit", 1913 rctlblk_get_value(rctlblk)) != 0) { 1914 zerror(zlogp, B_FALSE, "%s failed", 1915 "nvlist_add_uint64"); 1916 goto out; 1917 } 1918 if (nvlist_add_uint64(nvlv[i], "action", 1919 (uint_t)rctlblk_get_local_action(rctlblk, NULL)) 1920 != 0) { 1921 zerror(zlogp, B_FALSE, "%s failed", 1922 "nvlist_add_uint64"); 1923 goto out; 1924 } 1925 } 1926 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 1927 rctltab.zone_rctl_valptr = NULL; 1928 if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count) 1929 != 0) { 1930 zerror(zlogp, B_FALSE, "%s failed", 1931 "nvlist_add_nvlist_array"); 1932 goto out; 1933 } 1934 for (i = 0; i < count; i++) 1935 nvlist_free(nvlv[i]); 1936 free(nvlv); 1937 nvlv = NULL; 1938 rctlcount++; 1939 } 1940 (void) zonecfg_endrctlent(handle); 1941 1942 if (rctlcount == 0) { 1943 error = 0; 1944 goto out; 1945 } 1946 if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0) 1947 != 0) { 1948 zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack"); 1949 goto out; 1950 } 1951 1952 error = 0; 1953 *bufp = nvl_packed; 1954 *bufsizep = nvl_size; 1955 1956 out: 1957 free(rctlblk); 1958 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 1959 if (error && nvl_packed != NULL) 1960 free(nvl_packed); 1961 if (nvl != NULL) 1962 nvlist_free(nvl); 1963 if (nvlv != NULL) 1964 free(nvlv); 1965 if (handle != NULL) 1966 zonecfg_fini_handle(handle); 1967 return (error); 1968 } 1969 1970 static int 1971 get_zone_pool(zlog_t *zlogp, char *poolbuf, size_t bufsz) 1972 { 1973 zone_dochandle_t handle; 1974 int error; 1975 1976 if ((handle = zonecfg_init_handle()) == NULL) { 1977 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 1978 return (-1); 1979 } 1980 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 1981 zerror(zlogp, B_FALSE, "invalid configuration"); 1982 zonecfg_fini_handle(handle); 1983 return (-1); 1984 } 1985 error = zonecfg_get_pool(handle, poolbuf, bufsz); 1986 zonecfg_fini_handle(handle); 1987 return (error); 1988 } 1989 1990 static int 1991 bind_to_pool(zlog_t *zlogp, zoneid_t zoneid) 1992 { 1993 pool_conf_t *poolconf; 1994 pool_t *pool; 1995 char poolname[MAXPATHLEN]; 1996 int status; 1997 int error; 1998 1999 /* 2000 * Find the pool mentioned in the zone configuration, and bind to it. 2001 */ 2002 error = get_zone_pool(zlogp, poolname, sizeof (poolname)); 2003 if (error == Z_NO_ENTRY || (error == Z_OK && strlen(poolname) == 0)) { 2004 /* 2005 * The property is not set on the zone, so the pool 2006 * should be bound to the default pool. But that's 2007 * already done by the kernel, so we can just return. 2008 */ 2009 return (0); 2010 } 2011 if (error != Z_OK) { 2012 /* 2013 * Not an error, even though it shouldn't be happening. 2014 */ 2015 zerror(zlogp, B_FALSE, 2016 "WARNING: unable to retrieve default pool."); 2017 return (0); 2018 } 2019 /* 2020 * Don't do anything if pools aren't enabled. 2021 */ 2022 if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED) { 2023 zerror(zlogp, B_FALSE, "WARNING: pools facility not active; " 2024 "zone will not be bound to pool '%s'.", poolname); 2025 return (0); 2026 } 2027 /* 2028 * Try to provide a sane error message if the requested pool doesn't 2029 * exist. 2030 */ 2031 if ((poolconf = pool_conf_alloc()) == NULL) { 2032 zerror(zlogp, B_FALSE, "%s failed", "pool_conf_alloc"); 2033 return (-1); 2034 } 2035 if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) != 2036 PO_SUCCESS) { 2037 zerror(zlogp, B_FALSE, "%s failed", "pool_conf_open"); 2038 pool_conf_free(poolconf); 2039 return (-1); 2040 } 2041 pool = pool_get_pool(poolconf, poolname); 2042 (void) pool_conf_close(poolconf); 2043 pool_conf_free(poolconf); 2044 if (pool == NULL) { 2045 zerror(zlogp, B_FALSE, "WARNING: pool '%s' not found; " 2046 "using default pool.", poolname); 2047 return (0); 2048 } 2049 /* 2050 * Bind the zone to the pool. 2051 */ 2052 if (pool_set_binding(poolname, P_ZONEID, zoneid) != PO_SUCCESS) { 2053 zerror(zlogp, B_FALSE, "WARNING: unable to bind to pool '%s'; " 2054 "using default pool.", poolname); 2055 } 2056 return (0); 2057 } 2058 2059 int 2060 prtmount(const char *fs, void *x) { 2061 zerror((zlog_t *)x, B_FALSE, " %s", fs); 2062 return (0); 2063 } 2064 2065 int 2066 vplat_create(zlog_t *zlogp) 2067 { 2068 int rval = -1; 2069 priv_set_t *privs; 2070 char rootpath[MAXPATHLEN]; 2071 char *rctlbuf = NULL; 2072 size_t rctlbufsz; 2073 zoneid_t zoneid; 2074 int xerr; 2075 2076 if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { 2077 zerror(zlogp, B_TRUE, "unable to determine zone root"); 2078 return (-1); 2079 } 2080 2081 if ((privs = priv_allocset()) == NULL) { 2082 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 2083 return (-1); 2084 } 2085 priv_emptyset(privs); 2086 if (zonecfg_get_privset(privs) != Z_OK) { 2087 zerror(zlogp, B_TRUE, "Failed to initialize privileges"); 2088 goto error; 2089 } 2090 if (get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) { 2091 zerror(zlogp, B_FALSE, "Unable to get list of rctls"); 2092 goto error; 2093 } 2094 2095 xerr = 0; 2096 if ((zoneid = zone_create(zone_name, rootpath, privs, rctlbuf, 2097 rctlbufsz, &xerr)) == -1) { 2098 if (xerr == ZE_AREMOUNTS) { 2099 if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) { 2100 zerror(zlogp, B_FALSE, 2101 "An unknown file-system is mounted on " 2102 "a subdirectory of %s", rootpath); 2103 } else { 2104 2105 zerror(zlogp, B_FALSE, 2106 "These file-systems are mounted on " 2107 "subdirectories of %s:", rootpath); 2108 (void) zonecfg_find_mounts(rootpath, 2109 prtmount, zlogp); 2110 } 2111 } else if (xerr == ZE_CHROOTED) { 2112 zerror(zlogp, B_FALSE, "%s: " 2113 "cannot create a zone from a chrooted " 2114 "environment", "zone_create"); 2115 } else { 2116 zerror(zlogp, B_TRUE, "%s failed", "zone_create"); 2117 } 2118 goto error; 2119 } 2120 /* 2121 * The following is a warning, not an error. 2122 */ 2123 if (bind_to_pool(zlogp, zoneid) != 0) 2124 zerror(zlogp, B_FALSE, "WARNING: unable to bind zone to " 2125 "requested pool; using default pool."); 2126 rval = 0; 2127 error: 2128 if (rctlbuf != NULL) 2129 free(rctlbuf); 2130 priv_freeset(privs); 2131 return (rval); 2132 } 2133 2134 int 2135 vplat_bringup(zlog_t *zlogp) 2136 { 2137 if (create_dev_files(zlogp) != 0) 2138 return (-1); 2139 if (mount_filesystems(zlogp) != 0) 2140 return (-1); 2141 if (devfsadm_register(zlogp) != 0) 2142 return (-1); 2143 if (configure_network_interfaces(zlogp) != 0) 2144 return (-1); 2145 return (0); 2146 } 2147 2148 int 2149 vplat_teardown(zlog_t *zlogp) 2150 { 2151 zoneid_t zoneid; 2152 2153 if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) { 2154 if (!bringup_failure_recovery) 2155 zerror(zlogp, B_TRUE, "unable to get zoneid"); 2156 goto error; 2157 } 2158 2159 if (zone_shutdown(zoneid) != 0) { 2160 zerror(zlogp, B_TRUE, "unable to shutdown zone"); 2161 goto error; 2162 } 2163 2164 if (devfsadm_unregister(zlogp) != 0) 2165 goto error; 2166 2167 if (unconfigure_network_interfaces(zlogp, zoneid) != 0) { 2168 zerror(zlogp, B_FALSE, 2169 "unable to unconfigure network interfaces in zone"); 2170 goto error; 2171 } 2172 2173 if (tcp_abort_connections(zlogp, zoneid) != 0) { 2174 zerror(zlogp, B_TRUE, "unable to abort TCP connections"); 2175 goto error; 2176 } 2177 2178 if (unmount_filesystems(zlogp) != 0) { 2179 zerror(zlogp, B_FALSE, 2180 "unable to unmount file systems in zone"); 2181 goto error; 2182 } 2183 2184 if (zone_destroy(zoneid) != 0) { 2185 zerror(zlogp, B_TRUE, "unable to destroy zone"); 2186 goto error; 2187 } 2188 destroy_console_slave(); 2189 2190 return (0); 2191 2192 error: 2193 return (-1); 2194 } 2195