1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2018, Joyent Inc. 25 * Copyright (c) 2015, 2016 by Delphix. All rights reserved. 26 * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. 27 * Copyright 2020 RackTop Systems Inc. 28 * Copyright 2023 Oxide Computer Company 29 */ 30 31 /* 32 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 33 */ 34 35 /* 36 * This module contains functions used to bring up and tear down the 37 * Virtual Platform: [un]mounting file-systems, [un]plumbing network 38 * interfaces, [un]configuring devices, establishing resource controls, 39 * and creating/destroying the zone in the kernel. These actions, on 40 * the way up, ready the zone; on the way down, they halt the zone. 41 * See the much longer block comment at the beginning of zoneadmd.c 42 * for a bigger picture of how the whole program functions. 43 * 44 * This module also has primary responsibility for the layout of "scratch 45 * zones." 
These are mounted, but inactive, zones that are used during 46 * operating system upgrade and potentially other administrative action. The 47 * scratch zone environment is similar to the miniroot environment. The zone's 48 * actual root is mounted read-write on /a, and the standard paths (/usr, 49 * /sbin, /lib) all lead to read-only copies of the running system's binaries. 50 * This allows the administrative tools to manipulate the zone using "-R /a" 51 * without relying on any binaries in the zone itself. 52 * 53 * If the scratch zone is on an alternate root (Live Upgrade [LU] boot 54 * environment), then we must resolve the lofs mounts used there to uncover 55 * writable (unshared) resources. Shared resources, though, are always 56 * read-only. In addition, if the "same" zone with a different root path is 57 * currently running, then "/b" inside the zone points to the running zone's 58 * root. This allows LU to synchronize configuration files during the upgrade 59 * process. 60 * 61 * To construct this environment, this module creates a tmpfs mount on 62 * $ZONEPATH/lu. Inside this scratch area, the miniroot-like environment as 63 * described above is constructed on the fly. The zone is then created using 64 * $ZONEPATH/lu as the root. 65 * 66 * Note that scratch zones are inactive. The zone's bits are not running and 67 * likely cannot be run correctly until upgrade is done. Init is not running 68 * there, nor is SMF. Because of this, the "mounted" state of a scratch zone 69 * is not a part of the usual halt/ready/boot state machine. 
70 */ 71 72 #include <sys/param.h> 73 #include <sys/mount.h> 74 #include <sys/mntent.h> 75 #include <sys/socket.h> 76 #include <sys/utsname.h> 77 #include <sys/types.h> 78 #include <sys/stat.h> 79 #include <sys/sockio.h> 80 #include <sys/stropts.h> 81 #include <sys/conf.h> 82 #include <sys/systeminfo.h> 83 #include <sys/secflags.h> 84 #include <sys/vnic.h> 85 86 #include <libdlpi.h> 87 #include <libdllink.h> 88 #include <libdlvlan.h> 89 #include <libdlvnic.h> 90 #include <libdlaggr.h> 91 92 #include <inet/tcp.h> 93 #include <arpa/inet.h> 94 #include <netinet/in.h> 95 #include <net/route.h> 96 97 #include <stdio.h> 98 #include <errno.h> 99 #include <fcntl.h> 100 #include <unistd.h> 101 #include <rctl.h> 102 #include <stdlib.h> 103 #include <string.h> 104 #include <strings.h> 105 #include <wait.h> 106 #include <limits.h> 107 #include <libgen.h> 108 #include <libzfs.h> 109 #include <libdevinfo.h> 110 #include <zone.h> 111 #include <assert.h> 112 #include <libcontract.h> 113 #include <libcontract_priv.h> 114 #include <uuid/uuid.h> 115 116 #include <sys/mntio.h> 117 #include <sys/mnttab.h> 118 #include <sys/fs/autofs.h> /* for _autofssys() */ 119 #include <sys/fs/lofs_info.h> 120 #include <sys/fs/zfs.h> 121 122 #include <pool.h> 123 #include <sys/pool.h> 124 #include <sys/priocntl.h> 125 126 #include <libbrand.h> 127 #include <sys/brand.h> 128 #include <libzonecfg.h> 129 #include <synch.h> 130 131 #include "zoneadmd.h" 132 #include <tsol/label.h> 133 #include <libtsnet.h> 134 #include <sys/priv.h> 135 #include <libinetutil.h> 136 137 #define V4_ADDR_LEN 32 138 #define V6_ADDR_LEN 128 139 140 #define RESOURCE_DEFAULT_OPTS \ 141 MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES 142 143 #define DFSTYPES "/etc/dfs/fstypes" 144 #define MAXTNZLEN 2048 145 146 #define ALT_MOUNT(mount_cmd) ((mount_cmd) != Z_MNT_BOOT) 147 148 /* a reasonable estimate for the number of lwps per process */ 149 #define LWPS_PER_PROCESS 10 150 151 /* for routing socket */ 152 static int rts_seqno 
= 0; 153 154 /* mangled zone name when mounting in an alternate root environment */ 155 static char kernzone[ZONENAME_MAX]; 156 157 /* array of cached mount entries for resolve_lofs */ 158 static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max; 159 160 /* for Trusted Extensions */ 161 static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *); 162 static int tsol_mounts(zlog_t *, char *, char *); 163 static void tsol_unmounts(zlog_t *, char *); 164 165 static m_label_t *zlabel = NULL; 166 static m_label_t *zid_label = NULL; 167 static priv_set_t *zprivs = NULL; 168 169 static const char *DFLT_FS_ALLOWED = "hsfs,smbfs,nfs,nfs3,nfs4,nfsdyn"; 170 171 /* from libsocket, not in any header file */ 172 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *); 173 174 /* from zoneadmd */ 175 extern char query_hook[]; 176 177 /* 178 * For each "net" resource configured in zonecfg, we track a zone_addr_list_t 179 * node in a linked list that is sorted by linkid. The list is constructed as 180 * the xml configuration file is parsed, and the information 181 * contained in each node is added to the kernel before the zone is 182 * booted, to be retrieved and applied from within the exclusive-IP NGZ 183 * on boot. 184 */ 185 typedef struct zone_addr_list { 186 struct zone_addr_list *za_next; 187 datalink_id_t za_linkid; /* datalink_id_t of interface */ 188 struct zone_nwiftab za_nwiftab; /* address, defrouter properties */ 189 } zone_addr_list_t; 190 191 /* 192 * An optimization for build_mnttable: reallocate (and potentially copy the 193 * data) only once every N times through the loop. 
194 */ 195 #define MNTTAB_HUNK 32 196 197 /* some handy macros */ 198 #define SIN(s) ((struct sockaddr_in *)s) 199 #define SIN6(s) ((struct sockaddr_in6 *)s) 200 201 /* 202 * Private autofs system call 203 */ 204 extern int _autofssys(int, void *); 205 206 static int 207 autofs_cleanup(zoneid_t zoneid) 208 { 209 int r; 210 211 /* 212 * Ask autofs to unmount all trigger nodes in the given zone. 213 * Handle ENOSYS in the case that the autofs kernel module is not 214 * installed. 215 */ 216 r = _autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid); 217 if (r != 0 && errno == ENOSYS) { 218 return (0); 219 } 220 return (r); 221 } 222 223 static void 224 free_mnttable(struct mnttab *mnt_array, uint_t nelem) 225 { 226 uint_t i; 227 228 if (mnt_array == NULL) 229 return; 230 for (i = 0; i < nelem; i++) { 231 free(mnt_array[i].mnt_mountp); 232 free(mnt_array[i].mnt_fstype); 233 free(mnt_array[i].mnt_special); 234 free(mnt_array[i].mnt_mntopts); 235 assert(mnt_array[i].mnt_time == NULL); 236 } 237 free(mnt_array); 238 } 239 240 /* 241 * Build the mount table for the zone rooted at "zroot", storing the resulting 242 * array of struct mnttabs in "mnt_arrayp" and the number of elements in the 243 * array in "nelemp". 244 */ 245 static int 246 build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab, 247 struct mnttab **mnt_arrayp, uint_t *nelemp) 248 { 249 struct mnttab mnt; 250 struct mnttab *mnts; 251 struct mnttab *mnp; 252 uint_t nmnt; 253 254 rewind(mnttab); 255 resetmnttab(mnttab); 256 nmnt = 0; 257 mnts = NULL; 258 while (getmntent(mnttab, &mnt) == 0) { 259 struct mnttab *tmp_array; 260 261 if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0) 262 continue; 263 if (nmnt % MNTTAB_HUNK == 0) { 264 tmp_array = realloc(mnts, 265 (nmnt + MNTTAB_HUNK) * sizeof (*mnts)); 266 if (tmp_array == NULL) { 267 free_mnttable(mnts, nmnt); 268 return (-1); 269 } 270 mnts = tmp_array; 271 } 272 mnp = &mnts[nmnt++]; 273 274 /* 275 * Zero out any fields we're not using. 
276 */ 277 (void) memset(mnp, 0, sizeof (*mnp)); 278 279 if (mnt.mnt_special != NULL) 280 mnp->mnt_special = strdup(mnt.mnt_special); 281 if (mnt.mnt_mntopts != NULL) 282 mnp->mnt_mntopts = strdup(mnt.mnt_mntopts); 283 mnp->mnt_mountp = strdup(mnt.mnt_mountp); 284 mnp->mnt_fstype = strdup(mnt.mnt_fstype); 285 if ((mnt.mnt_special != NULL && mnp->mnt_special == NULL) || 286 (mnt.mnt_mntopts != NULL && mnp->mnt_mntopts == NULL) || 287 mnp->mnt_mountp == NULL || mnp->mnt_fstype == NULL) { 288 zerror(zlogp, B_TRUE, "memory allocation failed"); 289 free_mnttable(mnts, nmnt); 290 return (-1); 291 } 292 } 293 *mnt_arrayp = mnts; 294 *nelemp = nmnt; 295 return (0); 296 } 297 298 /* 299 * This is an optimization. The resolve_lofs function is used quite frequently 300 * to manipulate file paths, and on a machine with a large number of zones, 301 * there will be a huge number of mounted file systems. Thus, we trigger a 302 * reread of the list of mount points 303 */ 304 static void 305 lofs_discard_mnttab(void) 306 { 307 free_mnttable(resolve_lofs_mnts, 308 resolve_lofs_mnt_max - resolve_lofs_mnts); 309 resolve_lofs_mnts = resolve_lofs_mnt_max = NULL; 310 } 311 312 static int 313 lofs_read_mnttab(zlog_t *zlogp) 314 { 315 FILE *mnttab; 316 uint_t nmnts; 317 318 if ((mnttab = fopen(MNTTAB, "r")) == NULL) 319 return (-1); 320 if (build_mnttable(zlogp, "", 0, mnttab, &resolve_lofs_mnts, 321 &nmnts) == -1) { 322 (void) fclose(mnttab); 323 return (-1); 324 } 325 (void) fclose(mnttab); 326 resolve_lofs_mnt_max = resolve_lofs_mnts + nmnts; 327 return (0); 328 } 329 330 /* 331 * This function loops over potential loopback mounts and symlinks in a given 332 * path and resolves them all down to an absolute path. 
333 */ 334 void 335 resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen) 336 { 337 int len, arlen; 338 const char *altroot; 339 char tmppath[MAXPATHLEN]; 340 boolean_t outside_altroot; 341 342 if ((len = resolvepath(path, tmppath, sizeof (tmppath))) == -1) 343 return; 344 tmppath[len] = '\0'; 345 (void) strlcpy(path, tmppath, sizeof (tmppath)); 346 347 /* This happens once per zoneadmd operation. */ 348 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 349 return; 350 351 altroot = zonecfg_get_root(); 352 arlen = strlen(altroot); 353 outside_altroot = B_FALSE; 354 for (;;) { 355 struct mnttab *mnp; 356 357 /* Search in reverse order to find longest match */ 358 for (mnp = resolve_lofs_mnt_max - 1; mnp >= resolve_lofs_mnts; 359 mnp--) { 360 if (mnp->mnt_fstype == NULL || 361 mnp->mnt_mountp == NULL || 362 mnp->mnt_special == NULL) 363 continue; 364 len = strlen(mnp->mnt_mountp); 365 if (strncmp(mnp->mnt_mountp, path, len) == 0 && 366 (path[len] == '/' || path[len] == '\0')) 367 break; 368 } 369 if (mnp < resolve_lofs_mnts) 370 break; 371 /* If it's not a lofs then we're done */ 372 if (strcmp(mnp->mnt_fstype, MNTTYPE_LOFS) != 0) 373 break; 374 if (outside_altroot) { 375 char *cp; 376 int olen = sizeof (MNTOPT_RO) - 1; 377 378 /* 379 * If we run into a read-only mount outside of the 380 * alternate root environment, then the user doesn't 381 * want this path to be made read-write. 
382 */ 383 if (mnp->mnt_mntopts != NULL && 384 (cp = strstr(mnp->mnt_mntopts, MNTOPT_RO)) != 385 NULL && 386 (cp == mnp->mnt_mntopts || cp[-1] == ',') && 387 (cp[olen] == '\0' || cp[olen] == ',')) { 388 break; 389 } 390 } else if (arlen > 0 && 391 (strncmp(mnp->mnt_special, altroot, arlen) != 0 || 392 (mnp->mnt_special[arlen] != '\0' && 393 mnp->mnt_special[arlen] != '/'))) { 394 outside_altroot = B_TRUE; 395 } 396 /* use temporary buffer because new path might be longer */ 397 (void) snprintf(tmppath, sizeof (tmppath), "%s%s", 398 mnp->mnt_special, path + len); 399 if ((len = resolvepath(tmppath, path, pathlen)) == -1) 400 break; 401 path[len] = '\0'; 402 } 403 } 404 405 /* 406 * For a regular mount, check if a replacement lofs mount is needed because the 407 * referenced device is already mounted somewhere. 408 */ 409 static int 410 check_lofs_needed(zlog_t *zlogp, struct zone_fstab *fsptr) 411 { 412 struct mnttab *mnp; 413 zone_fsopt_t *optptr, *onext; 414 415 /* This happens once per zoneadmd operation. */ 416 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 417 return (-1); 418 419 /* 420 * If this special node isn't already in use, then it's ours alone; 421 * no need to worry about conflicting mounts. 422 */ 423 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; 424 mnp++) { 425 if (strcmp(mnp->mnt_special, fsptr->zone_fs_special) == 0) 426 break; 427 } 428 if (mnp >= resolve_lofs_mnt_max) 429 return (0); 430 431 /* 432 * Convert this duplicate mount into a lofs mount. 433 */ 434 (void) strlcpy(fsptr->zone_fs_special, mnp->mnt_mountp, 435 sizeof (fsptr->zone_fs_special)); 436 (void) strlcpy(fsptr->zone_fs_type, MNTTYPE_LOFS, 437 sizeof (fsptr->zone_fs_type)); 438 fsptr->zone_fs_raw[0] = '\0'; 439 440 /* 441 * Discard all but one of the original options and set that to our 442 * default set of options used for resources. 
443 */ 444 optptr = fsptr->zone_fs_options; 445 if (optptr == NULL) { 446 optptr = malloc(sizeof (*optptr)); 447 if (optptr == NULL) { 448 zerror(zlogp, B_TRUE, "cannot mount %s", 449 fsptr->zone_fs_dir); 450 return (-1); 451 } 452 } else { 453 while ((onext = optptr->zone_fsopt_next) != NULL) { 454 optptr->zone_fsopt_next = onext->zone_fsopt_next; 455 free(onext); 456 } 457 } 458 (void) strcpy(optptr->zone_fsopt_opt, RESOURCE_DEFAULT_OPTS); 459 optptr->zone_fsopt_next = NULL; 460 fsptr->zone_fs_options = optptr; 461 return (0); 462 } 463 464 int 465 make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode, 466 uid_t userid, gid_t groupid) 467 { 468 char path[MAXPATHLEN]; 469 struct stat st; 470 471 if (snprintf(path, sizeof (path), "%s%s", prefix, subdir) > 472 sizeof (path)) { 473 zerror(zlogp, B_FALSE, "pathname %s%s is too long", prefix, 474 subdir); 475 return (-1); 476 } 477 478 if (lstat(path, &st) == 0) { 479 /* 480 * We don't check the file mode since presumably the zone 481 * administrator may have had good reason to change the mode, 482 * and we don't need to second guess them. 483 */ 484 if (!S_ISDIR(st.st_mode)) { 485 if (S_ISREG(st.st_mode)) { 486 /* 487 * Allow readonly mounts of /etc/ files; this 488 * is needed most by Trusted Extensions. 
489 */ 490 if (strncmp(subdir, "/etc/", 491 strlen("/etc/")) != 0) { 492 zerror(zlogp, B_FALSE, 493 "%s is not in /etc", path); 494 return (-1); 495 } 496 } else { 497 zerror(zlogp, B_FALSE, 498 "%s is not a directory", path); 499 return (-1); 500 } 501 } 502 return (0); 503 } 504 505 if (mkdirp(path, mode) != 0) { 506 if (errno == EROFS) 507 zerror(zlogp, B_FALSE, "Could not mkdir %s.\nIt is on " 508 "a read-only file system in this local zone.\nMake " 509 "sure %s exists in the global zone.", path, subdir); 510 else 511 zerror(zlogp, B_TRUE, "mkdirp of %s failed", path); 512 return (-1); 513 } 514 515 (void) chown(path, userid, groupid); 516 return (0); 517 } 518 519 static void 520 free_remote_fstypes(char **types) 521 { 522 uint_t i; 523 524 if (types == NULL) 525 return; 526 for (i = 0; types[i] != NULL; i++) 527 free(types[i]); 528 free(types); 529 } 530 531 static char ** 532 get_remote_fstypes(zlog_t *zlogp) 533 { 534 char **types = NULL; 535 FILE *fp; 536 char buf[MAXPATHLEN]; 537 char fstype[MAXPATHLEN]; 538 uint_t lines = 0; 539 uint_t i; 540 541 if ((fp = fopen(DFSTYPES, "r")) == NULL) { 542 zerror(zlogp, B_TRUE, "failed to open %s", DFSTYPES); 543 return (NULL); 544 } 545 /* 546 * Count the number of lines 547 */ 548 while (fgets(buf, sizeof (buf), fp) != NULL) 549 lines++; 550 if (lines == 0) /* didn't read anything; empty file */ 551 goto out; 552 rewind(fp); 553 /* 554 * Allocate enough space for a NULL-terminated array. 
555 */ 556 types = calloc(lines + 1, sizeof (char *)); 557 if (types == NULL) { 558 zerror(zlogp, B_TRUE, "memory allocation failed"); 559 goto out; 560 } 561 i = 0; 562 while (fgets(buf, sizeof (buf), fp) != NULL) { 563 /* LINTED - fstype is big enough to hold buf */ 564 if (sscanf(buf, "%s", fstype) == 0) { 565 zerror(zlogp, B_FALSE, "unable to parse %s", DFSTYPES); 566 free_remote_fstypes(types); 567 types = NULL; 568 goto out; 569 } 570 types[i] = strdup(fstype); 571 if (types[i] == NULL) { 572 zerror(zlogp, B_TRUE, "memory allocation failed"); 573 free_remote_fstypes(types); 574 types = NULL; 575 goto out; 576 } 577 i++; 578 } 579 out: 580 (void) fclose(fp); 581 return (types); 582 } 583 584 static boolean_t 585 is_remote_fstype(const char *fstype, char *const *remote_fstypes) 586 { 587 uint_t i; 588 589 if (remote_fstypes == NULL) 590 return (B_FALSE); 591 for (i = 0; remote_fstypes[i] != NULL; i++) { 592 if (strcmp(remote_fstypes[i], fstype) == 0) 593 return (B_TRUE); 594 } 595 return (B_FALSE); 596 } 597 598 /* 599 * This converts a zone root path (normally of the form .../root) to a Live 600 * Upgrade scratch zone root (of the form .../lu). 601 */ 602 static void 603 root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved) 604 { 605 if (!isresolved && zonecfg_in_alt_root()) 606 resolve_lofs(zlogp, zroot, zrootlen); 607 (void) strcpy(strrchr(zroot, '/') + 1, "lu"); 608 } 609 610 /* 611 * The general strategy for unmounting filesystems is as follows: 612 * 613 * - Remote filesystems may be dead, and attempting to contact them as 614 * part of a regular unmount may hang forever; we want to always try to 615 * forcibly unmount such filesystems and only fall back to regular 616 * unmounts if the filesystem doesn't support forced unmounts. 
 *
 * - We don't want to unnecessarily corrupt metadata on local
 *   filesystems (ie UFS), so we want to start off with graceful unmounts,
 *   and only escalate to doing forced unmounts if we get stuck.
 *
 * We start off walking backwards through the mount table. This doesn't
 * give us strict ordering but ensures that we try to unmount submounts
 * first. We thus limit the number of failed umount2(2) calls.
 *
 * The mechanism for determining if we're stuck is to count the number
 * of failed unmounts each iteration through the mount table. This
 * gives us an upper bound on the number of filesystems which remain
 * mounted (autofs trigger nodes are dealt with separately). If at the
 * end of one unmount+autofs_cleanup cycle we still have the same number
 * of mounts that we started out with, we're stuck and try a forced
 * unmount. If that fails (filesystem doesn't support forced unmounts)
 * then we bail and are unable to teardown the zone. If it succeeds,
 * we're no longer stuck so we continue with our policy of trying
 * graceful unmounts first.
 *
 * Zone must be down (ie, no processes or threads active).
638 */ 639 static int 640 unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd) 641 { 642 int error = 0; 643 FILE *mnttab; 644 struct mnttab *mnts; 645 uint_t nmnt; 646 char zroot[MAXPATHLEN + 1]; 647 size_t zrootlen; 648 uint_t oldcount = UINT_MAX; 649 boolean_t stuck = B_FALSE; 650 char **remote_fstypes = NULL; 651 652 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 653 zerror(zlogp, B_FALSE, "unable to determine zone root"); 654 return (-1); 655 } 656 if (unmount_cmd) 657 root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE); 658 659 (void) strcat(zroot, "/"); 660 zrootlen = strlen(zroot); 661 662 /* 663 * For Trusted Extensions unmount each higher level zone's mount 664 * of our zone's /export/home 665 */ 666 if (!unmount_cmd) 667 tsol_unmounts(zlogp, zone_name); 668 669 if ((mnttab = fopen(MNTTAB, "r")) == NULL) { 670 zerror(zlogp, B_TRUE, "failed to open %s", MNTTAB); 671 return (-1); 672 } 673 /* 674 * Use our hacky mntfs ioctl so we see everything, even mounts with 675 * MS_NOMNTTAB. 676 */ 677 if (ioctl(fileno(mnttab), MNTIOC_SHOWHIDDEN, NULL) < 0) { 678 zerror(zlogp, B_TRUE, "unable to configure %s", MNTTAB); 679 error++; 680 goto out; 681 } 682 683 /* 684 * Build the list of remote fstypes so we know which ones we 685 * should forcibly unmount. 686 */ 687 remote_fstypes = get_remote_fstypes(zlogp); 688 for (; /* ever */; ) { 689 uint_t newcount = 0; 690 boolean_t unmounted; 691 struct mnttab *mnp; 692 char *path; 693 uint_t i; 694 695 mnts = NULL; 696 nmnt = 0; 697 /* 698 * MNTTAB gives us a way to walk through mounted 699 * filesystems; we need to be able to walk them in 700 * reverse order, so we build a list of all mounted 701 * filesystems. 
702 */ 703 if (build_mnttable(zlogp, zroot, zrootlen, mnttab, &mnts, 704 &nmnt) != 0) { 705 error++; 706 goto out; 707 } 708 for (i = 0; i < nmnt; i++) { 709 mnp = &mnts[nmnt - i - 1]; /* access in reverse order */ 710 path = mnp->mnt_mountp; 711 unmounted = B_FALSE; 712 /* 713 * Try forced unmount first for remote filesystems. 714 * 715 * Not all remote filesystems support forced unmounts, 716 * so if this fails (ENOTSUP) we'll continue on 717 * and try a regular unmount. 718 */ 719 if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) { 720 if (umount2(path, MS_FORCE) == 0) 721 unmounted = B_TRUE; 722 } 723 /* 724 * Try forced unmount if we're stuck. 725 */ 726 if (stuck) { 727 if (umount2(path, MS_FORCE) == 0) { 728 unmounted = B_TRUE; 729 stuck = B_FALSE; 730 } else { 731 /* 732 * The first failure indicates a 733 * mount we won't be able to get 734 * rid of automatically, so we 735 * bail. 736 */ 737 error++; 738 zerror(zlogp, B_FALSE, 739 "unable to unmount '%s'", path); 740 free_mnttable(mnts, nmnt); 741 goto out; 742 } 743 } 744 /* 745 * Try regular unmounts for everything else. 746 */ 747 if (!unmounted && umount2(path, 0) != 0) 748 newcount++; 749 } 750 free_mnttable(mnts, nmnt); 751 752 if (newcount == 0) 753 break; 754 if (newcount >= oldcount) { 755 /* 756 * Last round didn't unmount anything; we're stuck and 757 * should start trying forced unmounts. 758 */ 759 stuck = B_TRUE; 760 } 761 oldcount = newcount; 762 763 /* 764 * Autofs doesn't let you unmount its trigger nodes from 765 * userland so we have to tell the kernel to cleanup for us. 766 */ 767 if (autofs_cleanup(zoneid) != 0) { 768 zerror(zlogp, B_TRUE, "unable to remove autofs nodes"); 769 error++; 770 goto out; 771 } 772 } 773 774 out: 775 free_remote_fstypes(remote_fstypes); 776 (void) fclose(mnttab); 777 return (error ? 
-1 : 0); 778 } 779 780 static int 781 fs_compare(const void *m1, const void *m2) 782 { 783 struct zone_fstab *i = (struct zone_fstab *)m1; 784 struct zone_fstab *j = (struct zone_fstab *)m2; 785 786 return (strcmp(i->zone_fs_dir, j->zone_fs_dir)); 787 } 788 789 /* 790 * Fork and exec (and wait for) the mentioned binary with the provided 791 * arguments. Returns (-1) if something went wrong with fork(2) or exec(2), 792 * returns the exit status otherwise. 793 * 794 * If we were unable to exec the provided pathname (for whatever 795 * reason), we return the special token ZEXIT_EXEC. The current value 796 * of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the 797 * consumers of this function; any future consumers must make sure this 798 * remains the case. 799 */ 800 static int 801 forkexec(zlog_t *zlogp, const char *path, char *const argv[]) 802 { 803 pid_t child_pid; 804 int child_status = 0; 805 806 /* 807 * Do not let another thread localize a message while we are forking. 808 */ 809 (void) mutex_lock(&msglock); 810 child_pid = fork(); 811 (void) mutex_unlock(&msglock); 812 if (child_pid == -1) { 813 zerror(zlogp, B_TRUE, "could not fork for %s", argv[0]); 814 return (-1); 815 } else if (child_pid == 0) { 816 closefrom(0); 817 /* redirect stdin, stdout & stderr to /dev/null */ 818 (void) open("/dev/null", O_RDONLY); /* stdin */ 819 (void) open("/dev/null", O_WRONLY); /* stdout */ 820 (void) open("/dev/null", O_WRONLY); /* stderr */ 821 (void) execv(path, argv); 822 /* 823 * Since we are in the child, there is no point calling zerror() 824 * since there is nobody waiting to consume it. So exit with a 825 * special code that the parent will recognize and call zerror() 826 * accordingly. 
827 */ 828 829 _exit(ZEXIT_EXEC); 830 } else { 831 (void) waitpid(child_pid, &child_status, 0); 832 } 833 834 if (WIFSIGNALED(child_status)) { 835 zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " 836 "signal %d", path, WTERMSIG(child_status)); 837 return (-1); 838 } 839 assert(WIFEXITED(child_status)); 840 if (WEXITSTATUS(child_status) == ZEXIT_EXEC) { 841 zerror(zlogp, B_FALSE, "failed to exec %s", path); 842 return (-1); 843 } 844 return (WEXITSTATUS(child_status)); 845 } 846 847 static int 848 isregfile(const char *path) 849 { 850 struct stat64 st; 851 852 if (stat64(path, &st) == -1) 853 return (-1); 854 855 return (S_ISREG(st.st_mode)); 856 } 857 858 static int 859 dofsck(zlog_t *zlogp, const char *fstype, const char *rawdev) 860 { 861 char cmdbuf[MAXPATHLEN]; 862 char *argv[5]; 863 int status; 864 865 /* 866 * We could alternatively have called /usr/sbin/fsck -F <fstype>, but 867 * that would cost us an extra fork/exec without buying us anything. 868 */ 869 if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/fsck", fstype) 870 >= sizeof (cmdbuf)) { 871 zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); 872 return (-1); 873 } 874 875 /* 876 * If it doesn't exist, that's OK: we verified this previously 877 * in zoneadm. 
878 */ 879 if (isregfile(cmdbuf) == -1) 880 return (0); 881 882 argv[0] = "fsck"; 883 argv[1] = "-o"; 884 argv[2] = "p"; 885 argv[3] = (char *)rawdev; 886 argv[4] = NULL; 887 888 status = forkexec(zlogp, cmdbuf, argv); 889 if (status == 0 || status == -1) 890 return (status); 891 zerror(zlogp, B_FALSE, "fsck of '%s' failed with exit status %d; " 892 "run fsck manually", rawdev, status); 893 return (-1); 894 } 895 896 static int 897 domount(zlog_t *zlogp, const char *fstype, const char *opts, 898 const char *special, const char *directory) 899 { 900 char cmdbuf[MAXPATHLEN]; 901 char *argv[6]; 902 int status; 903 904 /* 905 * We could alternatively have called /usr/sbin/mount -F <fstype>, but 906 * that would cost us an extra fork/exec without buying us anything. 907 */ 908 if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/mount", fstype) 909 >= sizeof (cmdbuf)) { 910 zerror(zlogp, B_FALSE, "file-system type %s too long", fstype); 911 return (-1); 912 } 913 argv[0] = "mount"; 914 if (opts[0] == '\0') { 915 argv[1] = (char *)special; 916 argv[2] = (char *)directory; 917 argv[3] = NULL; 918 } else { 919 argv[1] = "-o"; 920 argv[2] = (char *)opts; 921 argv[3] = (char *)special; 922 argv[4] = (char *)directory; 923 argv[5] = NULL; 924 } 925 926 status = forkexec(zlogp, cmdbuf, argv); 927 if (status == 0 || status == -1) 928 return (status); 929 if (opts[0] == '\0') 930 zerror(zlogp, B_FALSE, "\"%s %s %s\" " 931 "failed with exit code %d", 932 cmdbuf, special, directory, status); 933 else 934 zerror(zlogp, B_FALSE, "\"%s -o %s %s %s\" " 935 "failed with exit code %d", 936 cmdbuf, opts, special, directory, status); 937 return (-1); 938 } 939 940 /* 941 * Check if a given mount point path exists. 942 * If it does, make sure it doesn't contain any symlinks. 943 * Note that if "leaf" is false we're checking an intermediate 944 * component of the mount point path, so it must be a directory. 
945 * If "leaf" is true, then we're checking the entire mount point 946 * path, so the mount point itself can be anything aside from a 947 * symbolic link. 948 * 949 * If the path is invalid then a negative value is returned. If the 950 * path exists and is a valid mount point path then 0 is returned. 951 * If the path doesn't exist return a positive value. 952 */ 953 static int 954 valid_mount_point(zlog_t *zlogp, const char *path, const boolean_t leaf) 955 { 956 struct stat statbuf; 957 char respath[MAXPATHLEN]; 958 int res; 959 960 if (lstat(path, &statbuf) != 0) { 961 if (errno == ENOENT) 962 return (1); 963 zerror(zlogp, B_TRUE, "can't stat %s", path); 964 return (-1); 965 } 966 if (S_ISLNK(statbuf.st_mode)) { 967 zerror(zlogp, B_FALSE, "%s is a symlink", path); 968 return (-1); 969 } 970 if (!leaf && !S_ISDIR(statbuf.st_mode)) { 971 zerror(zlogp, B_FALSE, "%s is not a directory", path); 972 return (-1); 973 } 974 if ((res = resolvepath(path, respath, sizeof (respath))) == -1) { 975 zerror(zlogp, B_TRUE, "unable to resolve path %s", path); 976 return (-1); 977 } 978 respath[res] = '\0'; 979 if (strcmp(path, respath) != 0) { 980 /* 981 * We don't like ".."s, "."s, or "//"s throwing us off 982 */ 983 zerror(zlogp, B_FALSE, "%s is not a canonical path", path); 984 return (-1); 985 } 986 return (0); 987 } 988 989 /* 990 * Validate a mount point path. A valid mount point path is an 991 * absolute path that either doesn't exist, or, if it does exists it 992 * must be an absolute canonical path that doesn't have any symbolic 993 * links in it. The target of a mount point path can be any filesystem 994 * object. (Different filesystems can support different mount points, 995 * for example "lofs" and "mntfs" both support files and directories 996 * while "ufs" just supports directories.) 997 * 998 * If the path is invalid then a negative value is returned. If the 999 * path exists and is a valid mount point path then 0 is returned. 
1000 * If the path doesn't exist return a positive value. 1001 */ 1002 int 1003 valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *spec, 1004 const char *dir, const char *fstype) 1005 { 1006 char abspath[MAXPATHLEN], *slashp, *slashp_next; 1007 int rv; 1008 1009 /* 1010 * Sanity check the target mount point path. 1011 * It must be a non-null string that starts with a '/'. 1012 */ 1013 if (dir[0] != '/') { 1014 /* Something went wrong. */ 1015 zerror(zlogp, B_FALSE, "invalid mount directory, " 1016 "type: \"%s\", special: \"%s\", dir: \"%s\"", 1017 fstype, spec, dir); 1018 return (-1); 1019 } 1020 1021 /* 1022 * Join rootpath and dir. Make sure abspath ends with '/', this 1023 * is added to all paths (even non-directory paths) to allow us 1024 * to detect the end of paths below. If the path already ends 1025 * in a '/', then that's ok too (although we'll fail the 1026 * cannonical path check in valid_mount_point()). 1027 */ 1028 if (snprintf(abspath, sizeof (abspath), 1029 "%s%s/", rootpath, dir) >= sizeof (abspath)) { 1030 zerror(zlogp, B_FALSE, "pathname %s%s is too long", 1031 rootpath, dir); 1032 return (-1); 1033 } 1034 1035 /* 1036 * Starting with rootpath, verify the mount path one component 1037 * at a time. Continue until we've evaluated all of abspath. 1038 */ 1039 slashp = &abspath[strlen(rootpath)]; 1040 assert(*slashp == '/'); 1041 do { 1042 slashp_next = strchr(slashp + 1, '/'); 1043 *slashp = '\0'; 1044 if (slashp_next != NULL) { 1045 /* This is an intermediary mount path component. */ 1046 rv = valid_mount_point(zlogp, abspath, B_FALSE); 1047 } else { 1048 /* This is the last component of the mount path. 
*/ 1049 rv = valid_mount_point(zlogp, abspath, B_TRUE); 1050 } 1051 if (rv < 0) 1052 return (rv); 1053 *slashp = '/'; 1054 } while ((slashp = slashp_next) != NULL); 1055 return (rv); 1056 } 1057 1058 static int 1059 mount_one_dev_device_cb(void *arg, const char *match, const char *name) 1060 { 1061 di_prof_t prof = arg; 1062 1063 if (name == NULL) 1064 return (di_prof_add_dev(prof, match)); 1065 return (di_prof_add_map(prof, match, name)); 1066 } 1067 1068 static int 1069 mount_one_dev_symlink_cb(void *arg, const char *source, const char *target) 1070 { 1071 di_prof_t prof = arg; 1072 1073 return (di_prof_add_symlink(prof, source, target)); 1074 } 1075 1076 int 1077 vplat_get_iptype(zlog_t *zlogp, zone_iptype_t *iptypep) 1078 { 1079 zone_dochandle_t handle; 1080 1081 if ((handle = zonecfg_init_handle()) == NULL) { 1082 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 1083 return (-1); 1084 } 1085 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 1086 zerror(zlogp, B_FALSE, "invalid configuration"); 1087 zonecfg_fini_handle(handle); 1088 return (-1); 1089 } 1090 if (zonecfg_get_iptype(handle, iptypep) != Z_OK) { 1091 zerror(zlogp, B_FALSE, "invalid ip-type configuration"); 1092 zonecfg_fini_handle(handle); 1093 return (-1); 1094 } 1095 zonecfg_fini_handle(handle); 1096 return (0); 1097 } 1098 1099 /* 1100 * Apply the standard lists of devices/symlinks/mappings and the user-specified 1101 * list of devices (via zonecfg) to the /dev filesystem. The filesystem will 1102 * use these as a profile/filter to determine what exists in /dev. 
1103 */ 1104 static int 1105 mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd) 1106 { 1107 char brand[MAXNAMELEN]; 1108 zone_dochandle_t handle = NULL; 1109 brand_handle_t bh = NULL; 1110 struct zone_devtab ztab; 1111 di_prof_t prof = NULL; 1112 int err; 1113 int retval = -1; 1114 zone_iptype_t iptype; 1115 const char *curr_iptype; 1116 1117 if (di_prof_init(devpath, &prof)) { 1118 zerror(zlogp, B_TRUE, "failed to initialize profile"); 1119 goto cleanup; 1120 } 1121 1122 /* 1123 * Get a handle to the brand info for this zone. 1124 * If we are mounting the zone, then we must always use the default 1125 * brand device mounts. 1126 */ 1127 if (ALT_MOUNT(mount_cmd)) { 1128 (void) strlcpy(brand, default_brand, sizeof (brand)); 1129 } else { 1130 (void) strlcpy(brand, brand_name, sizeof (brand)); 1131 } 1132 1133 if ((bh = brand_open(brand)) == NULL) { 1134 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1135 goto cleanup; 1136 } 1137 1138 if (vplat_get_iptype(zlogp, &iptype) < 0) { 1139 zerror(zlogp, B_TRUE, "unable to determine ip-type"); 1140 goto cleanup; 1141 } 1142 switch (iptype) { 1143 case ZS_SHARED: 1144 curr_iptype = "shared"; 1145 break; 1146 case ZS_EXCLUSIVE: 1147 curr_iptype = "exclusive"; 1148 break; 1149 default: 1150 zerror(zlogp, B_FALSE, "bad ip-type"); 1151 goto cleanup; 1152 } 1153 1154 if (brand_platform_iter_devices(bh, zone_name, 1155 mount_one_dev_device_cb, prof, curr_iptype) != 0) { 1156 zerror(zlogp, B_TRUE, "failed to add standard device"); 1157 goto cleanup; 1158 } 1159 1160 if (brand_platform_iter_link(bh, 1161 mount_one_dev_symlink_cb, prof) != 0) { 1162 zerror(zlogp, B_TRUE, "failed to add standard symlink"); 1163 goto cleanup; 1164 } 1165 1166 /* Add user-specified devices and directories */ 1167 if ((handle = zonecfg_init_handle()) == NULL) { 1168 zerror(zlogp, B_FALSE, "can't initialize zone handle"); 1169 goto cleanup; 1170 } 1171 if ((err = zonecfg_get_handle(zone_name, handle)) != 0) { 1172 zerror(zlogp, 
B_FALSE, "can't get handle for zone " 1173 "%s: %s", zone_name, zonecfg_strerror(err)); 1174 goto cleanup; 1175 } 1176 if ((err = zonecfg_setdevent(handle)) != 0) { 1177 zerror(zlogp, B_FALSE, "%s: %s", zone_name, 1178 zonecfg_strerror(err)); 1179 goto cleanup; 1180 } 1181 while (zonecfg_getdevent(handle, &ztab) == Z_OK) { 1182 if (di_prof_add_dev(prof, ztab.zone_dev_match)) { 1183 zerror(zlogp, B_TRUE, "failed to add " 1184 "user-specified device"); 1185 goto cleanup; 1186 } 1187 } 1188 (void) zonecfg_enddevent(handle); 1189 1190 /* Send profile to kernel */ 1191 if (di_prof_commit(prof)) { 1192 zerror(zlogp, B_TRUE, "failed to commit profile"); 1193 goto cleanup; 1194 } 1195 1196 retval = 0; 1197 1198 cleanup: 1199 if (bh != NULL) 1200 brand_close(bh); 1201 if (handle != NULL) 1202 zonecfg_fini_handle(handle); 1203 if (prof) 1204 di_prof_fini(prof); 1205 return (retval); 1206 } 1207 1208 static int 1209 mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath, 1210 zone_mnt_t mount_cmd) 1211 { 1212 char path[MAXPATHLEN]; 1213 char optstr[MAX_MNTOPT_STR]; 1214 zone_fsopt_t *optptr; 1215 int rv; 1216 1217 if ((rv = valid_mount_path(zlogp, rootpath, fsptr->zone_fs_special, 1218 fsptr->zone_fs_dir, fsptr->zone_fs_type)) < 0) { 1219 zerror(zlogp, B_FALSE, "%s%s is not a valid mount point", 1220 rootpath, fsptr->zone_fs_dir); 1221 return (-1); 1222 } else if (rv > 0) { 1223 /* The mount point path doesn't exist, create it now. */ 1224 if (make_one_dir(zlogp, rootpath, fsptr->zone_fs_dir, 1225 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 1226 DEFAULT_DIR_GROUP) != 0) { 1227 zerror(zlogp, B_FALSE, "failed to create mount point"); 1228 return (-1); 1229 } 1230 1231 /* 1232 * Now this might seem weird, but we need to invoke 1233 * valid_mount_path() again. Why? Because it checks 1234 * to make sure that the mount point path is canonical, 1235 * which it can only do if the path exists, so now that 1236 * we've created the path we have to verify it again. 
1237 */ 1238 if ((rv = valid_mount_path(zlogp, rootpath, 1239 fsptr->zone_fs_special, fsptr->zone_fs_dir, 1240 fsptr->zone_fs_type)) < 0) { 1241 zerror(zlogp, B_FALSE, 1242 "%s%s is not a valid mount point", 1243 rootpath, fsptr->zone_fs_dir); 1244 return (-1); 1245 } 1246 } 1247 1248 (void) snprintf(path, sizeof (path), "%s%s", rootpath, 1249 fsptr->zone_fs_dir); 1250 1251 /* 1252 * In general the strategy here is to do just as much verification as 1253 * necessary to avoid crashing or otherwise doing something bad; if the 1254 * administrator initiated the operation via zoneadm(8), they'll get 1255 * auto-verification which will let them know what's wrong. If they 1256 * modify the zone configuration of a running zone, and don't attempt 1257 * to verify that it's OK, then we won't crash but won't bother trying 1258 * to be too helpful either. zoneadm verify is only a couple keystrokes 1259 * away. 1260 */ 1261 if (!zonecfg_valid_fs_type(fsptr->zone_fs_type)) { 1262 zerror(zlogp, B_FALSE, "cannot mount %s on %s: " 1263 "invalid file-system type %s", fsptr->zone_fs_special, 1264 fsptr->zone_fs_dir, fsptr->zone_fs_type); 1265 return (-1); 1266 } 1267 1268 /* 1269 * If we're looking at an alternate root environment, then construct 1270 * read-only loopback mounts as necessary. Note that any special 1271 * paths for lofs zone mounts in an alternate root must have 1272 * already been pre-pended with any alternate root path by the 1273 * time we get here. 
1274 */ 1275 if (zonecfg_in_alt_root()) { 1276 struct stat64 st; 1277 1278 if (stat64(fsptr->zone_fs_special, &st) != -1 && 1279 S_ISBLK(st.st_mode)) { 1280 /* 1281 * If we're going to mount a block device we need 1282 * to check if that device is already mounted 1283 * somewhere else, and if so, do a lofs mount 1284 * of the device instead of a direct mount 1285 */ 1286 if (check_lofs_needed(zlogp, fsptr) == -1) 1287 return (-1); 1288 } else if (strcmp(fsptr->zone_fs_type, MNTTYPE_LOFS) == 0) { 1289 /* 1290 * For lofs mounts, the special node is inside the 1291 * alternate root. We need lofs resolution for 1292 * this case in order to get at the underlying 1293 * read-write path. 1294 */ 1295 resolve_lofs(zlogp, fsptr->zone_fs_special, 1296 sizeof (fsptr->zone_fs_special)); 1297 } 1298 } 1299 1300 /* 1301 * Run 'fsck -m' if there's a device to fsck. 1302 */ 1303 if (fsptr->zone_fs_raw[0] != '\0' && 1304 dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_raw) != 0) { 1305 return (-1); 1306 } else if (isregfile(fsptr->zone_fs_special) == 1 && 1307 dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_special) != 0) { 1308 return (-1); 1309 } 1310 1311 /* 1312 * Build up mount option string. 1313 */ 1314 optstr[0] = '\0'; 1315 if (fsptr->zone_fs_options != NULL) { 1316 (void) strlcpy(optstr, fsptr->zone_fs_options->zone_fsopt_opt, 1317 sizeof (optstr)); 1318 for (optptr = fsptr->zone_fs_options->zone_fsopt_next; 1319 optptr != NULL; optptr = optptr->zone_fsopt_next) { 1320 (void) strlcat(optstr, ",", sizeof (optstr)); 1321 (void) strlcat(optstr, optptr->zone_fsopt_opt, 1322 sizeof (optstr)); 1323 } 1324 } 1325 1326 if ((rv = domount(zlogp, fsptr->zone_fs_type, optstr, 1327 fsptr->zone_fs_special, path)) != 0) 1328 return (rv); 1329 1330 /* 1331 * The mount succeeded. If this was not a mount of /dev then 1332 * we're done. 
1333 */ 1334 if (strcmp(fsptr->zone_fs_type, MNTTYPE_DEV) != 0) 1335 return (0); 1336 1337 /* 1338 * We just mounted an instance of a /dev filesystem, so now we 1339 * need to configure it. 1340 */ 1341 return (mount_one_dev(zlogp, path, mount_cmd)); 1342 } 1343 1344 static void 1345 free_fs_data(struct zone_fstab *fsarray, uint_t nelem) 1346 { 1347 uint_t i; 1348 1349 if (fsarray == NULL) 1350 return; 1351 for (i = 0; i < nelem; i++) 1352 zonecfg_free_fs_option_list(fsarray[i].zone_fs_options); 1353 free(fsarray); 1354 } 1355 1356 /* 1357 * This function initiates the creation of a small Solaris Environment for 1358 * scratch zone. The Environment creation process is split up into two 1359 * functions(build_mounted_pre_var() and build_mounted_post_var()). It 1360 * is done this way because: 1361 * We need to have both /etc and /var in the root of the scratchzone. 1362 * We loopback mount zone's own /etc and /var into the root of the 1363 * scratch zone. Unlike /etc, /var can be a seperate filesystem. So we 1364 * need to delay the mount of /var till the zone's root gets populated. 1365 * So mounting of localdirs[](/etc and /var) have been moved to the 1366 * build_mounted_post_var() which gets called only after the zone 1367 * specific filesystems are mounted. 1368 * 1369 * Note that the scratch zone we set up for updating the zone (Z_MNT_UPDATE) 1370 * does not loopback mount the zone's own /etc and /var into the root of the 1371 * scratch zone. 
1372 */ 1373 static boolean_t 1374 build_mounted_pre_var(zlog_t *zlogp, char *rootpath, 1375 size_t rootlen, const char *zonepath, char *luroot, size_t lurootlen) 1376 { 1377 char tmp[MAXPATHLEN], fromdir[MAXPATHLEN]; 1378 const char **cpp; 1379 static const char *mkdirs[] = { 1380 "/system", "/system/contract", "/system/object", "/proc", 1381 "/dev", "/tmp", "/a", NULL 1382 }; 1383 char *altstr; 1384 FILE *fp; 1385 uuid_t uuid; 1386 1387 resolve_lofs(zlogp, rootpath, rootlen); 1388 (void) snprintf(luroot, lurootlen, "%s/lu", zonepath); 1389 resolve_lofs(zlogp, luroot, lurootlen); 1390 (void) snprintf(tmp, sizeof (tmp), "%s/bin", luroot); 1391 (void) symlink("./usr/bin", tmp); 1392 1393 /* 1394 * These are mostly special mount points; not handled here. (See 1395 * zone_mount_early.) 1396 */ 1397 for (cpp = mkdirs; *cpp != NULL; cpp++) { 1398 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1399 if (mkdir(tmp, 0755) != 0) { 1400 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1401 return (B_FALSE); 1402 } 1403 } 1404 /* 1405 * This is here to support lucopy. If there's an instance of this same 1406 * zone on the current running system, then we mount its root up as 1407 * read-only inside the scratch zone. 
1408 */ 1409 (void) zonecfg_get_uuid(zone_name, uuid); 1410 altstr = strdup(zonecfg_get_root()); 1411 if (altstr == NULL) { 1412 zerror(zlogp, B_TRUE, "memory allocation failed"); 1413 return (B_FALSE); 1414 } 1415 zonecfg_set_root(""); 1416 (void) strlcpy(tmp, zone_name, sizeof (tmp)); 1417 (void) zonecfg_get_name_by_uuid(uuid, tmp, sizeof (tmp)); 1418 if (zone_get_rootpath(tmp, fromdir, sizeof (fromdir)) == Z_OK && 1419 strcmp(fromdir, rootpath) != 0) { 1420 (void) snprintf(tmp, sizeof (tmp), "%s/b", luroot); 1421 if (mkdir(tmp, 0755) != 0) { 1422 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1423 return (B_FALSE); 1424 } 1425 if (domount(zlogp, MNTTYPE_LOFS, RESOURCE_DEFAULT_OPTS, fromdir, 1426 tmp) != 0) { 1427 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1428 fromdir); 1429 return (B_FALSE); 1430 } 1431 } 1432 zonecfg_set_root(altstr); 1433 free(altstr); 1434 1435 if ((fp = zonecfg_open_scratch(luroot, B_TRUE)) == NULL) { 1436 zerror(zlogp, B_TRUE, "cannot open zone mapfile"); 1437 return (B_FALSE); 1438 } 1439 (void) ftruncate(fileno(fp), 0); 1440 if (zonecfg_add_scratch(fp, zone_name, kernzone, "/") == -1) { 1441 zerror(zlogp, B_TRUE, "cannot add zone mapfile entry"); 1442 } 1443 zonecfg_close_scratch(fp); 1444 (void) snprintf(tmp, sizeof (tmp), "%s/a", luroot); 1445 if (domount(zlogp, MNTTYPE_LOFS, "", rootpath, tmp) != 0) 1446 return (B_FALSE); 1447 (void) strlcpy(rootpath, tmp, rootlen); 1448 return (B_TRUE); 1449 } 1450 1451 1452 static boolean_t 1453 build_mounted_post_var(zlog_t *zlogp, zone_mnt_t mount_cmd, char *rootpath, 1454 const char *luroot) 1455 { 1456 char tmp[MAXPATHLEN], fromdir[MAXPATHLEN]; 1457 const char **cpp; 1458 const char **loopdirs; 1459 const char **tmpdirs; 1460 static const char *localdirs[] = { 1461 "/etc", "/var", NULL 1462 }; 1463 static const char *scr_loopdirs[] = { 1464 "/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform", 1465 "/usr", NULL 1466 }; 1467 static const char *upd_loopdirs[] = { 1468 "/etc", "/kernel", 
"/lib", "/opt", "/platform", "/sbin", 1469 "/usr", "/var", NULL 1470 }; 1471 static const char *scr_tmpdirs[] = { 1472 "/tmp", "/var/run", NULL 1473 }; 1474 static const char *upd_tmpdirs[] = { 1475 "/tmp", "/var/run", "/var/tmp", NULL 1476 }; 1477 struct stat st; 1478 1479 if (mount_cmd == Z_MNT_SCRATCH) { 1480 /* 1481 * These are mounted read-write from the zone undergoing 1482 * upgrade. We must be careful not to 'leak' things from the 1483 * main system into the zone, and this accomplishes that goal. 1484 */ 1485 for (cpp = localdirs; *cpp != NULL; cpp++) { 1486 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, 1487 *cpp); 1488 (void) snprintf(fromdir, sizeof (fromdir), "%s%s", 1489 rootpath, *cpp); 1490 if (mkdir(tmp, 0755) != 0) { 1491 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1492 return (B_FALSE); 1493 } 1494 if (domount(zlogp, MNTTYPE_LOFS, "", fromdir, tmp) 1495 != 0) { 1496 zerror(zlogp, B_TRUE, "cannot mount %s on %s", 1497 tmp, *cpp); 1498 return (B_FALSE); 1499 } 1500 } 1501 } 1502 1503 if (mount_cmd == Z_MNT_UPDATE) 1504 loopdirs = upd_loopdirs; 1505 else 1506 loopdirs = scr_loopdirs; 1507 1508 /* 1509 * These are things mounted read-only from the running system because 1510 * they contain binaries that must match system. 1511 */ 1512 for (cpp = loopdirs; *cpp != NULL; cpp++) { 1513 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1514 if (mkdir(tmp, 0755) != 0) { 1515 if (errno != EEXIST) { 1516 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1517 return (B_FALSE); 1518 } 1519 if (lstat(tmp, &st) != 0) { 1520 zerror(zlogp, B_TRUE, "cannot stat %s", tmp); 1521 return (B_FALSE); 1522 } 1523 /* 1524 * Ignore any non-directories encountered. These are 1525 * things that have been converted into symlinks 1526 * (/etc/fs and /etc/lib) and no longer need a lofs 1527 * fixup. 
1528 */ 1529 if (!S_ISDIR(st.st_mode)) 1530 continue; 1531 } 1532 if (domount(zlogp, MNTTYPE_LOFS, RESOURCE_DEFAULT_OPTS, *cpp, 1533 tmp) != 0) { 1534 zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp, 1535 *cpp); 1536 return (B_FALSE); 1537 } 1538 } 1539 1540 if (mount_cmd == Z_MNT_UPDATE) 1541 tmpdirs = upd_tmpdirs; 1542 else 1543 tmpdirs = scr_tmpdirs; 1544 1545 /* 1546 * These are things with tmpfs mounted inside. 1547 */ 1548 for (cpp = tmpdirs; *cpp != NULL; cpp++) { 1549 (void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp); 1550 if (mount_cmd == Z_MNT_SCRATCH && mkdir(tmp, 0755) != 0 && 1551 errno != EEXIST) { 1552 zerror(zlogp, B_TRUE, "cannot create %s", tmp); 1553 return (B_FALSE); 1554 } 1555 1556 /* 1557 * We could set the mode for /tmp when we do the mkdir but 1558 * since that can be modified by the umask we will just set 1559 * the correct mode for /tmp now. 1560 */ 1561 if (strcmp(*cpp, "/tmp") == 0 && chmod(tmp, 01777) != 0) { 1562 zerror(zlogp, B_TRUE, "cannot chmod %s", tmp); 1563 return (B_FALSE); 1564 } 1565 1566 if (domount(zlogp, MNTTYPE_TMPFS, "", "swap", tmp) != 0) { 1567 zerror(zlogp, B_TRUE, "cannot mount swap on %s", *cpp); 1568 return (B_FALSE); 1569 } 1570 } 1571 return (B_TRUE); 1572 } 1573 1574 typedef struct plat_gmount_cb_data { 1575 zlog_t *pgcd_zlogp; 1576 struct zone_fstab **pgcd_fs_tab; 1577 int *pgcd_num_fs; 1578 } plat_gmount_cb_data_t; 1579 1580 /* 1581 * plat_gmount_cb() is a callback function invoked by libbrand to iterate 1582 * through all global brand platform mounts. 
1583 */ 1584 int 1585 plat_gmount_cb(void *data, const char *spec, const char *dir, 1586 const char *fstype, const char *opt) 1587 { 1588 plat_gmount_cb_data_t *cp = data; 1589 zlog_t *zlogp = cp->pgcd_zlogp; 1590 struct zone_fstab *fs_ptr = *cp->pgcd_fs_tab; 1591 int num_fs = *cp->pgcd_num_fs; 1592 struct zone_fstab *fsp, *tmp_ptr; 1593 1594 num_fs++; 1595 if ((tmp_ptr = realloc(fs_ptr, num_fs * sizeof (*tmp_ptr))) == NULL) { 1596 zerror(zlogp, B_TRUE, "memory allocation failed"); 1597 return (-1); 1598 } 1599 1600 fs_ptr = tmp_ptr; 1601 fsp = &fs_ptr[num_fs - 1]; 1602 1603 /* update the callback struct passed in */ 1604 *cp->pgcd_fs_tab = fs_ptr; 1605 *cp->pgcd_num_fs = num_fs; 1606 1607 fsp->zone_fs_raw[0] = '\0'; 1608 (void) strlcpy(fsp->zone_fs_special, spec, 1609 sizeof (fsp->zone_fs_special)); 1610 (void) strlcpy(fsp->zone_fs_dir, dir, sizeof (fsp->zone_fs_dir)); 1611 (void) strlcpy(fsp->zone_fs_type, fstype, sizeof (fsp->zone_fs_type)); 1612 fsp->zone_fs_options = NULL; 1613 if ((opt != NULL) && 1614 (zonecfg_add_fs_option(fsp, (char *)opt) != Z_OK)) { 1615 zerror(zlogp, B_FALSE, "error adding property"); 1616 return (-1); 1617 } 1618 1619 return (0); 1620 } 1621 1622 static int 1623 mount_filesystems_fsent(zone_dochandle_t handle, zlog_t *zlogp, 1624 struct zone_fstab **fs_tabp, int *num_fsp, zone_mnt_t mount_cmd) 1625 { 1626 struct zone_fstab *tmp_ptr, *fs_ptr, *fsp, fstab; 1627 int num_fs; 1628 1629 num_fs = *num_fsp; 1630 fs_ptr = *fs_tabp; 1631 1632 if (zonecfg_setfsent(handle) != Z_OK) { 1633 zerror(zlogp, B_FALSE, "invalid configuration"); 1634 return (-1); 1635 } 1636 while (zonecfg_getfsent(handle, &fstab) == Z_OK) { 1637 /* 1638 * ZFS filesystems will not be accessible under an alternate 1639 * root, since the pool will not be known. Ignore them in this 1640 * case. 
1641 */ 1642 if (ALT_MOUNT(mount_cmd) && 1643 strcmp(fstab.zone_fs_type, MNTTYPE_ZFS) == 0) 1644 continue; 1645 1646 num_fs++; 1647 if ((tmp_ptr = realloc(fs_ptr, 1648 num_fs * sizeof (*tmp_ptr))) == NULL) { 1649 zerror(zlogp, B_TRUE, "memory allocation failed"); 1650 (void) zonecfg_endfsent(handle); 1651 return (-1); 1652 } 1653 /* update the pointers passed in */ 1654 *fs_tabp = tmp_ptr; 1655 *num_fsp = num_fs; 1656 1657 fs_ptr = tmp_ptr; 1658 fsp = &fs_ptr[num_fs - 1]; 1659 (void) strlcpy(fsp->zone_fs_dir, 1660 fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir)); 1661 (void) strlcpy(fsp->zone_fs_raw, fstab.zone_fs_raw, 1662 sizeof (fsp->zone_fs_raw)); 1663 (void) strlcpy(fsp->zone_fs_type, fstab.zone_fs_type, 1664 sizeof (fsp->zone_fs_type)); 1665 fsp->zone_fs_options = fstab.zone_fs_options; 1666 1667 /* 1668 * For all lofs mounts, make sure that the 'special' 1669 * entry points inside the alternate root. The 1670 * source path for a lofs mount in a given zone needs 1671 * to be relative to the root of the boot environment 1672 * that contains the zone. Note that we don't do this 1673 * for non-lofs mounts since they will have a device 1674 * as a backing store and device paths must always be 1675 * specified relative to the current boot environment. 
1676 */ 1677 fsp->zone_fs_special[0] = '\0'; 1678 if (strcmp(fsp->zone_fs_type, MNTTYPE_LOFS) == 0) { 1679 (void) strlcat(fsp->zone_fs_special, zonecfg_get_root(), 1680 sizeof (fsp->zone_fs_special)); 1681 } 1682 (void) strlcat(fsp->zone_fs_special, fstab.zone_fs_special, 1683 sizeof (fsp->zone_fs_special)); 1684 } 1685 (void) zonecfg_endfsent(handle); 1686 return (0); 1687 } 1688 1689 static int 1690 mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd) 1691 { 1692 char rootpath[MAXPATHLEN]; 1693 char zonepath[MAXPATHLEN]; 1694 char brand[MAXNAMELEN]; 1695 char luroot[MAXPATHLEN]; 1696 int i, num_fs = 0; 1697 struct zone_fstab *fs_ptr = NULL; 1698 zone_dochandle_t handle = NULL; 1699 zone_state_t zstate; 1700 brand_handle_t bh; 1701 plat_gmount_cb_data_t cb; 1702 1703 if (zone_get_state(zone_name, &zstate) != Z_OK || 1704 (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) { 1705 zerror(zlogp, B_FALSE, 1706 "zone must be in '%s' or '%s' state to mount file-systems", 1707 zone_state_str(ZONE_STATE_READY), 1708 zone_state_str(ZONE_STATE_MOUNTED)); 1709 goto bad; 1710 } 1711 1712 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { 1713 zerror(zlogp, B_TRUE, "unable to determine zone path"); 1714 goto bad; 1715 } 1716 1717 if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { 1718 zerror(zlogp, B_TRUE, "unable to determine zone root"); 1719 goto bad; 1720 } 1721 1722 if ((handle = zonecfg_init_handle()) == NULL) { 1723 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 1724 goto bad; 1725 } 1726 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK || 1727 zonecfg_setfsent(handle) != Z_OK) { 1728 zerror(zlogp, B_FALSE, "invalid configuration"); 1729 goto bad; 1730 } 1731 1732 /* 1733 * If we are mounting the zone, then we must always use the default 1734 * brand global mounts. 
1735 */ 1736 if (ALT_MOUNT(mount_cmd)) { 1737 (void) strlcpy(brand, default_brand, sizeof (brand)); 1738 } else { 1739 (void) strlcpy(brand, brand_name, sizeof (brand)); 1740 } 1741 1742 /* Get a handle to the brand info for this zone */ 1743 if ((bh = brand_open(brand)) == NULL) { 1744 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 1745 zonecfg_fini_handle(handle); 1746 return (-1); 1747 } 1748 1749 /* 1750 * Get the list of global filesystems to mount from the brand 1751 * configuration. 1752 */ 1753 cb.pgcd_zlogp = zlogp; 1754 cb.pgcd_fs_tab = &fs_ptr; 1755 cb.pgcd_num_fs = &num_fs; 1756 if (brand_platform_iter_gmounts(bh, zone_name, zonepath, 1757 plat_gmount_cb, &cb) != 0) { 1758 zerror(zlogp, B_FALSE, "unable to mount filesystems"); 1759 brand_close(bh); 1760 zonecfg_fini_handle(handle); 1761 return (-1); 1762 } 1763 brand_close(bh); 1764 1765 /* 1766 * Iterate through the rest of the filesystems. Sort them all, 1767 * then mount them in sorted order. This is to make sure the 1768 * higher level directories (e.g., /usr) get mounted before 1769 * any beneath them (e.g., /usr/local). 1770 */ 1771 if (mount_filesystems_fsent(handle, zlogp, &fs_ptr, &num_fs, 1772 mount_cmd) != 0) 1773 goto bad; 1774 1775 zonecfg_fini_handle(handle); 1776 handle = NULL; 1777 1778 /* 1779 * Normally when we mount a zone all the zone filesystems 1780 * get mounted relative to rootpath, which is usually 1781 * <zonepath>/root. But when mounting a zone for administration 1782 * purposes via the zone "mount" state, build_mounted_pre_var() 1783 * updates rootpath to be <zonepath>/lu/a so we'll mount all 1784 * the zones filesystems there instead. 1785 * 1786 * build_mounted_pre_var() and build_mounted_post_var() will 1787 * also do some extra work to create directories and lofs mount 1788 * a bunch of global zone file system paths into <zonepath>/lu. 
1789 * 1790 * This allows us to be able to enter the zone (now rooted at 1791 * <zonepath>/lu) and run the upgrade/patch tools that are in the 1792 * global zone and have them upgrade the to-be-modified zone's 1793 * files mounted on /a. (Which mirrors the existing standard 1794 * upgrade environment.) 1795 * 1796 * There is of course one catch. When doing the upgrade 1797 * we need <zoneroot>/lu/dev to be the /dev filesystem 1798 * for the zone and we don't want to have any /dev filesystem 1799 * mounted at <zoneroot>/lu/a/dev. Since /dev is specified 1800 * as a normal zone filesystem by default we'll try to mount 1801 * it at <zoneroot>/lu/a/dev, so we have to detect this 1802 * case and instead mount it at <zoneroot>/lu/dev. 1803 * 1804 * All this work is done in three phases: 1805 * 1) Create and populate lu directory (build_mounted_pre_var()). 1806 * 2) Mount the required filesystems as per the zone configuration. 1807 * 3) Set up the rest of the scratch zone environment 1808 * (build_mounted_post_var()). 1809 */ 1810 if (ALT_MOUNT(mount_cmd) && !build_mounted_pre_var(zlogp, 1811 rootpath, sizeof (rootpath), zonepath, luroot, sizeof (luroot))) 1812 goto bad; 1813 1814 qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare); 1815 1816 for (i = 0; i < num_fs; i++) { 1817 if (ALT_MOUNT(mount_cmd) && 1818 strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) { 1819 size_t slen = strlen(rootpath) - 2; 1820 1821 /* 1822 * By default we'll try to mount /dev as /a/dev 1823 * but /dev is special and always goes at the top 1824 * so strip the trailing '/a' from the rootpath. 
1825 */ 1826 assert(strcmp(&rootpath[slen], "/a") == 0); 1827 rootpath[slen] = '\0'; 1828 if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd) 1829 != 0) 1830 goto bad; 1831 rootpath[slen] = '/'; 1832 continue; 1833 } 1834 if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd) != 0) 1835 goto bad; 1836 } 1837 if (ALT_MOUNT(mount_cmd) && 1838 !build_mounted_post_var(zlogp, mount_cmd, rootpath, luroot)) 1839 goto bad; 1840 1841 /* 1842 * For Trusted Extensions cross-mount each lower level /export/home 1843 */ 1844 if (mount_cmd == Z_MNT_BOOT && 1845 tsol_mounts(zlogp, zone_name, rootpath) != 0) 1846 goto bad; 1847 1848 free_fs_data(fs_ptr, num_fs); 1849 1850 /* 1851 * Everything looks fine. 1852 */ 1853 return (0); 1854 1855 bad: 1856 if (handle != NULL) 1857 zonecfg_fini_handle(handle); 1858 free_fs_data(fs_ptr, num_fs); 1859 return (-1); 1860 } 1861 1862 /* caller makes sure neither parameter is NULL */ 1863 static int 1864 addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr) 1865 { 1866 int prefixlen; 1867 1868 prefixlen = atoi(prefixstr); 1869 if (prefixlen < 0 || prefixlen > maxprefixlen) 1870 return (1); 1871 while (prefixlen > 0) { 1872 if (prefixlen >= 8) { 1873 *maskstr++ = 0xFF; 1874 prefixlen -= 8; 1875 continue; 1876 } 1877 *maskstr |= 1 << (8 - prefixlen); 1878 prefixlen--; 1879 } 1880 return (0); 1881 } 1882 1883 /* 1884 * Tear down all interfaces belonging to the given zone. This should 1885 * be called with the zone in a state other than "running", so that 1886 * interfaces can't be assigned to the zone after this returns. 1887 * 1888 * If anything goes wrong, log an error message and return an error. 
1889 */ 1890 static int 1891 unconfigure_shared_network_interfaces(zlog_t *zlogp, zoneid_t zone_id) 1892 { 1893 struct lifnum lifn; 1894 struct lifconf lifc; 1895 struct lifreq *lifrp, lifrl; 1896 int64_t lifc_flags = LIFC_NOXMIT | LIFC_ALLZONES; 1897 int num_ifs, s, i, ret_code = 0; 1898 uint_t bufsize; 1899 char *buf = NULL; 1900 1901 if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { 1902 zerror(zlogp, B_TRUE, "could not get socket"); 1903 ret_code = -1; 1904 goto bad; 1905 } 1906 lifn.lifn_family = AF_UNSPEC; 1907 lifn.lifn_flags = (int)lifc_flags; 1908 if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) { 1909 zerror(zlogp, B_TRUE, 1910 "could not determine number of network interfaces"); 1911 ret_code = -1; 1912 goto bad; 1913 } 1914 num_ifs = lifn.lifn_count; 1915 bufsize = num_ifs * sizeof (struct lifreq); 1916 if ((buf = malloc(bufsize)) == NULL) { 1917 zerror(zlogp, B_TRUE, "memory allocation failed"); 1918 ret_code = -1; 1919 goto bad; 1920 } 1921 lifc.lifc_family = AF_UNSPEC; 1922 lifc.lifc_flags = (int)lifc_flags; 1923 lifc.lifc_len = bufsize; 1924 lifc.lifc_buf = buf; 1925 if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) { 1926 zerror(zlogp, B_TRUE, "could not get configured network " 1927 "interfaces"); 1928 ret_code = -1; 1929 goto bad; 1930 } 1931 lifrp = lifc.lifc_req; 1932 for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--, lifrp++) { 1933 (void) close(s); 1934 if ((s = socket(lifrp->lifr_addr.ss_family, SOCK_DGRAM, 0)) < 1935 0) { 1936 zerror(zlogp, B_TRUE, "%s: could not get socket", 1937 lifrl.lifr_name); 1938 ret_code = -1; 1939 continue; 1940 } 1941 (void) memset(&lifrl, 0, sizeof (lifrl)); 1942 (void) strncpy(lifrl.lifr_name, lifrp->lifr_name, 1943 sizeof (lifrl.lifr_name)); 1944 if (ioctl(s, SIOCGLIFZONE, (caddr_t)&lifrl) < 0) { 1945 if (errno == ENXIO) 1946 /* 1947 * Interface may have been removed by admin or 1948 * another zone halting. 
1949 */ 1950 continue; 1951 zerror(zlogp, B_TRUE, 1952 "%s: could not determine the zone to which this " 1953 "network interface is bound", lifrl.lifr_name); 1954 ret_code = -1; 1955 continue; 1956 } 1957 if (lifrl.lifr_zoneid == zone_id) { 1958 if (ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifrl) < 0) { 1959 zerror(zlogp, B_TRUE, 1960 "%s: could not remove network interface", 1961 lifrl.lifr_name); 1962 ret_code = -1; 1963 continue; 1964 } 1965 } 1966 } 1967 bad: 1968 if (s > 0) 1969 (void) close(s); 1970 if (buf) 1971 free(buf); 1972 return (ret_code); 1973 } 1974 1975 static union sockunion { 1976 struct sockaddr sa; 1977 struct sockaddr_in sin; 1978 struct sockaddr_dl sdl; 1979 struct sockaddr_in6 sin6; 1980 } so_dst, so_ifp; 1981 1982 static struct { 1983 struct rt_msghdr hdr; 1984 char space[512]; 1985 } rtmsg; 1986 1987 static int 1988 salen(struct sockaddr *sa) 1989 { 1990 switch (sa->sa_family) { 1991 case AF_INET: 1992 return (sizeof (struct sockaddr_in)); 1993 case AF_LINK: 1994 return (sizeof (struct sockaddr_dl)); 1995 case AF_INET6: 1996 return (sizeof (struct sockaddr_in6)); 1997 default: 1998 return (sizeof (struct sockaddr)); 1999 } 2000 } 2001 2002 #define ROUNDUP_LONG(a) \ 2003 ((a) > 0 ? (1 + (((a) - 1) | (sizeof (long) - 1))) : sizeof (long)) 2004 2005 /* 2006 * Look up which zone is using a given IP address. The address in question 2007 * is expected to have been stuffed into the structure to which lifr points 2008 * via a previous SIOCGLIFADDR ioctl(). 2009 * 2010 * This is done using black router socket magic. 2011 * 2012 * Return the name of the zone on success or NULL on failure. 2013 * 2014 * This is a lot of code for a simple task; a new ioctl request to take care 2015 * of this might be a useful RFE. 
2016 */ 2017 2018 static char * 2019 who_is_using(zlog_t *zlogp, struct lifreq *lifr) 2020 { 2021 static char answer[ZONENAME_MAX]; 2022 pid_t pid; 2023 int s, rlen, l, i; 2024 char *cp = rtmsg.space; 2025 struct sockaddr_dl *ifp = NULL; 2026 struct sockaddr *sa; 2027 char save_if_name[LIFNAMSIZ]; 2028 2029 answer[0] = '\0'; 2030 2031 pid = getpid(); 2032 if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) < 0) { 2033 zerror(zlogp, B_TRUE, "could not get routing socket"); 2034 return (NULL); 2035 } 2036 2037 if (lifr->lifr_addr.ss_family == AF_INET) { 2038 struct sockaddr_in *sin4; 2039 2040 so_dst.sa.sa_family = AF_INET; 2041 sin4 = (struct sockaddr_in *)&lifr->lifr_addr; 2042 so_dst.sin.sin_addr = sin4->sin_addr; 2043 } else { 2044 struct sockaddr_in6 *sin6; 2045 2046 so_dst.sa.sa_family = AF_INET6; 2047 sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr; 2048 so_dst.sin6.sin6_addr = sin6->sin6_addr; 2049 } 2050 2051 so_ifp.sa.sa_family = AF_LINK; 2052 2053 (void) memset(&rtmsg, 0, sizeof (rtmsg)); 2054 rtmsg.hdr.rtm_type = RTM_GET; 2055 rtmsg.hdr.rtm_flags = RTF_UP | RTF_HOST; 2056 rtmsg.hdr.rtm_version = RTM_VERSION; 2057 rtmsg.hdr.rtm_seq = ++rts_seqno; 2058 rtmsg.hdr.rtm_addrs = RTA_IFP | RTA_DST; 2059 2060 l = ROUNDUP_LONG(salen(&so_dst.sa)); 2061 (void) memmove(cp, &(so_dst), l); 2062 cp += l; 2063 l = ROUNDUP_LONG(salen(&so_ifp.sa)); 2064 (void) memmove(cp, &(so_ifp), l); 2065 cp += l; 2066 2067 rtmsg.hdr.rtm_msglen = l = cp - (char *)&rtmsg; 2068 2069 if ((rlen = write(s, &rtmsg, l)) < 0) { 2070 zerror(zlogp, B_TRUE, "writing to routing socket"); 2071 return (NULL); 2072 } else if (rlen < (int)rtmsg.hdr.rtm_msglen) { 2073 zerror(zlogp, B_TRUE, 2074 "write to routing socket got only %d for len\n", rlen); 2075 return (NULL); 2076 } 2077 do { 2078 l = read(s, &rtmsg, sizeof (rtmsg)); 2079 } while (l > 0 && (rtmsg.hdr.rtm_seq != rts_seqno || 2080 rtmsg.hdr.rtm_pid != pid)); 2081 if (l < 0) { 2082 zerror(zlogp, B_TRUE, "reading from routing socket"); 2083 return (NULL); 2084 
} 2085 2086 if (rtmsg.hdr.rtm_version != RTM_VERSION) { 2087 zerror(zlogp, B_FALSE, 2088 "routing message version %d not understood", 2089 rtmsg.hdr.rtm_version); 2090 return (NULL); 2091 } 2092 if (rtmsg.hdr.rtm_msglen != (ushort_t)l) { 2093 zerror(zlogp, B_FALSE, "message length mismatch, " 2094 "expected %d bytes, returned %d bytes", 2095 rtmsg.hdr.rtm_msglen, l); 2096 return (NULL); 2097 } 2098 if (rtmsg.hdr.rtm_errno != 0) { 2099 errno = rtmsg.hdr.rtm_errno; 2100 zerror(zlogp, B_TRUE, "RTM_GET routing socket message"); 2101 return (NULL); 2102 } 2103 if ((rtmsg.hdr.rtm_addrs & RTA_IFP) == 0) { 2104 zerror(zlogp, B_FALSE, "network interface not found"); 2105 return (NULL); 2106 } 2107 cp = ((char *)(&rtmsg.hdr + 1)); 2108 for (i = 1; i != 0; i <<= 1) { 2109 /* LINTED E_BAD_PTR_CAST_ALIGN */ 2110 sa = (struct sockaddr *)cp; 2111 if (i != RTA_IFP) { 2112 if ((i & rtmsg.hdr.rtm_addrs) != 0) 2113 cp += ROUNDUP_LONG(salen(sa)); 2114 continue; 2115 } 2116 if (sa->sa_family == AF_LINK && 2117 ((struct sockaddr_dl *)sa)->sdl_nlen != 0) 2118 ifp = (struct sockaddr_dl *)sa; 2119 break; 2120 } 2121 if (ifp == NULL) { 2122 zerror(zlogp, B_FALSE, "network interface could not be " 2123 "determined"); 2124 return (NULL); 2125 } 2126 2127 /* 2128 * We need to set the I/F name to what we got above, then do the 2129 * appropriate ioctl to get its zone name. But lifr->lifr_name is 2130 * used by the calling function to do a REMOVEIF, so if we leave the 2131 * "good" zone's I/F name in place, *that* I/F will be removed instead 2132 * of the bad one. So we save the old (bad) I/F name before over- 2133 * writing it and doing the ioctl, then restore it after the ioctl. 
2134 */ 2135 (void) strlcpy(save_if_name, lifr->lifr_name, sizeof (save_if_name)); 2136 (void) strncpy(lifr->lifr_name, ifp->sdl_data, ifp->sdl_nlen); 2137 lifr->lifr_name[ifp->sdl_nlen] = '\0'; 2138 i = ioctl(s, SIOCGLIFZONE, lifr); 2139 (void) strlcpy(lifr->lifr_name, save_if_name, sizeof (save_if_name)); 2140 if (i < 0) { 2141 zerror(zlogp, B_TRUE, 2142 "%s: could not determine the zone network interface " 2143 "belongs to", lifr->lifr_name); 2144 return (NULL); 2145 } 2146 if (getzonenamebyid(lifr->lifr_zoneid, answer, sizeof (answer)) < 0) 2147 (void) snprintf(answer, sizeof (answer), "%d", 2148 lifr->lifr_zoneid); 2149 2150 if (strlen(answer) > 0) 2151 return (answer); 2152 return (NULL); 2153 } 2154 2155 /* 2156 * Configures a single interface: a new virtual interface is added, based on 2157 * the physical interface nwiftabptr->zone_nwif_physical, with the address 2158 * specified in nwiftabptr->zone_nwif_address, for zone zone_id. Note that 2159 * the "address" can be an IPv6 address (with a /prefixlength required), an 2160 * IPv4 address (with a /prefixlength optional), or a name; for the latter, 2161 * an IPv4 name-to-address resolution will be attempted. 2162 * 2163 * If anything goes wrong, we log an detailed error message, attempt to tear 2164 * down whatever we set up and return an error. 
2165 */ 2166 static int 2167 configure_one_interface(zlog_t *zlogp, zoneid_t zone_id, 2168 struct zone_nwiftab *nwiftabptr) 2169 { 2170 struct lifreq lifr; 2171 struct sockaddr_in netmask4; 2172 struct sockaddr_in6 netmask6; 2173 struct sockaddr_storage laddr; 2174 struct in_addr in4; 2175 sa_family_t af; 2176 char *slashp = strchr(nwiftabptr->zone_nwif_address, '/'); 2177 int s; 2178 boolean_t got_netmask = B_FALSE; 2179 boolean_t is_loopback = B_FALSE; 2180 char addrstr4[INET_ADDRSTRLEN]; 2181 int res; 2182 2183 res = zonecfg_valid_net_address(nwiftabptr->zone_nwif_address, &lifr); 2184 if (res != Z_OK) { 2185 zerror(zlogp, B_FALSE, "%s: %s", zonecfg_strerror(res), 2186 nwiftabptr->zone_nwif_address); 2187 return (-1); 2188 } 2189 af = lifr.lifr_addr.ss_family; 2190 if (af == AF_INET) 2191 in4 = ((struct sockaddr_in *)(&lifr.lifr_addr))->sin_addr; 2192 if ((s = socket(af, SOCK_DGRAM, 0)) < 0) { 2193 zerror(zlogp, B_TRUE, "could not get socket"); 2194 return (-1); 2195 } 2196 2197 /* 2198 * This is a similar kind of "hack" like in addif() to get around 2199 * the problem of SIOCLIFADDIF. The problem is that this ioctl 2200 * does not include the netmask when adding a logical interface. 2201 * To get around this problem, we first add the logical interface 2202 * with a 0 address. After that, we set the netmask if provided. 2203 * Finally we set the interface address. 2204 */ 2205 laddr = lifr.lifr_addr; 2206 (void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical, 2207 sizeof (lifr.lifr_name)); 2208 (void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr)); 2209 2210 if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) { 2211 /* 2212 * Here, we know that the interface can't be brought up. 2213 * A similar warning message was already printed out to 2214 * the console by zoneadm(8) so instead we log the 2215 * message to syslog and continue. 
2216 */ 2217 zerror(&logsys, B_TRUE, "WARNING: skipping network interface " 2218 "'%s' which may not be present/plumbed in the " 2219 "global zone.", lifr.lifr_name); 2220 (void) close(s); 2221 return (Z_OK); 2222 } 2223 2224 /* Preserve literal IPv4 address for later potential printing. */ 2225 if (af == AF_INET) 2226 (void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN); 2227 2228 lifr.lifr_zoneid = zone_id; 2229 if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) { 2230 zerror(zlogp, B_TRUE, "%s: could not place network interface " 2231 "into zone", lifr.lifr_name); 2232 goto bad; 2233 } 2234 2235 /* 2236 * Loopback interface will use the default netmask assigned, if no 2237 * netmask is found. 2238 */ 2239 if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) { 2240 is_loopback = B_TRUE; 2241 } 2242 if (af == AF_INET) { 2243 /* 2244 * The IPv4 netmask can be determined either 2245 * directly if a prefix length was supplied with 2246 * the address or via the netmasks database. Not 2247 * being able to determine it is a common failure, 2248 * but it often is not fatal to operation of the 2249 * interface. In that case, a warning will be 2250 * printed after the rest of the interface's 2251 * parameters have been configured. 
2252 */ 2253 (void) memset(&netmask4, 0, sizeof (netmask4)); 2254 if (slashp != NULL) { 2255 if (addr2netmask(slashp + 1, V4_ADDR_LEN, 2256 (uchar_t *)&netmask4.sin_addr) != 0) { 2257 *slashp = '/'; 2258 zerror(zlogp, B_FALSE, 2259 "%s: invalid prefix length in %s", 2260 lifr.lifr_name, 2261 nwiftabptr->zone_nwif_address); 2262 goto bad; 2263 } 2264 got_netmask = B_TRUE; 2265 } else if (getnetmaskbyaddr(in4, 2266 &netmask4.sin_addr) == 0) { 2267 got_netmask = B_TRUE; 2268 } 2269 if (got_netmask) { 2270 netmask4.sin_family = af; 2271 (void) memcpy(&lifr.lifr_addr, &netmask4, 2272 sizeof (netmask4)); 2273 } 2274 } else { 2275 (void) memset(&netmask6, 0, sizeof (netmask6)); 2276 if (addr2netmask(slashp + 1, V6_ADDR_LEN, 2277 (uchar_t *)&netmask6.sin6_addr) != 0) { 2278 *slashp = '/'; 2279 zerror(zlogp, B_FALSE, 2280 "%s: invalid prefix length in %s", 2281 lifr.lifr_name, 2282 nwiftabptr->zone_nwif_address); 2283 goto bad; 2284 } 2285 got_netmask = B_TRUE; 2286 netmask6.sin6_family = af; 2287 (void) memcpy(&lifr.lifr_addr, &netmask6, 2288 sizeof (netmask6)); 2289 } 2290 if (got_netmask && 2291 ioctl(s, SIOCSLIFNETMASK, (caddr_t)&lifr) < 0) { 2292 zerror(zlogp, B_TRUE, "%s: could not set netmask", 2293 lifr.lifr_name); 2294 goto bad; 2295 } 2296 2297 /* Set the interface address */ 2298 lifr.lifr_addr = laddr; 2299 if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) { 2300 zerror(zlogp, B_TRUE, 2301 "%s: could not set IP address to %s", 2302 lifr.lifr_name, nwiftabptr->zone_nwif_address); 2303 goto bad; 2304 } 2305 2306 if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { 2307 zerror(zlogp, B_TRUE, "%s: could not get flags", 2308 lifr.lifr_name); 2309 goto bad; 2310 } 2311 lifr.lifr_flags |= IFF_UP; 2312 if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { 2313 int save_errno = errno; 2314 char *zone_using; 2315 2316 /* 2317 * If we failed with something other than EADDRNOTAVAIL, 2318 * then skip to the end. 
Otherwise, look up our address, 2319 * then call a function to determine which zone is already 2320 * using that address. 2321 */ 2322 if (errno != EADDRNOTAVAIL) { 2323 zerror(zlogp, B_TRUE, 2324 "%s: could not bring network interface up", 2325 lifr.lifr_name); 2326 goto bad; 2327 } 2328 if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) { 2329 zerror(zlogp, B_TRUE, "%s: could not get address", 2330 lifr.lifr_name); 2331 goto bad; 2332 } 2333 zone_using = who_is_using(zlogp, &lifr); 2334 errno = save_errno; 2335 if (zone_using == NULL) 2336 zerror(zlogp, B_TRUE, 2337 "%s: could not bring network interface up", 2338 lifr.lifr_name); 2339 else 2340 zerror(zlogp, B_TRUE, "%s: could not bring network " 2341 "interface up: address in use by zone '%s'", 2342 lifr.lifr_name, zone_using); 2343 goto bad; 2344 } 2345 2346 if (!got_netmask && !is_loopback) { 2347 /* 2348 * A common, but often non-fatal problem, is that the system 2349 * cannot find the netmask for an interface address. This is 2350 * often caused by it being only in /etc/inet/netmasks, but 2351 * /etc/nsswitch.conf says to use NIS or NIS+ and it's not 2352 * in that. This doesn't show up at boot because the netmask 2353 * is obtained from /etc/inet/netmasks when no network 2354 * interfaces are up, but isn't consulted when NIS/NIS+ is 2355 * available. We warn the user here that something like this 2356 * has happened and we're just running with a default and 2357 * possible incorrect netmask. 2358 */ 2359 char buffer[INET6_ADDRSTRLEN]; 2360 void *addr; 2361 const char *nomatch = "no matching subnet found in netmasks(5)"; 2362 2363 if (af == AF_INET) 2364 addr = &((struct sockaddr_in *) 2365 (&lifr.lifr_addr))->sin_addr; 2366 else 2367 addr = &((struct sockaddr_in6 *) 2368 (&lifr.lifr_addr))->sin6_addr; 2369 2370 /* 2371 * Find out what netmask the interface is going to be using. 
2372 * If we just brought up an IPMP data address on an underlying 2373 * interface above, the address will have already migrated, so 2374 * the SIOCGLIFNETMASK won't be able to find it (but we need 2375 * to bring the address up to get the actual netmask). Just 2376 * omit printing the actual netmask in this corner-case. 2377 */ 2378 if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 || 2379 inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL) { 2380 zerror(zlogp, B_FALSE, "WARNING: %s; using default.", 2381 nomatch); 2382 } else { 2383 zerror(zlogp, B_FALSE, 2384 "WARNING: %s: %s: %s; using default of %s.", 2385 lifr.lifr_name, nomatch, addrstr4, buffer); 2386 } 2387 } 2388 2389 /* 2390 * If a default router was specified for this interface 2391 * set the route now. Ignore if already set. 2392 */ 2393 if (strlen(nwiftabptr->zone_nwif_defrouter) > 0) { 2394 int status; 2395 char *argv[7]; 2396 2397 argv[0] = "route"; 2398 argv[1] = "add"; 2399 argv[2] = "-ifp"; 2400 argv[3] = nwiftabptr->zone_nwif_physical; 2401 argv[4] = "default"; 2402 argv[5] = nwiftabptr->zone_nwif_defrouter; 2403 argv[6] = NULL; 2404 2405 status = forkexec(zlogp, "/usr/sbin/route", argv); 2406 if (status != 0 && status != EEXIST) 2407 zerror(zlogp, B_FALSE, "Unable to set route for " 2408 "interface %s to %s\n", 2409 nwiftabptr->zone_nwif_physical, 2410 nwiftabptr->zone_nwif_defrouter); 2411 } 2412 2413 (void) close(s); 2414 return (Z_OK); 2415 bad: 2416 (void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr); 2417 (void) close(s); 2418 return (-1); 2419 } 2420 2421 /* 2422 * Sets up network interfaces based on information from the zone configuration. 2423 * IPv4 and IPv6 loopback interfaces are set up "for free", modeling the global 2424 * system. 2425 * 2426 * If anything goes wrong, we log a general error message, attempt to tear down 2427 * whatever we set up, and return an error. 
2428 */ 2429 static int 2430 configure_shared_network_interfaces(zlog_t *zlogp) 2431 { 2432 zone_dochandle_t handle; 2433 struct zone_nwiftab nwiftab, loopback_iftab; 2434 zoneid_t zoneid; 2435 2436 if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) { 2437 zerror(zlogp, B_TRUE, "unable to get zoneid"); 2438 return (-1); 2439 } 2440 2441 if ((handle = zonecfg_init_handle()) == NULL) { 2442 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2443 return (-1); 2444 } 2445 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2446 zerror(zlogp, B_FALSE, "invalid configuration"); 2447 zonecfg_fini_handle(handle); 2448 return (-1); 2449 } 2450 if (zonecfg_setnwifent(handle) == Z_OK) { 2451 for (;;) { 2452 if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK) 2453 break; 2454 if (configure_one_interface(zlogp, zoneid, &nwiftab) != 2455 Z_OK) { 2456 (void) zonecfg_endnwifent(handle); 2457 zonecfg_fini_handle(handle); 2458 return (-1); 2459 } 2460 } 2461 (void) zonecfg_endnwifent(handle); 2462 } 2463 zonecfg_fini_handle(handle); 2464 if (is_system_labeled()) { 2465 /* 2466 * Labeled zones share the loopback interface 2467 * so it is not plumbed for shared stack instances. 2468 */ 2469 return (0); 2470 } 2471 (void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0", 2472 sizeof (loopback_iftab.zone_nwif_physical)); 2473 (void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1", 2474 sizeof (loopback_iftab.zone_nwif_address)); 2475 loopback_iftab.zone_nwif_defrouter[0] = '\0'; 2476 if (configure_one_interface(zlogp, zoneid, &loopback_iftab) != Z_OK) 2477 return (-1); 2478 2479 /* Always plumb up the IPv6 loopback interface. 
*/ 2480 (void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128", 2481 sizeof (loopback_iftab.zone_nwif_address)); 2482 if (configure_one_interface(zlogp, zoneid, &loopback_iftab) != Z_OK) 2483 return (-1); 2484 return (0); 2485 } 2486 2487 static void 2488 zdlerror(zlog_t *zlogp, dladm_status_t err, const char *dlname, const char *str) 2489 { 2490 char errmsg[DLADM_STRSIZE]; 2491 2492 (void) dladm_status2str(err, errmsg); 2493 zerror(zlogp, B_FALSE, "%s '%s': %s", str, dlname, errmsg); 2494 } 2495 2496 static int 2497 add_datalink(zlog_t *zlogp, char *zone_name, datalink_id_t linkid, char *dlname) 2498 { 2499 dladm_status_t err; 2500 boolean_t cpuset, poolset; 2501 char *poolp; 2502 2503 /* First check if it's in use by global zone. */ 2504 if (zonecfg_ifname_exists(AF_INET, dlname) || 2505 zonecfg_ifname_exists(AF_INET6, dlname)) { 2506 zerror(zlogp, B_FALSE, "WARNING: skipping network interface " 2507 "'%s' which is used in the global zone", dlname); 2508 return (-1); 2509 } 2510 2511 /* Set zoneid of this link. */ 2512 err = dladm_set_linkprop(dld_handle, linkid, "zone", &zone_name, 1, 2513 DLADM_OPT_ACTIVE); 2514 if (err != DLADM_STATUS_OK) { 2515 zdlerror(zlogp, err, dlname, 2516 "WARNING: unable to add network interface"); 2517 return (-1); 2518 } 2519 2520 /* 2521 * Set the pool of this link if the zone has a pool and 2522 * neither the cpus nor the pool datalink property is 2523 * already set. 
2524 */ 2525 err = dladm_linkprop_is_set(dld_handle, linkid, DLADM_PROP_VAL_CURRENT, 2526 "cpus", &cpuset); 2527 if (err != DLADM_STATUS_OK) { 2528 zdlerror(zlogp, err, dlname, 2529 "WARNING: unable to check if cpus link property is set"); 2530 } 2531 err = dladm_linkprop_is_set(dld_handle, linkid, DLADM_PROP_VAL_CURRENT, 2532 "pool", &poolset); 2533 if (err != DLADM_STATUS_OK) { 2534 zdlerror(zlogp, err, dlname, 2535 "WARNING: unable to check if pool link property is set"); 2536 } 2537 2538 if ((strlen(pool_name) != 0) && !cpuset && !poolset) { 2539 poolp = pool_name; 2540 err = dladm_set_linkprop(dld_handle, linkid, "pool", 2541 &poolp, 1, DLADM_OPT_ACTIVE); 2542 if (err != DLADM_STATUS_OK) { 2543 zerror(zlogp, B_FALSE, "WARNING: unable to set " 2544 "pool %s to datalink %s", pool_name, dlname); 2545 bzero(pool_name, sizeof (pool_name)); 2546 } 2547 } else { 2548 bzero(pool_name, sizeof (pool_name)); 2549 } 2550 return (0); 2551 } 2552 2553 static boolean_t 2554 sockaddr_to_str(sa_family_t af, const struct sockaddr *sockaddr, 2555 char *straddr, size_t len) 2556 { 2557 struct sockaddr_in *sin; 2558 struct sockaddr_in6 *sin6; 2559 const char *str = NULL; 2560 2561 if (af == AF_INET) { 2562 /* LINTED E_BAD_PTR_CAST_ALIGN */ 2563 sin = SIN(sockaddr); 2564 str = inet_ntop(AF_INET, (void *)&sin->sin_addr, straddr, len); 2565 } else if (af == AF_INET6) { 2566 /* LINTED E_BAD_PTR_CAST_ALIGN */ 2567 sin6 = SIN6(sockaddr); 2568 str = inet_ntop(AF_INET6, (void *)&sin6->sin6_addr, straddr, 2569 len); 2570 } 2571 2572 return (str != NULL); 2573 } 2574 2575 static int 2576 ipv4_prefixlen(struct sockaddr_in *sin) 2577 { 2578 struct sockaddr_in *m; 2579 struct sockaddr_storage mask; 2580 2581 m = SIN(&mask); 2582 m->sin_family = AF_INET; 2583 if (getnetmaskbyaddr(sin->sin_addr, &m->sin_addr) == 0) { 2584 return (mask2plen((struct sockaddr *)&mask)); 2585 } else if (IN_CLASSA(htonl(sin->sin_addr.s_addr))) { 2586 return (8); 2587 } else if (IN_CLASSB(ntohl(sin->sin_addr.s_addr))) 
{ 2588 return (16); 2589 } else if (IN_CLASSC(ntohl(sin->sin_addr.s_addr))) { 2590 return (24); 2591 } 2592 return (0); 2593 } 2594 2595 static int 2596 zone_setattr_network(int type, zoneid_t zoneid, datalink_id_t linkid, 2597 void *buf, size_t bufsize) 2598 { 2599 zone_net_data_t *zndata; 2600 size_t znsize; 2601 int err; 2602 2603 znsize = sizeof (*zndata) + bufsize; 2604 zndata = calloc(1, znsize); 2605 if (zndata == NULL) 2606 return (ENOMEM); 2607 zndata->zn_type = type; 2608 zndata->zn_len = bufsize; 2609 zndata->zn_linkid = linkid; 2610 bcopy(buf, zndata->zn_val, zndata->zn_len); 2611 err = zone_setattr(zoneid, ZONE_ATTR_NETWORK, zndata, znsize); 2612 free(zndata); 2613 return (err); 2614 } 2615 2616 static int 2617 add_net_for_linkid(zlog_t *zlogp, zoneid_t zoneid, zone_addr_list_t *start) 2618 { 2619 struct lifreq lifr; 2620 char **astr, *address; 2621 dladm_status_t dlstatus; 2622 char *ip_nospoof = "ip-nospoof"; 2623 int nnet, naddr, err = 0, j; 2624 size_t zlen, cpleft; 2625 zone_addr_list_t *ptr, *end; 2626 char tmp[INET6_ADDRSTRLEN], *maskstr; 2627 char *zaddr, *cp; 2628 struct in6_addr *routes = NULL; 2629 boolean_t is_set; 2630 datalink_id_t linkid; 2631 2632 assert(start != NULL); 2633 naddr = 0; /* number of addresses */ 2634 nnet = 0; /* number of net resources */ 2635 linkid = start->za_linkid; 2636 for (ptr = start; ptr != NULL && ptr->za_linkid == linkid; 2637 ptr = ptr->za_next) { 2638 nnet++; 2639 } 2640 end = ptr; 2641 zlen = nnet * (INET6_ADDRSTRLEN + 1); 2642 astr = calloc(1, nnet * sizeof (uintptr_t)); 2643 zaddr = calloc(1, zlen); 2644 if (astr == NULL || zaddr == NULL) { 2645 err = ENOMEM; 2646 goto done; 2647 } 2648 cp = zaddr; 2649 cpleft = zlen; 2650 j = 0; 2651 for (ptr = start; ptr != end; ptr = ptr->za_next) { 2652 address = ptr->za_nwiftab.zone_nwif_allowed_address; 2653 if (address[0] == '\0') 2654 continue; 2655 (void) snprintf(tmp, sizeof (tmp), "%s", address); 2656 /* 2657 * Validate the data. 
zonecfg_valid_net_address() clobbers 2658 * the /<mask> in the address string. 2659 */ 2660 if (zonecfg_valid_net_address(address, &lifr) != Z_OK) { 2661 zerror(zlogp, B_FALSE, "invalid address [%s]\n", 2662 address); 2663 err = EINVAL; 2664 goto done; 2665 } 2666 /* 2667 * convert any hostnames to numeric address strings. 2668 */ 2669 if (!sockaddr_to_str(lifr.lifr_addr.ss_family, 2670 (const struct sockaddr *)&lifr.lifr_addr, cp, cpleft)) { 2671 err = EINVAL; 2672 goto done; 2673 } 2674 /* 2675 * make a copy of the numeric string for the data needed 2676 * by the "allowed-ips" datalink property. 2677 */ 2678 astr[j] = strdup(cp); 2679 if (astr[j] == NULL) { 2680 err = ENOMEM; 2681 goto done; 2682 } 2683 j++; 2684 /* 2685 * compute the default netmask from the address, if necessary 2686 */ 2687 if ((maskstr = strchr(tmp, '/')) == NULL) { 2688 int prefixlen; 2689 2690 if (lifr.lifr_addr.ss_family == AF_INET) { 2691 prefixlen = ipv4_prefixlen( 2692 SIN(&lifr.lifr_addr)); 2693 } else { 2694 struct sockaddr_in6 *sin6; 2695 2696 sin6 = SIN6(&lifr.lifr_addr); 2697 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) 2698 prefixlen = 10; 2699 else 2700 prefixlen = 64; 2701 } 2702 (void) snprintf(tmp, sizeof (tmp), "%d", prefixlen); 2703 maskstr = tmp; 2704 } else { 2705 maskstr++; 2706 } 2707 /* append the "/<netmask>" */ 2708 (void) strlcat(cp, "/", cpleft); 2709 (void) strlcat(cp, maskstr, cpleft); 2710 (void) strlcat(cp, ",", cpleft); 2711 cp += strnlen(cp, zlen); 2712 cpleft = &zaddr[INET6_ADDRSTRLEN] - cp; 2713 } 2714 naddr = j; /* the actual number of addresses in the net resource */ 2715 assert(naddr <= nnet); 2716 2717 /* 2718 * zonecfg has already verified that the defrouter property can only 2719 * be set if there is at least one address defined for the net resource. 2720 * If j is 0, there are no addresses defined, and therefore no routers 2721 * to configure, and we are done at that point. 
2722 */ 2723 if (j == 0) 2724 goto done; 2725 2726 /* over-write last ',' with '\0' */ 2727 zaddr[strnlen(zaddr, zlen) - 1] = '\0'; 2728 2729 /* 2730 * First make sure L3 protection is not already set on the link. 2731 */ 2732 dlstatus = dladm_linkprop_is_set(dld_handle, linkid, DLADM_OPT_ACTIVE, 2733 "protection", &is_set); 2734 if (dlstatus != DLADM_STATUS_OK) { 2735 err = EINVAL; 2736 zerror(zlogp, B_FALSE, "unable to check if protection is set"); 2737 goto done; 2738 } 2739 if (is_set) { 2740 err = EINVAL; 2741 zerror(zlogp, B_FALSE, "Protection is already set"); 2742 goto done; 2743 } 2744 dlstatus = dladm_linkprop_is_set(dld_handle, linkid, DLADM_OPT_ACTIVE, 2745 "allowed-ips", &is_set); 2746 if (dlstatus != DLADM_STATUS_OK) { 2747 err = EINVAL; 2748 zerror(zlogp, B_FALSE, "unable to check if allowed-ips is set"); 2749 goto done; 2750 } 2751 if (is_set) { 2752 zerror(zlogp, B_FALSE, "allowed-ips is already set"); 2753 err = EINVAL; 2754 goto done; 2755 } 2756 2757 /* 2758 * Enable ip-nospoof for the link, and add address to the allowed-ips 2759 * list. 
2760 */ 2761 dlstatus = dladm_set_linkprop(dld_handle, linkid, "protection", 2762 &ip_nospoof, 1, DLADM_OPT_ACTIVE); 2763 if (dlstatus != DLADM_STATUS_OK) { 2764 zerror(zlogp, B_FALSE, "could not set protection\n"); 2765 err = EINVAL; 2766 goto done; 2767 } 2768 dlstatus = dladm_set_linkprop(dld_handle, linkid, "allowed-ips", 2769 astr, naddr, DLADM_OPT_ACTIVE); 2770 if (dlstatus != DLADM_STATUS_OK) { 2771 zerror(zlogp, B_FALSE, "could not set allowed-ips\n"); 2772 err = EINVAL; 2773 goto done; 2774 } 2775 2776 /* now set the address in the data-store */ 2777 err = zone_setattr_network(ZONE_NETWORK_ADDRESS, zoneid, linkid, 2778 zaddr, strnlen(zaddr, zlen) + 1); 2779 if (err != 0) 2780 goto done; 2781 2782 /* 2783 * add the defaultrouters 2784 */ 2785 routes = calloc(1, nnet * sizeof (*routes)); 2786 j = 0; 2787 for (ptr = start; ptr != end; ptr = ptr->za_next) { 2788 address = ptr->za_nwiftab.zone_nwif_defrouter; 2789 if (address[0] == '\0') 2790 continue; 2791 if (strchr(address, '/') == NULL && strchr(address, ':') != 0) { 2792 /* 2793 * zonecfg_valid_net_address() expects numeric IPv6 2794 * addresses to have a CIDR format netmask. 
2795 */ 2796 (void) snprintf(tmp, sizeof (tmp), "/%d", V6_ADDR_LEN); 2797 (void) strlcat(address, tmp, INET6_ADDRSTRLEN); 2798 } 2799 if (zonecfg_valid_net_address(address, &lifr) != Z_OK) { 2800 zerror(zlogp, B_FALSE, 2801 "invalid router [%s]\n", address); 2802 err = EINVAL; 2803 goto done; 2804 } 2805 if (lifr.lifr_addr.ss_family == AF_INET6) { 2806 routes[j] = SIN6(&lifr.lifr_addr)->sin6_addr; 2807 } else { 2808 IN6_INADDR_TO_V4MAPPED(&SIN(&lifr.lifr_addr)->sin_addr, 2809 &routes[j]); 2810 } 2811 j++; 2812 } 2813 assert(j <= nnet); 2814 if (j > 0) { 2815 err = zone_setattr_network(ZONE_NETWORK_DEFROUTER, zoneid, 2816 linkid, routes, j * sizeof (*routes)); 2817 } 2818 done: 2819 free(routes); 2820 for (j = 0; j < naddr; j++) 2821 free(astr[j]); 2822 free(astr); 2823 free(zaddr); 2824 return (err); 2825 2826 } 2827 2828 static int 2829 add_net(zlog_t *zlogp, zoneid_t zoneid, zone_addr_list_t *zalist) 2830 { 2831 zone_addr_list_t *ptr; 2832 datalink_id_t linkid; 2833 int err; 2834 2835 if (zalist == NULL) 2836 return (0); 2837 2838 linkid = zalist->za_linkid; 2839 2840 err = add_net_for_linkid(zlogp, zoneid, zalist); 2841 if (err != 0) 2842 return (err); 2843 2844 for (ptr = zalist; ptr != NULL; ptr = ptr->za_next) { 2845 if (ptr->za_linkid == linkid) 2846 continue; 2847 linkid = ptr->za_linkid; 2848 err = add_net_for_linkid(zlogp, zoneid, ptr); 2849 if (err != 0) 2850 return (err); 2851 } 2852 return (0); 2853 } 2854 2855 /* 2856 * Add "new" to the list of network interfaces to be configured by 2857 * add_net on zone boot in "old". The list of interfaces in "old" is 2858 * sorted by datalink_id_t, with interfaces sorted FIFO for a given 2859 * datalink_id_t. 
2860 * 2861 * Returns the merged list of IP interfaces containing "old" and "new" 2862 */ 2863 static zone_addr_list_t * 2864 add_ip_interface(zone_addr_list_t *old, zone_addr_list_t *new) 2865 { 2866 zone_addr_list_t *ptr, *next; 2867 datalink_id_t linkid = new->za_linkid; 2868 2869 assert(old != new); 2870 2871 if (old == NULL) 2872 return (new); 2873 for (ptr = old; ptr != NULL; ptr = ptr->za_next) { 2874 if (ptr->za_linkid == linkid) 2875 break; 2876 } 2877 if (ptr == NULL) { 2878 /* linkid does not already exist, add to the beginning */ 2879 new->za_next = old; 2880 return (new); 2881 } 2882 /* 2883 * adding to the middle of the list; ptr points at the first 2884 * occurrence of linkid. Find the last occurrence. 2885 */ 2886 while ((next = ptr->za_next) != NULL) { 2887 if (next->za_linkid != linkid) 2888 break; 2889 ptr = next; 2890 } 2891 /* insert new after ptr */ 2892 new->za_next = next; 2893 ptr->za_next = new; 2894 return (old); 2895 } 2896 2897 void 2898 free_ip_interface(zone_addr_list_t *zalist) 2899 { 2900 zone_addr_list_t *ptr, *new; 2901 2902 for (ptr = zalist; ptr != NULL; ) { 2903 new = ptr; 2904 ptr = ptr->za_next; 2905 free(new); 2906 } 2907 } 2908 2909 /* 2910 * Add the kernel access control information for the interface names. 2911 * If anything goes wrong, we log a general error message, attempt to tear down 2912 * whatever we set up, and return an error. 
2913 */ 2914 static int 2915 configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid) 2916 { 2917 zone_dochandle_t handle; 2918 struct zone_nwiftab nwiftab; 2919 char rootpath[MAXPATHLEN]; 2920 char path[MAXPATHLEN]; 2921 datalink_id_t linkid; 2922 di_prof_t prof = NULL; 2923 boolean_t added = B_FALSE; 2924 zone_addr_list_t *zalist = NULL, *new; 2925 2926 if ((handle = zonecfg_init_handle()) == NULL) { 2927 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 2928 return (-1); 2929 } 2930 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 2931 zerror(zlogp, B_FALSE, "invalid configuration"); 2932 zonecfg_fini_handle(handle); 2933 return (-1); 2934 } 2935 2936 if (zonecfg_setnwifent(handle) != Z_OK) { 2937 zonecfg_fini_handle(handle); 2938 return (0); 2939 } 2940 2941 for (;;) { 2942 if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK) 2943 break; 2944 2945 if (prof == NULL) { 2946 if (zone_get_devroot(zone_name, rootpath, 2947 sizeof (rootpath)) != Z_OK) { 2948 (void) zonecfg_endnwifent(handle); 2949 zonecfg_fini_handle(handle); 2950 zerror(zlogp, B_TRUE, 2951 "unable to determine dev root"); 2952 return (-1); 2953 } 2954 (void) snprintf(path, sizeof (path), "%s%s", rootpath, 2955 "/dev"); 2956 if (di_prof_init(path, &prof) != 0) { 2957 (void) zonecfg_endnwifent(handle); 2958 zonecfg_fini_handle(handle); 2959 zerror(zlogp, B_TRUE, 2960 "failed to initialize profile"); 2961 return (-1); 2962 } 2963 } 2964 2965 /* 2966 * Create the /dev entry for backward compatibility. 2967 * Only create the /dev entry if it's not in use. 2968 * Note that the zone still boots when the assigned 2969 * interface is inaccessible, used by others, etc. 2970 * Also, when vanity naming is used, some interface do 2971 * do not have corresponding /dev node names (for example, 2972 * vanity named aggregations). The /dev entry is not 2973 * created in that case. The /dev/net entry is always 2974 * accessible. 
2975 */ 2976 if (dladm_name2info(dld_handle, nwiftab.zone_nwif_physical, 2977 &linkid, NULL, NULL, NULL) == DLADM_STATUS_OK && 2978 add_datalink(zlogp, zone_name, linkid, 2979 nwiftab.zone_nwif_physical) == 0) { 2980 added = B_TRUE; 2981 } else { 2982 (void) zonecfg_endnwifent(handle); 2983 zonecfg_fini_handle(handle); 2984 zerror(zlogp, B_TRUE, "failed to add network device"); 2985 return (-1); 2986 } 2987 /* set up the new IP interface, and add them all later */ 2988 new = malloc(sizeof (*new)); 2989 if (new == NULL) { 2990 zerror(zlogp, B_TRUE, "no memory for %s", 2991 nwiftab.zone_nwif_physical); 2992 zonecfg_fini_handle(handle); 2993 free_ip_interface(zalist); 2994 } 2995 bzero(new, sizeof (*new)); 2996 new->za_nwiftab = nwiftab; 2997 new->za_linkid = linkid; 2998 zalist = add_ip_interface(zalist, new); 2999 } 3000 if (zalist != NULL) { 3001 if ((errno = add_net(zlogp, zoneid, zalist)) != 0) { 3002 (void) zonecfg_endnwifent(handle); 3003 zonecfg_fini_handle(handle); 3004 zerror(zlogp, B_TRUE, "failed to add address"); 3005 free_ip_interface(zalist); 3006 return (-1); 3007 } 3008 free_ip_interface(zalist); 3009 } 3010 (void) zonecfg_endnwifent(handle); 3011 zonecfg_fini_handle(handle); 3012 3013 if (prof != NULL && added) { 3014 if (di_prof_commit(prof) != 0) { 3015 zerror(zlogp, B_TRUE, "failed to commit profile"); 3016 return (-1); 3017 } 3018 } 3019 if (prof != NULL) 3020 di_prof_fini(prof); 3021 3022 return (0); 3023 } 3024 3025 /* 3026 * Retrieve the list of datalink IDs assigned to a zone. 3027 * 3028 * On return, *count will be updated with the total number of links and, if it 3029 * is not NULL, **linksp will be updated to point to allocated memory 3030 * containing the link IDs. This should be passed to free() when the caller is 3031 * finished with it. 
3032 */ 3033 static int 3034 fetch_zone_datalinks(zlog_t *zlogp, zoneid_t zoneid, int *countp, 3035 datalink_id_t **linksp) 3036 { 3037 datalink_id_t *links = NULL; 3038 int links_size = 0; 3039 int num_links; 3040 3041 if (linksp != NULL) 3042 *linksp = NULL; 3043 *countp = 0; 3044 3045 num_links = 0; 3046 if (zone_list_datalink(zoneid, &num_links, NULL) != 0) { 3047 zerror(zlogp, B_TRUE, 3048 "unable to determine number of network interfaces"); 3049 return (-1); 3050 } 3051 3052 if (num_links == 0) 3053 return (0); 3054 3055 /* If linkp is NULL, the caller only wants the count. */ 3056 if (linksp == NULL) { 3057 *countp = num_links; 3058 return (0); 3059 } 3060 3061 do { 3062 datalink_id_t *p; 3063 3064 links_size = num_links; 3065 p = reallocarray(links, links_size, sizeof (datalink_id_t)); 3066 3067 if (p == NULL) { 3068 zerror(zlogp, B_TRUE, 3069 "failed to allocate memory for zone links"); 3070 free(links); 3071 return (-1); 3072 } 3073 links = p; 3074 3075 if (zone_list_datalink(zoneid, &num_links, links) != 0) { 3076 zerror(zlogp, B_TRUE, "failed to list zone links"); 3077 free(links); 3078 return (-1); 3079 } 3080 } while (links_size < num_links); 3081 3082 *countp = num_links; 3083 *linksp = links; 3084 3085 return (0); 3086 } 3087 3088 static int 3089 remove_datalink_pool(zlog_t *zlogp, zoneid_t zoneid) 3090 { 3091 ushort_t flags; 3092 zone_iptype_t iptype; 3093 int i; 3094 dladm_status_t err; 3095 3096 if (strlen(pool_name) == 0) 3097 return (0); 3098 3099 if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags, 3100 sizeof (flags)) < 0) { 3101 if (vplat_get_iptype(zlogp, &iptype) < 0) { 3102 zerror(zlogp, B_FALSE, "unable to determine ip-type"); 3103 return (-1); 3104 } 3105 } else { 3106 if (flags & ZF_NET_EXCL) 3107 iptype = ZS_EXCLUSIVE; 3108 else 3109 iptype = ZS_SHARED; 3110 } 3111 3112 if (iptype == ZS_EXCLUSIVE) { 3113 datalink_id_t *dllinks = NULL; 3114 int dlnum = 0; 3115 3116 if (fetch_zone_datalinks(zlogp, zoneid, &dlnum, &dllinks) != 0) 3117 
return (-1); 3118 3119 bzero(pool_name, sizeof (pool_name)); 3120 for (i = 0; i < dlnum; i++) { 3121 err = dladm_set_linkprop(dld_handle, dllinks[i], "pool", 3122 NULL, 0, DLADM_OPT_ACTIVE); 3123 if (err != DLADM_STATUS_OK) { 3124 zerror(zlogp, B_TRUE, 3125 "WARNING: unable to clear pool"); 3126 } 3127 } 3128 free(dllinks); 3129 } 3130 return (0); 3131 } 3132 3133 static int 3134 remove_datalink_protect(zlog_t *zlogp, zoneid_t zoneid) 3135 { 3136 ushort_t flags; 3137 zone_iptype_t iptype; 3138 int i, dlnum = 0; 3139 dladm_status_t dlstatus; 3140 datalink_id_t *dllinks = NULL; 3141 3142 if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags, 3143 sizeof (flags)) < 0) { 3144 if (vplat_get_iptype(zlogp, &iptype) < 0) { 3145 zerror(zlogp, B_FALSE, "unable to determine ip-type"); 3146 return (-1); 3147 } 3148 } else { 3149 if (flags & ZF_NET_EXCL) 3150 iptype = ZS_EXCLUSIVE; 3151 else 3152 iptype = ZS_SHARED; 3153 } 3154 3155 if (iptype != ZS_EXCLUSIVE) 3156 return (0); 3157 3158 /* 3159 * Get the datalink count and for each datalink, attempt to clear the 3160 * protection and allowed_ips properties. 
3161 */ 3162 3163 if (fetch_zone_datalinks(zlogp, zoneid, &dlnum, &dllinks) != 0) 3164 return (-1); 3165 3166 for (i = 0; i < dlnum; i++) { 3167 char dlerr[DLADM_STRSIZE]; 3168 3169 dlstatus = dladm_set_linkprop(dld_handle, dllinks[i], 3170 "protection", NULL, 0, DLADM_OPT_ACTIVE); 3171 if (dlstatus == DLADM_STATUS_NOTFOUND) { 3172 /* datalink does not belong to the GZ */ 3173 continue; 3174 } 3175 if (dlstatus != DLADM_STATUS_OK) { 3176 zerror(zlogp, B_FALSE, 3177 "clear link %d 'protection' link property: %s", 3178 dllinks[i], dladm_status2str(dlstatus, dlerr)); 3179 } 3180 3181 dlstatus = dladm_set_linkprop(dld_handle, dllinks[i], 3182 "allowed-ips", NULL, 0, DLADM_OPT_ACTIVE); 3183 if (dlstatus != DLADM_STATUS_OK) { 3184 zerror(zlogp, B_FALSE, 3185 "clear link %d 'allowed-ips' link property: %s", 3186 dllinks[i], dladm_status2str(dlstatus, dlerr)); 3187 } 3188 } 3189 free(dllinks); 3190 return (0); 3191 } 3192 3193 static int 3194 unconfigure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid) 3195 { 3196 datalink_id_t *dllinks; 3197 int dlnum = 0; 3198 uint_t i; 3199 3200 /* 3201 * The kernel shutdown callback for the dls module should have removed 3202 * all datalinks from this zone. If any remain, then there's a 3203 * problem. 3204 */ 3205 3206 if (fetch_zone_datalinks(zlogp, zoneid, &dlnum, &dllinks) != 0) 3207 return (-1); 3208 3209 if (dlnum == 0) 3210 return (0); 3211 3212 /* 3213 * There are some datalinks left in the zone. The most likely cause of 3214 * this is that the datalink-management daemon (dlmgmtd) was not 3215 * running when the zone was shut down. That prevented the kernel from 3216 * doing the required upcall to move the links back to the GZ. To 3217 * attempt recovery, do that now. 
3218 */ 3219 3220 for (i = 0; i < dlnum; i++) { 3221 char dlerr[DLADM_STRSIZE]; 3222 dladm_status_t status; 3223 uint32_t link_flags; 3224 datalink_id_t link = dllinks[i]; 3225 char *prop_vals[] = { GLOBAL_ZONENAME }; 3226 3227 status = dladm_datalink_id2info(dld_handle, link, 3228 &link_flags, NULL, NULL, NULL, 0); 3229 3230 if (status != DLADM_STATUS_OK) { 3231 zerror(zlogp, B_FALSE, 3232 "failed to get link info for %u: %s", 3233 link, dladm_status2str(status, dlerr)); 3234 continue; 3235 } 3236 3237 if (link_flags & DLADM_OPT_TRANSIENT) 3238 continue; 3239 3240 status = dladm_set_linkprop(dld_handle, link, "zone", 3241 prop_vals, 1, DLADM_OPT_ACTIVE); 3242 3243 if (status != DLADM_STATUS_OK) { 3244 zerror(zlogp, B_FALSE, 3245 "failed to move link %u to GZ: %s", 3246 link, dladm_status2str(status, dlerr)); 3247 } 3248 } 3249 3250 free(dllinks); 3251 3252 /* Check again and log a message if links remain */ 3253 3254 if (fetch_zone_datalinks(zlogp, zoneid, &dlnum, NULL) != 0) 3255 return (-1); 3256 3257 if (dlnum == 0) 3258 return (0); 3259 3260 zerror(zlogp, B_FALSE, "%d datalink(s) remain in zone after shutdown", 3261 dlnum); 3262 3263 return (-1); 3264 } 3265 3266 static int 3267 tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid, 3268 const struct sockaddr_storage *local, const struct sockaddr_storage *remote) 3269 { 3270 int fd; 3271 struct strioctl ioc; 3272 tcp_ioc_abort_conn_t conn; 3273 int error; 3274 3275 conn.ac_local = *local; 3276 conn.ac_remote = *remote; 3277 conn.ac_start = TCPS_SYN_SENT; 3278 conn.ac_end = TCPS_TIME_WAIT; 3279 conn.ac_zoneid = zoneid; 3280 3281 ioc.ic_cmd = TCP_IOC_ABORT_CONN; 3282 ioc.ic_timout = -1; /* infinite timeout */ 3283 ioc.ic_len = sizeof (conn); 3284 ioc.ic_dp = (char *)&conn; 3285 3286 if ((fd = open("/dev/tcp", O_RDONLY)) < 0) { 3287 zerror(zlogp, B_TRUE, "unable to open %s", "/dev/tcp"); 3288 return (-1); 3289 } 3290 3291 error = ioctl(fd, I_STR, &ioc); 3292 (void) close(fd); 3293 if (error == 0 || errno == ENOENT) /* 
ENOENT is not an error */ 3294 return (0); 3295 return (-1); 3296 } 3297 3298 static int 3299 tcp_abort_connections(zlog_t *zlogp, zoneid_t zoneid) 3300 { 3301 struct sockaddr_storage l, r; 3302 struct sockaddr_in *local, *remote; 3303 struct sockaddr_in6 *local6, *remote6; 3304 int error; 3305 3306 /* 3307 * Abort IPv4 connections. 3308 */ 3309 bzero(&l, sizeof (*local)); 3310 local = (struct sockaddr_in *)&l; 3311 local->sin_family = AF_INET; 3312 local->sin_addr.s_addr = INADDR_ANY; 3313 local->sin_port = 0; 3314 3315 bzero(&r, sizeof (*remote)); 3316 remote = (struct sockaddr_in *)&r; 3317 remote->sin_family = AF_INET; 3318 remote->sin_addr.s_addr = INADDR_ANY; 3319 remote->sin_port = 0; 3320 3321 if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0) 3322 return (error); 3323 3324 /* 3325 * Abort IPv6 connections. 3326 */ 3327 bzero(&l, sizeof (*local6)); 3328 local6 = (struct sockaddr_in6 *)&l; 3329 local6->sin6_family = AF_INET6; 3330 local6->sin6_port = 0; 3331 local6->sin6_addr = in6addr_any; 3332 3333 bzero(&r, sizeof (*remote6)); 3334 remote6 = (struct sockaddr_in6 *)&r; 3335 remote6->sin6_family = AF_INET6; 3336 remote6->sin6_port = 0; 3337 remote6->sin6_addr = in6addr_any; 3338 3339 if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0) 3340 return (error); 3341 return (0); 3342 } 3343 3344 static int 3345 get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd) 3346 { 3347 int error = -1; 3348 zone_dochandle_t handle; 3349 char *privname = NULL; 3350 3351 if ((handle = zonecfg_init_handle()) == NULL) { 3352 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 3353 return (-1); 3354 } 3355 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 3356 zerror(zlogp, B_FALSE, "invalid configuration"); 3357 zonecfg_fini_handle(handle); 3358 return (-1); 3359 } 3360 3361 if (ALT_MOUNT(mount_cmd)) { 3362 zone_iptype_t iptype; 3363 const char *curr_iptype; 3364 3365 if (zonecfg_get_iptype(handle, &iptype) != Z_OK) { 3366 
zerror(zlogp, B_TRUE, "unable to determine ip-type"); 3367 zonecfg_fini_handle(handle); 3368 return (-1); 3369 } 3370 3371 switch (iptype) { 3372 case ZS_SHARED: 3373 curr_iptype = "shared"; 3374 break; 3375 case ZS_EXCLUSIVE: 3376 curr_iptype = "exclusive"; 3377 break; 3378 default: 3379 zerror(zlogp, B_FALSE, "bad ip-type"); 3380 zonecfg_fini_handle(handle); 3381 return (-1); 3382 } 3383 3384 if (zonecfg_default_privset(privs, curr_iptype) == Z_OK) { 3385 zonecfg_fini_handle(handle); 3386 return (0); 3387 } 3388 zerror(zlogp, B_FALSE, 3389 "failed to determine the zone's default privilege set"); 3390 zonecfg_fini_handle(handle); 3391 return (-1); 3392 } 3393 3394 switch (zonecfg_get_privset(handle, privs, &privname)) { 3395 case Z_OK: 3396 error = 0; 3397 break; 3398 case Z_PRIV_PROHIBITED: 3399 zerror(zlogp, B_FALSE, "privilege \"%s\" is not permitted " 3400 "within the zone's privilege set", privname); 3401 break; 3402 case Z_PRIV_REQUIRED: 3403 zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing " 3404 "from the zone's privilege set", privname); 3405 break; 3406 case Z_PRIV_UNKNOWN: 3407 zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified " 3408 "in the zone's privilege set", privname); 3409 break; 3410 default: 3411 zerror(zlogp, B_FALSE, "failed to determine the zone's " 3412 "privilege set"); 3413 break; 3414 } 3415 3416 free(privname); 3417 zonecfg_fini_handle(handle); 3418 return (error); 3419 } 3420 3421 static int 3422 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) 3423 { 3424 nvlist_t *nvl = NULL; 3425 char *nvl_packed = NULL; 3426 size_t nvl_size = 0; 3427 nvlist_t **nvlv = NULL; 3428 int rctlcount = 0; 3429 int error = -1; 3430 zone_dochandle_t handle; 3431 struct zone_rctltab rctltab; 3432 rctlblk_t *rctlblk = NULL; 3433 uint64_t maxlwps; 3434 uint64_t maxprocs; 3435 int rproc, rlwp; 3436 3437 *bufp = NULL; 3438 *bufsizep = 0; 3439 3440 if ((handle = zonecfg_init_handle()) == NULL) { 3441 zerror(zlogp, B_TRUE, "getting zone 
configuration handle"); 3442 return (-1); 3443 } 3444 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 3445 zerror(zlogp, B_FALSE, "invalid configuration"); 3446 zonecfg_fini_handle(handle); 3447 return (-1); 3448 } 3449 3450 rctltab.zone_rctl_valptr = NULL; 3451 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) { 3452 zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc"); 3453 goto out; 3454 } 3455 3456 /* 3457 * Allow the administrator to control both the maximum number of 3458 * process table slots, and the maximum number of lwps, with a single 3459 * max-processes or max-lwps property. If only the max-processes 3460 * property is set, we add a max-lwps property with a limit derived 3461 * from max-processes. If only the max-lwps property is set, we add a 3462 * max-processes property with the same limit as max-lwps. 3463 */ 3464 rproc = zonecfg_get_aliased_rctl(handle, ALIAS_MAXPROCS, &maxprocs); 3465 rlwp = zonecfg_get_aliased_rctl(handle, ALIAS_MAXLWPS, &maxlwps); 3466 if (rproc == Z_OK && rlwp == Z_NO_ENTRY) { 3467 if (zonecfg_set_aliased_rctl(handle, ALIAS_MAXLWPS, 3468 maxprocs * LWPS_PER_PROCESS) != Z_OK) { 3469 zerror(zlogp, B_FALSE, "unable to set max-lwps alias"); 3470 goto out; 3471 } 3472 } else if (rlwp == Z_OK && rproc == Z_NO_ENTRY) { 3473 /* no scaling for max-proc value */ 3474 if (zonecfg_set_aliased_rctl(handle, ALIAS_MAXPROCS, 3475 maxlwps) != Z_OK) { 3476 zerror(zlogp, B_FALSE, 3477 "unable to set max-processes alias"); 3478 goto out; 3479 } 3480 } 3481 3482 if (zonecfg_setrctlent(handle) != Z_OK) { 3483 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent"); 3484 goto out; 3485 } 3486 3487 if ((rctlblk = malloc(rctlblk_size())) == NULL) { 3488 zerror(zlogp, B_TRUE, "memory allocation failed"); 3489 goto out; 3490 } 3491 while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) { 3492 struct zone_rctlvaltab *rctlval; 3493 uint_t i, count; 3494 const char *name = rctltab.zone_rctl_name; 3495 3496 /* zoneadm should have already warned 
about unknown rctls. */ 3497 if (!zonecfg_is_rctl(name)) { 3498 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 3499 rctltab.zone_rctl_valptr = NULL; 3500 continue; 3501 } 3502 count = 0; 3503 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL; 3504 rctlval = rctlval->zone_rctlval_next) { 3505 count++; 3506 } 3507 if (count == 0) { /* ignore */ 3508 continue; /* Nothing to free */ 3509 } 3510 if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL) 3511 goto out; 3512 i = 0; 3513 for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL; 3514 rctlval = rctlval->zone_rctlval_next, i++) { 3515 if (nvlist_alloc(&nvlv[i], NV_UNIQUE_NAME, 0) != 0) { 3516 zerror(zlogp, B_TRUE, "%s failed", 3517 "nvlist_alloc"); 3518 goto out; 3519 } 3520 if (zonecfg_construct_rctlblk(rctlval, rctlblk) 3521 != Z_OK) { 3522 zerror(zlogp, B_FALSE, "invalid rctl value: " 3523 "(priv=%s,limit=%s,action=%s)", 3524 rctlval->zone_rctlval_priv, 3525 rctlval->zone_rctlval_limit, 3526 rctlval->zone_rctlval_action); 3527 goto out; 3528 } 3529 if (!zonecfg_valid_rctl(name, rctlblk)) { 3530 zerror(zlogp, B_FALSE, 3531 "(priv=%s,limit=%s,action=%s) is not a " 3532 "valid value for rctl '%s'", 3533 rctlval->zone_rctlval_priv, 3534 rctlval->zone_rctlval_limit, 3535 rctlval->zone_rctlval_action, 3536 name); 3537 goto out; 3538 } 3539 if (nvlist_add_uint64(nvlv[i], "privilege", 3540 rctlblk_get_privilege(rctlblk)) != 0) { 3541 zerror(zlogp, B_FALSE, "%s failed", 3542 "nvlist_add_uint64"); 3543 goto out; 3544 } 3545 if (nvlist_add_uint64(nvlv[i], "limit", 3546 rctlblk_get_value(rctlblk)) != 0) { 3547 zerror(zlogp, B_FALSE, "%s failed", 3548 "nvlist_add_uint64"); 3549 goto out; 3550 } 3551 if (nvlist_add_uint64(nvlv[i], "action", 3552 (uint_t)rctlblk_get_local_action(rctlblk, NULL)) 3553 != 0) { 3554 zerror(zlogp, B_FALSE, "%s failed", 3555 "nvlist_add_uint64"); 3556 goto out; 3557 } 3558 } 3559 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 3560 rctltab.zone_rctl_valptr = NULL; 3561 if 
(nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count) 3562 != 0) { 3563 zerror(zlogp, B_FALSE, "%s failed", 3564 "nvlist_add_nvlist_array"); 3565 goto out; 3566 } 3567 for (i = 0; i < count; i++) 3568 nvlist_free(nvlv[i]); 3569 free(nvlv); 3570 nvlv = NULL; 3571 rctlcount++; 3572 } 3573 (void) zonecfg_endrctlent(handle); 3574 3575 if (rctlcount == 0) { 3576 error = 0; 3577 goto out; 3578 } 3579 if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0) 3580 != 0) { 3581 zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack"); 3582 goto out; 3583 } 3584 3585 error = 0; 3586 *bufp = nvl_packed; 3587 *bufsizep = nvl_size; 3588 3589 out: 3590 free(rctlblk); 3591 zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); 3592 if (error && nvl_packed != NULL) 3593 free(nvl_packed); 3594 nvlist_free(nvl); 3595 if (nvlv != NULL) 3596 free(nvlv); 3597 if (handle != NULL) 3598 zonecfg_fini_handle(handle); 3599 return (error); 3600 } 3601 3602 static int 3603 get_implicit_datasets(zlog_t *zlogp, char **retstr) 3604 { 3605 char cmdbuf[2 * MAXPATHLEN]; 3606 3607 if (query_hook[0] == '\0') 3608 return (0); 3609 3610 if (snprintf(cmdbuf, sizeof (cmdbuf), "%s datasets", query_hook) 3611 > sizeof (cmdbuf)) 3612 return (-1); 3613 3614 if (do_subproc(zlogp, cmdbuf, retstr) != 0) 3615 return (-1); 3616 3617 return (0); 3618 } 3619 3620 static int 3621 get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep) 3622 { 3623 zone_dochandle_t handle; 3624 struct zone_dstab dstab; 3625 size_t total, offset, len; 3626 int error = -1; 3627 char *str = NULL; 3628 char *implicit_datasets = NULL; 3629 int implicit_len = 0; 3630 3631 *bufp = NULL; 3632 *bufsizep = 0; 3633 3634 if ((handle = zonecfg_init_handle()) == NULL) { 3635 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 3636 return (-1); 3637 } 3638 if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 3639 zerror(zlogp, B_FALSE, "invalid configuration"); 3640 zonecfg_fini_handle(handle); 3641 return (-1); 3642 } 3643 
3644 if (get_implicit_datasets(zlogp, &implicit_datasets) != 0) { 3645 zerror(zlogp, B_FALSE, "getting implicit datasets failed"); 3646 goto out; 3647 } 3648 3649 if (zonecfg_setdsent(handle) != Z_OK) { 3650 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent"); 3651 goto out; 3652 } 3653 3654 total = 0; 3655 while (zonecfg_getdsent(handle, &dstab) == Z_OK) 3656 total += strlen(dstab.zone_dataset_name) + 1; 3657 (void) zonecfg_enddsent(handle); 3658 3659 if (implicit_datasets != NULL) 3660 implicit_len = strlen(implicit_datasets); 3661 if (implicit_len > 0) 3662 total += implicit_len + 1; 3663 3664 if (total == 0) { 3665 error = 0; 3666 goto out; 3667 } 3668 3669 if ((str = malloc(total)) == NULL) { 3670 zerror(zlogp, B_TRUE, "memory allocation failed"); 3671 goto out; 3672 } 3673 3674 if (zonecfg_setdsent(handle) != Z_OK) { 3675 zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent"); 3676 goto out; 3677 } 3678 offset = 0; 3679 while (zonecfg_getdsent(handle, &dstab) == Z_OK) { 3680 len = strlen(dstab.zone_dataset_name); 3681 (void) strlcpy(str + offset, dstab.zone_dataset_name, 3682 total - offset); 3683 offset += len; 3684 if (offset < total - 1) 3685 str[offset++] = ','; 3686 } 3687 (void) zonecfg_enddsent(handle); 3688 3689 if (implicit_len > 0) 3690 (void) strlcpy(str + offset, implicit_datasets, total - offset); 3691 3692 error = 0; 3693 *bufp = str; 3694 *bufsizep = total; 3695 3696 out: 3697 if (error != 0 && str != NULL) 3698 free(str); 3699 if (handle != NULL) 3700 zonecfg_fini_handle(handle); 3701 if (implicit_datasets != NULL) 3702 free(implicit_datasets); 3703 3704 return (error); 3705 } 3706 3707 static int 3708 validate_datasets(zlog_t *zlogp) 3709 { 3710 zone_dochandle_t handle; 3711 struct zone_dstab dstab; 3712 zfs_handle_t *zhp; 3713 libzfs_handle_t *hdl; 3714 3715 if ((handle = zonecfg_init_handle()) == NULL) { 3716 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 3717 return (-1); 3718 } 3719 if 
(zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { 3720 zerror(zlogp, B_FALSE, "invalid configuration"); 3721 zonecfg_fini_handle(handle); 3722 return (-1); 3723 } 3724 3725 if (zonecfg_setdsent(handle) != Z_OK) { 3726 zerror(zlogp, B_FALSE, "invalid configuration"); 3727 zonecfg_fini_handle(handle); 3728 return (-1); 3729 } 3730 3731 if ((hdl = libzfs_init()) == NULL) { 3732 zerror(zlogp, B_FALSE, "opening ZFS library"); 3733 zonecfg_fini_handle(handle); 3734 return (-1); 3735 } 3736 3737 while (zonecfg_getdsent(handle, &dstab) == Z_OK) { 3738 3739 if ((zhp = zfs_open(hdl, dstab.zone_dataset_name, 3740 ZFS_TYPE_FILESYSTEM)) == NULL) { 3741 zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'", 3742 dstab.zone_dataset_name); 3743 zonecfg_fini_handle(handle); 3744 libzfs_fini(hdl); 3745 return (-1); 3746 } 3747 3748 /* 3749 * Automatically set the 'zoned' property. We check the value 3750 * first because we'll get EPERM if it is already set. 3751 */ 3752 if (!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) && 3753 zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_ZONED), 3754 "on") != 0) { 3755 zerror(zlogp, B_FALSE, "cannot set 'zoned' " 3756 "property for ZFS dataset '%s'\n", 3757 dstab.zone_dataset_name); 3758 zonecfg_fini_handle(handle); 3759 zfs_close(zhp); 3760 libzfs_fini(hdl); 3761 return (-1); 3762 } 3763 3764 zfs_close(zhp); 3765 } 3766 (void) zonecfg_enddsent(handle); 3767 3768 zonecfg_fini_handle(handle); 3769 libzfs_fini(hdl); 3770 3771 return (0); 3772 } 3773 3774 /* 3775 * Return true if the path is its own zfs file system. We determine this 3776 * by stat-ing the path to see if it is zfs and stat-ing the parent to see 3777 * if it is a different fs. 
3778 */ 3779 boolean_t 3780 is_zonepath_zfs(char *zonepath) 3781 { 3782 int res; 3783 char *path; 3784 char *parent; 3785 struct statvfs64 buf1, buf2; 3786 3787 if (statvfs64(zonepath, &buf1) != 0) 3788 return (B_FALSE); 3789 3790 if (strcmp(buf1.f_basetype, "zfs") != 0) 3791 return (B_FALSE); 3792 3793 if ((path = strdup(zonepath)) == NULL) 3794 return (B_FALSE); 3795 3796 parent = dirname(path); 3797 res = statvfs64(parent, &buf2); 3798 free(path); 3799 3800 if (res != 0) 3801 return (B_FALSE); 3802 3803 if (buf1.f_fsid == buf2.f_fsid) 3804 return (B_FALSE); 3805 3806 return (B_TRUE); 3807 } 3808 3809 /* 3810 * Verify the MAC label in the root dataset for the zone. 3811 * If the label exists, it must match the label configured for the zone. 3812 * Otherwise if there's no label on the dataset, create one here. 3813 */ 3814 3815 static int 3816 validate_rootds_label(zlog_t *zlogp, char *rootpath, m_label_t *zone_sl) 3817 { 3818 int error = -1; 3819 zfs_handle_t *zhp; 3820 libzfs_handle_t *hdl; 3821 m_label_t ds_sl; 3822 char zonepath[MAXPATHLEN]; 3823 char ds_hexsl[MAXNAMELEN]; 3824 3825 if (!is_system_labeled()) 3826 return (0); 3827 3828 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { 3829 zerror(zlogp, B_TRUE, "unable to determine zone path"); 3830 return (-1); 3831 } 3832 3833 if (!is_zonepath_zfs(zonepath)) 3834 return (0); 3835 3836 if ((hdl = libzfs_init()) == NULL) { 3837 zerror(zlogp, B_FALSE, "opening ZFS library"); 3838 return (-1); 3839 } 3840 3841 if ((zhp = zfs_path_to_zhandle(hdl, rootpath, 3842 ZFS_TYPE_FILESYSTEM)) == NULL) { 3843 zerror(zlogp, B_FALSE, "cannot open ZFS dataset for path '%s'", 3844 rootpath); 3845 libzfs_fini(hdl); 3846 return (-1); 3847 } 3848 3849 /* Get the mlslabel property if it exists. 
*/ 3850 if ((zfs_prop_get(zhp, ZFS_PROP_MLSLABEL, ds_hexsl, MAXNAMELEN, 3851 NULL, NULL, 0, B_TRUE) != 0) || 3852 (strcmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)) { 3853 char *str2 = NULL; 3854 3855 /* 3856 * No label on the dataset (or default only); create one. 3857 * (Only do this automatic labeling for the labeled brand.) 3858 */ 3859 if (strcmp(brand_name, LABELED_BRAND_NAME) != 0) { 3860 error = 0; 3861 goto out; 3862 } 3863 3864 error = l_to_str_internal(zone_sl, &str2); 3865 if (error) 3866 goto out; 3867 if (str2 == NULL) { 3868 error = -1; 3869 goto out; 3870 } 3871 if ((error = zfs_prop_set(zhp, 3872 zfs_prop_to_name(ZFS_PROP_MLSLABEL), str2)) != 0) { 3873 zerror(zlogp, B_FALSE, "cannot set 'mlslabel' " 3874 "property for root dataset at '%s'\n", rootpath); 3875 } 3876 free(str2); 3877 goto out; 3878 } 3879 3880 /* Convert the retrieved dataset label to binary form. */ 3881 error = hexstr_to_label(ds_hexsl, &ds_sl); 3882 if (error) { 3883 zerror(zlogp, B_FALSE, "invalid 'mlslabel' " 3884 "property on root dataset at '%s'\n", rootpath); 3885 goto out; /* exit with error */ 3886 } 3887 3888 /* 3889 * Perform a MAC check by comparing the zone label with the 3890 * dataset label. 
3891 */ 3892 error = (!blequal(zone_sl, &ds_sl)); 3893 if (error) 3894 zerror(zlogp, B_FALSE, "Rootpath dataset has mismatched label"); 3895 out: 3896 zfs_close(zhp); 3897 libzfs_fini(hdl); 3898 3899 return (error); 3900 } 3901 3902 /* 3903 * Mount lower level home directories into/from current zone 3904 * Share exported directories specified in dfstab for zone 3905 */ 3906 static int 3907 tsol_mounts(zlog_t *zlogp, char *zone_name, char *rootpath) 3908 { 3909 zoneid_t *zids = NULL; 3910 priv_set_t *zid_privs; 3911 const priv_impl_info_t *ip = NULL; 3912 uint_t nzents_saved; 3913 uint_t nzents; 3914 int i; 3915 char readonly[] = "ro"; 3916 struct zone_fstab lower_fstab; 3917 char *argv[4]; 3918 3919 if (!is_system_labeled()) 3920 return (0); 3921 3922 if (zid_label == NULL) { 3923 zid_label = m_label_alloc(MAC_LABEL); 3924 if (zid_label == NULL) 3925 return (-1); 3926 } 3927 3928 /* Make sure our zone has an /export/home dir */ 3929 (void) make_one_dir(zlogp, rootpath, "/export/home", 3930 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, DEFAULT_DIR_GROUP); 3931 3932 lower_fstab.zone_fs_raw[0] = '\0'; 3933 (void) strlcpy(lower_fstab.zone_fs_type, MNTTYPE_LOFS, 3934 sizeof (lower_fstab.zone_fs_type)); 3935 lower_fstab.zone_fs_options = NULL; 3936 (void) zonecfg_add_fs_option(&lower_fstab, readonly); 3937 3938 /* 3939 * Get the list of zones from the kernel 3940 */ 3941 if (zone_list(NULL, &nzents) != 0) { 3942 zerror(zlogp, B_TRUE, "unable to list zones"); 3943 zonecfg_free_fs_option_list(lower_fstab.zone_fs_options); 3944 return (-1); 3945 } 3946 again: 3947 if (nzents == 0) { 3948 zonecfg_free_fs_option_list(lower_fstab.zone_fs_options); 3949 return (-1); 3950 } 3951 3952 zids = malloc(nzents * sizeof (zoneid_t)); 3953 if (zids == NULL) { 3954 zerror(zlogp, B_TRUE, "memory allocation failed"); 3955 return (-1); 3956 } 3957 nzents_saved = nzents; 3958 3959 if (zone_list(zids, &nzents) != 0) { 3960 zerror(zlogp, B_TRUE, "unable to list zones"); 3961 
zonecfg_free_fs_option_list(lower_fstab.zone_fs_options); 3962 free(zids); 3963 return (-1); 3964 } 3965 if (nzents != nzents_saved) { 3966 /* list changed, try again */ 3967 free(zids); 3968 goto again; 3969 } 3970 3971 ip = getprivimplinfo(); 3972 if ((zid_privs = priv_allocset()) == NULL) { 3973 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 3974 zonecfg_free_fs_option_list( 3975 lower_fstab.zone_fs_options); 3976 free(zids); 3977 return (-1); 3978 } 3979 3980 for (i = 0; i < nzents; i++) { 3981 char zid_name[ZONENAME_MAX]; 3982 zone_state_t zid_state; 3983 char zid_rpath[MAXPATHLEN]; 3984 struct stat stat_buf; 3985 3986 if (zids[i] == GLOBAL_ZONEID) 3987 continue; 3988 3989 if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1) 3990 continue; 3991 3992 /* 3993 * Do special setup for the zone we are booting 3994 */ 3995 if (strcmp(zid_name, zone_name) == 0) { 3996 struct zone_fstab autofs_fstab; 3997 char map_path[MAXPATHLEN]; 3998 int fd; 3999 4000 /* 4001 * Create auto_home_<zone> map for this zone 4002 * in the global zone. The non-global zone entry 4003 * will be created by automount when the zone 4004 * is booted. 4005 */ 4006 4007 (void) snprintf(autofs_fstab.zone_fs_special, 4008 MAXPATHLEN, "auto_home_%s", zid_name); 4009 4010 (void) snprintf(autofs_fstab.zone_fs_dir, MAXPATHLEN, 4011 "/zone/%s/home", zid_name); 4012 4013 (void) snprintf(map_path, sizeof (map_path), 4014 "/etc/%s", autofs_fstab.zone_fs_special); 4015 /* 4016 * If the map file doesn't exist create a template 4017 */ 4018 if ((fd = open(map_path, O_RDWR | O_CREAT | O_EXCL, 4019 S_IRUSR | S_IWUSR | S_IRGRP| S_IROTH)) != -1) { 4020 int len; 4021 char map_rec[MAXPATHLEN]; 4022 4023 len = snprintf(map_rec, sizeof (map_rec), 4024 "+%s\n*\t-fstype=lofs\t:%s/export/home/&\n", 4025 autofs_fstab.zone_fs_special, rootpath); 4026 (void) write(fd, map_rec, len); 4027 (void) close(fd); 4028 } 4029 4030 /* 4031 * Mount auto_home_<zone> in the global zone if absent. 
4032 * If it's already of type autofs, then 4033 * don't mount it again. 4034 */ 4035 if ((stat(autofs_fstab.zone_fs_dir, &stat_buf) == -1) || 4036 strcmp(stat_buf.st_fstype, MNTTYPE_AUTOFS) != 0) { 4037 char optstr[] = "indirect,ignore,nobrowse"; 4038 4039 (void) make_one_dir(zlogp, "", 4040 autofs_fstab.zone_fs_dir, DEFAULT_DIR_MODE, 4041 DEFAULT_DIR_USER, DEFAULT_DIR_GROUP); 4042 4043 /* 4044 * Mount will fail if automounter has already 4045 * processed the auto_home_<zonename> map 4046 */ 4047 (void) domount(zlogp, MNTTYPE_AUTOFS, optstr, 4048 autofs_fstab.zone_fs_special, 4049 autofs_fstab.zone_fs_dir); 4050 } 4051 continue; 4052 } 4053 4054 4055 if (zone_get_state(zid_name, &zid_state) != Z_OK || 4056 (zid_state != ZONE_STATE_READY && 4057 zid_state != ZONE_STATE_RUNNING)) 4058 /* Skip over zones without mounted filesystems */ 4059 continue; 4060 4061 if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label, 4062 sizeof (m_label_t)) < 0) 4063 /* Skip over zones with unspecified label */ 4064 continue; 4065 4066 if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath, 4067 sizeof (zid_rpath)) == -1) 4068 /* Skip over zones with bad path */ 4069 continue; 4070 4071 if (zone_getattr(zids[i], ZONE_ATTR_PRIVSET, zid_privs, 4072 sizeof (priv_chunk_t) * ip->priv_setsize) == -1) 4073 /* Skip over zones with bad privs */ 4074 continue; 4075 4076 /* 4077 * Reading down is valid according to our label model 4078 * but some customers want to disable it because it 4079 * allows execute down and other possible attacks. 4080 * Therefore, we restrict this feature to zones that 4081 * have the NET_MAC_AWARE privilege which is required 4082 * for NFS read-down semantics. 4083 */ 4084 if ((bldominates(zlabel, zid_label)) && 4085 (priv_ismember(zprivs, PRIV_NET_MAC_AWARE))) { 4086 /* 4087 * Our zone dominates this one. 
4088 * Create a lofs mount from lower zone's /export/home 4089 */ 4090 (void) snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN, 4091 "%s/zone/%s/export/home", rootpath, zid_name); 4092 4093 /* 4094 * If the target is already an LOFS mount 4095 * then don't do it again. 4096 */ 4097 if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) || 4098 strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) { 4099 4100 if (snprintf(lower_fstab.zone_fs_special, 4101 MAXPATHLEN, "%s/export", 4102 zid_rpath) > MAXPATHLEN) 4103 continue; 4104 4105 /* 4106 * Make sure the lower-level home exists 4107 */ 4108 if (make_one_dir(zlogp, 4109 lower_fstab.zone_fs_special, "/home", 4110 DEFAULT_DIR_MODE, DEFAULT_DIR_USER, 4111 DEFAULT_DIR_GROUP) != 0) 4112 continue; 4113 4114 (void) strlcat(lower_fstab.zone_fs_special, 4115 "/home", MAXPATHLEN); 4116 4117 /* 4118 * Mount can fail because the lower-level 4119 * zone may have already done a mount up. 4120 */ 4121 (void) mount_one(zlogp, &lower_fstab, "", 4122 Z_MNT_BOOT); 4123 } 4124 } else if ((bldominates(zid_label, zlabel)) && 4125 (priv_ismember(zid_privs, PRIV_NET_MAC_AWARE))) { 4126 /* 4127 * This zone dominates our zone. 4128 * Create a lofs mount from our zone's /export/home 4129 */ 4130 if (snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN, 4131 "%s/zone/%s/export/home", zid_rpath, 4132 zone_name) > MAXPATHLEN) 4133 continue; 4134 4135 /* 4136 * If the target is already an LOFS mount 4137 * then don't do it again. 4138 */ 4139 if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) || 4140 strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) { 4141 4142 (void) snprintf(lower_fstab.zone_fs_special, 4143 MAXPATHLEN, "%s/export/home", rootpath); 4144 4145 /* 4146 * Mount can fail because the higher-level 4147 * zone may have already done a mount down. 
4148 */ 4149 (void) mount_one(zlogp, &lower_fstab, "", 4150 Z_MNT_BOOT); 4151 } 4152 } 4153 } 4154 zonecfg_free_fs_option_list(lower_fstab.zone_fs_options); 4155 priv_freeset(zid_privs); 4156 free(zids); 4157 4158 /* 4159 * Now share any exported directories from this zone. 4160 * Each zone can have its own dfstab. 4161 */ 4162 4163 argv[0] = "zoneshare"; 4164 argv[1] = "-z"; 4165 argv[2] = zone_name; 4166 argv[3] = NULL; 4167 4168 (void) forkexec(zlogp, "/usr/lib/zones/zoneshare", argv); 4169 /* Don't check for errors since they don't affect the zone */ 4170 4171 return (0); 4172 } 4173 4174 /* 4175 * Unmount lofs mounts from higher level zones 4176 * Unshare nfs exported directories 4177 */ 4178 static void 4179 tsol_unmounts(zlog_t *zlogp, char *zone_name) 4180 { 4181 zoneid_t *zids = NULL; 4182 uint_t nzents_saved; 4183 uint_t nzents; 4184 int i; 4185 char *argv[4]; 4186 char path[MAXPATHLEN]; 4187 4188 if (!is_system_labeled()) 4189 return; 4190 4191 /* 4192 * Get the list of zones from the kernel 4193 */ 4194 if (zone_list(NULL, &nzents) != 0) { 4195 return; 4196 } 4197 4198 if (zid_label == NULL) { 4199 zid_label = m_label_alloc(MAC_LABEL); 4200 if (zid_label == NULL) 4201 return; 4202 } 4203 4204 again: 4205 if (nzents == 0) 4206 return; 4207 4208 zids = malloc(nzents * sizeof (zoneid_t)); 4209 if (zids == NULL) { 4210 zerror(zlogp, B_TRUE, "memory allocation failed"); 4211 return; 4212 } 4213 nzents_saved = nzents; 4214 4215 if (zone_list(zids, &nzents) != 0) { 4216 free(zids); 4217 return; 4218 } 4219 if (nzents != nzents_saved) { 4220 /* list changed, try again */ 4221 free(zids); 4222 goto again; 4223 } 4224 4225 for (i = 0; i < nzents; i++) { 4226 char zid_name[ZONENAME_MAX]; 4227 zone_state_t zid_state; 4228 char zid_rpath[MAXPATHLEN]; 4229 4230 if (zids[i] == GLOBAL_ZONEID) 4231 continue; 4232 4233 if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1) 4234 continue; 4235 4236 /* 4237 * Skip the zone we are halting 4238 */ 4239 if 
(strcmp(zid_name, zone_name) == 0) 4240 continue; 4241 4242 if ((zone_getattr(zids[i], ZONE_ATTR_STATUS, &zid_state, 4243 sizeof (zid_state)) < 0) || 4244 (zid_state < ZONE_IS_READY)) 4245 /* Skip over zones without mounted filesystems */ 4246 continue; 4247 4248 if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label, 4249 sizeof (m_label_t)) < 0) 4250 /* Skip over zones with unspecified label */ 4251 continue; 4252 4253 if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath, 4254 sizeof (zid_rpath)) == -1) 4255 /* Skip over zones with bad path */ 4256 continue; 4257 4258 if (zlabel != NULL && bldominates(zid_label, zlabel)) { 4259 /* 4260 * This zone dominates our zone. 4261 * Unmount the lofs mount of our zone's /export/home 4262 */ 4263 4264 if (snprintf(path, MAXPATHLEN, 4265 "%s/zone/%s/export/home", zid_rpath, 4266 zone_name) > MAXPATHLEN) 4267 continue; 4268 4269 /* Skip over mount failures */ 4270 (void) umount(path); 4271 } 4272 } 4273 free(zids); 4274 4275 /* 4276 * Unmount global zone autofs trigger for this zone 4277 */ 4278 (void) snprintf(path, MAXPATHLEN, "/zone/%s/home", zone_name); 4279 /* Skip over mount failures */ 4280 (void) umount(path); 4281 4282 /* 4283 * Next unshare any exported directories from this zone. 4284 */ 4285 4286 argv[0] = "zoneunshare"; 4287 argv[1] = "-z"; 4288 argv[2] = zone_name; 4289 argv[3] = NULL; 4290 4291 (void) forkexec(zlogp, "/usr/lib/zones/zoneunshare", argv); 4292 /* Don't check for errors since they don't affect the zone */ 4293 4294 /* 4295 * Finally, deallocate any devices in the zone. 4296 */ 4297 4298 argv[0] = "deallocate"; 4299 argv[1] = "-Isz"; 4300 argv[2] = zone_name; 4301 argv[3] = NULL; 4302 4303 (void) forkexec(zlogp, "/usr/sbin/deallocate", argv); 4304 /* Don't check for errors since they don't affect the zone */ 4305 } 4306 4307 /* 4308 * Fetch the Trusted Extensions label and multi-level ports (MLPs) for 4309 * this zone. 
4310 */ 4311 static tsol_zcent_t * 4312 get_zone_label(zlog_t *zlogp, priv_set_t *privs) 4313 { 4314 FILE *fp; 4315 tsol_zcent_t *zcent = NULL; 4316 char line[MAXTNZLEN]; 4317 4318 if ((fp = fopen(TNZONECFG_PATH, "r")) == NULL) { 4319 zerror(zlogp, B_TRUE, "%s", TNZONECFG_PATH); 4320 return (NULL); 4321 } 4322 4323 while (fgets(line, sizeof (line), fp) != NULL) { 4324 /* 4325 * Check for malformed database 4326 */ 4327 if (strlen(line) == MAXTNZLEN - 1) 4328 break; 4329 if ((zcent = tsol_sgetzcent(line, NULL, NULL)) == NULL) 4330 continue; 4331 if (strcmp(zcent->zc_name, zone_name) == 0) 4332 break; 4333 tsol_freezcent(zcent); 4334 zcent = NULL; 4335 } 4336 (void) fclose(fp); 4337 4338 if (zcent == NULL) { 4339 zerror(zlogp, B_FALSE, "zone requires a label assignment. " 4340 "See tnzonecfg(5)"); 4341 } else { 4342 if (zlabel == NULL) 4343 zlabel = m_label_alloc(MAC_LABEL); 4344 /* 4345 * Save this zone's privileges for later read-down processing 4346 */ 4347 if ((zprivs = priv_allocset()) == NULL) { 4348 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 4349 return (NULL); 4350 } else { 4351 priv_copyset(privs, zprivs); 4352 } 4353 } 4354 return (zcent); 4355 } 4356 4357 /* 4358 * Add the Trusted Extensions multi-level ports for this zone. 
4359 */ 4360 static void 4361 set_mlps(zlog_t *zlogp, zoneid_t zoneid, tsol_zcent_t *zcent) 4362 { 4363 tsol_mlp_t *mlp; 4364 tsol_mlpent_t tsme; 4365 4366 if (!is_system_labeled()) 4367 return; 4368 4369 tsme.tsme_zoneid = zoneid; 4370 tsme.tsme_flags = 0; 4371 for (mlp = zcent->zc_private_mlp; !TSOL_MLP_END(mlp); mlp++) { 4372 tsme.tsme_mlp = *mlp; 4373 if (tnmlp(TNDB_LOAD, &tsme) != 0) { 4374 zerror(zlogp, B_TRUE, "cannot set zone-specific MLP " 4375 "on %d-%d/%d", mlp->mlp_port, 4376 mlp->mlp_port_upper, mlp->mlp_ipp); 4377 } 4378 } 4379 4380 tsme.tsme_flags = TSOL_MEF_SHARED; 4381 for (mlp = zcent->zc_shared_mlp; !TSOL_MLP_END(mlp); mlp++) { 4382 tsme.tsme_mlp = *mlp; 4383 if (tnmlp(TNDB_LOAD, &tsme) != 0) { 4384 zerror(zlogp, B_TRUE, "cannot set shared MLP " 4385 "on %d-%d/%d", mlp->mlp_port, 4386 mlp->mlp_port_upper, mlp->mlp_ipp); 4387 } 4388 } 4389 } 4390 4391 static void 4392 remove_mlps(zlog_t *zlogp, zoneid_t zoneid) 4393 { 4394 tsol_mlpent_t tsme; 4395 4396 if (!is_system_labeled()) 4397 return; 4398 4399 (void) memset(&tsme, 0, sizeof (tsme)); 4400 tsme.tsme_zoneid = zoneid; 4401 if (tnmlp(TNDB_FLUSH, &tsme) != 0) 4402 zerror(zlogp, B_TRUE, "cannot flush MLPs"); 4403 } 4404 4405 int 4406 prtmount(const struct mnttab *fs, void *x) 4407 { 4408 zerror((zlog_t *)x, B_FALSE, " %s", fs->mnt_mountp); 4409 return (0); 4410 } 4411 4412 /* 4413 * Look for zones running on the main system that are using this root (or any 4414 * subdirectory of it). Return B_TRUE and print an error if a conflicting zone 4415 * is found or if we can't tell. 
4416 */ 4417 static boolean_t 4418 duplicate_zone_root(zlog_t *zlogp, const char *rootpath) 4419 { 4420 zoneid_t *zids = NULL; 4421 uint_t nzids = 0; 4422 boolean_t retv; 4423 int rlen, zlen; 4424 char zroot[MAXPATHLEN]; 4425 char zonename[ZONENAME_MAX]; 4426 4427 for (;;) { 4428 nzids += 10; 4429 zids = malloc(nzids * sizeof (*zids)); 4430 if (zids == NULL) { 4431 zerror(zlogp, B_TRUE, "memory allocation failed"); 4432 return (B_TRUE); 4433 } 4434 if (zone_list(zids, &nzids) == 0) 4435 break; 4436 free(zids); 4437 } 4438 retv = B_FALSE; 4439 rlen = strlen(rootpath); 4440 while (nzids > 0) { 4441 /* 4442 * Ignore errors; they just mean that the zone has disappeared 4443 * while we were busy. 4444 */ 4445 if (zone_getattr(zids[--nzids], ZONE_ATTR_ROOT, zroot, 4446 sizeof (zroot)) == -1) 4447 continue; 4448 zlen = strlen(zroot); 4449 if (zlen > rlen) 4450 zlen = rlen; 4451 if (strncmp(rootpath, zroot, zlen) == 0 && 4452 (zroot[zlen] == '\0' || zroot[zlen] == '/') && 4453 (rootpath[zlen] == '\0' || rootpath[zlen] == '/')) { 4454 if (getzonenamebyid(zids[nzids], zonename, 4455 sizeof (zonename)) == -1) 4456 (void) snprintf(zonename, sizeof (zonename), 4457 "id %d", (int)zids[nzids]); 4458 zerror(zlogp, B_FALSE, 4459 "zone root %s already in use by zone %s", 4460 rootpath, zonename); 4461 retv = B_TRUE; 4462 break; 4463 } 4464 } 4465 free(zids); 4466 return (retv); 4467 } 4468 4469 /* 4470 * Search for loopback mounts that use this same source node (same device and 4471 * inode). Return B_TRUE if there is one or if we can't tell. 
4472 */ 4473 static boolean_t 4474 duplicate_reachable_path(zlog_t *zlogp, const char *rootpath) 4475 { 4476 struct stat64 rst, zst; 4477 struct mnttab *mnp; 4478 4479 if (stat64(rootpath, &rst) == -1) { 4480 zerror(zlogp, B_TRUE, "can't stat %s", rootpath); 4481 return (B_TRUE); 4482 } 4483 if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1) 4484 return (B_TRUE); 4485 for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) { 4486 if (mnp->mnt_fstype == NULL || 4487 strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0) 4488 continue; 4489 /* We're looking at a loopback mount. Stat it. */ 4490 if (mnp->mnt_special != NULL && 4491 stat64(mnp->mnt_special, &zst) != -1 && 4492 rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) { 4493 zerror(zlogp, B_FALSE, 4494 "zone root %s is reachable through %s", 4495 rootpath, mnp->mnt_mountp); 4496 return (B_TRUE); 4497 } 4498 } 4499 return (B_FALSE); 4500 } 4501 4502 /* 4503 * Set memory cap and pool info for the zone's resource management 4504 * configuration. 4505 */ 4506 static int 4507 setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid) 4508 { 4509 int res; 4510 uint64_t tmp; 4511 struct zone_mcaptab mcap; 4512 char sched[MAXNAMELEN]; 4513 zone_dochandle_t handle = NULL; 4514 char pool_err[128]; 4515 4516 if ((handle = zonecfg_init_handle()) == NULL) { 4517 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 4518 return (Z_BAD_HANDLE); 4519 } 4520 4521 if ((res = zonecfg_get_snapshot_handle(zone_name, handle)) != Z_OK) { 4522 zerror(zlogp, B_FALSE, "invalid configuration"); 4523 zonecfg_fini_handle(handle); 4524 return (res); 4525 } 4526 4527 /* 4528 * If a memory cap is configured, set the cap in the kernel using 4529 * zone_setattr() and make sure the rcapd SMF service is enabled. 
4530 */ 4531 if (zonecfg_getmcapent(handle, &mcap) == Z_OK) { 4532 uint64_t num; 4533 char smf_err[128]; 4534 4535 num = (uint64_t)strtoull(mcap.zone_physmem_cap, NULL, 10); 4536 if (zone_setattr(zoneid, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) { 4537 zerror(zlogp, B_TRUE, "could not set zone memory cap"); 4538 zonecfg_fini_handle(handle); 4539 return (Z_INVAL); 4540 } 4541 4542 if (zonecfg_enable_rcapd(smf_err, sizeof (smf_err)) != Z_OK) { 4543 zerror(zlogp, B_FALSE, "enabling system/rcap service " 4544 "failed: %s", smf_err); 4545 zonecfg_fini_handle(handle); 4546 return (Z_INVAL); 4547 } 4548 } 4549 4550 /* Get the scheduling class set in the zone configuration. */ 4551 if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK && 4552 strlen(sched) > 0) { 4553 if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched, 4554 strlen(sched)) == -1) 4555 zerror(zlogp, B_TRUE, "WARNING: unable to set the " 4556 "default scheduling class"); 4557 4558 } else if (zonecfg_get_aliased_rctl(handle, ALIAS_SHARES, &tmp) 4559 == Z_OK) { 4560 /* 4561 * If the zone has the zone.cpu-shares rctl set then we want to 4562 * use the Fair Share Scheduler (FSS) for processes in the 4563 * zone. Check what scheduling class the zone would be running 4564 * in by default so we can print a warning and modify the class 4565 * if we wouldn't be using FSS. 4566 */ 4567 char class_name[PC_CLNMSZ]; 4568 4569 if (zonecfg_get_dflt_sched_class(handle, class_name, 4570 sizeof (class_name)) != Z_OK) { 4571 zerror(zlogp, B_FALSE, "WARNING: unable to determine " 4572 "the zone's scheduling class"); 4573 4574 } else if (strcmp("FSS", class_name) != 0) { 4575 zerror(zlogp, B_FALSE, "WARNING: The zone.cpu-shares " 4576 "rctl is set but\nFSS is not the default " 4577 "scheduling class for\nthis zone. 
FSS will be " 4578 "used for processes\nin the zone but to get the " 4579 "full benefit of FSS,\nit should be the default " 4580 "scheduling class.\nSee dispadmin(8) for more " 4581 "details."); 4582 4583 if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, "FSS", 4584 strlen("FSS")) == -1) 4585 zerror(zlogp, B_TRUE, "WARNING: unable to set " 4586 "zone scheduling class to FSS"); 4587 } 4588 } 4589 4590 /* 4591 * The next few blocks of code attempt to set up temporary pools as 4592 * well as persistent pools. In all cases we call the functions 4593 * unconditionally. Within each funtion the code will check if the 4594 * zone is actually configured for a temporary pool or persistent pool 4595 * and just return if there is nothing to do. 4596 * 4597 * If we are rebooting we want to attempt to reuse any temporary pool 4598 * that was previously set up. zonecfg_bind_tmp_pool() will do the 4599 * right thing in all cases (reuse or create) based on the current 4600 * zonecfg. 4601 */ 4602 if ((res = zonecfg_bind_tmp_pool(handle, zoneid, pool_err, 4603 sizeof (pool_err))) != Z_OK) { 4604 if (res == Z_POOL || res == Z_POOL_CREATE || res == Z_POOL_BIND) 4605 zerror(zlogp, B_FALSE, "%s: %s\ndedicated-cpu setting " 4606 "cannot be instantiated", zonecfg_strerror(res), 4607 pool_err); 4608 else 4609 zerror(zlogp, B_FALSE, "could not bind zone to " 4610 "temporary pool: %s", zonecfg_strerror(res)); 4611 zonecfg_fini_handle(handle); 4612 return (Z_POOL_BIND); 4613 } 4614 4615 /* 4616 * Check if we need to warn about poold not being enabled. 4617 */ 4618 if (zonecfg_warn_poold(handle)) { 4619 zerror(zlogp, B_FALSE, "WARNING: A range of dedicated-cpus has " 4620 "been specified\nbut the dynamic pool service is not " 4621 "enabled.\nThe system will not dynamically adjust the\n" 4622 "processor allocation within the specified range\n" 4623 "until svc:/system/pools/dynamic is enabled.\n" 4624 "See poold(8)."); 4625 } 4626 4627 /* The following is a warning, not an error. 
*/ 4628 if ((res = zonecfg_bind_pool(handle, zoneid, pool_err, 4629 sizeof (pool_err))) != Z_OK) { 4630 if (res == Z_POOL_BIND) 4631 zerror(zlogp, B_FALSE, "WARNING: unable to bind to " 4632 "pool '%s'; using default pool.", pool_err); 4633 else if (res == Z_POOL) 4634 zerror(zlogp, B_FALSE, "WARNING: %s: %s", 4635 zonecfg_strerror(res), pool_err); 4636 else 4637 zerror(zlogp, B_FALSE, "WARNING: %s", 4638 zonecfg_strerror(res)); 4639 } 4640 4641 /* Update saved pool name in case it has changed */ 4642 (void) zonecfg_get_poolname(handle, zone_name, pool_name, 4643 sizeof (pool_name)); 4644 4645 zonecfg_fini_handle(handle); 4646 return (Z_OK); 4647 } 4648 4649 static void 4650 report_prop_err(zlog_t *zlogp, const char *name, const char *value, int res) 4651 { 4652 switch (res) { 4653 case Z_TOO_BIG: 4654 zerror(zlogp, B_FALSE, "%s property value is too large.", name); 4655 break; 4656 4657 case Z_INVALID_PROPERTY: 4658 zerror(zlogp, B_FALSE, "%s property value \"%s\" is not valid", 4659 name, value); 4660 break; 4661 4662 default: 4663 zerror(zlogp, B_TRUE, "fetching property %s: %d", name, res); 4664 break; 4665 } 4666 } 4667 4668 /* 4669 * Sets the hostid of the new zone based on its configured value. The zone's 4670 * zone_t structure must already exist in kernel memory. 'zlogp' refers to the 4671 * log used to report errors and warnings and must be non-NULL. 'zone_namep' 4672 * is the name of the new zone and must be non-NULL. 'zoneid' is the numeric 4673 * ID of the new zone. 4674 * 4675 * This function returns zero on success and a nonzero error code on failure. 
4676 */ 4677 static int 4678 setup_zone_hostid(zone_dochandle_t handle, zlog_t *zlogp, zoneid_t zoneid) 4679 { 4680 int res; 4681 char hostidp[HW_HOSTID_LEN]; 4682 unsigned int hostid; 4683 4684 res = zonecfg_get_hostid(handle, hostidp, sizeof (hostidp)); 4685 4686 if (res == Z_BAD_PROPERTY) { 4687 return (Z_OK); 4688 } else if (res != Z_OK) { 4689 report_prop_err(zlogp, "hostid", hostidp, res); 4690 return (res); 4691 } 4692 4693 hostid = (unsigned int)strtoul(hostidp, NULL, 16); 4694 if ((res = zone_setattr(zoneid, ZONE_ATTR_HOSTID, &hostid, 4695 sizeof (hostid))) != 0) { 4696 zerror(zlogp, B_TRUE, 4697 "zone hostid is not valid: %s: %d", hostidp, res); 4698 return (Z_SYSTEM); 4699 } 4700 4701 return (res); 4702 } 4703 4704 static int 4705 secflags_parse_check(secflagset_t *flagset, const char *flagstr, char *descr, 4706 zlog_t *zlogp) 4707 { 4708 secflagdelta_t delt; 4709 4710 if (secflags_parse(NULL, flagstr, &delt) == -1) { 4711 zerror(zlogp, B_FALSE, 4712 "failed to parse %s security-flags '%s': %s", 4713 descr, flagstr, strerror(errno)); 4714 return (Z_BAD_PROPERTY); 4715 } 4716 4717 if (delt.psd_ass_active != B_TRUE) { 4718 zerror(zlogp, B_FALSE, 4719 "relative security-flags are not allowed " 4720 "(%s security-flags: '%s')", descr, flagstr); 4721 return (Z_BAD_PROPERTY); 4722 } 4723 4724 secflags_copy(flagset, &delt.psd_assign); 4725 4726 return (Z_OK); 4727 } 4728 4729 static int 4730 setup_zone_secflags(zone_dochandle_t handle, zlog_t *zlogp, zoneid_t zoneid) 4731 { 4732 psecflags_t secflags; 4733 struct zone_secflagstab tab = {0}; 4734 secflagset_t flagset; 4735 int res; 4736 4737 res = zonecfg_lookup_secflags(handle, &tab); 4738 4739 /* 4740 * If the zone configuration does not define any security flag sets, 4741 * then check to see if there are any default flags configured for 4742 * the brand. If so, set these as the default set for this zone and 4743 * the lower/upper sets will become none/all as per the defaults. 
4744 * 4745 * If there is no brand default either, then the flags will be 4746 * defaulted below. 4747 */ 4748 if (res == Z_NO_ENTRY) { 4749 char flagstr[ZONECFG_SECFLAGS_MAX]; 4750 brand_handle_t bh = NULL; 4751 4752 if ((bh = brand_open(brand_name)) == NULL) { 4753 zerror(zlogp, B_FALSE, 4754 "unable to find brand named %s", brand_name); 4755 return (Z_BAD_PROPERTY); 4756 } 4757 if (brand_get_secflags(bh, flagstr, sizeof (flagstr)) != 0) { 4758 brand_close(bh); 4759 zerror(zlogp, B_FALSE, 4760 "unable to retrieve brand default security flags"); 4761 return (Z_BAD_PROPERTY); 4762 } 4763 brand_close(bh); 4764 4765 if (*flagstr != '\0' && 4766 strlcpy(tab.zone_secflags_default, flagstr, 4767 sizeof (tab.zone_secflags_default)) >= 4768 sizeof (tab.zone_secflags_default)) { 4769 zerror(zlogp, B_FALSE, 4770 "brand default security-flags is too long"); 4771 return (Z_BAD_PROPERTY); 4772 } 4773 } else if (res != Z_OK) { 4774 zerror(zlogp, B_FALSE, 4775 "security-flags property is invalid: %d", res); 4776 return (res); 4777 } 4778 4779 if (strlen(tab.zone_secflags_lower) == 0) { 4780 (void) strlcpy(tab.zone_secflags_lower, "none", 4781 sizeof (tab.zone_secflags_lower)); 4782 } 4783 if (strlen(tab.zone_secflags_default) == 0) { 4784 (void) strlcpy(tab.zone_secflags_default, 4785 tab.zone_secflags_lower, 4786 sizeof (tab.zone_secflags_default)); 4787 } 4788 if (strlen(tab.zone_secflags_upper) == 0) { 4789 (void) strlcpy(tab.zone_secflags_upper, "all", 4790 sizeof (tab.zone_secflags_upper)); 4791 } 4792 4793 if ((res = secflags_parse_check(&flagset, tab.zone_secflags_default, 4794 "default", zlogp)) != Z_OK) { 4795 return (res); 4796 } else { 4797 secflags_copy(&secflags.psf_inherit, &flagset); 4798 secflags_copy(&secflags.psf_effective, &flagset); 4799 } 4800 4801 if ((res = secflags_parse_check(&flagset, tab.zone_secflags_lower, 4802 "lower", zlogp)) != Z_OK) { 4803 return (res); 4804 } else { 4805 secflags_copy(&secflags.psf_lower, &flagset); 4806 } 4807 4808 if ((res = 
secflags_parse_check(&flagset, tab.zone_secflags_upper, 4809 "upper", zlogp)) != Z_OK) { 4810 return (res); 4811 } else { 4812 secflags_copy(&secflags.psf_upper, &flagset); 4813 } 4814 4815 if (!psecflags_validate(&secflags)) { 4816 zerror(zlogp, B_TRUE, "security-flags violate invariants"); 4817 return (Z_BAD_PROPERTY); 4818 } 4819 4820 if ((res = zone_setattr(zoneid, ZONE_ATTR_SECFLAGS, &secflags, 4821 sizeof (secflags))) != 0) { 4822 zerror(zlogp, B_TRUE, 4823 "security-flags couldn't be set: %d", res); 4824 return (Z_SYSTEM); 4825 } 4826 4827 return (Z_OK); 4828 } 4829 4830 static int 4831 setup_zone_fs_allowed(zone_dochandle_t handle, zlog_t *zlogp, zoneid_t zoneid) 4832 { 4833 char fsallowed[ZONE_FS_ALLOWED_MAX]; 4834 char *fsallowedp = fsallowed; 4835 int len = sizeof (fsallowed); 4836 int res; 4837 4838 res = zonecfg_get_fs_allowed(handle, fsallowed, len); 4839 4840 if (res == Z_BAD_PROPERTY) { 4841 /* No value, set the defaults */ 4842 (void) strlcpy(fsallowed, DFLT_FS_ALLOWED, len); 4843 } else if (res != Z_OK) { 4844 report_prop_err(zlogp, "fs-allowed", fsallowed, res); 4845 return (res); 4846 } else if (fsallowed[0] == '-') { 4847 /* dropping default filesystems - use remaining list */ 4848 if (fsallowed[1] != ',') 4849 return (Z_OK); 4850 fsallowedp += 2; 4851 len -= 2; 4852 } else { 4853 /* Has a value, append the defaults */ 4854 if (strlcat(fsallowed, ",", len) >= len || 4855 strlcat(fsallowed, DFLT_FS_ALLOWED, len) >= len) { 4856 report_prop_err(zlogp, "fs-allowed", fsallowed, 4857 Z_TOO_BIG); 4858 return (Z_TOO_BIG); 4859 } 4860 } 4861 4862 if (zone_setattr(zoneid, ZONE_ATTR_FS_ALLOWED, fsallowedp, len) != 0) { 4863 zerror(zlogp, B_TRUE, 4864 "fs-allowed couldn't be set: %s: %d", fsallowedp, res); 4865 return (Z_SYSTEM); 4866 } 4867 4868 return (Z_OK); 4869 } 4870 4871 static int 4872 setup_zone_attrs(zlog_t *zlogp, char *zone_namep, zoneid_t zoneid) 4873 { 4874 zone_dochandle_t handle; 4875 int res = Z_OK; 4876 4877 if ((handle = 
zonecfg_init_handle()) == NULL) { 4878 zerror(zlogp, B_TRUE, "getting zone configuration handle"); 4879 return (Z_BAD_HANDLE); 4880 } 4881 if ((res = zonecfg_get_snapshot_handle(zone_namep, handle)) != Z_OK) { 4882 zerror(zlogp, B_FALSE, "invalid configuration"); 4883 goto out; 4884 } 4885 4886 if ((res = setup_zone_hostid(handle, zlogp, zoneid)) != Z_OK) 4887 goto out; 4888 4889 if ((res = setup_zone_fs_allowed(handle, zlogp, zoneid)) != Z_OK) 4890 goto out; 4891 4892 if ((res = setup_zone_secflags(handle, zlogp, zoneid)) != Z_OK) 4893 goto out; 4894 4895 out: 4896 zonecfg_fini_handle(handle); 4897 return (res); 4898 } 4899 4900 zoneid_t 4901 vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd) 4902 { 4903 zoneid_t rval = -1; 4904 priv_set_t *privs; 4905 char rootpath[MAXPATHLEN]; 4906 char *rctlbuf = NULL; 4907 size_t rctlbufsz = 0; 4908 char *zfsbuf = NULL; 4909 size_t zfsbufsz = 0; 4910 zoneid_t zoneid = -1; 4911 int xerr; 4912 char *kzone; 4913 FILE *fp = NULL; 4914 tsol_zcent_t *zcent = NULL; 4915 int match = 0; 4916 int doi = 0; 4917 int flags; 4918 zone_iptype_t iptype; 4919 4920 if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { 4921 zerror(zlogp, B_TRUE, "unable to determine zone root"); 4922 return (-1); 4923 } 4924 if (zonecfg_in_alt_root()) 4925 resolve_lofs(zlogp, rootpath, sizeof (rootpath)); 4926 4927 if (vplat_get_iptype(zlogp, &iptype) < 0) { 4928 zerror(zlogp, B_TRUE, "unable to determine ip-type"); 4929 return (-1); 4930 } 4931 if (iptype == ZS_EXCLUSIVE) { 4932 flags = ZCF_NET_EXCL; 4933 } else { 4934 flags = 0; 4935 } 4936 4937 if ((privs = priv_allocset()) == NULL) { 4938 zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); 4939 return (-1); 4940 } 4941 priv_emptyset(privs); 4942 if (get_privset(zlogp, privs, mount_cmd) != 0) 4943 goto error; 4944 4945 if (mount_cmd == Z_MNT_BOOT && 4946 get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) { 4947 zerror(zlogp, B_FALSE, "Unable to get list of rctls"); 4948 goto error; 4949 } 4950 
4951 if (get_datasets(zlogp, &zfsbuf, &zfsbufsz) != 0) { 4952 zerror(zlogp, B_FALSE, "Unable to get list of ZFS datasets"); 4953 goto error; 4954 } 4955 4956 if (mount_cmd == Z_MNT_BOOT && is_system_labeled()) { 4957 zcent = get_zone_label(zlogp, privs); 4958 if (zcent != NULL) { 4959 match = zcent->zc_match; 4960 doi = zcent->zc_doi; 4961 *zlabel = zcent->zc_label; 4962 } else { 4963 goto error; 4964 } 4965 if (validate_rootds_label(zlogp, rootpath, zlabel) != 0) 4966 goto error; 4967 } 4968 4969 kzone = zone_name; 4970 4971 /* 4972 * We must do this scan twice. First, we look for zones running on the 4973 * main system that are using this root (or any subdirectory of it). 4974 * Next, we reduce to the shortest path and search for loopback mounts 4975 * that use this same source node (same device and inode). 4976 */ 4977 if (duplicate_zone_root(zlogp, rootpath)) 4978 goto error; 4979 if (duplicate_reachable_path(zlogp, rootpath)) 4980 goto error; 4981 4982 if (ALT_MOUNT(mount_cmd)) { 4983 root_to_lu(zlogp, rootpath, sizeof (rootpath), B_TRUE); 4984 4985 /* 4986 * Forge up a special root for this zone. When a zone is 4987 * mounted, we can't let the zone have its own root because the 4988 * tools that will be used in this "scratch zone" need access 4989 * to both the zone's resources and the running machine's 4990 * executables. 4991 * 4992 * Note that the mkdir here also catches read-only filesystems. 4993 */ 4994 if (mkdir(rootpath, 0755) != 0 && errno != EEXIST) { 4995 zerror(zlogp, B_TRUE, "cannot create %s", rootpath); 4996 goto error; 4997 } 4998 if (domount(zlogp, "tmpfs", "", "swap", rootpath) != 0) 4999 goto error; 5000 } 5001 5002 if (zonecfg_in_alt_root()) { 5003 /* 5004 * If we are mounting up a zone in an alternate root partition, 5005 * then we have some additional work to do before starting the 5006 * zone. First, resolve the root path down so that we're not 5007 * fooled by duplicates. Then forge up an internal name for 5008 * the zone. 
5009 */ 5010 if ((fp = zonecfg_open_scratch("", B_TRUE)) == NULL) { 5011 zerror(zlogp, B_TRUE, "cannot open mapfile"); 5012 goto error; 5013 } 5014 if (zonecfg_lock_scratch(fp) != 0) { 5015 zerror(zlogp, B_TRUE, "cannot lock mapfile"); 5016 goto error; 5017 } 5018 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(), 5019 NULL, 0) == 0) { 5020 zerror(zlogp, B_FALSE, "scratch zone already running"); 5021 goto error; 5022 } 5023 /* This is the preferred name */ 5024 (void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s", 5025 zone_name); 5026 srandom(getpid()); 5027 while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL, 5028 0) == 0) { 5029 /* This is just an arbitrary name; note "." usage */ 5030 (void) snprintf(kernzone, sizeof (kernzone), 5031 "SUNWlu.%08lX%08lX", random(), random()); 5032 } 5033 kzone = kernzone; 5034 } 5035 5036 xerr = 0; 5037 if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf, 5038 rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel, 5039 flags)) == -1) { 5040 if (xerr == ZE_AREMOUNTS) { 5041 if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) { 5042 zerror(zlogp, B_FALSE, 5043 "An unknown file-system is mounted on " 5044 "a subdirectory of %s", rootpath); 5045 } else { 5046 5047 zerror(zlogp, B_FALSE, 5048 "These file-systems are mounted on " 5049 "subdirectories of %s:", rootpath); 5050 (void) zonecfg_find_mounts(rootpath, 5051 prtmount, zlogp); 5052 } 5053 } else if (xerr == ZE_CHROOTED) { 5054 zerror(zlogp, B_FALSE, "%s: " 5055 "cannot create a zone from a chrooted " 5056 "environment", "zone_create"); 5057 } else if (xerr == ZE_LABELINUSE) { 5058 char zonename[ZONENAME_MAX]; 5059 (void) getzonenamebyid(getzoneidbylabel(zlabel), 5060 zonename, ZONENAME_MAX); 5061 zerror(zlogp, B_FALSE, "The zone label is already " 5062 "used by the zone '%s'.", zonename); 5063 } else { 5064 zerror(zlogp, B_TRUE, "%s failed", "zone_create"); 5065 } 5066 goto error; 5067 } 5068 5069 if (zonecfg_in_alt_root() && 5070 
zonecfg_add_scratch(fp, zone_name, kernzone, 5071 zonecfg_get_root()) == -1) { 5072 zerror(zlogp, B_TRUE, "cannot add mapfile entry"); 5073 goto error; 5074 } 5075 5076 /* 5077 * The following actions are not performed when merely mounting a zone 5078 * for administrative use. 5079 */ 5080 if (mount_cmd == Z_MNT_BOOT) { 5081 brand_handle_t bh; 5082 struct brand_attr attr; 5083 char modname[MAXPATHLEN]; 5084 5085 if (setup_zone_attrs(zlogp, zone_name, zoneid) != Z_OK) 5086 goto error; 5087 5088 if ((bh = brand_open(brand_name)) == NULL) { 5089 zerror(zlogp, B_FALSE, 5090 "unable to determine brand name"); 5091 goto error; 5092 } 5093 5094 if (!is_system_labeled() && 5095 (strcmp(brand_name, LABELED_BRAND_NAME) == 0)) { 5096 brand_close(bh); 5097 zerror(zlogp, B_FALSE, 5098 "cannot boot labeled zone on unlabeled system"); 5099 goto error; 5100 } 5101 5102 /* 5103 * If this brand requires any kernel support, now is the time to 5104 * get it loaded and initialized. 5105 */ 5106 if (brand_get_modname(bh, modname, MAXPATHLEN) < 0) { 5107 brand_close(bh); 5108 zerror(zlogp, B_FALSE, 5109 "unable to determine brand kernel module"); 5110 goto error; 5111 } 5112 brand_close(bh); 5113 5114 if (strlen(modname) > 0) { 5115 (void) strlcpy(attr.ba_brandname, brand_name, 5116 sizeof (attr.ba_brandname)); 5117 (void) strlcpy(attr.ba_modname, modname, 5118 sizeof (attr.ba_modname)); 5119 if (zone_setattr(zoneid, ZONE_ATTR_BRAND, &attr, 5120 sizeof (attr) != 0)) { 5121 zerror(zlogp, B_TRUE, 5122 "could not set zone brand attribute."); 5123 goto error; 5124 } 5125 } 5126 5127 if (setup_zone_rm(zlogp, zone_name, zoneid) != Z_OK) 5128 goto error; 5129 5130 set_mlps(zlogp, zoneid, zcent); 5131 } 5132 5133 rval = zoneid; 5134 zoneid = -1; 5135 5136 error: 5137 if (zoneid != -1) { 5138 (void) zone_shutdown(zoneid); 5139 (void) zone_destroy(zoneid); 5140 } 5141 if (rctlbuf != NULL) 5142 free(rctlbuf); 5143 priv_freeset(privs); 5144 if (fp != NULL) 5145 zonecfg_close_scratch(fp); 5146 
lofs_discard_mnttab(); 5147 if (zcent != NULL) 5148 tsol_freezcent(zcent); 5149 return (rval); 5150 } 5151 5152 /* 5153 * Enter the zone and write a /etc/zones/index file there. This allows 5154 * libzonecfg (and thus zoneadm) to report the UUID and potentially other zone 5155 * details from inside the zone. 5156 */ 5157 static void 5158 write_index_file(zoneid_t zoneid) 5159 { 5160 FILE *zef; 5161 FILE *zet; 5162 struct zoneent *zep; 5163 pid_t child; 5164 int tmpl_fd; 5165 ctid_t ct; 5166 int fd; 5167 char uuidstr[UUID_PRINTABLE_STRING_LENGTH]; 5168 5169 /* Locate the zone entry in the global zone's index file */ 5170 if ((zef = setzoneent()) == NULL) 5171 return; 5172 while ((zep = getzoneent_private(zef)) != NULL) { 5173 if (strcmp(zep->zone_name, zone_name) == 0) 5174 break; 5175 free(zep); 5176 } 5177 endzoneent(zef); 5178 if (zep == NULL) 5179 return; 5180 5181 if ((tmpl_fd = init_template()) == -1) { 5182 free(zep); 5183 return; 5184 } 5185 5186 if ((child = fork()) == -1) { 5187 (void) ct_tmpl_clear(tmpl_fd); 5188 (void) close(tmpl_fd); 5189 free(zep); 5190 return; 5191 } 5192 5193 /* parent waits for child to finish */ 5194 if (child != 0) { 5195 free(zep); 5196 if (contract_latest(&ct) == -1) 5197 ct = -1; 5198 (void) ct_tmpl_clear(tmpl_fd); 5199 (void) close(tmpl_fd); 5200 (void) waitpid(child, NULL, 0); 5201 (void) contract_abandon_id(ct); 5202 return; 5203 } 5204 5205 /* child enters zone and sets up index file */ 5206 (void) ct_tmpl_clear(tmpl_fd); 5207 if (zone_enter(zoneid) != -1) { 5208 (void) mkdir(ZONE_CONFIG_ROOT, ZONE_CONFIG_MODE); 5209 (void) chown(ZONE_CONFIG_ROOT, ZONE_CONFIG_UID, 5210 ZONE_CONFIG_GID); 5211 fd = open(ZONE_INDEX_FILE, O_WRONLY|O_CREAT|O_TRUNC, 5212 ZONE_INDEX_MODE); 5213 if (fd != -1 && (zet = fdopen(fd, "w")) != NULL) { 5214 (void) fchown(fd, ZONE_INDEX_UID, ZONE_INDEX_GID); 5215 if (uuid_is_null(zep->zone_uuid)) 5216 uuidstr[0] = '\0'; 5217 else 5218 uuid_unparse(zep->zone_uuid, uuidstr); 5219 (void) fprintf(zet, 
"%s:%s:/:%s\n", zep->zone_name, 5220 zone_state_str(zep->zone_state), 5221 uuidstr); 5222 (void) fclose(zet); 5223 } 5224 } 5225 _exit(0); 5226 } 5227 5228 int 5229 vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid) 5230 { 5231 char zonepath[MAXPATHLEN]; 5232 5233 if (mount_cmd == Z_MNT_BOOT && validate_datasets(zlogp) != 0) { 5234 lofs_discard_mnttab(); 5235 return (-1); 5236 } 5237 5238 /* 5239 * Before we try to mount filesystems we need to create the 5240 * attribute backing store for /dev 5241 */ 5242 if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { 5243 lofs_discard_mnttab(); 5244 return (-1); 5245 } 5246 resolve_lofs(zlogp, zonepath, sizeof (zonepath)); 5247 5248 /* Make /dev directory owned by root, grouped sys */ 5249 if (make_one_dir(zlogp, zonepath, "/dev", DEFAULT_DIR_MODE, 5250 0, 3) != 0) { 5251 lofs_discard_mnttab(); 5252 return (-1); 5253 } 5254 5255 if (mount_filesystems(zlogp, mount_cmd) != 0) { 5256 lofs_discard_mnttab(); 5257 return (-1); 5258 } 5259 5260 if (mount_cmd == Z_MNT_BOOT) { 5261 zone_iptype_t iptype; 5262 5263 if (vplat_get_iptype(zlogp, &iptype) < 0) { 5264 zerror(zlogp, B_TRUE, "unable to determine ip-type"); 5265 lofs_discard_mnttab(); 5266 return (-1); 5267 } 5268 5269 switch (iptype) { 5270 case ZS_SHARED: 5271 /* Always do this to make lo0 get configured */ 5272 if (configure_shared_network_interfaces(zlogp) != 0) { 5273 lofs_discard_mnttab(); 5274 return (-1); 5275 } 5276 break; 5277 case ZS_EXCLUSIVE: 5278 if (configure_exclusive_network_interfaces(zlogp, 5279 zoneid) != 5280 0) { 5281 lofs_discard_mnttab(); 5282 return (-1); 5283 } 5284 break; 5285 } 5286 } 5287 5288 write_index_file(zoneid); 5289 5290 lofs_discard_mnttab(); 5291 return (0); 5292 } 5293 5294 static int 5295 lu_root_teardown(zlog_t *zlogp) 5296 { 5297 char zroot[MAXPATHLEN]; 5298 5299 if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) { 5300 zerror(zlogp, B_FALSE, "unable to determine zone root"); 5301 
return (-1); 5302 } 5303 root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE); 5304 5305 /* 5306 * At this point, the processes are gone, the filesystems (save the 5307 * root) are unmounted, and the zone is on death row. But there may 5308 * still be creds floating about in the system that reference the 5309 * zone_t, and which pin down zone_rootvp causing this call to fail 5310 * with EBUSY. Thus, we try for a little while before just giving up. 5311 * (How I wish this were not true, and umount2 just did the right 5312 * thing, or tmpfs supported MS_FORCE This is a gross hack.) 5313 */ 5314 if (umount2(zroot, MS_FORCE) != 0) { 5315 if (errno == ENOTSUP && umount2(zroot, 0) == 0) 5316 goto unmounted; 5317 if (errno == EBUSY) { 5318 int tries = 10; 5319 5320 while (--tries >= 0) { 5321 (void) sleep(1); 5322 if (umount2(zroot, 0) == 0) 5323 goto unmounted; 5324 if (errno != EBUSY) 5325 break; 5326 } 5327 } 5328 zerror(zlogp, B_TRUE, "unable to unmount '%s'", zroot); 5329 return (-1); 5330 } 5331 unmounted: 5332 5333 /* 5334 * Only zones in an alternate root environment have scratch zone 5335 * entries. 5336 */ 5337 if (zonecfg_in_alt_root()) { 5338 FILE *fp; 5339 int retv; 5340 5341 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) { 5342 zerror(zlogp, B_TRUE, "cannot open mapfile"); 5343 return (-1); 5344 } 5345 retv = -1; 5346 if (zonecfg_lock_scratch(fp) != 0) 5347 zerror(zlogp, B_TRUE, "cannot lock mapfile"); 5348 else if (zonecfg_delete_scratch(fp, kernzone) != 0) 5349 zerror(zlogp, B_TRUE, "cannot delete map entry"); 5350 else 5351 retv = 0; 5352 zonecfg_close_scratch(fp); 5353 return (retv); 5354 } else { 5355 return (0); 5356 } 5357 } 5358 5359 /* 5360 * Delete all transient links belonging to this zone. A transient link 5361 * is one that is created and destroyed along with the lifetime of the 5362 * zone. 
Non-transient links, ones that are assigned from the GZ to a 5363 * NGZ, are reassigned to the GZ in zone_shutdown() via the 5364 * zone-specific data (zsd) callbacks. 5365 */ 5366 static int 5367 delete_transient_links(zlog_t *zlogp, zoneid_t zoneid) 5368 { 5369 datalink_id_t *dllinks = NULL; 5370 int dlnum = 0; 5371 uint_t i; 5372 5373 if (fetch_zone_datalinks(zlogp, zoneid, &dlnum, &dllinks) != 0) 5374 return (-1); 5375 5376 if (dlnum == 0) 5377 return (0); 5378 5379 for (i = 0; i < dlnum; i++) { 5380 char link_name[MAXLINKNAMELEN]; 5381 char dlerr[DLADM_STRSIZE]; 5382 datalink_id_t link = dllinks[i]; 5383 datalink_class_t link_class; 5384 dladm_status_t status; 5385 uint32_t link_flags; 5386 5387 status = dladm_datalink_id2info(dld_handle, link, &link_flags, 5388 &link_class, NULL, link_name, sizeof (link_name)); 5389 5390 if (status != DLADM_STATUS_OK) { 5391 zerror(zlogp, B_FALSE, 5392 "failed to get link info for %u: %s", 5393 link, dladm_status2str(status, dlerr)); 5394 continue; 5395 } 5396 5397 if (!(link_flags & DLADM_OPT_TRANSIENT)) 5398 continue; 5399 5400 switch (link_class) { 5401 case DATALINK_CLASS_VNIC: 5402 case DATALINK_CLASS_ETHERSTUB: 5403 status = dladm_vnic_delete(dld_handle, link, 5404 DLADM_OPT_ACTIVE); 5405 break; 5406 case DATALINK_CLASS_VLAN: 5407 status = dladm_vlan_delete(dld_handle, link, 5408 DLADM_OPT_ACTIVE); 5409 break; 5410 case DATALINK_CLASS_AGGR: 5411 status = dladm_aggr_delete(dld_handle, link, 5412 DLADM_OPT_ACTIVE); 5413 break; 5414 default: 5415 zerror(zlogp, B_FALSE, 5416 "unhandled class for transient link %s (%u)", 5417 link_name, link); 5418 continue; 5419 } 5420 5421 if (status != DLADM_STATUS_OK) { 5422 zerror(zlogp, B_TRUE, 5423 "failed to delete transient link %s (%u): %s", 5424 link_name, link, dladm_status2str(status, dlerr)); 5425 } 5426 } 5427 5428 free(dllinks); 5429 return (0); 5430 } 5431 5432 int 5433 vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) 5434 { 5435 char *kzone; 5436 
zoneid_t zoneid; 5437 int res; 5438 char pool_err[128]; 5439 char zpath[MAXPATHLEN]; 5440 char cmdbuf[MAXPATHLEN]; 5441 brand_handle_t bh = NULL; 5442 dladm_status_t status; 5443 char errmsg[DLADM_STRSIZE]; 5444 ushort_t flags; 5445 5446 kzone = zone_name; 5447 if (zonecfg_in_alt_root()) { 5448 FILE *fp; 5449 5450 if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) { 5451 zerror(zlogp, B_TRUE, "unable to open map file"); 5452 goto error; 5453 } 5454 if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(), 5455 kernzone, sizeof (kernzone)) != 0) { 5456 zerror(zlogp, B_FALSE, "unable to find scratch zone"); 5457 zonecfg_close_scratch(fp); 5458 goto error; 5459 } 5460 zonecfg_close_scratch(fp); 5461 kzone = kernzone; 5462 } 5463 5464 if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) { 5465 if (!bringup_failure_recovery) 5466 zerror(zlogp, B_TRUE, "unable to get zoneid"); 5467 if (unmount_cmd) 5468 (void) lu_root_teardown(zlogp); 5469 goto error; 5470 } 5471 5472 if (remove_datalink_pool(zlogp, zoneid) != 0) { 5473 zerror(zlogp, B_FALSE, 5474 "unable to clear datalink pool property"); 5475 } 5476 5477 if (remove_datalink_protect(zlogp, zoneid) != 0) { 5478 zerror(zlogp, B_FALSE, 5479 "unable to clear datalink protect property"); 5480 } 5481 5482 /* 5483 * The datalinks assigned to the zone will be removed from the NGZ as 5484 * part of zone_shutdown() so that we need to remove protect/pool etc. 5485 * before zone_shutdown(). Even if the shutdown itself fails, the zone 5486 * will not be able to violate any constraints applied because the 5487 * datalinks are no longer available to the zone. 
5488 */ 5489 if (zone_shutdown(zoneid) != 0) { 5490 zerror(zlogp, B_TRUE, "unable to shutdown zone"); 5491 goto error; 5492 } 5493 5494 /* Get the zonepath of this zone */ 5495 if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { 5496 zerror(zlogp, B_FALSE, "unable to determine zone path"); 5497 goto error; 5498 } 5499 5500 /* Get a handle to the brand info for this zone */ 5501 if ((bh = brand_open(brand_name)) == NULL) { 5502 zerror(zlogp, B_FALSE, "unable to determine zone brand"); 5503 return (-1); 5504 } 5505 /* 5506 * If there is a brand 'halt' callback, execute it now to give the 5507 * brand a chance to cleanup any custom configuration. 5508 */ 5509 (void) strcpy(cmdbuf, EXEC_PREFIX); 5510 if (brand_get_halt(bh, zone_name, zpath, cmdbuf + EXEC_LEN, 5511 sizeof (cmdbuf) - EXEC_LEN) < 0) { 5512 brand_close(bh); 5513 zerror(zlogp, B_FALSE, "unable to determine branded zone's " 5514 "halt callback."); 5515 goto error; 5516 } 5517 brand_close(bh); 5518 5519 if ((strlen(cmdbuf) > EXEC_LEN) && 5520 (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) { 5521 zerror(zlogp, B_FALSE, "%s failed", cmdbuf); 5522 goto error; 5523 } 5524 5525 if (!unmount_cmd) { 5526 zone_iptype_t iptype; 5527 5528 if (zone_getattr(zoneid, ZONE_ATTR_FLAGS, &flags, 5529 sizeof (flags)) < 0) { 5530 if (vplat_get_iptype(zlogp, &iptype) < 0) { 5531 zerror(zlogp, B_TRUE, "unable to determine " 5532 "ip-type"); 5533 goto error; 5534 } 5535 } else { 5536 if (flags & ZF_NET_EXCL) 5537 iptype = ZS_EXCLUSIVE; 5538 else 5539 iptype = ZS_SHARED; 5540 } 5541 5542 switch (iptype) { 5543 case ZS_SHARED: 5544 if (unconfigure_shared_network_interfaces(zlogp, 5545 zoneid) != 0) { 5546 zerror(zlogp, B_FALSE, "unable to unconfigure " 5547 "network interfaces in zone"); 5548 goto error; 5549 } 5550 break; 5551 case ZS_EXCLUSIVE: 5552 if (delete_transient_links(zlogp, zoneid) != 0) { 5553 zerror(zlogp, B_FALSE, "unable to delete " 5554 "transient links in zone"); 5555 goto error; 5556 } 5557 if 
(unconfigure_exclusive_network_interfaces(zlogp, 5558 zoneid) != 0) { 5559 zerror(zlogp, B_FALSE, "unable to unconfigure " 5560 "network interfaces in zone"); 5561 goto error; 5562 } 5563 status = dladm_zone_halt(dld_handle, zoneid); 5564 if (status != DLADM_STATUS_OK) { 5565 zerror(zlogp, B_FALSE, "unable to notify " 5566 "dlmgmtd of zone halt: %s", 5567 dladm_status2str(status, errmsg)); 5568 } 5569 break; 5570 } 5571 } 5572 5573 if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) { 5574 zerror(zlogp, B_TRUE, "unable to abort TCP connections"); 5575 goto error; 5576 } 5577 5578 if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) { 5579 zerror(zlogp, B_FALSE, 5580 "unable to unmount file systems in zone"); 5581 goto error; 5582 } 5583 5584 /* 5585 * If we are rebooting then we normally don't want to destroy an 5586 * existing temporary pool at this point so that we can just reuse it 5587 * when the zone boots back up. However, it is also possible we were 5588 * running with a temporary pool and the zone configuration has been 5589 * modified to no longer use a temporary pool. In that case we need 5590 * to destroy the temporary pool now. This case looks like the case 5591 * where we never had a temporary pool configured but 5592 * zonecfg_destroy_tmp_pool will do the right thing either way. 
5593 */ 5594 if (!unmount_cmd) { 5595 boolean_t destroy_tmp_pool = B_TRUE; 5596 5597 if (rebooting) { 5598 struct zone_psettab pset_tab; 5599 zone_dochandle_t handle; 5600 5601 if ((handle = zonecfg_init_handle()) != NULL && 5602 zonecfg_get_handle(zone_name, handle) == Z_OK && 5603 zonecfg_lookup_pset(handle, &pset_tab) == Z_OK) 5604 destroy_tmp_pool = B_FALSE; 5605 5606 zonecfg_fini_handle(handle); 5607 } 5608 5609 if (destroy_tmp_pool) { 5610 if ((res = zonecfg_destroy_tmp_pool(zone_name, pool_err, 5611 sizeof (pool_err))) != Z_OK) { 5612 if (res == Z_POOL) 5613 zerror(zlogp, B_FALSE, pool_err); 5614 } 5615 } 5616 } 5617 5618 remove_mlps(zlogp, zoneid); 5619 5620 if (zone_destroy(zoneid) != 0) { 5621 zerror(zlogp, B_TRUE, "unable to destroy zone"); 5622 goto error; 5623 } 5624 5625 /* 5626 * Special teardown for alternate boot environments: remove the tmpfs 5627 * root for the zone and then remove it from the map file. 5628 */ 5629 if (unmount_cmd && lu_root_teardown(zlogp) != 0) 5630 goto error; 5631 5632 lofs_discard_mnttab(); 5633 return (0); 5634 5635 error: 5636 lofs_discard_mnttab(); 5637 return (-1); 5638 } 5639