xref: /illumos-gate/usr/src/cmd/zoneadmd/vplat.c (revision 9512fe850e98fdd448c638ca63fdd92a8a510255)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * This module contains functions used to bring up and tear down the
31  * Virtual Platform: [un]mounting file-systems, [un]plumbing network
32  * interfaces, [un]configuring devices, establishing resource controls,
33  * and creating/destroying the zone in the kernel.  These actions, on
34  * the way up, ready the zone; on the way down, they halt the zone.
35  * See the much longer block comment at the beginning of zoneadmd.c
36  * for a bigger picture of how the whole program functions.
37  *
38  * This module also has primary responsibility for the layout of "scratch
39  * zones."  These are mounted, but inactive, zones that are used during
40  * operating system upgrade and potentially other administrative action.  The
41  * scratch zone environment is similar to the miniroot environment.  The zone's
42  * actual root is mounted read-write on /a, and the standard paths (/usr,
43  * /sbin, /lib) all lead to read-only copies of the running system's binaries.
44  * This allows the administrative tools to manipulate the zone using "-R /a"
45  * without relying on any binaries in the zone itself.
46  *
47  * If the scratch zone is on an alternate root (Live Upgrade [LU] boot
48  * environment), then we must resolve the lofs mounts used there to uncover
49  * writable (unshared) resources.  Shared resources, though, are always
50  * read-only.  In addition, if the "same" zone with a different root path is
51  * currently running, then "/b" inside the zone points to the running zone's
52  * root.  This allows LU to synchronize configuration files during the upgrade
53  * process.
54  *
55  * To construct this environment, this module creates a tmpfs mount on
56  * $ZONEPATH/lu.  Inside this scratch area, the miniroot-like environment as
57  * described above is constructed on the fly.  The zone is then created using
58  * $ZONEPATH/lu as the root.
59  *
60  * Note that scratch zones are inactive.  The zone's bits are not running and
61  * likely cannot be run correctly until upgrade is done.  Init is not running
62  * there, nor is SMF.  Because of this, the "mounted" state of a scratch zone
63  * is not a part of the usual halt/ready/boot state machine.
64  */
65 
66 #include <sys/param.h>
67 #include <sys/mount.h>
68 #include <sys/mntent.h>
69 #include <sys/socket.h>
70 #include <sys/utsname.h>
71 #include <sys/types.h>
72 #include <sys/stat.h>
73 #include <sys/sockio.h>
74 #include <sys/stropts.h>
75 #include <sys/conf.h>
76 
77 #include <inet/tcp.h>
78 #include <arpa/inet.h>
79 #include <netinet/in.h>
80 #include <net/route.h>
81 
82 #include <stdio.h>
83 #include <errno.h>
84 #include <fcntl.h>
85 #include <unistd.h>
86 #include <rctl.h>
87 #include <stdlib.h>
88 #include <string.h>
89 #include <strings.h>
90 #include <wait.h>
91 #include <limits.h>
92 #include <libgen.h>
93 #include <libzfs.h>
94 #include <libdevinfo.h>
95 #include <zone.h>
96 #include <assert.h>
97 #include <libcontract.h>
98 #include <libcontract_priv.h>
99 #include <uuid/uuid.h>
100 
101 #include <sys/mntio.h>
102 #include <sys/mnttab.h>
103 #include <sys/fs/autofs.h>	/* for _autofssys() */
104 #include <sys/fs/lofs_info.h>
105 #include <sys/fs/zfs.h>
106 
107 #include <pool.h>
108 #include <sys/pool.h>
109 
110 #include <libzonecfg.h>
111 #include <synch.h>
112 
113 #include "zoneadmd.h"
114 #include <tsol/label.h>
115 #include <libtsnet.h>
116 #include <sys/priv.h>
117 
/* Number of bits in an IPv4 and in an IPv6 address, respectively. */
#define	V4_ADDR_LEN	32
#define	V6_ADDR_LEN	128

/* Mode given to directories we create: rwxr-xr-x (0755). */
#define	DEFAULT_DIR_MODE	\
	(S_IRWXU | (S_IRGRP | S_IXGRP) | (S_IROTH | S_IXOTH))

/* Options used when loopback mounting inherit package directories. */
#define	IPD_DEFAULT_OPTS	\
	MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES

/* File read by get_remote_fstypes(), one file system type per line. */
#define	DFSTYPES	"/etc/dfs/fstypes"

#define	MAXTNZLEN	2048
130 
/*
 * Directories and device nodes (named relative to <zone_root>/dev) that
 * must be present in every zone.  A zone's configuration may add further
 * device rules on top of this list, but at present it cannot remove any of
 * these standard entries.  The list is terminated by NULL.
 */
static const char *standard_devs[] = {
	"arp", "conslog", "cpu/self/cpuid", "crypto", "cryptoadm",
	"dsk", "dtrace/helper", "fd", "kstat",
	"lo0", "lo1", "lo2", "lo3", "log", "logindmux", "null",
#ifdef __sparc
	"openprom",
#endif
	"poll", "pool", "ptmx", "pts/*", "random", "rdsk", "rmt",
	"sad/user", "swap", "sysevent", "tcp", "tcp6", "term",
	"ticlts", "ticots", "ticotsord", "tty", "udp", "udp6",
	"urandom", "zero", "zfs",
	NULL
};
181 
/*
 * One symbolic link description.  NOTE(review): judging by the table
 * below, "source" is the name of the link and "target" is what the link
 * points at -- confirm against the code that consumes the table.
 */
struct source_target {
	const char *source;
	const char *target;
};

/*
 * Symbolic links (named relative to <zone_root>/dev) which must be present
 * in every zone.  The table ends with an all-NULL entry.
 */
static struct source_target standard_devlinks[] = {
	/* standard I/O descriptors */
	{ "stderr",	"./fd/2" },
	{ "stdin",	"./fd/0" },
	{ "stdout",	"./fd/1" },

	{ "dtremote",	"/dev/null" },

	/* console aliases, all resolving to the zone console */
	{ "console",	"zconsole" },
	{ "syscon",	"zconsole" },
	{ "sysmsg",	"zconsole" },
	{ "systty",	"zconsole" },
	{ "msglog",	"zconsole" },

	{ NULL,		NULL }
};
203 
204 static int vplat_mount_dev(zlog_t *);
205 
206 /* for routing socket */
207 static int rts_seqno = 0;
208 
209 /* mangled zone name when mounting in an alternate root environment */
210 static char kernzone[ZONENAME_MAX];
211 
212 /* array of cached mount entries for resolve_lofs */
213 static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
214 
215 /* for Trusted Extensions */
216 static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *);
217 static int tsol_mounts(zlog_t *, char *, char *);
218 static void tsol_unmounts(zlog_t *, char *);
219 static m_label_t *zlabel = NULL;
220 static m_label_t *zid_label = NULL;
221 static priv_set_t *zprivs = NULL;
222 
223 /* from libsocket, not in any header file */
224 extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
225 
226 /*
227  * An optimization for build_mnttable: reallocate (and potentially copy the
228  * data) only once every N times through the loop.
229  */
230 #define	MNTTAB_HUNK	32
231 
232 /*
233  * Private autofs system call
234  */
235 extern int _autofssys(int, void *);
236 
237 static int
238 autofs_cleanup(zoneid_t zoneid)
239 {
240 	/*
241 	 * Ask autofs to unmount all trigger nodes in the given zone.
242 	 */
243 	return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid));
244 }
245 
246 static void
247 free_mnttable(struct mnttab *mnt_array, uint_t nelem)
248 {
249 	uint_t i;
250 
251 	if (mnt_array == NULL)
252 		return;
253 	for (i = 0; i < nelem; i++) {
254 		free(mnt_array[i].mnt_mountp);
255 		free(mnt_array[i].mnt_fstype);
256 		free(mnt_array[i].mnt_special);
257 		free(mnt_array[i].mnt_mntopts);
258 		assert(mnt_array[i].mnt_time == NULL);
259 	}
260 	free(mnt_array);
261 }
262 
263 /*
264  * Build the mount table for the zone rooted at "zroot", storing the resulting
265  * array of struct mnttabs in "mnt_arrayp" and the number of elements in the
266  * array in "nelemp".
267  */
268 static int
269 build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab,
270     struct mnttab **mnt_arrayp, uint_t *nelemp)
271 {
272 	struct mnttab mnt;
273 	struct mnttab *mnts;
274 	struct mnttab *mnp;
275 	uint_t nmnt;
276 
277 	rewind(mnttab);
278 	resetmnttab(mnttab);
279 	nmnt = 0;
280 	mnts = NULL;
281 	while (getmntent(mnttab, &mnt) == 0) {
282 		struct mnttab *tmp_array;
283 
284 		if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0)
285 			continue;
286 		if (nmnt % MNTTAB_HUNK == 0) {
287 			tmp_array = realloc(mnts,
288 			    (nmnt + MNTTAB_HUNK) * sizeof (*mnts));
289 			if (tmp_array == NULL) {
290 				free_mnttable(mnts, nmnt);
291 				return (-1);
292 			}
293 			mnts = tmp_array;
294 		}
295 		mnp = &mnts[nmnt++];
296 
297 		/*
298 		 * Zero out any fields we're not using.
299 		 */
300 		(void) memset(mnp, 0, sizeof (*mnp));
301 
302 		if (mnt.mnt_special != NULL)
303 			mnp->mnt_special = strdup(mnt.mnt_special);
304 		if (mnt.mnt_mntopts != NULL)
305 			mnp->mnt_mntopts = strdup(mnt.mnt_mntopts);
306 		mnp->mnt_mountp = strdup(mnt.mnt_mountp);
307 		mnp->mnt_fstype = strdup(mnt.mnt_fstype);
308 		if ((mnt.mnt_special != NULL && mnp->mnt_special == NULL) ||
309 		    (mnt.mnt_mntopts != NULL && mnp->mnt_mntopts == NULL) ||
310 		    mnp->mnt_mountp == NULL || mnp->mnt_fstype == NULL) {
311 			zerror(zlogp, B_TRUE, "memory allocation failed");
312 			free_mnttable(mnts, nmnt);
313 			return (-1);
314 		}
315 	}
316 	*mnt_arrayp = mnts;
317 	*nelemp = nmnt;
318 	return (0);
319 }
320 
321 /*
322  * This is an optimization.  The resolve_lofs function is used quite frequently
323  * to manipulate file paths, and on a machine with a large number of zones,
324  * there will be a huge number of mounted file systems.  Thus, we trigger a
325  * reread of the list of mount points
326  */
327 static void
328 lofs_discard_mnttab(void)
329 {
330 	free_mnttable(resolve_lofs_mnts,
331 	    resolve_lofs_mnt_max - resolve_lofs_mnts);
332 	resolve_lofs_mnts = resolve_lofs_mnt_max = NULL;
333 }
334 
335 static int
336 lofs_read_mnttab(zlog_t *zlogp)
337 {
338 	FILE *mnttab;
339 	uint_t nmnts;
340 
341 	if ((mnttab = fopen(MNTTAB, "r")) == NULL)
342 		return (-1);
343 	if (build_mnttable(zlogp, "", 0, mnttab, &resolve_lofs_mnts,
344 	    &nmnts) == -1) {
345 		(void) fclose(mnttab);
346 		return (-1);
347 	}
348 	(void) fclose(mnttab);
349 	resolve_lofs_mnt_max = resolve_lofs_mnts + nmnts;
350 	return (0);
351 }
352 
353 /*
354  * This function loops over potential loopback mounts and symlinks in a given
355  * path and resolves them all down to an absolute path.
356  */
357 static void
358 resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen)
359 {
360 	int len, arlen;
361 	const char *altroot;
362 	char tmppath[MAXPATHLEN];
363 	boolean_t outside_altroot;
364 
365 	if ((len = resolvepath(path, tmppath, sizeof (tmppath))) == -1)
366 		return;
367 	tmppath[len] = '\0';
368 	(void) strlcpy(path, tmppath, sizeof (tmppath));
369 
370 	/* This happens once per zoneadmd operation. */
371 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
372 		return;
373 
374 	altroot = zonecfg_get_root();
375 	arlen = strlen(altroot);
376 	outside_altroot = B_FALSE;
377 	for (;;) {
378 		struct mnttab *mnp;
379 
380 		for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
381 		    mnp++) {
382 			if (mnp->mnt_fstype == NULL ||
383 			    mnp->mnt_mountp == NULL ||
384 			    mnp->mnt_special == NULL ||
385 			    strcmp(mnp->mnt_fstype, MNTTYPE_LOFS) != 0)
386 				continue;
387 			len = strlen(mnp->mnt_mountp);
388 			if (strncmp(mnp->mnt_mountp, path, len) == 0 &&
389 			    (path[len] == '/' || path[len] == '\0'))
390 				break;
391 		}
392 		if (mnp >= resolve_lofs_mnt_max)
393 			break;
394 		if (outside_altroot) {
395 			char *cp;
396 			int olen = sizeof (MNTOPT_RO) - 1;
397 
398 			/*
399 			 * If we run into a read-only mount outside of the
400 			 * alternate root environment, then the user doesn't
401 			 * want this path to be made read-write.
402 			 */
403 			if (mnp->mnt_mntopts != NULL &&
404 			    (cp = strstr(mnp->mnt_mntopts, MNTOPT_RO)) !=
405 			    NULL &&
406 			    (cp == mnp->mnt_mntopts || cp[-1] == ',') &&
407 			    (cp[olen] == '\0' || cp[olen] == ',')) {
408 				break;
409 			}
410 		} else if (arlen > 0 &&
411 		    (strncmp(mnp->mnt_special, altroot, arlen) != 0 ||
412 		    (mnp->mnt_special[arlen] != '\0' &&
413 		    mnp->mnt_special[arlen] != '/'))) {
414 			outside_altroot = B_TRUE;
415 		}
416 		/* use temporary buffer because new path might be longer */
417 		(void) snprintf(tmppath, sizeof (tmppath), "%s%s",
418 		    mnp->mnt_special, path + len);
419 		if ((len = resolvepath(tmppath, path, pathlen)) == -1)
420 			break;
421 		path[len] = '\0';
422 	}
423 }
424 
425 /*
426  * For a regular mount, check if a replacement lofs mount is needed because the
427  * referenced device is already mounted somewhere.
428  */
429 static int
430 check_lofs_needed(zlog_t *zlogp, struct zone_fstab *fsptr)
431 {
432 	struct mnttab *mnp;
433 	zone_fsopt_t *optptr, *onext;
434 
435 	/* This happens once per zoneadmd operation. */
436 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
437 		return (-1);
438 
439 	/*
440 	 * If this special node isn't already in use, then it's ours alone;
441 	 * no need to worry about conflicting mounts.
442 	 */
443 	for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
444 	    mnp++) {
445 		if (strcmp(mnp->mnt_special, fsptr->zone_fs_special) == 0)
446 			break;
447 	}
448 	if (mnp >= resolve_lofs_mnt_max)
449 		return (0);
450 
451 	/*
452 	 * Convert this duplicate mount into a lofs mount.
453 	 */
454 	(void) strlcpy(fsptr->zone_fs_special, mnp->mnt_mountp,
455 	    sizeof (fsptr->zone_fs_special));
456 	(void) strlcpy(fsptr->zone_fs_type, MNTTYPE_LOFS,
457 	    sizeof (fsptr->zone_fs_type));
458 	fsptr->zone_fs_raw[0] = '\0';
459 
460 	/*
461 	 * Discard all but one of the original options and set that to be the
462 	 * same set of options used for inherit package directory resources.
463 	 */
464 	optptr = fsptr->zone_fs_options;
465 	if (optptr == NULL) {
466 		optptr = malloc(sizeof (*optptr));
467 		if (optptr == NULL) {
468 			zerror(zlogp, B_TRUE, "cannot mount %s",
469 			    fsptr->zone_fs_dir);
470 			return (-1);
471 		}
472 	} else {
473 		while ((onext = optptr->zone_fsopt_next) != NULL) {
474 			optptr->zone_fsopt_next = onext->zone_fsopt_next;
475 			free(onext);
476 		}
477 	}
478 	(void) strcpy(optptr->zone_fsopt_opt, IPD_DEFAULT_OPTS);
479 	optptr->zone_fsopt_next = NULL;
480 	fsptr->zone_fs_options = optptr;
481 	return (0);
482 }
483 
484 static int
485 make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode)
486 {
487 	char path[MAXPATHLEN];
488 	struct stat st;
489 
490 	if (snprintf(path, sizeof (path), "%s%s", prefix, subdir) >
491 	    sizeof (path)) {
492 		zerror(zlogp, B_FALSE, "pathname %s%s is too long", prefix,
493 		    subdir);
494 		return (-1);
495 	}
496 
497 	if (lstat(path, &st) == 0) {
498 		/*
499 		 * We don't check the file mode since presumably the zone
500 		 * administrator may have had good reason to change the mode,
501 		 * and we don't need to second guess him.
502 		 */
503 		if (!S_ISDIR(st.st_mode)) {
504 			if (is_system_labeled() &&
505 			    S_ISREG(st.st_mode)) {
506 				/*
507 				 * The need to mount readonly copies of
508 				 * global zone /etc/ files is unique to
509 				 * Trusted Extensions.
510 				 */
511 				if (strncmp(subdir, "/etc/",
512 				    strlen("/etc/")) != 0) {
513 					zerror(zlogp, B_FALSE,
514 					    "%s is not in /etc", path);
515 					return (-1);
516 				}
517 			} else {
518 				zerror(zlogp, B_FALSE,
519 				    "%s is not a directory", path);
520 				return (-1);
521 			}
522 		}
523 	} else if (mkdirp(path, mode) != 0) {
524 		if (errno == EROFS)
525 			zerror(zlogp, B_FALSE, "Could not mkdir %s.\nIt is on "
526 			    "a read-only file system in this local zone.\nMake "
527 			    "sure %s exists in the global zone.", path, subdir);
528 		else
529 			zerror(zlogp, B_TRUE, "mkdirp of %s failed", path);
530 		return (-1);
531 	}
532 	return (0);
533 }
534 
/*
 * Free a NULL-terminated array of strings, such as the one returned by
 * get_remote_fstypes(), along with the array itself.  A NULL argument is
 * accepted and ignored.
 */
static void
free_remote_fstypes(char **types)
{
	char **cur;

	if (types == NULL)
		return;

	for (cur = types; *cur != NULL; cur++)
		free(*cur);
	free(types);
}
546 
547 static char **
548 get_remote_fstypes(zlog_t *zlogp)
549 {
550 	char **types = NULL;
551 	FILE *fp;
552 	char buf[MAXPATHLEN];
553 	char fstype[MAXPATHLEN];
554 	uint_t lines = 0;
555 	uint_t i;
556 
557 	if ((fp = fopen(DFSTYPES, "r")) == NULL) {
558 		zerror(zlogp, B_TRUE, "failed to open %s", DFSTYPES);
559 		return (NULL);
560 	}
561 	/*
562 	 * Count the number of lines
563 	 */
564 	while (fgets(buf, sizeof (buf), fp) != NULL)
565 		lines++;
566 	if (lines == 0)	/* didn't read anything; empty file */
567 		goto out;
568 	rewind(fp);
569 	/*
570 	 * Allocate enough space for a NULL-terminated array.
571 	 */
572 	types = calloc(lines + 1, sizeof (char *));
573 	if (types == NULL) {
574 		zerror(zlogp, B_TRUE, "memory allocation failed");
575 		goto out;
576 	}
577 	i = 0;
578 	while (fgets(buf, sizeof (buf), fp) != NULL) {
579 		/* LINTED - fstype is big enough to hold buf */
580 		if (sscanf(buf, "%s", fstype) == 0) {
581 			zerror(zlogp, B_FALSE, "unable to parse %s", DFSTYPES);
582 			free_remote_fstypes(types);
583 			types = NULL;
584 			goto out;
585 		}
586 		types[i] = strdup(fstype);
587 		if (types[i] == NULL) {
588 			zerror(zlogp, B_TRUE, "memory allocation failed");
589 			free_remote_fstypes(types);
590 			types = NULL;
591 			goto out;
592 		}
593 		i++;
594 	}
595 out:
596 	(void) fclose(fp);
597 	return (types);
598 }
599 
600 static boolean_t
601 is_remote_fstype(const char *fstype, char *const *remote_fstypes)
602 {
603 	uint_t i;
604 
605 	if (remote_fstypes == NULL)
606 		return (B_FALSE);
607 	for (i = 0; remote_fstypes[i] != NULL; i++) {
608 		if (strcmp(remote_fstypes[i], fstype) == 0)
609 			return (B_TRUE);
610 	}
611 	return (B_FALSE);
612 }
613 
614 /*
615  * This converts a zone root path (normally of the form .../root) to a Live
616  * Upgrade scratch zone root (of the form .../lu).
617  */
618 static void
619 root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
620 {
621 	if (!isresolved && zonecfg_in_alt_root())
622 		resolve_lofs(zlogp, zroot, zrootlen);
623 	(void) strcpy(strrchr(zroot, '/') + 1, "lu");
624 }
625 
626 /*
627  * The general strategy for unmounting filesystems is as follows:
628  *
629  * - Remote filesystems may be dead, and attempting to contact them as
630  * part of a regular unmount may hang forever; we want to always try to
631  * forcibly unmount such filesystems and only fall back to regular
632  * unmounts if the filesystem doesn't support forced unmounts.
633  *
634  * - We don't want to unnecessarily corrupt metadata on local
635  * filesystems (ie UFS), so we want to start off with graceful unmounts,
636  * and only escalate to doing forced unmounts if we get stuck.
637  *
638  * We start off walking backwards through the mount table.  This doesn't
639  * give us strict ordering but ensures that we try to unmount submounts
640  * first.  We thus limit the number of failed umount2(2) calls.
641  *
642  * The mechanism for determining if we're stuck is to count the number
643  * of failed unmounts each iteration through the mount table.  This
644  * gives us an upper bound on the number of filesystems which remain
645  * mounted (autofs trigger nodes are dealt with separately).  If at the
646  * end of one unmount+autofs_cleanup cycle we still have the same number
647  * of mounts that we started out with, we're stuck and try a forced
648  * unmount.  If that fails (filesystem doesn't support forced unmounts)
649  * then we bail and are unable to teardown the zone.  If it succeeds,
650  * we're no longer stuck so we continue with our policy of trying
651  * graceful mounts first.
652  *
653  * Zone must be down (ie, no processes or threads active).
654  */
655 static int
656 unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
657 {
658 	int error = 0;
659 	FILE *mnttab;
660 	struct mnttab *mnts;
661 	uint_t nmnt;
662 	char zroot[MAXPATHLEN + 1];
663 	size_t zrootlen;
664 	uint_t oldcount = UINT_MAX;
665 	boolean_t stuck = B_FALSE;
666 	char **remote_fstypes = NULL;
667 
668 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
669 		zerror(zlogp, B_FALSE, "unable to determine zone root");
670 		return (-1);
671 	}
672 	if (unmount_cmd)
673 		root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
674 
675 	(void) strcat(zroot, "/");
676 	zrootlen = strlen(zroot);
677 
678 	/*
679 	 * For Trusted Extensions unmount each higher level zone's mount
680 	 * of our zone's /export/home
681 	 */
682 	if (!unmount_cmd)
683 		tsol_unmounts(zlogp, zone_name);
684 
685 	if ((mnttab = fopen(MNTTAB, "r")) == NULL) {
686 		zerror(zlogp, B_TRUE, "failed to open %s", MNTTAB);
687 		return (-1);
688 	}
689 	/*
690 	 * Use our hacky mntfs ioctl so we see everything, even mounts with
691 	 * MS_NOMNTTAB.
692 	 */
693 	if (ioctl(fileno(mnttab), MNTIOC_SHOWHIDDEN, NULL) < 0) {
694 		zerror(zlogp, B_TRUE, "unable to configure %s", MNTTAB);
695 		error++;
696 		goto out;
697 	}
698 
699 	/*
700 	 * Build the list of remote fstypes so we know which ones we
701 	 * should forcibly unmount.
702 	 */
703 	remote_fstypes = get_remote_fstypes(zlogp);
704 	for (; /* ever */; ) {
705 		uint_t newcount = 0;
706 		boolean_t unmounted;
707 		struct mnttab *mnp;
708 		char *path;
709 		uint_t i;
710 
711 		mnts = NULL;
712 		nmnt = 0;
713 		/*
714 		 * MNTTAB gives us a way to walk through mounted
715 		 * filesystems; we need to be able to walk them in
716 		 * reverse order, so we build a list of all mounted
717 		 * filesystems.
718 		 */
719 		if (build_mnttable(zlogp, zroot, zrootlen, mnttab, &mnts,
720 		    &nmnt) != 0) {
721 			error++;
722 			goto out;
723 		}
724 		for (i = 0; i < nmnt; i++) {
725 			mnp = &mnts[nmnt - i - 1]; /* access in reverse order */
726 			path = mnp->mnt_mountp;
727 			unmounted = B_FALSE;
728 			/*
729 			 * Try forced unmount first for remote filesystems.
730 			 *
731 			 * Not all remote filesystems support forced unmounts,
732 			 * so if this fails (ENOTSUP) we'll continue on
733 			 * and try a regular unmount.
734 			 */
735 			if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) {
736 				if (umount2(path, MS_FORCE) == 0)
737 					unmounted = B_TRUE;
738 			}
739 			/*
740 			 * Try forced unmount if we're stuck.
741 			 */
742 			if (stuck) {
743 				if (umount2(path, MS_FORCE) == 0) {
744 					unmounted = B_TRUE;
745 					stuck = B_FALSE;
746 				} else {
747 					/*
748 					 * The first failure indicates a
749 					 * mount we won't be able to get
750 					 * rid of automatically, so we
751 					 * bail.
752 					 */
753 					error++;
754 					zerror(zlogp, B_FALSE,
755 					    "unable to unmount '%s'", path);
756 					free_mnttable(mnts, nmnt);
757 					goto out;
758 				}
759 			}
760 			/*
761 			 * Try regular unmounts for everything else.
762 			 */
763 			if (!unmounted && umount2(path, 0) != 0)
764 				newcount++;
765 		}
766 		free_mnttable(mnts, nmnt);
767 
768 		if (newcount == 0)
769 			break;
770 		if (newcount >= oldcount) {
771 			/*
772 			 * Last round didn't unmount anything; we're stuck and
773 			 * should start trying forced unmounts.
774 			 */
775 			stuck = B_TRUE;
776 		}
777 		oldcount = newcount;
778 
779 		/*
780 		 * Autofs doesn't let you unmount its trigger nodes from
781 		 * userland so we have to tell the kernel to cleanup for us.
782 		 */
783 		if (autofs_cleanup(zoneid) != 0) {
784 			zerror(zlogp, B_TRUE, "unable to remove autofs nodes");
785 			error++;
786 			goto out;
787 		}
788 	}
789 
790 out:
791 	free_remote_fstypes(remote_fstypes);
792 	(void) fclose(mnttab);
793 	return (error ? -1 : 0);
794 }
795 
796 static int
797 fs_compare(const void *m1, const void *m2)
798 {
799 	struct zone_fstab *i = (struct zone_fstab *)m1;
800 	struct zone_fstab *j = (struct zone_fstab *)m2;
801 
802 	return (strcmp(i->zone_fs_dir, j->zone_fs_dir));
803 }
804 
805 /*
806  * Fork and exec (and wait for) the mentioned binary with the provided
807  * arguments.  Returns (-1) if something went wrong with fork(2) or exec(2),
808  * returns the exit status otherwise.
809  *
810  * If we were unable to exec the provided pathname (for whatever
811  * reason), we return the special token ZEXIT_EXEC.  The current value
812  * of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the
813  * consumers of this function; any future consumers must make sure this
814  * remains the case.
815  */
816 static int
817 forkexec(zlog_t *zlogp, const char *path, char *const argv[])
818 {
819 	pid_t child_pid;
820 	int child_status = 0;
821 
822 	/*
823 	 * Do not let another thread localize a message while we are forking.
824 	 */
825 	(void) mutex_lock(&msglock);
826 	child_pid = fork();
827 	(void) mutex_unlock(&msglock);
828 	if (child_pid == -1) {
829 		zerror(zlogp, B_TRUE, "could not fork for %s", argv[0]);
830 		return (-1);
831 	} else if (child_pid == 0) {
832 		closefrom(0);
833 		/* redirect stdin, stdout & stderr to /dev/null */
834 		(void) open("/dev/null", O_RDONLY);	/* stdin */
835 		(void) open("/dev/null", O_WRONLY);	/* stdout */
836 		(void) open("/dev/null", O_WRONLY);	/* stderr */
837 		(void) execv(path, argv);
838 		/*
839 		 * Since we are in the child, there is no point calling zerror()
840 		 * since there is nobody waiting to consume it.  So exit with a
841 		 * special code that the parent will recognize and call zerror()
842 		 * accordingly.
843 		 */
844 
845 		_exit(ZEXIT_EXEC);
846 	} else {
847 		(void) waitpid(child_pid, &child_status, 0);
848 	}
849 
850 	if (WIFSIGNALED(child_status)) {
851 		zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
852 		    "signal %d", path, WTERMSIG(child_status));
853 		return (-1);
854 	}
855 	assert(WIFEXITED(child_status));
856 	if (WEXITSTATUS(child_status) == ZEXIT_EXEC) {
857 		zerror(zlogp, B_FALSE, "failed to exec %s", path);
858 		return (-1);
859 	}
860 	return (WEXITSTATUS(child_status));
861 }
862 
863 static int
864 dofsck(zlog_t *zlogp, const char *fstype, const char *rawdev)
865 {
866 	char cmdbuf[MAXPATHLEN];
867 	char *argv[4];
868 	int status;
869 
870 	/*
871 	 * We could alternatively have called /usr/sbin/fsck -F <fstype>, but
872 	 * that would cost us an extra fork/exec without buying us anything.
873 	 */
874 	if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/fsck", fstype)
875 	    > sizeof (cmdbuf)) {
876 		zerror(zlogp, B_FALSE, "file-system type %s too long", fstype);
877 		return (-1);
878 	}
879 
880 	argv[0] = "fsck";
881 	argv[1] = "-m";
882 	argv[2] = (char *)rawdev;
883 	argv[3] = NULL;
884 
885 	status = forkexec(zlogp, cmdbuf, argv);
886 	if (status == 0 || status == -1)
887 		return (status);
888 	zerror(zlogp, B_FALSE, "fsck of '%s' failed with exit status %d; "
889 	    "run fsck manually", rawdev, status);
890 	return (-1);
891 }
892 
893 static int
894 domount(zlog_t *zlogp, const char *fstype, const char *opts,
895     const char *special, const char *directory)
896 {
897 	char cmdbuf[MAXPATHLEN];
898 	char *argv[6];
899 	int status;
900 
901 	/*
902 	 * We could alternatively have called /usr/sbin/mount -F <fstype>, but
903 	 * that would cost us an extra fork/exec without buying us anything.
904 	 */
905 	if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/mount", fstype)
906 	    > sizeof (cmdbuf)) {
907 		zerror(zlogp, B_FALSE, "file-system type %s too long", fstype);
908 		return (-1);
909 	}
910 	argv[0] = "mount";
911 	if (opts[0] == '\0') {
912 		argv[1] = (char *)special;
913 		argv[2] = (char *)directory;
914 		argv[3] = NULL;
915 	} else {
916 		argv[1] = "-o";
917 		argv[2] = (char *)opts;
918 		argv[3] = (char *)special;
919 		argv[4] = (char *)directory;
920 		argv[5] = NULL;
921 	}
922 
923 	status = forkexec(zlogp, cmdbuf, argv);
924 	if (status == 0 || status == -1)
925 		return (status);
926 	if (opts[0] == '\0')
927 		zerror(zlogp, B_FALSE, "\"%s %s %s\" "
928 		    "failed with exit code %d",
929 		    cmdbuf, special, directory, status);
930 	else
931 		zerror(zlogp, B_FALSE, "\"%s -o %s %s %s\" "
932 		    "failed with exit code %d",
933 		    cmdbuf, opts, special, directory, status);
934 	return (-1);
935 }
936 
937 /*
938  * Make sure if a given path exists, it is not a sym-link, and is a directory.
939  */
940 static int
941 check_path(zlog_t *zlogp, const char *path)
942 {
943 	struct stat statbuf;
944 	char respath[MAXPATHLEN];
945 	int res;
946 
947 	if (lstat(path, &statbuf) != 0) {
948 		if (errno == ENOENT)
949 			return (0);
950 		zerror(zlogp, B_TRUE, "can't stat %s", path);
951 		return (-1);
952 	}
953 	if (S_ISLNK(statbuf.st_mode)) {
954 		zerror(zlogp, B_FALSE, "%s is a symlink", path);
955 		return (-1);
956 	}
957 	if (!S_ISDIR(statbuf.st_mode)) {
958 		if (is_system_labeled() && S_ISREG(statbuf.st_mode)) {
959 			/*
960 			 * The need to mount readonly copies of
961 			 * global zone /etc/ files is unique to
962 			 * Trusted Extensions.
963 			 * The check for /etc/ via strstr() is to
964 			 * allow paths like $ZONEROOT/etc/passwd
965 			 */
966 			if (strstr(path, "/etc/") == NULL) {
967 				zerror(zlogp, B_FALSE,
968 				    "%s is not in /etc", path);
969 				return (-1);
970 			}
971 		} else {
972 			zerror(zlogp, B_FALSE, "%s is not a directory", path);
973 			return (-1);
974 		}
975 	}
976 	if ((res = resolvepath(path, respath, sizeof (respath))) == -1) {
977 		zerror(zlogp, B_TRUE, "unable to resolve path %s", path);
978 		return (-1);
979 	}
980 	respath[res] = '\0';
981 	if (strcmp(path, respath) != 0) {
982 		/*
983 		 * We don't like ".."s and "."s throwing us off
984 		 */
985 		zerror(zlogp, B_FALSE, "%s is not a canonical path", path);
986 		return (-1);
987 	}
988 	return (0);
989 }
990 
991 /*
992  * Check every component of rootpath/relpath.  If any component fails (ie,
993  * exists but isn't the canonical path to a directory), it is returned in
994  * badpath, which is assumed to be at least of size MAXPATHLEN.
995  *
996  * Relpath must begin with '/'.
997  */
998 static boolean_t
999 valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *relpath)
1000 {
1001 	char abspath[MAXPATHLEN], *slashp;
1002 
1003 	/*
1004 	 * Make sure abspath has at least one '/' after its rootpath
1005 	 * component, and ends with '/'.
1006 	 */
1007 	if (snprintf(abspath, sizeof (abspath), "%s%s/", rootpath, relpath) >
1008 	    sizeof (abspath)) {
1009 		zerror(zlogp, B_FALSE, "pathname %s%s is too long", rootpath,
1010 		    relpath);
1011 		return (B_FALSE);
1012 	}
1013 
1014 	slashp = &abspath[strlen(rootpath)];
1015 	assert(*slashp == '/');
1016 	do {
1017 		*slashp = '\0';
1018 		if (check_path(zlogp, abspath) != 0)
1019 			return (B_FALSE);
1020 		*slashp = '/';
1021 		slashp++;
1022 	} while ((slashp = strchr(slashp, '/')) != NULL);
1023 	return (B_TRUE);
1024 }
1025 
1026 static int
1027 mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath)
1028 {
1029 	char    path[MAXPATHLEN];
1030 	char	specpath[MAXPATHLEN];
1031 	char    optstr[MAX_MNTOPT_STR];
1032 	zone_fsopt_t *optptr;
1033 
1034 	if (!valid_mount_path(zlogp, rootpath, fsptr->zone_fs_dir)) {
1035 		zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
1036 		    rootpath, fsptr->zone_fs_dir);
1037 		return (-1);
1038 	}
1039 
1040 	if (make_one_dir(zlogp, rootpath, fsptr->zone_fs_dir,
1041 	    DEFAULT_DIR_MODE) != 0)
1042 		return (-1);
1043 
1044 	(void) snprintf(path, sizeof (path), "%s%s", rootpath,
1045 	    fsptr->zone_fs_dir);
1046 
1047 	if (strlen(fsptr->zone_fs_special) == 0) {
1048 		/*
1049 		 * A zero-length special is how we distinguish IPDs from
1050 		 * general-purpose FSs.  Make sure it mounts from a place that
1051 		 * can be seen via the alternate zone's root.
1052 		 */
1053 		if (snprintf(specpath, sizeof (specpath), "%s%s",
1054 		    zonecfg_get_root(), fsptr->zone_fs_dir) >=
1055 		    sizeof (specpath)) {
1056 			zerror(zlogp, B_FALSE, "cannot mount %s: path too "
1057 			    "long in alternate root", fsptr->zone_fs_dir);
1058 			return (-1);
1059 		}
1060 		if (zonecfg_in_alt_root())
1061 			resolve_lofs(zlogp, specpath, sizeof (specpath));
1062 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS,
1063 		    specpath, path) != 0) {
1064 			zerror(zlogp, B_TRUE, "failed to loopback mount %s",
1065 			    specpath);
1066 			return (-1);
1067 		}
1068 		return (0);
1069 	}
1070 
1071 	/*
1072 	 * In general the strategy here is to do just as much verification as
1073 	 * necessary to avoid crashing or otherwise doing something bad; if the
1074 	 * administrator initiated the operation via zoneadm(1m), he'll get
1075 	 * auto-verification which will let him know what's wrong.  If he
1076 	 * modifies the zone configuration of a running zone and doesn't attempt
1077 	 * to verify that it's OK we won't crash but won't bother trying to be
1078 	 * too helpful either.  zoneadm verify is only a couple keystrokes away.
1079 	 */
1080 	if (!zonecfg_valid_fs_type(fsptr->zone_fs_type)) {
1081 		zerror(zlogp, B_FALSE, "cannot mount %s on %s: "
1082 		    "invalid file-system type %s", fsptr->zone_fs_special,
1083 		    fsptr->zone_fs_dir, fsptr->zone_fs_type);
1084 		return (-1);
1085 	}
1086 
1087 	/*
1088 	 * If we're looking at an alternate root environment, then construct
1089 	 * read-only loopback mounts as necessary.  For all lofs mounts, make
1090 	 * sure that the 'special' entry points inside the alternate root.  (We
1091 	 * don't do this with other mounts, as devfs isn't in the alternate
1092 	 * root, and we need to assume the device environment is roughly the
1093 	 * same.)
1094 	 */
1095 	if (zonecfg_in_alt_root()) {
1096 		struct stat64 st;
1097 
1098 		if (stat64(fsptr->zone_fs_special, &st) != -1 &&
1099 		    S_ISBLK(st.st_mode) &&
1100 		    check_lofs_needed(zlogp, fsptr) == -1)
1101 			return (-1);
1102 		if (strcmp(fsptr->zone_fs_type, MNTTYPE_LOFS) == 0) {
1103 			if (snprintf(specpath, sizeof (specpath), "%s%s",
1104 			    zonecfg_get_root(), fsptr->zone_fs_special) >=
1105 			    sizeof (specpath)) {
1106 				zerror(zlogp, B_FALSE, "cannot mount %s: path "
1107 				    "too long in alternate root",
1108 				    fsptr->zone_fs_special);
1109 				return (-1);
1110 			}
1111 			resolve_lofs(zlogp, specpath, sizeof (specpath));
1112 			(void) strlcpy(fsptr->zone_fs_special, specpath,
1113 			    sizeof (fsptr->zone_fs_special));
1114 		}
1115 	}
1116 
1117 	/*
1118 	 * Run 'fsck -m' if there's a device to fsck.
1119 	 */
1120 	if (fsptr->zone_fs_raw[0] != '\0' &&
1121 	    dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_raw) != 0)
1122 		return (-1);
1123 
1124 	/*
1125 	 * Build up mount option string.
1126 	 */
1127 	optstr[0] = '\0';
1128 	if (fsptr->zone_fs_options != NULL) {
1129 		(void) strlcpy(optstr, fsptr->zone_fs_options->zone_fsopt_opt,
1130 		    sizeof (optstr));
1131 		for (optptr = fsptr->zone_fs_options->zone_fsopt_next;
1132 		    optptr != NULL; optptr = optptr->zone_fsopt_next) {
1133 			(void) strlcat(optstr, ",", sizeof (optstr));
1134 			(void) strlcat(optstr, optptr->zone_fsopt_opt,
1135 			    sizeof (optstr));
1136 		}
1137 	}
1138 	return (domount(zlogp, fsptr->zone_fs_type, optstr,
1139 	    fsptr->zone_fs_special, path));
1140 }
1141 
1142 static void
1143 free_fs_data(struct zone_fstab *fsarray, uint_t nelem)
1144 {
1145 	uint_t i;
1146 
1147 	if (fsarray == NULL)
1148 		return;
1149 	for (i = 0; i < nelem; i++)
1150 		zonecfg_free_fs_option_list(fsarray[i].zone_fs_options);
1151 	free(fsarray);
1152 }
1153 
1154 /*
1155  * This function constructs the miniroot-like "scratch zone" environment.  If
1156  * it returns B_FALSE, then the error has already been logged.
1157  */
1158 static boolean_t
1159 build_mounted(zlog_t *zlogp, char *rootpath, size_t rootlen,
1160     const char *zonepath)
1161 {
1162 	char tmp[MAXPATHLEN], fromdir[MAXPATHLEN];
1163 	char luroot[MAXPATHLEN];
1164 	const char **cpp;
1165 	static const char *mkdirs[] = {
1166 		"/system", "/system/contract", "/system/object", "/proc",
1167 		"/dev", "/tmp", "/a", NULL
1168 	};
1169 	static const char *localdirs[] = {
1170 		"/etc", "/var", NULL
1171 	};
1172 	static const char *loopdirs[] = {
1173 		"/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform",
1174 		"/usr", NULL
1175 	};
1176 	static const char *tmpdirs[] = {
1177 		"/tmp", "/var/run", NULL
1178 	};
1179 	FILE *fp;
1180 	struct stat st;
1181 	char *altstr;
1182 	uuid_t uuid;
1183 
1184 	/*
1185 	 * Construct a small Solaris environment, including the zone root
1186 	 * mounted on '/a' inside that environment.
1187 	 */
1188 	resolve_lofs(zlogp, rootpath, rootlen);
1189 	(void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
1190 	resolve_lofs(zlogp, luroot, sizeof (luroot));
1191 	(void) snprintf(tmp, sizeof (tmp), "%s/bin", luroot);
1192 	(void) symlink("./usr/bin", tmp);
1193 
1194 	/*
1195 	 * These are mostly special mount points; not handled here.  (See
1196 	 * zone_mount_early.)
1197 	 */
1198 	for (cpp = mkdirs; *cpp != NULL; cpp++) {
1199 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1200 		if (mkdir(tmp, 0755) != 0) {
1201 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1202 			return (B_FALSE);
1203 		}
1204 	}
1205 
1206 	/*
1207 	 * These are mounted read-write from the zone undergoing upgrade.  We
1208 	 * must be careful not to 'leak' things from the main system into the
1209 	 * zone, and this accomplishes that goal.
1210 	 */
1211 	for (cpp = localdirs; *cpp != NULL; cpp++) {
1212 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1213 		(void) snprintf(fromdir, sizeof (fromdir), "%s%s", rootpath,
1214 		    *cpp);
1215 		if (mkdir(tmp, 0755) != 0) {
1216 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1217 			return (B_FALSE);
1218 		}
1219 		if (domount(zlogp, MNTTYPE_LOFS, "", fromdir, tmp) != 0) {
1220 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1221 			    *cpp);
1222 			return (B_FALSE);
1223 		}
1224 	}
1225 
1226 	/*
1227 	 * These are things mounted read-only from the running system because
1228 	 * they contain binaries that must match system.
1229 	 */
1230 	for (cpp = loopdirs; *cpp != NULL; cpp++) {
1231 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1232 		if (mkdir(tmp, 0755) != 0) {
1233 			if (errno != EEXIST) {
1234 				zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1235 				return (B_FALSE);
1236 			}
1237 			if (lstat(tmp, &st) != 0) {
1238 				zerror(zlogp, B_TRUE, "cannot stat %s", tmp);
1239 				return (B_FALSE);
1240 			}
1241 			/*
1242 			 * Ignore any non-directories encountered.  These are
1243 			 * things that have been converted into symlinks
1244 			 * (/etc/fs and /etc/lib) and no longer need a lofs
1245 			 * fixup.
1246 			 */
1247 			if (!S_ISDIR(st.st_mode))
1248 				continue;
1249 		}
1250 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, *cpp,
1251 		    tmp) != 0) {
1252 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1253 			    *cpp);
1254 			return (B_FALSE);
1255 		}
1256 	}
1257 
1258 	/*
1259 	 * These are things with tmpfs mounted inside.
1260 	 */
1261 	for (cpp = tmpdirs; *cpp != NULL; cpp++) {
1262 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1263 		if (mkdir(tmp, 0755) != 0 && errno != EEXIST) {
1264 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1265 			return (B_FALSE);
1266 		}
1267 		if (domount(zlogp, MNTTYPE_TMPFS, "", "swap", tmp) != 0) {
1268 			zerror(zlogp, B_TRUE, "cannot mount swap on %s", *cpp);
1269 			return (B_FALSE);
1270 		}
1271 	}
1272 
1273 	/*
1274 	 * This is here to support lucopy.  If there's an instance of this same
1275 	 * zone on the current running system, then we mount its root up as
1276 	 * read-only inside the scratch zone.
1277 	 */
1278 	(void) zonecfg_get_uuid(zone_name, uuid);
1279 	altstr = strdup(zonecfg_get_root());
1280 	if (altstr == NULL) {
1281 		zerror(zlogp, B_TRUE, "memory allocation failed");
1282 		return (B_FALSE);
1283 	}
1284 	zonecfg_set_root("");
1285 	(void) strlcpy(tmp, zone_name, sizeof (tmp));
1286 	(void) zonecfg_get_name_by_uuid(uuid, tmp, sizeof (tmp));
1287 	if (zone_get_rootpath(tmp, fromdir, sizeof (fromdir)) == Z_OK &&
1288 	    strcmp(fromdir, rootpath) != 0) {
1289 		(void) snprintf(tmp, sizeof (tmp), "%s/b", luroot);
1290 		if (mkdir(tmp, 0755) != 0) {
1291 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1292 			return (B_FALSE);
1293 		}
1294 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, fromdir,
1295 		    tmp) != 0) {
1296 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1297 			    fromdir);
1298 			return (B_FALSE);
1299 		}
1300 	}
1301 	zonecfg_set_root(altstr);
1302 	free(altstr);
1303 
1304 	if ((fp = zonecfg_open_scratch(luroot, B_TRUE)) == NULL) {
1305 		zerror(zlogp, B_TRUE, "cannot open zone mapfile");
1306 		return (B_FALSE);
1307 	}
1308 	(void) ftruncate(fileno(fp), 0);
1309 	if (zonecfg_add_scratch(fp, zone_name, kernzone, "/") == -1) {
1310 		zerror(zlogp, B_TRUE, "cannot add zone mapfile entry");
1311 	}
1312 	zonecfg_close_scratch(fp);
1313 	(void) snprintf(tmp, sizeof (tmp), "%s/a", luroot);
1314 	if (domount(zlogp, MNTTYPE_LOFS, "", rootpath, tmp) != 0)
1315 		return (B_FALSE);
1316 	(void) strlcpy(rootpath, tmp, rootlen);
1317 	return (B_TRUE);
1318 }
1319 
1320 static int
1321 mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd)
1322 {
1323 	char	rootpath[MAXPATHLEN];
1324 	char	zonepath[MAXPATHLEN];
1325 	int	num_fs = 0, i;
1326 	struct zone_fstab fstab, *fs_ptr = NULL, *tmp_ptr;
1327 	struct zone_fstab *fsp;
1328 	zone_dochandle_t handle = NULL;
1329 	zone_state_t zstate;
1330 
1331 	if (zone_get_state(zone_name, &zstate) != Z_OK ||
1332 	    (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
1333 		zerror(zlogp, B_FALSE,
1334 		    "zone must be in '%s' or '%s' state to mount file-systems",
1335 		    zone_state_str(ZONE_STATE_READY),
1336 		    zone_state_str(ZONE_STATE_MOUNTED));
1337 		goto bad;
1338 	}
1339 
1340 	if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
1341 		zerror(zlogp, B_TRUE, "unable to determine zone path");
1342 		goto bad;
1343 	}
1344 
1345 	if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
1346 		zerror(zlogp, B_TRUE, "unable to determine zone root");
1347 		goto bad;
1348 	}
1349 
1350 	if ((handle = zonecfg_init_handle()) == NULL) {
1351 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
1352 		goto bad;
1353 	}
1354 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
1355 	    zonecfg_setfsent(handle) != Z_OK) {
1356 		zerror(zlogp, B_FALSE, "invalid configuration");
1357 		goto bad;
1358 	}
1359 
1360 	/*
1361 	 * Iterate through the rest of the filesystems, first the IPDs, then
1362 	 * the general FSs.  Sort them all, then mount them in sorted order.
1363 	 * This is to make sure the higher level directories (e.g., /usr)
1364 	 * get mounted before any beneath them (e.g., /usr/local).
1365 	 */
1366 	if (zonecfg_setipdent(handle) != Z_OK) {
1367 		zerror(zlogp, B_FALSE, "invalid configuration");
1368 		goto bad;
1369 	}
1370 	while (zonecfg_getipdent(handle, &fstab) == Z_OK) {
1371 		num_fs++;
1372 		if ((tmp_ptr = realloc(fs_ptr,
1373 		    num_fs * sizeof (*tmp_ptr))) == NULL) {
1374 			zerror(zlogp, B_TRUE, "memory allocation failed");
1375 			num_fs--;
1376 			(void) zonecfg_endipdent(handle);
1377 			goto bad;
1378 		}
1379 		fs_ptr = tmp_ptr;
1380 		fsp = &fs_ptr[num_fs - 1];
1381 		/*
1382 		 * IPDs logically only have a mount point; all other properties
1383 		 * are implied.
1384 		 */
1385 		(void) strlcpy(fsp->zone_fs_dir,
1386 		    fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir));
1387 		fsp->zone_fs_special[0] = '\0';
1388 		fsp->zone_fs_raw[0] = '\0';
1389 		fsp->zone_fs_type[0] = '\0';
1390 		fsp->zone_fs_options = NULL;
1391 	}
1392 	(void) zonecfg_endipdent(handle);
1393 
1394 	if (zonecfg_setfsent(handle) != Z_OK) {
1395 		zerror(zlogp, B_FALSE, "invalid configuration");
1396 		goto bad;
1397 	}
1398 	while (zonecfg_getfsent(handle, &fstab) == Z_OK) {
1399 		/*
1400 		 * ZFS filesystems will not be accessible under an alternate
1401 		 * root, since the pool will not be known.  Ignore them in this
1402 		 * case.
1403 		 */
1404 		if (mount_cmd && strcmp(fstab.zone_fs_type, MNTTYPE_ZFS) == 0)
1405 			continue;
1406 
1407 		num_fs++;
1408 		if ((tmp_ptr = realloc(fs_ptr,
1409 		    num_fs * sizeof (*tmp_ptr))) == NULL) {
1410 			zerror(zlogp, B_TRUE, "memory allocation failed");
1411 			num_fs--;
1412 			(void) zonecfg_endfsent(handle);
1413 			goto bad;
1414 		}
1415 		fs_ptr = tmp_ptr;
1416 		fsp = &fs_ptr[num_fs - 1];
1417 		(void) strlcpy(fsp->zone_fs_dir,
1418 		    fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir));
1419 		(void) strlcpy(fsp->zone_fs_special, fstab.zone_fs_special,
1420 		    sizeof (fsp->zone_fs_special));
1421 		(void) strlcpy(fsp->zone_fs_raw, fstab.zone_fs_raw,
1422 		    sizeof (fsp->zone_fs_raw));
1423 		(void) strlcpy(fsp->zone_fs_type, fstab.zone_fs_type,
1424 		    sizeof (fsp->zone_fs_type));
1425 		fsp->zone_fs_options = fstab.zone_fs_options;
1426 	}
1427 	(void) zonecfg_endfsent(handle);
1428 	zonecfg_fini_handle(handle);
1429 	handle = NULL;
1430 
1431 	/*
1432 	 * When we're mounting a zone for administration, / is the
1433 	 * scratch zone and dev is mounted at /dev.  The to-be-upgraded
1434 	 * zone is mounted at /a, and we set up that environment so that
1435 	 * process can access both the running system's utilities
1436 	 * and the to-be-modified zone's files.  The only exception
1437 	 * is the zone's /dev which isn't mounted at all, which is
1438 	 * the same as global zone installation where /a/dev and
1439 	 * /a/devices are not mounted.
1440 	 */
1441 	if (mount_cmd &&
1442 	    !build_mounted(zlogp, rootpath, sizeof (rootpath), zonepath))
1443 		goto bad;
1444 
1445 	qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
1446 	for (i = 0; i < num_fs; i++) {
1447 		if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0)
1448 			goto bad;
1449 	}
1450 
1451 	/*
1452 	 * For Trusted Extensions cross-mount each lower level /export/home
1453 	 */
1454 	if (!mount_cmd && tsol_mounts(zlogp, zone_name, rootpath) != 0)
1455 		goto bad;
1456 
1457 	free_fs_data(fs_ptr, num_fs);
1458 
1459 	/*
1460 	 * Everything looks fine.
1461 	 */
1462 	return (0);
1463 
1464 bad:
1465 	if (handle != NULL)
1466 		zonecfg_fini_handle(handle);
1467 	free_fs_data(fs_ptr, num_fs);
1468 	return (-1);
1469 }
1470 
/*
 * Convert the decimal prefix length in prefixstr into a netmask whose
 * leading prefixlen bits are set, written through maskstr.
 *
 * The caller makes sure neither pointer is NULL.  The partial byte (if any)
 * is OR'ed into place, so the caller must also hand in a zeroed buffer; it
 * has to be large enough to hold maxprefixlen bits.
 *
 * Returns 0 on success.  Returns 1, leaving maskstr untouched, if prefixstr
 * is empty, is not entirely a decimal number, or lies outside the range
 * 0..maxprefixlen.
 */
static int
addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr)
{
	char *endp;
	long prefixlen;

	/*
	 * strtol() rather than atoi(): garbage such as "abc" or "24x" must
	 * be rejected instead of silently being read as 0 or 24.
	 */
	prefixlen = strtol(prefixstr, &endp, 10);
	if (endp == prefixstr || *endp != '\0')
		return (1);
	if (prefixlen < 0 || prefixlen > maxprefixlen)
		return (1);

	/* Whole bytes first ... */
	while (prefixlen >= 8) {
		*maskstr++ = 0xFF;
		prefixlen -= 8;
	}
	/* ... then the top 'prefixlen' bits of the following byte. */
	if (prefixlen > 0)
		*maskstr |= (uchar_t)(0xFF << (8 - prefixlen));
	return (0);
}
1491 
1492 /*
1493  * Tear down all interfaces belonging to the given zone.  This should
1494  * be called with the zone in a state other than "running", so that
1495  * interfaces can't be assigned to the zone after this returns.
1496  *
1497  * If anything goes wrong, log an error message and return an error.
1498  */
1499 static int
1500 unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id)
1501 {
1502 	struct lifnum lifn;
1503 	struct lifconf lifc;
1504 	struct lifreq *lifrp, lifrl;
1505 	int64_t lifc_flags = LIFC_NOXMIT | LIFC_ALLZONES;
1506 	int num_ifs, s, i, ret_code = 0;
1507 	uint_t bufsize;
1508 	char *buf = NULL;
1509 
1510 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
1511 		zerror(zlogp, B_TRUE, "could not get socket");
1512 		ret_code = -1;
1513 		goto bad;
1514 	}
1515 	lifn.lifn_family = AF_UNSPEC;
1516 	lifn.lifn_flags = (int)lifc_flags;
1517 	if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) {
1518 		zerror(zlogp, B_TRUE,
1519 		    "could not determine number of interfaces");
1520 		ret_code = -1;
1521 		goto bad;
1522 	}
1523 	num_ifs = lifn.lifn_count;
1524 	bufsize = num_ifs * sizeof (struct lifreq);
1525 	if ((buf = malloc(bufsize)) == NULL) {
1526 		zerror(zlogp, B_TRUE, "memory allocation failed");
1527 		ret_code = -1;
1528 		goto bad;
1529 	}
1530 	lifc.lifc_family = AF_UNSPEC;
1531 	lifc.lifc_flags = (int)lifc_flags;
1532 	lifc.lifc_len = bufsize;
1533 	lifc.lifc_buf = buf;
1534 	if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) {
1535 		zerror(zlogp, B_TRUE, "could not get configured interfaces");
1536 		ret_code = -1;
1537 		goto bad;
1538 	}
1539 	lifrp = lifc.lifc_req;
1540 	for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--, lifrp++) {
1541 		(void) close(s);
1542 		if ((s = socket(lifrp->lifr_addr.ss_family, SOCK_DGRAM, 0)) <
1543 		    0) {
1544 			zerror(zlogp, B_TRUE, "%s: could not get socket",
1545 			    lifrl.lifr_name);
1546 			ret_code = -1;
1547 			continue;
1548 		}
1549 		(void) memset(&lifrl, 0, sizeof (lifrl));
1550 		(void) strncpy(lifrl.lifr_name, lifrp->lifr_name,
1551 		    sizeof (lifrl.lifr_name));
1552 		if (ioctl(s, SIOCGLIFZONE, (caddr_t)&lifrl) < 0) {
1553 			zerror(zlogp, B_TRUE,
1554 			    "%s: could not determine zone interface belongs to",
1555 			    lifrl.lifr_name);
1556 			ret_code = -1;
1557 			continue;
1558 		}
1559 		if (lifrl.lifr_zoneid == zone_id) {
1560 			if (ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifrl) < 0) {
1561 				zerror(zlogp, B_TRUE,
1562 				    "%s: could not remove interface",
1563 				    lifrl.lifr_name);
1564 				ret_code = -1;
1565 				continue;
1566 			}
1567 		}
1568 	}
1569 bad:
1570 	if (s > 0)
1571 		(void) close(s);
1572 	if (buf)
1573 		free(buf);
1574 	return (ret_code);
1575 }
1576 
1577 static union	sockunion {
1578 	struct	sockaddr sa;
1579 	struct	sockaddr_in sin;
1580 	struct	sockaddr_dl sdl;
1581 	struct	sockaddr_in6 sin6;
1582 } so_dst, so_ifp;
1583 
1584 static struct {
1585 	struct	rt_msghdr hdr;
1586 	char	space[512];
1587 } rtmsg;
1588 
1589 static int
1590 salen(struct sockaddr *sa)
1591 {
1592 	switch (sa->sa_family) {
1593 	case AF_INET:
1594 		return (sizeof (struct sockaddr_in));
1595 	case AF_LINK:
1596 		return (sizeof (struct sockaddr_dl));
1597 	case AF_INET6:
1598 		return (sizeof (struct sockaddr_in6));
1599 	default:
1600 		return (sizeof (struct sockaddr));
1601 	}
1602 }
1603 
/*
 * Round a positive length up to the next multiple of sizeof (long); a length
 * of zero or less yields sizeof (long).  The argument is evaluated more than
 * once, so it must be free of side effects.
 */
#define	ROUNDUP_LONG(a) \
	(((a) <= 0) ? sizeof (long) : ((((a) - 1) | (sizeof (long) - 1)) + 1))
1606 
1607 /*
1608  * Look up which zone is using a given IP address.  The address in question
1609  * is expected to have been stuffed into the structure to which lifr points
1610  * via a previous SIOCGLIFADDR ioctl().
1611  *
1612  * This is done using black router socket magic.
1613  *
1614  * Return the name of the zone on success or NULL on failure.
1615  *
1616  * This is a lot of code for a simple task; a new ioctl request to take care
1617  * of this might be a useful RFE.
1618  */
1619 
1620 static char *
1621 who_is_using(zlog_t *zlogp, struct lifreq *lifr)
1622 {
1623 	static char answer[ZONENAME_MAX];
1624 	pid_t pid;
1625 	int s, rlen, l, i;
1626 	char *cp = rtmsg.space;
1627 	struct sockaddr_dl *ifp = NULL;
1628 	struct sockaddr *sa;
1629 	char save_if_name[LIFNAMSIZ];
1630 
1631 	answer[0] = '\0';
1632 
1633 	pid = getpid();
1634 	if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) < 0) {
1635 		zerror(zlogp, B_TRUE, "could not get routing socket");
1636 		return (NULL);
1637 	}
1638 
1639 	if (lifr->lifr_addr.ss_family == AF_INET) {
1640 		struct sockaddr_in *sin4;
1641 
1642 		so_dst.sa.sa_family = AF_INET;
1643 		sin4 = (struct sockaddr_in *)&lifr->lifr_addr;
1644 		so_dst.sin.sin_addr = sin4->sin_addr;
1645 	} else {
1646 		struct sockaddr_in6 *sin6;
1647 
1648 		so_dst.sa.sa_family = AF_INET6;
1649 		sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr;
1650 		so_dst.sin6.sin6_addr = sin6->sin6_addr;
1651 	}
1652 
1653 	so_ifp.sa.sa_family = AF_LINK;
1654 
1655 	(void) memset(&rtmsg, 0, sizeof (rtmsg));
1656 	rtmsg.hdr.rtm_type = RTM_GET;
1657 	rtmsg.hdr.rtm_flags = RTF_UP | RTF_HOST;
1658 	rtmsg.hdr.rtm_version = RTM_VERSION;
1659 	rtmsg.hdr.rtm_seq = ++rts_seqno;
1660 	rtmsg.hdr.rtm_addrs = RTA_IFP | RTA_DST;
1661 
1662 	l = ROUNDUP_LONG(salen(&so_dst.sa));
1663 	(void) memmove(cp, &(so_dst), l);
1664 	cp += l;
1665 	l = ROUNDUP_LONG(salen(&so_ifp.sa));
1666 	(void) memmove(cp, &(so_ifp), l);
1667 	cp += l;
1668 
1669 	rtmsg.hdr.rtm_msglen = l = cp - (char *)&rtmsg;
1670 
1671 	if ((rlen = write(s, &rtmsg, l)) < 0) {
1672 		zerror(zlogp, B_TRUE, "writing to routing socket");
1673 		return (NULL);
1674 	} else if (rlen < (int)rtmsg.hdr.rtm_msglen) {
1675 		zerror(zlogp, B_TRUE,
1676 		    "write to routing socket got only %d for len\n", rlen);
1677 		return (NULL);
1678 	}
1679 	do {
1680 		l = read(s, &rtmsg, sizeof (rtmsg));
1681 	} while (l > 0 && (rtmsg.hdr.rtm_seq != rts_seqno ||
1682 	    rtmsg.hdr.rtm_pid != pid));
1683 	if (l < 0) {
1684 		zerror(zlogp, B_TRUE, "reading from routing socket");
1685 		return (NULL);
1686 	}
1687 
1688 	if (rtmsg.hdr.rtm_version != RTM_VERSION) {
1689 		zerror(zlogp, B_FALSE,
1690 		    "routing message version %d not understood",
1691 		    rtmsg.hdr.rtm_version);
1692 		return (NULL);
1693 	}
1694 	if (rtmsg.hdr.rtm_msglen != (ushort_t)l) {
1695 		zerror(zlogp, B_FALSE, "message length mismatch, "
1696 		    "expected %d bytes, returned %d bytes",
1697 		    rtmsg.hdr.rtm_msglen, l);
1698 		return (NULL);
1699 	}
1700 	if (rtmsg.hdr.rtm_errno != 0)  {
1701 		errno = rtmsg.hdr.rtm_errno;
1702 		zerror(zlogp, B_TRUE, "RTM_GET routing socket message");
1703 		return (NULL);
1704 	}
1705 	if ((rtmsg.hdr.rtm_addrs & RTA_IFP) == 0) {
1706 		zerror(zlogp, B_FALSE, "interface not found");
1707 		return (NULL);
1708 	}
1709 	cp = ((char *)(&rtmsg.hdr + 1));
1710 	for (i = 1; i != 0; i <<= 1) {
1711 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1712 		sa = (struct sockaddr *)cp;
1713 		if (i != RTA_IFP) {
1714 			if ((i & rtmsg.hdr.rtm_addrs) != 0)
1715 				cp += ROUNDUP_LONG(salen(sa));
1716 			continue;
1717 		}
1718 		if (sa->sa_family == AF_LINK &&
1719 		    ((struct sockaddr_dl *)sa)->sdl_nlen != 0)
1720 			ifp = (struct sockaddr_dl *)sa;
1721 		break;
1722 	}
1723 	if (ifp == NULL) {
1724 		zerror(zlogp, B_FALSE, "interface could not be determined");
1725 		return (NULL);
1726 	}
1727 
1728 	/*
1729 	 * We need to set the I/F name to what we got above, then do the
1730 	 * appropriate ioctl to get its zone name.  But lifr->lifr_name is
1731 	 * used by the calling function to do a REMOVEIF, so if we leave the
1732 	 * "good" zone's I/F name in place, *that* I/F will be removed instead
1733 	 * of the bad one.  So we save the old (bad) I/F name before over-
1734 	 * writing it and doing the ioctl, then restore it after the ioctl.
1735 	 */
1736 	(void) strlcpy(save_if_name, lifr->lifr_name, sizeof (save_if_name));
1737 	(void) strncpy(lifr->lifr_name, ifp->sdl_data, ifp->sdl_nlen);
1738 	lifr->lifr_name[ifp->sdl_nlen] = '\0';
1739 	i = ioctl(s, SIOCGLIFZONE, lifr);
1740 	(void) strlcpy(lifr->lifr_name, save_if_name, sizeof (save_if_name));
1741 	if (i < 0) {
1742 		zerror(zlogp, B_TRUE,
1743 		    "%s: could not determine the zone interface belongs to",
1744 		    lifr->lifr_name);
1745 		return (NULL);
1746 	}
1747 	if (getzonenamebyid(lifr->lifr_zoneid, answer, sizeof (answer)) < 0)
1748 		(void) snprintf(answer, sizeof (answer), "%d",
1749 		    lifr->lifr_zoneid);
1750 
1751 	if (strlen(answer) > 0)
1752 		return (answer);
1753 	return (NULL);
1754 }
1755 
1756 typedef struct mcast_rtmsg_s {
1757 	struct rt_msghdr	m_rtm;
1758 	union {
1759 		struct {
1760 			struct sockaddr_in	m_dst;
1761 			struct sockaddr_in	m_gw;
1762 			struct sockaddr_in	m_netmask;
1763 		} m_v4;
1764 		struct {
1765 			struct sockaddr_in6	m_dst;
1766 			struct sockaddr_in6	m_gw;
1767 			struct sockaddr_in6	m_netmask;
1768 		} m_v6;
1769 	} m_u;
1770 } mcast_rtmsg_t;
1771 #define	m_dst4		m_u.m_v4.m_dst
1772 #define	m_dst6		m_u.m_v6.m_dst
1773 #define	m_gw4		m_u.m_v4.m_gw
1774 #define	m_gw6		m_u.m_v6.m_gw
1775 #define	m_netmask4	m_u.m_v4.m_netmask
1776 #define	m_netmask6	m_u.m_v6.m_netmask
1777 
1778 /*
1779  * Configures a single interface: a new virtual interface is added, based on
1780  * the physical interface nwiftabptr->zone_nwif_physical, with the address
1781  * specified in nwiftabptr->zone_nwif_address, for zone zone_id.  Note that
1782  * the "address" can be an IPv6 address (with a /prefixlength required), an
1783  * IPv4 address (with a /prefixlength optional), or a name; for the latter,
1784  * an IPv4 name-to-address resolution will be attempted.
1785  *
1786  * A default interface route for multicast is created on the first IPv4 and
1787  * IPv6 interfaces (that have the IFF_MULTICAST flag set), respectively.
1788  * This should really be done in the init scripts if we ever allow zones to
1789  * modify the routing tables.
1790  *
1791  * If anything goes wrong, we log an detailed error message, attempt to tear
1792  * down whatever we set up and return an error.
1793  */
1794 static int
1795 configure_one_interface(zlog_t *zlogp, zoneid_t zone_id,
1796     struct zone_nwiftab *nwiftabptr, boolean_t *mcast_rt_v4_setp,
1797     boolean_t *mcast_rt_v6_setp)
1798 {
1799 	struct lifreq lifr;
1800 	struct sockaddr_in netmask4;
1801 	struct sockaddr_in6 netmask6;
1802 	struct in_addr in4;
1803 	struct in6_addr in6;
1804 	sa_family_t af;
1805 	char *slashp = strchr(nwiftabptr->zone_nwif_address, '/');
1806 	mcast_rtmsg_t mcast_rtmsg;
1807 	int s;
1808 	int rs;
1809 	int rlen;
1810 	boolean_t got_netmask = B_FALSE;
1811 	char addrstr4[INET_ADDRSTRLEN];
1812 	int res;
1813 
1814 	res = zonecfg_valid_net_address(nwiftabptr->zone_nwif_address, &lifr);
1815 	if (res != Z_OK) {
1816 		zerror(zlogp, B_FALSE, "%s: %s", zonecfg_strerror(res),
1817 		    nwiftabptr->zone_nwif_address);
1818 		return (-1);
1819 	}
1820 	af = lifr.lifr_addr.ss_family;
1821 	if (af == AF_INET)
1822 		in4 = ((struct sockaddr_in *)(&lifr.lifr_addr))->sin_addr;
1823 	else
1824 		in6 = ((struct sockaddr_in6 *)(&lifr.lifr_addr))->sin6_addr;
1825 
1826 	if ((s = socket(af, SOCK_DGRAM, 0)) < 0) {
1827 		zerror(zlogp, B_TRUE, "could not get socket");
1828 		return (-1);
1829 	}
1830 
1831 	(void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical,
1832 	    sizeof (lifr.lifr_name));
1833 	if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) {
1834 		/*
1835 		 * Here, we know that the interface can't be brought up.
1836 		 * A similar warning message was already printed out to
1837 		 * the console by zoneadm(1M) so instead we log the
1838 		 * message to syslog and continue.
1839 		 */
1840 		zerror(&logsys, B_TRUE, "WARNING: skipping interface "
1841 		    "'%s' which may not be present/plumbed in the "
1842 		    "global zone.", lifr.lifr_name);
1843 		(void) close(s);
1844 		return (Z_OK);
1845 	}
1846 
1847 	if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) {
1848 		zerror(zlogp, B_TRUE,
1849 		    "%s: could not set IP address to %s",
1850 		    lifr.lifr_name, nwiftabptr->zone_nwif_address);
1851 		goto bad;
1852 	}
1853 
1854 	/* Preserve literal IPv4 address for later potential printing. */
1855 	if (af == AF_INET)
1856 		(void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN);
1857 
1858 	lifr.lifr_zoneid = zone_id;
1859 	if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) {
1860 		zerror(zlogp, B_TRUE, "%s: could not place interface into zone",
1861 		    lifr.lifr_name);
1862 		goto bad;
1863 	}
1864 
1865 	if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) {
1866 		got_netmask = B_TRUE;	/* default setting will be correct */
1867 	} else {
1868 		if (af == AF_INET) {
1869 			/*
1870 			 * The IPv4 netmask can be determined either
1871 			 * directly if a prefix length was supplied with
1872 			 * the address or via the netmasks database.  Not
1873 			 * being able to determine it is a common failure,
1874 			 * but it often is not fatal to operation of the
1875 			 * interface.  In that case, a warning will be
1876 			 * printed after the rest of the interface's
1877 			 * parameters have been configured.
1878 			 */
1879 			(void) memset(&netmask4, 0, sizeof (netmask4));
1880 			if (slashp != NULL) {
1881 				if (addr2netmask(slashp + 1, V4_ADDR_LEN,
1882 				    (uchar_t *)&netmask4.sin_addr) != 0) {
1883 					*slashp = '/';
1884 					zerror(zlogp, B_FALSE,
1885 					    "%s: invalid prefix length in %s",
1886 					    lifr.lifr_name,
1887 					    nwiftabptr->zone_nwif_address);
1888 					goto bad;
1889 				}
1890 				got_netmask = B_TRUE;
1891 			} else if (getnetmaskbyaddr(in4,
1892 			    &netmask4.sin_addr) == 0) {
1893 				got_netmask = B_TRUE;
1894 			}
1895 			if (got_netmask) {
1896 				netmask4.sin_family = af;
1897 				(void) memcpy(&lifr.lifr_addr, &netmask4,
1898 				    sizeof (netmask4));
1899 			}
1900 		} else {
1901 			(void) memset(&netmask6, 0, sizeof (netmask6));
1902 			if (addr2netmask(slashp + 1, V6_ADDR_LEN,
1903 			    (uchar_t *)&netmask6.sin6_addr) != 0) {
1904 				*slashp = '/';
1905 				zerror(zlogp, B_FALSE,
1906 				    "%s: invalid prefix length in %s",
1907 				    lifr.lifr_name,
1908 				    nwiftabptr->zone_nwif_address);
1909 				goto bad;
1910 			}
1911 			got_netmask = B_TRUE;
1912 			netmask6.sin6_family = af;
1913 			(void) memcpy(&lifr.lifr_addr, &netmask6,
1914 			    sizeof (netmask6));
1915 		}
1916 		if (got_netmask &&
1917 		    ioctl(s, SIOCSLIFNETMASK, (caddr_t)&lifr) < 0) {
1918 			zerror(zlogp, B_TRUE, "%s: could not set netmask",
1919 			    lifr.lifr_name);
1920 			goto bad;
1921 		}
1922 
1923 		/*
1924 		 * This doesn't set the broadcast address at all. Rather, it
1925 		 * gets, then sets the interface's address, relying on the fact
1926 		 * that resetting the address will reset the broadcast address.
1927 		 */
1928 		if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) {
1929 			zerror(zlogp, B_TRUE, "%s: could not get address",
1930 			    lifr.lifr_name);
1931 			goto bad;
1932 		}
1933 		if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) {
1934 			zerror(zlogp, B_TRUE,
1935 			    "%s: could not reset broadcast address",
1936 			    lifr.lifr_name);
1937 			goto bad;
1938 		}
1939 	}
1940 
1941 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
1942 		zerror(zlogp, B_TRUE, "%s: could not get flags",
1943 		    lifr.lifr_name);
1944 		goto bad;
1945 	}
1946 	lifr.lifr_flags |= IFF_UP;
1947 	if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
1948 		int save_errno = errno;
1949 		char *zone_using;
1950 
1951 		/*
1952 		 * If we failed with something other than EADDRNOTAVAIL,
1953 		 * then skip to the end.  Otherwise, look up our address,
1954 		 * then call a function to determine which zone is already
1955 		 * using that address.
1956 		 */
1957 		if (errno != EADDRNOTAVAIL) {
1958 			zerror(zlogp, B_TRUE,
1959 			    "%s: could not bring interface up", lifr.lifr_name);
1960 			goto bad;
1961 		}
1962 		if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) {
1963 			zerror(zlogp, B_TRUE, "%s: could not get address",
1964 			    lifr.lifr_name);
1965 			goto bad;
1966 		}
1967 		zone_using = who_is_using(zlogp, &lifr);
1968 		errno = save_errno;
1969 		if (zone_using == NULL)
1970 			zerror(zlogp, B_TRUE,
1971 			    "%s: could not bring interface up", lifr.lifr_name);
1972 		else
1973 			zerror(zlogp, B_TRUE, "%s: could not bring interface "
1974 			    "up: address in use by zone '%s'", lifr.lifr_name,
1975 			    zone_using);
1976 		goto bad;
1977 	}
1978 	if ((lifr.lifr_flags & IFF_MULTICAST) && ((af == AF_INET &&
1979 	    mcast_rt_v4_setp != NULL && *mcast_rt_v4_setp == B_FALSE) ||
1980 	    (af == AF_INET6 &&
1981 	    mcast_rt_v6_setp != NULL && *mcast_rt_v6_setp == B_FALSE))) {
1982 		rs = socket(PF_ROUTE, SOCK_RAW, 0);
1983 		if (rs < 0) {
1984 			zerror(zlogp, B_TRUE, "%s: could not create "
1985 			    "routing socket", lifr.lifr_name);
1986 			goto bad;
1987 		}
1988 		(void) shutdown(rs, 0);
1989 		(void) memset((void *)&mcast_rtmsg, 0, sizeof (mcast_rtmsg_t));
1990 		mcast_rtmsg.m_rtm.rtm_msglen =  sizeof (struct rt_msghdr) +
1991 		    3 * (af == AF_INET ? sizeof (struct sockaddr_in) :
1992 		    sizeof (struct sockaddr_in6));
1993 		mcast_rtmsg.m_rtm.rtm_version = RTM_VERSION;
1994 		mcast_rtmsg.m_rtm.rtm_type = RTM_ADD;
1995 		mcast_rtmsg.m_rtm.rtm_flags = RTF_UP;
1996 		mcast_rtmsg.m_rtm.rtm_addrs =
1997 		    RTA_DST | RTA_GATEWAY | RTA_NETMASK;
1998 		mcast_rtmsg.m_rtm.rtm_seq = ++rts_seqno;
1999 		if (af == AF_INET) {
2000 			mcast_rtmsg.m_dst4.sin_family = AF_INET;
2001 			mcast_rtmsg.m_dst4.sin_addr.s_addr =
2002 			    htonl(INADDR_UNSPEC_GROUP);
2003 			mcast_rtmsg.m_gw4.sin_family = AF_INET;
2004 			mcast_rtmsg.m_gw4.sin_addr = in4;
2005 			mcast_rtmsg.m_netmask4.sin_family = AF_INET;
2006 			mcast_rtmsg.m_netmask4.sin_addr.s_addr =
2007 			    htonl(IN_CLASSD_NET);
2008 		} else {
2009 			mcast_rtmsg.m_dst6.sin6_family = AF_INET6;
2010 			mcast_rtmsg.m_dst6.sin6_addr.s6_addr[0] = 0xffU;
2011 			mcast_rtmsg.m_gw6.sin6_family = AF_INET6;
2012 			mcast_rtmsg.m_gw6.sin6_addr = in6;
2013 			mcast_rtmsg.m_netmask6.sin6_family = AF_INET6;
2014 			mcast_rtmsg.m_netmask6.sin6_addr.s6_addr[0] = 0xffU;
2015 		}
2016 		rlen = write(rs, (char *)&mcast_rtmsg,
2017 		    mcast_rtmsg.m_rtm.rtm_msglen);
2018 		/*
2019 		 * The write to the multicast socket will fail if the
2020 		 * interface belongs to a failed IPMP group. This is a
2021 		 * non-fatal error and the zone will continue booting.
2022 		 * While the zone is running, if any interface in the
2023 		 * failed IPMP group recovers, the zone will fallback to
2024 		 * using that interface.
2025 		 */
2026 		if (rlen < mcast_rtmsg.m_rtm.rtm_msglen) {
2027 			if (rlen < 0) {
2028 				zerror(zlogp, B_TRUE, "WARNING: interface "
2029 				    "'%s' not available as default for "
2030 				    "multicast.", lifr.lifr_name);
2031 			} else {
2032 				zerror(zlogp, B_FALSE, "WARNING: interface "
2033 				    "'%s' not available as default for "
2034 				    "multicast; routing socket returned "
2035 				    "unexpected %d bytes.",
2036 				    lifr.lifr_name, rlen);
2037 			}
2038 		} else {
2039 
2040 			if (af == AF_INET) {
2041 				*mcast_rt_v4_setp = B_TRUE;
2042 			} else {
2043 				*mcast_rt_v6_setp = B_TRUE;
2044 			}
2045 		}
2046 		(void) close(rs);
2047 	}
2048 
2049 	if (!got_netmask) {
2050 		/*
2051 		 * A common, but often non-fatal problem, is that the system
2052 		 * cannot find the netmask for an interface address. This is
2053 		 * often caused by it being only in /etc/inet/netmasks, but
2054 		 * /etc/nsswitch.conf says to use NIS or NIS+ and it's not
2055 		 * in that. This doesn't show up at boot because the netmask
2056 		 * is obtained from /etc/inet/netmasks when no network
2057 		 * interfaces are up, but isn't consulted when NIS/NIS+ is
2058 		 * available. We warn the user here that something like this
2059 		 * has happened and we're just running with a default and
2060 		 * possible incorrect netmask.
2061 		 */
2062 		char buffer[INET6_ADDRSTRLEN];
2063 		void  *addr;
2064 
2065 		if (af == AF_INET)
2066 			addr = &((struct sockaddr_in *)
2067 			    (&lifr.lifr_addr))->sin_addr;
2068 		else
2069 			addr = &((struct sockaddr_in6 *)
2070 			    (&lifr.lifr_addr))->sin6_addr;
2071 
2072 		/* Find out what netmask interface is going to be using */
2073 		if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 ||
2074 		    inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL)
2075 			goto bad;
2076 		zerror(zlogp, B_FALSE,
2077 		    "WARNING: %s: no matching subnet found in netmasks(4) for "
2078 		    "%s; using default of %s.",
2079 		    lifr.lifr_name, addrstr4, buffer);
2080 	}
2081 
2082 	(void) close(s);
2083 	return (Z_OK);
2084 bad:
2085 	(void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr);
2086 	(void) close(s);
2087 	return (-1);
2088 }
2089 
2090 /*
2091  * Sets up network interfaces based on information from the zone configuration.
2092  * An IPv4 loopback interface is set up "for free", modeling the global system.
2093  * If any of the configuration interfaces were IPv6, then an IPv6 loopback
2094  * address is set up as well.
2095  *
2096  * If anything goes wrong, we log a general error message, attempt to tear down
2097  * whatever we set up, and return an error.
2098  */
2099 static int
2100 configure_network_interfaces(zlog_t *zlogp)
2101 {
2102 	zone_dochandle_t handle;
2103 	struct zone_nwiftab nwiftab, loopback_iftab;
2104 	boolean_t saw_v6 = B_FALSE;
2105 	boolean_t mcast_rt_v4_set = B_FALSE;
2106 	boolean_t mcast_rt_v6_set = B_FALSE;
2107 	zoneid_t zoneid;
2108 
2109 	if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) {
2110 		zerror(zlogp, B_TRUE, "unable to get zoneid");
2111 		return (-1);
2112 	}
2113 
2114 	if ((handle = zonecfg_init_handle()) == NULL) {
2115 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2116 		return (-1);
2117 	}
2118 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2119 		zerror(zlogp, B_FALSE, "invalid configuration");
2120 		zonecfg_fini_handle(handle);
2121 		return (-1);
2122 	}
2123 	if (zonecfg_setnwifent(handle) == Z_OK) {
2124 		for (;;) {
2125 			struct in6_addr in6;
2126 
2127 			if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
2128 				break;
2129 			if (configure_one_interface(zlogp, zoneid,
2130 			    &nwiftab, &mcast_rt_v4_set, &mcast_rt_v6_set) !=
2131 			    Z_OK) {
2132 				(void) zonecfg_endnwifent(handle);
2133 				zonecfg_fini_handle(handle);
2134 				return (-1);
2135 			}
2136 			if (inet_pton(AF_INET6, nwiftab.zone_nwif_address,
2137 			    &in6) == 1)
2138 				saw_v6 = B_TRUE;
2139 		}
2140 		(void) zonecfg_endnwifent(handle);
2141 	}
2142 	zonecfg_fini_handle(handle);
2143 	(void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0",
2144 	    sizeof (loopback_iftab.zone_nwif_physical));
2145 	(void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1",
2146 	    sizeof (loopback_iftab.zone_nwif_address));
2147 	if (configure_one_interface(zlogp, zoneid, &loopback_iftab, NULL, NULL)
2148 	    != Z_OK) {
2149 		return (-1);
2150 	}
2151 	if (saw_v6) {
2152 		(void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128",
2153 		    sizeof (loopback_iftab.zone_nwif_address));
2154 		if (configure_one_interface(zlogp, zoneid,
2155 		    &loopback_iftab, NULL, NULL) != Z_OK) {
2156 			return (-1);
2157 		}
2158 	}
2159 	return (0);
2160 }
2161 
2162 static int
2163 tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid,
2164     const struct sockaddr_storage *local, const struct sockaddr_storage *remote)
2165 {
2166 	int fd;
2167 	struct strioctl ioc;
2168 	tcp_ioc_abort_conn_t conn;
2169 	int error;
2170 
2171 	conn.ac_local = *local;
2172 	conn.ac_remote = *remote;
2173 	conn.ac_start = TCPS_SYN_SENT;
2174 	conn.ac_end = TCPS_TIME_WAIT;
2175 	conn.ac_zoneid = zoneid;
2176 
2177 	ioc.ic_cmd = TCP_IOC_ABORT_CONN;
2178 	ioc.ic_timout = -1; /* infinite timeout */
2179 	ioc.ic_len = sizeof (conn);
2180 	ioc.ic_dp = (char *)&conn;
2181 
2182 	if ((fd = open("/dev/tcp", O_RDONLY)) < 0) {
2183 		zerror(zlogp, B_TRUE, "unable to open %s", "/dev/tcp");
2184 		return (-1);
2185 	}
2186 
2187 	error = ioctl(fd, I_STR, &ioc);
2188 	(void) close(fd);
2189 	if (error == 0 || errno == ENOENT)	/* ENOENT is not an error */
2190 		return (0);
2191 	return (-1);
2192 }
2193 
2194 static int
2195 tcp_abort_connections(zlog_t *zlogp, zoneid_t zoneid)
2196 {
2197 	struct sockaddr_storage l, r;
2198 	struct sockaddr_in *local, *remote;
2199 	struct sockaddr_in6 *local6, *remote6;
2200 	int error;
2201 
2202 	/*
2203 	 * Abort IPv4 connections.
2204 	 */
2205 	bzero(&l, sizeof (*local));
2206 	local = (struct sockaddr_in *)&l;
2207 	local->sin_family = AF_INET;
2208 	local->sin_addr.s_addr = INADDR_ANY;
2209 	local->sin_port = 0;
2210 
2211 	bzero(&r, sizeof (*remote));
2212 	remote = (struct sockaddr_in *)&r;
2213 	remote->sin_family = AF_INET;
2214 	remote->sin_addr.s_addr = INADDR_ANY;
2215 	remote->sin_port = 0;
2216 
2217 	if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
2218 		return (error);
2219 
2220 	/*
2221 	 * Abort IPv6 connections.
2222 	 */
2223 	bzero(&l, sizeof (*local6));
2224 	local6 = (struct sockaddr_in6 *)&l;
2225 	local6->sin6_family = AF_INET6;
2226 	local6->sin6_port = 0;
2227 	local6->sin6_addr = in6addr_any;
2228 
2229 	bzero(&r, sizeof (*remote6));
2230 	remote6 = (struct sockaddr_in6 *)&r;
2231 	remote6->sin6_family = AF_INET6;
2232 	remote6->sin6_port = 0;
2233 	remote6->sin6_addr = in6addr_any;
2234 
2235 	if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
2236 		return (error);
2237 	return (0);
2238 }
2239 
2240 static int
2241 get_privset(zlog_t *zlogp, priv_set_t *privs, boolean_t mount_cmd)
2242 {
2243 	int error = -1;
2244 	zone_dochandle_t handle;
2245 	char *privname = NULL;
2246 
2247 	if (mount_cmd) {
2248 		if (zonecfg_default_privset(privs) == Z_OK)
2249 			return (0);
2250 		zerror(zlogp, B_FALSE,
2251 		    "failed to determine the zone's default privilege set");
2252 		return (-1);
2253 	}
2254 
2255 	if ((handle = zonecfg_init_handle()) == NULL) {
2256 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2257 		return (-1);
2258 	}
2259 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2260 		zerror(zlogp, B_FALSE, "invalid configuration");
2261 		zonecfg_fini_handle(handle);
2262 		return (-1);
2263 	}
2264 
2265 	switch (zonecfg_get_privset(handle, privs, &privname)) {
2266 	case Z_OK:
2267 		error = 0;
2268 		break;
2269 	case Z_PRIV_PROHIBITED:
2270 		zerror(zlogp, B_FALSE, "privilege \"%s\" is not permitted "
2271 		    "within the zone's privilege set", privname);
2272 		break;
2273 	case Z_PRIV_REQUIRED:
2274 		zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing "
2275 		    "from the zone's privilege set", privname);
2276 		break;
2277 	case Z_PRIV_UNKNOWN:
2278 		zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified "
2279 		    "in the zone's privilege set", privname);
2280 		break;
2281 	default:
2282 		zerror(zlogp, B_FALSE, "failed to determine the zone's "
2283 		    "privilege set");
2284 		break;
2285 	}
2286 
2287 	free(privname);
2288 	zonecfg_fini_handle(handle);
2289 	return (error);
2290 }
2291 
2292 static int
2293 get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
2294 {
2295 	nvlist_t *nvl = NULL;
2296 	char *nvl_packed = NULL;
2297 	size_t nvl_size = 0;
2298 	nvlist_t **nvlv = NULL;
2299 	int rctlcount = 0;
2300 	int error = -1;
2301 	zone_dochandle_t handle;
2302 	struct zone_rctltab rctltab;
2303 	rctlblk_t *rctlblk = NULL;
2304 
2305 	*bufp = NULL;
2306 	*bufsizep = 0;
2307 
2308 	if ((handle = zonecfg_init_handle()) == NULL) {
2309 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2310 		return (-1);
2311 	}
2312 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2313 		zerror(zlogp, B_FALSE, "invalid configuration");
2314 		zonecfg_fini_handle(handle);
2315 		return (-1);
2316 	}
2317 
2318 	rctltab.zone_rctl_valptr = NULL;
2319 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
2320 		zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
2321 		goto out;
2322 	}
2323 
2324 	if (zonecfg_setrctlent(handle) != Z_OK) {
2325 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
2326 		goto out;
2327 	}
2328 
2329 	if ((rctlblk = malloc(rctlblk_size())) == NULL) {
2330 		zerror(zlogp, B_TRUE, "memory allocation failed");
2331 		goto out;
2332 	}
2333 	while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
2334 		struct zone_rctlvaltab *rctlval;
2335 		uint_t i, count;
2336 		const char *name = rctltab.zone_rctl_name;
2337 
2338 		/* zoneadm should have already warned about unknown rctls. */
2339 		if (!zonecfg_is_rctl(name)) {
2340 			zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
2341 			rctltab.zone_rctl_valptr = NULL;
2342 			continue;
2343 		}
2344 		count = 0;
2345 		for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
2346 		    rctlval = rctlval->zone_rctlval_next) {
2347 			count++;
2348 		}
2349 		if (count == 0) {	/* ignore */
2350 			continue;	/* Nothing to free */
2351 		}
2352 		if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL)
2353 			goto out;
2354 		i = 0;
2355 		for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
2356 		    rctlval = rctlval->zone_rctlval_next, i++) {
2357 			if (nvlist_alloc(&nvlv[i], NV_UNIQUE_NAME, 0) != 0) {
2358 				zerror(zlogp, B_TRUE, "%s failed",
2359 				    "nvlist_alloc");
2360 				goto out;
2361 			}
2362 			if (zonecfg_construct_rctlblk(rctlval, rctlblk)
2363 			    != Z_OK) {
2364 				zerror(zlogp, B_FALSE, "invalid rctl value: "
2365 				    "(priv=%s,limit=%s,action=%s)",
2366 				    rctlval->zone_rctlval_priv,
2367 				    rctlval->zone_rctlval_limit,
2368 				    rctlval->zone_rctlval_action);
2369 				goto out;
2370 			}
2371 			if (!zonecfg_valid_rctl(name, rctlblk)) {
2372 				zerror(zlogp, B_FALSE,
2373 				    "(priv=%s,limit=%s,action=%s) is not a "
2374 				    "valid value for rctl '%s'",
2375 				    rctlval->zone_rctlval_priv,
2376 				    rctlval->zone_rctlval_limit,
2377 				    rctlval->zone_rctlval_action,
2378 				    name);
2379 				goto out;
2380 			}
2381 			if (nvlist_add_uint64(nvlv[i], "privilege",
2382 			    rctlblk_get_privilege(rctlblk)) != 0) {
2383 				zerror(zlogp, B_FALSE, "%s failed",
2384 				    "nvlist_add_uint64");
2385 				goto out;
2386 			}
2387 			if (nvlist_add_uint64(nvlv[i], "limit",
2388 			    rctlblk_get_value(rctlblk)) != 0) {
2389 				zerror(zlogp, B_FALSE, "%s failed",
2390 				    "nvlist_add_uint64");
2391 				goto out;
2392 			}
2393 			if (nvlist_add_uint64(nvlv[i], "action",
2394 			    (uint_t)rctlblk_get_local_action(rctlblk, NULL))
2395 			    != 0) {
2396 				zerror(zlogp, B_FALSE, "%s failed",
2397 				    "nvlist_add_uint64");
2398 				goto out;
2399 			}
2400 		}
2401 		zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
2402 		rctltab.zone_rctl_valptr = NULL;
2403 		if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
2404 		    != 0) {
2405 			zerror(zlogp, B_FALSE, "%s failed",
2406 			    "nvlist_add_nvlist_array");
2407 			goto out;
2408 		}
2409 		for (i = 0; i < count; i++)
2410 			nvlist_free(nvlv[i]);
2411 		free(nvlv);
2412 		nvlv = NULL;
2413 		rctlcount++;
2414 	}
2415 	(void) zonecfg_endrctlent(handle);
2416 
2417 	if (rctlcount == 0) {
2418 		error = 0;
2419 		goto out;
2420 	}
2421 	if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0)
2422 	    != 0) {
2423 		zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack");
2424 		goto out;
2425 	}
2426 
2427 	error = 0;
2428 	*bufp = nvl_packed;
2429 	*bufsizep = nvl_size;
2430 
2431 out:
2432 	free(rctlblk);
2433 	zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
2434 	if (error && nvl_packed != NULL)
2435 		free(nvl_packed);
2436 	if (nvl != NULL)
2437 		nvlist_free(nvl);
2438 	if (nvlv != NULL)
2439 		free(nvlv);
2440 	if (handle != NULL)
2441 		zonecfg_fini_handle(handle);
2442 	return (error);
2443 }
2444 
2445 static int
2446 get_zone_pool(zlog_t *zlogp, char *poolbuf, size_t bufsz)
2447 {
2448 	zone_dochandle_t handle;
2449 	int error;
2450 
2451 	if ((handle = zonecfg_init_handle()) == NULL) {
2452 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2453 		return (Z_NOMEM);
2454 	}
2455 	error = zonecfg_get_snapshot_handle(zone_name, handle);
2456 	if (error != Z_OK) {
2457 		zerror(zlogp, B_FALSE, "invalid configuration");
2458 		zonecfg_fini_handle(handle);
2459 		return (error);
2460 	}
2461 	error = zonecfg_get_pool(handle, poolbuf, bufsz);
2462 	zonecfg_fini_handle(handle);
2463 	return (error);
2464 }
2465 
2466 static int
2467 get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
2468 {
2469 	zone_dochandle_t handle;
2470 	struct zone_dstab dstab;
2471 	size_t total, offset, len;
2472 	int error = -1;
2473 	char *str;
2474 
2475 	*bufp = NULL;
2476 	*bufsizep = 0;
2477 
2478 	if ((handle = zonecfg_init_handle()) == NULL) {
2479 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2480 		return (-1);
2481 	}
2482 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2483 		zerror(zlogp, B_FALSE, "invalid configuration");
2484 		zonecfg_fini_handle(handle);
2485 		return (-1);
2486 	}
2487 
2488 	if (zonecfg_setdsent(handle) != Z_OK) {
2489 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
2490 		goto out;
2491 	}
2492 
2493 	total = 0;
2494 	while (zonecfg_getdsent(handle, &dstab) == Z_OK)
2495 		total += strlen(dstab.zone_dataset_name) + 1;
2496 	(void) zonecfg_enddsent(handle);
2497 
2498 	if (total == 0) {
2499 		error = 0;
2500 		goto out;
2501 	}
2502 
2503 	if ((str = malloc(total)) == NULL) {
2504 		zerror(zlogp, B_TRUE, "memory allocation failed");
2505 		goto out;
2506 	}
2507 
2508 	if (zonecfg_setdsent(handle) != Z_OK) {
2509 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
2510 		goto out;
2511 	}
2512 	offset = 0;
2513 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
2514 		len = strlen(dstab.zone_dataset_name);
2515 		(void) strlcpy(str + offset, dstab.zone_dataset_name,
2516 		    sizeof (dstab.zone_dataset_name) - offset);
2517 		offset += len;
2518 		if (offset != total - 1)
2519 			str[offset++] = ',';
2520 	}
2521 	(void) zonecfg_enddsent(handle);
2522 
2523 	error = 0;
2524 	*bufp = str;
2525 	*bufsizep = total;
2526 
2527 out:
2528 	if (error != 0 && str != NULL)
2529 		free(str);
2530 	if (handle != NULL)
2531 		zonecfg_fini_handle(handle);
2532 
2533 	return (error);
2534 }
2535 
2536 static int
2537 validate_datasets(zlog_t *zlogp)
2538 {
2539 	zone_dochandle_t handle;
2540 	struct zone_dstab dstab;
2541 	zfs_handle_t *zhp;
2542 	libzfs_handle_t *hdl;
2543 
2544 	if ((handle = zonecfg_init_handle()) == NULL) {
2545 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2546 		return (-1);
2547 	}
2548 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2549 		zerror(zlogp, B_FALSE, "invalid configuration");
2550 		zonecfg_fini_handle(handle);
2551 		return (-1);
2552 	}
2553 
2554 	if (zonecfg_setdsent(handle) != Z_OK) {
2555 		zerror(zlogp, B_FALSE, "invalid configuration");
2556 		zonecfg_fini_handle(handle);
2557 		return (-1);
2558 	}
2559 
2560 	if ((hdl = libzfs_init()) == NULL) {
2561 		zerror(zlogp, B_FALSE, "opening ZFS library");
2562 		zonecfg_fini_handle(handle);
2563 		return (-1);
2564 	}
2565 
2566 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
2567 
2568 		if ((zhp = zfs_open(hdl, dstab.zone_dataset_name,
2569 		    ZFS_TYPE_FILESYSTEM)) == NULL) {
2570 			zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'",
2571 			    dstab.zone_dataset_name);
2572 			zonecfg_fini_handle(handle);
2573 			libzfs_fini(hdl);
2574 			return (-1);
2575 		}
2576 
2577 		/*
2578 		 * Automatically set the 'zoned' property.  We check the value
2579 		 * first because we'll get EPERM if it is already set.
2580 		 */
2581 		if (!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
2582 		    zfs_prop_set(zhp, ZFS_PROP_ZONED, "on") != 0) {
2583 			zerror(zlogp, B_FALSE, "cannot set 'zoned' "
2584 			    "property for ZFS dataset '%s'\n",
2585 			    dstab.zone_dataset_name);
2586 			zonecfg_fini_handle(handle);
2587 			zfs_close(zhp);
2588 			libzfs_fini(hdl);
2589 			return (-1);
2590 		}
2591 
2592 		zfs_close(zhp);
2593 	}
2594 	(void) zonecfg_enddsent(handle);
2595 
2596 	zonecfg_fini_handle(handle);
2597 	libzfs_fini(hdl);
2598 
2599 	return (0);
2600 }
2601 
2602 static int
2603 bind_to_pool(zlog_t *zlogp, zoneid_t zoneid)
2604 {
2605 	pool_conf_t *poolconf;
2606 	pool_t *pool;
2607 	char poolname[MAXPATHLEN];
2608 	int status;
2609 	int error;
2610 
2611 	/*
2612 	 * Find the pool mentioned in the zone configuration, and bind to it.
2613 	 */
2614 	error = get_zone_pool(zlogp, poolname, sizeof (poolname));
2615 	if (error == Z_NO_ENTRY || (error == Z_OK && strlen(poolname) == 0)) {
2616 		/*
2617 		 * The property is not set on the zone, so the pool
2618 		 * should be bound to the default pool.  But that's
2619 		 * already done by the kernel, so we can just return.
2620 		 */
2621 		return (0);
2622 	}
2623 	if (error != Z_OK) {
2624 		/*
2625 		 * Not an error, even though it shouldn't be happening.
2626 		 */
2627 		zerror(zlogp, B_FALSE,
2628 		    "WARNING: unable to retrieve default pool.");
2629 		return (0);
2630 	}
2631 	/*
2632 	 * Don't do anything if pools aren't enabled.
2633 	 */
2634 	if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED) {
2635 		zerror(zlogp, B_FALSE, "WARNING: pools facility not active; "
2636 		    "zone will not be bound to pool '%s'.", poolname);
2637 		return (0);
2638 	}
2639 	/*
2640 	 * Try to provide a sane error message if the requested pool doesn't
2641 	 * exist.
2642 	 */
2643 	if ((poolconf = pool_conf_alloc()) == NULL) {
2644 		zerror(zlogp, B_FALSE, "%s failed", "pool_conf_alloc");
2645 		return (-1);
2646 	}
2647 	if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) !=
2648 	    PO_SUCCESS) {
2649 		zerror(zlogp, B_FALSE, "%s failed", "pool_conf_open");
2650 		pool_conf_free(poolconf);
2651 		return (-1);
2652 	}
2653 	pool = pool_get_pool(poolconf, poolname);
2654 	(void) pool_conf_close(poolconf);
2655 	pool_conf_free(poolconf);
2656 	if (pool == NULL) {
2657 		zerror(zlogp, B_FALSE, "WARNING: pool '%s' not found; "
2658 		    "using default pool.", poolname);
2659 		return (0);
2660 	}
2661 	/*
2662 	 * Bind the zone to the pool.
2663 	 */
2664 	if (pool_set_binding(poolname, P_ZONEID, zoneid) != PO_SUCCESS) {
2665 		zerror(zlogp, B_FALSE, "WARNING: unable to bind to pool '%s'; "
2666 		    "using default pool.", poolname);
2667 	}
2668 	return (0);
2669 }
2670 
2671 /*
2672  * Mount lower level home directories into/from current zone
2673  * Share exported directories specified in dfstab for zone
2674  */
2675 static int
2676 tsol_mounts(zlog_t *zlogp, char *zone_name, char *rootpath)
2677 {
2678 	zoneid_t *zids = NULL;
2679 	priv_set_t *zid_privs;
2680 	const priv_impl_info_t *ip = NULL;
2681 	uint_t nzents_saved;
2682 	uint_t nzents;
2683 	int i;
2684 	char readonly[] = "ro";
2685 	struct zone_fstab lower_fstab;
2686 	char *argv[4];
2687 
2688 	if (!is_system_labeled())
2689 		return (0);
2690 
2691 	if (zid_label == NULL) {
2692 		zid_label = m_label_alloc(MAC_LABEL);
2693 		if (zid_label == NULL)
2694 			return (-1);
2695 	}
2696 
2697 	/* Make sure our zone has an /export/home dir */
2698 	(void) make_one_dir(zlogp, rootpath, "/export/home",
2699 	    DEFAULT_DIR_MODE);
2700 
2701 	lower_fstab.zone_fs_raw[0] = '\0';
2702 	(void) strlcpy(lower_fstab.zone_fs_type, MNTTYPE_LOFS,
2703 	    sizeof (lower_fstab.zone_fs_type));
2704 	lower_fstab.zone_fs_options = NULL;
2705 	(void) zonecfg_add_fs_option(&lower_fstab, readonly);
2706 
2707 	/*
2708 	 * Get the list of zones from the kernel
2709 	 */
2710 	if (zone_list(NULL, &nzents) != 0) {
2711 		zerror(zlogp, B_TRUE, "unable to list zones");
2712 		zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
2713 		return (-1);
2714 	}
2715 again:
2716 	if (nzents == 0) {
2717 		zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
2718 		return (-1);
2719 	}
2720 
2721 	zids = malloc(nzents * sizeof (zoneid_t));
2722 	if (zids == NULL) {
2723 		zerror(zlogp, B_TRUE, "memory allocation failed");
2724 		return (-1);
2725 	}
2726 	nzents_saved = nzents;
2727 
2728 	if (zone_list(zids, &nzents) != 0) {
2729 		zerror(zlogp, B_TRUE, "unable to list zones");
2730 		zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
2731 		free(zids);
2732 		return (-1);
2733 	}
2734 	if (nzents != nzents_saved) {
2735 		/* list changed, try again */
2736 		free(zids);
2737 		goto again;
2738 	}
2739 
2740 	ip = getprivimplinfo();
2741 	if ((zid_privs = priv_allocset()) == NULL) {
2742 		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
2743 		zonecfg_free_fs_option_list(
2744 		    lower_fstab.zone_fs_options);
2745 		free(zids);
2746 		return (-1);
2747 	}
2748 
2749 	for (i = 0; i < nzents; i++) {
2750 		char zid_name[ZONENAME_MAX];
2751 		zone_state_t zid_state;
2752 		char zid_rpath[MAXPATHLEN];
2753 		struct stat stat_buf;
2754 
2755 		if (zids[i] == GLOBAL_ZONEID)
2756 			continue;
2757 
2758 		if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1)
2759 			continue;
2760 
2761 		/*
2762 		 * Do special setup for the zone we are booting
2763 		 */
2764 		if (strcmp(zid_name, zone_name) == 0) {
2765 			struct zone_fstab autofs_fstab;
2766 			char map_path[MAXPATHLEN];
2767 			int fd;
2768 
2769 			/*
2770 			 * Create auto_home_<zone> map for this zone
2771 			 * in the global zone. The local zone entry
2772 			 * will be created by automount when the zone
2773 			 * is booted.
2774 			 */
2775 
2776 			(void) snprintf(autofs_fstab.zone_fs_special,
2777 			    MAXPATHLEN, "auto_home_%s", zid_name);
2778 
2779 			(void) snprintf(autofs_fstab.zone_fs_dir, MAXPATHLEN,
2780 			    "/zone/%s/home", zid_name);
2781 
2782 			(void) snprintf(map_path, sizeof (map_path),
2783 			    "/etc/%s", autofs_fstab.zone_fs_special);
2784 			/*
2785 			 * If the map file doesn't exist create a template
2786 			 */
2787 			if ((fd = open(map_path, O_RDWR | O_CREAT | O_EXCL,
2788 			    S_IRUSR | S_IWUSR | S_IRGRP| S_IROTH)) != -1) {
2789 				int len;
2790 				char map_rec[MAXPATHLEN];
2791 
2792 				len = snprintf(map_rec, sizeof (map_rec),
2793 				    "+%s\n*\t-fstype=lofs\t:%s/export/home/&\n",
2794 				    autofs_fstab.zone_fs_special, rootpath);
2795 				(void) write(fd, map_rec, len);
2796 				(void) close(fd);
2797 			}
2798 
2799 			/*
2800 			 * Mount auto_home_<zone> in the global zone if absent.
2801 			 * If it's already of type autofs, then
2802 			 * don't mount it again.
2803 			 */
2804 			if ((stat(autofs_fstab.zone_fs_dir, &stat_buf) == -1) ||
2805 			    strcmp(stat_buf.st_fstype, MNTTYPE_AUTOFS) != 0) {
2806 				char optstr[] = "indirect,ignore,nobrowse";
2807 
2808 				(void) make_one_dir(zlogp, "",
2809 				    autofs_fstab.zone_fs_dir, DEFAULT_DIR_MODE);
2810 
2811 				/*
2812 				 * Mount will fail if automounter has already
2813 				 * processed the auto_home_<zonename> map
2814 				 */
2815 				(void) domount(zlogp, MNTTYPE_AUTOFS, optstr,
2816 				    autofs_fstab.zone_fs_special,
2817 				    autofs_fstab.zone_fs_dir);
2818 			}
2819 			continue;
2820 		}
2821 
2822 
2823 		if (zone_get_state(zid_name, &zid_state) != Z_OK ||
2824 		    (zid_state != ZONE_STATE_READY &&
2825 		    zid_state != ZONE_STATE_RUNNING))
2826 			/* Skip over zones without mounted filesystems */
2827 			continue;
2828 
2829 		if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label,
2830 		    sizeof (m_label_t)) < 0)
2831 			/* Skip over zones with unspecified label */
2832 			continue;
2833 
2834 		if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath,
2835 		    sizeof (zid_rpath)) == -1)
2836 			/* Skip over zones with bad path */
2837 			continue;
2838 
2839 		if (zone_getattr(zids[i], ZONE_ATTR_PRIVSET, zid_privs,
2840 		    sizeof (priv_chunk_t) * ip->priv_setsize) == -1)
2841 			/* Skip over zones with bad privs */
2842 			continue;
2843 
2844 		/*
2845 		 * Reading down is valid according to our label model
2846 		 * but some customers want to disable it because it
2847 		 * allows execute down and other possible attacks.
2848 		 * Therefore, we restrict this feature to zones that
2849 		 * have the NET_MAC_AWARE privilege which is required
2850 		 * for NFS read-down semantics.
2851 		 */
2852 		if ((bldominates(zlabel, zid_label)) &&
2853 		    (priv_ismember(zprivs, PRIV_NET_MAC_AWARE))) {
2854 			/*
2855 			 * Our zone dominates this one.
2856 			 * Create a lofs mount from lower zone's /export/home
2857 			 */
2858 			(void) snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN,
2859 			    "%s/zone/%s/export/home", rootpath, zid_name);
2860 
2861 			/*
2862 			 * If the target is already an LOFS mount
2863 			 * then don't do it again.
2864 			 */
2865 			if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) ||
2866 			    strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) {
2867 
2868 				if (snprintf(lower_fstab.zone_fs_special,
2869 				    MAXPATHLEN, "%s/export",
2870 				    zid_rpath) > MAXPATHLEN)
2871 					continue;
2872 
2873 				/*
2874 				 * Make sure the lower-level home exists
2875 				 */
2876 				if (make_one_dir(zlogp,
2877 				    lower_fstab.zone_fs_special,
2878 				    "/home", DEFAULT_DIR_MODE) != 0)
2879 					continue;
2880 
2881 				(void) strlcat(lower_fstab.zone_fs_special,
2882 				    "/home", MAXPATHLEN);
2883 
2884 				/*
2885 				 * Mount can fail because the lower-level
2886 				 * zone may have already done a mount up.
2887 				 */
2888 				(void) mount_one(zlogp, &lower_fstab, "");
2889 			}
2890 		} else if ((bldominates(zid_label, zlabel)) &&
2891 		    (priv_ismember(zid_privs, PRIV_NET_MAC_AWARE))) {
2892 			/*
2893 			 * This zone dominates our zone.
2894 			 * Create a lofs mount from our zone's /export/home
2895 			 */
2896 			if (snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN,
2897 			    "%s/zone/%s/export/home", zid_rpath,
2898 			    zone_name) > MAXPATHLEN)
2899 				continue;
2900 
2901 			/*
2902 			 * If the target is already an LOFS mount
2903 			 * then don't do it again.
2904 			 */
2905 			if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) ||
2906 			    strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) {
2907 
2908 				(void) snprintf(lower_fstab.zone_fs_special,
2909 				    MAXPATHLEN, "%s/export/home", rootpath);
2910 
2911 				/*
2912 				 * Mount can fail because the higher-level
2913 				 * zone may have already done a mount down.
2914 				 */
2915 				(void) mount_one(zlogp, &lower_fstab, "");
2916 			}
2917 		}
2918 	}
2919 	zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
2920 	priv_freeset(zid_privs);
2921 	free(zids);
2922 
2923 	/*
2924 	 * Now share any exported directories from this zone.
2925 	 * Each zone can have its own dfstab.
2926 	 */
2927 
2928 	argv[0] = "zoneshare";
2929 	argv[1] = "-z";
2930 	argv[2] = zone_name;
2931 	argv[3] = NULL;
2932 
2933 	(void) forkexec(zlogp, "/usr/lib/zones/zoneshare", argv);
2934 	/* Don't check for errors since they don't affect the zone */
2935 
2936 	return (0);
2937 }
2938 
2939 /*
2940  * Unmount lofs mounts from higher level zones
2941  * Unshare nfs exported directories
2942  */
2943 static void
2944 tsol_unmounts(zlog_t *zlogp, char *zone_name)
2945 {
2946 	zoneid_t *zids = NULL;
2947 	uint_t nzents_saved;
2948 	uint_t nzents;
2949 	int i;
2950 	char *argv[4];
2951 	char path[MAXPATHLEN];
2952 
2953 	if (!is_system_labeled())
2954 		return;
2955 
2956 	/*
2957 	 * Get the list of zones from the kernel
2958 	 */
2959 	if (zone_list(NULL, &nzents) != 0) {
2960 		return;
2961 	}
2962 
2963 	if (zid_label == NULL) {
2964 		zid_label = m_label_alloc(MAC_LABEL);
2965 		if (zid_label == NULL)
2966 			return;
2967 	}
2968 
2969 again:
2970 	if (nzents == 0)
2971 		return;
2972 
2973 	zids = malloc(nzents * sizeof (zoneid_t));
2974 	if (zids == NULL) {
2975 		zerror(zlogp, B_TRUE, "memory allocation failed");
2976 		return;
2977 	}
2978 	nzents_saved = nzents;
2979 
2980 	if (zone_list(zids, &nzents) != 0) {
2981 		free(zids);
2982 		return;
2983 	}
2984 	if (nzents != nzents_saved) {
2985 		/* list changed, try again */
2986 		free(zids);
2987 		goto again;
2988 	}
2989 
2990 	for (i = 0; i < nzents; i++) {
2991 		char zid_name[ZONENAME_MAX];
2992 		zone_state_t zid_state;
2993 		char zid_rpath[MAXPATHLEN];
2994 
2995 		if (zids[i] == GLOBAL_ZONEID)
2996 			continue;
2997 
2998 		if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1)
2999 			continue;
3000 
3001 		/*
3002 		 * Skip the zone we are halting
3003 		 */
3004 		if (strcmp(zid_name, zone_name) == 0)
3005 			continue;
3006 
3007 		if ((zone_getattr(zids[i], ZONE_ATTR_STATUS, &zid_state,
3008 		    sizeof (zid_state)) < 0) ||
3009 		    (zid_state < ZONE_IS_READY))
3010 			/* Skip over zones without mounted filesystems */
3011 			continue;
3012 
3013 		if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label,
3014 		    sizeof (m_label_t)) < 0)
3015 			/* Skip over zones with unspecified label */
3016 			continue;
3017 
3018 		if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath,
3019 		    sizeof (zid_rpath)) == -1)
3020 			/* Skip over zones with bad path */
3021 			continue;
3022 
3023 		if (zlabel != NULL && bldominates(zid_label, zlabel)) {
3024 			/*
3025 			 * This zone dominates our zone.
3026 			 * Unmount the lofs mount of our zone's /export/home
3027 			 */
3028 
3029 			if (snprintf(path, MAXPATHLEN,
3030 			    "%s/zone/%s/export/home", zid_rpath,
3031 			    zone_name) > MAXPATHLEN)
3032 				continue;
3033 
3034 			/* Skip over mount failures */
3035 			(void) umount(path);
3036 		}
3037 	}
3038 	free(zids);
3039 
3040 	/*
3041 	 * Unmount global zone autofs trigger for this zone
3042 	 */
3043 	(void) snprintf(path, MAXPATHLEN, "/zone/%s/home", zone_name);
3044 	/* Skip over mount failures */
3045 	(void) umount(path);
3046 
3047 	/*
3048 	 * Next unshare any exported directories from this zone.
3049 	 */
3050 
3051 	argv[0] = "zoneunshare";
3052 	argv[1] = "-z";
3053 	argv[2] = zone_name;
3054 	argv[3] = NULL;
3055 
3056 	(void) forkexec(zlogp, "/usr/lib/zones/zoneunshare", argv);
3057 	/* Don't check for errors since they don't affect the zone */
3058 
3059 	/*
3060 	 * Finally, deallocate any devices in the zone.
3061 	 */
3062 
3063 	argv[0] = "deallocate";
3064 	argv[1] = "-Isz";
3065 	argv[2] = zone_name;
3066 	argv[3] = NULL;
3067 
3068 	(void) forkexec(zlogp, "/usr/sbin/deallocate", argv);
3069 	/* Don't check for errors since they don't affect the zone */
3070 }
3071 
3072 /*
3073  * Fetch the Trusted Extensions label and multi-level ports (MLPs) for
3074  * this zone.
3075  */
3076 static tsol_zcent_t *
3077 get_zone_label(zlog_t *zlogp, priv_set_t *privs)
3078 {
3079 	FILE *fp;
3080 	tsol_zcent_t *zcent = NULL;
3081 	char line[MAXTNZLEN];
3082 
3083 	if ((fp = fopen(TNZONECFG_PATH, "r")) == NULL) {
3084 		zerror(zlogp, B_TRUE, "%s", TNZONECFG_PATH);
3085 		return (NULL);
3086 	}
3087 
3088 	while (fgets(line, sizeof (line), fp) != NULL) {
3089 		/*
3090 		 * Check for malformed database
3091 		 */
3092 		if (strlen(line) == MAXTNZLEN - 1)
3093 			break;
3094 		if ((zcent = tsol_sgetzcent(line, NULL, NULL)) == NULL)
3095 			continue;
3096 		if (strcmp(zcent->zc_name, zone_name) == 0)
3097 			break;
3098 		tsol_freezcent(zcent);
3099 		zcent = NULL;
3100 	}
3101 	(void) fclose(fp);
3102 
3103 	if (zcent == NULL) {
3104 		zerror(zlogp, B_FALSE, "zone requires a label assignment. "
3105 		    "See tnzonecfg(4)");
3106 	} else {
3107 		if (zlabel == NULL)
3108 			zlabel = m_label_alloc(MAC_LABEL);
3109 		/*
3110 		 * Save this zone's privileges for later read-down processing
3111 		 */
3112 		if ((zprivs = priv_allocset()) == NULL) {
3113 			zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
3114 			return (NULL);
3115 		} else {
3116 			priv_copyset(privs, zprivs);
3117 		}
3118 	}
3119 	return (zcent);
3120 }
3121 
3122 /*
3123  * Add the Trusted Extensions multi-level ports for this zone.
3124  */
3125 static void
3126 set_mlps(zlog_t *zlogp, zoneid_t zoneid, tsol_zcent_t *zcent)
3127 {
3128 	tsol_mlp_t *mlp;
3129 	tsol_mlpent_t tsme;
3130 
3131 	if (!is_system_labeled())
3132 		return;
3133 
3134 	tsme.tsme_zoneid = zoneid;
3135 	tsme.tsme_flags = 0;
3136 	for (mlp = zcent->zc_private_mlp; !TSOL_MLP_END(mlp); mlp++) {
3137 		tsme.tsme_mlp = *mlp;
3138 		if (tnmlp(TNDB_LOAD, &tsme) != 0) {
3139 			zerror(zlogp, B_TRUE, "cannot set zone-specific MLP "
3140 			    "on %d-%d/%d", mlp->mlp_port,
3141 			    mlp->mlp_port_upper, mlp->mlp_ipp);
3142 		}
3143 	}
3144 
3145 	tsme.tsme_flags = TSOL_MEF_SHARED;
3146 	for (mlp = zcent->zc_shared_mlp; !TSOL_MLP_END(mlp); mlp++) {
3147 		tsme.tsme_mlp = *mlp;
3148 		if (tnmlp(TNDB_LOAD, &tsme) != 0) {
3149 			zerror(zlogp, B_TRUE, "cannot set shared MLP "
3150 			    "on %d-%d/%d", mlp->mlp_port,
3151 			    mlp->mlp_port_upper, mlp->mlp_ipp);
3152 		}
3153 	}
3154 }
3155 
3156 static void
3157 remove_mlps(zlog_t *zlogp, zoneid_t zoneid)
3158 {
3159 	tsol_mlpent_t tsme;
3160 
3161 	if (!is_system_labeled())
3162 		return;
3163 
3164 	(void) memset(&tsme, 0, sizeof (tsme));
3165 	tsme.tsme_zoneid = zoneid;
3166 	if (tnmlp(TNDB_FLUSH, &tsme) != 0)
3167 		zerror(zlogp, B_TRUE, "cannot flush MLPs");
3168 }
3169 
3170 int
3171 prtmount(const char *fs, void *x) {
3172 	zerror((zlog_t *)x, B_FALSE, "  %s", fs);
3173 	return (0);
3174 }
3175 
3176 /*
3177  * Look for zones running on the main system that are using this root (or any
3178  * subdirectory of it).  Return B_TRUE and print an error if a conflicting zone
3179  * is found or if we can't tell.
3180  */
3181 static boolean_t
3182 duplicate_zone_root(zlog_t *zlogp, const char *rootpath)
3183 {
3184 	zoneid_t *zids = NULL;
3185 	uint_t nzids = 0;
3186 	boolean_t retv;
3187 	int rlen, zlen;
3188 	char zroot[MAXPATHLEN];
3189 	char zonename[ZONENAME_MAX];
3190 
3191 	for (;;) {
3192 		nzids += 10;
3193 		zids = malloc(nzids * sizeof (*zids));
3194 		if (zids == NULL) {
3195 			zerror(zlogp, B_TRUE, "memory allocation failed");
3196 			return (B_TRUE);
3197 		}
3198 		if (zone_list(zids, &nzids) == 0)
3199 			break;
3200 		free(zids);
3201 	}
3202 	retv = B_FALSE;
3203 	rlen = strlen(rootpath);
3204 	while (nzids > 0) {
3205 		/*
3206 		 * Ignore errors; they just mean that the zone has disappeared
3207 		 * while we were busy.
3208 		 */
3209 		if (zone_getattr(zids[--nzids], ZONE_ATTR_ROOT, zroot,
3210 		    sizeof (zroot)) == -1)
3211 			continue;
3212 		zlen = strlen(zroot);
3213 		if (zlen > rlen)
3214 			zlen = rlen;
3215 		if (strncmp(rootpath, zroot, zlen) == 0 &&
3216 		    (zroot[zlen] == '\0' || zroot[zlen] == '/') &&
3217 		    (rootpath[zlen] == '\0' || rootpath[zlen] == '/')) {
3218 			if (getzonenamebyid(zids[nzids], zonename,
3219 			    sizeof (zonename)) == -1)
3220 				(void) snprintf(zonename, sizeof (zonename),
3221 				    "id %d", (int)zids[nzids]);
3222 			zerror(zlogp, B_FALSE,
3223 			    "zone root %s already in use by zone %s",
3224 			    rootpath, zonename);
3225 			retv = B_TRUE;
3226 			break;
3227 		}
3228 	}
3229 	free(zids);
3230 	return (retv);
3231 }
3232 
3233 /*
3234  * Search for loopback mounts that use this same source node (same device and
3235  * inode).  Return B_TRUE if there is one or if we can't tell.
3236  */
3237 static boolean_t
3238 duplicate_reachable_path(zlog_t *zlogp, const char *rootpath)
3239 {
3240 	struct stat64 rst, zst;
3241 	struct mnttab *mnp;
3242 
3243 	if (stat64(rootpath, &rst) == -1) {
3244 		zerror(zlogp, B_TRUE, "can't stat %s", rootpath);
3245 		return (B_TRUE);
3246 	}
3247 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
3248 		return (B_TRUE);
3249 	for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) {
3250 		if (mnp->mnt_fstype == NULL ||
3251 		    strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0)
3252 			continue;
3253 		/* We're looking at a loopback mount.  Stat it. */
3254 		if (mnp->mnt_special != NULL &&
3255 		    stat64(mnp->mnt_special, &zst) != -1 &&
3256 		    rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) {
3257 			zerror(zlogp, B_FALSE,
3258 			    "zone root %s is reachable through %s",
3259 			    rootpath, mnp->mnt_mountp);
3260 			return (B_TRUE);
3261 		}
3262 	}
3263 	return (B_FALSE);
3264 }
3265 
3266 zoneid_t
3267 vplat_create(zlog_t *zlogp, boolean_t mount_cmd)
3268 {
3269 	zoneid_t rval = -1;
3270 	priv_set_t *privs;
3271 	char rootpath[MAXPATHLEN];
3272 	char *rctlbuf = NULL;
3273 	size_t rctlbufsz = 0;
3274 	char *zfsbuf = NULL;
3275 	size_t zfsbufsz = 0;
3276 	zoneid_t zoneid = -1;
3277 	int xerr;
3278 	char *kzone;
3279 	FILE *fp = NULL;
3280 	tsol_zcent_t *zcent = NULL;
3281 	int match = 0;
3282 	int doi = 0;
3283 
3284 	if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
3285 		zerror(zlogp, B_TRUE, "unable to determine zone root");
3286 		return (-1);
3287 	}
3288 	if (zonecfg_in_alt_root())
3289 		resolve_lofs(zlogp, rootpath, sizeof (rootpath));
3290 
3291 	if ((privs = priv_allocset()) == NULL) {
3292 		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
3293 		return (-1);
3294 	}
3295 	priv_emptyset(privs);
3296 	if (get_privset(zlogp, privs, mount_cmd) != 0)
3297 		goto error;
3298 
3299 	if (!mount_cmd && get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) {
3300 		zerror(zlogp, B_FALSE, "Unable to get list of rctls");
3301 		goto error;
3302 	}
3303 
3304 	if (get_datasets(zlogp, &zfsbuf, &zfsbufsz) != 0) {
3305 		zerror(zlogp, B_FALSE, "Unable to get list of ZFS datasets");
3306 		goto error;
3307 	}
3308 
3309 	if (!mount_cmd && is_system_labeled()) {
3310 		zcent = get_zone_label(zlogp, privs);
3311 		if (zcent != NULL) {
3312 			match = zcent->zc_match;
3313 			doi = zcent->zc_doi;
3314 			*zlabel = zcent->zc_label;
3315 		} else {
3316 			goto error;
3317 		}
3318 	}
3319 
3320 	kzone = zone_name;
3321 
3322 	/*
3323 	 * We must do this scan twice.  First, we look for zones running on the
3324 	 * main system that are using this root (or any subdirectory of it).
3325 	 * Next, we reduce to the shortest path and search for loopback mounts
3326 	 * that use this same source node (same device and inode).
3327 	 */
3328 	if (duplicate_zone_root(zlogp, rootpath))
3329 		goto error;
3330 	if (duplicate_reachable_path(zlogp, rootpath))
3331 		goto error;
3332 
3333 	if (mount_cmd) {
3334 		root_to_lu(zlogp, rootpath, sizeof (rootpath), B_TRUE);
3335 
3336 		/*
3337 		 * Forge up a special root for this zone.  When a zone is
3338 		 * mounted, we can't let the zone have its own root because the
3339 		 * tools that will be used in this "scratch zone" need access
3340 		 * to both the zone's resources and the running machine's
3341 		 * executables.
3342 		 *
3343 		 * Note that the mkdir here also catches read-only filesystems.
3344 		 */
3345 		if (mkdir(rootpath, 0755) != 0 && errno != EEXIST) {
3346 			zerror(zlogp, B_TRUE, "cannot create %s", rootpath);
3347 			goto error;
3348 		}
3349 		if (domount(zlogp, "tmpfs", "", "swap", rootpath) != 0)
3350 			goto error;
3351 	}
3352 
3353 	if (zonecfg_in_alt_root()) {
3354 		/*
3355 		 * If we are mounting up a zone in an alternate root partition,
3356 		 * then we have some additional work to do before starting the
3357 		 * zone.  First, resolve the root path down so that we're not
3358 		 * fooled by duplicates.  Then forge up an internal name for
3359 		 * the zone.
3360 		 */
3361 		if ((fp = zonecfg_open_scratch("", B_TRUE)) == NULL) {
3362 			zerror(zlogp, B_TRUE, "cannot open mapfile");
3363 			goto error;
3364 		}
3365 		if (zonecfg_lock_scratch(fp) != 0) {
3366 			zerror(zlogp, B_TRUE, "cannot lock mapfile");
3367 			goto error;
3368 		}
3369 		if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
3370 		    NULL, 0) == 0) {
3371 			zerror(zlogp, B_FALSE, "scratch zone already running");
3372 			goto error;
3373 		}
3374 		/* This is the preferred name */
3375 		(void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s",
3376 		    zone_name);
3377 		srandom(getpid());
3378 		while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL,
3379 		    0) == 0) {
3380 			/* This is just an arbitrary name; note "." usage */
3381 			(void) snprintf(kernzone, sizeof (kernzone),
3382 			    "SUNWlu.%08lX%08lX", random(), random());
3383 		}
3384 		kzone = kernzone;
3385 	}
3386 
3387 	xerr = 0;
3388 	if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf,
3389 	    rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel)) == -1) {
3390 		if (xerr == ZE_AREMOUNTS) {
3391 			if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) {
3392 				zerror(zlogp, B_FALSE,
3393 				    "An unknown file-system is mounted on "
3394 				    "a subdirectory of %s", rootpath);
3395 			} else {
3396 
3397 				zerror(zlogp, B_FALSE,
3398 				    "These file-systems are mounted on "
3399 				    "subdirectories of %s:", rootpath);
3400 				(void) zonecfg_find_mounts(rootpath,
3401 				    prtmount, zlogp);
3402 			}
3403 		} else if (xerr == ZE_CHROOTED) {
3404 			zerror(zlogp, B_FALSE, "%s: "
3405 			    "cannot create a zone from a chrooted "
3406 			    "environment", "zone_create");
3407 		} else {
3408 			zerror(zlogp, B_TRUE, "%s failed", "zone_create");
3409 		}
3410 		goto error;
3411 	}
3412 
3413 	if (zonecfg_in_alt_root() &&
3414 	    zonecfg_add_scratch(fp, zone_name, kernzone,
3415 	    zonecfg_get_root()) == -1) {
3416 		zerror(zlogp, B_TRUE, "cannot add mapfile entry");
3417 		goto error;
3418 	}
3419 
3420 	/*
3421 	 * The following is a warning, not an error, and is not performed when
3422 	 * merely mounting a zone for administrative use.
3423 	 */
3424 	if (!mount_cmd && bind_to_pool(zlogp, zoneid) != 0)
3425 		zerror(zlogp, B_FALSE, "WARNING: unable to bind zone to "
3426 		    "requested pool; using default pool.");
3427 	if (!mount_cmd)
3428 		set_mlps(zlogp, zoneid, zcent);
3429 	rval = zoneid;
3430 	zoneid = -1;
3431 
3432 error:
3433 	if (zoneid != -1)
3434 		(void) zone_destroy(zoneid);
3435 	if (rctlbuf != NULL)
3436 		free(rctlbuf);
3437 	priv_freeset(privs);
3438 	if (fp != NULL)
3439 		zonecfg_close_scratch(fp);
3440 	lofs_discard_mnttab();
3441 	if (zcent != NULL)
3442 		tsol_freezcent(zcent);
3443 	return (rval);
3444 }
3445 
3446 /*
3447  * Enter the zone and write a /etc/zones/index file there.  This allows
3448  * libzonecfg (and thus zoneadm) to report the UUID and potentially other zone
3449  * details from inside the zone.
3450  */
3451 static void
3452 write_index_file(zoneid_t zoneid)
3453 {
3454 	FILE *zef;
3455 	FILE *zet;
3456 	struct zoneent *zep;
3457 	pid_t child;
3458 	int tmpl_fd;
3459 	ctid_t ct;
3460 	int fd;
3461 	char uuidstr[UUID_PRINTABLE_STRING_LENGTH];
3462 
3463 	/* Locate the zone entry in the global zone's index file */
3464 	if ((zef = setzoneent()) == NULL)
3465 		return;
3466 	while ((zep = getzoneent_private(zef)) != NULL) {
3467 		if (strcmp(zep->zone_name, zone_name) == 0)
3468 			break;
3469 		free(zep);
3470 	}
3471 	endzoneent(zef);
3472 	if (zep == NULL)
3473 		return;
3474 
3475 	if ((tmpl_fd = init_template()) == -1) {
3476 		free(zep);
3477 		return;
3478 	}
3479 
3480 	if ((child = fork()) == -1) {
3481 		(void) ct_tmpl_clear(tmpl_fd);
3482 		(void) close(tmpl_fd);
3483 		free(zep);
3484 		return;
3485 	}
3486 
3487 	/* parent waits for child to finish */
3488 	if (child != 0) {
3489 		free(zep);
3490 		if (contract_latest(&ct) == -1)
3491 			ct = -1;
3492 		(void) ct_tmpl_clear(tmpl_fd);
3493 		(void) close(tmpl_fd);
3494 		(void) waitpid(child, NULL, 0);
3495 		(void) contract_abandon_id(ct);
3496 		return;
3497 	}
3498 
3499 	/* child enters zone and sets up index file */
3500 	(void) ct_tmpl_clear(tmpl_fd);
3501 	if (zone_enter(zoneid) != -1) {
3502 		(void) mkdir(ZONE_CONFIG_ROOT, ZONE_CONFIG_MODE);
3503 		(void) chown(ZONE_CONFIG_ROOT, ZONE_CONFIG_UID,
3504 		    ZONE_CONFIG_GID);
3505 		fd = open(ZONE_INDEX_FILE, O_WRONLY|O_CREAT|O_TRUNC,
3506 		    ZONE_INDEX_MODE);
3507 		if (fd != -1 && (zet = fdopen(fd, "w")) != NULL) {
3508 			(void) fchown(fd, ZONE_INDEX_UID, ZONE_INDEX_GID);
3509 			if (uuid_is_null(zep->zone_uuid))
3510 				uuidstr[0] = '\0';
3511 			else
3512 				uuid_unparse(zep->zone_uuid, uuidstr);
3513 			(void) fprintf(zet, "%s:%s:/:%s\n", zep->zone_name,
3514 			    zone_state_str(zep->zone_state),
3515 			    uuidstr);
3516 			(void) fclose(zet);
3517 		}
3518 	}
3519 	_exit(0);
3520 }
3521 
3522 int
3523 vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd, zoneid_t zoneid)
3524 {
3525 
3526 	if (!mount_cmd && validate_datasets(zlogp) != 0) {
3527 		lofs_discard_mnttab();
3528 		return (-1);
3529 	}
3530 
3531 	if (mount_filesystems(zlogp, mount_cmd) != 0) {
3532 		lofs_discard_mnttab();
3533 		return (-1);
3534 	}
3535 
3536 	/* mount /dev for zone (both normal and scratch zone) */
3537 	if (vplat_mount_dev(zlogp) != 0) {
3538 		lofs_discard_mnttab();
3539 		return (-1);
3540 	}
3541 
3542 	if (!mount_cmd && configure_network_interfaces(zlogp) != 0) {
3543 		lofs_discard_mnttab();
3544 		return (-1);
3545 	}
3546 
3547 	write_index_file(zoneid);
3548 
3549 	lofs_discard_mnttab();
3550 	return (0);
3551 }
3552 
3553 static int
3554 lu_root_teardown(zlog_t *zlogp)
3555 {
3556 	char zroot[MAXPATHLEN];
3557 
3558 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
3559 		zerror(zlogp, B_FALSE, "unable to determine zone root");
3560 		return (-1);
3561 	}
3562 	root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
3563 
3564 	/*
3565 	 * At this point, the processes are gone, the filesystems (save the
3566 	 * root) are unmounted, and the zone is on death row.  But there may
3567 	 * still be creds floating about in the system that reference the
3568 	 * zone_t, and which pin down zone_rootvp causing this call to fail
3569 	 * with EBUSY.  Thus, we try for a little while before just giving up.
3570 	 * (How I wish this were not true, and umount2 just did the right
3571 	 * thing, or tmpfs supported MS_FORCE This is a gross hack.)
3572 	 */
3573 	if (umount2(zroot, MS_FORCE) != 0) {
3574 		if (errno == ENOTSUP && umount2(zroot, 0) == 0)
3575 			goto unmounted;
3576 		if (errno == EBUSY) {
3577 			int tries = 10;
3578 
3579 			while (--tries >= 0) {
3580 				(void) sleep(1);
3581 				if (umount2(zroot, 0) == 0)
3582 					goto unmounted;
3583 				if (errno != EBUSY)
3584 					break;
3585 			}
3586 		}
3587 		zerror(zlogp, B_TRUE, "unable to unmount '%s'", zroot);
3588 		return (-1);
3589 	}
3590 unmounted:
3591 
3592 	/*
3593 	 * Only zones in an alternate root environment have scratch zone
3594 	 * entries.
3595 	 */
3596 	if (zonecfg_in_alt_root()) {
3597 		FILE *fp;
3598 		int retv;
3599 
3600 		if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
3601 			zerror(zlogp, B_TRUE, "cannot open mapfile");
3602 			return (-1);
3603 		}
3604 		retv = -1;
3605 		if (zonecfg_lock_scratch(fp) != 0)
3606 			zerror(zlogp, B_TRUE, "cannot lock mapfile");
3607 		else if (zonecfg_delete_scratch(fp, kernzone) != 0)
3608 			zerror(zlogp, B_TRUE, "cannot delete map entry");
3609 		else
3610 			retv = 0;
3611 		zonecfg_close_scratch(fp);
3612 		return (retv);
3613 	} else {
3614 		return (0);
3615 	}
3616 }
3617 
3618 int
3619 vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd)
3620 {
3621 	char *kzone;
3622 	zoneid_t zoneid;
3623 
3624 	kzone = zone_name;
3625 	if (zonecfg_in_alt_root()) {
3626 		FILE *fp;
3627 
3628 		if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
3629 			zerror(zlogp, B_TRUE, "unable to open map file");
3630 			goto error;
3631 		}
3632 		if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
3633 		    kernzone, sizeof (kernzone)) != 0) {
3634 			zerror(zlogp, B_FALSE, "unable to find scratch zone");
3635 			zonecfg_close_scratch(fp);
3636 			goto error;
3637 		}
3638 		zonecfg_close_scratch(fp);
3639 		kzone = kernzone;
3640 	}
3641 
3642 	if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) {
3643 		if (!bringup_failure_recovery)
3644 			zerror(zlogp, B_TRUE, "unable to get zoneid");
3645 		if (unmount_cmd)
3646 			(void) lu_root_teardown(zlogp);
3647 		goto error;
3648 	}
3649 
3650 	if (zone_shutdown(zoneid) != 0) {
3651 		zerror(zlogp, B_TRUE, "unable to shutdown zone");
3652 		goto error;
3653 	}
3654 
3655 	if (!unmount_cmd &&
3656 	    unconfigure_network_interfaces(zlogp, zoneid) != 0) {
3657 		zerror(zlogp, B_FALSE,
3658 		    "unable to unconfigure network interfaces in zone");
3659 		goto error;
3660 	}
3661 
3662 	if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) {
3663 		zerror(zlogp, B_TRUE, "unable to abort TCP connections");
3664 		goto error;
3665 	}
3666 
3667 	/* destroy zconsole before umount /dev */
3668 	if (!unmount_cmd)
3669 		destroy_console_slave();
3670 
3671 	if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) {
3672 		zerror(zlogp, B_FALSE,
3673 		    "unable to unmount file systems in zone");
3674 		goto error;
3675 	}
3676 
3677 	remove_mlps(zlogp, zoneid);
3678 
3679 	if (zone_destroy(zoneid) != 0) {
3680 		zerror(zlogp, B_TRUE, "unable to destroy zone");
3681 		goto error;
3682 	}
3683 
3684 	/*
3685 	 * Special teardown for alternate boot environments: remove the tmpfs
3686 	 * root for the zone and then remove it from the map file.
3687 	 */
3688 	if (unmount_cmd && lu_root_teardown(zlogp) != 0)
3689 		goto error;
3690 
3691 	lofs_discard_mnttab();
3692 	return (0);
3693 
3694 error:
3695 	lofs_discard_mnttab();
3696 	return (-1);
3697 }
3698 
3699 /*
3700  * Apply the standard lists of devices/symlinks/mappings and the user-specified
3701  * list of devices (via zonecfg) to the /dev filesystem.  The filesystem will
3702  * use these as a profile/filter to determine what exists in /dev.
3703  */
3704 static int
3705 vplat_mount_dev(zlog_t *zlogp)
3706 {
3707 	char			zonedevpath[MAXPATHLEN];
3708 	zone_dochandle_t	handle = NULL;
3709 	struct zone_devtab	ztab;
3710 	zone_fsopt_t		opt_attr;
3711 	di_prof_t		prof = NULL;
3712 	int			i, err, len;
3713 	int			retval = -1;
3714 
3715 	struct zone_fstab devtab = {
3716 		"/dev",
3717 		"/dev",
3718 		MNTTYPE_DEV,
3719 		NULL,
3720 		""
3721 	};
3722 
3723 	if (err = zone_get_devroot(zone_name, zonedevpath,
3724 	    sizeof (zonedevpath))) {
3725 		zerror(zlogp, B_FALSE, "can't get zone dev: %s",
3726 		    zonecfg_strerror(err));
3727 		return (-1);
3728 	}
3729 
3730 	/*
3731 	 * The old /dev was a lofs mount from <zonepath>/dev, with
3732 	 * dev fs, that becomes a mount on <zonepath>/root/dev.
3733 	 * However, we need to preserve device permission bits during
3734 	 * upgrade.  What we should do is migrate the attribute directory
3735 	 * on upgrade, but for now, preserve it at <zonepath>/dev.
3736 	 */
3737 	(void) strcpy(opt_attr.zone_fsopt_opt, "attrdir=");
3738 	len = strlen(opt_attr.zone_fsopt_opt);
3739 	if (err = zone_get_zonepath(zone_name,
3740 	    opt_attr.zone_fsopt_opt + len, MAX_MNTOPT_STR - len)) {
3741 		zerror(zlogp, B_FALSE, "can't get zone path: %s",
3742 		    zonecfg_strerror(err));
3743 		return (-1);
3744 	}
3745 
3746 	if (make_one_dir(zlogp, opt_attr.zone_fsopt_opt + len, "/dev",
3747 	    DEFAULT_DIR_MODE) != 0)
3748 		return (-1);
3749 
3750 	(void) strlcat(opt_attr.zone_fsopt_opt, "/dev", MAX_MNTOPT_STR);
3751 	devtab.zone_fs_options = &opt_attr;
3752 	opt_attr.zone_fsopt_next = NULL;
3753 
3754 	/* mount /dev inside the zone */
3755 	i = strlen(zonedevpath);
3756 	if (mount_one(zlogp, &devtab, zonedevpath))
3757 		return (-1);
3758 
3759 	(void) strlcat(zonedevpath, "/dev", sizeof (zonedevpath));
3760 	if (di_prof_init(zonedevpath, &prof)) {
3761 		zerror(zlogp, B_TRUE, "failed to initialize profile");
3762 		goto cleanup;
3763 	}
3764 
3765 	/* Add the standard devices and directories */
3766 	for (i = 0; standard_devs[i] != NULL; ++i) {
3767 		if (di_prof_add_dev(prof, standard_devs[i])) {
3768 			zerror(zlogp, B_TRUE, "failed to add "
3769 			    "standard device");
3770 			goto cleanup;
3771 		}
3772 	}
3773 
3774 	/* Add the standard symlinks */
3775 	for (i = 0; standard_devlinks[i].source != NULL; ++i) {
3776 		if (di_prof_add_symlink(prof,
3777 		    standard_devlinks[i].source,
3778 		    standard_devlinks[i].target)) {
3779 			zerror(zlogp, B_TRUE, "failed to add "
3780 			    "standard symlink");
3781 			goto cleanup;
3782 		}
3783 	}
3784 
3785 	/* Add user-specified devices and directories */
3786 	if ((handle = zonecfg_init_handle()) == NULL) {
3787 		zerror(zlogp, B_FALSE, "can't initialize zone handle");
3788 		goto cleanup;
3789 	}
3790 	if (err = zonecfg_get_handle(zone_name, handle)) {
3791 		zerror(zlogp, B_FALSE, "can't get handle for zone "
3792 		    "%s: %s", zone_name, zonecfg_strerror(err));
3793 		goto cleanup;
3794 	}
3795 	if (err = zonecfg_setdevent(handle)) {
3796 		zerror(zlogp, B_FALSE, "%s: %s", zone_name,
3797 		    zonecfg_strerror(err));
3798 		goto cleanup;
3799 	}
3800 	while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
3801 		if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
3802 			zerror(zlogp, B_TRUE, "failed to add "
3803 			    "user-specified device");
3804 			goto cleanup;
3805 		}
3806 	}
3807 	(void) zonecfg_enddevent(handle);
3808 
3809 	/* Send profile to kernel */
3810 	if (di_prof_commit(prof)) {
3811 		zerror(zlogp, B_TRUE, "failed to commit profile");
3812 		goto cleanup;
3813 	}
3814 
3815 	retval = 0;
3816 
3817 cleanup:
3818 	if (handle)
3819 		zonecfg_fini_handle(handle);
3820 	if (prof)
3821 		di_prof_fini(prof);
3822 	return (retval);
3823 }
3824