xref: /titanic_53/usr/src/cmd/zoneadmd/vplat.c (revision e9dbad6f263d5570ed7ff5443ec5b958af8c24d7)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
21ffbafc53Scomay 
227c478bd9Sstevel@tonic-gate /*
23ea8dc4b6Seschrock  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*
307c478bd9Sstevel@tonic-gate  * This module contains functions used to bring up and tear down the
317c478bd9Sstevel@tonic-gate  * Virtual Platform: [un]mounting file-systems, [un]plumbing network
327c478bd9Sstevel@tonic-gate  * interfaces, [un]configuring devices, establishing resource controls,
337c478bd9Sstevel@tonic-gate  * and creating/destroying the zone in the kernel.  These actions, on
347c478bd9Sstevel@tonic-gate  * the way up, ready the zone; on the way down, they halt the zone.
357c478bd9Sstevel@tonic-gate  * See the much longer block comment at the beginning of zoneadmd.c
367c478bd9Sstevel@tonic-gate  * for a bigger picture of how the whole program functions.
37108322fbScarlsonj  *
38108322fbScarlsonj  * This module also has primary responsibility for the layout of "scratch
39108322fbScarlsonj  * zones."  These are mounted, but inactive, zones that are used during
40108322fbScarlsonj  * operating system upgrade and potentially other administrative action.  The
41108322fbScarlsonj  * scratch zone environment is similar to the miniroot environment.  The zone's
42108322fbScarlsonj  * actual root is mounted read-write on /a, and the standard paths (/usr,
43108322fbScarlsonj  * /sbin, /lib) all lead to read-only copies of the running system's binaries.
44108322fbScarlsonj  * This allows the administrative tools to manipulate the zone using "-R /a"
45108322fbScarlsonj  * without relying on any binaries in the zone itself.
46108322fbScarlsonj  *
47108322fbScarlsonj  * If the scratch zone is on an alternate root (Live Upgrade [LU] boot
48108322fbScarlsonj  * environment), then we must resolve the lofs mounts used there to uncover
49108322fbScarlsonj  * writable (unshared) resources.  Shared resources, though, are always
50108322fbScarlsonj  * read-only.  In addition, if the "same" zone with a different root path is
51108322fbScarlsonj  * currently running, then "/b" inside the zone points to the running zone's
52108322fbScarlsonj  * root.  This allows LU to synchronize configuration files during the upgrade
53108322fbScarlsonj  * process.
54108322fbScarlsonj  *
55108322fbScarlsonj  * To construct this environment, this module creates a tmpfs mount on
56108322fbScarlsonj  * $ZONEPATH/lu.  Inside this scratch area, the miniroot-like environment as
57108322fbScarlsonj  * described above is constructed on the fly.  The zone is then created using
58108322fbScarlsonj  * $ZONEPATH/lu as the root.
59108322fbScarlsonj  *
60108322fbScarlsonj  * Note that scratch zones are inactive.  The zone's bits are not running and
61108322fbScarlsonj  * likely cannot be run correctly until upgrade is done.  Init is not running
62108322fbScarlsonj  * there, nor is SMF.  Because of this, the "mounted" state of a scratch zone
63108322fbScarlsonj  * is not a part of the usual halt/ready/boot state machine.
647c478bd9Sstevel@tonic-gate  */
657c478bd9Sstevel@tonic-gate 
667c478bd9Sstevel@tonic-gate #include <sys/param.h>
677c478bd9Sstevel@tonic-gate #include <sys/mount.h>
687c478bd9Sstevel@tonic-gate #include <sys/mntent.h>
697c478bd9Sstevel@tonic-gate #include <sys/socket.h>
707c478bd9Sstevel@tonic-gate #include <sys/utsname.h>
717c478bd9Sstevel@tonic-gate #include <sys/types.h>
727c478bd9Sstevel@tonic-gate #include <sys/stat.h>
737c478bd9Sstevel@tonic-gate #include <sys/sockio.h>
747c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
757c478bd9Sstevel@tonic-gate #include <sys/conf.h>
767c478bd9Sstevel@tonic-gate 
777c478bd9Sstevel@tonic-gate #include <inet/tcp.h>
787c478bd9Sstevel@tonic-gate #include <arpa/inet.h>
797c478bd9Sstevel@tonic-gate #include <netinet/in.h>
807c478bd9Sstevel@tonic-gate #include <net/route.h>
817c478bd9Sstevel@tonic-gate 
827c478bd9Sstevel@tonic-gate #include <stdio.h>
837c478bd9Sstevel@tonic-gate #include <errno.h>
847c478bd9Sstevel@tonic-gate #include <fcntl.h>
857c478bd9Sstevel@tonic-gate #include <unistd.h>
867c478bd9Sstevel@tonic-gate #include <rctl.h>
877c478bd9Sstevel@tonic-gate #include <stdlib.h>
887c478bd9Sstevel@tonic-gate #include <string.h>
897c478bd9Sstevel@tonic-gate #include <strings.h>
907c478bd9Sstevel@tonic-gate #include <wait.h>
917c478bd9Sstevel@tonic-gate #include <limits.h>
927c478bd9Sstevel@tonic-gate #include <libgen.h>
93fa9e4066Sahrens #include <libzfs.h>
94facf4a8dSllai1 #include <libdevinfo.h>
957c478bd9Sstevel@tonic-gate #include <zone.h>
967c478bd9Sstevel@tonic-gate #include <assert.h>
97555afedfScarlsonj #include <libcontract.h>
98555afedfScarlsonj #include <libcontract_priv.h>
99555afedfScarlsonj #include <uuid/uuid.h>
1007c478bd9Sstevel@tonic-gate 
1017c478bd9Sstevel@tonic-gate #include <sys/mntio.h>
1027c478bd9Sstevel@tonic-gate #include <sys/mnttab.h>
1037c478bd9Sstevel@tonic-gate #include <sys/fs/autofs.h>	/* for _autofssys() */
1047c478bd9Sstevel@tonic-gate #include <sys/fs/lofs_info.h>
105fa9e4066Sahrens #include <sys/fs/zfs.h>
1067c478bd9Sstevel@tonic-gate 
1077c478bd9Sstevel@tonic-gate #include <pool.h>
1087c478bd9Sstevel@tonic-gate #include <sys/pool.h>
1097c478bd9Sstevel@tonic-gate 
1107c478bd9Sstevel@tonic-gate #include <libzonecfg.h>
11139d3e169Sevanl #include <synch.h>
11222321485Svp157776 
1137c478bd9Sstevel@tonic-gate #include "zoneadmd.h"
11445916cd2Sjpk #include <tsol/label.h>
11545916cd2Sjpk #include <libtsnet.h>
11645916cd2Sjpk #include <sys/priv.h>
1177c478bd9Sstevel@tonic-gate 
/*
 * IPv4 and IPv6 address lengths (32 and 128; presumably in bits -- confirm
 * at the point of use).
 */
#define	V4_ADDR_LEN	32
#define	V6_ADDR_LEN	128

/* Size limit named MAXTNZLEN (2048); see its users for what it bounds. */
#define	MAXTNZLEN	2048

/* Path of the file read by get_remote_fstypes(). */
#define	DFSTYPES	"/etc/dfs/fstypes"

/* Default directory mode: rwx for the owner, r-x for group and other (0755). */
#define	DEFAULT_DIR_MODE \
	(S_IRWXU | (S_IRGRP | S_IXGRP) | (S_IROTH | S_IXOTH))

/*
 * Mount options used for inherit-package-directory resources and for the
 * replacement lofs mounts set up by check_lofs_needed().
 */
#define	IPD_DEFAULT_OPTS \
	MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES
1307c478bd9Sstevel@tonic-gate 
/*
 * Directories and devices, named relative to <zone_root>/dev, that must be
 * present in every zone.  A zone configuration may add device rules of its
 * own on top of this list, but at present it cannot remove any member of
 * this standard set.
 *
 * The list is terminated by a NULL entry.  "openprom" is included only when
 * building for SPARC.
 */
static const char *standard_devs[] = {
	"arp", "conslog", "cpu/self/cpuid", "crypto", "cryptoadm", "dsk",
	"dtrace/*", "dtrace/provider/*",
	"fd", "kstat",
	"lo0", "lo1", "lo2", "lo3",
	"log", "logindmux", "null",
#ifdef __sparc
	"openprom",
#endif
	"poll", "pool", "ptmx", "pts/*",
	"random", "rdsk", "rmt", "sad/user", "swap", "sysevent",
	"tcp", "tcp6", "term",
	"ticlts", "ticots", "ticotsord",
	"tty", "udp", "udp6", "urandom", "zero", "zfs",
	NULL
};
1827c478bd9Sstevel@tonic-gate 
/*
 * Pairs a name ("source") with the path it should lead to ("target").  The
 * standard_devlinks table uses it to describe symlinks under <zone_root>/dev.
 */
struct source_target {
	const char *source;	/* name of the link */
	const char *target;	/* path the link points at */
};
1877c478bd9Sstevel@tonic-gate 
1887c478bd9Sstevel@tonic-gate /*
189facf4a8dSllai1  * Set of symlinks (relative to <zone_root>/dev) which must be present in
190facf4a8dSllai1  * every zone.
1917c478bd9Sstevel@tonic-gate  */
192facf4a8dSllai1 static struct source_target standard_devlinks[] = {
193facf4a8dSllai1 	{ "stderr",	"./fd/2" },
194facf4a8dSllai1 	{ "stdin",	"./fd/0" },
195facf4a8dSllai1 	{ "stdout",	"./fd/1" },
196facf4a8dSllai1 	{ "dtremote",	"/dev/null" },
197facf4a8dSllai1 	{ "console",	"zconsole" },
198facf4a8dSllai1 	{ "syscon",	"zconsole" },
199facf4a8dSllai1 	{ "sysmsg",	"zconsole" },
200facf4a8dSllai1 	{ "systty",	"zconsole" },
201facf4a8dSllai1 	{ "msglog",	"zconsole" },
202facf4a8dSllai1 	{ NULL, NULL }
2037c478bd9Sstevel@tonic-gate };
2047c478bd9Sstevel@tonic-gate 
205facf4a8dSllai1 static int vplat_mount_dev(zlog_t *);
2067c478bd9Sstevel@tonic-gate 
2077c478bd9Sstevel@tonic-gate /* for routing socket */
2087c478bd9Sstevel@tonic-gate static int rts_seqno = 0;
2097c478bd9Sstevel@tonic-gate 
210108322fbScarlsonj /* mangled zone name when mounting in an alternate root environment */
211108322fbScarlsonj static char kernzone[ZONENAME_MAX];
212108322fbScarlsonj 
213108322fbScarlsonj /* array of cached mount entries for resolve_lofs */
214108322fbScarlsonj static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
215108322fbScarlsonj 
21645916cd2Sjpk /* for Trusted Extensions */
21745916cd2Sjpk static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *);
21845916cd2Sjpk static int tsol_mounts(zlog_t *, char *, char *);
21945916cd2Sjpk static void tsol_unmounts(zlog_t *, char *);
22045916cd2Sjpk static m_label_t *zlabel = NULL;
22145916cd2Sjpk static m_label_t *zid_label = NULL;
22245916cd2Sjpk static priv_set_t *zprivs = NULL;
22345916cd2Sjpk 
2247c478bd9Sstevel@tonic-gate /* from libsocket, not in any header file */
2257c478bd9Sstevel@tonic-gate extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
2267c478bd9Sstevel@tonic-gate 
2277c478bd9Sstevel@tonic-gate /*
228108322fbScarlsonj  * An optimization for build_mnttable: reallocate (and potentially copy the
229108322fbScarlsonj  * data) only once every N times through the loop.
230108322fbScarlsonj  */
231108322fbScarlsonj #define	MNTTAB_HUNK	32
232108322fbScarlsonj 
233108322fbScarlsonj /*
2347c478bd9Sstevel@tonic-gate  * Private autofs system call
2357c478bd9Sstevel@tonic-gate  */
2367c478bd9Sstevel@tonic-gate extern int _autofssys(int, void *);
2377c478bd9Sstevel@tonic-gate 
2387c478bd9Sstevel@tonic-gate static int
2397c478bd9Sstevel@tonic-gate autofs_cleanup(zoneid_t zoneid)
2407c478bd9Sstevel@tonic-gate {
2417c478bd9Sstevel@tonic-gate 	/*
2427c478bd9Sstevel@tonic-gate 	 * Ask autofs to unmount all trigger nodes in the given zone.
2437c478bd9Sstevel@tonic-gate 	 */
2447c478bd9Sstevel@tonic-gate 	return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid));
2457c478bd9Sstevel@tonic-gate }
2467c478bd9Sstevel@tonic-gate 
247108322fbScarlsonj static void
248108322fbScarlsonj free_mnttable(struct mnttab *mnt_array, uint_t nelem)
249108322fbScarlsonj {
250108322fbScarlsonj 	uint_t i;
251108322fbScarlsonj 
252108322fbScarlsonj 	if (mnt_array == NULL)
253108322fbScarlsonj 		return;
254108322fbScarlsonj 	for (i = 0; i < nelem; i++) {
255108322fbScarlsonj 		free(mnt_array[i].mnt_mountp);
256108322fbScarlsonj 		free(mnt_array[i].mnt_fstype);
257108322fbScarlsonj 		free(mnt_array[i].mnt_special);
258108322fbScarlsonj 		free(mnt_array[i].mnt_mntopts);
259108322fbScarlsonj 		assert(mnt_array[i].mnt_time == NULL);
260108322fbScarlsonj 	}
261108322fbScarlsonj 	free(mnt_array);
262108322fbScarlsonj }
263108322fbScarlsonj 
264108322fbScarlsonj /*
265108322fbScarlsonj  * Build the mount table for the zone rooted at "zroot", storing the resulting
266108322fbScarlsonj  * array of struct mnttabs in "mnt_arrayp" and the number of elements in the
267108322fbScarlsonj  * array in "nelemp".
268108322fbScarlsonj  */
269108322fbScarlsonj static int
270108322fbScarlsonj build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab,
271108322fbScarlsonj     struct mnttab **mnt_arrayp, uint_t *nelemp)
272108322fbScarlsonj {
273108322fbScarlsonj 	struct mnttab mnt;
274108322fbScarlsonj 	struct mnttab *mnts;
275108322fbScarlsonj 	struct mnttab *mnp;
276108322fbScarlsonj 	uint_t nmnt;
277108322fbScarlsonj 
278108322fbScarlsonj 	rewind(mnttab);
279108322fbScarlsonj 	resetmnttab(mnttab);
280108322fbScarlsonj 	nmnt = 0;
281108322fbScarlsonj 	mnts = NULL;
282108322fbScarlsonj 	while (getmntent(mnttab, &mnt) == 0) {
283108322fbScarlsonj 		struct mnttab *tmp_array;
284108322fbScarlsonj 
285108322fbScarlsonj 		if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0)
286108322fbScarlsonj 			continue;
287108322fbScarlsonj 		if (nmnt % MNTTAB_HUNK == 0) {
288108322fbScarlsonj 			tmp_array = realloc(mnts,
289108322fbScarlsonj 			    (nmnt + MNTTAB_HUNK) * sizeof (*mnts));
290108322fbScarlsonj 			if (tmp_array == NULL) {
291108322fbScarlsonj 				free_mnttable(mnts, nmnt);
292108322fbScarlsonj 				return (-1);
293108322fbScarlsonj 			}
294108322fbScarlsonj 			mnts = tmp_array;
295108322fbScarlsonj 		}
296108322fbScarlsonj 		mnp = &mnts[nmnt++];
297108322fbScarlsonj 
298108322fbScarlsonj 		/*
299108322fbScarlsonj 		 * Zero out any fields we're not using.
300108322fbScarlsonj 		 */
301108322fbScarlsonj 		(void) memset(mnp, 0, sizeof (*mnp));
302108322fbScarlsonj 
303108322fbScarlsonj 		if (mnt.mnt_special != NULL)
304108322fbScarlsonj 			mnp->mnt_special = strdup(mnt.mnt_special);
305108322fbScarlsonj 		if (mnt.mnt_mntopts != NULL)
306108322fbScarlsonj 			mnp->mnt_mntopts = strdup(mnt.mnt_mntopts);
307108322fbScarlsonj 		mnp->mnt_mountp = strdup(mnt.mnt_mountp);
308108322fbScarlsonj 		mnp->mnt_fstype = strdup(mnt.mnt_fstype);
309108322fbScarlsonj 		if ((mnt.mnt_special != NULL && mnp->mnt_special == NULL) ||
310108322fbScarlsonj 		    (mnt.mnt_mntopts != NULL && mnp->mnt_mntopts == NULL) ||
311108322fbScarlsonj 		    mnp->mnt_mountp == NULL || mnp->mnt_fstype == NULL) {
312108322fbScarlsonj 			zerror(zlogp, B_TRUE, "memory allocation failed");
313108322fbScarlsonj 			free_mnttable(mnts, nmnt);
314108322fbScarlsonj 			return (-1);
315108322fbScarlsonj 		}
316108322fbScarlsonj 	}
317108322fbScarlsonj 	*mnt_arrayp = mnts;
318108322fbScarlsonj 	*nelemp = nmnt;
319108322fbScarlsonj 	return (0);
320108322fbScarlsonj }
321108322fbScarlsonj 
322108322fbScarlsonj /*
323108322fbScarlsonj  * This is an optimization.  The resolve_lofs function is used quite frequently
324108322fbScarlsonj  * to manipulate file paths, and on a machine with a large number of zones,
325108322fbScarlsonj  * there will be a huge number of mounted file systems.  Thus, we trigger a
326108322fbScarlsonj  * reread of the list of mount points
327108322fbScarlsonj  */
328108322fbScarlsonj static void
329108322fbScarlsonj lofs_discard_mnttab(void)
330108322fbScarlsonj {
331108322fbScarlsonj 	free_mnttable(resolve_lofs_mnts,
332108322fbScarlsonj 	    resolve_lofs_mnt_max - resolve_lofs_mnts);
333108322fbScarlsonj 	resolve_lofs_mnts = resolve_lofs_mnt_max = NULL;
334108322fbScarlsonj }
335108322fbScarlsonj 
336108322fbScarlsonj static int
337108322fbScarlsonj lofs_read_mnttab(zlog_t *zlogp)
338108322fbScarlsonj {
339108322fbScarlsonj 	FILE *mnttab;
340108322fbScarlsonj 	uint_t nmnts;
341108322fbScarlsonj 
342108322fbScarlsonj 	if ((mnttab = fopen(MNTTAB, "r")) == NULL)
343108322fbScarlsonj 		return (-1);
344108322fbScarlsonj 	if (build_mnttable(zlogp, "", 0, mnttab, &resolve_lofs_mnts,
345108322fbScarlsonj 	    &nmnts) == -1) {
346108322fbScarlsonj 		(void) fclose(mnttab);
347108322fbScarlsonj 		return (-1);
348108322fbScarlsonj 	}
349108322fbScarlsonj 	(void) fclose(mnttab);
350108322fbScarlsonj 	resolve_lofs_mnt_max = resolve_lofs_mnts + nmnts;
351108322fbScarlsonj 	return (0);
352108322fbScarlsonj }
353108322fbScarlsonj 
354108322fbScarlsonj /*
355108322fbScarlsonj  * This function loops over potential loopback mounts and symlinks in a given
356108322fbScarlsonj  * path and resolves them all down to an absolute path.
357108322fbScarlsonj  */
358108322fbScarlsonj static void
359108322fbScarlsonj resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen)
360108322fbScarlsonj {
361108322fbScarlsonj 	int len, arlen;
362108322fbScarlsonj 	const char *altroot;
363108322fbScarlsonj 	char tmppath[MAXPATHLEN];
364108322fbScarlsonj 	boolean_t outside_altroot;
365108322fbScarlsonj 
366108322fbScarlsonj 	if ((len = resolvepath(path, tmppath, sizeof (tmppath))) == -1)
367108322fbScarlsonj 		return;
368108322fbScarlsonj 	tmppath[len] = '\0';
369108322fbScarlsonj 	(void) strlcpy(path, tmppath, sizeof (tmppath));
370108322fbScarlsonj 
371108322fbScarlsonj 	/* This happens once per zoneadmd operation. */
372108322fbScarlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
373108322fbScarlsonj 		return;
374108322fbScarlsonj 
375108322fbScarlsonj 	altroot = zonecfg_get_root();
376108322fbScarlsonj 	arlen = strlen(altroot);
377108322fbScarlsonj 	outside_altroot = B_FALSE;
378108322fbScarlsonj 	for (;;) {
379108322fbScarlsonj 		struct mnttab *mnp;
380108322fbScarlsonj 
381108322fbScarlsonj 		for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
382108322fbScarlsonj 		    mnp++) {
383108322fbScarlsonj 			if (mnp->mnt_fstype == NULL ||
384108322fbScarlsonj 			    mnp->mnt_mountp == NULL ||
385108322fbScarlsonj 			    mnp->mnt_special == NULL ||
386108322fbScarlsonj 			    strcmp(mnp->mnt_fstype, MNTTYPE_LOFS) != 0)
387108322fbScarlsonj 				continue;
388108322fbScarlsonj 			len = strlen(mnp->mnt_mountp);
389108322fbScarlsonj 			if (strncmp(mnp->mnt_mountp, path, len) == 0 &&
390108322fbScarlsonj 			    (path[len] == '/' || path[len] == '\0'))
391108322fbScarlsonj 				break;
392108322fbScarlsonj 		}
393108322fbScarlsonj 		if (mnp >= resolve_lofs_mnt_max)
394108322fbScarlsonj 			break;
395108322fbScarlsonj 		if (outside_altroot) {
396108322fbScarlsonj 			char *cp;
397108322fbScarlsonj 			int olen = sizeof (MNTOPT_RO) - 1;
398108322fbScarlsonj 
399108322fbScarlsonj 			/*
400108322fbScarlsonj 			 * If we run into a read-only mount outside of the
401108322fbScarlsonj 			 * alternate root environment, then the user doesn't
402108322fbScarlsonj 			 * want this path to be made read-write.
403108322fbScarlsonj 			 */
404108322fbScarlsonj 			if (mnp->mnt_mntopts != NULL &&
405108322fbScarlsonj 			    (cp = strstr(mnp->mnt_mntopts, MNTOPT_RO)) !=
406108322fbScarlsonj 			    NULL &&
407108322fbScarlsonj 			    (cp == mnp->mnt_mntopts || cp[-1] == ',') &&
408108322fbScarlsonj 			    (cp[olen] == '\0' || cp[olen] == ',')) {
409108322fbScarlsonj 				break;
410108322fbScarlsonj 			}
411108322fbScarlsonj 		} else if (arlen > 0 &&
412108322fbScarlsonj 		    (strncmp(mnp->mnt_special, altroot, arlen) != 0 ||
413108322fbScarlsonj 		    (mnp->mnt_special[arlen] != '\0' &&
414108322fbScarlsonj 		    mnp->mnt_special[arlen] != '/'))) {
415108322fbScarlsonj 			outside_altroot = B_TRUE;
416108322fbScarlsonj 		}
417108322fbScarlsonj 		/* use temporary buffer because new path might be longer */
418108322fbScarlsonj 		(void) snprintf(tmppath, sizeof (tmppath), "%s%s",
419108322fbScarlsonj 		    mnp->mnt_special, path + len);
420108322fbScarlsonj 		if ((len = resolvepath(tmppath, path, pathlen)) == -1)
421108322fbScarlsonj 			break;
422108322fbScarlsonj 		path[len] = '\0';
423108322fbScarlsonj 	}
424108322fbScarlsonj }
425108322fbScarlsonj 
426108322fbScarlsonj /*
427108322fbScarlsonj  * For a regular mount, check if a replacement lofs mount is needed because the
428108322fbScarlsonj  * referenced device is already mounted somewhere.
429108322fbScarlsonj  */
430108322fbScarlsonj static int
431108322fbScarlsonj check_lofs_needed(zlog_t *zlogp, struct zone_fstab *fsptr)
432108322fbScarlsonj {
433108322fbScarlsonj 	struct mnttab *mnp;
434108322fbScarlsonj 	zone_fsopt_t *optptr, *onext;
435108322fbScarlsonj 
436108322fbScarlsonj 	/* This happens once per zoneadmd operation. */
437108322fbScarlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
438108322fbScarlsonj 		return (-1);
439108322fbScarlsonj 
440108322fbScarlsonj 	/*
441108322fbScarlsonj 	 * If this special node isn't already in use, then it's ours alone;
442108322fbScarlsonj 	 * no need to worry about conflicting mounts.
443108322fbScarlsonj 	 */
444108322fbScarlsonj 	for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
445108322fbScarlsonj 	    mnp++) {
446108322fbScarlsonj 		if (strcmp(mnp->mnt_special, fsptr->zone_fs_special) == 0)
447108322fbScarlsonj 			break;
448108322fbScarlsonj 	}
449108322fbScarlsonj 	if (mnp >= resolve_lofs_mnt_max)
450108322fbScarlsonj 		return (0);
451108322fbScarlsonj 
452108322fbScarlsonj 	/*
453108322fbScarlsonj 	 * Convert this duplicate mount into a lofs mount.
454108322fbScarlsonj 	 */
455108322fbScarlsonj 	(void) strlcpy(fsptr->zone_fs_special, mnp->mnt_mountp,
456108322fbScarlsonj 	    sizeof (fsptr->zone_fs_special));
457108322fbScarlsonj 	(void) strlcpy(fsptr->zone_fs_type, MNTTYPE_LOFS,
458108322fbScarlsonj 	    sizeof (fsptr->zone_fs_type));
459108322fbScarlsonj 	fsptr->zone_fs_raw[0] = '\0';
460108322fbScarlsonj 
461108322fbScarlsonj 	/*
462108322fbScarlsonj 	 * Discard all but one of the original options and set that to be the
463108322fbScarlsonj 	 * same set of options used for inherit package directory resources.
464108322fbScarlsonj 	 */
465108322fbScarlsonj 	optptr = fsptr->zone_fs_options;
466108322fbScarlsonj 	if (optptr == NULL) {
467108322fbScarlsonj 		optptr = malloc(sizeof (*optptr));
468108322fbScarlsonj 		if (optptr == NULL) {
469108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s",
470108322fbScarlsonj 			    fsptr->zone_fs_dir);
471108322fbScarlsonj 			return (-1);
472108322fbScarlsonj 		}
473108322fbScarlsonj 	} else {
474108322fbScarlsonj 		while ((onext = optptr->zone_fsopt_next) != NULL) {
475108322fbScarlsonj 			optptr->zone_fsopt_next = onext->zone_fsopt_next;
476108322fbScarlsonj 			free(onext);
477108322fbScarlsonj 		}
478108322fbScarlsonj 	}
479108322fbScarlsonj 	(void) strcpy(optptr->zone_fsopt_opt, IPD_DEFAULT_OPTS);
480108322fbScarlsonj 	optptr->zone_fsopt_next = NULL;
481108322fbScarlsonj 	fsptr->zone_fs_options = optptr;
482108322fbScarlsonj 	return (0);
483108322fbScarlsonj }
484108322fbScarlsonj 
4857c478bd9Sstevel@tonic-gate static int
4867c478bd9Sstevel@tonic-gate make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode)
4877c478bd9Sstevel@tonic-gate {
4887c478bd9Sstevel@tonic-gate 	char path[MAXPATHLEN];
4897c478bd9Sstevel@tonic-gate 	struct stat st;
4907c478bd9Sstevel@tonic-gate 
4917c478bd9Sstevel@tonic-gate 	if (snprintf(path, sizeof (path), "%s%s", prefix, subdir) >
4927c478bd9Sstevel@tonic-gate 	    sizeof (path)) {
4937c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "pathname %s%s is too long", prefix,
4947c478bd9Sstevel@tonic-gate 		    subdir);
4957c478bd9Sstevel@tonic-gate 		return (-1);
4967c478bd9Sstevel@tonic-gate 	}
4977c478bd9Sstevel@tonic-gate 
4987c478bd9Sstevel@tonic-gate 	if (lstat(path, &st) == 0) {
4997c478bd9Sstevel@tonic-gate 		/*
5007c478bd9Sstevel@tonic-gate 		 * We don't check the file mode since presumably the zone
5017c478bd9Sstevel@tonic-gate 		 * administrator may have had good reason to change the mode,
5027c478bd9Sstevel@tonic-gate 		 * and we don't need to second guess him.
5037c478bd9Sstevel@tonic-gate 		 */
5047c478bd9Sstevel@tonic-gate 		if (!S_ISDIR(st.st_mode)) {
50545916cd2Sjpk 			if (is_system_labeled() &&
50645916cd2Sjpk 			    S_ISREG(st.st_mode)) {
50745916cd2Sjpk 				/*
50845916cd2Sjpk 				 * The need to mount readonly copies of
50945916cd2Sjpk 				 * global zone /etc/ files is unique to
51045916cd2Sjpk 				 * Trusted Extensions.
51145916cd2Sjpk 				 */
51245916cd2Sjpk 				if (strncmp(subdir, "/etc/",
51345916cd2Sjpk 				    strlen("/etc/")) != 0) {
51445916cd2Sjpk 					zerror(zlogp, B_FALSE,
51545916cd2Sjpk 					    "%s is not in /etc", path);
5167c478bd9Sstevel@tonic-gate 					return (-1);
5177c478bd9Sstevel@tonic-gate 				}
51845916cd2Sjpk 			} else {
51945916cd2Sjpk 				zerror(zlogp, B_FALSE,
52045916cd2Sjpk 				    "%s is not a directory", path);
52145916cd2Sjpk 				return (-1);
52245916cd2Sjpk 			}
52345916cd2Sjpk 		}
5247c478bd9Sstevel@tonic-gate 	} else if (mkdirp(path, mode) != 0) {
5257c478bd9Sstevel@tonic-gate 		if (errno == EROFS)
5267c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "Could not mkdir %s.\nIt is on "
5277c478bd9Sstevel@tonic-gate 			    "a read-only file system in this local zone.\nMake "
5287c478bd9Sstevel@tonic-gate 			    "sure %s exists in the global zone.", path, subdir);
5297c478bd9Sstevel@tonic-gate 		else
5307c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "mkdirp of %s failed", path);
5317c478bd9Sstevel@tonic-gate 		return (-1);
5327c478bd9Sstevel@tonic-gate 	}
5337c478bd9Sstevel@tonic-gate 	return (0);
5347c478bd9Sstevel@tonic-gate }
5357c478bd9Sstevel@tonic-gate 
/*
 * Free a NULL-terminated array of strings as returned by
 * get_remote_fstypes(): each string first, then the array itself.  A NULL
 * "types" is accepted and ignored.
 */
static void
free_remote_fstypes(char **types)
{
	char **tp;

	if (types == NULL)
		return;
	for (tp = types; *tp != NULL; tp++)
		free(*tp);
	free(types);
}
5477c478bd9Sstevel@tonic-gate 
5487c478bd9Sstevel@tonic-gate static char **
5497c478bd9Sstevel@tonic-gate get_remote_fstypes(zlog_t *zlogp)
5507c478bd9Sstevel@tonic-gate {
5517c478bd9Sstevel@tonic-gate 	char **types = NULL;
5527c478bd9Sstevel@tonic-gate 	FILE *fp;
5537c478bd9Sstevel@tonic-gate 	char buf[MAXPATHLEN];
5547c478bd9Sstevel@tonic-gate 	char fstype[MAXPATHLEN];
5557c478bd9Sstevel@tonic-gate 	uint_t lines = 0;
5567c478bd9Sstevel@tonic-gate 	uint_t i;
5577c478bd9Sstevel@tonic-gate 
5587c478bd9Sstevel@tonic-gate 	if ((fp = fopen(DFSTYPES, "r")) == NULL) {
5597c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "failed to open %s", DFSTYPES);
5607c478bd9Sstevel@tonic-gate 		return (NULL);
5617c478bd9Sstevel@tonic-gate 	}
5627c478bd9Sstevel@tonic-gate 	/*
5637c478bd9Sstevel@tonic-gate 	 * Count the number of lines
5647c478bd9Sstevel@tonic-gate 	 */
5657c478bd9Sstevel@tonic-gate 	while (fgets(buf, sizeof (buf), fp) != NULL)
5667c478bd9Sstevel@tonic-gate 		lines++;
5677c478bd9Sstevel@tonic-gate 	if (lines == 0)	/* didn't read anything; empty file */
5687c478bd9Sstevel@tonic-gate 		goto out;
5697c478bd9Sstevel@tonic-gate 	rewind(fp);
5707c478bd9Sstevel@tonic-gate 	/*
5717c478bd9Sstevel@tonic-gate 	 * Allocate enough space for a NULL-terminated array.
5727c478bd9Sstevel@tonic-gate 	 */
5737c478bd9Sstevel@tonic-gate 	types = calloc(lines + 1, sizeof (char *));
5747c478bd9Sstevel@tonic-gate 	if (types == NULL) {
5757c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "memory allocation failed");
5767c478bd9Sstevel@tonic-gate 		goto out;
5777c478bd9Sstevel@tonic-gate 	}
5787c478bd9Sstevel@tonic-gate 	i = 0;
5797c478bd9Sstevel@tonic-gate 	while (fgets(buf, sizeof (buf), fp) != NULL) {
5807c478bd9Sstevel@tonic-gate 		/* LINTED - fstype is big enough to hold buf */
5817c478bd9Sstevel@tonic-gate 		if (sscanf(buf, "%s", fstype) == 0) {
5827c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "unable to parse %s", DFSTYPES);
5837c478bd9Sstevel@tonic-gate 			free_remote_fstypes(types);
5847c478bd9Sstevel@tonic-gate 			types = NULL;
5857c478bd9Sstevel@tonic-gate 			goto out;
5867c478bd9Sstevel@tonic-gate 		}
5877c478bd9Sstevel@tonic-gate 		types[i] = strdup(fstype);
5887c478bd9Sstevel@tonic-gate 		if (types[i] == NULL) {
5897c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "memory allocation failed");
5907c478bd9Sstevel@tonic-gate 			free_remote_fstypes(types);
5917c478bd9Sstevel@tonic-gate 			types = NULL;
5927c478bd9Sstevel@tonic-gate 			goto out;
5937c478bd9Sstevel@tonic-gate 		}
5947c478bd9Sstevel@tonic-gate 		i++;
5957c478bd9Sstevel@tonic-gate 	}
5967c478bd9Sstevel@tonic-gate out:
5977c478bd9Sstevel@tonic-gate 	(void) fclose(fp);
5987c478bd9Sstevel@tonic-gate 	return (types);
5997c478bd9Sstevel@tonic-gate }
6007c478bd9Sstevel@tonic-gate 
6017c478bd9Sstevel@tonic-gate static boolean_t
6027c478bd9Sstevel@tonic-gate is_remote_fstype(const char *fstype, char *const *remote_fstypes)
6037c478bd9Sstevel@tonic-gate {
6047c478bd9Sstevel@tonic-gate 	uint_t i;
6057c478bd9Sstevel@tonic-gate 
6067c478bd9Sstevel@tonic-gate 	if (remote_fstypes == NULL)
6077c478bd9Sstevel@tonic-gate 		return (B_FALSE);
6087c478bd9Sstevel@tonic-gate 	for (i = 0; remote_fstypes[i] != NULL; i++) {
6097c478bd9Sstevel@tonic-gate 		if (strcmp(remote_fstypes[i], fstype) == 0)
6107c478bd9Sstevel@tonic-gate 			return (B_TRUE);
6117c478bd9Sstevel@tonic-gate 	}
6127c478bd9Sstevel@tonic-gate 	return (B_FALSE);
6137c478bd9Sstevel@tonic-gate }
6147c478bd9Sstevel@tonic-gate 
615108322fbScarlsonj /*
616108322fbScarlsonj  * This converts a zone root path (normally of the form .../root) to a Live
617108322fbScarlsonj  * Upgrade scratch zone root (of the form .../lu).
618108322fbScarlsonj  */
6197c478bd9Sstevel@tonic-gate static void
620108322fbScarlsonj root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
6217c478bd9Sstevel@tonic-gate {
622108322fbScarlsonj 	if (!isresolved && zonecfg_in_alt_root())
623108322fbScarlsonj 		resolve_lofs(zlogp, zroot, zrootlen);
624108322fbScarlsonj 	(void) strcpy(strrchr(zroot, '/') + 1, "lu");
6257c478bd9Sstevel@tonic-gate }
6267c478bd9Sstevel@tonic-gate 
6277c478bd9Sstevel@tonic-gate /*
6287c478bd9Sstevel@tonic-gate  * The general strategy for unmounting filesystems is as follows:
6297c478bd9Sstevel@tonic-gate  *
6307c478bd9Sstevel@tonic-gate  * - Remote filesystems may be dead, and attempting to contact them as
6317c478bd9Sstevel@tonic-gate  * part of a regular unmount may hang forever; we want to always try to
6327c478bd9Sstevel@tonic-gate  * forcibly unmount such filesystems and only fall back to regular
6337c478bd9Sstevel@tonic-gate  * unmounts if the filesystem doesn't support forced unmounts.
6347c478bd9Sstevel@tonic-gate  *
6357c478bd9Sstevel@tonic-gate  * - We don't want to unnecessarily corrupt metadata on local
6367c478bd9Sstevel@tonic-gate  * filesystems (ie UFS), so we want to start off with graceful unmounts,
6377c478bd9Sstevel@tonic-gate  * and only escalate to doing forced unmounts if we get stuck.
6387c478bd9Sstevel@tonic-gate  *
6397c478bd9Sstevel@tonic-gate  * We start off walking backwards through the mount table.  This doesn't
6407c478bd9Sstevel@tonic-gate  * give us strict ordering but ensures that we try to unmount submounts
6417c478bd9Sstevel@tonic-gate  * first.  We thus limit the number of failed umount2(2) calls.
6427c478bd9Sstevel@tonic-gate  *
6437c478bd9Sstevel@tonic-gate  * The mechanism for determining if we're stuck is to count the number
6447c478bd9Sstevel@tonic-gate  * of failed unmounts each iteration through the mount table.  This
6457c478bd9Sstevel@tonic-gate  * gives us an upper bound on the number of filesystems which remain
6467c478bd9Sstevel@tonic-gate  * mounted (autofs trigger nodes are dealt with separately).  If at the
6477c478bd9Sstevel@tonic-gate  * end of one unmount+autofs_cleanup cycle we still have the same number
6487c478bd9Sstevel@tonic-gate  * of mounts that we started out with, we're stuck and try a forced
6497c478bd9Sstevel@tonic-gate  * unmount.  If that fails (filesystem doesn't support forced unmounts)
6507c478bd9Sstevel@tonic-gate  * then we bail and are unable to teardown the zone.  If it succeeds,
6517c478bd9Sstevel@tonic-gate  * we're no longer stuck so we continue with our policy of trying
6527c478bd9Sstevel@tonic-gate  * graceful mounts first.
6537c478bd9Sstevel@tonic-gate  *
6547c478bd9Sstevel@tonic-gate  * Zone must be down (ie, no processes or threads active).
6557c478bd9Sstevel@tonic-gate  */
6567c478bd9Sstevel@tonic-gate static int
657108322fbScarlsonj unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
6587c478bd9Sstevel@tonic-gate {
6597c478bd9Sstevel@tonic-gate 	int error = 0;
6607c478bd9Sstevel@tonic-gate 	FILE *mnttab;
6617c478bd9Sstevel@tonic-gate 	struct mnttab *mnts;
6627c478bd9Sstevel@tonic-gate 	uint_t nmnt;
6637c478bd9Sstevel@tonic-gate 	char zroot[MAXPATHLEN + 1];
6647c478bd9Sstevel@tonic-gate 	size_t zrootlen;
6657c478bd9Sstevel@tonic-gate 	uint_t oldcount = UINT_MAX;
6667c478bd9Sstevel@tonic-gate 	boolean_t stuck = B_FALSE;
6677c478bd9Sstevel@tonic-gate 	char **remote_fstypes = NULL;
6687c478bd9Sstevel@tonic-gate 
6697c478bd9Sstevel@tonic-gate 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
6707c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "unable to determine zone root");
6717c478bd9Sstevel@tonic-gate 		return (-1);
6727c478bd9Sstevel@tonic-gate 	}
673108322fbScarlsonj 	if (unmount_cmd)
674108322fbScarlsonj 		root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
6757c478bd9Sstevel@tonic-gate 
6767c478bd9Sstevel@tonic-gate 	(void) strcat(zroot, "/");
6777c478bd9Sstevel@tonic-gate 	zrootlen = strlen(zroot);
6787c478bd9Sstevel@tonic-gate 
67945916cd2Sjpk 	/*
68045916cd2Sjpk 	 * For Trusted Extensions unmount each higher level zone's mount
68145916cd2Sjpk 	 * of our zone's /export/home
68245916cd2Sjpk 	 */
68348451833Scarlsonj 	if (!unmount_cmd)
68445916cd2Sjpk 		tsol_unmounts(zlogp, zone_name);
68545916cd2Sjpk 
6867c478bd9Sstevel@tonic-gate 	if ((mnttab = fopen(MNTTAB, "r")) == NULL) {
6877c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "failed to open %s", MNTTAB);
6887c478bd9Sstevel@tonic-gate 		return (-1);
6897c478bd9Sstevel@tonic-gate 	}
6907c478bd9Sstevel@tonic-gate 	/*
6917c478bd9Sstevel@tonic-gate 	 * Use our hacky mntfs ioctl so we see everything, even mounts with
6927c478bd9Sstevel@tonic-gate 	 * MS_NOMNTTAB.
6937c478bd9Sstevel@tonic-gate 	 */
6947c478bd9Sstevel@tonic-gate 	if (ioctl(fileno(mnttab), MNTIOC_SHOWHIDDEN, NULL) < 0) {
6957c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to configure %s", MNTTAB);
6967c478bd9Sstevel@tonic-gate 		error++;
6977c478bd9Sstevel@tonic-gate 		goto out;
6987c478bd9Sstevel@tonic-gate 	}
6997c478bd9Sstevel@tonic-gate 
7007c478bd9Sstevel@tonic-gate 	/*
7017c478bd9Sstevel@tonic-gate 	 * Build the list of remote fstypes so we know which ones we
7027c478bd9Sstevel@tonic-gate 	 * should forcibly unmount.
7037c478bd9Sstevel@tonic-gate 	 */
7047c478bd9Sstevel@tonic-gate 	remote_fstypes = get_remote_fstypes(zlogp);
7057c478bd9Sstevel@tonic-gate 	for (; /* ever */; ) {
7067c478bd9Sstevel@tonic-gate 		uint_t newcount = 0;
7077c478bd9Sstevel@tonic-gate 		boolean_t unmounted;
7087c478bd9Sstevel@tonic-gate 		struct mnttab *mnp;
7097c478bd9Sstevel@tonic-gate 		char *path;
7107c478bd9Sstevel@tonic-gate 		uint_t i;
7117c478bd9Sstevel@tonic-gate 
7127c478bd9Sstevel@tonic-gate 		mnts = NULL;
7137c478bd9Sstevel@tonic-gate 		nmnt = 0;
7147c478bd9Sstevel@tonic-gate 		/*
7157c478bd9Sstevel@tonic-gate 		 * MNTTAB gives us a way to walk through mounted
7167c478bd9Sstevel@tonic-gate 		 * filesystems; we need to be able to walk them in
7177c478bd9Sstevel@tonic-gate 		 * reverse order, so we build a list of all mounted
7187c478bd9Sstevel@tonic-gate 		 * filesystems.
7197c478bd9Sstevel@tonic-gate 		 */
7207c478bd9Sstevel@tonic-gate 		if (build_mnttable(zlogp, zroot, zrootlen, mnttab, &mnts,
7217c478bd9Sstevel@tonic-gate 		    &nmnt) != 0) {
7227c478bd9Sstevel@tonic-gate 			error++;
7237c478bd9Sstevel@tonic-gate 			goto out;
7247c478bd9Sstevel@tonic-gate 		}
7257c478bd9Sstevel@tonic-gate 		for (i = 0; i < nmnt; i++) {
7267c478bd9Sstevel@tonic-gate 			mnp = &mnts[nmnt - i - 1]; /* access in reverse order */
7277c478bd9Sstevel@tonic-gate 			path = mnp->mnt_mountp;
7287c478bd9Sstevel@tonic-gate 			unmounted = B_FALSE;
7297c478bd9Sstevel@tonic-gate 			/*
7307c478bd9Sstevel@tonic-gate 			 * Try forced unmount first for remote filesystems.
7317c478bd9Sstevel@tonic-gate 			 *
7327c478bd9Sstevel@tonic-gate 			 * Not all remote filesystems support forced unmounts,
7337c478bd9Sstevel@tonic-gate 			 * so if this fails (ENOTSUP) we'll continue on
7347c478bd9Sstevel@tonic-gate 			 * and try a regular unmount.
7357c478bd9Sstevel@tonic-gate 			 */
7367c478bd9Sstevel@tonic-gate 			if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) {
7377c478bd9Sstevel@tonic-gate 				if (umount2(path, MS_FORCE) == 0)
7387c478bd9Sstevel@tonic-gate 					unmounted = B_TRUE;
7397c478bd9Sstevel@tonic-gate 			}
7407c478bd9Sstevel@tonic-gate 			/*
7417c478bd9Sstevel@tonic-gate 			 * Try forced unmount if we're stuck.
7427c478bd9Sstevel@tonic-gate 			 */
7437c478bd9Sstevel@tonic-gate 			if (stuck) {
7447c478bd9Sstevel@tonic-gate 				if (umount2(path, MS_FORCE) == 0) {
7457c478bd9Sstevel@tonic-gate 					unmounted = B_TRUE;
7467c478bd9Sstevel@tonic-gate 					stuck = B_FALSE;
7477c478bd9Sstevel@tonic-gate 				} else {
7487c478bd9Sstevel@tonic-gate 					/*
7497c478bd9Sstevel@tonic-gate 					 * The first failure indicates a
7507c478bd9Sstevel@tonic-gate 					 * mount we won't be able to get
7517c478bd9Sstevel@tonic-gate 					 * rid of automatically, so we
7527c478bd9Sstevel@tonic-gate 					 * bail.
7537c478bd9Sstevel@tonic-gate 					 */
7547c478bd9Sstevel@tonic-gate 					error++;
7557c478bd9Sstevel@tonic-gate 					zerror(zlogp, B_FALSE,
7567c478bd9Sstevel@tonic-gate 					    "unable to unmount '%s'", path);
7577c478bd9Sstevel@tonic-gate 					free_mnttable(mnts, nmnt);
7587c478bd9Sstevel@tonic-gate 					goto out;
7597c478bd9Sstevel@tonic-gate 				}
7607c478bd9Sstevel@tonic-gate 			}
7617c478bd9Sstevel@tonic-gate 			/*
7627c478bd9Sstevel@tonic-gate 			 * Try regular unmounts for everything else.
7637c478bd9Sstevel@tonic-gate 			 */
7647c478bd9Sstevel@tonic-gate 			if (!unmounted && umount2(path, 0) != 0)
7657c478bd9Sstevel@tonic-gate 				newcount++;
7667c478bd9Sstevel@tonic-gate 		}
7677c478bd9Sstevel@tonic-gate 		free_mnttable(mnts, nmnt);
7687c478bd9Sstevel@tonic-gate 
7697c478bd9Sstevel@tonic-gate 		if (newcount == 0)
7707c478bd9Sstevel@tonic-gate 			break;
7717c478bd9Sstevel@tonic-gate 		if (newcount >= oldcount) {
7727c478bd9Sstevel@tonic-gate 			/*
7737c478bd9Sstevel@tonic-gate 			 * Last round didn't unmount anything; we're stuck and
7747c478bd9Sstevel@tonic-gate 			 * should start trying forced unmounts.
7757c478bd9Sstevel@tonic-gate 			 */
7767c478bd9Sstevel@tonic-gate 			stuck = B_TRUE;
7777c478bd9Sstevel@tonic-gate 		}
7787c478bd9Sstevel@tonic-gate 		oldcount = newcount;
7797c478bd9Sstevel@tonic-gate 
7807c478bd9Sstevel@tonic-gate 		/*
7817c478bd9Sstevel@tonic-gate 		 * Autofs doesn't let you unmount its trigger nodes from
7827c478bd9Sstevel@tonic-gate 		 * userland so we have to tell the kernel to cleanup for us.
7837c478bd9Sstevel@tonic-gate 		 */
7847c478bd9Sstevel@tonic-gate 		if (autofs_cleanup(zoneid) != 0) {
7857c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "unable to remove autofs nodes");
7867c478bd9Sstevel@tonic-gate 			error++;
7877c478bd9Sstevel@tonic-gate 			goto out;
7887c478bd9Sstevel@tonic-gate 		}
7897c478bd9Sstevel@tonic-gate 	}
7907c478bd9Sstevel@tonic-gate 
7917c478bd9Sstevel@tonic-gate out:
7927c478bd9Sstevel@tonic-gate 	free_remote_fstypes(remote_fstypes);
7937c478bd9Sstevel@tonic-gate 	(void) fclose(mnttab);
7947c478bd9Sstevel@tonic-gate 	return (error ? -1 : 0);
7957c478bd9Sstevel@tonic-gate }
7967c478bd9Sstevel@tonic-gate 
7977c478bd9Sstevel@tonic-gate static int
7987c478bd9Sstevel@tonic-gate fs_compare(const void *m1, const void *m2)
7997c478bd9Sstevel@tonic-gate {
8007c478bd9Sstevel@tonic-gate 	struct zone_fstab *i = (struct zone_fstab *)m1;
8017c478bd9Sstevel@tonic-gate 	struct zone_fstab *j = (struct zone_fstab *)m2;
8027c478bd9Sstevel@tonic-gate 
8037c478bd9Sstevel@tonic-gate 	return (strcmp(i->zone_fs_dir, j->zone_fs_dir));
8047c478bd9Sstevel@tonic-gate }
8057c478bd9Sstevel@tonic-gate 
8067c478bd9Sstevel@tonic-gate /*
8077c478bd9Sstevel@tonic-gate  * Fork and exec (and wait for) the mentioned binary with the provided
8087c478bd9Sstevel@tonic-gate  * arguments.  Returns (-1) if something went wrong with fork(2) or exec(2),
8097c478bd9Sstevel@tonic-gate  * returns the exit status otherwise.
8107c478bd9Sstevel@tonic-gate  *
8117c478bd9Sstevel@tonic-gate  * If we were unable to exec the provided pathname (for whatever
8127c478bd9Sstevel@tonic-gate  * reason), we return the special token ZEXIT_EXEC.  The current value
8137c478bd9Sstevel@tonic-gate  * of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the
8147c478bd9Sstevel@tonic-gate  * consumers of this function; any future consumers must make sure this
8157c478bd9Sstevel@tonic-gate  * remains the case.
8167c478bd9Sstevel@tonic-gate  */
8177c478bd9Sstevel@tonic-gate static int
8187c478bd9Sstevel@tonic-gate forkexec(zlog_t *zlogp, const char *path, char *const argv[])
8197c478bd9Sstevel@tonic-gate {
8207c478bd9Sstevel@tonic-gate 	pid_t child_pid;
8217c478bd9Sstevel@tonic-gate 	int child_status = 0;
8227c478bd9Sstevel@tonic-gate 
8237c478bd9Sstevel@tonic-gate 	/*
8247c478bd9Sstevel@tonic-gate 	 * Do not let another thread localize a message while we are forking.
8257c478bd9Sstevel@tonic-gate 	 */
8267c478bd9Sstevel@tonic-gate 	(void) mutex_lock(&msglock);
8277c478bd9Sstevel@tonic-gate 	child_pid = fork();
8287c478bd9Sstevel@tonic-gate 	(void) mutex_unlock(&msglock);
8297c478bd9Sstevel@tonic-gate 	if (child_pid == -1) {
8307c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not fork for %s", argv[0]);
8317c478bd9Sstevel@tonic-gate 		return (-1);
8327c478bd9Sstevel@tonic-gate 	} else if (child_pid == 0) {
8337c478bd9Sstevel@tonic-gate 		closefrom(0);
8341390a385Sgjelinek 		/* redirect stdin, stdout & stderr to /dev/null */
8351390a385Sgjelinek 		(void) open("/dev/null", O_RDONLY);	/* stdin */
8361390a385Sgjelinek 		(void) open("/dev/null", O_WRONLY);	/* stdout */
8371390a385Sgjelinek 		(void) open("/dev/null", O_WRONLY);	/* stderr */
8387c478bd9Sstevel@tonic-gate 		(void) execv(path, argv);
8397c478bd9Sstevel@tonic-gate 		/*
8407c478bd9Sstevel@tonic-gate 		 * Since we are in the child, there is no point calling zerror()
8417c478bd9Sstevel@tonic-gate 		 * since there is nobody waiting to consume it.  So exit with a
8427c478bd9Sstevel@tonic-gate 		 * special code that the parent will recognize and call zerror()
8437c478bd9Sstevel@tonic-gate 		 * accordingly.
8447c478bd9Sstevel@tonic-gate 		 */
8457c478bd9Sstevel@tonic-gate 
8467c478bd9Sstevel@tonic-gate 		_exit(ZEXIT_EXEC);
8477c478bd9Sstevel@tonic-gate 	} else {
8487c478bd9Sstevel@tonic-gate 		(void) waitpid(child_pid, &child_status, 0);
8497c478bd9Sstevel@tonic-gate 	}
8507c478bd9Sstevel@tonic-gate 
8517c478bd9Sstevel@tonic-gate 	if (WIFSIGNALED(child_status)) {
8527c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
8537c478bd9Sstevel@tonic-gate 		    "signal %d", path, WTERMSIG(child_status));
8547c478bd9Sstevel@tonic-gate 		return (-1);
8557c478bd9Sstevel@tonic-gate 	}
8567c478bd9Sstevel@tonic-gate 	assert(WIFEXITED(child_status));
8577c478bd9Sstevel@tonic-gate 	if (WEXITSTATUS(child_status) == ZEXIT_EXEC) {
8587c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "failed to exec %s", path);
8597c478bd9Sstevel@tonic-gate 		return (-1);
8607c478bd9Sstevel@tonic-gate 	}
8617c478bd9Sstevel@tonic-gate 	return (WEXITSTATUS(child_status));
8627c478bd9Sstevel@tonic-gate }
8637c478bd9Sstevel@tonic-gate 
8647c478bd9Sstevel@tonic-gate static int
8657c478bd9Sstevel@tonic-gate dofsck(zlog_t *zlogp, const char *fstype, const char *rawdev)
8667c478bd9Sstevel@tonic-gate {
8677c478bd9Sstevel@tonic-gate 	char cmdbuf[MAXPATHLEN];
8687c478bd9Sstevel@tonic-gate 	char *argv[4];
8697c478bd9Sstevel@tonic-gate 	int status;
8707c478bd9Sstevel@tonic-gate 
8717c478bd9Sstevel@tonic-gate 	/*
8727c478bd9Sstevel@tonic-gate 	 * We could alternatively have called /usr/sbin/fsck -F <fstype>, but
8737c478bd9Sstevel@tonic-gate 	 * that would cost us an extra fork/exec without buying us anything.
8747c478bd9Sstevel@tonic-gate 	 */
8757c478bd9Sstevel@tonic-gate 	if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/fsck", fstype)
8767c478bd9Sstevel@tonic-gate 	    > sizeof (cmdbuf)) {
8777c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "file-system type %s too long", fstype);
8787c478bd9Sstevel@tonic-gate 		return (-1);
8797c478bd9Sstevel@tonic-gate 	}
8807c478bd9Sstevel@tonic-gate 
8817c478bd9Sstevel@tonic-gate 	argv[0] = "fsck";
8827c478bd9Sstevel@tonic-gate 	argv[1] = "-m";
8837c478bd9Sstevel@tonic-gate 	argv[2] = (char *)rawdev;
8847c478bd9Sstevel@tonic-gate 	argv[3] = NULL;
8857c478bd9Sstevel@tonic-gate 
8867c478bd9Sstevel@tonic-gate 	status = forkexec(zlogp, cmdbuf, argv);
8877c478bd9Sstevel@tonic-gate 	if (status == 0 || status == -1)
8887c478bd9Sstevel@tonic-gate 		return (status);
8897c478bd9Sstevel@tonic-gate 	zerror(zlogp, B_FALSE, "fsck of '%s' failed with exit status %d; "
8907c478bd9Sstevel@tonic-gate 	    "run fsck manually", rawdev, status);
8917c478bd9Sstevel@tonic-gate 	return (-1);
8927c478bd9Sstevel@tonic-gate }
8937c478bd9Sstevel@tonic-gate 
8947c478bd9Sstevel@tonic-gate static int
8957c478bd9Sstevel@tonic-gate domount(zlog_t *zlogp, const char *fstype, const char *opts,
8967c478bd9Sstevel@tonic-gate     const char *special, const char *directory)
8977c478bd9Sstevel@tonic-gate {
8987c478bd9Sstevel@tonic-gate 	char cmdbuf[MAXPATHLEN];
8997c478bd9Sstevel@tonic-gate 	char *argv[6];
9007c478bd9Sstevel@tonic-gate 	int status;
9017c478bd9Sstevel@tonic-gate 
9027c478bd9Sstevel@tonic-gate 	/*
9037c478bd9Sstevel@tonic-gate 	 * We could alternatively have called /usr/sbin/mount -F <fstype>, but
9047c478bd9Sstevel@tonic-gate 	 * that would cost us an extra fork/exec without buying us anything.
9057c478bd9Sstevel@tonic-gate 	 */
9067c478bd9Sstevel@tonic-gate 	if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/mount", fstype)
9077c478bd9Sstevel@tonic-gate 	    > sizeof (cmdbuf)) {
9087c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "file-system type %s too long", fstype);
9097c478bd9Sstevel@tonic-gate 		return (-1);
9107c478bd9Sstevel@tonic-gate 	}
9117c478bd9Sstevel@tonic-gate 	argv[0] = "mount";
9127c478bd9Sstevel@tonic-gate 	if (opts[0] == '\0') {
9137c478bd9Sstevel@tonic-gate 		argv[1] = (char *)special;
9147c478bd9Sstevel@tonic-gate 		argv[2] = (char *)directory;
9157c478bd9Sstevel@tonic-gate 		argv[3] = NULL;
9167c478bd9Sstevel@tonic-gate 	} else {
9177c478bd9Sstevel@tonic-gate 		argv[1] = "-o";
9187c478bd9Sstevel@tonic-gate 		argv[2] = (char *)opts;
9197c478bd9Sstevel@tonic-gate 		argv[3] = (char *)special;
9207c478bd9Sstevel@tonic-gate 		argv[4] = (char *)directory;
9217c478bd9Sstevel@tonic-gate 		argv[5] = NULL;
9227c478bd9Sstevel@tonic-gate 	}
9237c478bd9Sstevel@tonic-gate 
9247c478bd9Sstevel@tonic-gate 	status = forkexec(zlogp, cmdbuf, argv);
9257c478bd9Sstevel@tonic-gate 	if (status == 0 || status == -1)
9267c478bd9Sstevel@tonic-gate 		return (status);
9277c478bd9Sstevel@tonic-gate 	if (opts[0] == '\0')
9287c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "\"%s %s %s\" "
9297c478bd9Sstevel@tonic-gate 		    "failed with exit code %d",
9307c478bd9Sstevel@tonic-gate 		    cmdbuf, special, directory, status);
9317c478bd9Sstevel@tonic-gate 	else
9327c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "\"%s -o %s %s %s\" "
9337c478bd9Sstevel@tonic-gate 		    "failed with exit code %d",
9347c478bd9Sstevel@tonic-gate 		    cmdbuf, opts, special, directory, status);
9357c478bd9Sstevel@tonic-gate 	return (-1);
9367c478bd9Sstevel@tonic-gate }
9377c478bd9Sstevel@tonic-gate 
9387c478bd9Sstevel@tonic-gate /*
9397c478bd9Sstevel@tonic-gate  * Make sure if a given path exists, it is not a sym-link, and is a directory.
9407c478bd9Sstevel@tonic-gate  */
9417c478bd9Sstevel@tonic-gate static int
9427c478bd9Sstevel@tonic-gate check_path(zlog_t *zlogp, const char *path)
9437c478bd9Sstevel@tonic-gate {
9447c478bd9Sstevel@tonic-gate 	struct stat statbuf;
9457c478bd9Sstevel@tonic-gate 	char respath[MAXPATHLEN];
9467c478bd9Sstevel@tonic-gate 	int res;
9477c478bd9Sstevel@tonic-gate 
9487c478bd9Sstevel@tonic-gate 	if (lstat(path, &statbuf) != 0) {
9497c478bd9Sstevel@tonic-gate 		if (errno == ENOENT)
9507c478bd9Sstevel@tonic-gate 			return (0);
9517c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "can't stat %s", path);
9527c478bd9Sstevel@tonic-gate 		return (-1);
9537c478bd9Sstevel@tonic-gate 	}
9547c478bd9Sstevel@tonic-gate 	if (S_ISLNK(statbuf.st_mode)) {
9557c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s is a symlink", path);
9567c478bd9Sstevel@tonic-gate 		return (-1);
9577c478bd9Sstevel@tonic-gate 	}
9587c478bd9Sstevel@tonic-gate 	if (!S_ISDIR(statbuf.st_mode)) {
95945916cd2Sjpk 		if (is_system_labeled() && S_ISREG(statbuf.st_mode)) {
96045916cd2Sjpk 			/*
96145916cd2Sjpk 			 * The need to mount readonly copies of
96245916cd2Sjpk 			 * global zone /etc/ files is unique to
96345916cd2Sjpk 			 * Trusted Extensions.
96445916cd2Sjpk 			 * The check for /etc/ via strstr() is to
96545916cd2Sjpk 			 * allow paths like $ZONEROOT/etc/passwd
96645916cd2Sjpk 			 */
96745916cd2Sjpk 			if (strstr(path, "/etc/") == NULL) {
96845916cd2Sjpk 				zerror(zlogp, B_FALSE,
96945916cd2Sjpk 				    "%s is not in /etc", path);
97045916cd2Sjpk 				return (-1);
97145916cd2Sjpk 			}
97245916cd2Sjpk 		} else {
9737c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s is not a directory", path);
9747c478bd9Sstevel@tonic-gate 			return (-1);
9757c478bd9Sstevel@tonic-gate 		}
97645916cd2Sjpk 	}
9777c478bd9Sstevel@tonic-gate 	if ((res = resolvepath(path, respath, sizeof (respath))) == -1) {
9787c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to resolve path %s", path);
9797c478bd9Sstevel@tonic-gate 		return (-1);
9807c478bd9Sstevel@tonic-gate 	}
9817c478bd9Sstevel@tonic-gate 	respath[res] = '\0';
9827c478bd9Sstevel@tonic-gate 	if (strcmp(path, respath) != 0) {
9837c478bd9Sstevel@tonic-gate 		/*
9847c478bd9Sstevel@tonic-gate 		 * We don't like ".."s and "."s throwing us off
9857c478bd9Sstevel@tonic-gate 		 */
9867c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s is not a canonical path", path);
9877c478bd9Sstevel@tonic-gate 		return (-1);
9887c478bd9Sstevel@tonic-gate 	}
9897c478bd9Sstevel@tonic-gate 	return (0);
9907c478bd9Sstevel@tonic-gate }
9917c478bd9Sstevel@tonic-gate 
9927c478bd9Sstevel@tonic-gate /*
9937c478bd9Sstevel@tonic-gate  * Check every component of rootpath/relpath.  If any component fails (ie,
9947c478bd9Sstevel@tonic-gate  * exists but isn't the canonical path to a directory), it is returned in
9957c478bd9Sstevel@tonic-gate  * badpath, which is assumed to be at least of size MAXPATHLEN.
9967c478bd9Sstevel@tonic-gate  *
9977c478bd9Sstevel@tonic-gate  * Relpath must begin with '/'.
9987c478bd9Sstevel@tonic-gate  */
9997c478bd9Sstevel@tonic-gate static boolean_t
10007c478bd9Sstevel@tonic-gate valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *relpath)
10017c478bd9Sstevel@tonic-gate {
10027c478bd9Sstevel@tonic-gate 	char abspath[MAXPATHLEN], *slashp;
10037c478bd9Sstevel@tonic-gate 
10047c478bd9Sstevel@tonic-gate 	/*
10057c478bd9Sstevel@tonic-gate 	 * Make sure abspath has at least one '/' after its rootpath
10067c478bd9Sstevel@tonic-gate 	 * component, and ends with '/'.
10077c478bd9Sstevel@tonic-gate 	 */
10087c478bd9Sstevel@tonic-gate 	if (snprintf(abspath, sizeof (abspath), "%s%s/", rootpath, relpath) >
10097c478bd9Sstevel@tonic-gate 	    sizeof (abspath)) {
10107c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "pathname %s%s is too long", rootpath,
10117c478bd9Sstevel@tonic-gate 		    relpath);
10127c478bd9Sstevel@tonic-gate 		return (B_FALSE);
10137c478bd9Sstevel@tonic-gate 	}
10147c478bd9Sstevel@tonic-gate 
10157c478bd9Sstevel@tonic-gate 	slashp = &abspath[strlen(rootpath)];
10167c478bd9Sstevel@tonic-gate 	assert(*slashp == '/');
10177c478bd9Sstevel@tonic-gate 	do {
10187c478bd9Sstevel@tonic-gate 		*slashp = '\0';
10197c478bd9Sstevel@tonic-gate 		if (check_path(zlogp, abspath) != 0)
10207c478bd9Sstevel@tonic-gate 			return (B_FALSE);
10217c478bd9Sstevel@tonic-gate 		*slashp = '/';
10227c478bd9Sstevel@tonic-gate 		slashp++;
10237c478bd9Sstevel@tonic-gate 	} while ((slashp = strchr(slashp, '/')) != NULL);
10247c478bd9Sstevel@tonic-gate 	return (B_TRUE);
10257c478bd9Sstevel@tonic-gate }
10267c478bd9Sstevel@tonic-gate 
10277c478bd9Sstevel@tonic-gate static int
10287c478bd9Sstevel@tonic-gate mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath)
10297c478bd9Sstevel@tonic-gate {
10307c478bd9Sstevel@tonic-gate 	char    path[MAXPATHLEN];
1031108322fbScarlsonj 	char	specpath[MAXPATHLEN];
10327c478bd9Sstevel@tonic-gate 	char    optstr[MAX_MNTOPT_STR];
10337c478bd9Sstevel@tonic-gate 	zone_fsopt_t *optptr;
10347c478bd9Sstevel@tonic-gate 
10357c478bd9Sstevel@tonic-gate 	if (!valid_mount_path(zlogp, rootpath, fsptr->zone_fs_dir)) {
10367c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
10377c478bd9Sstevel@tonic-gate 		    rootpath, fsptr->zone_fs_dir);
10387c478bd9Sstevel@tonic-gate 		return (-1);
10397c478bd9Sstevel@tonic-gate 	}
10407c478bd9Sstevel@tonic-gate 
10417c478bd9Sstevel@tonic-gate 	if (make_one_dir(zlogp, rootpath, fsptr->zone_fs_dir,
10427c478bd9Sstevel@tonic-gate 	    DEFAULT_DIR_MODE) != 0)
10437c478bd9Sstevel@tonic-gate 		return (-1);
10447c478bd9Sstevel@tonic-gate 
10457c478bd9Sstevel@tonic-gate 	(void) snprintf(path, sizeof (path), "%s%s", rootpath,
10467c478bd9Sstevel@tonic-gate 	    fsptr->zone_fs_dir);
10477c478bd9Sstevel@tonic-gate 
10487c478bd9Sstevel@tonic-gate 	if (strlen(fsptr->zone_fs_special) == 0) {
10497c478bd9Sstevel@tonic-gate 		/*
10507c478bd9Sstevel@tonic-gate 		 * A zero-length special is how we distinguish IPDs from
1051108322fbScarlsonj 		 * general-purpose FSs.  Make sure it mounts from a place that
1052108322fbScarlsonj 		 * can be seen via the alternate zone's root.
10537c478bd9Sstevel@tonic-gate 		 */
1054108322fbScarlsonj 		if (snprintf(specpath, sizeof (specpath), "%s%s",
1055108322fbScarlsonj 		    zonecfg_get_root(), fsptr->zone_fs_dir) >=
1056108322fbScarlsonj 		    sizeof (specpath)) {
1057108322fbScarlsonj 			zerror(zlogp, B_FALSE, "cannot mount %s: path too "
1058108322fbScarlsonj 			    "long in alternate root", fsptr->zone_fs_dir);
1059108322fbScarlsonj 			return (-1);
1060108322fbScarlsonj 		}
1061108322fbScarlsonj 		if (zonecfg_in_alt_root())
1062108322fbScarlsonj 			resolve_lofs(zlogp, specpath, sizeof (specpath));
10637c478bd9Sstevel@tonic-gate 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS,
1064108322fbScarlsonj 		    specpath, path) != 0) {
10657c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "failed to loopback mount %s",
1066108322fbScarlsonj 			    specpath);
10677c478bd9Sstevel@tonic-gate 			return (-1);
10687c478bd9Sstevel@tonic-gate 		}
10697c478bd9Sstevel@tonic-gate 		return (0);
10707c478bd9Sstevel@tonic-gate 	}
10717c478bd9Sstevel@tonic-gate 
10727c478bd9Sstevel@tonic-gate 	/*
10737c478bd9Sstevel@tonic-gate 	 * In general the strategy here is to do just as much verification as
10747c478bd9Sstevel@tonic-gate 	 * necessary to avoid crashing or otherwise doing something bad; if the
10757c478bd9Sstevel@tonic-gate 	 * administrator initiated the operation via zoneadm(1m), he'll get
10767c478bd9Sstevel@tonic-gate 	 * auto-verification which will let him know what's wrong.  If he
10777c478bd9Sstevel@tonic-gate 	 * modifies the zone configuration of a running zone and doesn't attempt
10787c478bd9Sstevel@tonic-gate 	 * to verify that it's OK we won't crash but won't bother trying to be
10797c478bd9Sstevel@tonic-gate 	 * too helpful either.  zoneadm verify is only a couple keystrokes away.
10807c478bd9Sstevel@tonic-gate 	 */
10817c478bd9Sstevel@tonic-gate 	if (!zonecfg_valid_fs_type(fsptr->zone_fs_type)) {
10827c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "cannot mount %s on %s: "
10837c478bd9Sstevel@tonic-gate 		    "invalid file-system type %s", fsptr->zone_fs_special,
10847c478bd9Sstevel@tonic-gate 		    fsptr->zone_fs_dir, fsptr->zone_fs_type);
10857c478bd9Sstevel@tonic-gate 		return (-1);
10867c478bd9Sstevel@tonic-gate 	}
10877c478bd9Sstevel@tonic-gate 
10887c478bd9Sstevel@tonic-gate 	/*
1089108322fbScarlsonj 	 * If we're looking at an alternate root environment, then construct
1090108322fbScarlsonj 	 * read-only loopback mounts as necessary.  For all lofs mounts, make
1091108322fbScarlsonj 	 * sure that the 'special' entry points inside the alternate root.  (We
1092108322fbScarlsonj 	 * don't do this with other mounts, as devfs isn't in the alternate
1093108322fbScarlsonj 	 * root, and we need to assume the device environment is roughly the
1094108322fbScarlsonj 	 * same.)
1095108322fbScarlsonj 	 */
1096108322fbScarlsonj 	if (zonecfg_in_alt_root()) {
1097108322fbScarlsonj 		struct stat64 st;
1098108322fbScarlsonj 
1099108322fbScarlsonj 		if (stat64(fsptr->zone_fs_special, &st) != -1 &&
1100108322fbScarlsonj 		    S_ISBLK(st.st_mode) &&
1101108322fbScarlsonj 		    check_lofs_needed(zlogp, fsptr) == -1)
1102108322fbScarlsonj 			return (-1);
1103108322fbScarlsonj 		if (strcmp(fsptr->zone_fs_type, MNTTYPE_LOFS) == 0) {
1104108322fbScarlsonj 			if (snprintf(specpath, sizeof (specpath), "%s%s",
1105108322fbScarlsonj 			    zonecfg_get_root(), fsptr->zone_fs_special) >=
1106108322fbScarlsonj 			    sizeof (specpath)) {
1107108322fbScarlsonj 				zerror(zlogp, B_FALSE, "cannot mount %s: path "
1108108322fbScarlsonj 				    "too long in alternate root",
1109108322fbScarlsonj 				    fsptr->zone_fs_special);
1110108322fbScarlsonj 				return (-1);
1111108322fbScarlsonj 			}
1112108322fbScarlsonj 			resolve_lofs(zlogp, specpath, sizeof (specpath));
1113108322fbScarlsonj 			(void) strlcpy(fsptr->zone_fs_special, specpath,
1114108322fbScarlsonj 			    sizeof (fsptr->zone_fs_special));
1115108322fbScarlsonj 		}
1116108322fbScarlsonj 	}
1117108322fbScarlsonj 
1118108322fbScarlsonj 	/*
11197c478bd9Sstevel@tonic-gate 	 * Run 'fsck -m' if there's a device to fsck.
11207c478bd9Sstevel@tonic-gate 	 */
11217c478bd9Sstevel@tonic-gate 	if (fsptr->zone_fs_raw[0] != '\0' &&
11227c478bd9Sstevel@tonic-gate 	    dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_raw) != 0)
11237c478bd9Sstevel@tonic-gate 		return (-1);
11247c478bd9Sstevel@tonic-gate 
11257c478bd9Sstevel@tonic-gate 	/*
11267c478bd9Sstevel@tonic-gate 	 * Build up mount option string.
11277c478bd9Sstevel@tonic-gate 	 */
11287c478bd9Sstevel@tonic-gate 	optstr[0] = '\0';
11297c478bd9Sstevel@tonic-gate 	if (fsptr->zone_fs_options != NULL) {
11307c478bd9Sstevel@tonic-gate 		(void) strlcpy(optstr, fsptr->zone_fs_options->zone_fsopt_opt,
11317c478bd9Sstevel@tonic-gate 		    sizeof (optstr));
11327c478bd9Sstevel@tonic-gate 		for (optptr = fsptr->zone_fs_options->zone_fsopt_next;
11337c478bd9Sstevel@tonic-gate 		    optptr != NULL; optptr = optptr->zone_fsopt_next) {
11347c478bd9Sstevel@tonic-gate 			(void) strlcat(optstr, ",", sizeof (optstr));
11357c478bd9Sstevel@tonic-gate 			(void) strlcat(optstr, optptr->zone_fsopt_opt,
11367c478bd9Sstevel@tonic-gate 			    sizeof (optstr));
11377c478bd9Sstevel@tonic-gate 		}
11387c478bd9Sstevel@tonic-gate 	}
11397c478bd9Sstevel@tonic-gate 	return (domount(zlogp, fsptr->zone_fs_type, optstr,
11407c478bd9Sstevel@tonic-gate 	    fsptr->zone_fs_special, path));
11417c478bd9Sstevel@tonic-gate }
11427c478bd9Sstevel@tonic-gate 
11437c478bd9Sstevel@tonic-gate static void
11447c478bd9Sstevel@tonic-gate free_fs_data(struct zone_fstab *fsarray, uint_t nelem)
11457c478bd9Sstevel@tonic-gate {
11467c478bd9Sstevel@tonic-gate 	uint_t i;
11477c478bd9Sstevel@tonic-gate 
11487c478bd9Sstevel@tonic-gate 	if (fsarray == NULL)
11497c478bd9Sstevel@tonic-gate 		return;
11507c478bd9Sstevel@tonic-gate 	for (i = 0; i < nelem; i++)
11517c478bd9Sstevel@tonic-gate 		zonecfg_free_fs_option_list(fsarray[i].zone_fs_options);
11527c478bd9Sstevel@tonic-gate 	free(fsarray);
11537c478bd9Sstevel@tonic-gate }
11547c478bd9Sstevel@tonic-gate 
/*
 * This function initiates the creation of a small Solaris Environment for
 * a scratch zone. The Environment creation process is split up into two
 * functions (build_mounted_pre_var() and build_mounted_post_var()). It
 * is done this way because:
 *	We need to have both /etc and /var in the root of the scratch zone.
 *	We loopback mount the zone's own /etc and /var into the root of the
 *	scratch zone. Unlike /etc, /var can be a separate filesystem. So we
 *	need to delay the mount of /var till the zone's root gets populated.
 *	So mounting of localdirs[] (/etc and /var) has been moved to
 *	build_mounted_post_var(), which gets called only after the zone
 *	specific filesystems are mounted.
 */
1168108322fbScarlsonj static boolean_t
1169f4368d3dSvp157776 build_mounted_pre_var(zlog_t *zlogp, char *rootpath,
1170f4368d3dSvp157776     size_t rootlen, const char *zonepath)
1171108322fbScarlsonj {
1172108322fbScarlsonj 	char tmp[MAXPATHLEN], fromdir[MAXPATHLEN];
1173108322fbScarlsonj 	char luroot[MAXPATHLEN];
1174108322fbScarlsonj 	const char **cpp;
1175108322fbScarlsonj 	static const char *mkdirs[] = {
11763f604e0fSdp 		"/system", "/system/contract", "/system/object", "/proc",
11773f604e0fSdp 		"/dev", "/tmp", "/a", NULL
1178108322fbScarlsonj 	};
1179108322fbScarlsonj 	char *altstr;
1180f4368d3dSvp157776 	FILE *fp;
1181108322fbScarlsonj 	uuid_t uuid;
1182108322fbScarlsonj 
1183108322fbScarlsonj 	resolve_lofs(zlogp, rootpath, rootlen);
1184108322fbScarlsonj 	(void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
1185108322fbScarlsonj 	resolve_lofs(zlogp, luroot, sizeof (luroot));
1186108322fbScarlsonj 	(void) snprintf(tmp, sizeof (tmp), "%s/bin", luroot);
1187108322fbScarlsonj 	(void) symlink("./usr/bin", tmp);
1188108322fbScarlsonj 
1189108322fbScarlsonj 	/*
1190108322fbScarlsonj 	 * These are mostly special mount points; not handled here.  (See
1191108322fbScarlsonj 	 * zone_mount_early.)
1192108322fbScarlsonj 	 */
1193108322fbScarlsonj 	for (cpp = mkdirs; *cpp != NULL; cpp++) {
1194108322fbScarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1195108322fbScarlsonj 		if (mkdir(tmp, 0755) != 0) {
1196108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1197108322fbScarlsonj 			return (B_FALSE);
1198108322fbScarlsonj 		}
1199108322fbScarlsonj 	}
1200f4368d3dSvp157776 	/*
1201f4368d3dSvp157776 	 * This is here to support lucopy.  If there's an instance of this same
1202f4368d3dSvp157776 	 * zone on the current running system, then we mount its root up as
1203f4368d3dSvp157776 	 * read-only inside the scratch zone.
1204f4368d3dSvp157776 	 */
1205f4368d3dSvp157776 	(void) zonecfg_get_uuid(zone_name, uuid);
1206f4368d3dSvp157776 	altstr = strdup(zonecfg_get_root());
1207f4368d3dSvp157776 	if (altstr == NULL) {
1208f4368d3dSvp157776 		zerror(zlogp, B_TRUE, "memory allocation failed");
1209f4368d3dSvp157776 		return (B_FALSE);
1210f4368d3dSvp157776 	}
1211f4368d3dSvp157776 	zonecfg_set_root("");
1212f4368d3dSvp157776 	(void) strlcpy(tmp, zone_name, sizeof (tmp));
1213f4368d3dSvp157776 	(void) zonecfg_get_name_by_uuid(uuid, tmp, sizeof (tmp));
1214f4368d3dSvp157776 	if (zone_get_rootpath(tmp, fromdir, sizeof (fromdir)) == Z_OK &&
1215f4368d3dSvp157776 	    strcmp(fromdir, rootpath) != 0) {
1216f4368d3dSvp157776 		(void) snprintf(tmp, sizeof (tmp), "%s/b", luroot);
1217f4368d3dSvp157776 		if (mkdir(tmp, 0755) != 0) {
1218f4368d3dSvp157776 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1219f4368d3dSvp157776 			return (B_FALSE);
1220f4368d3dSvp157776 		}
1221f4368d3dSvp157776 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, fromdir,
1222f4368d3dSvp157776 		    tmp) != 0) {
1223f4368d3dSvp157776 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1224f4368d3dSvp157776 			    fromdir);
1225f4368d3dSvp157776 			return (B_FALSE);
1226f4368d3dSvp157776 		}
1227f4368d3dSvp157776 	}
1228f4368d3dSvp157776 	zonecfg_set_root(altstr);
1229f4368d3dSvp157776 	free(altstr);
1230f4368d3dSvp157776 
1231f4368d3dSvp157776 	if ((fp = zonecfg_open_scratch(luroot, B_TRUE)) == NULL) {
1232f4368d3dSvp157776 		zerror(zlogp, B_TRUE, "cannot open zone mapfile");
1233f4368d3dSvp157776 		return (B_FALSE);
1234f4368d3dSvp157776 	}
1235f4368d3dSvp157776 	(void) ftruncate(fileno(fp), 0);
1236f4368d3dSvp157776 	if (zonecfg_add_scratch(fp, zone_name, kernzone, "/") == -1) {
1237f4368d3dSvp157776 		zerror(zlogp, B_TRUE, "cannot add zone mapfile entry");
1238f4368d3dSvp157776 	}
1239f4368d3dSvp157776 	zonecfg_close_scratch(fp);
1240f4368d3dSvp157776 	(void) snprintf(tmp, sizeof (tmp), "%s/a", luroot);
1241f4368d3dSvp157776 	if (domount(zlogp, MNTTYPE_LOFS, "", rootpath, tmp) != 0)
1242f4368d3dSvp157776 		return (B_FALSE);
1243f4368d3dSvp157776 	(void) strlcpy(rootpath, tmp, rootlen);
1244f4368d3dSvp157776 	return (B_TRUE);
1245f4368d3dSvp157776 }
1246f4368d3dSvp157776 
1247f4368d3dSvp157776 
1248f4368d3dSvp157776 static boolean_t
1249f4368d3dSvp157776 build_mounted_post_var(zlog_t *zlogp, char *rootpath, const char *zonepath)
1250f4368d3dSvp157776 {
1251f4368d3dSvp157776 	char tmp[MAXPATHLEN], fromdir[MAXPATHLEN];
1252f4368d3dSvp157776 	char luroot[MAXPATHLEN];
1253f4368d3dSvp157776 	const char **cpp;
1254f4368d3dSvp157776 	static const char *localdirs[] = {
1255f4368d3dSvp157776 		"/etc", "/var", NULL
1256f4368d3dSvp157776 	};
1257f4368d3dSvp157776 	static const char *loopdirs[] = {
1258f4368d3dSvp157776 		"/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform",
1259f4368d3dSvp157776 		"/usr", NULL
1260f4368d3dSvp157776 	};
1261f4368d3dSvp157776 	static const char *tmpdirs[] = {
1262f4368d3dSvp157776 		"/tmp", "/var/run", NULL
1263f4368d3dSvp157776 	};
1264f4368d3dSvp157776 	struct stat st;
1265f4368d3dSvp157776 
1266f4368d3dSvp157776 	(void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
1267108322fbScarlsonj 
1268108322fbScarlsonj 	/*
1269108322fbScarlsonj 	 * These are mounted read-write from the zone undergoing upgrade.  We
1270108322fbScarlsonj 	 * must be careful not to 'leak' things from the main system into the
1271108322fbScarlsonj 	 * zone, and this accomplishes that goal.
1272108322fbScarlsonj 	 */
1273108322fbScarlsonj 	for (cpp = localdirs; *cpp != NULL; cpp++) {
1274108322fbScarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1275108322fbScarlsonj 		(void) snprintf(fromdir, sizeof (fromdir), "%s%s", rootpath,
1276108322fbScarlsonj 		    *cpp);
1277108322fbScarlsonj 		if (mkdir(tmp, 0755) != 0) {
1278108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1279108322fbScarlsonj 			return (B_FALSE);
1280108322fbScarlsonj 		}
1281108322fbScarlsonj 		if (domount(zlogp, MNTTYPE_LOFS, "", fromdir, tmp) != 0) {
1282108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1283108322fbScarlsonj 			    *cpp);
1284108322fbScarlsonj 			return (B_FALSE);
1285108322fbScarlsonj 		}
1286108322fbScarlsonj 	}
1287108322fbScarlsonj 
1288108322fbScarlsonj 	/*
1289108322fbScarlsonj 	 * These are things mounted read-only from the running system because
1290108322fbScarlsonj 	 * they contain binaries that must match system.
1291108322fbScarlsonj 	 */
1292108322fbScarlsonj 	for (cpp = loopdirs; *cpp != NULL; cpp++) {
1293108322fbScarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1294108322fbScarlsonj 		if (mkdir(tmp, 0755) != 0) {
1295108322fbScarlsonj 			if (errno != EEXIST) {
1296108322fbScarlsonj 				zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1297108322fbScarlsonj 				return (B_FALSE);
1298108322fbScarlsonj 			}
1299108322fbScarlsonj 			if (lstat(tmp, &st) != 0) {
1300108322fbScarlsonj 				zerror(zlogp, B_TRUE, "cannot stat %s", tmp);
1301108322fbScarlsonj 				return (B_FALSE);
1302108322fbScarlsonj 			}
1303108322fbScarlsonj 			/*
1304108322fbScarlsonj 			 * Ignore any non-directories encountered.  These are
1305108322fbScarlsonj 			 * things that have been converted into symlinks
1306108322fbScarlsonj 			 * (/etc/fs and /etc/lib) and no longer need a lofs
1307108322fbScarlsonj 			 * fixup.
1308108322fbScarlsonj 			 */
1309108322fbScarlsonj 			if (!S_ISDIR(st.st_mode))
1310108322fbScarlsonj 				continue;
1311108322fbScarlsonj 		}
1312108322fbScarlsonj 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, *cpp,
1313108322fbScarlsonj 		    tmp) != 0) {
1314108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1315108322fbScarlsonj 			    *cpp);
1316108322fbScarlsonj 			return (B_FALSE);
1317108322fbScarlsonj 		}
1318108322fbScarlsonj 	}
1319108322fbScarlsonj 
1320108322fbScarlsonj 	/*
1321108322fbScarlsonj 	 * These are things with tmpfs mounted inside.
1322108322fbScarlsonj 	 */
1323108322fbScarlsonj 	for (cpp = tmpdirs; *cpp != NULL; cpp++) {
1324108322fbScarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1325108322fbScarlsonj 		if (mkdir(tmp, 0755) != 0 && errno != EEXIST) {
1326108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1327108322fbScarlsonj 			return (B_FALSE);
1328108322fbScarlsonj 		}
1329108322fbScarlsonj 		if (domount(zlogp, MNTTYPE_TMPFS, "", "swap", tmp) != 0) {
1330108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot mount swap on %s", *cpp);
1331108322fbScarlsonj 			return (B_FALSE);
1332108322fbScarlsonj 		}
1333108322fbScarlsonj 	}
1334108322fbScarlsonj 	return (B_TRUE);
1335108322fbScarlsonj }
1336108322fbScarlsonj 
13377c478bd9Sstevel@tonic-gate static int
1338108322fbScarlsonj mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd)
13397c478bd9Sstevel@tonic-gate {
13407c478bd9Sstevel@tonic-gate 	char	rootpath[MAXPATHLEN];
13417c478bd9Sstevel@tonic-gate 	char	zonepath[MAXPATHLEN];
13427c478bd9Sstevel@tonic-gate 	int	num_fs = 0, i;
13437c478bd9Sstevel@tonic-gate 	struct zone_fstab fstab, *fs_ptr = NULL, *tmp_ptr;
13447c478bd9Sstevel@tonic-gate 	struct zone_fstab *fsp;
13457c478bd9Sstevel@tonic-gate 	zone_dochandle_t handle = NULL;
13467c478bd9Sstevel@tonic-gate 	zone_state_t zstate;
13477c478bd9Sstevel@tonic-gate 
13487c478bd9Sstevel@tonic-gate 	if (zone_get_state(zone_name, &zstate) != Z_OK ||
1349108322fbScarlsonj 	    (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
13507c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
1351108322fbScarlsonj 		    "zone must be in '%s' or '%s' state to mount file-systems",
1352108322fbScarlsonj 		    zone_state_str(ZONE_STATE_READY),
1353108322fbScarlsonj 		    zone_state_str(ZONE_STATE_MOUNTED));
13547c478bd9Sstevel@tonic-gate 		goto bad;
13557c478bd9Sstevel@tonic-gate 	}
13567c478bd9Sstevel@tonic-gate 
13577c478bd9Sstevel@tonic-gate 	if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
13587c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to determine zone path");
13597c478bd9Sstevel@tonic-gate 		goto bad;
13607c478bd9Sstevel@tonic-gate 	}
13617c478bd9Sstevel@tonic-gate 
13627c478bd9Sstevel@tonic-gate 	if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
13637c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to determine zone root");
13647c478bd9Sstevel@tonic-gate 		goto bad;
13657c478bd9Sstevel@tonic-gate 	}
13667c478bd9Sstevel@tonic-gate 
13677c478bd9Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
1368ffbafc53Scomay 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
13697c478bd9Sstevel@tonic-gate 		goto bad;
13707c478bd9Sstevel@tonic-gate 	}
13717c478bd9Sstevel@tonic-gate 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
13727c478bd9Sstevel@tonic-gate 	    zonecfg_setfsent(handle) != Z_OK) {
13737c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
13747c478bd9Sstevel@tonic-gate 		goto bad;
13757c478bd9Sstevel@tonic-gate 	}
13767c478bd9Sstevel@tonic-gate 
13777c478bd9Sstevel@tonic-gate 	/*
13787c478bd9Sstevel@tonic-gate 	 * Iterate through the rest of the filesystems, first the IPDs, then
13797c478bd9Sstevel@tonic-gate 	 * the general FSs.  Sort them all, then mount them in sorted order.
13807c478bd9Sstevel@tonic-gate 	 * This is to make sure the higher level directories (e.g., /usr)
13817c478bd9Sstevel@tonic-gate 	 * get mounted before any beneath them (e.g., /usr/local).
13827c478bd9Sstevel@tonic-gate 	 */
13837c478bd9Sstevel@tonic-gate 	if (zonecfg_setipdent(handle) != Z_OK) {
13847c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
13857c478bd9Sstevel@tonic-gate 		goto bad;
13867c478bd9Sstevel@tonic-gate 	}
13877c478bd9Sstevel@tonic-gate 	while (zonecfg_getipdent(handle, &fstab) == Z_OK) {
13887c478bd9Sstevel@tonic-gate 		num_fs++;
13897c478bd9Sstevel@tonic-gate 		if ((tmp_ptr = realloc(fs_ptr,
13907c478bd9Sstevel@tonic-gate 		    num_fs * sizeof (*tmp_ptr))) == NULL) {
13917c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "memory allocation failed");
13927c478bd9Sstevel@tonic-gate 			num_fs--;
13937c478bd9Sstevel@tonic-gate 			(void) zonecfg_endipdent(handle);
13947c478bd9Sstevel@tonic-gate 			goto bad;
13957c478bd9Sstevel@tonic-gate 		}
13967c478bd9Sstevel@tonic-gate 		fs_ptr = tmp_ptr;
13977c478bd9Sstevel@tonic-gate 		fsp = &fs_ptr[num_fs - 1];
13987c478bd9Sstevel@tonic-gate 		/*
13997c478bd9Sstevel@tonic-gate 		 * IPDs logically only have a mount point; all other properties
14007c478bd9Sstevel@tonic-gate 		 * are implied.
14017c478bd9Sstevel@tonic-gate 		 */
14027c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_dir,
14037c478bd9Sstevel@tonic-gate 		    fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir));
14047c478bd9Sstevel@tonic-gate 		fsp->zone_fs_special[0] = '\0';
14057c478bd9Sstevel@tonic-gate 		fsp->zone_fs_raw[0] = '\0';
14067c478bd9Sstevel@tonic-gate 		fsp->zone_fs_type[0] = '\0';
14077c478bd9Sstevel@tonic-gate 		fsp->zone_fs_options = NULL;
14087c478bd9Sstevel@tonic-gate 	}
14097c478bd9Sstevel@tonic-gate 	(void) zonecfg_endipdent(handle);
14107c478bd9Sstevel@tonic-gate 
14117c478bd9Sstevel@tonic-gate 	if (zonecfg_setfsent(handle) != Z_OK) {
14127c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
14137c478bd9Sstevel@tonic-gate 		goto bad;
14147c478bd9Sstevel@tonic-gate 	}
14157c478bd9Sstevel@tonic-gate 	while (zonecfg_getfsent(handle, &fstab) == Z_OK) {
1416fa9e4066Sahrens 		/*
1417fa9e4066Sahrens 		 * ZFS filesystems will not be accessible under an alternate
1418fa9e4066Sahrens 		 * root, since the pool will not be known.  Ignore them in this
1419fa9e4066Sahrens 		 * case.
1420fa9e4066Sahrens 		 */
1421fa9e4066Sahrens 		if (mount_cmd && strcmp(fstab.zone_fs_type, MNTTYPE_ZFS) == 0)
1422fa9e4066Sahrens 			continue;
1423fa9e4066Sahrens 
14247c478bd9Sstevel@tonic-gate 		num_fs++;
14257c478bd9Sstevel@tonic-gate 		if ((tmp_ptr = realloc(fs_ptr,
14267c478bd9Sstevel@tonic-gate 		    num_fs * sizeof (*tmp_ptr))) == NULL) {
14277c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "memory allocation failed");
14287c478bd9Sstevel@tonic-gate 			num_fs--;
14297c478bd9Sstevel@tonic-gate 			(void) zonecfg_endfsent(handle);
14307c478bd9Sstevel@tonic-gate 			goto bad;
14317c478bd9Sstevel@tonic-gate 		}
14327c478bd9Sstevel@tonic-gate 		fs_ptr = tmp_ptr;
14337c478bd9Sstevel@tonic-gate 		fsp = &fs_ptr[num_fs - 1];
14347c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_dir,
14357c478bd9Sstevel@tonic-gate 		    fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir));
14367c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_special, fstab.zone_fs_special,
14377c478bd9Sstevel@tonic-gate 		    sizeof (fsp->zone_fs_special));
14387c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_raw, fstab.zone_fs_raw,
14397c478bd9Sstevel@tonic-gate 		    sizeof (fsp->zone_fs_raw));
14407c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_type, fstab.zone_fs_type,
14417c478bd9Sstevel@tonic-gate 		    sizeof (fsp->zone_fs_type));
14427c478bd9Sstevel@tonic-gate 		fsp->zone_fs_options = fstab.zone_fs_options;
14437c478bd9Sstevel@tonic-gate 	}
14447c478bd9Sstevel@tonic-gate 	(void) zonecfg_endfsent(handle);
14457c478bd9Sstevel@tonic-gate 	zonecfg_fini_handle(handle);
14467c478bd9Sstevel@tonic-gate 	handle = NULL;
14477c478bd9Sstevel@tonic-gate 
1448108322fbScarlsonj 	/*
1449facf4a8dSllai1 	 * When we're mounting a zone for administration, / is the
1450facf4a8dSllai1 	 * scratch zone and dev is mounted at /dev.  The to-be-upgraded
1451facf4a8dSllai1 	 * zone is mounted at /a, and we set up that environment so that
1452facf4a8dSllai1 	 * process can access both the running system's utilities
1453facf4a8dSllai1 	 * and the to-be-modified zone's files.  The only exception
1454facf4a8dSllai1 	 * is the zone's /dev which isn't mounted at all, which is
1455facf4a8dSllai1 	 * the same as global zone installation where /a/dev and
1456facf4a8dSllai1 	 * /a/devices are not mounted.
1457f4368d3dSvp157776 	 * Zone mounting is done in three phases.
1458f4368d3dSvp157776 	 *   1) Create and populate lu directory (build_mounted_pre_var()).
1459f4368d3dSvp157776 	 *   2) Mount the required filesystems as per the zone configuration.
1460f4368d3dSvp157776 	 *   3) Set up the rest of the scratch zone environment
1461f4368d3dSvp157776 	 *	(build_mounted_post_var()).
1462108322fbScarlsonj 	 */
1463108322fbScarlsonj 	if (mount_cmd &&
1464f4368d3dSvp157776 	    !build_mounted_pre_var(zlogp,
1465f4368d3dSvp157776 	    rootpath, sizeof (rootpath), zonepath))
1466108322fbScarlsonj 		goto bad;
1467108322fbScarlsonj 
14687c478bd9Sstevel@tonic-gate 	qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
14697c478bd9Sstevel@tonic-gate 	for (i = 0; i < num_fs; i++) {
14707c478bd9Sstevel@tonic-gate 		if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0)
14717c478bd9Sstevel@tonic-gate 			goto bad;
14727c478bd9Sstevel@tonic-gate 	}
1473f4368d3dSvp157776 	if (mount_cmd &&
1474f4368d3dSvp157776 	    !build_mounted_post_var(zlogp, rootpath, zonepath))
1475f4368d3dSvp157776 		goto bad;
147645916cd2Sjpk 
147745916cd2Sjpk 	/*
147845916cd2Sjpk 	 * For Trusted Extensions cross-mount each lower level /export/home
147945916cd2Sjpk 	 */
148048451833Scarlsonj 	if (!mount_cmd && tsol_mounts(zlogp, zone_name, rootpath) != 0)
148145916cd2Sjpk 		goto bad;
148245916cd2Sjpk 
14837c478bd9Sstevel@tonic-gate 	free_fs_data(fs_ptr, num_fs);
14847c478bd9Sstevel@tonic-gate 
14857c478bd9Sstevel@tonic-gate 	/*
14867c478bd9Sstevel@tonic-gate 	 * Everything looks fine.
14877c478bd9Sstevel@tonic-gate 	 */
14887c478bd9Sstevel@tonic-gate 	return (0);
14897c478bd9Sstevel@tonic-gate 
14907c478bd9Sstevel@tonic-gate bad:
14917c478bd9Sstevel@tonic-gate 	if (handle != NULL)
14927c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
14937c478bd9Sstevel@tonic-gate 	free_fs_data(fs_ptr, num_fs);
14947c478bd9Sstevel@tonic-gate 	return (-1);
14957c478bd9Sstevel@tonic-gate }
14967c478bd9Sstevel@tonic-gate 
/*
 * Convert a decimal prefix length, given as a string, into a network mask.
 *
 *	prefixstr    - NUL-terminated decimal prefix length (e.g. "24").
 *	maxprefixlen - largest prefix length accepted.
 *	maskstr      - mask buffer; whole bytes are overwritten with 0xFF and
 *		       the trailing partial byte is OR-ed in, so the caller
 *		       is expected to pass a zeroed buffer.
 *
 * Returns 0 on success.  Returns 1, leaving maskstr untouched, when the
 * string is empty, is not entirely a decimal number, or is outside
 * [0, maxprefixlen].
 *
 * caller makes sure neither parameter is NULL
 */
static int
addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr)
{
	char *endp;
	long prefixlen;

	/*
	 * strtol() rather than atoi(): atoi() cannot tell "0" apart from
	 * garbage and has undefined behavior on overflow.
	 */
	errno = 0;
	prefixlen = strtol(prefixstr, &endp, 10);
	if (endp == prefixstr || *endp != '\0' || errno != 0)
		return (1);
	if (prefixlen < 0 || prefixlen > maxprefixlen)
		return (1);

	/* Whole bytes first ... */
	while (prefixlen >= 8) {
		*maskstr++ = 0xFF;
		prefixlen -= 8;
	}
	/* ... then the high-order bits of the final, partial byte. */
	if (prefixlen > 0)
		*maskstr |= (uchar_t)(0xFF << (8 - prefixlen));
	return (0);
}
15177c478bd9Sstevel@tonic-gate 
15187c478bd9Sstevel@tonic-gate /*
15197c478bd9Sstevel@tonic-gate  * Tear down all interfaces belonging to the given zone.  This should
15207c478bd9Sstevel@tonic-gate  * be called with the zone in a state other than "running", so that
15217c478bd9Sstevel@tonic-gate  * interfaces can't be assigned to the zone after this returns.
15227c478bd9Sstevel@tonic-gate  *
15237c478bd9Sstevel@tonic-gate  * If anything goes wrong, log an error message and return an error.
15247c478bd9Sstevel@tonic-gate  */
15257c478bd9Sstevel@tonic-gate static int
15267c478bd9Sstevel@tonic-gate unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id)
15277c478bd9Sstevel@tonic-gate {
15287c478bd9Sstevel@tonic-gate 	struct lifnum lifn;
15297c478bd9Sstevel@tonic-gate 	struct lifconf lifc;
15307c478bd9Sstevel@tonic-gate 	struct lifreq *lifrp, lifrl;
15317c478bd9Sstevel@tonic-gate 	int64_t lifc_flags = LIFC_NOXMIT | LIFC_ALLZONES;
15327c478bd9Sstevel@tonic-gate 	int num_ifs, s, i, ret_code = 0;
15337c478bd9Sstevel@tonic-gate 	uint_t bufsize;
15347c478bd9Sstevel@tonic-gate 	char *buf = NULL;
15357c478bd9Sstevel@tonic-gate 
15367c478bd9Sstevel@tonic-gate 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
15377c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get socket");
15387c478bd9Sstevel@tonic-gate 		ret_code = -1;
15397c478bd9Sstevel@tonic-gate 		goto bad;
15407c478bd9Sstevel@tonic-gate 	}
15417c478bd9Sstevel@tonic-gate 	lifn.lifn_family = AF_UNSPEC;
15427c478bd9Sstevel@tonic-gate 	lifn.lifn_flags = (int)lifc_flags;
15437c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) {
15447c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
15457c478bd9Sstevel@tonic-gate 		    "could not determine number of interfaces");
15467c478bd9Sstevel@tonic-gate 		ret_code = -1;
15477c478bd9Sstevel@tonic-gate 		goto bad;
15487c478bd9Sstevel@tonic-gate 	}
15497c478bd9Sstevel@tonic-gate 	num_ifs = lifn.lifn_count;
15507c478bd9Sstevel@tonic-gate 	bufsize = num_ifs * sizeof (struct lifreq);
15517c478bd9Sstevel@tonic-gate 	if ((buf = malloc(bufsize)) == NULL) {
15527c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "memory allocation failed");
15537c478bd9Sstevel@tonic-gate 		ret_code = -1;
15547c478bd9Sstevel@tonic-gate 		goto bad;
15557c478bd9Sstevel@tonic-gate 	}
15567c478bd9Sstevel@tonic-gate 	lifc.lifc_family = AF_UNSPEC;
15577c478bd9Sstevel@tonic-gate 	lifc.lifc_flags = (int)lifc_flags;
15587c478bd9Sstevel@tonic-gate 	lifc.lifc_len = bufsize;
15597c478bd9Sstevel@tonic-gate 	lifc.lifc_buf = buf;
15607c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) {
15617c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get configured interfaces");
15627c478bd9Sstevel@tonic-gate 		ret_code = -1;
15637c478bd9Sstevel@tonic-gate 		goto bad;
15647c478bd9Sstevel@tonic-gate 	}
15657c478bd9Sstevel@tonic-gate 	lifrp = lifc.lifc_req;
15667c478bd9Sstevel@tonic-gate 	for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--, lifrp++) {
15677c478bd9Sstevel@tonic-gate 		(void) close(s);
15687c478bd9Sstevel@tonic-gate 		if ((s = socket(lifrp->lifr_addr.ss_family, SOCK_DGRAM, 0)) <
15697c478bd9Sstevel@tonic-gate 		    0) {
15707c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not get socket",
15717c478bd9Sstevel@tonic-gate 			    lifrl.lifr_name);
15727c478bd9Sstevel@tonic-gate 			ret_code = -1;
15737c478bd9Sstevel@tonic-gate 			continue;
15747c478bd9Sstevel@tonic-gate 		}
15757c478bd9Sstevel@tonic-gate 		(void) memset(&lifrl, 0, sizeof (lifrl));
15767c478bd9Sstevel@tonic-gate 		(void) strncpy(lifrl.lifr_name, lifrp->lifr_name,
15777c478bd9Sstevel@tonic-gate 		    sizeof (lifrl.lifr_name));
15787c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFZONE, (caddr_t)&lifrl) < 0) {
15797c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
15807c478bd9Sstevel@tonic-gate 			    "%s: could not determine zone interface belongs to",
15817c478bd9Sstevel@tonic-gate 			    lifrl.lifr_name);
15827c478bd9Sstevel@tonic-gate 			ret_code = -1;
15837c478bd9Sstevel@tonic-gate 			continue;
15847c478bd9Sstevel@tonic-gate 		}
15857c478bd9Sstevel@tonic-gate 		if (lifrl.lifr_zoneid == zone_id) {
15867c478bd9Sstevel@tonic-gate 			if (ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifrl) < 0) {
15877c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_TRUE,
15887c478bd9Sstevel@tonic-gate 				    "%s: could not remove interface",
15897c478bd9Sstevel@tonic-gate 				    lifrl.lifr_name);
15907c478bd9Sstevel@tonic-gate 				ret_code = -1;
15917c478bd9Sstevel@tonic-gate 				continue;
15927c478bd9Sstevel@tonic-gate 			}
15937c478bd9Sstevel@tonic-gate 		}
15947c478bd9Sstevel@tonic-gate 	}
15957c478bd9Sstevel@tonic-gate bad:
15967c478bd9Sstevel@tonic-gate 	if (s > 0)
15977c478bd9Sstevel@tonic-gate 		(void) close(s);
15987c478bd9Sstevel@tonic-gate 	if (buf)
15997c478bd9Sstevel@tonic-gate 		free(buf);
16007c478bd9Sstevel@tonic-gate 	return (ret_code);
16017c478bd9Sstevel@tonic-gate }
16027c478bd9Sstevel@tonic-gate 
16037c478bd9Sstevel@tonic-gate static union	sockunion {
16047c478bd9Sstevel@tonic-gate 	struct	sockaddr sa;
16057c478bd9Sstevel@tonic-gate 	struct	sockaddr_in sin;
16067c478bd9Sstevel@tonic-gate 	struct	sockaddr_dl sdl;
16077c478bd9Sstevel@tonic-gate 	struct	sockaddr_in6 sin6;
16087c478bd9Sstevel@tonic-gate } so_dst, so_ifp;
16097c478bd9Sstevel@tonic-gate 
16107c478bd9Sstevel@tonic-gate static struct {
16117c478bd9Sstevel@tonic-gate 	struct	rt_msghdr hdr;
16127c478bd9Sstevel@tonic-gate 	char	space[512];
16137c478bd9Sstevel@tonic-gate } rtmsg;
16147c478bd9Sstevel@tonic-gate 
16157c478bd9Sstevel@tonic-gate static int
16167c478bd9Sstevel@tonic-gate salen(struct sockaddr *sa)
16177c478bd9Sstevel@tonic-gate {
16187c478bd9Sstevel@tonic-gate 	switch (sa->sa_family) {
16197c478bd9Sstevel@tonic-gate 	case AF_INET:
16207c478bd9Sstevel@tonic-gate 		return (sizeof (struct sockaddr_in));
16217c478bd9Sstevel@tonic-gate 	case AF_LINK:
16227c478bd9Sstevel@tonic-gate 		return (sizeof (struct sockaddr_dl));
16237c478bd9Sstevel@tonic-gate 	case AF_INET6:
16247c478bd9Sstevel@tonic-gate 		return (sizeof (struct sockaddr_in6));
16257c478bd9Sstevel@tonic-gate 	default:
16267c478bd9Sstevel@tonic-gate 		return (sizeof (struct sockaddr));
16277c478bd9Sstevel@tonic-gate 	}
16287c478bd9Sstevel@tonic-gate }
16297c478bd9Sstevel@tonic-gate 
/*
 * Round a length up to the next multiple of sizeof (long).  Zero and
 * negative lengths yield sizeof (long).  The argument is evaluated more
 * than once, so it must be free of side effects.
 */
#define	ROUNDUP_LONG(a) \
	((a) > 0 ? \
	    (((a) + (sizeof (long) - 1)) & ~(sizeof (long) - 1)) : \
	    sizeof (long))
16327c478bd9Sstevel@tonic-gate 
16337c478bd9Sstevel@tonic-gate /*
16347c478bd9Sstevel@tonic-gate  * Look up which zone is using a given IP address.  The address in question
16357c478bd9Sstevel@tonic-gate  * is expected to have been stuffed into the structure to which lifr points
16367c478bd9Sstevel@tonic-gate  * via a previous SIOCGLIFADDR ioctl().
16377c478bd9Sstevel@tonic-gate  *
16387c478bd9Sstevel@tonic-gate  * This is done using black router socket magic.
16397c478bd9Sstevel@tonic-gate  *
16407c478bd9Sstevel@tonic-gate  * Return the name of the zone on success or NULL on failure.
16417c478bd9Sstevel@tonic-gate  *
16427c478bd9Sstevel@tonic-gate  * This is a lot of code for a simple task; a new ioctl request to take care
16437c478bd9Sstevel@tonic-gate  * of this might be a useful RFE.
16447c478bd9Sstevel@tonic-gate  */
16457c478bd9Sstevel@tonic-gate 
16467c478bd9Sstevel@tonic-gate static char *
16477c478bd9Sstevel@tonic-gate who_is_using(zlog_t *zlogp, struct lifreq *lifr)
16487c478bd9Sstevel@tonic-gate {
16497c478bd9Sstevel@tonic-gate 	static char answer[ZONENAME_MAX];
16507c478bd9Sstevel@tonic-gate 	pid_t pid;
16517c478bd9Sstevel@tonic-gate 	int s, rlen, l, i;
16527c478bd9Sstevel@tonic-gate 	char *cp = rtmsg.space;
16537c478bd9Sstevel@tonic-gate 	struct sockaddr_dl *ifp = NULL;
16547c478bd9Sstevel@tonic-gate 	struct sockaddr *sa;
16557c478bd9Sstevel@tonic-gate 	char save_if_name[LIFNAMSIZ];
16567c478bd9Sstevel@tonic-gate 
16577c478bd9Sstevel@tonic-gate 	answer[0] = '\0';
16587c478bd9Sstevel@tonic-gate 
16597c478bd9Sstevel@tonic-gate 	pid = getpid();
16607c478bd9Sstevel@tonic-gate 	if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) < 0) {
16617c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get routing socket");
16627c478bd9Sstevel@tonic-gate 		return (NULL);
16637c478bd9Sstevel@tonic-gate 	}
16647c478bd9Sstevel@tonic-gate 
16657c478bd9Sstevel@tonic-gate 	if (lifr->lifr_addr.ss_family == AF_INET) {
16667c478bd9Sstevel@tonic-gate 		struct sockaddr_in *sin4;
16677c478bd9Sstevel@tonic-gate 
16687c478bd9Sstevel@tonic-gate 		so_dst.sa.sa_family = AF_INET;
16697c478bd9Sstevel@tonic-gate 		sin4 = (struct sockaddr_in *)&lifr->lifr_addr;
16707c478bd9Sstevel@tonic-gate 		so_dst.sin.sin_addr = sin4->sin_addr;
16717c478bd9Sstevel@tonic-gate 	} else {
16727c478bd9Sstevel@tonic-gate 		struct sockaddr_in6 *sin6;
16737c478bd9Sstevel@tonic-gate 
16747c478bd9Sstevel@tonic-gate 		so_dst.sa.sa_family = AF_INET6;
16757c478bd9Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr;
16767c478bd9Sstevel@tonic-gate 		so_dst.sin6.sin6_addr = sin6->sin6_addr;
16777c478bd9Sstevel@tonic-gate 	}
16787c478bd9Sstevel@tonic-gate 
16797c478bd9Sstevel@tonic-gate 	so_ifp.sa.sa_family = AF_LINK;
16807c478bd9Sstevel@tonic-gate 
16817c478bd9Sstevel@tonic-gate 	(void) memset(&rtmsg, 0, sizeof (rtmsg));
16827c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_type = RTM_GET;
16837c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_flags = RTF_UP | RTF_HOST;
16847c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_version = RTM_VERSION;
16857c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_seq = ++rts_seqno;
16867c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_addrs = RTA_IFP | RTA_DST;
16877c478bd9Sstevel@tonic-gate 
16887c478bd9Sstevel@tonic-gate 	l = ROUNDUP_LONG(salen(&so_dst.sa));
16897c478bd9Sstevel@tonic-gate 	(void) memmove(cp, &(so_dst), l);
16907c478bd9Sstevel@tonic-gate 	cp += l;
16917c478bd9Sstevel@tonic-gate 	l = ROUNDUP_LONG(salen(&so_ifp.sa));
16927c478bd9Sstevel@tonic-gate 	(void) memmove(cp, &(so_ifp), l);
16937c478bd9Sstevel@tonic-gate 	cp += l;
16947c478bd9Sstevel@tonic-gate 
16957c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_msglen = l = cp - (char *)&rtmsg;
16967c478bd9Sstevel@tonic-gate 
16977c478bd9Sstevel@tonic-gate 	if ((rlen = write(s, &rtmsg, l)) < 0) {
16987c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "writing to routing socket");
16997c478bd9Sstevel@tonic-gate 		return (NULL);
17007c478bd9Sstevel@tonic-gate 	} else if (rlen < (int)rtmsg.hdr.rtm_msglen) {
17017c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
17027c478bd9Sstevel@tonic-gate 		    "write to routing socket got only %d for len\n", rlen);
17037c478bd9Sstevel@tonic-gate 		return (NULL);
17047c478bd9Sstevel@tonic-gate 	}
17057c478bd9Sstevel@tonic-gate 	do {
17067c478bd9Sstevel@tonic-gate 		l = read(s, &rtmsg, sizeof (rtmsg));
17077c478bd9Sstevel@tonic-gate 	} while (l > 0 && (rtmsg.hdr.rtm_seq != rts_seqno ||
17087c478bd9Sstevel@tonic-gate 	    rtmsg.hdr.rtm_pid != pid));
17097c478bd9Sstevel@tonic-gate 	if (l < 0) {
17107c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "reading from routing socket");
17117c478bd9Sstevel@tonic-gate 		return (NULL);
17127c478bd9Sstevel@tonic-gate 	}
17137c478bd9Sstevel@tonic-gate 
17147c478bd9Sstevel@tonic-gate 	if (rtmsg.hdr.rtm_version != RTM_VERSION) {
17157c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
17167c478bd9Sstevel@tonic-gate 		    "routing message version %d not understood",
17177c478bd9Sstevel@tonic-gate 		    rtmsg.hdr.rtm_version);
17187c478bd9Sstevel@tonic-gate 		return (NULL);
17197c478bd9Sstevel@tonic-gate 	}
17207c478bd9Sstevel@tonic-gate 	if (rtmsg.hdr.rtm_msglen != (ushort_t)l) {
17217c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "message length mismatch, "
17227c478bd9Sstevel@tonic-gate 		    "expected %d bytes, returned %d bytes",
17237c478bd9Sstevel@tonic-gate 		    rtmsg.hdr.rtm_msglen, l);
17247c478bd9Sstevel@tonic-gate 		return (NULL);
17257c478bd9Sstevel@tonic-gate 	}
17267c478bd9Sstevel@tonic-gate 	if (rtmsg.hdr.rtm_errno != 0)  {
17277c478bd9Sstevel@tonic-gate 		errno = rtmsg.hdr.rtm_errno;
17287c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "RTM_GET routing socket message");
17297c478bd9Sstevel@tonic-gate 		return (NULL);
17307c478bd9Sstevel@tonic-gate 	}
17317c478bd9Sstevel@tonic-gate 	if ((rtmsg.hdr.rtm_addrs & RTA_IFP) == 0) {
17327c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "interface not found");
17337c478bd9Sstevel@tonic-gate 		return (NULL);
17347c478bd9Sstevel@tonic-gate 	}
17357c478bd9Sstevel@tonic-gate 	cp = ((char *)(&rtmsg.hdr + 1));
17367c478bd9Sstevel@tonic-gate 	for (i = 1; i != 0; i <<= 1) {
17377c478bd9Sstevel@tonic-gate 		/* LINTED E_BAD_PTR_CAST_ALIGN */
17387c478bd9Sstevel@tonic-gate 		sa = (struct sockaddr *)cp;
17397c478bd9Sstevel@tonic-gate 		if (i != RTA_IFP) {
17407c478bd9Sstevel@tonic-gate 			if ((i & rtmsg.hdr.rtm_addrs) != 0)
17417c478bd9Sstevel@tonic-gate 				cp += ROUNDUP_LONG(salen(sa));
17427c478bd9Sstevel@tonic-gate 			continue;
17437c478bd9Sstevel@tonic-gate 		}
17447c478bd9Sstevel@tonic-gate 		if (sa->sa_family == AF_LINK &&
17457c478bd9Sstevel@tonic-gate 		    ((struct sockaddr_dl *)sa)->sdl_nlen != 0)
17467c478bd9Sstevel@tonic-gate 			ifp = (struct sockaddr_dl *)sa;
17477c478bd9Sstevel@tonic-gate 		break;
17487c478bd9Sstevel@tonic-gate 	}
17497c478bd9Sstevel@tonic-gate 	if (ifp == NULL) {
17507c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "interface could not be determined");
17517c478bd9Sstevel@tonic-gate 		return (NULL);
17527c478bd9Sstevel@tonic-gate 	}
17537c478bd9Sstevel@tonic-gate 
17547c478bd9Sstevel@tonic-gate 	/*
17557c478bd9Sstevel@tonic-gate 	 * We need to set the I/F name to what we got above, then do the
17567c478bd9Sstevel@tonic-gate 	 * appropriate ioctl to get its zone name.  But lifr->lifr_name is
17577c478bd9Sstevel@tonic-gate 	 * used by the calling function to do a REMOVEIF, so if we leave the
17587c478bd9Sstevel@tonic-gate 	 * "good" zone's I/F name in place, *that* I/F will be removed instead
17597c478bd9Sstevel@tonic-gate 	 * of the bad one.  So we save the old (bad) I/F name before over-
17607c478bd9Sstevel@tonic-gate 	 * writing it and doing the ioctl, then restore it after the ioctl.
17617c478bd9Sstevel@tonic-gate 	 */
17627c478bd9Sstevel@tonic-gate 	(void) strlcpy(save_if_name, lifr->lifr_name, sizeof (save_if_name));
17637c478bd9Sstevel@tonic-gate 	(void) strncpy(lifr->lifr_name, ifp->sdl_data, ifp->sdl_nlen);
17647c478bd9Sstevel@tonic-gate 	lifr->lifr_name[ifp->sdl_nlen] = '\0';
17657c478bd9Sstevel@tonic-gate 	i = ioctl(s, SIOCGLIFZONE, lifr);
17667c478bd9Sstevel@tonic-gate 	(void) strlcpy(lifr->lifr_name, save_if_name, sizeof (save_if_name));
17677c478bd9Sstevel@tonic-gate 	if (i < 0) {
17687c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
17697c478bd9Sstevel@tonic-gate 		    "%s: could not determine the zone interface belongs to",
17707c478bd9Sstevel@tonic-gate 		    lifr->lifr_name);
17717c478bd9Sstevel@tonic-gate 		return (NULL);
17727c478bd9Sstevel@tonic-gate 	}
17737c478bd9Sstevel@tonic-gate 	if (getzonenamebyid(lifr->lifr_zoneid, answer, sizeof (answer)) < 0)
17747c478bd9Sstevel@tonic-gate 		(void) snprintf(answer, sizeof (answer), "%d",
17757c478bd9Sstevel@tonic-gate 		    lifr->lifr_zoneid);
17767c478bd9Sstevel@tonic-gate 
17777c478bd9Sstevel@tonic-gate 	if (strlen(answer) > 0)
17787c478bd9Sstevel@tonic-gate 		return (answer);
17797c478bd9Sstevel@tonic-gate 	return (NULL);
17807c478bd9Sstevel@tonic-gate }
17817c478bd9Sstevel@tonic-gate 
17827c478bd9Sstevel@tonic-gate typedef struct mcast_rtmsg_s {
17837c478bd9Sstevel@tonic-gate 	struct rt_msghdr	m_rtm;
17847c478bd9Sstevel@tonic-gate 	union {
17857c478bd9Sstevel@tonic-gate 		struct {
17867c478bd9Sstevel@tonic-gate 			struct sockaddr_in	m_dst;
17877c478bd9Sstevel@tonic-gate 			struct sockaddr_in	m_gw;
17887c478bd9Sstevel@tonic-gate 			struct sockaddr_in	m_netmask;
17897c478bd9Sstevel@tonic-gate 		} m_v4;
17907c478bd9Sstevel@tonic-gate 		struct {
17917c478bd9Sstevel@tonic-gate 			struct sockaddr_in6	m_dst;
17927c478bd9Sstevel@tonic-gate 			struct sockaddr_in6	m_gw;
17937c478bd9Sstevel@tonic-gate 			struct sockaddr_in6	m_netmask;
17947c478bd9Sstevel@tonic-gate 		} m_v6;
17957c478bd9Sstevel@tonic-gate 	} m_u;
17967c478bd9Sstevel@tonic-gate } mcast_rtmsg_t;
17977c478bd9Sstevel@tonic-gate #define	m_dst4		m_u.m_v4.m_dst
17987c478bd9Sstevel@tonic-gate #define	m_dst6		m_u.m_v6.m_dst
17997c478bd9Sstevel@tonic-gate #define	m_gw4		m_u.m_v4.m_gw
18007c478bd9Sstevel@tonic-gate #define	m_gw6		m_u.m_v6.m_gw
18017c478bd9Sstevel@tonic-gate #define	m_netmask4	m_u.m_v4.m_netmask
18027c478bd9Sstevel@tonic-gate #define	m_netmask6	m_u.m_v6.m_netmask
18037c478bd9Sstevel@tonic-gate 
18047c478bd9Sstevel@tonic-gate /*
18057c478bd9Sstevel@tonic-gate  * Configures a single interface: a new virtual interface is added, based on
18067c478bd9Sstevel@tonic-gate  * the physical interface nwiftabptr->zone_nwif_physical, with the address
18077c478bd9Sstevel@tonic-gate  * specified in nwiftabptr->zone_nwif_address, for zone zone_id.  Note that
18087c478bd9Sstevel@tonic-gate  * the "address" can be an IPv6 address (with a /prefixlength required), an
18097c478bd9Sstevel@tonic-gate  * IPv4 address (with a /prefixlength optional), or a name; for the latter,
18107c478bd9Sstevel@tonic-gate  * an IPv4 name-to-address resolution will be attempted.
18117c478bd9Sstevel@tonic-gate  *
18127c478bd9Sstevel@tonic-gate  * A default interface route for multicast is created on the first IPv4 and
18137c478bd9Sstevel@tonic-gate  * IPv6 interfaces (that have the IFF_MULTICAST flag set), respectively.
18147c478bd9Sstevel@tonic-gate  * This should really be done in the init scripts if we ever allow zones to
18157c478bd9Sstevel@tonic-gate  * modify the routing tables.
18167c478bd9Sstevel@tonic-gate  *
18177c478bd9Sstevel@tonic-gate  * If anything goes wrong, we log an detailed error message, attempt to tear
18187c478bd9Sstevel@tonic-gate  * down whatever we set up and return an error.
18197c478bd9Sstevel@tonic-gate  */
18207c478bd9Sstevel@tonic-gate static int
18217c478bd9Sstevel@tonic-gate configure_one_interface(zlog_t *zlogp, zoneid_t zone_id,
18227c478bd9Sstevel@tonic-gate     struct zone_nwiftab *nwiftabptr, boolean_t *mcast_rt_v4_setp,
18237c478bd9Sstevel@tonic-gate     boolean_t *mcast_rt_v6_setp)
18247c478bd9Sstevel@tonic-gate {
18257c478bd9Sstevel@tonic-gate 	struct lifreq lifr;
18267c478bd9Sstevel@tonic-gate 	struct sockaddr_in netmask4;
18277c478bd9Sstevel@tonic-gate 	struct sockaddr_in6 netmask6;
18287c478bd9Sstevel@tonic-gate 	struct in_addr in4;
18297c478bd9Sstevel@tonic-gate 	struct in6_addr in6;
18307c478bd9Sstevel@tonic-gate 	sa_family_t af;
18317c478bd9Sstevel@tonic-gate 	char *slashp = strchr(nwiftabptr->zone_nwif_address, '/');
18327c478bd9Sstevel@tonic-gate 	mcast_rtmsg_t mcast_rtmsg;
18337c478bd9Sstevel@tonic-gate 	int s;
18347c478bd9Sstevel@tonic-gate 	int rs;
18357c478bd9Sstevel@tonic-gate 	int rlen;
18367c478bd9Sstevel@tonic-gate 	boolean_t got_netmask = B_FALSE;
18377c478bd9Sstevel@tonic-gate 	char addrstr4[INET_ADDRSTRLEN];
18387c478bd9Sstevel@tonic-gate 	int res;
18397c478bd9Sstevel@tonic-gate 
18407c478bd9Sstevel@tonic-gate 	res = zonecfg_valid_net_address(nwiftabptr->zone_nwif_address, &lifr);
18417c478bd9Sstevel@tonic-gate 	if (res != Z_OK) {
18427c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s: %s", zonecfg_strerror(res),
18437c478bd9Sstevel@tonic-gate 		    nwiftabptr->zone_nwif_address);
18447c478bd9Sstevel@tonic-gate 		return (-1);
18457c478bd9Sstevel@tonic-gate 	}
18467c478bd9Sstevel@tonic-gate 	af = lifr.lifr_addr.ss_family;
18477c478bd9Sstevel@tonic-gate 	if (af == AF_INET)
18487c478bd9Sstevel@tonic-gate 		in4 = ((struct sockaddr_in *)(&lifr.lifr_addr))->sin_addr;
18497c478bd9Sstevel@tonic-gate 	else
18507c478bd9Sstevel@tonic-gate 		in6 = ((struct sockaddr_in6 *)(&lifr.lifr_addr))->sin6_addr;
18517c478bd9Sstevel@tonic-gate 
18527c478bd9Sstevel@tonic-gate 	if ((s = socket(af, SOCK_DGRAM, 0)) < 0) {
18537c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get socket");
18547c478bd9Sstevel@tonic-gate 		return (-1);
18557c478bd9Sstevel@tonic-gate 	}
18567c478bd9Sstevel@tonic-gate 
18577c478bd9Sstevel@tonic-gate 	(void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical,
18587c478bd9Sstevel@tonic-gate 	    sizeof (lifr.lifr_name));
18597c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) {
186022321485Svp157776 		/*
186122321485Svp157776 		 * Here, we know that the interface can't be brought up.
186222321485Svp157776 		 * A similar warning message was already printed out to
186322321485Svp157776 		 * the console by zoneadm(1M) so instead we log the
186422321485Svp157776 		 * message to syslog and continue.
186522321485Svp157776 		 */
186622321485Svp157776 		zerror(&logsys, B_TRUE, "WARNING: skipping interface "
186722321485Svp157776 		    "'%s' which may not be present/plumbed in the "
186822321485Svp157776 		    "global zone.", lifr.lifr_name);
18697c478bd9Sstevel@tonic-gate 		(void) close(s);
187022321485Svp157776 		return (Z_OK);
18717c478bd9Sstevel@tonic-gate 	}
18727c478bd9Sstevel@tonic-gate 
18737c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) {
18747c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
18757c478bd9Sstevel@tonic-gate 		    "%s: could not set IP address to %s",
18767c478bd9Sstevel@tonic-gate 		    lifr.lifr_name, nwiftabptr->zone_nwif_address);
18777c478bd9Sstevel@tonic-gate 		goto bad;
18787c478bd9Sstevel@tonic-gate 	}
18797c478bd9Sstevel@tonic-gate 
18807c478bd9Sstevel@tonic-gate 	/* Preserve literal IPv4 address for later potential printing. */
18817c478bd9Sstevel@tonic-gate 	if (af == AF_INET)
18827c478bd9Sstevel@tonic-gate 		(void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN);
18837c478bd9Sstevel@tonic-gate 
18847c478bd9Sstevel@tonic-gate 	lifr.lifr_zoneid = zone_id;
18857c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) {
18867c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s: could not place interface into zone",
18877c478bd9Sstevel@tonic-gate 		    lifr.lifr_name);
18887c478bd9Sstevel@tonic-gate 		goto bad;
18897c478bd9Sstevel@tonic-gate 	}
18907c478bd9Sstevel@tonic-gate 
18917c478bd9Sstevel@tonic-gate 	if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) {
18927c478bd9Sstevel@tonic-gate 		got_netmask = B_TRUE;	/* default setting will be correct */
18937c478bd9Sstevel@tonic-gate 	} else {
18947c478bd9Sstevel@tonic-gate 		if (af == AF_INET) {
18957c478bd9Sstevel@tonic-gate 			/*
18967c478bd9Sstevel@tonic-gate 			 * The IPv4 netmask can be determined either
18977c478bd9Sstevel@tonic-gate 			 * directly if a prefix length was supplied with
18987c478bd9Sstevel@tonic-gate 			 * the address or via the netmasks database.  Not
18997c478bd9Sstevel@tonic-gate 			 * being able to determine it is a common failure,
19007c478bd9Sstevel@tonic-gate 			 * but it often is not fatal to operation of the
19017c478bd9Sstevel@tonic-gate 			 * interface.  In that case, a warning will be
19027c478bd9Sstevel@tonic-gate 			 * printed after the rest of the interface's
19037c478bd9Sstevel@tonic-gate 			 * parameters have been configured.
19047c478bd9Sstevel@tonic-gate 			 */
19057c478bd9Sstevel@tonic-gate 			(void) memset(&netmask4, 0, sizeof (netmask4));
19067c478bd9Sstevel@tonic-gate 			if (slashp != NULL) {
19077c478bd9Sstevel@tonic-gate 				if (addr2netmask(slashp + 1, V4_ADDR_LEN,
19087c478bd9Sstevel@tonic-gate 				    (uchar_t *)&netmask4.sin_addr) != 0) {
19097c478bd9Sstevel@tonic-gate 					*slashp = '/';
19107c478bd9Sstevel@tonic-gate 					zerror(zlogp, B_FALSE,
19117c478bd9Sstevel@tonic-gate 					    "%s: invalid prefix length in %s",
19127c478bd9Sstevel@tonic-gate 					    lifr.lifr_name,
19137c478bd9Sstevel@tonic-gate 					    nwiftabptr->zone_nwif_address);
19147c478bd9Sstevel@tonic-gate 					goto bad;
19157c478bd9Sstevel@tonic-gate 				}
19167c478bd9Sstevel@tonic-gate 				got_netmask = B_TRUE;
19177c478bd9Sstevel@tonic-gate 			} else if (getnetmaskbyaddr(in4,
19187c478bd9Sstevel@tonic-gate 			    &netmask4.sin_addr) == 0) {
19197c478bd9Sstevel@tonic-gate 				got_netmask = B_TRUE;
19207c478bd9Sstevel@tonic-gate 			}
19217c478bd9Sstevel@tonic-gate 			if (got_netmask) {
19227c478bd9Sstevel@tonic-gate 				netmask4.sin_family = af;
19237c478bd9Sstevel@tonic-gate 				(void) memcpy(&lifr.lifr_addr, &netmask4,
19247c478bd9Sstevel@tonic-gate 				    sizeof (netmask4));
19257c478bd9Sstevel@tonic-gate 			}
19267c478bd9Sstevel@tonic-gate 		} else {
19277c478bd9Sstevel@tonic-gate 			(void) memset(&netmask6, 0, sizeof (netmask6));
19287c478bd9Sstevel@tonic-gate 			if (addr2netmask(slashp + 1, V6_ADDR_LEN,
19297c478bd9Sstevel@tonic-gate 			    (uchar_t *)&netmask6.sin6_addr) != 0) {
19307c478bd9Sstevel@tonic-gate 				*slashp = '/';
19317c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
19327c478bd9Sstevel@tonic-gate 				    "%s: invalid prefix length in %s",
19337c478bd9Sstevel@tonic-gate 				    lifr.lifr_name,
19347c478bd9Sstevel@tonic-gate 				    nwiftabptr->zone_nwif_address);
19357c478bd9Sstevel@tonic-gate 				goto bad;
19367c478bd9Sstevel@tonic-gate 			}
19377c478bd9Sstevel@tonic-gate 			got_netmask = B_TRUE;
19387c478bd9Sstevel@tonic-gate 			netmask6.sin6_family = af;
19397c478bd9Sstevel@tonic-gate 			(void) memcpy(&lifr.lifr_addr, &netmask6,
19407c478bd9Sstevel@tonic-gate 			    sizeof (netmask6));
19417c478bd9Sstevel@tonic-gate 		}
19427c478bd9Sstevel@tonic-gate 		if (got_netmask &&
19437c478bd9Sstevel@tonic-gate 		    ioctl(s, SIOCSLIFNETMASK, (caddr_t)&lifr) < 0) {
19447c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not set netmask",
19457c478bd9Sstevel@tonic-gate 			    lifr.lifr_name);
19467c478bd9Sstevel@tonic-gate 			goto bad;
19477c478bd9Sstevel@tonic-gate 		}
19487c478bd9Sstevel@tonic-gate 
19497c478bd9Sstevel@tonic-gate 		/*
19507c478bd9Sstevel@tonic-gate 		 * This doesn't set the broadcast address at all. Rather, it
19517c478bd9Sstevel@tonic-gate 		 * gets, then sets the interface's address, relying on the fact
19527c478bd9Sstevel@tonic-gate 		 * that resetting the address will reset the broadcast address.
19537c478bd9Sstevel@tonic-gate 		 */
19547c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) {
19557c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not get address",
19567c478bd9Sstevel@tonic-gate 			    lifr.lifr_name);
19577c478bd9Sstevel@tonic-gate 			goto bad;
19587c478bd9Sstevel@tonic-gate 		}
19597c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) {
19607c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
19617c478bd9Sstevel@tonic-gate 			    "%s: could not reset broadcast address",
19627c478bd9Sstevel@tonic-gate 			    lifr.lifr_name);
19637c478bd9Sstevel@tonic-gate 			goto bad;
19647c478bd9Sstevel@tonic-gate 		}
19657c478bd9Sstevel@tonic-gate 	}
19667c478bd9Sstevel@tonic-gate 
19677c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
19687c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s: could not get flags",
19697c478bd9Sstevel@tonic-gate 		    lifr.lifr_name);
19707c478bd9Sstevel@tonic-gate 		goto bad;
19717c478bd9Sstevel@tonic-gate 	}
19727c478bd9Sstevel@tonic-gate 	lifr.lifr_flags |= IFF_UP;
19737c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
19747c478bd9Sstevel@tonic-gate 		int save_errno = errno;
19757c478bd9Sstevel@tonic-gate 		char *zone_using;
19767c478bd9Sstevel@tonic-gate 
19777c478bd9Sstevel@tonic-gate 		/*
19787c478bd9Sstevel@tonic-gate 		 * If we failed with something other than EADDRNOTAVAIL,
19797c478bd9Sstevel@tonic-gate 		 * then skip to the end.  Otherwise, look up our address,
19807c478bd9Sstevel@tonic-gate 		 * then call a function to determine which zone is already
19817c478bd9Sstevel@tonic-gate 		 * using that address.
19827c478bd9Sstevel@tonic-gate 		 */
19837c478bd9Sstevel@tonic-gate 		if (errno != EADDRNOTAVAIL) {
19847c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
19857c478bd9Sstevel@tonic-gate 			    "%s: could not bring interface up", lifr.lifr_name);
19867c478bd9Sstevel@tonic-gate 			goto bad;
19877c478bd9Sstevel@tonic-gate 		}
19887c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) {
19897c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not get address",
19907c478bd9Sstevel@tonic-gate 			    lifr.lifr_name);
19917c478bd9Sstevel@tonic-gate 			goto bad;
19927c478bd9Sstevel@tonic-gate 		}
19937c478bd9Sstevel@tonic-gate 		zone_using = who_is_using(zlogp, &lifr);
19947c478bd9Sstevel@tonic-gate 		errno = save_errno;
19957c478bd9Sstevel@tonic-gate 		if (zone_using == NULL)
19967c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
19977c478bd9Sstevel@tonic-gate 			    "%s: could not bring interface up", lifr.lifr_name);
19987c478bd9Sstevel@tonic-gate 		else
19997c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not bring interface "
20007c478bd9Sstevel@tonic-gate 			    "up: address in use by zone '%s'", lifr.lifr_name,
20017c478bd9Sstevel@tonic-gate 			    zone_using);
20027c478bd9Sstevel@tonic-gate 		goto bad;
20037c478bd9Sstevel@tonic-gate 	}
20047c478bd9Sstevel@tonic-gate 	if ((lifr.lifr_flags & IFF_MULTICAST) && ((af == AF_INET &&
20057c478bd9Sstevel@tonic-gate 	    mcast_rt_v4_setp != NULL && *mcast_rt_v4_setp == B_FALSE) ||
20067c478bd9Sstevel@tonic-gate 	    (af == AF_INET6 &&
20077c478bd9Sstevel@tonic-gate 	    mcast_rt_v6_setp != NULL && *mcast_rt_v6_setp == B_FALSE))) {
20087c478bd9Sstevel@tonic-gate 		rs = socket(PF_ROUTE, SOCK_RAW, 0);
20097c478bd9Sstevel@tonic-gate 		if (rs < 0) {
20107c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not create "
20117c478bd9Sstevel@tonic-gate 			    "routing socket", lifr.lifr_name);
20127c478bd9Sstevel@tonic-gate 			goto bad;
20137c478bd9Sstevel@tonic-gate 		}
20147c478bd9Sstevel@tonic-gate 		(void) shutdown(rs, 0);
20157c478bd9Sstevel@tonic-gate 		(void) memset((void *)&mcast_rtmsg, 0, sizeof (mcast_rtmsg_t));
20167c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_msglen =  sizeof (struct rt_msghdr) +
20177c478bd9Sstevel@tonic-gate 		    3 * (af == AF_INET ? sizeof (struct sockaddr_in) :
20187c478bd9Sstevel@tonic-gate 		    sizeof (struct sockaddr_in6));
20197c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_version = RTM_VERSION;
20207c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_type = RTM_ADD;
20217c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_flags = RTF_UP;
20227c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_addrs =
20237c478bd9Sstevel@tonic-gate 		    RTA_DST | RTA_GATEWAY | RTA_NETMASK;
20247c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_seq = ++rts_seqno;
20257c478bd9Sstevel@tonic-gate 		if (af == AF_INET) {
20267c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_dst4.sin_family = AF_INET;
20277c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_dst4.sin_addr.s_addr =
20287c478bd9Sstevel@tonic-gate 			    htonl(INADDR_UNSPEC_GROUP);
20297c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_gw4.sin_family = AF_INET;
20307c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_gw4.sin_addr = in4;
20317c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_netmask4.sin_family = AF_INET;
20327c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_netmask4.sin_addr.s_addr =
20337c478bd9Sstevel@tonic-gate 			    htonl(IN_CLASSD_NET);
20347c478bd9Sstevel@tonic-gate 		} else {
20357c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_dst6.sin6_family = AF_INET6;
20367c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_dst6.sin6_addr.s6_addr[0] = 0xffU;
20377c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_gw6.sin6_family = AF_INET6;
20387c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_gw6.sin6_addr = in6;
20397c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_netmask6.sin6_family = AF_INET6;
20407c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_netmask6.sin6_addr.s6_addr[0] = 0xffU;
20417c478bd9Sstevel@tonic-gate 		}
20427c478bd9Sstevel@tonic-gate 		rlen = write(rs, (char *)&mcast_rtmsg,
20437c478bd9Sstevel@tonic-gate 		    mcast_rtmsg.m_rtm.rtm_msglen);
204422321485Svp157776 		/*
204522321485Svp157776 		 * The write to the multicast socket will fail if the
204622321485Svp157776 		 * interface belongs to a failed IPMP group. This is a
204722321485Svp157776 		 * non-fatal error and the zone will continue booting.
204822321485Svp157776 		 * While the zone is running, if any interface in the
204922321485Svp157776 		 * failed IPMP group recovers, the zone will fallback to
205022321485Svp157776 		 * using that interface.
205122321485Svp157776 		 */
20527c478bd9Sstevel@tonic-gate 		if (rlen < mcast_rtmsg.m_rtm.rtm_msglen) {
20537c478bd9Sstevel@tonic-gate 			if (rlen < 0) {
205422321485Svp157776 				zerror(zlogp, B_TRUE, "WARNING: interface "
205522321485Svp157776 				    "'%s' not available as default for "
205622321485Svp157776 				    "multicast.", lifr.lifr_name);
20577c478bd9Sstevel@tonic-gate 			} else {
205822321485Svp157776 				zerror(zlogp, B_FALSE, "WARNING: interface "
205922321485Svp157776 				    "'%s' not available as default for "
206022321485Svp157776 				    "multicast; routing socket returned "
206122321485Svp157776 				    "unexpected %d bytes.",
206222321485Svp157776 				    lifr.lifr_name, rlen);
20637c478bd9Sstevel@tonic-gate 			}
206422321485Svp157776 		} else {
206522321485Svp157776 
20667c478bd9Sstevel@tonic-gate 			if (af == AF_INET) {
20677c478bd9Sstevel@tonic-gate 				*mcast_rt_v4_setp = B_TRUE;
20687c478bd9Sstevel@tonic-gate 			} else {
20697c478bd9Sstevel@tonic-gate 				*mcast_rt_v6_setp = B_TRUE;
20707c478bd9Sstevel@tonic-gate 			}
207122321485Svp157776 		}
20727c478bd9Sstevel@tonic-gate 		(void) close(rs);
20737c478bd9Sstevel@tonic-gate 	}
20747c478bd9Sstevel@tonic-gate 
20757c478bd9Sstevel@tonic-gate 	if (!got_netmask) {
20767c478bd9Sstevel@tonic-gate 		/*
20777c478bd9Sstevel@tonic-gate 		 * A common, but often non-fatal problem, is that the system
20787c478bd9Sstevel@tonic-gate 		 * cannot find the netmask for an interface address. This is
20797c478bd9Sstevel@tonic-gate 		 * often caused by it being only in /etc/inet/netmasks, but
20807c478bd9Sstevel@tonic-gate 		 * /etc/nsswitch.conf says to use NIS or NIS+ and it's not
20817c478bd9Sstevel@tonic-gate 		 * in that. This doesn't show up at boot because the netmask
20827c478bd9Sstevel@tonic-gate 		 * is obtained from /etc/inet/netmasks when no network
20837c478bd9Sstevel@tonic-gate 		 * interfaces are up, but isn't consulted when NIS/NIS+ is
20847c478bd9Sstevel@tonic-gate 		 * available. We warn the user here that something like this
20857c478bd9Sstevel@tonic-gate 		 * has happened and we're just running with a default and
20867c478bd9Sstevel@tonic-gate 		 * possible incorrect netmask.
20877c478bd9Sstevel@tonic-gate 		 */
20887c478bd9Sstevel@tonic-gate 		char buffer[INET6_ADDRSTRLEN];
20897c478bd9Sstevel@tonic-gate 		void  *addr;
20907c478bd9Sstevel@tonic-gate 
20917c478bd9Sstevel@tonic-gate 		if (af == AF_INET)
20927c478bd9Sstevel@tonic-gate 			addr = &((struct sockaddr_in *)
20937c478bd9Sstevel@tonic-gate 			    (&lifr.lifr_addr))->sin_addr;
20947c478bd9Sstevel@tonic-gate 		else
20957c478bd9Sstevel@tonic-gate 			addr = &((struct sockaddr_in6 *)
20967c478bd9Sstevel@tonic-gate 			    (&lifr.lifr_addr))->sin6_addr;
20977c478bd9Sstevel@tonic-gate 
20987c478bd9Sstevel@tonic-gate 		/* Find out what netmask interface is going to be using */
20997c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 ||
21007c478bd9Sstevel@tonic-gate 		    inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL)
21017c478bd9Sstevel@tonic-gate 			goto bad;
21027c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
21037c478bd9Sstevel@tonic-gate 		    "WARNING: %s: no matching subnet found in netmasks(4) for "
21047c478bd9Sstevel@tonic-gate 		    "%s; using default of %s.",
21057c478bd9Sstevel@tonic-gate 		    lifr.lifr_name, addrstr4, buffer);
21067c478bd9Sstevel@tonic-gate 	}
21077c478bd9Sstevel@tonic-gate 
21087c478bd9Sstevel@tonic-gate 	(void) close(s);
21097c478bd9Sstevel@tonic-gate 	return (Z_OK);
21107c478bd9Sstevel@tonic-gate bad:
21117c478bd9Sstevel@tonic-gate 	(void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr);
21127c478bd9Sstevel@tonic-gate 	(void) close(s);
21137c478bd9Sstevel@tonic-gate 	return (-1);
21147c478bd9Sstevel@tonic-gate }
21157c478bd9Sstevel@tonic-gate 
21167c478bd9Sstevel@tonic-gate /*
21177c478bd9Sstevel@tonic-gate  * Sets up network interfaces based on information from the zone configuration.
21187c478bd9Sstevel@tonic-gate  * An IPv4 loopback interface is set up "for free", modeling the global system.
21197c478bd9Sstevel@tonic-gate  * If any of the configuration interfaces were IPv6, then an IPv6 loopback
21207c478bd9Sstevel@tonic-gate  * address is set up as well.
21217c478bd9Sstevel@tonic-gate  *
21227c478bd9Sstevel@tonic-gate  * If anything goes wrong, we log a general error message, attempt to tear down
21237c478bd9Sstevel@tonic-gate  * whatever we set up, and return an error.
21247c478bd9Sstevel@tonic-gate  */
21257c478bd9Sstevel@tonic-gate static int
21267c478bd9Sstevel@tonic-gate configure_network_interfaces(zlog_t *zlogp)
21277c478bd9Sstevel@tonic-gate {
21287c478bd9Sstevel@tonic-gate 	zone_dochandle_t handle;
21297c478bd9Sstevel@tonic-gate 	struct zone_nwiftab nwiftab, loopback_iftab;
21307c478bd9Sstevel@tonic-gate 	boolean_t saw_v6 = B_FALSE;
21317c478bd9Sstevel@tonic-gate 	boolean_t mcast_rt_v4_set = B_FALSE;
21327c478bd9Sstevel@tonic-gate 	boolean_t mcast_rt_v6_set = B_FALSE;
21337c478bd9Sstevel@tonic-gate 	zoneid_t zoneid;
21347c478bd9Sstevel@tonic-gate 
21357c478bd9Sstevel@tonic-gate 	if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) {
21367c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to get zoneid");
21377c478bd9Sstevel@tonic-gate 		return (-1);
21387c478bd9Sstevel@tonic-gate 	}
21397c478bd9Sstevel@tonic-gate 
21407c478bd9Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
21417c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
21427c478bd9Sstevel@tonic-gate 		return (-1);
21437c478bd9Sstevel@tonic-gate 	}
21447c478bd9Sstevel@tonic-gate 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
21457c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
21467c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
21477c478bd9Sstevel@tonic-gate 		return (-1);
21487c478bd9Sstevel@tonic-gate 	}
21497c478bd9Sstevel@tonic-gate 	if (zonecfg_setnwifent(handle) == Z_OK) {
21507c478bd9Sstevel@tonic-gate 		for (;;) {
21517c478bd9Sstevel@tonic-gate 			struct in6_addr in6;
21527c478bd9Sstevel@tonic-gate 
21537c478bd9Sstevel@tonic-gate 			if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
21547c478bd9Sstevel@tonic-gate 				break;
21557c478bd9Sstevel@tonic-gate 			if (configure_one_interface(zlogp, zoneid,
21567c478bd9Sstevel@tonic-gate 			    &nwiftab, &mcast_rt_v4_set, &mcast_rt_v6_set) !=
21577c478bd9Sstevel@tonic-gate 			    Z_OK) {
21587c478bd9Sstevel@tonic-gate 				(void) zonecfg_endnwifent(handle);
21597c478bd9Sstevel@tonic-gate 				zonecfg_fini_handle(handle);
21607c478bd9Sstevel@tonic-gate 				return (-1);
21617c478bd9Sstevel@tonic-gate 			}
21627c478bd9Sstevel@tonic-gate 			if (inet_pton(AF_INET6, nwiftab.zone_nwif_address,
21637c478bd9Sstevel@tonic-gate 			    &in6) == 1)
21647c478bd9Sstevel@tonic-gate 				saw_v6 = B_TRUE;
21657c478bd9Sstevel@tonic-gate 		}
21667c478bd9Sstevel@tonic-gate 		(void) zonecfg_endnwifent(handle);
21677c478bd9Sstevel@tonic-gate 	}
21687c478bd9Sstevel@tonic-gate 	zonecfg_fini_handle(handle);
21697c478bd9Sstevel@tonic-gate 	(void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0",
21707c478bd9Sstevel@tonic-gate 	    sizeof (loopback_iftab.zone_nwif_physical));
21717c478bd9Sstevel@tonic-gate 	(void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1",
21727c478bd9Sstevel@tonic-gate 	    sizeof (loopback_iftab.zone_nwif_address));
21737c478bd9Sstevel@tonic-gate 	if (configure_one_interface(zlogp, zoneid, &loopback_iftab, NULL, NULL)
21747c478bd9Sstevel@tonic-gate 	    != Z_OK) {
21757c478bd9Sstevel@tonic-gate 		return (-1);
21767c478bd9Sstevel@tonic-gate 	}
21777c478bd9Sstevel@tonic-gate 	if (saw_v6) {
21787c478bd9Sstevel@tonic-gate 		(void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128",
21797c478bd9Sstevel@tonic-gate 		    sizeof (loopback_iftab.zone_nwif_address));
21807c478bd9Sstevel@tonic-gate 		if (configure_one_interface(zlogp, zoneid,
21817c478bd9Sstevel@tonic-gate 		    &loopback_iftab, NULL, NULL) != Z_OK) {
21827c478bd9Sstevel@tonic-gate 			return (-1);
21837c478bd9Sstevel@tonic-gate 		}
21847c478bd9Sstevel@tonic-gate 	}
21857c478bd9Sstevel@tonic-gate 	return (0);
21867c478bd9Sstevel@tonic-gate }
21877c478bd9Sstevel@tonic-gate 
21887c478bd9Sstevel@tonic-gate static int
21897c478bd9Sstevel@tonic-gate tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid,
21907c478bd9Sstevel@tonic-gate     const struct sockaddr_storage *local, const struct sockaddr_storage *remote)
21917c478bd9Sstevel@tonic-gate {
21927c478bd9Sstevel@tonic-gate 	int fd;
21937c478bd9Sstevel@tonic-gate 	struct strioctl ioc;
21947c478bd9Sstevel@tonic-gate 	tcp_ioc_abort_conn_t conn;
21957c478bd9Sstevel@tonic-gate 	int error;
21967c478bd9Sstevel@tonic-gate 
21977c478bd9Sstevel@tonic-gate 	conn.ac_local = *local;
21987c478bd9Sstevel@tonic-gate 	conn.ac_remote = *remote;
21997c478bd9Sstevel@tonic-gate 	conn.ac_start = TCPS_SYN_SENT;
22007c478bd9Sstevel@tonic-gate 	conn.ac_end = TCPS_TIME_WAIT;
22017c478bd9Sstevel@tonic-gate 	conn.ac_zoneid = zoneid;
22027c478bd9Sstevel@tonic-gate 
22037c478bd9Sstevel@tonic-gate 	ioc.ic_cmd = TCP_IOC_ABORT_CONN;
22047c478bd9Sstevel@tonic-gate 	ioc.ic_timout = -1; /* infinite timeout */
22057c478bd9Sstevel@tonic-gate 	ioc.ic_len = sizeof (conn);
22067c478bd9Sstevel@tonic-gate 	ioc.ic_dp = (char *)&conn;
22077c478bd9Sstevel@tonic-gate 
22087c478bd9Sstevel@tonic-gate 	if ((fd = open("/dev/tcp", O_RDONLY)) < 0) {
22097c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to open %s", "/dev/tcp");
22107c478bd9Sstevel@tonic-gate 		return (-1);
22117c478bd9Sstevel@tonic-gate 	}
22127c478bd9Sstevel@tonic-gate 
22137c478bd9Sstevel@tonic-gate 	error = ioctl(fd, I_STR, &ioc);
22147c478bd9Sstevel@tonic-gate 	(void) close(fd);
22157c478bd9Sstevel@tonic-gate 	if (error == 0 || errno == ENOENT)	/* ENOENT is not an error */
22167c478bd9Sstevel@tonic-gate 		return (0);
22177c478bd9Sstevel@tonic-gate 	return (-1);
22187c478bd9Sstevel@tonic-gate }
22197c478bd9Sstevel@tonic-gate 
22207c478bd9Sstevel@tonic-gate static int
22217c478bd9Sstevel@tonic-gate tcp_abort_connections(zlog_t *zlogp, zoneid_t zoneid)
22227c478bd9Sstevel@tonic-gate {
22237c478bd9Sstevel@tonic-gate 	struct sockaddr_storage l, r;
22247c478bd9Sstevel@tonic-gate 	struct sockaddr_in *local, *remote;
22257c478bd9Sstevel@tonic-gate 	struct sockaddr_in6 *local6, *remote6;
22267c478bd9Sstevel@tonic-gate 	int error;
22277c478bd9Sstevel@tonic-gate 
22287c478bd9Sstevel@tonic-gate 	/*
22297c478bd9Sstevel@tonic-gate 	 * Abort IPv4 connections.
22307c478bd9Sstevel@tonic-gate 	 */
22317c478bd9Sstevel@tonic-gate 	bzero(&l, sizeof (*local));
22327c478bd9Sstevel@tonic-gate 	local = (struct sockaddr_in *)&l;
22337c478bd9Sstevel@tonic-gate 	local->sin_family = AF_INET;
22347c478bd9Sstevel@tonic-gate 	local->sin_addr.s_addr = INADDR_ANY;
22357c478bd9Sstevel@tonic-gate 	local->sin_port = 0;
22367c478bd9Sstevel@tonic-gate 
22377c478bd9Sstevel@tonic-gate 	bzero(&r, sizeof (*remote));
22387c478bd9Sstevel@tonic-gate 	remote = (struct sockaddr_in *)&r;
22397c478bd9Sstevel@tonic-gate 	remote->sin_family = AF_INET;
22407c478bd9Sstevel@tonic-gate 	remote->sin_addr.s_addr = INADDR_ANY;
22417c478bd9Sstevel@tonic-gate 	remote->sin_port = 0;
22427c478bd9Sstevel@tonic-gate 
22437c478bd9Sstevel@tonic-gate 	if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
22447c478bd9Sstevel@tonic-gate 		return (error);
22457c478bd9Sstevel@tonic-gate 
22467c478bd9Sstevel@tonic-gate 	/*
22477c478bd9Sstevel@tonic-gate 	 * Abort IPv6 connections.
22487c478bd9Sstevel@tonic-gate 	 */
22497c478bd9Sstevel@tonic-gate 	bzero(&l, sizeof (*local6));
22507c478bd9Sstevel@tonic-gate 	local6 = (struct sockaddr_in6 *)&l;
22517c478bd9Sstevel@tonic-gate 	local6->sin6_family = AF_INET6;
22527c478bd9Sstevel@tonic-gate 	local6->sin6_port = 0;
22537c478bd9Sstevel@tonic-gate 	local6->sin6_addr = in6addr_any;
22547c478bd9Sstevel@tonic-gate 
22557c478bd9Sstevel@tonic-gate 	bzero(&r, sizeof (*remote6));
22567c478bd9Sstevel@tonic-gate 	remote6 = (struct sockaddr_in6 *)&r;
22577c478bd9Sstevel@tonic-gate 	remote6->sin6_family = AF_INET6;
22587c478bd9Sstevel@tonic-gate 	remote6->sin6_port = 0;
22597c478bd9Sstevel@tonic-gate 	remote6->sin6_addr = in6addr_any;
22607c478bd9Sstevel@tonic-gate 
22617c478bd9Sstevel@tonic-gate 	if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
22627c478bd9Sstevel@tonic-gate 		return (error);
22637c478bd9Sstevel@tonic-gate 	return (0);
22647c478bd9Sstevel@tonic-gate }
22657c478bd9Sstevel@tonic-gate 
22667c478bd9Sstevel@tonic-gate static int
2267ffbafc53Scomay get_privset(zlog_t *zlogp, priv_set_t *privs, boolean_t mount_cmd)
2268ffbafc53Scomay {
2269ffbafc53Scomay 	int error = -1;
2270ffbafc53Scomay 	zone_dochandle_t handle;
2271ffbafc53Scomay 	char *privname = NULL;
2272ffbafc53Scomay 
2273ffbafc53Scomay 	if (mount_cmd) {
2274ffbafc53Scomay 		if (zonecfg_default_privset(privs) == Z_OK)
2275ffbafc53Scomay 			return (0);
2276ffbafc53Scomay 		zerror(zlogp, B_FALSE,
2277ffbafc53Scomay 		    "failed to determine the zone's default privilege set");
2278ffbafc53Scomay 		return (-1);
2279ffbafc53Scomay 	}
2280ffbafc53Scomay 
2281ffbafc53Scomay 	if ((handle = zonecfg_init_handle()) == NULL) {
2282ffbafc53Scomay 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2283ffbafc53Scomay 		return (-1);
2284ffbafc53Scomay 	}
2285ffbafc53Scomay 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2286ffbafc53Scomay 		zerror(zlogp, B_FALSE, "invalid configuration");
2287ffbafc53Scomay 		zonecfg_fini_handle(handle);
2288ffbafc53Scomay 		return (-1);
2289ffbafc53Scomay 	}
2290ffbafc53Scomay 
2291ffbafc53Scomay 	switch (zonecfg_get_privset(handle, privs, &privname)) {
2292ffbafc53Scomay 	case Z_OK:
2293ffbafc53Scomay 		error = 0;
2294ffbafc53Scomay 		break;
2295ffbafc53Scomay 	case Z_PRIV_PROHIBITED:
2296ffbafc53Scomay 		zerror(zlogp, B_FALSE, "privilege \"%s\" is not permitted "
2297ffbafc53Scomay 		    "within the zone's privilege set", privname);
2298ffbafc53Scomay 		break;
2299ffbafc53Scomay 	case Z_PRIV_REQUIRED:
2300ffbafc53Scomay 		zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing "
2301ffbafc53Scomay 		    "from the zone's privilege set", privname);
2302ffbafc53Scomay 		break;
2303ffbafc53Scomay 	case Z_PRIV_UNKNOWN:
2304ffbafc53Scomay 		zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified "
2305ffbafc53Scomay 		    "in the zone's privilege set", privname);
2306ffbafc53Scomay 		break;
2307ffbafc53Scomay 	default:
2308ffbafc53Scomay 		zerror(zlogp, B_FALSE, "failed to determine the zone's "
2309ffbafc53Scomay 		    "privilege set");
2310ffbafc53Scomay 		break;
2311ffbafc53Scomay 	}
2312ffbafc53Scomay 
2313ffbafc53Scomay 	free(privname);
2314ffbafc53Scomay 	zonecfg_fini_handle(handle);
2315ffbafc53Scomay 	return (error);
2316ffbafc53Scomay }
2317ffbafc53Scomay 
2318ffbafc53Scomay static int
23197c478bd9Sstevel@tonic-gate get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
23207c478bd9Sstevel@tonic-gate {
23217c478bd9Sstevel@tonic-gate 	nvlist_t *nvl = NULL;
23227c478bd9Sstevel@tonic-gate 	char *nvl_packed = NULL;
23237c478bd9Sstevel@tonic-gate 	size_t nvl_size = 0;
23247c478bd9Sstevel@tonic-gate 	nvlist_t **nvlv = NULL;
23257c478bd9Sstevel@tonic-gate 	int rctlcount = 0;
23267c478bd9Sstevel@tonic-gate 	int error = -1;
23277c478bd9Sstevel@tonic-gate 	zone_dochandle_t handle;
23287c478bd9Sstevel@tonic-gate 	struct zone_rctltab rctltab;
23297c478bd9Sstevel@tonic-gate 	rctlblk_t *rctlblk = NULL;
23307c478bd9Sstevel@tonic-gate 
23317c478bd9Sstevel@tonic-gate 	*bufp = NULL;
23327c478bd9Sstevel@tonic-gate 	*bufsizep = 0;
23337c478bd9Sstevel@tonic-gate 
23347c478bd9Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
23357c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
23367c478bd9Sstevel@tonic-gate 		return (-1);
23377c478bd9Sstevel@tonic-gate 	}
23387c478bd9Sstevel@tonic-gate 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
23397c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
23407c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
23417c478bd9Sstevel@tonic-gate 		return (-1);
23427c478bd9Sstevel@tonic-gate 	}
23437c478bd9Sstevel@tonic-gate 
23447c478bd9Sstevel@tonic-gate 	rctltab.zone_rctl_valptr = NULL;
23457c478bd9Sstevel@tonic-gate 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
23467c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
23477c478bd9Sstevel@tonic-gate 		goto out;
23487c478bd9Sstevel@tonic-gate 	}
23497c478bd9Sstevel@tonic-gate 
23507c478bd9Sstevel@tonic-gate 	if (zonecfg_setrctlent(handle) != Z_OK) {
23517c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
23527c478bd9Sstevel@tonic-gate 		goto out;
23537c478bd9Sstevel@tonic-gate 	}
23547c478bd9Sstevel@tonic-gate 
23557c478bd9Sstevel@tonic-gate 	if ((rctlblk = malloc(rctlblk_size())) == NULL) {
23567c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "memory allocation failed");
23577c478bd9Sstevel@tonic-gate 		goto out;
23587c478bd9Sstevel@tonic-gate 	}
23597c478bd9Sstevel@tonic-gate 	while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
23607c478bd9Sstevel@tonic-gate 		struct zone_rctlvaltab *rctlval;
23617c478bd9Sstevel@tonic-gate 		uint_t i, count;
23627c478bd9Sstevel@tonic-gate 		const char *name = rctltab.zone_rctl_name;
23637c478bd9Sstevel@tonic-gate 
23647c478bd9Sstevel@tonic-gate 		/* zoneadm should have already warned about unknown rctls. */
23657c478bd9Sstevel@tonic-gate 		if (!zonecfg_is_rctl(name)) {
23667c478bd9Sstevel@tonic-gate 			zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
23677c478bd9Sstevel@tonic-gate 			rctltab.zone_rctl_valptr = NULL;
23687c478bd9Sstevel@tonic-gate 			continue;
23697c478bd9Sstevel@tonic-gate 		}
23707c478bd9Sstevel@tonic-gate 		count = 0;
23717c478bd9Sstevel@tonic-gate 		for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
23727c478bd9Sstevel@tonic-gate 		    rctlval = rctlval->zone_rctlval_next) {
23737c478bd9Sstevel@tonic-gate 			count++;
23747c478bd9Sstevel@tonic-gate 		}
23757c478bd9Sstevel@tonic-gate 		if (count == 0) {	/* ignore */
23767c478bd9Sstevel@tonic-gate 			continue;	/* Nothing to free */
23777c478bd9Sstevel@tonic-gate 		}
23787c478bd9Sstevel@tonic-gate 		if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL)
23797c478bd9Sstevel@tonic-gate 			goto out;
23807c478bd9Sstevel@tonic-gate 		i = 0;
23817c478bd9Sstevel@tonic-gate 		for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
23827c478bd9Sstevel@tonic-gate 		    rctlval = rctlval->zone_rctlval_next, i++) {
23837c478bd9Sstevel@tonic-gate 			if (nvlist_alloc(&nvlv[i], NV_UNIQUE_NAME, 0) != 0) {
23847c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_TRUE, "%s failed",
23857c478bd9Sstevel@tonic-gate 				    "nvlist_alloc");
23867c478bd9Sstevel@tonic-gate 				goto out;
23877c478bd9Sstevel@tonic-gate 			}
23887c478bd9Sstevel@tonic-gate 			if (zonecfg_construct_rctlblk(rctlval, rctlblk)
23897c478bd9Sstevel@tonic-gate 			    != Z_OK) {
23907c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "invalid rctl value: "
23917c478bd9Sstevel@tonic-gate 				    "(priv=%s,limit=%s,action=%s)",
23927c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_priv,
23937c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_limit,
23947c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_action);
23957c478bd9Sstevel@tonic-gate 				goto out;
23967c478bd9Sstevel@tonic-gate 			}
23977c478bd9Sstevel@tonic-gate 			if (!zonecfg_valid_rctl(name, rctlblk)) {
23987c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
23997c478bd9Sstevel@tonic-gate 				    "(priv=%s,limit=%s,action=%s) is not a "
24007c478bd9Sstevel@tonic-gate 				    "valid value for rctl '%s'",
24017c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_priv,
24027c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_limit,
24037c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_action,
24047c478bd9Sstevel@tonic-gate 				    name);
24057c478bd9Sstevel@tonic-gate 				goto out;
24067c478bd9Sstevel@tonic-gate 			}
24077c478bd9Sstevel@tonic-gate 			if (nvlist_add_uint64(nvlv[i], "privilege",
24087c478bd9Sstevel@tonic-gate 			    rctlblk_get_privilege(rctlblk)) != 0) {
24097c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "%s failed",
24107c478bd9Sstevel@tonic-gate 				    "nvlist_add_uint64");
24117c478bd9Sstevel@tonic-gate 				goto out;
24127c478bd9Sstevel@tonic-gate 			}
24137c478bd9Sstevel@tonic-gate 			if (nvlist_add_uint64(nvlv[i], "limit",
24147c478bd9Sstevel@tonic-gate 			    rctlblk_get_value(rctlblk)) != 0) {
24157c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "%s failed",
24167c478bd9Sstevel@tonic-gate 				    "nvlist_add_uint64");
24177c478bd9Sstevel@tonic-gate 				goto out;
24187c478bd9Sstevel@tonic-gate 			}
24197c478bd9Sstevel@tonic-gate 			if (nvlist_add_uint64(nvlv[i], "action",
24207c478bd9Sstevel@tonic-gate 			    (uint_t)rctlblk_get_local_action(rctlblk, NULL))
24217c478bd9Sstevel@tonic-gate 			    != 0) {
24227c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "%s failed",
24237c478bd9Sstevel@tonic-gate 				    "nvlist_add_uint64");
24247c478bd9Sstevel@tonic-gate 				goto out;
24257c478bd9Sstevel@tonic-gate 			}
24267c478bd9Sstevel@tonic-gate 		}
24277c478bd9Sstevel@tonic-gate 		zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
24287c478bd9Sstevel@tonic-gate 		rctltab.zone_rctl_valptr = NULL;
24297c478bd9Sstevel@tonic-gate 		if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
24307c478bd9Sstevel@tonic-gate 		    != 0) {
24317c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s failed",
24327c478bd9Sstevel@tonic-gate 			    "nvlist_add_nvlist_array");
24337c478bd9Sstevel@tonic-gate 			goto out;
24347c478bd9Sstevel@tonic-gate 		}
24357c478bd9Sstevel@tonic-gate 		for (i = 0; i < count; i++)
24367c478bd9Sstevel@tonic-gate 			nvlist_free(nvlv[i]);
24377c478bd9Sstevel@tonic-gate 		free(nvlv);
24387c478bd9Sstevel@tonic-gate 		nvlv = NULL;
24397c478bd9Sstevel@tonic-gate 		rctlcount++;
24407c478bd9Sstevel@tonic-gate 	}
24417c478bd9Sstevel@tonic-gate 	(void) zonecfg_endrctlent(handle);
24427c478bd9Sstevel@tonic-gate 
24437c478bd9Sstevel@tonic-gate 	if (rctlcount == 0) {
24447c478bd9Sstevel@tonic-gate 		error = 0;
24457c478bd9Sstevel@tonic-gate 		goto out;
24467c478bd9Sstevel@tonic-gate 	}
24477c478bd9Sstevel@tonic-gate 	if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0)
24487c478bd9Sstevel@tonic-gate 	    != 0) {
24497c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack");
24507c478bd9Sstevel@tonic-gate 		goto out;
24517c478bd9Sstevel@tonic-gate 	}
24527c478bd9Sstevel@tonic-gate 
24537c478bd9Sstevel@tonic-gate 	error = 0;
24547c478bd9Sstevel@tonic-gate 	*bufp = nvl_packed;
24557c478bd9Sstevel@tonic-gate 	*bufsizep = nvl_size;
24567c478bd9Sstevel@tonic-gate 
24577c478bd9Sstevel@tonic-gate out:
24587c478bd9Sstevel@tonic-gate 	free(rctlblk);
24597c478bd9Sstevel@tonic-gate 	zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
24607c478bd9Sstevel@tonic-gate 	if (error && nvl_packed != NULL)
24617c478bd9Sstevel@tonic-gate 		free(nvl_packed);
24627c478bd9Sstevel@tonic-gate 	if (nvl != NULL)
24637c478bd9Sstevel@tonic-gate 		nvlist_free(nvl);
24647c478bd9Sstevel@tonic-gate 	if (nvlv != NULL)
24657c478bd9Sstevel@tonic-gate 		free(nvlv);
24667c478bd9Sstevel@tonic-gate 	if (handle != NULL)
24677c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
24687c478bd9Sstevel@tonic-gate 	return (error);
24697c478bd9Sstevel@tonic-gate }
24707c478bd9Sstevel@tonic-gate 
24717c478bd9Sstevel@tonic-gate static int
24727c478bd9Sstevel@tonic-gate get_zone_pool(zlog_t *zlogp, char *poolbuf, size_t bufsz)
24737c478bd9Sstevel@tonic-gate {
24747c478bd9Sstevel@tonic-gate 	zone_dochandle_t handle;
24757c478bd9Sstevel@tonic-gate 	int error;
24767c478bd9Sstevel@tonic-gate 
24777c478bd9Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
24787c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2479ffbafc53Scomay 		return (Z_NOMEM);
24807c478bd9Sstevel@tonic-gate 	}
2481ffbafc53Scomay 	error = zonecfg_get_snapshot_handle(zone_name, handle);
2482ffbafc53Scomay 	if (error != Z_OK) {
24837c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
24847c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
2485ffbafc53Scomay 		return (error);
24867c478bd9Sstevel@tonic-gate 	}
24877c478bd9Sstevel@tonic-gate 	error = zonecfg_get_pool(handle, poolbuf, bufsz);
24887c478bd9Sstevel@tonic-gate 	zonecfg_fini_handle(handle);
24897c478bd9Sstevel@tonic-gate 	return (error);
24907c478bd9Sstevel@tonic-gate }
24917c478bd9Sstevel@tonic-gate 
24927c478bd9Sstevel@tonic-gate static int
2493fa9e4066Sahrens get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
2494fa9e4066Sahrens {
2495fa9e4066Sahrens 	zone_dochandle_t handle;
2496fa9e4066Sahrens 	struct zone_dstab dstab;
2497fa9e4066Sahrens 	size_t total, offset, len;
2498fa9e4066Sahrens 	int error = -1;
2499fa9e4066Sahrens 	char *str;
2500fa9e4066Sahrens 
2501fa9e4066Sahrens 	*bufp = NULL;
2502fa9e4066Sahrens 	*bufsizep = 0;
2503fa9e4066Sahrens 
2504fa9e4066Sahrens 	if ((handle = zonecfg_init_handle()) == NULL) {
2505fa9e4066Sahrens 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2506fa9e4066Sahrens 		return (-1);
2507fa9e4066Sahrens 	}
2508fa9e4066Sahrens 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2509fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "invalid configuration");
2510fa9e4066Sahrens 		zonecfg_fini_handle(handle);
2511fa9e4066Sahrens 		return (-1);
2512fa9e4066Sahrens 	}
2513fa9e4066Sahrens 
2514fa9e4066Sahrens 	if (zonecfg_setdsent(handle) != Z_OK) {
2515fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
2516fa9e4066Sahrens 		goto out;
2517fa9e4066Sahrens 	}
2518fa9e4066Sahrens 
2519fa9e4066Sahrens 	total = 0;
2520fa9e4066Sahrens 	while (zonecfg_getdsent(handle, &dstab) == Z_OK)
2521fa9e4066Sahrens 		total += strlen(dstab.zone_dataset_name) + 1;
2522fa9e4066Sahrens 	(void) zonecfg_enddsent(handle);
2523fa9e4066Sahrens 
2524fa9e4066Sahrens 	if (total == 0) {
2525fa9e4066Sahrens 		error = 0;
2526fa9e4066Sahrens 		goto out;
2527fa9e4066Sahrens 	}
2528fa9e4066Sahrens 
2529fa9e4066Sahrens 	if ((str = malloc(total)) == NULL) {
2530fa9e4066Sahrens 		zerror(zlogp, B_TRUE, "memory allocation failed");
2531fa9e4066Sahrens 		goto out;
2532fa9e4066Sahrens 	}
2533fa9e4066Sahrens 
2534fa9e4066Sahrens 	if (zonecfg_setdsent(handle) != Z_OK) {
2535fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
2536fa9e4066Sahrens 		goto out;
2537fa9e4066Sahrens 	}
2538fa9e4066Sahrens 	offset = 0;
2539fa9e4066Sahrens 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
2540fa9e4066Sahrens 		len = strlen(dstab.zone_dataset_name);
2541fa9e4066Sahrens 		(void) strlcpy(str + offset, dstab.zone_dataset_name,
2542fa9e4066Sahrens 		    sizeof (dstab.zone_dataset_name) - offset);
2543fa9e4066Sahrens 		offset += len;
2544fa9e4066Sahrens 		if (offset != total - 1)
2545fa9e4066Sahrens 			str[offset++] = ',';
2546fa9e4066Sahrens 	}
2547fa9e4066Sahrens 	(void) zonecfg_enddsent(handle);
2548fa9e4066Sahrens 
2549fa9e4066Sahrens 	error = 0;
2550fa9e4066Sahrens 	*bufp = str;
2551fa9e4066Sahrens 	*bufsizep = total;
2552fa9e4066Sahrens 
2553fa9e4066Sahrens out:
2554fa9e4066Sahrens 	if (error != 0 && str != NULL)
2555fa9e4066Sahrens 		free(str);
2556fa9e4066Sahrens 	if (handle != NULL)
2557fa9e4066Sahrens 		zonecfg_fini_handle(handle);
2558fa9e4066Sahrens 
2559fa9e4066Sahrens 	return (error);
2560fa9e4066Sahrens }
2561fa9e4066Sahrens 
2562fa9e4066Sahrens static int
2563fa9e4066Sahrens validate_datasets(zlog_t *zlogp)
2564fa9e4066Sahrens {
2565fa9e4066Sahrens 	zone_dochandle_t handle;
2566fa9e4066Sahrens 	struct zone_dstab dstab;
2567fa9e4066Sahrens 	zfs_handle_t *zhp;
256899653d4eSeschrock 	libzfs_handle_t *hdl;
2569fa9e4066Sahrens 
2570fa9e4066Sahrens 	if ((handle = zonecfg_init_handle()) == NULL) {
2571fa9e4066Sahrens 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2572fa9e4066Sahrens 		return (-1);
2573fa9e4066Sahrens 	}
2574fa9e4066Sahrens 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2575fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "invalid configuration");
2576fa9e4066Sahrens 		zonecfg_fini_handle(handle);
2577fa9e4066Sahrens 		return (-1);
2578fa9e4066Sahrens 	}
2579fa9e4066Sahrens 
2580fa9e4066Sahrens 	if (zonecfg_setdsent(handle) != Z_OK) {
2581fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "invalid configuration");
2582fa9e4066Sahrens 		zonecfg_fini_handle(handle);
2583fa9e4066Sahrens 		return (-1);
2584fa9e4066Sahrens 	}
2585fa9e4066Sahrens 
258699653d4eSeschrock 	if ((hdl = libzfs_init()) == NULL) {
258799653d4eSeschrock 		zerror(zlogp, B_FALSE, "opening ZFS library");
258899653d4eSeschrock 		zonecfg_fini_handle(handle);
258999653d4eSeschrock 		return (-1);
259099653d4eSeschrock 	}
2591fa9e4066Sahrens 
2592fa9e4066Sahrens 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
2593fa9e4066Sahrens 
259499653d4eSeschrock 		if ((zhp = zfs_open(hdl, dstab.zone_dataset_name,
2595fa9e4066Sahrens 		    ZFS_TYPE_FILESYSTEM)) == NULL) {
2596fa9e4066Sahrens 			zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'",
2597fa9e4066Sahrens 			    dstab.zone_dataset_name);
2598fa9e4066Sahrens 			zonecfg_fini_handle(handle);
259999653d4eSeschrock 			libzfs_fini(hdl);
2600fa9e4066Sahrens 			return (-1);
2601fa9e4066Sahrens 		}
2602fa9e4066Sahrens 
2603fa9e4066Sahrens 		/*
2604fa9e4066Sahrens 		 * Automatically set the 'zoned' property.  We check the value
2605fa9e4066Sahrens 		 * first because we'll get EPERM if it is already set.
2606fa9e4066Sahrens 		 */
2607fa9e4066Sahrens 		if (!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
2608*e9dbad6fSeschrock 		    zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_ZONED),
2609*e9dbad6fSeschrock 		    "on") != 0) {
2610fa9e4066Sahrens 			zerror(zlogp, B_FALSE, "cannot set 'zoned' "
2611fa9e4066Sahrens 			    "property for ZFS dataset '%s'\n",
2612fa9e4066Sahrens 			    dstab.zone_dataset_name);
2613fa9e4066Sahrens 			zonecfg_fini_handle(handle);
2614fa9e4066Sahrens 			zfs_close(zhp);
261599653d4eSeschrock 			libzfs_fini(hdl);
2616fa9e4066Sahrens 			return (-1);
2617fa9e4066Sahrens 		}
2618fa9e4066Sahrens 
2619fa9e4066Sahrens 		zfs_close(zhp);
2620fa9e4066Sahrens 	}
2621fa9e4066Sahrens 	(void) zonecfg_enddsent(handle);
2622fa9e4066Sahrens 
2623fa9e4066Sahrens 	zonecfg_fini_handle(handle);
262499653d4eSeschrock 	libzfs_fini(hdl);
2625fa9e4066Sahrens 
2626fa9e4066Sahrens 	return (0);
2627fa9e4066Sahrens }
2628fa9e4066Sahrens 
2629fa9e4066Sahrens static int
26307c478bd9Sstevel@tonic-gate bind_to_pool(zlog_t *zlogp, zoneid_t zoneid)
26317c478bd9Sstevel@tonic-gate {
26327c478bd9Sstevel@tonic-gate 	pool_conf_t *poolconf;
26337c478bd9Sstevel@tonic-gate 	pool_t *pool;
26347c478bd9Sstevel@tonic-gate 	char poolname[MAXPATHLEN];
26357c478bd9Sstevel@tonic-gate 	int status;
26367c478bd9Sstevel@tonic-gate 	int error;
26377c478bd9Sstevel@tonic-gate 
26387c478bd9Sstevel@tonic-gate 	/*
26397c478bd9Sstevel@tonic-gate 	 * Find the pool mentioned in the zone configuration, and bind to it.
26407c478bd9Sstevel@tonic-gate 	 */
26417c478bd9Sstevel@tonic-gate 	error = get_zone_pool(zlogp, poolname, sizeof (poolname));
26427c478bd9Sstevel@tonic-gate 	if (error == Z_NO_ENTRY || (error == Z_OK && strlen(poolname) == 0)) {
26437c478bd9Sstevel@tonic-gate 		/*
26447c478bd9Sstevel@tonic-gate 		 * The property is not set on the zone, so the pool
26457c478bd9Sstevel@tonic-gate 		 * should be bound to the default pool.  But that's
26467c478bd9Sstevel@tonic-gate 		 * already done by the kernel, so we can just return.
26477c478bd9Sstevel@tonic-gate 		 */
26487c478bd9Sstevel@tonic-gate 		return (0);
26497c478bd9Sstevel@tonic-gate 	}
26507c478bd9Sstevel@tonic-gate 	if (error != Z_OK) {
26517c478bd9Sstevel@tonic-gate 		/*
26527c478bd9Sstevel@tonic-gate 		 * Not an error, even though it shouldn't be happening.
26537c478bd9Sstevel@tonic-gate 		 */
26547c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
26557c478bd9Sstevel@tonic-gate 		    "WARNING: unable to retrieve default pool.");
26567c478bd9Sstevel@tonic-gate 		return (0);
26577c478bd9Sstevel@tonic-gate 	}
26587c478bd9Sstevel@tonic-gate 	/*
26597c478bd9Sstevel@tonic-gate 	 * Don't do anything if pools aren't enabled.
26607c478bd9Sstevel@tonic-gate 	 */
26617c478bd9Sstevel@tonic-gate 	if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED) {
26627c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: pools facility not active; "
26637c478bd9Sstevel@tonic-gate 		    "zone will not be bound to pool '%s'.", poolname);
26647c478bd9Sstevel@tonic-gate 		return (0);
26657c478bd9Sstevel@tonic-gate 	}
26667c478bd9Sstevel@tonic-gate 	/*
26677c478bd9Sstevel@tonic-gate 	 * Try to provide a sane error message if the requested pool doesn't
26687c478bd9Sstevel@tonic-gate 	 * exist.
26697c478bd9Sstevel@tonic-gate 	 */
26707c478bd9Sstevel@tonic-gate 	if ((poolconf = pool_conf_alloc()) == NULL) {
26717c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "pool_conf_alloc");
26727c478bd9Sstevel@tonic-gate 		return (-1);
26737c478bd9Sstevel@tonic-gate 	}
26747c478bd9Sstevel@tonic-gate 	if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) !=
26757c478bd9Sstevel@tonic-gate 	    PO_SUCCESS) {
26767c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "pool_conf_open");
26777c478bd9Sstevel@tonic-gate 		pool_conf_free(poolconf);
26787c478bd9Sstevel@tonic-gate 		return (-1);
26797c478bd9Sstevel@tonic-gate 	}
26807c478bd9Sstevel@tonic-gate 	pool = pool_get_pool(poolconf, poolname);
26817c478bd9Sstevel@tonic-gate 	(void) pool_conf_close(poolconf);
26827c478bd9Sstevel@tonic-gate 	pool_conf_free(poolconf);
26837c478bd9Sstevel@tonic-gate 	if (pool == NULL) {
26847c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: pool '%s' not found; "
26857c478bd9Sstevel@tonic-gate 		    "using default pool.", poolname);
26867c478bd9Sstevel@tonic-gate 		return (0);
26877c478bd9Sstevel@tonic-gate 	}
26887c478bd9Sstevel@tonic-gate 	/*
26897c478bd9Sstevel@tonic-gate 	 * Bind the zone to the pool.
26907c478bd9Sstevel@tonic-gate 	 */
26917c478bd9Sstevel@tonic-gate 	if (pool_set_binding(poolname, P_ZONEID, zoneid) != PO_SUCCESS) {
26927c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: unable to bind to pool '%s'; "
26937c478bd9Sstevel@tonic-gate 		    "using default pool.", poolname);
26947c478bd9Sstevel@tonic-gate 	}
26957c478bd9Sstevel@tonic-gate 	return (0);
26967c478bd9Sstevel@tonic-gate }
26977c478bd9Sstevel@tonic-gate 
269845916cd2Sjpk /*
269945916cd2Sjpk  * Mount lower level home directories into/from current zone
270045916cd2Sjpk  * Share exported directories specified in dfstab for zone
270145916cd2Sjpk  */
270245916cd2Sjpk static int
270345916cd2Sjpk tsol_mounts(zlog_t *zlogp, char *zone_name, char *rootpath)
270445916cd2Sjpk {
270545916cd2Sjpk 	zoneid_t *zids = NULL;
270645916cd2Sjpk 	priv_set_t *zid_privs;
270745916cd2Sjpk 	const priv_impl_info_t *ip = NULL;
270845916cd2Sjpk 	uint_t nzents_saved;
270945916cd2Sjpk 	uint_t nzents;
271045916cd2Sjpk 	int i;
271145916cd2Sjpk 	char readonly[] = "ro";
271245916cd2Sjpk 	struct zone_fstab lower_fstab;
271345916cd2Sjpk 	char *argv[4];
271445916cd2Sjpk 
271545916cd2Sjpk 	if (!is_system_labeled())
271645916cd2Sjpk 		return (0);
271745916cd2Sjpk 
271845916cd2Sjpk 	if (zid_label == NULL) {
271945916cd2Sjpk 		zid_label = m_label_alloc(MAC_LABEL);
272045916cd2Sjpk 		if (zid_label == NULL)
272145916cd2Sjpk 			return (-1);
272245916cd2Sjpk 	}
272345916cd2Sjpk 
272445916cd2Sjpk 	/* Make sure our zone has an /export/home dir */
272545916cd2Sjpk 	(void) make_one_dir(zlogp, rootpath, "/export/home",
272645916cd2Sjpk 	    DEFAULT_DIR_MODE);
272745916cd2Sjpk 
272845916cd2Sjpk 	lower_fstab.zone_fs_raw[0] = '\0';
272945916cd2Sjpk 	(void) strlcpy(lower_fstab.zone_fs_type, MNTTYPE_LOFS,
273045916cd2Sjpk 	    sizeof (lower_fstab.zone_fs_type));
273145916cd2Sjpk 	lower_fstab.zone_fs_options = NULL;
273245916cd2Sjpk 	(void) zonecfg_add_fs_option(&lower_fstab, readonly);
273345916cd2Sjpk 
273445916cd2Sjpk 	/*
273545916cd2Sjpk 	 * Get the list of zones from the kernel
273645916cd2Sjpk 	 */
273745916cd2Sjpk 	if (zone_list(NULL, &nzents) != 0) {
273845916cd2Sjpk 		zerror(zlogp, B_TRUE, "unable to list zones");
273945916cd2Sjpk 		zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
274045916cd2Sjpk 		return (-1);
274145916cd2Sjpk 	}
274245916cd2Sjpk again:
274345916cd2Sjpk 	if (nzents == 0) {
274445916cd2Sjpk 		zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
274545916cd2Sjpk 		return (-1);
274645916cd2Sjpk 	}
274745916cd2Sjpk 
274845916cd2Sjpk 	zids = malloc(nzents * sizeof (zoneid_t));
274945916cd2Sjpk 	if (zids == NULL) {
27503f2f09c1Sdp 		zerror(zlogp, B_TRUE, "memory allocation failed");
275145916cd2Sjpk 		return (-1);
275245916cd2Sjpk 	}
275345916cd2Sjpk 	nzents_saved = nzents;
275445916cd2Sjpk 
275545916cd2Sjpk 	if (zone_list(zids, &nzents) != 0) {
275645916cd2Sjpk 		zerror(zlogp, B_TRUE, "unable to list zones");
275745916cd2Sjpk 		zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
275845916cd2Sjpk 		free(zids);
275945916cd2Sjpk 		return (-1);
276045916cd2Sjpk 	}
276145916cd2Sjpk 	if (nzents != nzents_saved) {
276245916cd2Sjpk 		/* list changed, try again */
276345916cd2Sjpk 		free(zids);
276445916cd2Sjpk 		goto again;
276545916cd2Sjpk 	}
276645916cd2Sjpk 
276745916cd2Sjpk 	ip = getprivimplinfo();
276845916cd2Sjpk 	if ((zid_privs = priv_allocset()) == NULL) {
276945916cd2Sjpk 		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
277045916cd2Sjpk 		zonecfg_free_fs_option_list(
277145916cd2Sjpk 		    lower_fstab.zone_fs_options);
277245916cd2Sjpk 		free(zids);
277345916cd2Sjpk 		return (-1);
277445916cd2Sjpk 	}
277545916cd2Sjpk 
277645916cd2Sjpk 	for (i = 0; i < nzents; i++) {
277745916cd2Sjpk 		char zid_name[ZONENAME_MAX];
277845916cd2Sjpk 		zone_state_t zid_state;
277945916cd2Sjpk 		char zid_rpath[MAXPATHLEN];
278045916cd2Sjpk 		struct stat stat_buf;
278145916cd2Sjpk 
278245916cd2Sjpk 		if (zids[i] == GLOBAL_ZONEID)
278345916cd2Sjpk 			continue;
278445916cd2Sjpk 
278545916cd2Sjpk 		if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1)
278645916cd2Sjpk 			continue;
278745916cd2Sjpk 
278845916cd2Sjpk 		/*
278945916cd2Sjpk 		 * Do special setup for the zone we are booting
279045916cd2Sjpk 		 */
279145916cd2Sjpk 		if (strcmp(zid_name, zone_name) == 0) {
279245916cd2Sjpk 			struct zone_fstab autofs_fstab;
279345916cd2Sjpk 			char map_path[MAXPATHLEN];
279445916cd2Sjpk 			int fd;
279545916cd2Sjpk 
279645916cd2Sjpk 			/*
279745916cd2Sjpk 			 * Create auto_home_<zone> map for this zone
279845916cd2Sjpk 			 * in the global zone. The local zone entry
279945916cd2Sjpk 			 * will be created by automount when the zone
280045916cd2Sjpk 			 * is booted.
280145916cd2Sjpk 			 */
280245916cd2Sjpk 
280345916cd2Sjpk 			(void) snprintf(autofs_fstab.zone_fs_special,
280445916cd2Sjpk 			    MAXPATHLEN, "auto_home_%s", zid_name);
280545916cd2Sjpk 
280645916cd2Sjpk 			(void) snprintf(autofs_fstab.zone_fs_dir, MAXPATHLEN,
280745916cd2Sjpk 			    "/zone/%s/home", zid_name);
280845916cd2Sjpk 
280945916cd2Sjpk 			(void) snprintf(map_path, sizeof (map_path),
281045916cd2Sjpk 			    "/etc/%s", autofs_fstab.zone_fs_special);
281145916cd2Sjpk 			/*
281245916cd2Sjpk 			 * If the map file doesn't exist create a template
281345916cd2Sjpk 			 */
281445916cd2Sjpk 			if ((fd = open(map_path, O_RDWR | O_CREAT | O_EXCL,
281545916cd2Sjpk 			    S_IRUSR | S_IWUSR | S_IRGRP| S_IROTH)) != -1) {
281645916cd2Sjpk 				int len;
281745916cd2Sjpk 				char map_rec[MAXPATHLEN];
281845916cd2Sjpk 
281945916cd2Sjpk 				len = snprintf(map_rec, sizeof (map_rec),
282045916cd2Sjpk 				    "+%s\n*\t-fstype=lofs\t:%s/export/home/&\n",
282145916cd2Sjpk 				    autofs_fstab.zone_fs_special, rootpath);
282245916cd2Sjpk 				(void) write(fd, map_rec, len);
282345916cd2Sjpk 				(void) close(fd);
282445916cd2Sjpk 			}
282545916cd2Sjpk 
282645916cd2Sjpk 			/*
282745916cd2Sjpk 			 * Mount auto_home_<zone> in the global zone if absent.
282845916cd2Sjpk 			 * If it's already of type autofs, then
282945916cd2Sjpk 			 * don't mount it again.
283045916cd2Sjpk 			 */
283145916cd2Sjpk 			if ((stat(autofs_fstab.zone_fs_dir, &stat_buf) == -1) ||
283245916cd2Sjpk 			    strcmp(stat_buf.st_fstype, MNTTYPE_AUTOFS) != 0) {
283345916cd2Sjpk 				char optstr[] = "indirect,ignore,nobrowse";
283445916cd2Sjpk 
283545916cd2Sjpk 				(void) make_one_dir(zlogp, "",
283645916cd2Sjpk 				    autofs_fstab.zone_fs_dir, DEFAULT_DIR_MODE);
283745916cd2Sjpk 
283845916cd2Sjpk 				/*
283945916cd2Sjpk 				 * Mount will fail if automounter has already
284045916cd2Sjpk 				 * processed the auto_home_<zonename> map
284145916cd2Sjpk 				 */
284245916cd2Sjpk 				(void) domount(zlogp, MNTTYPE_AUTOFS, optstr,
284345916cd2Sjpk 				    autofs_fstab.zone_fs_special,
284445916cd2Sjpk 				    autofs_fstab.zone_fs_dir);
284545916cd2Sjpk 			}
284645916cd2Sjpk 			continue;
284745916cd2Sjpk 		}
284845916cd2Sjpk 
284945916cd2Sjpk 
285045916cd2Sjpk 		if (zone_get_state(zid_name, &zid_state) != Z_OK ||
285148451833Scarlsonj 		    (zid_state != ZONE_STATE_READY &&
285248451833Scarlsonj 		    zid_state != ZONE_STATE_RUNNING))
285345916cd2Sjpk 			/* Skip over zones without mounted filesystems */
285445916cd2Sjpk 			continue;
285545916cd2Sjpk 
285645916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label,
285745916cd2Sjpk 		    sizeof (m_label_t)) < 0)
285845916cd2Sjpk 			/* Skip over zones with unspecified label */
285945916cd2Sjpk 			continue;
286045916cd2Sjpk 
286145916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath,
286245916cd2Sjpk 		    sizeof (zid_rpath)) == -1)
286345916cd2Sjpk 			/* Skip over zones with bad path */
286445916cd2Sjpk 			continue;
286545916cd2Sjpk 
286645916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_PRIVSET, zid_privs,
286745916cd2Sjpk 		    sizeof (priv_chunk_t) * ip->priv_setsize) == -1)
286845916cd2Sjpk 			/* Skip over zones with bad privs */
286945916cd2Sjpk 			continue;
287045916cd2Sjpk 
287145916cd2Sjpk 		/*
287245916cd2Sjpk 		 * Reading down is valid according to our label model
287345916cd2Sjpk 		 * but some customers want to disable it because it
287445916cd2Sjpk 		 * allows execute down and other possible attacks.
287545916cd2Sjpk 		 * Therefore, we restrict this feature to zones that
287645916cd2Sjpk 		 * have the NET_MAC_AWARE privilege which is required
287745916cd2Sjpk 		 * for NFS read-down semantics.
287845916cd2Sjpk 		 */
287945916cd2Sjpk 		if ((bldominates(zlabel, zid_label)) &&
288045916cd2Sjpk 		    (priv_ismember(zprivs, PRIV_NET_MAC_AWARE))) {
288145916cd2Sjpk 			/*
288245916cd2Sjpk 			 * Our zone dominates this one.
288345916cd2Sjpk 			 * Create a lofs mount from lower zone's /export/home
288445916cd2Sjpk 			 */
288545916cd2Sjpk 			(void) snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN,
288645916cd2Sjpk 			    "%s/zone/%s/export/home", rootpath, zid_name);
288745916cd2Sjpk 
288845916cd2Sjpk 			/*
288945916cd2Sjpk 			 * If the target is already an LOFS mount
289045916cd2Sjpk 			 * then don't do it again.
289145916cd2Sjpk 			 */
289245916cd2Sjpk 			if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) ||
289345916cd2Sjpk 			    strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) {
289445916cd2Sjpk 
289545916cd2Sjpk 				if (snprintf(lower_fstab.zone_fs_special,
289645916cd2Sjpk 				    MAXPATHLEN, "%s/export",
289745916cd2Sjpk 				    zid_rpath) > MAXPATHLEN)
289845916cd2Sjpk 					continue;
289945916cd2Sjpk 
290045916cd2Sjpk 				/*
290145916cd2Sjpk 				 * Make sure the lower-level home exists
290245916cd2Sjpk 				 */
290345916cd2Sjpk 				if (make_one_dir(zlogp,
290445916cd2Sjpk 				    lower_fstab.zone_fs_special,
290545916cd2Sjpk 				    "/home", DEFAULT_DIR_MODE) != 0)
290645916cd2Sjpk 					continue;
290745916cd2Sjpk 
290845916cd2Sjpk 				(void) strlcat(lower_fstab.zone_fs_special,
290945916cd2Sjpk 				    "/home", MAXPATHLEN);
291045916cd2Sjpk 
291145916cd2Sjpk 				/*
291245916cd2Sjpk 				 * Mount can fail because the lower-level
291345916cd2Sjpk 				 * zone may have already done a mount up.
291445916cd2Sjpk 				 */
291545916cd2Sjpk 				(void) mount_one(zlogp, &lower_fstab, "");
291645916cd2Sjpk 			}
291745916cd2Sjpk 		} else if ((bldominates(zid_label, zlabel)) &&
291845916cd2Sjpk 		    (priv_ismember(zid_privs, PRIV_NET_MAC_AWARE))) {
291945916cd2Sjpk 			/*
292045916cd2Sjpk 			 * This zone dominates our zone.
292145916cd2Sjpk 			 * Create a lofs mount from our zone's /export/home
292245916cd2Sjpk 			 */
292345916cd2Sjpk 			if (snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN,
292445916cd2Sjpk 			    "%s/zone/%s/export/home", zid_rpath,
292545916cd2Sjpk 			    zone_name) > MAXPATHLEN)
292645916cd2Sjpk 				continue;
292745916cd2Sjpk 
292845916cd2Sjpk 			/*
292945916cd2Sjpk 			 * If the target is already an LOFS mount
293045916cd2Sjpk 			 * then don't do it again.
293145916cd2Sjpk 			 */
293245916cd2Sjpk 			if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) ||
293345916cd2Sjpk 			    strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) {
293445916cd2Sjpk 
293545916cd2Sjpk 				(void) snprintf(lower_fstab.zone_fs_special,
293645916cd2Sjpk 				    MAXPATHLEN, "%s/export/home", rootpath);
293745916cd2Sjpk 
293845916cd2Sjpk 				/*
293945916cd2Sjpk 				 * Mount can fail because the higher-level
294045916cd2Sjpk 				 * zone may have already done a mount down.
294145916cd2Sjpk 				 */
294245916cd2Sjpk 				(void) mount_one(zlogp, &lower_fstab, "");
294345916cd2Sjpk 			}
294445916cd2Sjpk 		}
294545916cd2Sjpk 	}
294645916cd2Sjpk 	zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
294745916cd2Sjpk 	priv_freeset(zid_privs);
294845916cd2Sjpk 	free(zids);
294945916cd2Sjpk 
295045916cd2Sjpk 	/*
295145916cd2Sjpk 	 * Now share any exported directories from this zone.
295245916cd2Sjpk 	 * Each zone can have its own dfstab.
295345916cd2Sjpk 	 */
295445916cd2Sjpk 
295545916cd2Sjpk 	argv[0] = "zoneshare";
295645916cd2Sjpk 	argv[1] = "-z";
295745916cd2Sjpk 	argv[2] = zone_name;
295845916cd2Sjpk 	argv[3] = NULL;
295945916cd2Sjpk 
296045916cd2Sjpk 	(void) forkexec(zlogp, "/usr/lib/zones/zoneshare", argv);
296145916cd2Sjpk 	/* Don't check for errors since they don't affect the zone */
296245916cd2Sjpk 
296345916cd2Sjpk 	return (0);
296445916cd2Sjpk }
296545916cd2Sjpk 
296645916cd2Sjpk /*
296745916cd2Sjpk  * Unmount lofs mounts from higher level zones
296845916cd2Sjpk  * Unshare nfs exported directories
296945916cd2Sjpk  */
297045916cd2Sjpk static void
297145916cd2Sjpk tsol_unmounts(zlog_t *zlogp, char *zone_name)
297245916cd2Sjpk {
297345916cd2Sjpk 	zoneid_t *zids = NULL;
297445916cd2Sjpk 	uint_t nzents_saved;
297545916cd2Sjpk 	uint_t nzents;
297645916cd2Sjpk 	int i;
297745916cd2Sjpk 	char *argv[4];
297845916cd2Sjpk 	char path[MAXPATHLEN];
297945916cd2Sjpk 
298045916cd2Sjpk 	if (!is_system_labeled())
298145916cd2Sjpk 		return;
298245916cd2Sjpk 
298345916cd2Sjpk 	/*
298445916cd2Sjpk 	 * Get the list of zones from the kernel
298545916cd2Sjpk 	 */
298645916cd2Sjpk 	if (zone_list(NULL, &nzents) != 0) {
298745916cd2Sjpk 		return;
298845916cd2Sjpk 	}
298945916cd2Sjpk 
299045916cd2Sjpk 	if (zid_label == NULL) {
299145916cd2Sjpk 		zid_label = m_label_alloc(MAC_LABEL);
299245916cd2Sjpk 		if (zid_label == NULL)
299345916cd2Sjpk 			return;
299445916cd2Sjpk 	}
299545916cd2Sjpk 
299645916cd2Sjpk again:
299745916cd2Sjpk 	if (nzents == 0)
299845916cd2Sjpk 		return;
299945916cd2Sjpk 
300045916cd2Sjpk 	zids = malloc(nzents * sizeof (zoneid_t));
300145916cd2Sjpk 	if (zids == NULL) {
30023f2f09c1Sdp 		zerror(zlogp, B_TRUE, "memory allocation failed");
300345916cd2Sjpk 		return;
300445916cd2Sjpk 	}
300545916cd2Sjpk 	nzents_saved = nzents;
300645916cd2Sjpk 
300745916cd2Sjpk 	if (zone_list(zids, &nzents) != 0) {
300845916cd2Sjpk 		free(zids);
300945916cd2Sjpk 		return;
301045916cd2Sjpk 	}
301145916cd2Sjpk 	if (nzents != nzents_saved) {
301245916cd2Sjpk 		/* list changed, try again */
301345916cd2Sjpk 		free(zids);
301445916cd2Sjpk 		goto again;
301545916cd2Sjpk 	}
301645916cd2Sjpk 
301745916cd2Sjpk 	for (i = 0; i < nzents; i++) {
301845916cd2Sjpk 		char zid_name[ZONENAME_MAX];
301945916cd2Sjpk 		zone_state_t zid_state;
302045916cd2Sjpk 		char zid_rpath[MAXPATHLEN];
302145916cd2Sjpk 
302245916cd2Sjpk 		if (zids[i] == GLOBAL_ZONEID)
302345916cd2Sjpk 			continue;
302445916cd2Sjpk 
302545916cd2Sjpk 		if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1)
302645916cd2Sjpk 			continue;
302745916cd2Sjpk 
302845916cd2Sjpk 		/*
302945916cd2Sjpk 		 * Skip the zone we are halting
303045916cd2Sjpk 		 */
303145916cd2Sjpk 		if (strcmp(zid_name, zone_name) == 0)
303245916cd2Sjpk 			continue;
303345916cd2Sjpk 
303445916cd2Sjpk 		if ((zone_getattr(zids[i], ZONE_ATTR_STATUS, &zid_state,
303545916cd2Sjpk 		    sizeof (zid_state)) < 0) ||
303645916cd2Sjpk 		    (zid_state < ZONE_IS_READY))
303745916cd2Sjpk 			/* Skip over zones without mounted filesystems */
303845916cd2Sjpk 			continue;
303945916cd2Sjpk 
304045916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label,
304145916cd2Sjpk 		    sizeof (m_label_t)) < 0)
304245916cd2Sjpk 			/* Skip over zones with unspecified label */
304345916cd2Sjpk 			continue;
304445916cd2Sjpk 
304545916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath,
304645916cd2Sjpk 		    sizeof (zid_rpath)) == -1)
304745916cd2Sjpk 			/* Skip over zones with bad path */
304845916cd2Sjpk 			continue;
304945916cd2Sjpk 
305045916cd2Sjpk 		if (zlabel != NULL && bldominates(zid_label, zlabel)) {
305145916cd2Sjpk 			/*
305245916cd2Sjpk 			 * This zone dominates our zone.
305345916cd2Sjpk 			 * Unmount the lofs mount of our zone's /export/home
305445916cd2Sjpk 			 */
305545916cd2Sjpk 
305645916cd2Sjpk 			if (snprintf(path, MAXPATHLEN,
305745916cd2Sjpk 			    "%s/zone/%s/export/home", zid_rpath,
305845916cd2Sjpk 			    zone_name) > MAXPATHLEN)
305945916cd2Sjpk 				continue;
306045916cd2Sjpk 
306145916cd2Sjpk 			/* Skip over mount failures */
306245916cd2Sjpk 			(void) umount(path);
306345916cd2Sjpk 		}
306445916cd2Sjpk 	}
306545916cd2Sjpk 	free(zids);
306645916cd2Sjpk 
306745916cd2Sjpk 	/*
306845916cd2Sjpk 	 * Unmount global zone autofs trigger for this zone
306945916cd2Sjpk 	 */
307045916cd2Sjpk 	(void) snprintf(path, MAXPATHLEN, "/zone/%s/home", zone_name);
307145916cd2Sjpk 	/* Skip over mount failures */
307245916cd2Sjpk 	(void) umount(path);
307345916cd2Sjpk 
307445916cd2Sjpk 	/*
307545916cd2Sjpk 	 * Next unshare any exported directories from this zone.
307645916cd2Sjpk 	 */
307745916cd2Sjpk 
307845916cd2Sjpk 	argv[0] = "zoneunshare";
307945916cd2Sjpk 	argv[1] = "-z";
308045916cd2Sjpk 	argv[2] = zone_name;
308145916cd2Sjpk 	argv[3] = NULL;
308245916cd2Sjpk 
308345916cd2Sjpk 	(void) forkexec(zlogp, "/usr/lib/zones/zoneunshare", argv);
308445916cd2Sjpk 	/* Don't check for errors since they don't affect the zone */
308545916cd2Sjpk 
308645916cd2Sjpk 	/*
308745916cd2Sjpk 	 * Finally, deallocate any devices in the zone.
308845916cd2Sjpk 	 */
308945916cd2Sjpk 
309045916cd2Sjpk 	argv[0] = "deallocate";
309145916cd2Sjpk 	argv[1] = "-Isz";
309245916cd2Sjpk 	argv[2] = zone_name;
309345916cd2Sjpk 	argv[3] = NULL;
309445916cd2Sjpk 
309545916cd2Sjpk 	(void) forkexec(zlogp, "/usr/sbin/deallocate", argv);
309645916cd2Sjpk 	/* Don't check for errors since they don't affect the zone */
309745916cd2Sjpk }
309845916cd2Sjpk 
309945916cd2Sjpk /*
310045916cd2Sjpk  * Fetch the Trusted Extensions label and multi-level ports (MLPs) for
310145916cd2Sjpk  * this zone.
310245916cd2Sjpk  */
310345916cd2Sjpk static tsol_zcent_t *
310445916cd2Sjpk get_zone_label(zlog_t *zlogp, priv_set_t *privs)
310545916cd2Sjpk {
310645916cd2Sjpk 	FILE *fp;
310745916cd2Sjpk 	tsol_zcent_t *zcent = NULL;
310845916cd2Sjpk 	char line[MAXTNZLEN];
310945916cd2Sjpk 
311045916cd2Sjpk 	if ((fp = fopen(TNZONECFG_PATH, "r")) == NULL) {
311145916cd2Sjpk 		zerror(zlogp, B_TRUE, "%s", TNZONECFG_PATH);
311245916cd2Sjpk 		return (NULL);
311345916cd2Sjpk 	}
311445916cd2Sjpk 
311545916cd2Sjpk 	while (fgets(line, sizeof (line), fp) != NULL) {
311645916cd2Sjpk 		/*
311745916cd2Sjpk 		 * Check for malformed database
311845916cd2Sjpk 		 */
311945916cd2Sjpk 		if (strlen(line) == MAXTNZLEN - 1)
312045916cd2Sjpk 			break;
312145916cd2Sjpk 		if ((zcent = tsol_sgetzcent(line, NULL, NULL)) == NULL)
312245916cd2Sjpk 			continue;
312345916cd2Sjpk 		if (strcmp(zcent->zc_name, zone_name) == 0)
312445916cd2Sjpk 			break;
312545916cd2Sjpk 		tsol_freezcent(zcent);
312645916cd2Sjpk 		zcent = NULL;
312745916cd2Sjpk 	}
312845916cd2Sjpk 	(void) fclose(fp);
312945916cd2Sjpk 
313045916cd2Sjpk 	if (zcent == NULL) {
313145916cd2Sjpk 		zerror(zlogp, B_FALSE, "zone requires a label assignment. "
313245916cd2Sjpk 		    "See tnzonecfg(4)");
313345916cd2Sjpk 	} else {
313445916cd2Sjpk 		if (zlabel == NULL)
313545916cd2Sjpk 			zlabel = m_label_alloc(MAC_LABEL);
313645916cd2Sjpk 		/*
313745916cd2Sjpk 		 * Save this zone's privileges for later read-down processing
313845916cd2Sjpk 		 */
313945916cd2Sjpk 		if ((zprivs = priv_allocset()) == NULL) {
314045916cd2Sjpk 			zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
314145916cd2Sjpk 			return (NULL);
314245916cd2Sjpk 		} else {
314345916cd2Sjpk 			priv_copyset(privs, zprivs);
314445916cd2Sjpk 		}
314545916cd2Sjpk 	}
314645916cd2Sjpk 	return (zcent);
314745916cd2Sjpk }
314845916cd2Sjpk 
314945916cd2Sjpk /*
315045916cd2Sjpk  * Add the Trusted Extensions multi-level ports for this zone.
315145916cd2Sjpk  */
315245916cd2Sjpk static void
315345916cd2Sjpk set_mlps(zlog_t *zlogp, zoneid_t zoneid, tsol_zcent_t *zcent)
315445916cd2Sjpk {
315545916cd2Sjpk 	tsol_mlp_t *mlp;
315645916cd2Sjpk 	tsol_mlpent_t tsme;
315745916cd2Sjpk 
315845916cd2Sjpk 	if (!is_system_labeled())
315945916cd2Sjpk 		return;
316045916cd2Sjpk 
316145916cd2Sjpk 	tsme.tsme_zoneid = zoneid;
316245916cd2Sjpk 	tsme.tsme_flags = 0;
316345916cd2Sjpk 	for (mlp = zcent->zc_private_mlp; !TSOL_MLP_END(mlp); mlp++) {
316445916cd2Sjpk 		tsme.tsme_mlp = *mlp;
316545916cd2Sjpk 		if (tnmlp(TNDB_LOAD, &tsme) != 0) {
316645916cd2Sjpk 			zerror(zlogp, B_TRUE, "cannot set zone-specific MLP "
316745916cd2Sjpk 			    "on %d-%d/%d", mlp->mlp_port,
316845916cd2Sjpk 			    mlp->mlp_port_upper, mlp->mlp_ipp);
316945916cd2Sjpk 		}
317045916cd2Sjpk 	}
317145916cd2Sjpk 
317245916cd2Sjpk 	tsme.tsme_flags = TSOL_MEF_SHARED;
317345916cd2Sjpk 	for (mlp = zcent->zc_shared_mlp; !TSOL_MLP_END(mlp); mlp++) {
317445916cd2Sjpk 		tsme.tsme_mlp = *mlp;
317545916cd2Sjpk 		if (tnmlp(TNDB_LOAD, &tsme) != 0) {
317645916cd2Sjpk 			zerror(zlogp, B_TRUE, "cannot set shared MLP "
317745916cd2Sjpk 			    "on %d-%d/%d", mlp->mlp_port,
317845916cd2Sjpk 			    mlp->mlp_port_upper, mlp->mlp_ipp);
317945916cd2Sjpk 		}
318045916cd2Sjpk 	}
318145916cd2Sjpk }
318245916cd2Sjpk 
318345916cd2Sjpk static void
318445916cd2Sjpk remove_mlps(zlog_t *zlogp, zoneid_t zoneid)
318545916cd2Sjpk {
318645916cd2Sjpk 	tsol_mlpent_t tsme;
318745916cd2Sjpk 
318845916cd2Sjpk 	if (!is_system_labeled())
318945916cd2Sjpk 		return;
319045916cd2Sjpk 
319145916cd2Sjpk 	(void) memset(&tsme, 0, sizeof (tsme));
319245916cd2Sjpk 	tsme.tsme_zoneid = zoneid;
319345916cd2Sjpk 	if (tnmlp(TNDB_FLUSH, &tsme) != 0)
319445916cd2Sjpk 		zerror(zlogp, B_TRUE, "cannot flush MLPs");
319545916cd2Sjpk }
319645916cd2Sjpk 
31977c478bd9Sstevel@tonic-gate int
31987c478bd9Sstevel@tonic-gate prtmount(const char *fs, void *x) {
31997c478bd9Sstevel@tonic-gate 	zerror((zlog_t *)x, B_FALSE, "  %s", fs);
32007c478bd9Sstevel@tonic-gate 	return (0);
32017c478bd9Sstevel@tonic-gate }
32027c478bd9Sstevel@tonic-gate 
3203108322fbScarlsonj /*
3204108322fbScarlsonj  * Look for zones running on the main system that are using this root (or any
3205108322fbScarlsonj  * subdirectory of it).  Return B_TRUE and print an error if a conflicting zone
3206108322fbScarlsonj  * is found or if we can't tell.
3207108322fbScarlsonj  */
3208108322fbScarlsonj static boolean_t
3209108322fbScarlsonj duplicate_zone_root(zlog_t *zlogp, const char *rootpath)
32107c478bd9Sstevel@tonic-gate {
3211108322fbScarlsonj 	zoneid_t *zids = NULL;
3212108322fbScarlsonj 	uint_t nzids = 0;
3213108322fbScarlsonj 	boolean_t retv;
3214108322fbScarlsonj 	int rlen, zlen;
3215108322fbScarlsonj 	char zroot[MAXPATHLEN];
3216108322fbScarlsonj 	char zonename[ZONENAME_MAX];
3217108322fbScarlsonj 
3218108322fbScarlsonj 	for (;;) {
3219108322fbScarlsonj 		nzids += 10;
3220108322fbScarlsonj 		zids = malloc(nzids * sizeof (*zids));
3221108322fbScarlsonj 		if (zids == NULL) {
32223f2f09c1Sdp 			zerror(zlogp, B_TRUE, "memory allocation failed");
3223108322fbScarlsonj 			return (B_TRUE);
3224108322fbScarlsonj 		}
3225108322fbScarlsonj 		if (zone_list(zids, &nzids) == 0)
3226108322fbScarlsonj 			break;
3227108322fbScarlsonj 		free(zids);
3228108322fbScarlsonj 	}
3229108322fbScarlsonj 	retv = B_FALSE;
3230108322fbScarlsonj 	rlen = strlen(rootpath);
3231108322fbScarlsonj 	while (nzids > 0) {
3232108322fbScarlsonj 		/*
3233108322fbScarlsonj 		 * Ignore errors; they just mean that the zone has disappeared
3234108322fbScarlsonj 		 * while we were busy.
3235108322fbScarlsonj 		 */
3236108322fbScarlsonj 		if (zone_getattr(zids[--nzids], ZONE_ATTR_ROOT, zroot,
3237108322fbScarlsonj 		    sizeof (zroot)) == -1)
3238108322fbScarlsonj 			continue;
3239108322fbScarlsonj 		zlen = strlen(zroot);
3240108322fbScarlsonj 		if (zlen > rlen)
3241108322fbScarlsonj 			zlen = rlen;
3242108322fbScarlsonj 		if (strncmp(rootpath, zroot, zlen) == 0 &&
3243108322fbScarlsonj 		    (zroot[zlen] == '\0' || zroot[zlen] == '/') &&
3244108322fbScarlsonj 		    (rootpath[zlen] == '\0' || rootpath[zlen] == '/')) {
3245108322fbScarlsonj 			if (getzonenamebyid(zids[nzids], zonename,
3246108322fbScarlsonj 			    sizeof (zonename)) == -1)
3247108322fbScarlsonj 				(void) snprintf(zonename, sizeof (zonename),
3248108322fbScarlsonj 				    "id %d", (int)zids[nzids]);
3249108322fbScarlsonj 			zerror(zlogp, B_FALSE,
3250108322fbScarlsonj 			    "zone root %s already in use by zone %s",
3251108322fbScarlsonj 			    rootpath, zonename);
3252108322fbScarlsonj 			retv = B_TRUE;
3253108322fbScarlsonj 			break;
3254108322fbScarlsonj 		}
3255108322fbScarlsonj 	}
3256108322fbScarlsonj 	free(zids);
3257108322fbScarlsonj 	return (retv);
3258108322fbScarlsonj }
3259108322fbScarlsonj 
3260108322fbScarlsonj /*
3261108322fbScarlsonj  * Search for loopback mounts that use this same source node (same device and
3262108322fbScarlsonj  * inode).  Return B_TRUE if there is one or if we can't tell.
3263108322fbScarlsonj  */
3264108322fbScarlsonj static boolean_t
3265108322fbScarlsonj duplicate_reachable_path(zlog_t *zlogp, const char *rootpath)
3266108322fbScarlsonj {
3267108322fbScarlsonj 	struct stat64 rst, zst;
3268108322fbScarlsonj 	struct mnttab *mnp;
3269108322fbScarlsonj 
3270108322fbScarlsonj 	if (stat64(rootpath, &rst) == -1) {
3271108322fbScarlsonj 		zerror(zlogp, B_TRUE, "can't stat %s", rootpath);
3272108322fbScarlsonj 		return (B_TRUE);
3273108322fbScarlsonj 	}
3274108322fbScarlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
3275108322fbScarlsonj 		return (B_TRUE);
3276108322fbScarlsonj 	for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) {
3277108322fbScarlsonj 		if (mnp->mnt_fstype == NULL ||
3278108322fbScarlsonj 		    strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0)
3279108322fbScarlsonj 			continue;
3280108322fbScarlsonj 		/* We're looking at a loopback mount.  Stat it. */
3281108322fbScarlsonj 		if (mnp->mnt_special != NULL &&
3282108322fbScarlsonj 		    stat64(mnp->mnt_special, &zst) != -1 &&
3283108322fbScarlsonj 		    rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) {
3284108322fbScarlsonj 			zerror(zlogp, B_FALSE,
3285108322fbScarlsonj 			    "zone root %s is reachable through %s",
3286108322fbScarlsonj 			    rootpath, mnp->mnt_mountp);
3287108322fbScarlsonj 			return (B_TRUE);
3288108322fbScarlsonj 		}
3289108322fbScarlsonj 	}
3290108322fbScarlsonj 	return (B_FALSE);
3291108322fbScarlsonj }
3292108322fbScarlsonj 
3293108322fbScarlsonj zoneid_t
3294108322fbScarlsonj vplat_create(zlog_t *zlogp, boolean_t mount_cmd)
3295108322fbScarlsonj {
3296108322fbScarlsonj 	zoneid_t rval = -1;
32977c478bd9Sstevel@tonic-gate 	priv_set_t *privs;
32987c478bd9Sstevel@tonic-gate 	char rootpath[MAXPATHLEN];
32997c478bd9Sstevel@tonic-gate 	char *rctlbuf = NULL;
3300108322fbScarlsonj 	size_t rctlbufsz = 0;
3301fa9e4066Sahrens 	char *zfsbuf = NULL;
3302fa9e4066Sahrens 	size_t zfsbufsz = 0;
3303108322fbScarlsonj 	zoneid_t zoneid = -1;
33047c478bd9Sstevel@tonic-gate 	int xerr;
3305108322fbScarlsonj 	char *kzone;
3306108322fbScarlsonj 	FILE *fp = NULL;
330745916cd2Sjpk 	tsol_zcent_t *zcent = NULL;
330845916cd2Sjpk 	int match = 0;
330945916cd2Sjpk 	int doi = 0;
33107c478bd9Sstevel@tonic-gate 
33117c478bd9Sstevel@tonic-gate 	if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
33127c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to determine zone root");
33137c478bd9Sstevel@tonic-gate 		return (-1);
33147c478bd9Sstevel@tonic-gate 	}
3315108322fbScarlsonj 	if (zonecfg_in_alt_root())
3316108322fbScarlsonj 		resolve_lofs(zlogp, rootpath, sizeof (rootpath));
33177c478bd9Sstevel@tonic-gate 
33187c478bd9Sstevel@tonic-gate 	if ((privs = priv_allocset()) == NULL) {
33197c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
33207c478bd9Sstevel@tonic-gate 		return (-1);
33217c478bd9Sstevel@tonic-gate 	}
33227c478bd9Sstevel@tonic-gate 	priv_emptyset(privs);
3323ffbafc53Scomay 	if (get_privset(zlogp, privs, mount_cmd) != 0)
33247c478bd9Sstevel@tonic-gate 		goto error;
3325ffbafc53Scomay 
3326108322fbScarlsonj 	if (!mount_cmd && get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) {
33277c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "Unable to get list of rctls");
33287c478bd9Sstevel@tonic-gate 		goto error;
33297c478bd9Sstevel@tonic-gate 	}
3330ffbafc53Scomay 
3331fa9e4066Sahrens 	if (get_datasets(zlogp, &zfsbuf, &zfsbufsz) != 0) {
3332fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "Unable to get list of ZFS datasets");
3333fa9e4066Sahrens 		goto error;
3334fa9e4066Sahrens 	}
33357c478bd9Sstevel@tonic-gate 
333648451833Scarlsonj 	if (!mount_cmd && is_system_labeled()) {
333745916cd2Sjpk 		zcent = get_zone_label(zlogp, privs);
333848451833Scarlsonj 		if (zcent != NULL) {
333945916cd2Sjpk 			match = zcent->zc_match;
334045916cd2Sjpk 			doi = zcent->zc_doi;
334145916cd2Sjpk 			*zlabel = zcent->zc_label;
334245916cd2Sjpk 		} else {
334345916cd2Sjpk 			goto error;
334445916cd2Sjpk 		}
334545916cd2Sjpk 	}
334645916cd2Sjpk 
3347108322fbScarlsonj 	kzone = zone_name;
3348108322fbScarlsonj 
3349108322fbScarlsonj 	/*
3350108322fbScarlsonj 	 * We must do this scan twice.  First, we look for zones running on the
3351108322fbScarlsonj 	 * main system that are using this root (or any subdirectory of it).
3352108322fbScarlsonj 	 * Next, we reduce to the shortest path and search for loopback mounts
3353108322fbScarlsonj 	 * that use this same source node (same device and inode).
3354108322fbScarlsonj 	 */
3355108322fbScarlsonj 	if (duplicate_zone_root(zlogp, rootpath))
3356108322fbScarlsonj 		goto error;
3357108322fbScarlsonj 	if (duplicate_reachable_path(zlogp, rootpath))
3358108322fbScarlsonj 		goto error;
3359108322fbScarlsonj 
3360108322fbScarlsonj 	if (mount_cmd) {
3361108322fbScarlsonj 		root_to_lu(zlogp, rootpath, sizeof (rootpath), B_TRUE);
3362108322fbScarlsonj 
3363108322fbScarlsonj 		/*
3364108322fbScarlsonj 		 * Forge up a special root for this zone.  When a zone is
3365108322fbScarlsonj 		 * mounted, we can't let the zone have its own root because the
3366108322fbScarlsonj 		 * tools that will be used in this "scratch zone" need access
3367108322fbScarlsonj 		 * to both the zone's resources and the running machine's
3368108322fbScarlsonj 		 * executables.
3369108322fbScarlsonj 		 *
3370108322fbScarlsonj 		 * Note that the mkdir here also catches read-only filesystems.
3371108322fbScarlsonj 		 */
3372108322fbScarlsonj 		if (mkdir(rootpath, 0755) != 0 && errno != EEXIST) {
3373108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", rootpath);
3374108322fbScarlsonj 			goto error;
3375108322fbScarlsonj 		}
3376108322fbScarlsonj 		if (domount(zlogp, "tmpfs", "", "swap", rootpath) != 0)
3377108322fbScarlsonj 			goto error;
3378108322fbScarlsonj 	}
3379108322fbScarlsonj 
3380108322fbScarlsonj 	if (zonecfg_in_alt_root()) {
3381108322fbScarlsonj 		/*
3382108322fbScarlsonj 		 * If we are mounting up a zone in an alternate root partition,
3383108322fbScarlsonj 		 * then we have some additional work to do before starting the
3384108322fbScarlsonj 		 * zone.  First, resolve the root path down so that we're not
3385108322fbScarlsonj 		 * fooled by duplicates.  Then forge up an internal name for
3386108322fbScarlsonj 		 * the zone.
3387108322fbScarlsonj 		 */
3388108322fbScarlsonj 		if ((fp = zonecfg_open_scratch("", B_TRUE)) == NULL) {
3389108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot open mapfile");
3390108322fbScarlsonj 			goto error;
3391108322fbScarlsonj 		}
3392108322fbScarlsonj 		if (zonecfg_lock_scratch(fp) != 0) {
3393108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot lock mapfile");
3394108322fbScarlsonj 			goto error;
3395108322fbScarlsonj 		}
3396108322fbScarlsonj 		if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
3397108322fbScarlsonj 		    NULL, 0) == 0) {
3398108322fbScarlsonj 			zerror(zlogp, B_FALSE, "scratch zone already running");
3399108322fbScarlsonj 			goto error;
3400108322fbScarlsonj 		}
3401108322fbScarlsonj 		/* This is the preferred name */
3402108322fbScarlsonj 		(void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s",
3403108322fbScarlsonj 		    zone_name);
3404108322fbScarlsonj 		srandom(getpid());
3405108322fbScarlsonj 		while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL,
3406108322fbScarlsonj 		    0) == 0) {
3407108322fbScarlsonj 			/* This is just an arbitrary name; note "." usage */
3408108322fbScarlsonj 			(void) snprintf(kernzone, sizeof (kernzone),
3409108322fbScarlsonj 			    "SUNWlu.%08lX%08lX", random(), random());
3410108322fbScarlsonj 		}
3411108322fbScarlsonj 		kzone = kernzone;
3412108322fbScarlsonj 	}
3413108322fbScarlsonj 
34147c478bd9Sstevel@tonic-gate 	xerr = 0;
3415108322fbScarlsonj 	if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf,
341645916cd2Sjpk 	    rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel)) == -1) {
34177c478bd9Sstevel@tonic-gate 		if (xerr == ZE_AREMOUNTS) {
34187c478bd9Sstevel@tonic-gate 			if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) {
34197c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
34207c478bd9Sstevel@tonic-gate 				    "An unknown file-system is mounted on "
34217c478bd9Sstevel@tonic-gate 				    "a subdirectory of %s", rootpath);
34227c478bd9Sstevel@tonic-gate 			} else {
34237c478bd9Sstevel@tonic-gate 
34247c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
34257c478bd9Sstevel@tonic-gate 				    "These file-systems are mounted on "
34267c478bd9Sstevel@tonic-gate 				    "subdirectories of %s:", rootpath);
34277c478bd9Sstevel@tonic-gate 				(void) zonecfg_find_mounts(rootpath,
34287c478bd9Sstevel@tonic-gate 				    prtmount, zlogp);
34297c478bd9Sstevel@tonic-gate 			}
34307c478bd9Sstevel@tonic-gate 		} else if (xerr == ZE_CHROOTED) {
34317c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s: "
34327c478bd9Sstevel@tonic-gate 			    "cannot create a zone from a chrooted "
34337c478bd9Sstevel@tonic-gate 			    "environment", "zone_create");
34347c478bd9Sstevel@tonic-gate 		} else {
34357c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s failed", "zone_create");
34367c478bd9Sstevel@tonic-gate 		}
34377c478bd9Sstevel@tonic-gate 		goto error;
34387c478bd9Sstevel@tonic-gate 	}
3439108322fbScarlsonj 
3440108322fbScarlsonj 	if (zonecfg_in_alt_root() &&
3441108322fbScarlsonj 	    zonecfg_add_scratch(fp, zone_name, kernzone,
3442108322fbScarlsonj 	    zonecfg_get_root()) == -1) {
3443108322fbScarlsonj 		zerror(zlogp, B_TRUE, "cannot add mapfile entry");
3444108322fbScarlsonj 		goto error;
3445108322fbScarlsonj 	}
3446108322fbScarlsonj 
34477c478bd9Sstevel@tonic-gate 	/*
3448108322fbScarlsonj 	 * The following is a warning, not an error, and is not performed when
3449108322fbScarlsonj 	 * merely mounting a zone for administrative use.
34507c478bd9Sstevel@tonic-gate 	 */
3451108322fbScarlsonj 	if (!mount_cmd && bind_to_pool(zlogp, zoneid) != 0)
34527c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: unable to bind zone to "
34537c478bd9Sstevel@tonic-gate 		    "requested pool; using default pool.");
345448451833Scarlsonj 	if (!mount_cmd)
345545916cd2Sjpk 		set_mlps(zlogp, zoneid, zcent);
3456108322fbScarlsonj 	rval = zoneid;
3457108322fbScarlsonj 	zoneid = -1;
3458108322fbScarlsonj 
34597c478bd9Sstevel@tonic-gate error:
3460108322fbScarlsonj 	if (zoneid != -1)
3461108322fbScarlsonj 		(void) zone_destroy(zoneid);
34627c478bd9Sstevel@tonic-gate 	if (rctlbuf != NULL)
34637c478bd9Sstevel@tonic-gate 		free(rctlbuf);
34647c478bd9Sstevel@tonic-gate 	priv_freeset(privs);
3465108322fbScarlsonj 	if (fp != NULL)
3466108322fbScarlsonj 		zonecfg_close_scratch(fp);
3467108322fbScarlsonj 	lofs_discard_mnttab();
346845916cd2Sjpk 	if (zcent != NULL)
346945916cd2Sjpk 		tsol_freezcent(zcent);
34707c478bd9Sstevel@tonic-gate 	return (rval);
34717c478bd9Sstevel@tonic-gate }
34727c478bd9Sstevel@tonic-gate 
3473555afedfScarlsonj /*
3474555afedfScarlsonj  * Enter the zone and write a /etc/zones/index file there.  This allows
3475555afedfScarlsonj  * libzonecfg (and thus zoneadm) to report the UUID and potentially other zone
3476555afedfScarlsonj  * details from inside the zone.
3477555afedfScarlsonj  */
3478555afedfScarlsonj static void
3479555afedfScarlsonj write_index_file(zoneid_t zoneid)
3480555afedfScarlsonj {
3481555afedfScarlsonj 	FILE *zef;
3482555afedfScarlsonj 	FILE *zet;
3483555afedfScarlsonj 	struct zoneent *zep;
3484555afedfScarlsonj 	pid_t child;
3485555afedfScarlsonj 	int tmpl_fd;
3486555afedfScarlsonj 	ctid_t ct;
3487555afedfScarlsonj 	int fd;
3488555afedfScarlsonj 	char uuidstr[UUID_PRINTABLE_STRING_LENGTH];
3489555afedfScarlsonj 
3490555afedfScarlsonj 	/* Locate the zone entry in the global zone's index file */
3491555afedfScarlsonj 	if ((zef = setzoneent()) == NULL)
3492555afedfScarlsonj 		return;
3493555afedfScarlsonj 	while ((zep = getzoneent_private(zef)) != NULL) {
3494555afedfScarlsonj 		if (strcmp(zep->zone_name, zone_name) == 0)
3495555afedfScarlsonj 			break;
3496555afedfScarlsonj 		free(zep);
3497555afedfScarlsonj 	}
3498555afedfScarlsonj 	endzoneent(zef);
3499555afedfScarlsonj 	if (zep == NULL)
3500555afedfScarlsonj 		return;
3501555afedfScarlsonj 
3502555afedfScarlsonj 	if ((tmpl_fd = init_template()) == -1) {
3503555afedfScarlsonj 		free(zep);
3504555afedfScarlsonj 		return;
3505555afedfScarlsonj 	}
3506555afedfScarlsonj 
3507555afedfScarlsonj 	if ((child = fork()) == -1) {
3508555afedfScarlsonj 		(void) ct_tmpl_clear(tmpl_fd);
3509555afedfScarlsonj 		(void) close(tmpl_fd);
3510555afedfScarlsonj 		free(zep);
3511555afedfScarlsonj 		return;
3512555afedfScarlsonj 	}
3513555afedfScarlsonj 
3514555afedfScarlsonj 	/* parent waits for child to finish */
3515555afedfScarlsonj 	if (child != 0) {
3516555afedfScarlsonj 		free(zep);
3517555afedfScarlsonj 		if (contract_latest(&ct) == -1)
3518555afedfScarlsonj 			ct = -1;
3519555afedfScarlsonj 		(void) ct_tmpl_clear(tmpl_fd);
3520555afedfScarlsonj 		(void) close(tmpl_fd);
3521555afedfScarlsonj 		(void) waitpid(child, NULL, 0);
3522555afedfScarlsonj 		(void) contract_abandon_id(ct);
3523555afedfScarlsonj 		return;
3524555afedfScarlsonj 	}
3525555afedfScarlsonj 
3526555afedfScarlsonj 	/* child enters zone and sets up index file */
3527555afedfScarlsonj 	(void) ct_tmpl_clear(tmpl_fd);
3528555afedfScarlsonj 	if (zone_enter(zoneid) != -1) {
3529555afedfScarlsonj 		(void) mkdir(ZONE_CONFIG_ROOT, ZONE_CONFIG_MODE);
3530555afedfScarlsonj 		(void) chown(ZONE_CONFIG_ROOT, ZONE_CONFIG_UID,
3531555afedfScarlsonj 		    ZONE_CONFIG_GID);
3532555afedfScarlsonj 		fd = open(ZONE_INDEX_FILE, O_WRONLY|O_CREAT|O_TRUNC,
3533555afedfScarlsonj 		    ZONE_INDEX_MODE);
3534555afedfScarlsonj 		if (fd != -1 && (zet = fdopen(fd, "w")) != NULL) {
3535555afedfScarlsonj 			(void) fchown(fd, ZONE_INDEX_UID, ZONE_INDEX_GID);
3536555afedfScarlsonj 			if (uuid_is_null(zep->zone_uuid))
3537555afedfScarlsonj 				uuidstr[0] = '\0';
3538555afedfScarlsonj 			else
3539555afedfScarlsonj 				uuid_unparse(zep->zone_uuid, uuidstr);
3540555afedfScarlsonj 			(void) fprintf(zet, "%s:%s:/:%s\n", zep->zone_name,
3541555afedfScarlsonj 			    zone_state_str(zep->zone_state),
3542555afedfScarlsonj 			    uuidstr);
3543555afedfScarlsonj 			(void) fclose(zet);
3544555afedfScarlsonj 		}
3545555afedfScarlsonj 	}
3546555afedfScarlsonj 	_exit(0);
3547555afedfScarlsonj }
3548555afedfScarlsonj 
35497c478bd9Sstevel@tonic-gate int
3550555afedfScarlsonj vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd, zoneid_t zoneid)
35517c478bd9Sstevel@tonic-gate {
35525749802bSdp 
3553fa9e4066Sahrens 	if (!mount_cmd && validate_datasets(zlogp) != 0) {
3554fa9e4066Sahrens 		lofs_discard_mnttab();
3555fa9e4066Sahrens 		return (-1);
3556fa9e4066Sahrens 	}
3557fa9e4066Sahrens 
3558facf4a8dSllai1 	if (mount_filesystems(zlogp, mount_cmd) != 0) {
3559108322fbScarlsonj 		lofs_discard_mnttab();
35607c478bd9Sstevel@tonic-gate 		return (-1);
3561108322fbScarlsonj 	}
3562facf4a8dSllai1 
3563facf4a8dSllai1 	/* mount /dev for zone (both normal and scratch zone) */
3564facf4a8dSllai1 	if (vplat_mount_dev(zlogp) != 0) {
3565facf4a8dSllai1 		lofs_discard_mnttab();
3566facf4a8dSllai1 		return (-1);
3567facf4a8dSllai1 	}
3568facf4a8dSllai1 
3569facf4a8dSllai1 	if (!mount_cmd && configure_network_interfaces(zlogp) != 0) {
3570108322fbScarlsonj 		lofs_discard_mnttab();
35717c478bd9Sstevel@tonic-gate 		return (-1);
3572108322fbScarlsonj 	}
3573555afedfScarlsonj 
3574555afedfScarlsonj 	write_index_file(zoneid);
3575555afedfScarlsonj 
3576108322fbScarlsonj 	lofs_discard_mnttab();
35777c478bd9Sstevel@tonic-gate 	return (0);
35787c478bd9Sstevel@tonic-gate }
35797c478bd9Sstevel@tonic-gate 
3580108322fbScarlsonj static int
3581108322fbScarlsonj lu_root_teardown(zlog_t *zlogp)
35827c478bd9Sstevel@tonic-gate {
3583108322fbScarlsonj 	char zroot[MAXPATHLEN];
3584108322fbScarlsonj 
3585108322fbScarlsonj 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
3586108322fbScarlsonj 		zerror(zlogp, B_FALSE, "unable to determine zone root");
3587108322fbScarlsonj 		return (-1);
3588108322fbScarlsonj 	}
3589108322fbScarlsonj 	root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
3590108322fbScarlsonj 
3591108322fbScarlsonj 	/*
3592108322fbScarlsonj 	 * At this point, the processes are gone, the filesystems (save the
3593108322fbScarlsonj 	 * root) are unmounted, and the zone is on death row.  But there may
3594108322fbScarlsonj 	 * still be creds floating about in the system that reference the
3595108322fbScarlsonj 	 * zone_t, and which pin down zone_rootvp causing this call to fail
3596108322fbScarlsonj 	 * with EBUSY.  Thus, we try for a little while before just giving up.
3597108322fbScarlsonj 	 * (How I wish this were not true, and umount2 just did the right
3598108322fbScarlsonj 	 * thing, or tmpfs supported MS_FORCE This is a gross hack.)
3599108322fbScarlsonj 	 */
3600108322fbScarlsonj 	if (umount2(zroot, MS_FORCE) != 0) {
3601108322fbScarlsonj 		if (errno == ENOTSUP && umount2(zroot, 0) == 0)
3602108322fbScarlsonj 			goto unmounted;
3603108322fbScarlsonj 		if (errno == EBUSY) {
3604108322fbScarlsonj 			int tries = 10;
3605108322fbScarlsonj 
3606108322fbScarlsonj 			while (--tries >= 0) {
3607108322fbScarlsonj 				(void) sleep(1);
3608108322fbScarlsonj 				if (umount2(zroot, 0) == 0)
3609108322fbScarlsonj 					goto unmounted;
3610108322fbScarlsonj 				if (errno != EBUSY)
3611108322fbScarlsonj 					break;
3612108322fbScarlsonj 			}
3613108322fbScarlsonj 		}
3614108322fbScarlsonj 		zerror(zlogp, B_TRUE, "unable to unmount '%s'", zroot);
3615108322fbScarlsonj 		return (-1);
3616108322fbScarlsonj 	}
3617108322fbScarlsonj unmounted:
3618108322fbScarlsonj 
3619108322fbScarlsonj 	/*
3620108322fbScarlsonj 	 * Only zones in an alternate root environment have scratch zone
3621108322fbScarlsonj 	 * entries.
3622108322fbScarlsonj 	 */
3623108322fbScarlsonj 	if (zonecfg_in_alt_root()) {
3624108322fbScarlsonj 		FILE *fp;
3625108322fbScarlsonj 		int retv;
3626108322fbScarlsonj 
3627108322fbScarlsonj 		if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
3628108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot open mapfile");
3629108322fbScarlsonj 			return (-1);
3630108322fbScarlsonj 		}
3631108322fbScarlsonj 		retv = -1;
3632108322fbScarlsonj 		if (zonecfg_lock_scratch(fp) != 0)
3633108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot lock mapfile");
3634108322fbScarlsonj 		else if (zonecfg_delete_scratch(fp, kernzone) != 0)
3635108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot delete map entry");
3636108322fbScarlsonj 		else
3637108322fbScarlsonj 			retv = 0;
3638108322fbScarlsonj 		zonecfg_close_scratch(fp);
3639108322fbScarlsonj 		return (retv);
3640108322fbScarlsonj 	} else {
3641108322fbScarlsonj 		return (0);
3642108322fbScarlsonj 	}
3643108322fbScarlsonj }
3644108322fbScarlsonj 
3645108322fbScarlsonj int
3646108322fbScarlsonj vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd)
3647108322fbScarlsonj {
3648108322fbScarlsonj 	char *kzone;
36497c478bd9Sstevel@tonic-gate 	zoneid_t zoneid;
36507c478bd9Sstevel@tonic-gate 
3651108322fbScarlsonj 	kzone = zone_name;
3652108322fbScarlsonj 	if (zonecfg_in_alt_root()) {
3653108322fbScarlsonj 		FILE *fp;
3654108322fbScarlsonj 
3655108322fbScarlsonj 		if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
3656108322fbScarlsonj 			zerror(zlogp, B_TRUE, "unable to open map file");
3657108322fbScarlsonj 			goto error;
3658108322fbScarlsonj 		}
3659108322fbScarlsonj 		if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
3660108322fbScarlsonj 		    kernzone, sizeof (kernzone)) != 0) {
3661108322fbScarlsonj 			zerror(zlogp, B_FALSE, "unable to find scratch zone");
3662108322fbScarlsonj 			zonecfg_close_scratch(fp);
3663108322fbScarlsonj 			goto error;
3664108322fbScarlsonj 		}
3665108322fbScarlsonj 		zonecfg_close_scratch(fp);
3666108322fbScarlsonj 		kzone = kernzone;
3667108322fbScarlsonj 	}
3668108322fbScarlsonj 
3669108322fbScarlsonj 	if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) {
36707c478bd9Sstevel@tonic-gate 		if (!bringup_failure_recovery)
36717c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "unable to get zoneid");
3672108322fbScarlsonj 		if (unmount_cmd)
3673108322fbScarlsonj 			(void) lu_root_teardown(zlogp);
36747c478bd9Sstevel@tonic-gate 		goto error;
36757c478bd9Sstevel@tonic-gate 	}
36767c478bd9Sstevel@tonic-gate 
36777c478bd9Sstevel@tonic-gate 	if (zone_shutdown(zoneid) != 0) {
36787c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to shutdown zone");
36797c478bd9Sstevel@tonic-gate 		goto error;
36807c478bd9Sstevel@tonic-gate 	}
36817c478bd9Sstevel@tonic-gate 
3682108322fbScarlsonj 	if (!unmount_cmd &&
3683108322fbScarlsonj 	    unconfigure_network_interfaces(zlogp, zoneid) != 0) {
36847c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
36857c478bd9Sstevel@tonic-gate 		    "unable to unconfigure network interfaces in zone");
36867c478bd9Sstevel@tonic-gate 		goto error;
36877c478bd9Sstevel@tonic-gate 	}
36887c478bd9Sstevel@tonic-gate 
3689108322fbScarlsonj 	if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) {
36907c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to abort TCP connections");
36917c478bd9Sstevel@tonic-gate 		goto error;
36927c478bd9Sstevel@tonic-gate 	}
36937c478bd9Sstevel@tonic-gate 
3694facf4a8dSllai1 	/* destroy zconsole before umount /dev */
3695facf4a8dSllai1 	if (!unmount_cmd)
3696facf4a8dSllai1 		destroy_console_slave();
3697facf4a8dSllai1 
3698108322fbScarlsonj 	if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) {
36997c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
37007c478bd9Sstevel@tonic-gate 		    "unable to unmount file systems in zone");
37017c478bd9Sstevel@tonic-gate 		goto error;
37027c478bd9Sstevel@tonic-gate 	}
37037c478bd9Sstevel@tonic-gate 
370445916cd2Sjpk 	remove_mlps(zlogp, zoneid);
370545916cd2Sjpk 
37067c478bd9Sstevel@tonic-gate 	if (zone_destroy(zoneid) != 0) {
37077c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to destroy zone");
37087c478bd9Sstevel@tonic-gate 		goto error;
37097c478bd9Sstevel@tonic-gate 	}
3710108322fbScarlsonj 
3711108322fbScarlsonj 	/*
3712108322fbScarlsonj 	 * Special teardown for alternate boot environments: remove the tmpfs
3713108322fbScarlsonj 	 * root for the zone and then remove it from the map file.
3714108322fbScarlsonj 	 */
3715108322fbScarlsonj 	if (unmount_cmd && lu_root_teardown(zlogp) != 0)
3716108322fbScarlsonj 		goto error;
3717108322fbScarlsonj 
3718108322fbScarlsonj 	lofs_discard_mnttab();
37197c478bd9Sstevel@tonic-gate 	return (0);
37207c478bd9Sstevel@tonic-gate 
37217c478bd9Sstevel@tonic-gate error:
3722108322fbScarlsonj 	lofs_discard_mnttab();
37237c478bd9Sstevel@tonic-gate 	return (-1);
37247c478bd9Sstevel@tonic-gate }
3725facf4a8dSllai1 
3726facf4a8dSllai1 /*
3727facf4a8dSllai1  * Apply the standard lists of devices/symlinks/mappings and the user-specified
3728facf4a8dSllai1  * list of devices (via zonecfg) to the /dev filesystem.  The filesystem will
3729facf4a8dSllai1  * use these as a profile/filter to determine what exists in /dev.
3730facf4a8dSllai1  */
3731facf4a8dSllai1 static int
3732facf4a8dSllai1 vplat_mount_dev(zlog_t *zlogp)
3733facf4a8dSllai1 {
3734facf4a8dSllai1 	char			zonedevpath[MAXPATHLEN];
3735facf4a8dSllai1 	zone_dochandle_t	handle = NULL;
3736facf4a8dSllai1 	struct zone_devtab	ztab;
3737facf4a8dSllai1 	zone_fsopt_t		opt_attr;
3738facf4a8dSllai1 	di_prof_t		prof = NULL;
3739facf4a8dSllai1 	int			i, err, len;
3740facf4a8dSllai1 	int			retval = -1;
3741facf4a8dSllai1 
3742facf4a8dSllai1 	struct zone_fstab devtab = {
3743facf4a8dSllai1 		"/dev",
3744facf4a8dSllai1 		"/dev",
3745facf4a8dSllai1 		MNTTYPE_DEV,
3746facf4a8dSllai1 		NULL,
3747facf4a8dSllai1 		""
3748facf4a8dSllai1 	};
3749facf4a8dSllai1 
3750facf4a8dSllai1 	if (err = zone_get_devroot(zone_name, zonedevpath,
3751facf4a8dSllai1 	    sizeof (zonedevpath))) {
3752facf4a8dSllai1 		zerror(zlogp, B_FALSE, "can't get zone dev: %s",
3753facf4a8dSllai1 		    zonecfg_strerror(err));
3754facf4a8dSllai1 		return (-1);
3755facf4a8dSllai1 	}
3756facf4a8dSllai1 
3757facf4a8dSllai1 	/*
3758facf4a8dSllai1 	 * The old /dev was a lofs mount from <zonepath>/dev, with
3759facf4a8dSllai1 	 * dev fs, that becomes a mount on <zonepath>/root/dev.
3760facf4a8dSllai1 	 * However, we need to preserve device permission bits during
3761facf4a8dSllai1 	 * upgrade.  What we should do is migrate the attribute directory
3762facf4a8dSllai1 	 * on upgrade, but for now, preserve it at <zonepath>/dev.
3763facf4a8dSllai1 	 */
3764facf4a8dSllai1 	(void) strcpy(opt_attr.zone_fsopt_opt, "attrdir=");
3765facf4a8dSllai1 	len = strlen(opt_attr.zone_fsopt_opt);
3766facf4a8dSllai1 	if (err = zone_get_zonepath(zone_name,
3767facf4a8dSllai1 	    opt_attr.zone_fsopt_opt + len, MAX_MNTOPT_STR - len)) {
3768facf4a8dSllai1 		zerror(zlogp, B_FALSE, "can't get zone path: %s",
3769facf4a8dSllai1 		    zonecfg_strerror(err));
3770facf4a8dSllai1 		return (-1);
3771facf4a8dSllai1 	}
3772facf4a8dSllai1 
3773facf4a8dSllai1 	if (make_one_dir(zlogp, opt_attr.zone_fsopt_opt + len, "/dev",
3774facf4a8dSllai1 	    DEFAULT_DIR_MODE) != 0)
3775facf4a8dSllai1 		return (-1);
3776facf4a8dSllai1 
3777facf4a8dSllai1 	(void) strlcat(opt_attr.zone_fsopt_opt, "/dev", MAX_MNTOPT_STR);
3778facf4a8dSllai1 	devtab.zone_fs_options = &opt_attr;
3779facf4a8dSllai1 	opt_attr.zone_fsopt_next = NULL;
3780facf4a8dSllai1 
3781facf4a8dSllai1 	/* mount /dev inside the zone */
3782facf4a8dSllai1 	i = strlen(zonedevpath);
3783facf4a8dSllai1 	if (mount_one(zlogp, &devtab, zonedevpath))
3784facf4a8dSllai1 		return (-1);
3785facf4a8dSllai1 
3786facf4a8dSllai1 	(void) strlcat(zonedevpath, "/dev", sizeof (zonedevpath));
3787facf4a8dSllai1 	if (di_prof_init(zonedevpath, &prof)) {
3788facf4a8dSllai1 		zerror(zlogp, B_TRUE, "failed to initialize profile");
3789facf4a8dSllai1 		goto cleanup;
3790facf4a8dSllai1 	}
3791facf4a8dSllai1 
3792facf4a8dSllai1 	/* Add the standard devices and directories */
3793facf4a8dSllai1 	for (i = 0; standard_devs[i] != NULL; ++i) {
3794facf4a8dSllai1 		if (di_prof_add_dev(prof, standard_devs[i])) {
3795facf4a8dSllai1 			zerror(zlogp, B_TRUE, "failed to add "
3796facf4a8dSllai1 			    "standard device");
3797facf4a8dSllai1 			goto cleanup;
3798facf4a8dSllai1 		}
3799facf4a8dSllai1 	}
3800facf4a8dSllai1 
3801facf4a8dSllai1 	/* Add the standard symlinks */
3802facf4a8dSllai1 	for (i = 0; standard_devlinks[i].source != NULL; ++i) {
3803facf4a8dSllai1 		if (di_prof_add_symlink(prof,
3804facf4a8dSllai1 		    standard_devlinks[i].source,
3805facf4a8dSllai1 		    standard_devlinks[i].target)) {
3806facf4a8dSllai1 			zerror(zlogp, B_TRUE, "failed to add "
3807facf4a8dSllai1 			    "standard symlink");
3808facf4a8dSllai1 			goto cleanup;
3809facf4a8dSllai1 		}
3810facf4a8dSllai1 	}
3811facf4a8dSllai1 
3812facf4a8dSllai1 	/* Add user-specified devices and directories */
3813facf4a8dSllai1 	if ((handle = zonecfg_init_handle()) == NULL) {
3814facf4a8dSllai1 		zerror(zlogp, B_FALSE, "can't initialize zone handle");
3815facf4a8dSllai1 		goto cleanup;
3816facf4a8dSllai1 	}
3817facf4a8dSllai1 	if (err = zonecfg_get_handle(zone_name, handle)) {
3818facf4a8dSllai1 		zerror(zlogp, B_FALSE, "can't get handle for zone "
3819facf4a8dSllai1 		    "%s: %s", zone_name, zonecfg_strerror(err));
3820facf4a8dSllai1 		goto cleanup;
3821facf4a8dSllai1 	}
3822facf4a8dSllai1 	if (err = zonecfg_setdevent(handle)) {
3823facf4a8dSllai1 		zerror(zlogp, B_FALSE, "%s: %s", zone_name,
3824facf4a8dSllai1 		    zonecfg_strerror(err));
3825facf4a8dSllai1 		goto cleanup;
3826facf4a8dSllai1 	}
3827facf4a8dSllai1 	while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
3828facf4a8dSllai1 		if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
3829facf4a8dSllai1 			zerror(zlogp, B_TRUE, "failed to add "
3830facf4a8dSllai1 			    "user-specified device");
3831facf4a8dSllai1 			goto cleanup;
3832facf4a8dSllai1 		}
3833facf4a8dSllai1 	}
3834facf4a8dSllai1 	(void) zonecfg_enddevent(handle);
3835facf4a8dSllai1 
3836facf4a8dSllai1 	/* Send profile to kernel */
3837facf4a8dSllai1 	if (di_prof_commit(prof)) {
3838facf4a8dSllai1 		zerror(zlogp, B_TRUE, "failed to commit profile");
3839facf4a8dSllai1 		goto cleanup;
3840facf4a8dSllai1 	}
3841facf4a8dSllai1 
3842facf4a8dSllai1 	retval = 0;
3843facf4a8dSllai1 
3844facf4a8dSllai1 cleanup:
3845facf4a8dSllai1 	if (handle)
3846facf4a8dSllai1 		zonecfg_fini_handle(handle);
3847facf4a8dSllai1 	if (prof)
3848facf4a8dSllai1 		di_prof_fini(prof);
3849facf4a8dSllai1 	return (retval);
3850facf4a8dSllai1 }
3851