xref: /titanic_50/usr/src/cmd/zoneadmd/vplat.c (revision f4368d3d425a0b665226846c328087c66a2bd21e)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
21ffbafc53Scomay 
227c478bd9Sstevel@tonic-gate /*
23ea8dc4b6Seschrock  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*
307c478bd9Sstevel@tonic-gate  * This module contains functions used to bring up and tear down the
317c478bd9Sstevel@tonic-gate  * Virtual Platform: [un]mounting file-systems, [un]plumbing network
327c478bd9Sstevel@tonic-gate  * interfaces, [un]configuring devices, establishing resource controls,
337c478bd9Sstevel@tonic-gate  * and creating/destroying the zone in the kernel.  These actions, on
347c478bd9Sstevel@tonic-gate  * the way up, ready the zone; on the way down, they halt the zone.
357c478bd9Sstevel@tonic-gate  * See the much longer block comment at the beginning of zoneadmd.c
367c478bd9Sstevel@tonic-gate  * for a bigger picture of how the whole program functions.
37108322fbScarlsonj  *
38108322fbScarlsonj  * This module also has primary responsibility for the layout of "scratch
39108322fbScarlsonj  * zones."  These are mounted, but inactive, zones that are used during
40108322fbScarlsonj  * operating system upgrade and potentially other administrative action.  The
41108322fbScarlsonj  * scratch zone environment is similar to the miniroot environment.  The zone's
42108322fbScarlsonj  * actual root is mounted read-write on /a, and the standard paths (/usr,
43108322fbScarlsonj  * /sbin, /lib) all lead to read-only copies of the running system's binaries.
44108322fbScarlsonj  * This allows the administrative tools to manipulate the zone using "-R /a"
45108322fbScarlsonj  * without relying on any binaries in the zone itself.
46108322fbScarlsonj  *
47108322fbScarlsonj  * If the scratch zone is on an alternate root (Live Upgrade [LU] boot
48108322fbScarlsonj  * environment), then we must resolve the lofs mounts used there to uncover
49108322fbScarlsonj  * writable (unshared) resources.  Shared resources, though, are always
50108322fbScarlsonj  * read-only.  In addition, if the "same" zone with a different root path is
51108322fbScarlsonj  * currently running, then "/b" inside the zone points to the running zone's
52108322fbScarlsonj  * root.  This allows LU to synchronize configuration files during the upgrade
53108322fbScarlsonj  * process.
54108322fbScarlsonj  *
55108322fbScarlsonj  * To construct this environment, this module creates a tmpfs mount on
56108322fbScarlsonj  * $ZONEPATH/lu.  Inside this scratch area, the miniroot-like environment as
57108322fbScarlsonj  * described above is constructed on the fly.  The zone is then created using
58108322fbScarlsonj  * $ZONEPATH/lu as the root.
59108322fbScarlsonj  *
60108322fbScarlsonj  * Note that scratch zones are inactive.  The zone's bits are not running and
61108322fbScarlsonj  * likely cannot be run correctly until upgrade is done.  Init is not running
62108322fbScarlsonj  * there, nor is SMF.  Because of this, the "mounted" state of a scratch zone
63108322fbScarlsonj  * is not a part of the usual halt/ready/boot state machine.
647c478bd9Sstevel@tonic-gate  */
657c478bd9Sstevel@tonic-gate 
667c478bd9Sstevel@tonic-gate #include <sys/param.h>
677c478bd9Sstevel@tonic-gate #include <sys/mount.h>
687c478bd9Sstevel@tonic-gate #include <sys/mntent.h>
697c478bd9Sstevel@tonic-gate #include <sys/socket.h>
707c478bd9Sstevel@tonic-gate #include <sys/utsname.h>
717c478bd9Sstevel@tonic-gate #include <sys/types.h>
727c478bd9Sstevel@tonic-gate #include <sys/stat.h>
737c478bd9Sstevel@tonic-gate #include <sys/sockio.h>
747c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
757c478bd9Sstevel@tonic-gate #include <sys/conf.h>
767c478bd9Sstevel@tonic-gate 
777c478bd9Sstevel@tonic-gate #include <inet/tcp.h>
787c478bd9Sstevel@tonic-gate #include <arpa/inet.h>
797c478bd9Sstevel@tonic-gate #include <netinet/in.h>
807c478bd9Sstevel@tonic-gate #include <net/route.h>
817c478bd9Sstevel@tonic-gate 
827c478bd9Sstevel@tonic-gate #include <stdio.h>
837c478bd9Sstevel@tonic-gate #include <errno.h>
847c478bd9Sstevel@tonic-gate #include <fcntl.h>
857c478bd9Sstevel@tonic-gate #include <unistd.h>
867c478bd9Sstevel@tonic-gate #include <rctl.h>
877c478bd9Sstevel@tonic-gate #include <stdlib.h>
887c478bd9Sstevel@tonic-gate #include <string.h>
897c478bd9Sstevel@tonic-gate #include <strings.h>
907c478bd9Sstevel@tonic-gate #include <wait.h>
917c478bd9Sstevel@tonic-gate #include <limits.h>
927c478bd9Sstevel@tonic-gate #include <libgen.h>
93fa9e4066Sahrens #include <libzfs.h>
94facf4a8dSllai1 #include <libdevinfo.h>
957c478bd9Sstevel@tonic-gate #include <zone.h>
967c478bd9Sstevel@tonic-gate #include <assert.h>
97555afedfScarlsonj #include <libcontract.h>
98555afedfScarlsonj #include <libcontract_priv.h>
99555afedfScarlsonj #include <uuid/uuid.h>
1007c478bd9Sstevel@tonic-gate 
1017c478bd9Sstevel@tonic-gate #include <sys/mntio.h>
1027c478bd9Sstevel@tonic-gate #include <sys/mnttab.h>
1037c478bd9Sstevel@tonic-gate #include <sys/fs/autofs.h>	/* for _autofssys() */
1047c478bd9Sstevel@tonic-gate #include <sys/fs/lofs_info.h>
105fa9e4066Sahrens #include <sys/fs/zfs.h>
1067c478bd9Sstevel@tonic-gate 
1077c478bd9Sstevel@tonic-gate #include <pool.h>
1087c478bd9Sstevel@tonic-gate #include <sys/pool.h>
1097c478bd9Sstevel@tonic-gate 
1107c478bd9Sstevel@tonic-gate #include <libzonecfg.h>
11139d3e169Sevanl #include <synch.h>
11222321485Svp157776 
1137c478bd9Sstevel@tonic-gate #include "zoneadmd.h"
11445916cd2Sjpk #include <tsol/label.h>
11545916cd2Sjpk #include <libtsnet.h>
11645916cd2Sjpk #include <sys/priv.h>
1177c478bd9Sstevel@tonic-gate 
/* NOTE(review): presumably IPv4/IPv6 address widths in bits -- confirm at the use sites. */
1187c478bd9Sstevel@tonic-gate #define	V4_ADDR_LEN	32
1197c478bd9Sstevel@tonic-gate #define	V6_ADDR_LEN	128
1207c478bd9Sstevel@tonic-gate 
1217c478bd9Sstevel@tonic-gate /* 0755 is the default directory mode. */
1227c478bd9Sstevel@tonic-gate #define	DEFAULT_DIR_MODE \
1237c478bd9Sstevel@tonic-gate 	(S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)
1247c478bd9Sstevel@tonic-gate 
/*
 * Comma-separated option string built from MNTOPT_RO, MNTOPT_LOFS_NOSUB and
 * MNTOPT_NODEVICES.  check_lofs_needed() installs it on a mount that it
 * converts to lofs; per the comment there, it is the same set of options used
 * for inherit package directory resources.
 */
1257c478bd9Sstevel@tonic-gate #define	IPD_DEFAULT_OPTS \
1267c478bd9Sstevel@tonic-gate 	MNTOPT_RO "," MNTOPT_LOFS_NOSUB "," MNTOPT_NODEVICES
1277c478bd9Sstevel@tonic-gate 
/*
 * File read by get_remote_fstypes(); the first whitespace-delimited word of
 * each line is taken as a file system type name.
 */
1287c478bd9Sstevel@tonic-gate #define	DFSTYPES	"/etc/dfs/fstypes"
/* NOTE(review): not used in this part of the file; presumably a buffer size for Trusted Extensions data -- confirm. */
12945916cd2Sjpk #define	MAXTNZLEN	2048
1307c478bd9Sstevel@tonic-gate 
1317c478bd9Sstevel@tonic-gate /*
132facf4a8dSllai1  * This is the set of directories and devices (relative to <zone_root>/dev)
133facf4a8dSllai1  * which must be present in every zone.  Users can augment this list with
134facf4a8dSllai1  * additional device rules in their zone configuration, but at present cannot
135facf4a8dSllai1  * remove any of this set of standard devices.
1367c478bd9Sstevel@tonic-gate  */
/* The list ends with a NULL sentinel; "openprom" is compiled in only when __sparc is defined. */
137facf4a8dSllai1 static const char *standard_devs[] = {
138facf4a8dSllai1 	"arp",
139facf4a8dSllai1 	"conslog",
140facf4a8dSllai1 	"cpu/self/cpuid",
141facf4a8dSllai1 	"crypto",
142facf4a8dSllai1 	"cryptoadm",
143facf4a8dSllai1 	"dsk",
144facf4a8dSllai1 	"dtrace/helper",
145facf4a8dSllai1 	"fd",
146facf4a8dSllai1 	"kstat",
147facf4a8dSllai1 	"lo0",
148facf4a8dSllai1 	"lo1",
149facf4a8dSllai1 	"lo2",
150facf4a8dSllai1 	"lo3",
151facf4a8dSllai1 	"log",
152facf4a8dSllai1 	"logindmux",
153facf4a8dSllai1 	"null",
154facf4a8dSllai1 #ifdef __sparc
155facf4a8dSllai1 	"openprom",
156facf4a8dSllai1 #endif
157facf4a8dSllai1 	"poll",
158facf4a8dSllai1 	"pool",
159facf4a8dSllai1 	"ptmx",
160facf4a8dSllai1 	"pts/*",
161facf4a8dSllai1 	"random",
162facf4a8dSllai1 	"rdsk",
163facf4a8dSllai1 	"rmt",
164facf4a8dSllai1 	"sad/user",
165facf4a8dSllai1 	"swap",
166facf4a8dSllai1 	"sysevent",
167facf4a8dSllai1 	"tcp",
168facf4a8dSllai1 	"tcp6",
169facf4a8dSllai1 	"term",
170facf4a8dSllai1 	"ticlts",
171facf4a8dSllai1 	"ticots",
172facf4a8dSllai1 	"ticotsord",
173facf4a8dSllai1 	"tty",
174facf4a8dSllai1 	"udp",
175facf4a8dSllai1 	"udp6",
176facf4a8dSllai1 	"urandom",
177facf4a8dSllai1 	"zero",
178facf4a8dSllai1 	"zfs",
179facf4a8dSllai1 	NULL
180facf4a8dSllai1 };
1817c478bd9Sstevel@tonic-gate 
/*
 * A pair of strings describing one symlink.  Judging by the entries in
 * standard_devlinks, "source" is the name of the link and "target" is the
 * path the link points at -- NOTE(review): confirm against the code that
 * consumes these entries.
 */
182facf4a8dSllai1 struct source_target {
183facf4a8dSllai1 	const char *source;
184facf4a8dSllai1 	const char *target;
1857c478bd9Sstevel@tonic-gate };
1867c478bd9Sstevel@tonic-gate 
1867c478bd9Sstevel@tonic-gate /*
188facf4a8dSllai1  * Set of symlinks (relative to <zone_root>/dev) which must be present in
189facf4a8dSllai1  * every zone.
1907c478bd9Sstevel@tonic-gate  */
/* The table ends with an entry whose two fields are both NULL. */
191facf4a8dSllai1 static struct source_target standard_devlinks[] = {
192facf4a8dSllai1 	{ "stderr",	"./fd/2" },
193facf4a8dSllai1 	{ "stdin",	"./fd/0" },
194facf4a8dSllai1 	{ "stdout",	"./fd/1" },
195facf4a8dSllai1 	{ "dtremote",	"/dev/null" },
196facf4a8dSllai1 	{ "console",	"zconsole" },
197facf4a8dSllai1 	{ "syscon",	"zconsole" },
198facf4a8dSllai1 	{ "sysmsg",	"zconsole" },
199facf4a8dSllai1 	{ "systty",	"zconsole" },
200facf4a8dSllai1 	{ "msglog",	"zconsole" },
201facf4a8dSllai1 	{ NULL, NULL }
2027c478bd9Sstevel@tonic-gate };
2037c478bd9Sstevel@tonic-gate 
/* Forward declaration; the definition is not in this part of the file. */
204facf4a8dSllai1 static int vplat_mount_dev(zlog_t *);
2057c478bd9Sstevel@tonic-gate 
2067c478bd9Sstevel@tonic-gate /* for routing socket */
2077c478bd9Sstevel@tonic-gate static int rts_seqno = 0;
2087c478bd9Sstevel@tonic-gate 
209108322fbScarlsonj /* mangled zone name when mounting in an alternate root environment */
210108322fbScarlsonj static char kernzone[ZONENAME_MAX];
211108322fbScarlsonj 
212108322fbScarlsonj /* array of cached mount entries for resolve_lofs */
/*
 * resolve_lofs_mnts is the first cached entry and resolve_lofs_mnt_max is one
 * past the last (lofs_read_mnttab() sets it to resolve_lofs_mnts plus the
 * entry count).  Both are NULL while nothing is cached; the cache is also
 * consulted by check_lofs_needed().
 */
213108322fbScarlsonj static struct mnttab *resolve_lofs_mnts, *resolve_lofs_mnt_max;
214108322fbScarlsonj 
21545916cd2Sjpk /* for Trusted Extensions */
21645916cd2Sjpk static tsol_zcent_t *get_zone_label(zlog_t *, priv_set_t *);
21745916cd2Sjpk static int tsol_mounts(zlog_t *, char *, char *);
21845916cd2Sjpk static void tsol_unmounts(zlog_t *, char *);
21945916cd2Sjpk static m_label_t *zlabel = NULL;
22045916cd2Sjpk static m_label_t *zid_label = NULL;
22145916cd2Sjpk static priv_set_t *zprivs = NULL;
22245916cd2Sjpk 
2237c478bd9Sstevel@tonic-gate /* from libsocket, not in any header file */
2247c478bd9Sstevel@tonic-gate extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
2257c478bd9Sstevel@tonic-gate 
2267c478bd9Sstevel@tonic-gate /*
227108322fbScarlsonj  * An optimization for build_mnttable: reallocate (and potentially copy the
228108322fbScarlsonj  * data) only once every N times through the loop.
229108322fbScarlsonj  */
/* N is the value below: the array grows by this many entries at a time. */
230108322fbScarlsonj #define	MNTTAB_HUNK	32
231108322fbScarlsonj 
232108322fbScarlsonj /*
2337c478bd9Sstevel@tonic-gate  * Private autofs system call
2347c478bd9Sstevel@tonic-gate  */
/* Called by autofs_cleanup() with AUTOFS_UNMOUNTALL and a zone ID cast to a pointer. */
2357c478bd9Sstevel@tonic-gate extern int _autofssys(int, void *);
2367c478bd9Sstevel@tonic-gate 
2377c478bd9Sstevel@tonic-gate static int
2387c478bd9Sstevel@tonic-gate autofs_cleanup(zoneid_t zoneid)
2397c478bd9Sstevel@tonic-gate {
2407c478bd9Sstevel@tonic-gate 	/*
2417c478bd9Sstevel@tonic-gate 	 * Ask autofs to unmount all trigger nodes in the given zone.
2427c478bd9Sstevel@tonic-gate 	 */
2437c478bd9Sstevel@tonic-gate 	return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid));
2447c478bd9Sstevel@tonic-gate }
2457c478bd9Sstevel@tonic-gate 
246108322fbScarlsonj static void
247108322fbScarlsonj free_mnttable(struct mnttab *mnt_array, uint_t nelem)
248108322fbScarlsonj {
249108322fbScarlsonj 	uint_t i;
250108322fbScarlsonj 
251108322fbScarlsonj 	if (mnt_array == NULL)
252108322fbScarlsonj 		return;
253108322fbScarlsonj 	for (i = 0; i < nelem; i++) {
254108322fbScarlsonj 		free(mnt_array[i].mnt_mountp);
255108322fbScarlsonj 		free(mnt_array[i].mnt_fstype);
256108322fbScarlsonj 		free(mnt_array[i].mnt_special);
257108322fbScarlsonj 		free(mnt_array[i].mnt_mntopts);
258108322fbScarlsonj 		assert(mnt_array[i].mnt_time == NULL);
259108322fbScarlsonj 	}
260108322fbScarlsonj 	free(mnt_array);
261108322fbScarlsonj }
262108322fbScarlsonj 
263108322fbScarlsonj /*
264108322fbScarlsonj  * Build the mount table for the zone rooted at "zroot", storing the resulting
265108322fbScarlsonj  * array of struct mnttabs in "mnt_arrayp" and the number of elements in the
266108322fbScarlsonj  * array in "nelemp".
267108322fbScarlsonj  */
268108322fbScarlsonj static int
269108322fbScarlsonj build_mnttable(zlog_t *zlogp, const char *zroot, size_t zrootlen, FILE *mnttab,
270108322fbScarlsonj     struct mnttab **mnt_arrayp, uint_t *nelemp)
271108322fbScarlsonj {
272108322fbScarlsonj 	struct mnttab mnt;
273108322fbScarlsonj 	struct mnttab *mnts;
274108322fbScarlsonj 	struct mnttab *mnp;
275108322fbScarlsonj 	uint_t nmnt;
276108322fbScarlsonj 
277108322fbScarlsonj 	rewind(mnttab);
278108322fbScarlsonj 	resetmnttab(mnttab);
279108322fbScarlsonj 	nmnt = 0;
280108322fbScarlsonj 	mnts = NULL;
281108322fbScarlsonj 	while (getmntent(mnttab, &mnt) == 0) {
282108322fbScarlsonj 		struct mnttab *tmp_array;
283108322fbScarlsonj 
284108322fbScarlsonj 		if (strncmp(mnt.mnt_mountp, zroot, zrootlen) != 0)
285108322fbScarlsonj 			continue;
286108322fbScarlsonj 		if (nmnt % MNTTAB_HUNK == 0) {
287108322fbScarlsonj 			tmp_array = realloc(mnts,
288108322fbScarlsonj 			    (nmnt + MNTTAB_HUNK) * sizeof (*mnts));
289108322fbScarlsonj 			if (tmp_array == NULL) {
290108322fbScarlsonj 				free_mnttable(mnts, nmnt);
291108322fbScarlsonj 				return (-1);
292108322fbScarlsonj 			}
293108322fbScarlsonj 			mnts = tmp_array;
294108322fbScarlsonj 		}
295108322fbScarlsonj 		mnp = &mnts[nmnt++];
296108322fbScarlsonj 
297108322fbScarlsonj 		/*
298108322fbScarlsonj 		 * Zero out any fields we're not using.
299108322fbScarlsonj 		 */
300108322fbScarlsonj 		(void) memset(mnp, 0, sizeof (*mnp));
301108322fbScarlsonj 
302108322fbScarlsonj 		if (mnt.mnt_special != NULL)
303108322fbScarlsonj 			mnp->mnt_special = strdup(mnt.mnt_special);
304108322fbScarlsonj 		if (mnt.mnt_mntopts != NULL)
305108322fbScarlsonj 			mnp->mnt_mntopts = strdup(mnt.mnt_mntopts);
306108322fbScarlsonj 		mnp->mnt_mountp = strdup(mnt.mnt_mountp);
307108322fbScarlsonj 		mnp->mnt_fstype = strdup(mnt.mnt_fstype);
308108322fbScarlsonj 		if ((mnt.mnt_special != NULL && mnp->mnt_special == NULL) ||
309108322fbScarlsonj 		    (mnt.mnt_mntopts != NULL && mnp->mnt_mntopts == NULL) ||
310108322fbScarlsonj 		    mnp->mnt_mountp == NULL || mnp->mnt_fstype == NULL) {
311108322fbScarlsonj 			zerror(zlogp, B_TRUE, "memory allocation failed");
312108322fbScarlsonj 			free_mnttable(mnts, nmnt);
313108322fbScarlsonj 			return (-1);
314108322fbScarlsonj 		}
315108322fbScarlsonj 	}
316108322fbScarlsonj 	*mnt_arrayp = mnts;
317108322fbScarlsonj 	*nelemp = nmnt;
318108322fbScarlsonj 	return (0);
319108322fbScarlsonj }
320108322fbScarlsonj 
321108322fbScarlsonj /*
322108322fbScarlsonj  * This is an optimization.  The resolve_lofs function is used quite frequently
323108322fbScarlsonj  * to manipulate file paths, and on a machine with a large number of zones,
324108322fbScarlsonj  * there will be a huge number of mounted file systems.  Thus, we trigger a
325108322fbScarlsonj  * reread of the list of mount points
326108322fbScarlsonj  */
327108322fbScarlsonj static void
328108322fbScarlsonj lofs_discard_mnttab(void)
329108322fbScarlsonj {
330108322fbScarlsonj 	free_mnttable(resolve_lofs_mnts,
331108322fbScarlsonj 	    resolve_lofs_mnt_max - resolve_lofs_mnts);
332108322fbScarlsonj 	resolve_lofs_mnts = resolve_lofs_mnt_max = NULL;
333108322fbScarlsonj }
334108322fbScarlsonj 
335108322fbScarlsonj static int
336108322fbScarlsonj lofs_read_mnttab(zlog_t *zlogp)
337108322fbScarlsonj {
338108322fbScarlsonj 	FILE *mnttab;
339108322fbScarlsonj 	uint_t nmnts;
340108322fbScarlsonj 
341108322fbScarlsonj 	if ((mnttab = fopen(MNTTAB, "r")) == NULL)
342108322fbScarlsonj 		return (-1);
343108322fbScarlsonj 	if (build_mnttable(zlogp, "", 0, mnttab, &resolve_lofs_mnts,
344108322fbScarlsonj 	    &nmnts) == -1) {
345108322fbScarlsonj 		(void) fclose(mnttab);
346108322fbScarlsonj 		return (-1);
347108322fbScarlsonj 	}
348108322fbScarlsonj 	(void) fclose(mnttab);
349108322fbScarlsonj 	resolve_lofs_mnt_max = resolve_lofs_mnts + nmnts;
350108322fbScarlsonj 	return (0);
351108322fbScarlsonj }
352108322fbScarlsonj 
353108322fbScarlsonj /*
354108322fbScarlsonj  * This function loops over potential loopback mounts and symlinks in a given
355108322fbScarlsonj  * path and resolves them all down to an absolute path.
356108322fbScarlsonj  */
357108322fbScarlsonj static void
358108322fbScarlsonj resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen)
359108322fbScarlsonj {
360108322fbScarlsonj 	int len, arlen;
361108322fbScarlsonj 	const char *altroot;
362108322fbScarlsonj 	char tmppath[MAXPATHLEN];
363108322fbScarlsonj 	boolean_t outside_altroot;
364108322fbScarlsonj 
365108322fbScarlsonj 	if ((len = resolvepath(path, tmppath, sizeof (tmppath))) == -1)
366108322fbScarlsonj 		return;
367108322fbScarlsonj 	tmppath[len] = '\0';
368108322fbScarlsonj 	(void) strlcpy(path, tmppath, sizeof (tmppath));
369108322fbScarlsonj 
370108322fbScarlsonj 	/* This happens once per zoneadmd operation. */
371108322fbScarlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
372108322fbScarlsonj 		return;
373108322fbScarlsonj 
374108322fbScarlsonj 	altroot = zonecfg_get_root();
375108322fbScarlsonj 	arlen = strlen(altroot);
376108322fbScarlsonj 	outside_altroot = B_FALSE;
377108322fbScarlsonj 	for (;;) {
378108322fbScarlsonj 		struct mnttab *mnp;
379108322fbScarlsonj 
380108322fbScarlsonj 		for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
381108322fbScarlsonj 		    mnp++) {
382108322fbScarlsonj 			if (mnp->mnt_fstype == NULL ||
383108322fbScarlsonj 			    mnp->mnt_mountp == NULL ||
384108322fbScarlsonj 			    mnp->mnt_special == NULL ||
385108322fbScarlsonj 			    strcmp(mnp->mnt_fstype, MNTTYPE_LOFS) != 0)
386108322fbScarlsonj 				continue;
387108322fbScarlsonj 			len = strlen(mnp->mnt_mountp);
388108322fbScarlsonj 			if (strncmp(mnp->mnt_mountp, path, len) == 0 &&
389108322fbScarlsonj 			    (path[len] == '/' || path[len] == '\0'))
390108322fbScarlsonj 				break;
391108322fbScarlsonj 		}
392108322fbScarlsonj 		if (mnp >= resolve_lofs_mnt_max)
393108322fbScarlsonj 			break;
394108322fbScarlsonj 		if (outside_altroot) {
395108322fbScarlsonj 			char *cp;
396108322fbScarlsonj 			int olen = sizeof (MNTOPT_RO) - 1;
397108322fbScarlsonj 
398108322fbScarlsonj 			/*
399108322fbScarlsonj 			 * If we run into a read-only mount outside of the
400108322fbScarlsonj 			 * alternate root environment, then the user doesn't
401108322fbScarlsonj 			 * want this path to be made read-write.
402108322fbScarlsonj 			 */
403108322fbScarlsonj 			if (mnp->mnt_mntopts != NULL &&
404108322fbScarlsonj 			    (cp = strstr(mnp->mnt_mntopts, MNTOPT_RO)) !=
405108322fbScarlsonj 			    NULL &&
406108322fbScarlsonj 			    (cp == mnp->mnt_mntopts || cp[-1] == ',') &&
407108322fbScarlsonj 			    (cp[olen] == '\0' || cp[olen] == ',')) {
408108322fbScarlsonj 				break;
409108322fbScarlsonj 			}
410108322fbScarlsonj 		} else if (arlen > 0 &&
411108322fbScarlsonj 		    (strncmp(mnp->mnt_special, altroot, arlen) != 0 ||
412108322fbScarlsonj 		    (mnp->mnt_special[arlen] != '\0' &&
413108322fbScarlsonj 		    mnp->mnt_special[arlen] != '/'))) {
414108322fbScarlsonj 			outside_altroot = B_TRUE;
415108322fbScarlsonj 		}
416108322fbScarlsonj 		/* use temporary buffer because new path might be longer */
417108322fbScarlsonj 		(void) snprintf(tmppath, sizeof (tmppath), "%s%s",
418108322fbScarlsonj 		    mnp->mnt_special, path + len);
419108322fbScarlsonj 		if ((len = resolvepath(tmppath, path, pathlen)) == -1)
420108322fbScarlsonj 			break;
421108322fbScarlsonj 		path[len] = '\0';
422108322fbScarlsonj 	}
423108322fbScarlsonj }
424108322fbScarlsonj 
425108322fbScarlsonj /*
426108322fbScarlsonj  * For a regular mount, check if a replacement lofs mount is needed because the
427108322fbScarlsonj  * referenced device is already mounted somewhere.
428108322fbScarlsonj  */
429108322fbScarlsonj static int
430108322fbScarlsonj check_lofs_needed(zlog_t *zlogp, struct zone_fstab *fsptr)
431108322fbScarlsonj {
432108322fbScarlsonj 	struct mnttab *mnp;
433108322fbScarlsonj 	zone_fsopt_t *optptr, *onext;
434108322fbScarlsonj 
435108322fbScarlsonj 	/* This happens once per zoneadmd operation. */
436108322fbScarlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
437108322fbScarlsonj 		return (-1);
438108322fbScarlsonj 
439108322fbScarlsonj 	/*
440108322fbScarlsonj 	 * If this special node isn't already in use, then it's ours alone;
441108322fbScarlsonj 	 * no need to worry about conflicting mounts.
442108322fbScarlsonj 	 */
443108322fbScarlsonj 	for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max;
444108322fbScarlsonj 	    mnp++) {
445108322fbScarlsonj 		if (strcmp(mnp->mnt_special, fsptr->zone_fs_special) == 0)
446108322fbScarlsonj 			break;
447108322fbScarlsonj 	}
448108322fbScarlsonj 	if (mnp >= resolve_lofs_mnt_max)
449108322fbScarlsonj 		return (0);
450108322fbScarlsonj 
451108322fbScarlsonj 	/*
452108322fbScarlsonj 	 * Convert this duplicate mount into a lofs mount.
453108322fbScarlsonj 	 */
454108322fbScarlsonj 	(void) strlcpy(fsptr->zone_fs_special, mnp->mnt_mountp,
455108322fbScarlsonj 	    sizeof (fsptr->zone_fs_special));
456108322fbScarlsonj 	(void) strlcpy(fsptr->zone_fs_type, MNTTYPE_LOFS,
457108322fbScarlsonj 	    sizeof (fsptr->zone_fs_type));
458108322fbScarlsonj 	fsptr->zone_fs_raw[0] = '\0';
459108322fbScarlsonj 
460108322fbScarlsonj 	/*
461108322fbScarlsonj 	 * Discard all but one of the original options and set that to be the
462108322fbScarlsonj 	 * same set of options used for inherit package directory resources.
463108322fbScarlsonj 	 */
464108322fbScarlsonj 	optptr = fsptr->zone_fs_options;
465108322fbScarlsonj 	if (optptr == NULL) {
466108322fbScarlsonj 		optptr = malloc(sizeof (*optptr));
467108322fbScarlsonj 		if (optptr == NULL) {
468108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s",
469108322fbScarlsonj 			    fsptr->zone_fs_dir);
470108322fbScarlsonj 			return (-1);
471108322fbScarlsonj 		}
472108322fbScarlsonj 	} else {
473108322fbScarlsonj 		while ((onext = optptr->zone_fsopt_next) != NULL) {
474108322fbScarlsonj 			optptr->zone_fsopt_next = onext->zone_fsopt_next;
475108322fbScarlsonj 			free(onext);
476108322fbScarlsonj 		}
477108322fbScarlsonj 	}
478108322fbScarlsonj 	(void) strcpy(optptr->zone_fsopt_opt, IPD_DEFAULT_OPTS);
479108322fbScarlsonj 	optptr->zone_fsopt_next = NULL;
480108322fbScarlsonj 	fsptr->zone_fs_options = optptr;
481108322fbScarlsonj 	return (0);
482108322fbScarlsonj }
483108322fbScarlsonj 
4847c478bd9Sstevel@tonic-gate static int
4857c478bd9Sstevel@tonic-gate make_one_dir(zlog_t *zlogp, const char *prefix, const char *subdir, mode_t mode)
4867c478bd9Sstevel@tonic-gate {
4877c478bd9Sstevel@tonic-gate 	char path[MAXPATHLEN];
4887c478bd9Sstevel@tonic-gate 	struct stat st;
4897c478bd9Sstevel@tonic-gate 
4907c478bd9Sstevel@tonic-gate 	if (snprintf(path, sizeof (path), "%s%s", prefix, subdir) >
4917c478bd9Sstevel@tonic-gate 	    sizeof (path)) {
4927c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "pathname %s%s is too long", prefix,
4937c478bd9Sstevel@tonic-gate 		    subdir);
4947c478bd9Sstevel@tonic-gate 		return (-1);
4957c478bd9Sstevel@tonic-gate 	}
4967c478bd9Sstevel@tonic-gate 
4977c478bd9Sstevel@tonic-gate 	if (lstat(path, &st) == 0) {
4987c478bd9Sstevel@tonic-gate 		/*
4997c478bd9Sstevel@tonic-gate 		 * We don't check the file mode since presumably the zone
5007c478bd9Sstevel@tonic-gate 		 * administrator may have had good reason to change the mode,
5017c478bd9Sstevel@tonic-gate 		 * and we don't need to second guess him.
5027c478bd9Sstevel@tonic-gate 		 */
5037c478bd9Sstevel@tonic-gate 		if (!S_ISDIR(st.st_mode)) {
50445916cd2Sjpk 			if (is_system_labeled() &&
50545916cd2Sjpk 			    S_ISREG(st.st_mode)) {
50645916cd2Sjpk 				/*
50745916cd2Sjpk 				 * The need to mount readonly copies of
50845916cd2Sjpk 				 * global zone /etc/ files is unique to
50945916cd2Sjpk 				 * Trusted Extensions.
51045916cd2Sjpk 				 */
51145916cd2Sjpk 				if (strncmp(subdir, "/etc/",
51245916cd2Sjpk 				    strlen("/etc/")) != 0) {
51345916cd2Sjpk 					zerror(zlogp, B_FALSE,
51445916cd2Sjpk 					    "%s is not in /etc", path);
5157c478bd9Sstevel@tonic-gate 					return (-1);
5167c478bd9Sstevel@tonic-gate 				}
51745916cd2Sjpk 			} else {
51845916cd2Sjpk 				zerror(zlogp, B_FALSE,
51945916cd2Sjpk 				    "%s is not a directory", path);
52045916cd2Sjpk 				return (-1);
52145916cd2Sjpk 			}
52245916cd2Sjpk 		}
5237c478bd9Sstevel@tonic-gate 	} else if (mkdirp(path, mode) != 0) {
5247c478bd9Sstevel@tonic-gate 		if (errno == EROFS)
5257c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "Could not mkdir %s.\nIt is on "
5267c478bd9Sstevel@tonic-gate 			    "a read-only file system in this local zone.\nMake "
5277c478bd9Sstevel@tonic-gate 			    "sure %s exists in the global zone.", path, subdir);
5287c478bd9Sstevel@tonic-gate 		else
5297c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "mkdirp of %s failed", path);
5307c478bd9Sstevel@tonic-gate 		return (-1);
5317c478bd9Sstevel@tonic-gate 	}
5327c478bd9Sstevel@tonic-gate 	return (0);
5337c478bd9Sstevel@tonic-gate }
5347c478bd9Sstevel@tonic-gate 
/*
 * Release a NULL-terminated array of heap-allocated strings, such as the one
 * returned by get_remote_fstypes(), along with the array itself.  A NULL
 * array is accepted and ignored.
 */
static void
free_remote_fstypes(char **types)
{
	char **cur;

	if (types != NULL) {
		for (cur = types; *cur != NULL; cur++)
			free(*cur);
		free(types);
	}
}
5467c478bd9Sstevel@tonic-gate 
5477c478bd9Sstevel@tonic-gate static char **
5487c478bd9Sstevel@tonic-gate get_remote_fstypes(zlog_t *zlogp)
5497c478bd9Sstevel@tonic-gate {
5507c478bd9Sstevel@tonic-gate 	char **types = NULL;
5517c478bd9Sstevel@tonic-gate 	FILE *fp;
5527c478bd9Sstevel@tonic-gate 	char buf[MAXPATHLEN];
5537c478bd9Sstevel@tonic-gate 	char fstype[MAXPATHLEN];
5547c478bd9Sstevel@tonic-gate 	uint_t lines = 0;
5557c478bd9Sstevel@tonic-gate 	uint_t i;
5567c478bd9Sstevel@tonic-gate 
5577c478bd9Sstevel@tonic-gate 	if ((fp = fopen(DFSTYPES, "r")) == NULL) {
5587c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "failed to open %s", DFSTYPES);
5597c478bd9Sstevel@tonic-gate 		return (NULL);
5607c478bd9Sstevel@tonic-gate 	}
5617c478bd9Sstevel@tonic-gate 	/*
5627c478bd9Sstevel@tonic-gate 	 * Count the number of lines
5637c478bd9Sstevel@tonic-gate 	 */
5647c478bd9Sstevel@tonic-gate 	while (fgets(buf, sizeof (buf), fp) != NULL)
5657c478bd9Sstevel@tonic-gate 		lines++;
5667c478bd9Sstevel@tonic-gate 	if (lines == 0)	/* didn't read anything; empty file */
5677c478bd9Sstevel@tonic-gate 		goto out;
5687c478bd9Sstevel@tonic-gate 	rewind(fp);
5697c478bd9Sstevel@tonic-gate 	/*
5707c478bd9Sstevel@tonic-gate 	 * Allocate enough space for a NULL-terminated array.
5717c478bd9Sstevel@tonic-gate 	 */
5727c478bd9Sstevel@tonic-gate 	types = calloc(lines + 1, sizeof (char *));
5737c478bd9Sstevel@tonic-gate 	if (types == NULL) {
5747c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "memory allocation failed");
5757c478bd9Sstevel@tonic-gate 		goto out;
5767c478bd9Sstevel@tonic-gate 	}
5777c478bd9Sstevel@tonic-gate 	i = 0;
5787c478bd9Sstevel@tonic-gate 	while (fgets(buf, sizeof (buf), fp) != NULL) {
5797c478bd9Sstevel@tonic-gate 		/* LINTED - fstype is big enough to hold buf */
5807c478bd9Sstevel@tonic-gate 		if (sscanf(buf, "%s", fstype) == 0) {
5817c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "unable to parse %s", DFSTYPES);
5827c478bd9Sstevel@tonic-gate 			free_remote_fstypes(types);
5837c478bd9Sstevel@tonic-gate 			types = NULL;
5847c478bd9Sstevel@tonic-gate 			goto out;
5857c478bd9Sstevel@tonic-gate 		}
5867c478bd9Sstevel@tonic-gate 		types[i] = strdup(fstype);
5877c478bd9Sstevel@tonic-gate 		if (types[i] == NULL) {
5887c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "memory allocation failed");
5897c478bd9Sstevel@tonic-gate 			free_remote_fstypes(types);
5907c478bd9Sstevel@tonic-gate 			types = NULL;
5917c478bd9Sstevel@tonic-gate 			goto out;
5927c478bd9Sstevel@tonic-gate 		}
5937c478bd9Sstevel@tonic-gate 		i++;
5947c478bd9Sstevel@tonic-gate 	}
5957c478bd9Sstevel@tonic-gate out:
5967c478bd9Sstevel@tonic-gate 	(void) fclose(fp);
5977c478bd9Sstevel@tonic-gate 	return (types);
5987c478bd9Sstevel@tonic-gate }
5997c478bd9Sstevel@tonic-gate 
6007c478bd9Sstevel@tonic-gate static boolean_t
6017c478bd9Sstevel@tonic-gate is_remote_fstype(const char *fstype, char *const *remote_fstypes)
6027c478bd9Sstevel@tonic-gate {
6037c478bd9Sstevel@tonic-gate 	uint_t i;
6047c478bd9Sstevel@tonic-gate 
6057c478bd9Sstevel@tonic-gate 	if (remote_fstypes == NULL)
6067c478bd9Sstevel@tonic-gate 		return (B_FALSE);
6077c478bd9Sstevel@tonic-gate 	for (i = 0; remote_fstypes[i] != NULL; i++) {
6087c478bd9Sstevel@tonic-gate 		if (strcmp(remote_fstypes[i], fstype) == 0)
6097c478bd9Sstevel@tonic-gate 			return (B_TRUE);
6107c478bd9Sstevel@tonic-gate 	}
6117c478bd9Sstevel@tonic-gate 	return (B_FALSE);
6127c478bd9Sstevel@tonic-gate }
6137c478bd9Sstevel@tonic-gate 
614108322fbScarlsonj /*
615108322fbScarlsonj  * This converts a zone root path (normally of the form .../root) to a Live
616108322fbScarlsonj  * Upgrade scratch zone root (of the form .../lu).
617108322fbScarlsonj  */
6187c478bd9Sstevel@tonic-gate static void
619108322fbScarlsonj root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
6207c478bd9Sstevel@tonic-gate {
621108322fbScarlsonj 	if (!isresolved && zonecfg_in_alt_root())
622108322fbScarlsonj 		resolve_lofs(zlogp, zroot, zrootlen);
623108322fbScarlsonj 	(void) strcpy(strrchr(zroot, '/') + 1, "lu");
6247c478bd9Sstevel@tonic-gate }
6257c478bd9Sstevel@tonic-gate 
6267c478bd9Sstevel@tonic-gate /*
6277c478bd9Sstevel@tonic-gate  * The general strategy for unmounting filesystems is as follows:
6287c478bd9Sstevel@tonic-gate  *
6297c478bd9Sstevel@tonic-gate  * - Remote filesystems may be dead, and attempting to contact them as
6307c478bd9Sstevel@tonic-gate  * part of a regular unmount may hang forever; we want to always try to
6317c478bd9Sstevel@tonic-gate  * forcibly unmount such filesystems and only fall back to regular
6327c478bd9Sstevel@tonic-gate  * unmounts if the filesystem doesn't support forced unmounts.
6337c478bd9Sstevel@tonic-gate  *
6347c478bd9Sstevel@tonic-gate  * - We don't want to unnecessarily corrupt metadata on local
6357c478bd9Sstevel@tonic-gate  * filesystems (ie UFS), so we want to start off with graceful unmounts,
6367c478bd9Sstevel@tonic-gate  * and only escalate to doing forced unmounts if we get stuck.
6377c478bd9Sstevel@tonic-gate  *
6387c478bd9Sstevel@tonic-gate  * We start off walking backwards through the mount table.  This doesn't
6397c478bd9Sstevel@tonic-gate  * give us strict ordering but ensures that we try to unmount submounts
6407c478bd9Sstevel@tonic-gate  * first.  We thus limit the number of failed umount2(2) calls.
6417c478bd9Sstevel@tonic-gate  *
6427c478bd9Sstevel@tonic-gate  * The mechanism for determining if we're stuck is to count the number
6437c478bd9Sstevel@tonic-gate  * of failed unmounts each iteration through the mount table.  This
6447c478bd9Sstevel@tonic-gate  * gives us an upper bound on the number of filesystems which remain
6457c478bd9Sstevel@tonic-gate  * mounted (autofs trigger nodes are dealt with separately).  If at the
6467c478bd9Sstevel@tonic-gate  * end of one unmount+autofs_cleanup cycle we still have the same number
6477c478bd9Sstevel@tonic-gate  * of mounts that we started out with, we're stuck and try a forced
6487c478bd9Sstevel@tonic-gate  * unmount.  If that fails (filesystem doesn't support forced unmounts)
6497c478bd9Sstevel@tonic-gate  * then we bail and are unable to teardown the zone.  If it succeeds,
6507c478bd9Sstevel@tonic-gate  * we're no longer stuck so we continue with our policy of trying
6517c478bd9Sstevel@tonic-gate  * graceful mounts first.
6527c478bd9Sstevel@tonic-gate  *
6537c478bd9Sstevel@tonic-gate  * Zone must be down (ie, no processes or threads active).
6547c478bd9Sstevel@tonic-gate  */
6557c478bd9Sstevel@tonic-gate static int
656108322fbScarlsonj unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
6577c478bd9Sstevel@tonic-gate {
6587c478bd9Sstevel@tonic-gate 	int error = 0;
6597c478bd9Sstevel@tonic-gate 	FILE *mnttab;
6607c478bd9Sstevel@tonic-gate 	struct mnttab *mnts;
6617c478bd9Sstevel@tonic-gate 	uint_t nmnt;
6627c478bd9Sstevel@tonic-gate 	char zroot[MAXPATHLEN + 1];
6637c478bd9Sstevel@tonic-gate 	size_t zrootlen;
6647c478bd9Sstevel@tonic-gate 	uint_t oldcount = UINT_MAX;
6657c478bd9Sstevel@tonic-gate 	boolean_t stuck = B_FALSE;
6667c478bd9Sstevel@tonic-gate 	char **remote_fstypes = NULL;
6677c478bd9Sstevel@tonic-gate 
6687c478bd9Sstevel@tonic-gate 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
6697c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "unable to determine zone root");
6707c478bd9Sstevel@tonic-gate 		return (-1);
6717c478bd9Sstevel@tonic-gate 	}
672108322fbScarlsonj 	if (unmount_cmd)
673108322fbScarlsonj 		root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
6747c478bd9Sstevel@tonic-gate 
6757c478bd9Sstevel@tonic-gate 	(void) strcat(zroot, "/");
6767c478bd9Sstevel@tonic-gate 	zrootlen = strlen(zroot);
6777c478bd9Sstevel@tonic-gate 
67845916cd2Sjpk 	/*
67945916cd2Sjpk 	 * For Trusted Extensions unmount each higher level zone's mount
68045916cd2Sjpk 	 * of our zone's /export/home
68145916cd2Sjpk 	 */
68248451833Scarlsonj 	if (!unmount_cmd)
68345916cd2Sjpk 		tsol_unmounts(zlogp, zone_name);
68445916cd2Sjpk 
6857c478bd9Sstevel@tonic-gate 	if ((mnttab = fopen(MNTTAB, "r")) == NULL) {
6867c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "failed to open %s", MNTTAB);
6877c478bd9Sstevel@tonic-gate 		return (-1);
6887c478bd9Sstevel@tonic-gate 	}
6897c478bd9Sstevel@tonic-gate 	/*
6907c478bd9Sstevel@tonic-gate 	 * Use our hacky mntfs ioctl so we see everything, even mounts with
6917c478bd9Sstevel@tonic-gate 	 * MS_NOMNTTAB.
6927c478bd9Sstevel@tonic-gate 	 */
6937c478bd9Sstevel@tonic-gate 	if (ioctl(fileno(mnttab), MNTIOC_SHOWHIDDEN, NULL) < 0) {
6947c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to configure %s", MNTTAB);
6957c478bd9Sstevel@tonic-gate 		error++;
6967c478bd9Sstevel@tonic-gate 		goto out;
6977c478bd9Sstevel@tonic-gate 	}
6987c478bd9Sstevel@tonic-gate 
6997c478bd9Sstevel@tonic-gate 	/*
7007c478bd9Sstevel@tonic-gate 	 * Build the list of remote fstypes so we know which ones we
7017c478bd9Sstevel@tonic-gate 	 * should forcibly unmount.
7027c478bd9Sstevel@tonic-gate 	 */
7037c478bd9Sstevel@tonic-gate 	remote_fstypes = get_remote_fstypes(zlogp);
7047c478bd9Sstevel@tonic-gate 	for (; /* ever */; ) {
7057c478bd9Sstevel@tonic-gate 		uint_t newcount = 0;
7067c478bd9Sstevel@tonic-gate 		boolean_t unmounted;
7077c478bd9Sstevel@tonic-gate 		struct mnttab *mnp;
7087c478bd9Sstevel@tonic-gate 		char *path;
7097c478bd9Sstevel@tonic-gate 		uint_t i;
7107c478bd9Sstevel@tonic-gate 
7117c478bd9Sstevel@tonic-gate 		mnts = NULL;
7127c478bd9Sstevel@tonic-gate 		nmnt = 0;
7137c478bd9Sstevel@tonic-gate 		/*
7147c478bd9Sstevel@tonic-gate 		 * MNTTAB gives us a way to walk through mounted
7157c478bd9Sstevel@tonic-gate 		 * filesystems; we need to be able to walk them in
7167c478bd9Sstevel@tonic-gate 		 * reverse order, so we build a list of all mounted
7177c478bd9Sstevel@tonic-gate 		 * filesystems.
7187c478bd9Sstevel@tonic-gate 		 */
7197c478bd9Sstevel@tonic-gate 		if (build_mnttable(zlogp, zroot, zrootlen, mnttab, &mnts,
7207c478bd9Sstevel@tonic-gate 		    &nmnt) != 0) {
7217c478bd9Sstevel@tonic-gate 			error++;
7227c478bd9Sstevel@tonic-gate 			goto out;
7237c478bd9Sstevel@tonic-gate 		}
7247c478bd9Sstevel@tonic-gate 		for (i = 0; i < nmnt; i++) {
7257c478bd9Sstevel@tonic-gate 			mnp = &mnts[nmnt - i - 1]; /* access in reverse order */
7267c478bd9Sstevel@tonic-gate 			path = mnp->mnt_mountp;
7277c478bd9Sstevel@tonic-gate 			unmounted = B_FALSE;
7287c478bd9Sstevel@tonic-gate 			/*
7297c478bd9Sstevel@tonic-gate 			 * Try forced unmount first for remote filesystems.
7307c478bd9Sstevel@tonic-gate 			 *
7317c478bd9Sstevel@tonic-gate 			 * Not all remote filesystems support forced unmounts,
7327c478bd9Sstevel@tonic-gate 			 * so if this fails (ENOTSUP) we'll continue on
7337c478bd9Sstevel@tonic-gate 			 * and try a regular unmount.
7347c478bd9Sstevel@tonic-gate 			 */
7357c478bd9Sstevel@tonic-gate 			if (is_remote_fstype(mnp->mnt_fstype, remote_fstypes)) {
7367c478bd9Sstevel@tonic-gate 				if (umount2(path, MS_FORCE) == 0)
7377c478bd9Sstevel@tonic-gate 					unmounted = B_TRUE;
7387c478bd9Sstevel@tonic-gate 			}
7397c478bd9Sstevel@tonic-gate 			/*
7407c478bd9Sstevel@tonic-gate 			 * Try forced unmount if we're stuck.
7417c478bd9Sstevel@tonic-gate 			 */
7427c478bd9Sstevel@tonic-gate 			if (stuck) {
7437c478bd9Sstevel@tonic-gate 				if (umount2(path, MS_FORCE) == 0) {
7447c478bd9Sstevel@tonic-gate 					unmounted = B_TRUE;
7457c478bd9Sstevel@tonic-gate 					stuck = B_FALSE;
7467c478bd9Sstevel@tonic-gate 				} else {
7477c478bd9Sstevel@tonic-gate 					/*
7487c478bd9Sstevel@tonic-gate 					 * The first failure indicates a
7497c478bd9Sstevel@tonic-gate 					 * mount we won't be able to get
7507c478bd9Sstevel@tonic-gate 					 * rid of automatically, so we
7517c478bd9Sstevel@tonic-gate 					 * bail.
7527c478bd9Sstevel@tonic-gate 					 */
7537c478bd9Sstevel@tonic-gate 					error++;
7547c478bd9Sstevel@tonic-gate 					zerror(zlogp, B_FALSE,
7557c478bd9Sstevel@tonic-gate 					    "unable to unmount '%s'", path);
7567c478bd9Sstevel@tonic-gate 					free_mnttable(mnts, nmnt);
7577c478bd9Sstevel@tonic-gate 					goto out;
7587c478bd9Sstevel@tonic-gate 				}
7597c478bd9Sstevel@tonic-gate 			}
7607c478bd9Sstevel@tonic-gate 			/*
7617c478bd9Sstevel@tonic-gate 			 * Try regular unmounts for everything else.
7627c478bd9Sstevel@tonic-gate 			 */
7637c478bd9Sstevel@tonic-gate 			if (!unmounted && umount2(path, 0) != 0)
7647c478bd9Sstevel@tonic-gate 				newcount++;
7657c478bd9Sstevel@tonic-gate 		}
7667c478bd9Sstevel@tonic-gate 		free_mnttable(mnts, nmnt);
7677c478bd9Sstevel@tonic-gate 
7687c478bd9Sstevel@tonic-gate 		if (newcount == 0)
7697c478bd9Sstevel@tonic-gate 			break;
7707c478bd9Sstevel@tonic-gate 		if (newcount >= oldcount) {
7717c478bd9Sstevel@tonic-gate 			/*
7727c478bd9Sstevel@tonic-gate 			 * Last round didn't unmount anything; we're stuck and
7737c478bd9Sstevel@tonic-gate 			 * should start trying forced unmounts.
7747c478bd9Sstevel@tonic-gate 			 */
7757c478bd9Sstevel@tonic-gate 			stuck = B_TRUE;
7767c478bd9Sstevel@tonic-gate 		}
7777c478bd9Sstevel@tonic-gate 		oldcount = newcount;
7787c478bd9Sstevel@tonic-gate 
7797c478bd9Sstevel@tonic-gate 		/*
7807c478bd9Sstevel@tonic-gate 		 * Autofs doesn't let you unmount its trigger nodes from
7817c478bd9Sstevel@tonic-gate 		 * userland so we have to tell the kernel to cleanup for us.
7827c478bd9Sstevel@tonic-gate 		 */
7837c478bd9Sstevel@tonic-gate 		if (autofs_cleanup(zoneid) != 0) {
7847c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "unable to remove autofs nodes");
7857c478bd9Sstevel@tonic-gate 			error++;
7867c478bd9Sstevel@tonic-gate 			goto out;
7877c478bd9Sstevel@tonic-gate 		}
7887c478bd9Sstevel@tonic-gate 	}
7897c478bd9Sstevel@tonic-gate 
7907c478bd9Sstevel@tonic-gate out:
7917c478bd9Sstevel@tonic-gate 	free_remote_fstypes(remote_fstypes);
7927c478bd9Sstevel@tonic-gate 	(void) fclose(mnttab);
7937c478bd9Sstevel@tonic-gate 	return (error ? -1 : 0);
7947c478bd9Sstevel@tonic-gate }
7957c478bd9Sstevel@tonic-gate 
7967c478bd9Sstevel@tonic-gate static int
7977c478bd9Sstevel@tonic-gate fs_compare(const void *m1, const void *m2)
7987c478bd9Sstevel@tonic-gate {
7997c478bd9Sstevel@tonic-gate 	struct zone_fstab *i = (struct zone_fstab *)m1;
8007c478bd9Sstevel@tonic-gate 	struct zone_fstab *j = (struct zone_fstab *)m2;
8017c478bd9Sstevel@tonic-gate 
8027c478bd9Sstevel@tonic-gate 	return (strcmp(i->zone_fs_dir, j->zone_fs_dir));
8037c478bd9Sstevel@tonic-gate }
8047c478bd9Sstevel@tonic-gate 
8057c478bd9Sstevel@tonic-gate /*
8067c478bd9Sstevel@tonic-gate  * Fork and exec (and wait for) the mentioned binary with the provided
8077c478bd9Sstevel@tonic-gate  * arguments.  Returns (-1) if something went wrong with fork(2) or exec(2),
8087c478bd9Sstevel@tonic-gate  * returns the exit status otherwise.
8097c478bd9Sstevel@tonic-gate  *
8107c478bd9Sstevel@tonic-gate  * If we were unable to exec the provided pathname (for whatever
8117c478bd9Sstevel@tonic-gate  * reason), we return the special token ZEXIT_EXEC.  The current value
8127c478bd9Sstevel@tonic-gate  * of ZEXIT_EXEC doesn't conflict with legitimate exit codes of the
8137c478bd9Sstevel@tonic-gate  * consumers of this function; any future consumers must make sure this
8147c478bd9Sstevel@tonic-gate  * remains the case.
8157c478bd9Sstevel@tonic-gate  */
8167c478bd9Sstevel@tonic-gate static int
8177c478bd9Sstevel@tonic-gate forkexec(zlog_t *zlogp, const char *path, char *const argv[])
8187c478bd9Sstevel@tonic-gate {
8197c478bd9Sstevel@tonic-gate 	pid_t child_pid;
8207c478bd9Sstevel@tonic-gate 	int child_status = 0;
8217c478bd9Sstevel@tonic-gate 
8227c478bd9Sstevel@tonic-gate 	/*
8237c478bd9Sstevel@tonic-gate 	 * Do not let another thread localize a message while we are forking.
8247c478bd9Sstevel@tonic-gate 	 */
8257c478bd9Sstevel@tonic-gate 	(void) mutex_lock(&msglock);
8267c478bd9Sstevel@tonic-gate 	child_pid = fork();
8277c478bd9Sstevel@tonic-gate 	(void) mutex_unlock(&msglock);
8287c478bd9Sstevel@tonic-gate 	if (child_pid == -1) {
8297c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not fork for %s", argv[0]);
8307c478bd9Sstevel@tonic-gate 		return (-1);
8317c478bd9Sstevel@tonic-gate 	} else if (child_pid == 0) {
8327c478bd9Sstevel@tonic-gate 		closefrom(0);
8331390a385Sgjelinek 		/* redirect stdin, stdout & stderr to /dev/null */
8341390a385Sgjelinek 		(void) open("/dev/null", O_RDONLY);	/* stdin */
8351390a385Sgjelinek 		(void) open("/dev/null", O_WRONLY);	/* stdout */
8361390a385Sgjelinek 		(void) open("/dev/null", O_WRONLY);	/* stderr */
8377c478bd9Sstevel@tonic-gate 		(void) execv(path, argv);
8387c478bd9Sstevel@tonic-gate 		/*
8397c478bd9Sstevel@tonic-gate 		 * Since we are in the child, there is no point calling zerror()
8407c478bd9Sstevel@tonic-gate 		 * since there is nobody waiting to consume it.  So exit with a
8417c478bd9Sstevel@tonic-gate 		 * special code that the parent will recognize and call zerror()
8427c478bd9Sstevel@tonic-gate 		 * accordingly.
8437c478bd9Sstevel@tonic-gate 		 */
8447c478bd9Sstevel@tonic-gate 
8457c478bd9Sstevel@tonic-gate 		_exit(ZEXIT_EXEC);
8467c478bd9Sstevel@tonic-gate 	} else {
8477c478bd9Sstevel@tonic-gate 		(void) waitpid(child_pid, &child_status, 0);
8487c478bd9Sstevel@tonic-gate 	}
8497c478bd9Sstevel@tonic-gate 
8507c478bd9Sstevel@tonic-gate 	if (WIFSIGNALED(child_status)) {
8517c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
8527c478bd9Sstevel@tonic-gate 		    "signal %d", path, WTERMSIG(child_status));
8537c478bd9Sstevel@tonic-gate 		return (-1);
8547c478bd9Sstevel@tonic-gate 	}
8557c478bd9Sstevel@tonic-gate 	assert(WIFEXITED(child_status));
8567c478bd9Sstevel@tonic-gate 	if (WEXITSTATUS(child_status) == ZEXIT_EXEC) {
8577c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "failed to exec %s", path);
8587c478bd9Sstevel@tonic-gate 		return (-1);
8597c478bd9Sstevel@tonic-gate 	}
8607c478bd9Sstevel@tonic-gate 	return (WEXITSTATUS(child_status));
8617c478bd9Sstevel@tonic-gate }
8627c478bd9Sstevel@tonic-gate 
8637c478bd9Sstevel@tonic-gate static int
8647c478bd9Sstevel@tonic-gate dofsck(zlog_t *zlogp, const char *fstype, const char *rawdev)
8657c478bd9Sstevel@tonic-gate {
8667c478bd9Sstevel@tonic-gate 	char cmdbuf[MAXPATHLEN];
8677c478bd9Sstevel@tonic-gate 	char *argv[4];
8687c478bd9Sstevel@tonic-gate 	int status;
8697c478bd9Sstevel@tonic-gate 
8707c478bd9Sstevel@tonic-gate 	/*
8717c478bd9Sstevel@tonic-gate 	 * We could alternatively have called /usr/sbin/fsck -F <fstype>, but
8727c478bd9Sstevel@tonic-gate 	 * that would cost us an extra fork/exec without buying us anything.
8737c478bd9Sstevel@tonic-gate 	 */
8747c478bd9Sstevel@tonic-gate 	if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/fsck", fstype)
8757c478bd9Sstevel@tonic-gate 	    > sizeof (cmdbuf)) {
8767c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "file-system type %s too long", fstype);
8777c478bd9Sstevel@tonic-gate 		return (-1);
8787c478bd9Sstevel@tonic-gate 	}
8797c478bd9Sstevel@tonic-gate 
8807c478bd9Sstevel@tonic-gate 	argv[0] = "fsck";
8817c478bd9Sstevel@tonic-gate 	argv[1] = "-m";
8827c478bd9Sstevel@tonic-gate 	argv[2] = (char *)rawdev;
8837c478bd9Sstevel@tonic-gate 	argv[3] = NULL;
8847c478bd9Sstevel@tonic-gate 
8857c478bd9Sstevel@tonic-gate 	status = forkexec(zlogp, cmdbuf, argv);
8867c478bd9Sstevel@tonic-gate 	if (status == 0 || status == -1)
8877c478bd9Sstevel@tonic-gate 		return (status);
8887c478bd9Sstevel@tonic-gate 	zerror(zlogp, B_FALSE, "fsck of '%s' failed with exit status %d; "
8897c478bd9Sstevel@tonic-gate 	    "run fsck manually", rawdev, status);
8907c478bd9Sstevel@tonic-gate 	return (-1);
8917c478bd9Sstevel@tonic-gate }
8927c478bd9Sstevel@tonic-gate 
8937c478bd9Sstevel@tonic-gate static int
8947c478bd9Sstevel@tonic-gate domount(zlog_t *zlogp, const char *fstype, const char *opts,
8957c478bd9Sstevel@tonic-gate     const char *special, const char *directory)
8967c478bd9Sstevel@tonic-gate {
8977c478bd9Sstevel@tonic-gate 	char cmdbuf[MAXPATHLEN];
8987c478bd9Sstevel@tonic-gate 	char *argv[6];
8997c478bd9Sstevel@tonic-gate 	int status;
9007c478bd9Sstevel@tonic-gate 
9017c478bd9Sstevel@tonic-gate 	/*
9027c478bd9Sstevel@tonic-gate 	 * We could alternatively have called /usr/sbin/mount -F <fstype>, but
9037c478bd9Sstevel@tonic-gate 	 * that would cost us an extra fork/exec without buying us anything.
9047c478bd9Sstevel@tonic-gate 	 */
9057c478bd9Sstevel@tonic-gate 	if (snprintf(cmdbuf, sizeof (cmdbuf), "/usr/lib/fs/%s/mount", fstype)
9067c478bd9Sstevel@tonic-gate 	    > sizeof (cmdbuf)) {
9077c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "file-system type %s too long", fstype);
9087c478bd9Sstevel@tonic-gate 		return (-1);
9097c478bd9Sstevel@tonic-gate 	}
9107c478bd9Sstevel@tonic-gate 	argv[0] = "mount";
9117c478bd9Sstevel@tonic-gate 	if (opts[0] == '\0') {
9127c478bd9Sstevel@tonic-gate 		argv[1] = (char *)special;
9137c478bd9Sstevel@tonic-gate 		argv[2] = (char *)directory;
9147c478bd9Sstevel@tonic-gate 		argv[3] = NULL;
9157c478bd9Sstevel@tonic-gate 	} else {
9167c478bd9Sstevel@tonic-gate 		argv[1] = "-o";
9177c478bd9Sstevel@tonic-gate 		argv[2] = (char *)opts;
9187c478bd9Sstevel@tonic-gate 		argv[3] = (char *)special;
9197c478bd9Sstevel@tonic-gate 		argv[4] = (char *)directory;
9207c478bd9Sstevel@tonic-gate 		argv[5] = NULL;
9217c478bd9Sstevel@tonic-gate 	}
9227c478bd9Sstevel@tonic-gate 
9237c478bd9Sstevel@tonic-gate 	status = forkexec(zlogp, cmdbuf, argv);
9247c478bd9Sstevel@tonic-gate 	if (status == 0 || status == -1)
9257c478bd9Sstevel@tonic-gate 		return (status);
9267c478bd9Sstevel@tonic-gate 	if (opts[0] == '\0')
9277c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "\"%s %s %s\" "
9287c478bd9Sstevel@tonic-gate 		    "failed with exit code %d",
9297c478bd9Sstevel@tonic-gate 		    cmdbuf, special, directory, status);
9307c478bd9Sstevel@tonic-gate 	else
9317c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "\"%s -o %s %s %s\" "
9327c478bd9Sstevel@tonic-gate 		    "failed with exit code %d",
9337c478bd9Sstevel@tonic-gate 		    cmdbuf, opts, special, directory, status);
9347c478bd9Sstevel@tonic-gate 	return (-1);
9357c478bd9Sstevel@tonic-gate }
9367c478bd9Sstevel@tonic-gate 
9377c478bd9Sstevel@tonic-gate /*
9387c478bd9Sstevel@tonic-gate  * Make sure if a given path exists, it is not a sym-link, and is a directory.
9397c478bd9Sstevel@tonic-gate  */
9407c478bd9Sstevel@tonic-gate static int
9417c478bd9Sstevel@tonic-gate check_path(zlog_t *zlogp, const char *path)
9427c478bd9Sstevel@tonic-gate {
9437c478bd9Sstevel@tonic-gate 	struct stat statbuf;
9447c478bd9Sstevel@tonic-gate 	char respath[MAXPATHLEN];
9457c478bd9Sstevel@tonic-gate 	int res;
9467c478bd9Sstevel@tonic-gate 
9477c478bd9Sstevel@tonic-gate 	if (lstat(path, &statbuf) != 0) {
9487c478bd9Sstevel@tonic-gate 		if (errno == ENOENT)
9497c478bd9Sstevel@tonic-gate 			return (0);
9507c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "can't stat %s", path);
9517c478bd9Sstevel@tonic-gate 		return (-1);
9527c478bd9Sstevel@tonic-gate 	}
9537c478bd9Sstevel@tonic-gate 	if (S_ISLNK(statbuf.st_mode)) {
9547c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s is a symlink", path);
9557c478bd9Sstevel@tonic-gate 		return (-1);
9567c478bd9Sstevel@tonic-gate 	}
9577c478bd9Sstevel@tonic-gate 	if (!S_ISDIR(statbuf.st_mode)) {
95845916cd2Sjpk 		if (is_system_labeled() && S_ISREG(statbuf.st_mode)) {
95945916cd2Sjpk 			/*
96045916cd2Sjpk 			 * The need to mount readonly copies of
96145916cd2Sjpk 			 * global zone /etc/ files is unique to
96245916cd2Sjpk 			 * Trusted Extensions.
96345916cd2Sjpk 			 * The check for /etc/ via strstr() is to
96445916cd2Sjpk 			 * allow paths like $ZONEROOT/etc/passwd
96545916cd2Sjpk 			 */
96645916cd2Sjpk 			if (strstr(path, "/etc/") == NULL) {
96745916cd2Sjpk 				zerror(zlogp, B_FALSE,
96845916cd2Sjpk 				    "%s is not in /etc", path);
96945916cd2Sjpk 				return (-1);
97045916cd2Sjpk 			}
97145916cd2Sjpk 		} else {
9727c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s is not a directory", path);
9737c478bd9Sstevel@tonic-gate 			return (-1);
9747c478bd9Sstevel@tonic-gate 		}
97545916cd2Sjpk 	}
9767c478bd9Sstevel@tonic-gate 	if ((res = resolvepath(path, respath, sizeof (respath))) == -1) {
9777c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to resolve path %s", path);
9787c478bd9Sstevel@tonic-gate 		return (-1);
9797c478bd9Sstevel@tonic-gate 	}
9807c478bd9Sstevel@tonic-gate 	respath[res] = '\0';
9817c478bd9Sstevel@tonic-gate 	if (strcmp(path, respath) != 0) {
9827c478bd9Sstevel@tonic-gate 		/*
9837c478bd9Sstevel@tonic-gate 		 * We don't like ".."s and "."s throwing us off
9847c478bd9Sstevel@tonic-gate 		 */
9857c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s is not a canonical path", path);
9867c478bd9Sstevel@tonic-gate 		return (-1);
9877c478bd9Sstevel@tonic-gate 	}
9887c478bd9Sstevel@tonic-gate 	return (0);
9897c478bd9Sstevel@tonic-gate }
9907c478bd9Sstevel@tonic-gate 
9917c478bd9Sstevel@tonic-gate /*
9927c478bd9Sstevel@tonic-gate  * Check every component of rootpath/relpath.  If any component fails (ie,
9937c478bd9Sstevel@tonic-gate  * exists but isn't the canonical path to a directory), it is returned in
9947c478bd9Sstevel@tonic-gate  * badpath, which is assumed to be at least of size MAXPATHLEN.
9957c478bd9Sstevel@tonic-gate  *
9967c478bd9Sstevel@tonic-gate  * Relpath must begin with '/'.
9977c478bd9Sstevel@tonic-gate  */
9987c478bd9Sstevel@tonic-gate static boolean_t
9997c478bd9Sstevel@tonic-gate valid_mount_path(zlog_t *zlogp, const char *rootpath, const char *relpath)
10007c478bd9Sstevel@tonic-gate {
10017c478bd9Sstevel@tonic-gate 	char abspath[MAXPATHLEN], *slashp;
10027c478bd9Sstevel@tonic-gate 
10037c478bd9Sstevel@tonic-gate 	/*
10047c478bd9Sstevel@tonic-gate 	 * Make sure abspath has at least one '/' after its rootpath
10057c478bd9Sstevel@tonic-gate 	 * component, and ends with '/'.
10067c478bd9Sstevel@tonic-gate 	 */
10077c478bd9Sstevel@tonic-gate 	if (snprintf(abspath, sizeof (abspath), "%s%s/", rootpath, relpath) >
10087c478bd9Sstevel@tonic-gate 	    sizeof (abspath)) {
10097c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "pathname %s%s is too long", rootpath,
10107c478bd9Sstevel@tonic-gate 		    relpath);
10117c478bd9Sstevel@tonic-gate 		return (B_FALSE);
10127c478bd9Sstevel@tonic-gate 	}
10137c478bd9Sstevel@tonic-gate 
10147c478bd9Sstevel@tonic-gate 	slashp = &abspath[strlen(rootpath)];
10157c478bd9Sstevel@tonic-gate 	assert(*slashp == '/');
10167c478bd9Sstevel@tonic-gate 	do {
10177c478bd9Sstevel@tonic-gate 		*slashp = '\0';
10187c478bd9Sstevel@tonic-gate 		if (check_path(zlogp, abspath) != 0)
10197c478bd9Sstevel@tonic-gate 			return (B_FALSE);
10207c478bd9Sstevel@tonic-gate 		*slashp = '/';
10217c478bd9Sstevel@tonic-gate 		slashp++;
10227c478bd9Sstevel@tonic-gate 	} while ((slashp = strchr(slashp, '/')) != NULL);
10237c478bd9Sstevel@tonic-gate 	return (B_TRUE);
10247c478bd9Sstevel@tonic-gate }
10257c478bd9Sstevel@tonic-gate 
10267c478bd9Sstevel@tonic-gate static int
10277c478bd9Sstevel@tonic-gate mount_one(zlog_t *zlogp, struct zone_fstab *fsptr, const char *rootpath)
10287c478bd9Sstevel@tonic-gate {
10297c478bd9Sstevel@tonic-gate 	char    path[MAXPATHLEN];
1030108322fbScarlsonj 	char	specpath[MAXPATHLEN];
10317c478bd9Sstevel@tonic-gate 	char    optstr[MAX_MNTOPT_STR];
10327c478bd9Sstevel@tonic-gate 	zone_fsopt_t *optptr;
10337c478bd9Sstevel@tonic-gate 
10347c478bd9Sstevel@tonic-gate 	if (!valid_mount_path(zlogp, rootpath, fsptr->zone_fs_dir)) {
10357c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s%s is not a valid mount point",
10367c478bd9Sstevel@tonic-gate 		    rootpath, fsptr->zone_fs_dir);
10377c478bd9Sstevel@tonic-gate 		return (-1);
10387c478bd9Sstevel@tonic-gate 	}
10397c478bd9Sstevel@tonic-gate 
10407c478bd9Sstevel@tonic-gate 	if (make_one_dir(zlogp, rootpath, fsptr->zone_fs_dir,
10417c478bd9Sstevel@tonic-gate 	    DEFAULT_DIR_MODE) != 0)
10427c478bd9Sstevel@tonic-gate 		return (-1);
10437c478bd9Sstevel@tonic-gate 
10447c478bd9Sstevel@tonic-gate 	(void) snprintf(path, sizeof (path), "%s%s", rootpath,
10457c478bd9Sstevel@tonic-gate 	    fsptr->zone_fs_dir);
10467c478bd9Sstevel@tonic-gate 
10477c478bd9Sstevel@tonic-gate 	if (strlen(fsptr->zone_fs_special) == 0) {
10487c478bd9Sstevel@tonic-gate 		/*
10497c478bd9Sstevel@tonic-gate 		 * A zero-length special is how we distinguish IPDs from
1050108322fbScarlsonj 		 * general-purpose FSs.  Make sure it mounts from a place that
1051108322fbScarlsonj 		 * can be seen via the alternate zone's root.
10527c478bd9Sstevel@tonic-gate 		 */
1053108322fbScarlsonj 		if (snprintf(specpath, sizeof (specpath), "%s%s",
1054108322fbScarlsonj 		    zonecfg_get_root(), fsptr->zone_fs_dir) >=
1055108322fbScarlsonj 		    sizeof (specpath)) {
1056108322fbScarlsonj 			zerror(zlogp, B_FALSE, "cannot mount %s: path too "
1057108322fbScarlsonj 			    "long in alternate root", fsptr->zone_fs_dir);
1058108322fbScarlsonj 			return (-1);
1059108322fbScarlsonj 		}
1060108322fbScarlsonj 		if (zonecfg_in_alt_root())
1061108322fbScarlsonj 			resolve_lofs(zlogp, specpath, sizeof (specpath));
10627c478bd9Sstevel@tonic-gate 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS,
1063108322fbScarlsonj 		    specpath, path) != 0) {
10647c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "failed to loopback mount %s",
1065108322fbScarlsonj 			    specpath);
10667c478bd9Sstevel@tonic-gate 			return (-1);
10677c478bd9Sstevel@tonic-gate 		}
10687c478bd9Sstevel@tonic-gate 		return (0);
10697c478bd9Sstevel@tonic-gate 	}
10707c478bd9Sstevel@tonic-gate 
10717c478bd9Sstevel@tonic-gate 	/*
10727c478bd9Sstevel@tonic-gate 	 * In general the strategy here is to do just as much verification as
10737c478bd9Sstevel@tonic-gate 	 * necessary to avoid crashing or otherwise doing something bad; if the
10747c478bd9Sstevel@tonic-gate 	 * administrator initiated the operation via zoneadm(1m), he'll get
10757c478bd9Sstevel@tonic-gate 	 * auto-verification which will let him know what's wrong.  If he
10767c478bd9Sstevel@tonic-gate 	 * modifies the zone configuration of a running zone and doesn't attempt
10777c478bd9Sstevel@tonic-gate 	 * to verify that it's OK we won't crash but won't bother trying to be
10787c478bd9Sstevel@tonic-gate 	 * too helpful either.  zoneadm verify is only a couple keystrokes away.
10797c478bd9Sstevel@tonic-gate 	 */
10807c478bd9Sstevel@tonic-gate 	if (!zonecfg_valid_fs_type(fsptr->zone_fs_type)) {
10817c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "cannot mount %s on %s: "
10827c478bd9Sstevel@tonic-gate 		    "invalid file-system type %s", fsptr->zone_fs_special,
10837c478bd9Sstevel@tonic-gate 		    fsptr->zone_fs_dir, fsptr->zone_fs_type);
10847c478bd9Sstevel@tonic-gate 		return (-1);
10857c478bd9Sstevel@tonic-gate 	}
10867c478bd9Sstevel@tonic-gate 
10877c478bd9Sstevel@tonic-gate 	/*
1088108322fbScarlsonj 	 * If we're looking at an alternate root environment, then construct
1089108322fbScarlsonj 	 * read-only loopback mounts as necessary.  For all lofs mounts, make
1090108322fbScarlsonj 	 * sure that the 'special' entry points inside the alternate root.  (We
1091108322fbScarlsonj 	 * don't do this with other mounts, as devfs isn't in the alternate
1092108322fbScarlsonj 	 * root, and we need to assume the device environment is roughly the
1093108322fbScarlsonj 	 * same.)
1094108322fbScarlsonj 	 */
1095108322fbScarlsonj 	if (zonecfg_in_alt_root()) {
1096108322fbScarlsonj 		struct stat64 st;
1097108322fbScarlsonj 
1098108322fbScarlsonj 		if (stat64(fsptr->zone_fs_special, &st) != -1 &&
1099108322fbScarlsonj 		    S_ISBLK(st.st_mode) &&
1100108322fbScarlsonj 		    check_lofs_needed(zlogp, fsptr) == -1)
1101108322fbScarlsonj 			return (-1);
1102108322fbScarlsonj 		if (strcmp(fsptr->zone_fs_type, MNTTYPE_LOFS) == 0) {
1103108322fbScarlsonj 			if (snprintf(specpath, sizeof (specpath), "%s%s",
1104108322fbScarlsonj 			    zonecfg_get_root(), fsptr->zone_fs_special) >=
1105108322fbScarlsonj 			    sizeof (specpath)) {
1106108322fbScarlsonj 				zerror(zlogp, B_FALSE, "cannot mount %s: path "
1107108322fbScarlsonj 				    "too long in alternate root",
1108108322fbScarlsonj 				    fsptr->zone_fs_special);
1109108322fbScarlsonj 				return (-1);
1110108322fbScarlsonj 			}
1111108322fbScarlsonj 			resolve_lofs(zlogp, specpath, sizeof (specpath));
1112108322fbScarlsonj 			(void) strlcpy(fsptr->zone_fs_special, specpath,
1113108322fbScarlsonj 			    sizeof (fsptr->zone_fs_special));
1114108322fbScarlsonj 		}
1115108322fbScarlsonj 	}
1116108322fbScarlsonj 
1117108322fbScarlsonj 	/*
11187c478bd9Sstevel@tonic-gate 	 * Run 'fsck -m' if there's a device to fsck.
11197c478bd9Sstevel@tonic-gate 	 */
11207c478bd9Sstevel@tonic-gate 	if (fsptr->zone_fs_raw[0] != '\0' &&
11217c478bd9Sstevel@tonic-gate 	    dofsck(zlogp, fsptr->zone_fs_type, fsptr->zone_fs_raw) != 0)
11227c478bd9Sstevel@tonic-gate 		return (-1);
11237c478bd9Sstevel@tonic-gate 
11247c478bd9Sstevel@tonic-gate 	/*
11257c478bd9Sstevel@tonic-gate 	 * Build up mount option string.
11267c478bd9Sstevel@tonic-gate 	 */
11277c478bd9Sstevel@tonic-gate 	optstr[0] = '\0';
11287c478bd9Sstevel@tonic-gate 	if (fsptr->zone_fs_options != NULL) {
11297c478bd9Sstevel@tonic-gate 		(void) strlcpy(optstr, fsptr->zone_fs_options->zone_fsopt_opt,
11307c478bd9Sstevel@tonic-gate 		    sizeof (optstr));
11317c478bd9Sstevel@tonic-gate 		for (optptr = fsptr->zone_fs_options->zone_fsopt_next;
11327c478bd9Sstevel@tonic-gate 		    optptr != NULL; optptr = optptr->zone_fsopt_next) {
11337c478bd9Sstevel@tonic-gate 			(void) strlcat(optstr, ",", sizeof (optstr));
11347c478bd9Sstevel@tonic-gate 			(void) strlcat(optstr, optptr->zone_fsopt_opt,
11357c478bd9Sstevel@tonic-gate 			    sizeof (optstr));
11367c478bd9Sstevel@tonic-gate 		}
11377c478bd9Sstevel@tonic-gate 	}
11387c478bd9Sstevel@tonic-gate 	return (domount(zlogp, fsptr->zone_fs_type, optstr,
11397c478bd9Sstevel@tonic-gate 	    fsptr->zone_fs_special, path));
11407c478bd9Sstevel@tonic-gate }
11417c478bd9Sstevel@tonic-gate 
11427c478bd9Sstevel@tonic-gate static void
11437c478bd9Sstevel@tonic-gate free_fs_data(struct zone_fstab *fsarray, uint_t nelem)
11447c478bd9Sstevel@tonic-gate {
11457c478bd9Sstevel@tonic-gate 	uint_t i;
11467c478bd9Sstevel@tonic-gate 
11477c478bd9Sstevel@tonic-gate 	if (fsarray == NULL)
11487c478bd9Sstevel@tonic-gate 		return;
11497c478bd9Sstevel@tonic-gate 	for (i = 0; i < nelem; i++)
11507c478bd9Sstevel@tonic-gate 		zonecfg_free_fs_option_list(fsarray[i].zone_fs_options);
11517c478bd9Sstevel@tonic-gate 	free(fsarray);
11527c478bd9Sstevel@tonic-gate }
11537c478bd9Sstevel@tonic-gate 
/*
 * This function initiates the creation of a small Solaris Environment for
 * a scratch zone.  The Environment creation process is split up into two
 * functions (build_mounted_pre_var() and build_mounted_post_var()).  It
 * is done this way because:
 *	We need to have both /etc and /var in the root of the scratch zone.
 *	We loopback mount the zone's own /etc and /var into the root of the
 *	scratch zone.  Unlike /etc, /var can be a separate filesystem, so we
 *	need to delay the mount of /var until the zone's root gets populated.
 *	The mounting of localdirs[] (/etc and /var) has therefore been moved
 *	to build_mounted_post_var(), which gets called only after the
 *	zone-specific filesystems are mounted.
 */
1167108322fbScarlsonj static boolean_t
1168*f4368d3dSvp157776 build_mounted_pre_var(zlog_t *zlogp, char *rootpath,
1169*f4368d3dSvp157776     size_t rootlen, const char *zonepath)
1170108322fbScarlsonj {
1171108322fbScarlsonj 	char tmp[MAXPATHLEN], fromdir[MAXPATHLEN];
1172108322fbScarlsonj 	char luroot[MAXPATHLEN];
1173108322fbScarlsonj 	const char **cpp;
1174108322fbScarlsonj 	static const char *mkdirs[] = {
11753f604e0fSdp 		"/system", "/system/contract", "/system/object", "/proc",
11763f604e0fSdp 		"/dev", "/tmp", "/a", NULL
1177108322fbScarlsonj 	};
1178108322fbScarlsonj 	char *altstr;
1179*f4368d3dSvp157776 	FILE *fp;
1180108322fbScarlsonj 	uuid_t uuid;
1181108322fbScarlsonj 
1182108322fbScarlsonj 	resolve_lofs(zlogp, rootpath, rootlen);
1183108322fbScarlsonj 	(void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
1184108322fbScarlsonj 	resolve_lofs(zlogp, luroot, sizeof (luroot));
1185108322fbScarlsonj 	(void) snprintf(tmp, sizeof (tmp), "%s/bin", luroot);
1186108322fbScarlsonj 	(void) symlink("./usr/bin", tmp);
1187108322fbScarlsonj 
1188108322fbScarlsonj 	/*
1189108322fbScarlsonj 	 * These are mostly special mount points; not handled here.  (See
1190108322fbScarlsonj 	 * zone_mount_early.)
1191108322fbScarlsonj 	 */
1192108322fbScarlsonj 	for (cpp = mkdirs; *cpp != NULL; cpp++) {
1193108322fbScarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1194108322fbScarlsonj 		if (mkdir(tmp, 0755) != 0) {
1195108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1196108322fbScarlsonj 			return (B_FALSE);
1197108322fbScarlsonj 		}
1198108322fbScarlsonj 	}
1199*f4368d3dSvp157776 	/*
1200*f4368d3dSvp157776 	 * This is here to support lucopy.  If there's an instance of this same
1201*f4368d3dSvp157776 	 * zone on the current running system, then we mount its root up as
1202*f4368d3dSvp157776 	 * read-only inside the scratch zone.
1203*f4368d3dSvp157776 	 */
1204*f4368d3dSvp157776 	(void) zonecfg_get_uuid(zone_name, uuid);
1205*f4368d3dSvp157776 	altstr = strdup(zonecfg_get_root());
1206*f4368d3dSvp157776 	if (altstr == NULL) {
1207*f4368d3dSvp157776 		zerror(zlogp, B_TRUE, "memory allocation failed");
1208*f4368d3dSvp157776 		return (B_FALSE);
1209*f4368d3dSvp157776 	}
1210*f4368d3dSvp157776 	zonecfg_set_root("");
1211*f4368d3dSvp157776 	(void) strlcpy(tmp, zone_name, sizeof (tmp));
1212*f4368d3dSvp157776 	(void) zonecfg_get_name_by_uuid(uuid, tmp, sizeof (tmp));
1213*f4368d3dSvp157776 	if (zone_get_rootpath(tmp, fromdir, sizeof (fromdir)) == Z_OK &&
1214*f4368d3dSvp157776 	    strcmp(fromdir, rootpath) != 0) {
1215*f4368d3dSvp157776 		(void) snprintf(tmp, sizeof (tmp), "%s/b", luroot);
1216*f4368d3dSvp157776 		if (mkdir(tmp, 0755) != 0) {
1217*f4368d3dSvp157776 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1218*f4368d3dSvp157776 			return (B_FALSE);
1219*f4368d3dSvp157776 		}
1220*f4368d3dSvp157776 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, fromdir,
1221*f4368d3dSvp157776 		    tmp) != 0) {
1222*f4368d3dSvp157776 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1223*f4368d3dSvp157776 			    fromdir);
1224*f4368d3dSvp157776 			return (B_FALSE);
1225*f4368d3dSvp157776 		}
1226*f4368d3dSvp157776 	}
1227*f4368d3dSvp157776 	zonecfg_set_root(altstr);
1228*f4368d3dSvp157776 	free(altstr);
1229*f4368d3dSvp157776 
1230*f4368d3dSvp157776 	if ((fp = zonecfg_open_scratch(luroot, B_TRUE)) == NULL) {
1231*f4368d3dSvp157776 		zerror(zlogp, B_TRUE, "cannot open zone mapfile");
1232*f4368d3dSvp157776 		return (B_FALSE);
1233*f4368d3dSvp157776 	}
1234*f4368d3dSvp157776 	(void) ftruncate(fileno(fp), 0);
1235*f4368d3dSvp157776 	if (zonecfg_add_scratch(fp, zone_name, kernzone, "/") == -1) {
1236*f4368d3dSvp157776 		zerror(zlogp, B_TRUE, "cannot add zone mapfile entry");
1237*f4368d3dSvp157776 	}
1238*f4368d3dSvp157776 	zonecfg_close_scratch(fp);
1239*f4368d3dSvp157776 	(void) snprintf(tmp, sizeof (tmp), "%s/a", luroot);
1240*f4368d3dSvp157776 	if (domount(zlogp, MNTTYPE_LOFS, "", rootpath, tmp) != 0)
1241*f4368d3dSvp157776 		return (B_FALSE);
1242*f4368d3dSvp157776 	(void) strlcpy(rootpath, tmp, rootlen);
1243*f4368d3dSvp157776 	return (B_TRUE);
1244*f4368d3dSvp157776 }
1245*f4368d3dSvp157776 
1246*f4368d3dSvp157776 
1247*f4368d3dSvp157776 static boolean_t
1248*f4368d3dSvp157776 build_mounted_post_var(zlog_t *zlogp, char *rootpath, const char *zonepath)
1249*f4368d3dSvp157776 {
1250*f4368d3dSvp157776 	char tmp[MAXPATHLEN], fromdir[MAXPATHLEN];
1251*f4368d3dSvp157776 	char luroot[MAXPATHLEN];
1252*f4368d3dSvp157776 	const char **cpp;
1253*f4368d3dSvp157776 	static const char *localdirs[] = {
1254*f4368d3dSvp157776 		"/etc", "/var", NULL
1255*f4368d3dSvp157776 	};
1256*f4368d3dSvp157776 	static const char *loopdirs[] = {
1257*f4368d3dSvp157776 		"/etc/lib", "/etc/fs", "/lib", "/sbin", "/platform",
1258*f4368d3dSvp157776 		"/usr", NULL
1259*f4368d3dSvp157776 	};
1260*f4368d3dSvp157776 	static const char *tmpdirs[] = {
1261*f4368d3dSvp157776 		"/tmp", "/var/run", NULL
1262*f4368d3dSvp157776 	};
1263*f4368d3dSvp157776 	struct stat st;
1264*f4368d3dSvp157776 
1265*f4368d3dSvp157776 	(void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
1266108322fbScarlsonj 
1267108322fbScarlsonj 	/*
1268108322fbScarlsonj 	 * These are mounted read-write from the zone undergoing upgrade.  We
1269108322fbScarlsonj 	 * must be careful not to 'leak' things from the main system into the
1270108322fbScarlsonj 	 * zone, and this accomplishes that goal.
1271108322fbScarlsonj 	 */
1272108322fbScarlsonj 	for (cpp = localdirs; *cpp != NULL; cpp++) {
1273108322fbScarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1274108322fbScarlsonj 		(void) snprintf(fromdir, sizeof (fromdir), "%s%s", rootpath,
1275108322fbScarlsonj 		    *cpp);
1276108322fbScarlsonj 		if (mkdir(tmp, 0755) != 0) {
1277108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1278108322fbScarlsonj 			return (B_FALSE);
1279108322fbScarlsonj 		}
1280108322fbScarlsonj 		if (domount(zlogp, MNTTYPE_LOFS, "", fromdir, tmp) != 0) {
1281108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1282108322fbScarlsonj 			    *cpp);
1283108322fbScarlsonj 			return (B_FALSE);
1284108322fbScarlsonj 		}
1285108322fbScarlsonj 	}
1286108322fbScarlsonj 
1287108322fbScarlsonj 	/*
1288108322fbScarlsonj 	 * These are things mounted read-only from the running system because
1289108322fbScarlsonj 	 * they contain binaries that must match system.
1290108322fbScarlsonj 	 */
1291108322fbScarlsonj 	for (cpp = loopdirs; *cpp != NULL; cpp++) {
1292108322fbScarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1293108322fbScarlsonj 		if (mkdir(tmp, 0755) != 0) {
1294108322fbScarlsonj 			if (errno != EEXIST) {
1295108322fbScarlsonj 				zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1296108322fbScarlsonj 				return (B_FALSE);
1297108322fbScarlsonj 			}
1298108322fbScarlsonj 			if (lstat(tmp, &st) != 0) {
1299108322fbScarlsonj 				zerror(zlogp, B_TRUE, "cannot stat %s", tmp);
1300108322fbScarlsonj 				return (B_FALSE);
1301108322fbScarlsonj 			}
1302108322fbScarlsonj 			/*
1303108322fbScarlsonj 			 * Ignore any non-directories encountered.  These are
1304108322fbScarlsonj 			 * things that have been converted into symlinks
1305108322fbScarlsonj 			 * (/etc/fs and /etc/lib) and no longer need a lofs
1306108322fbScarlsonj 			 * fixup.
1307108322fbScarlsonj 			 */
1308108322fbScarlsonj 			if (!S_ISDIR(st.st_mode))
1309108322fbScarlsonj 				continue;
1310108322fbScarlsonj 		}
1311108322fbScarlsonj 		if (domount(zlogp, MNTTYPE_LOFS, IPD_DEFAULT_OPTS, *cpp,
1312108322fbScarlsonj 		    tmp) != 0) {
1313108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot mount %s on %s", tmp,
1314108322fbScarlsonj 			    *cpp);
1315108322fbScarlsonj 			return (B_FALSE);
1316108322fbScarlsonj 		}
1317108322fbScarlsonj 	}
1318108322fbScarlsonj 
1319108322fbScarlsonj 	/*
1320108322fbScarlsonj 	 * These are things with tmpfs mounted inside.
1321108322fbScarlsonj 	 */
1322108322fbScarlsonj 	for (cpp = tmpdirs; *cpp != NULL; cpp++) {
1323108322fbScarlsonj 		(void) snprintf(tmp, sizeof (tmp), "%s%s", luroot, *cpp);
1324108322fbScarlsonj 		if (mkdir(tmp, 0755) != 0 && errno != EEXIST) {
1325108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", tmp);
1326108322fbScarlsonj 			return (B_FALSE);
1327108322fbScarlsonj 		}
1328108322fbScarlsonj 		if (domount(zlogp, MNTTYPE_TMPFS, "", "swap", tmp) != 0) {
1329108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot mount swap on %s", *cpp);
1330108322fbScarlsonj 			return (B_FALSE);
1331108322fbScarlsonj 		}
1332108322fbScarlsonj 	}
1333108322fbScarlsonj 	return (B_TRUE);
1334108322fbScarlsonj }
1335108322fbScarlsonj 
13367c478bd9Sstevel@tonic-gate static int
1337108322fbScarlsonj mount_filesystems(zlog_t *zlogp, boolean_t mount_cmd)
13387c478bd9Sstevel@tonic-gate {
13397c478bd9Sstevel@tonic-gate 	char	rootpath[MAXPATHLEN];
13407c478bd9Sstevel@tonic-gate 	char	zonepath[MAXPATHLEN];
13417c478bd9Sstevel@tonic-gate 	int	num_fs = 0, i;
13427c478bd9Sstevel@tonic-gate 	struct zone_fstab fstab, *fs_ptr = NULL, *tmp_ptr;
13437c478bd9Sstevel@tonic-gate 	struct zone_fstab *fsp;
13447c478bd9Sstevel@tonic-gate 	zone_dochandle_t handle = NULL;
13457c478bd9Sstevel@tonic-gate 	zone_state_t zstate;
13467c478bd9Sstevel@tonic-gate 
13477c478bd9Sstevel@tonic-gate 	if (zone_get_state(zone_name, &zstate) != Z_OK ||
1348108322fbScarlsonj 	    (zstate != ZONE_STATE_READY && zstate != ZONE_STATE_MOUNTED)) {
13497c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
1350108322fbScarlsonj 		    "zone must be in '%s' or '%s' state to mount file-systems",
1351108322fbScarlsonj 		    zone_state_str(ZONE_STATE_READY),
1352108322fbScarlsonj 		    zone_state_str(ZONE_STATE_MOUNTED));
13537c478bd9Sstevel@tonic-gate 		goto bad;
13547c478bd9Sstevel@tonic-gate 	}
13557c478bd9Sstevel@tonic-gate 
13567c478bd9Sstevel@tonic-gate 	if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
13577c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to determine zone path");
13587c478bd9Sstevel@tonic-gate 		goto bad;
13597c478bd9Sstevel@tonic-gate 	}
13607c478bd9Sstevel@tonic-gate 
13617c478bd9Sstevel@tonic-gate 	if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
13627c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to determine zone root");
13637c478bd9Sstevel@tonic-gate 		goto bad;
13647c478bd9Sstevel@tonic-gate 	}
13657c478bd9Sstevel@tonic-gate 
13667c478bd9Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
1367ffbafc53Scomay 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
13687c478bd9Sstevel@tonic-gate 		goto bad;
13697c478bd9Sstevel@tonic-gate 	}
13707c478bd9Sstevel@tonic-gate 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
13717c478bd9Sstevel@tonic-gate 	    zonecfg_setfsent(handle) != Z_OK) {
13727c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
13737c478bd9Sstevel@tonic-gate 		goto bad;
13747c478bd9Sstevel@tonic-gate 	}
13757c478bd9Sstevel@tonic-gate 
13767c478bd9Sstevel@tonic-gate 	/*
13777c478bd9Sstevel@tonic-gate 	 * Iterate through the rest of the filesystems, first the IPDs, then
13787c478bd9Sstevel@tonic-gate 	 * the general FSs.  Sort them all, then mount them in sorted order.
13797c478bd9Sstevel@tonic-gate 	 * This is to make sure the higher level directories (e.g., /usr)
13807c478bd9Sstevel@tonic-gate 	 * get mounted before any beneath them (e.g., /usr/local).
13817c478bd9Sstevel@tonic-gate 	 */
13827c478bd9Sstevel@tonic-gate 	if (zonecfg_setipdent(handle) != Z_OK) {
13837c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
13847c478bd9Sstevel@tonic-gate 		goto bad;
13857c478bd9Sstevel@tonic-gate 	}
13867c478bd9Sstevel@tonic-gate 	while (zonecfg_getipdent(handle, &fstab) == Z_OK) {
13877c478bd9Sstevel@tonic-gate 		num_fs++;
13887c478bd9Sstevel@tonic-gate 		if ((tmp_ptr = realloc(fs_ptr,
13897c478bd9Sstevel@tonic-gate 		    num_fs * sizeof (*tmp_ptr))) == NULL) {
13907c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "memory allocation failed");
13917c478bd9Sstevel@tonic-gate 			num_fs--;
13927c478bd9Sstevel@tonic-gate 			(void) zonecfg_endipdent(handle);
13937c478bd9Sstevel@tonic-gate 			goto bad;
13947c478bd9Sstevel@tonic-gate 		}
13957c478bd9Sstevel@tonic-gate 		fs_ptr = tmp_ptr;
13967c478bd9Sstevel@tonic-gate 		fsp = &fs_ptr[num_fs - 1];
13977c478bd9Sstevel@tonic-gate 		/*
13987c478bd9Sstevel@tonic-gate 		 * IPDs logically only have a mount point; all other properties
13997c478bd9Sstevel@tonic-gate 		 * are implied.
14007c478bd9Sstevel@tonic-gate 		 */
14017c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_dir,
14027c478bd9Sstevel@tonic-gate 		    fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir));
14037c478bd9Sstevel@tonic-gate 		fsp->zone_fs_special[0] = '\0';
14047c478bd9Sstevel@tonic-gate 		fsp->zone_fs_raw[0] = '\0';
14057c478bd9Sstevel@tonic-gate 		fsp->zone_fs_type[0] = '\0';
14067c478bd9Sstevel@tonic-gate 		fsp->zone_fs_options = NULL;
14077c478bd9Sstevel@tonic-gate 	}
14087c478bd9Sstevel@tonic-gate 	(void) zonecfg_endipdent(handle);
14097c478bd9Sstevel@tonic-gate 
14107c478bd9Sstevel@tonic-gate 	if (zonecfg_setfsent(handle) != Z_OK) {
14117c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
14127c478bd9Sstevel@tonic-gate 		goto bad;
14137c478bd9Sstevel@tonic-gate 	}
14147c478bd9Sstevel@tonic-gate 	while (zonecfg_getfsent(handle, &fstab) == Z_OK) {
1415fa9e4066Sahrens 		/*
1416fa9e4066Sahrens 		 * ZFS filesystems will not be accessible under an alternate
1417fa9e4066Sahrens 		 * root, since the pool will not be known.  Ignore them in this
1418fa9e4066Sahrens 		 * case.
1419fa9e4066Sahrens 		 */
1420fa9e4066Sahrens 		if (mount_cmd && strcmp(fstab.zone_fs_type, MNTTYPE_ZFS) == 0)
1421fa9e4066Sahrens 			continue;
1422fa9e4066Sahrens 
14237c478bd9Sstevel@tonic-gate 		num_fs++;
14247c478bd9Sstevel@tonic-gate 		if ((tmp_ptr = realloc(fs_ptr,
14257c478bd9Sstevel@tonic-gate 		    num_fs * sizeof (*tmp_ptr))) == NULL) {
14267c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "memory allocation failed");
14277c478bd9Sstevel@tonic-gate 			num_fs--;
14287c478bd9Sstevel@tonic-gate 			(void) zonecfg_endfsent(handle);
14297c478bd9Sstevel@tonic-gate 			goto bad;
14307c478bd9Sstevel@tonic-gate 		}
14317c478bd9Sstevel@tonic-gate 		fs_ptr = tmp_ptr;
14327c478bd9Sstevel@tonic-gate 		fsp = &fs_ptr[num_fs - 1];
14337c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_dir,
14347c478bd9Sstevel@tonic-gate 		    fstab.zone_fs_dir, sizeof (fsp->zone_fs_dir));
14357c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_special, fstab.zone_fs_special,
14367c478bd9Sstevel@tonic-gate 		    sizeof (fsp->zone_fs_special));
14377c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_raw, fstab.zone_fs_raw,
14387c478bd9Sstevel@tonic-gate 		    sizeof (fsp->zone_fs_raw));
14397c478bd9Sstevel@tonic-gate 		(void) strlcpy(fsp->zone_fs_type, fstab.zone_fs_type,
14407c478bd9Sstevel@tonic-gate 		    sizeof (fsp->zone_fs_type));
14417c478bd9Sstevel@tonic-gate 		fsp->zone_fs_options = fstab.zone_fs_options;
14427c478bd9Sstevel@tonic-gate 	}
14437c478bd9Sstevel@tonic-gate 	(void) zonecfg_endfsent(handle);
14447c478bd9Sstevel@tonic-gate 	zonecfg_fini_handle(handle);
14457c478bd9Sstevel@tonic-gate 	handle = NULL;
14467c478bd9Sstevel@tonic-gate 
1447108322fbScarlsonj 	/*
1448facf4a8dSllai1 	 * When we're mounting a zone for administration, / is the
1449facf4a8dSllai1 	 * scratch zone and dev is mounted at /dev.  The to-be-upgraded
1450facf4a8dSllai1 	 * zone is mounted at /a, and we set up that environment so that
1451facf4a8dSllai1 	 * process can access both the running system's utilities
1452facf4a8dSllai1 	 * and the to-be-modified zone's files.  The only exception
1453facf4a8dSllai1 	 * is the zone's /dev which isn't mounted at all, which is
1454facf4a8dSllai1 	 * the same as global zone installation where /a/dev and
1455facf4a8dSllai1 	 * /a/devices are not mounted.
1456*f4368d3dSvp157776 	 * Zone mounting is done in three phases.
1457*f4368d3dSvp157776 	 *   1) Create and populate lu directory (build_mounted_pre_var()).
1458*f4368d3dSvp157776 	 *   2) Mount the required filesystems as per the zone configuration.
1459*f4368d3dSvp157776 	 *   3) Set up the rest of the scratch zone environment
1460*f4368d3dSvp157776 	 *	(build_mounted_post_var()).
1461108322fbScarlsonj 	 */
1462108322fbScarlsonj 	if (mount_cmd &&
1463*f4368d3dSvp157776 	    !build_mounted_pre_var(zlogp,
1464*f4368d3dSvp157776 	    rootpath, sizeof (rootpath), zonepath))
1465108322fbScarlsonj 		goto bad;
1466108322fbScarlsonj 
14677c478bd9Sstevel@tonic-gate 	qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
14687c478bd9Sstevel@tonic-gate 	for (i = 0; i < num_fs; i++) {
14697c478bd9Sstevel@tonic-gate 		if (mount_one(zlogp, &fs_ptr[i], rootpath) != 0)
14707c478bd9Sstevel@tonic-gate 			goto bad;
14717c478bd9Sstevel@tonic-gate 	}
1472*f4368d3dSvp157776 	if (mount_cmd &&
1473*f4368d3dSvp157776 	    !build_mounted_post_var(zlogp, rootpath, zonepath))
1474*f4368d3dSvp157776 		goto bad;
147545916cd2Sjpk 
147645916cd2Sjpk 	/*
147745916cd2Sjpk 	 * For Trusted Extensions cross-mount each lower level /export/home
147845916cd2Sjpk 	 */
147948451833Scarlsonj 	if (!mount_cmd && tsol_mounts(zlogp, zone_name, rootpath) != 0)
148045916cd2Sjpk 		goto bad;
148145916cd2Sjpk 
14827c478bd9Sstevel@tonic-gate 	free_fs_data(fs_ptr, num_fs);
14837c478bd9Sstevel@tonic-gate 
14847c478bd9Sstevel@tonic-gate 	/*
14857c478bd9Sstevel@tonic-gate 	 * Everything looks fine.
14867c478bd9Sstevel@tonic-gate 	 */
14877c478bd9Sstevel@tonic-gate 	return (0);
14887c478bd9Sstevel@tonic-gate 
14897c478bd9Sstevel@tonic-gate bad:
14907c478bd9Sstevel@tonic-gate 	if (handle != NULL)
14917c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
14927c478bd9Sstevel@tonic-gate 	free_fs_data(fs_ptr, num_fs);
14937c478bd9Sstevel@tonic-gate 	return (-1);
14947c478bd9Sstevel@tonic-gate }
14957c478bd9Sstevel@tonic-gate 
/*
 * Convert a decimal prefix length given as a string into a netmask.
 *
 * prefixstr	NUL-terminated decimal prefix length (e.g. "24").
 * maxprefixlen	largest prefix length accepted, in bits.
 * maskstr	output buffer; every whole byte covered by the prefix is set
 *		to 0xFF and the leading bits of a trailing partial byte are
 *		OR-ed in.  Bytes past the prefix are not written, so the
 *		caller is expected to hand in a zeroed buffer large enough
 *		for maxprefixlen bits.
 *
 * Returns 0 on success.  Returns 1, without touching maskstr, when the
 * string is not a plain decimal number or the value lies outside
 * [0, maxprefixlen].
 *
 * The caller makes sure neither pointer parameter is NULL.
 */
static int
addr2netmask(char *prefixstr, int maxprefixlen, uchar_t *maskstr)
{
	char *endp;
	long prefixlen;

	/*
	 * strtol() rather than atoi(): atoi() cannot report garbage input
	 * and its behavior is undefined when the value is out of range.
	 */
	errno = 0;
	prefixlen = strtol(prefixstr, &endp, 10);
	if (endp == prefixstr || *endp != '\0' || errno == ERANGE)
		return (1);
	if (prefixlen < 0 || prefixlen > maxprefixlen)
		return (1);

	/* Whole bytes first ... */
	for (; prefixlen >= 8; prefixlen -= 8)
		*maskstr++ = 0xFF;

	/* ... then the high-order bits of the remaining partial byte. */
	if (prefixlen > 0)
		*maskstr |= (uchar_t)(0xFF << (8 - prefixlen));

	return (0);
}
15167c478bd9Sstevel@tonic-gate 
15177c478bd9Sstevel@tonic-gate /*
15187c478bd9Sstevel@tonic-gate  * Tear down all interfaces belonging to the given zone.  This should
15197c478bd9Sstevel@tonic-gate  * be called with the zone in a state other than "running", so that
15207c478bd9Sstevel@tonic-gate  * interfaces can't be assigned to the zone after this returns.
15217c478bd9Sstevel@tonic-gate  *
15227c478bd9Sstevel@tonic-gate  * If anything goes wrong, log an error message and return an error.
15237c478bd9Sstevel@tonic-gate  */
15247c478bd9Sstevel@tonic-gate static int
15257c478bd9Sstevel@tonic-gate unconfigure_network_interfaces(zlog_t *zlogp, zoneid_t zone_id)
15267c478bd9Sstevel@tonic-gate {
15277c478bd9Sstevel@tonic-gate 	struct lifnum lifn;
15287c478bd9Sstevel@tonic-gate 	struct lifconf lifc;
15297c478bd9Sstevel@tonic-gate 	struct lifreq *lifrp, lifrl;
15307c478bd9Sstevel@tonic-gate 	int64_t lifc_flags = LIFC_NOXMIT | LIFC_ALLZONES;
15317c478bd9Sstevel@tonic-gate 	int num_ifs, s, i, ret_code = 0;
15327c478bd9Sstevel@tonic-gate 	uint_t bufsize;
15337c478bd9Sstevel@tonic-gate 	char *buf = NULL;
15347c478bd9Sstevel@tonic-gate 
15357c478bd9Sstevel@tonic-gate 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
15367c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get socket");
15377c478bd9Sstevel@tonic-gate 		ret_code = -1;
15387c478bd9Sstevel@tonic-gate 		goto bad;
15397c478bd9Sstevel@tonic-gate 	}
15407c478bd9Sstevel@tonic-gate 	lifn.lifn_family = AF_UNSPEC;
15417c478bd9Sstevel@tonic-gate 	lifn.lifn_flags = (int)lifc_flags;
15427c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) {
15437c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
15447c478bd9Sstevel@tonic-gate 		    "could not determine number of interfaces");
15457c478bd9Sstevel@tonic-gate 		ret_code = -1;
15467c478bd9Sstevel@tonic-gate 		goto bad;
15477c478bd9Sstevel@tonic-gate 	}
15487c478bd9Sstevel@tonic-gate 	num_ifs = lifn.lifn_count;
15497c478bd9Sstevel@tonic-gate 	bufsize = num_ifs * sizeof (struct lifreq);
15507c478bd9Sstevel@tonic-gate 	if ((buf = malloc(bufsize)) == NULL) {
15517c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "memory allocation failed");
15527c478bd9Sstevel@tonic-gate 		ret_code = -1;
15537c478bd9Sstevel@tonic-gate 		goto bad;
15547c478bd9Sstevel@tonic-gate 	}
15557c478bd9Sstevel@tonic-gate 	lifc.lifc_family = AF_UNSPEC;
15567c478bd9Sstevel@tonic-gate 	lifc.lifc_flags = (int)lifc_flags;
15577c478bd9Sstevel@tonic-gate 	lifc.lifc_len = bufsize;
15587c478bd9Sstevel@tonic-gate 	lifc.lifc_buf = buf;
15597c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) {
15607c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get configured interfaces");
15617c478bd9Sstevel@tonic-gate 		ret_code = -1;
15627c478bd9Sstevel@tonic-gate 		goto bad;
15637c478bd9Sstevel@tonic-gate 	}
15647c478bd9Sstevel@tonic-gate 	lifrp = lifc.lifc_req;
15657c478bd9Sstevel@tonic-gate 	for (i = lifc.lifc_len / sizeof (struct lifreq); i > 0; i--, lifrp++) {
15667c478bd9Sstevel@tonic-gate 		(void) close(s);
15677c478bd9Sstevel@tonic-gate 		if ((s = socket(lifrp->lifr_addr.ss_family, SOCK_DGRAM, 0)) <
15687c478bd9Sstevel@tonic-gate 		    0) {
15697c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not get socket",
15707c478bd9Sstevel@tonic-gate 			    lifrl.lifr_name);
15717c478bd9Sstevel@tonic-gate 			ret_code = -1;
15727c478bd9Sstevel@tonic-gate 			continue;
15737c478bd9Sstevel@tonic-gate 		}
15747c478bd9Sstevel@tonic-gate 		(void) memset(&lifrl, 0, sizeof (lifrl));
15757c478bd9Sstevel@tonic-gate 		(void) strncpy(lifrl.lifr_name, lifrp->lifr_name,
15767c478bd9Sstevel@tonic-gate 		    sizeof (lifrl.lifr_name));
15777c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFZONE, (caddr_t)&lifrl) < 0) {
15787c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
15797c478bd9Sstevel@tonic-gate 			    "%s: could not determine zone interface belongs to",
15807c478bd9Sstevel@tonic-gate 			    lifrl.lifr_name);
15817c478bd9Sstevel@tonic-gate 			ret_code = -1;
15827c478bd9Sstevel@tonic-gate 			continue;
15837c478bd9Sstevel@tonic-gate 		}
15847c478bd9Sstevel@tonic-gate 		if (lifrl.lifr_zoneid == zone_id) {
15857c478bd9Sstevel@tonic-gate 			if (ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifrl) < 0) {
15867c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_TRUE,
15877c478bd9Sstevel@tonic-gate 				    "%s: could not remove interface",
15887c478bd9Sstevel@tonic-gate 				    lifrl.lifr_name);
15897c478bd9Sstevel@tonic-gate 				ret_code = -1;
15907c478bd9Sstevel@tonic-gate 				continue;
15917c478bd9Sstevel@tonic-gate 			}
15927c478bd9Sstevel@tonic-gate 		}
15937c478bd9Sstevel@tonic-gate 	}
15947c478bd9Sstevel@tonic-gate bad:
15957c478bd9Sstevel@tonic-gate 	if (s > 0)
15967c478bd9Sstevel@tonic-gate 		(void) close(s);
15977c478bd9Sstevel@tonic-gate 	if (buf)
15987c478bd9Sstevel@tonic-gate 		free(buf);
15997c478bd9Sstevel@tonic-gate 	return (ret_code);
16007c478bd9Sstevel@tonic-gate }
16017c478bd9Sstevel@tonic-gate 
/*
 * Scratch socket addresses used by who_is_using() when it builds its
 * routing-socket request: so_dst holds the address being looked up and
 * so_ifp holds the AF_LINK placeholder.  The union lets one object carry
 * an IPv4, IPv6 or link-level address.
 */
static union	sockunion {
	struct	sockaddr sa;
	struct	sockaddr_in sin;
	struct	sockaddr_dl sdl;
	struct	sockaddr_in6 sin6;
} so_dst, so_ifp;

/*
 * Routing-socket message buffer used by who_is_using(): the message header
 * followed by room for the socket addresses that accompany it.  The same
 * buffer is used for the request that is written and the reply that is read.
 */
static struct {
	struct	rt_msghdr hdr;
	char	space[512];
} rtmsg;
16137c478bd9Sstevel@tonic-gate 
16147c478bd9Sstevel@tonic-gate static int
16157c478bd9Sstevel@tonic-gate salen(struct sockaddr *sa)
16167c478bd9Sstevel@tonic-gate {
16177c478bd9Sstevel@tonic-gate 	switch (sa->sa_family) {
16187c478bd9Sstevel@tonic-gate 	case AF_INET:
16197c478bd9Sstevel@tonic-gate 		return (sizeof (struct sockaddr_in));
16207c478bd9Sstevel@tonic-gate 	case AF_LINK:
16217c478bd9Sstevel@tonic-gate 		return (sizeof (struct sockaddr_dl));
16227c478bd9Sstevel@tonic-gate 	case AF_INET6:
16237c478bd9Sstevel@tonic-gate 		return (sizeof (struct sockaddr_in6));
16247c478bd9Sstevel@tonic-gate 	default:
16257c478bd9Sstevel@tonic-gate 		return (sizeof (struct sockaddr));
16267c478bd9Sstevel@tonic-gate 	}
16277c478bd9Sstevel@tonic-gate }
16287c478bd9Sstevel@tonic-gate 
/*
 * Round a length up to the next multiple of sizeof (long).  A length that
 * is zero or negative yields sizeof (long).  Note that the argument is
 * evaluated more than once.
 */
#define	ROUNDUP_LONG(a) \
	((a) <= 0 ? sizeof (long) : \
	(((a) + (sizeof (long) - 1)) & ~(sizeof (long) - 1)))
16317c478bd9Sstevel@tonic-gate 
16327c478bd9Sstevel@tonic-gate /*
16337c478bd9Sstevel@tonic-gate  * Look up which zone is using a given IP address.  The address in question
16347c478bd9Sstevel@tonic-gate  * is expected to have been stuffed into the structure to which lifr points
16357c478bd9Sstevel@tonic-gate  * via a previous SIOCGLIFADDR ioctl().
16367c478bd9Sstevel@tonic-gate  *
16377c478bd9Sstevel@tonic-gate  * This is done using black router socket magic.
16387c478bd9Sstevel@tonic-gate  *
16397c478bd9Sstevel@tonic-gate  * Return the name of the zone on success or NULL on failure.
16407c478bd9Sstevel@tonic-gate  *
16417c478bd9Sstevel@tonic-gate  * This is a lot of code for a simple task; a new ioctl request to take care
16427c478bd9Sstevel@tonic-gate  * of this might be a useful RFE.
16437c478bd9Sstevel@tonic-gate  */
16447c478bd9Sstevel@tonic-gate 
16457c478bd9Sstevel@tonic-gate static char *
16467c478bd9Sstevel@tonic-gate who_is_using(zlog_t *zlogp, struct lifreq *lifr)
16477c478bd9Sstevel@tonic-gate {
16487c478bd9Sstevel@tonic-gate 	static char answer[ZONENAME_MAX];
16497c478bd9Sstevel@tonic-gate 	pid_t pid;
16507c478bd9Sstevel@tonic-gate 	int s, rlen, l, i;
16517c478bd9Sstevel@tonic-gate 	char *cp = rtmsg.space;
16527c478bd9Sstevel@tonic-gate 	struct sockaddr_dl *ifp = NULL;
16537c478bd9Sstevel@tonic-gate 	struct sockaddr *sa;
16547c478bd9Sstevel@tonic-gate 	char save_if_name[LIFNAMSIZ];
16557c478bd9Sstevel@tonic-gate 
16567c478bd9Sstevel@tonic-gate 	answer[0] = '\0';
16577c478bd9Sstevel@tonic-gate 
16587c478bd9Sstevel@tonic-gate 	pid = getpid();
16597c478bd9Sstevel@tonic-gate 	if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) < 0) {
16607c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get routing socket");
16617c478bd9Sstevel@tonic-gate 		return (NULL);
16627c478bd9Sstevel@tonic-gate 	}
16637c478bd9Sstevel@tonic-gate 
16647c478bd9Sstevel@tonic-gate 	if (lifr->lifr_addr.ss_family == AF_INET) {
16657c478bd9Sstevel@tonic-gate 		struct sockaddr_in *sin4;
16667c478bd9Sstevel@tonic-gate 
16677c478bd9Sstevel@tonic-gate 		so_dst.sa.sa_family = AF_INET;
16687c478bd9Sstevel@tonic-gate 		sin4 = (struct sockaddr_in *)&lifr->lifr_addr;
16697c478bd9Sstevel@tonic-gate 		so_dst.sin.sin_addr = sin4->sin_addr;
16707c478bd9Sstevel@tonic-gate 	} else {
16717c478bd9Sstevel@tonic-gate 		struct sockaddr_in6 *sin6;
16727c478bd9Sstevel@tonic-gate 
16737c478bd9Sstevel@tonic-gate 		so_dst.sa.sa_family = AF_INET6;
16747c478bd9Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr;
16757c478bd9Sstevel@tonic-gate 		so_dst.sin6.sin6_addr = sin6->sin6_addr;
16767c478bd9Sstevel@tonic-gate 	}
16777c478bd9Sstevel@tonic-gate 
16787c478bd9Sstevel@tonic-gate 	so_ifp.sa.sa_family = AF_LINK;
16797c478bd9Sstevel@tonic-gate 
16807c478bd9Sstevel@tonic-gate 	(void) memset(&rtmsg, 0, sizeof (rtmsg));
16817c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_type = RTM_GET;
16827c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_flags = RTF_UP | RTF_HOST;
16837c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_version = RTM_VERSION;
16847c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_seq = ++rts_seqno;
16857c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_addrs = RTA_IFP | RTA_DST;
16867c478bd9Sstevel@tonic-gate 
16877c478bd9Sstevel@tonic-gate 	l = ROUNDUP_LONG(salen(&so_dst.sa));
16887c478bd9Sstevel@tonic-gate 	(void) memmove(cp, &(so_dst), l);
16897c478bd9Sstevel@tonic-gate 	cp += l;
16907c478bd9Sstevel@tonic-gate 	l = ROUNDUP_LONG(salen(&so_ifp.sa));
16917c478bd9Sstevel@tonic-gate 	(void) memmove(cp, &(so_ifp), l);
16927c478bd9Sstevel@tonic-gate 	cp += l;
16937c478bd9Sstevel@tonic-gate 
16947c478bd9Sstevel@tonic-gate 	rtmsg.hdr.rtm_msglen = l = cp - (char *)&rtmsg;
16957c478bd9Sstevel@tonic-gate 
16967c478bd9Sstevel@tonic-gate 	if ((rlen = write(s, &rtmsg, l)) < 0) {
16977c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "writing to routing socket");
16987c478bd9Sstevel@tonic-gate 		return (NULL);
16997c478bd9Sstevel@tonic-gate 	} else if (rlen < (int)rtmsg.hdr.rtm_msglen) {
17007c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
17017c478bd9Sstevel@tonic-gate 		    "write to routing socket got only %d for len\n", rlen);
17027c478bd9Sstevel@tonic-gate 		return (NULL);
17037c478bd9Sstevel@tonic-gate 	}
17047c478bd9Sstevel@tonic-gate 	do {
17057c478bd9Sstevel@tonic-gate 		l = read(s, &rtmsg, sizeof (rtmsg));
17067c478bd9Sstevel@tonic-gate 	} while (l > 0 && (rtmsg.hdr.rtm_seq != rts_seqno ||
17077c478bd9Sstevel@tonic-gate 	    rtmsg.hdr.rtm_pid != pid));
17087c478bd9Sstevel@tonic-gate 	if (l < 0) {
17097c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "reading from routing socket");
17107c478bd9Sstevel@tonic-gate 		return (NULL);
17117c478bd9Sstevel@tonic-gate 	}
17127c478bd9Sstevel@tonic-gate 
17137c478bd9Sstevel@tonic-gate 	if (rtmsg.hdr.rtm_version != RTM_VERSION) {
17147c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
17157c478bd9Sstevel@tonic-gate 		    "routing message version %d not understood",
17167c478bd9Sstevel@tonic-gate 		    rtmsg.hdr.rtm_version);
17177c478bd9Sstevel@tonic-gate 		return (NULL);
17187c478bd9Sstevel@tonic-gate 	}
17197c478bd9Sstevel@tonic-gate 	if (rtmsg.hdr.rtm_msglen != (ushort_t)l) {
17207c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "message length mismatch, "
17217c478bd9Sstevel@tonic-gate 		    "expected %d bytes, returned %d bytes",
17227c478bd9Sstevel@tonic-gate 		    rtmsg.hdr.rtm_msglen, l);
17237c478bd9Sstevel@tonic-gate 		return (NULL);
17247c478bd9Sstevel@tonic-gate 	}
17257c478bd9Sstevel@tonic-gate 	if (rtmsg.hdr.rtm_errno != 0)  {
17267c478bd9Sstevel@tonic-gate 		errno = rtmsg.hdr.rtm_errno;
17277c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "RTM_GET routing socket message");
17287c478bd9Sstevel@tonic-gate 		return (NULL);
17297c478bd9Sstevel@tonic-gate 	}
17307c478bd9Sstevel@tonic-gate 	if ((rtmsg.hdr.rtm_addrs & RTA_IFP) == 0) {
17317c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "interface not found");
17327c478bd9Sstevel@tonic-gate 		return (NULL);
17337c478bd9Sstevel@tonic-gate 	}
17347c478bd9Sstevel@tonic-gate 	cp = ((char *)(&rtmsg.hdr + 1));
17357c478bd9Sstevel@tonic-gate 	for (i = 1; i != 0; i <<= 1) {
17367c478bd9Sstevel@tonic-gate 		/* LINTED E_BAD_PTR_CAST_ALIGN */
17377c478bd9Sstevel@tonic-gate 		sa = (struct sockaddr *)cp;
17387c478bd9Sstevel@tonic-gate 		if (i != RTA_IFP) {
17397c478bd9Sstevel@tonic-gate 			if ((i & rtmsg.hdr.rtm_addrs) != 0)
17407c478bd9Sstevel@tonic-gate 				cp += ROUNDUP_LONG(salen(sa));
17417c478bd9Sstevel@tonic-gate 			continue;
17427c478bd9Sstevel@tonic-gate 		}
17437c478bd9Sstevel@tonic-gate 		if (sa->sa_family == AF_LINK &&
17447c478bd9Sstevel@tonic-gate 		    ((struct sockaddr_dl *)sa)->sdl_nlen != 0)
17457c478bd9Sstevel@tonic-gate 			ifp = (struct sockaddr_dl *)sa;
17467c478bd9Sstevel@tonic-gate 		break;
17477c478bd9Sstevel@tonic-gate 	}
17487c478bd9Sstevel@tonic-gate 	if (ifp == NULL) {
17497c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "interface could not be determined");
17507c478bd9Sstevel@tonic-gate 		return (NULL);
17517c478bd9Sstevel@tonic-gate 	}
17527c478bd9Sstevel@tonic-gate 
17537c478bd9Sstevel@tonic-gate 	/*
17547c478bd9Sstevel@tonic-gate 	 * We need to set the I/F name to what we got above, then do the
17557c478bd9Sstevel@tonic-gate 	 * appropriate ioctl to get its zone name.  But lifr->lifr_name is
17567c478bd9Sstevel@tonic-gate 	 * used by the calling function to do a REMOVEIF, so if we leave the
17577c478bd9Sstevel@tonic-gate 	 * "good" zone's I/F name in place, *that* I/F will be removed instead
17587c478bd9Sstevel@tonic-gate 	 * of the bad one.  So we save the old (bad) I/F name before over-
17597c478bd9Sstevel@tonic-gate 	 * writing it and doing the ioctl, then restore it after the ioctl.
17607c478bd9Sstevel@tonic-gate 	 */
17617c478bd9Sstevel@tonic-gate 	(void) strlcpy(save_if_name, lifr->lifr_name, sizeof (save_if_name));
17627c478bd9Sstevel@tonic-gate 	(void) strncpy(lifr->lifr_name, ifp->sdl_data, ifp->sdl_nlen);
17637c478bd9Sstevel@tonic-gate 	lifr->lifr_name[ifp->sdl_nlen] = '\0';
17647c478bd9Sstevel@tonic-gate 	i = ioctl(s, SIOCGLIFZONE, lifr);
17657c478bd9Sstevel@tonic-gate 	(void) strlcpy(lifr->lifr_name, save_if_name, sizeof (save_if_name));
17667c478bd9Sstevel@tonic-gate 	if (i < 0) {
17677c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
17687c478bd9Sstevel@tonic-gate 		    "%s: could not determine the zone interface belongs to",
17697c478bd9Sstevel@tonic-gate 		    lifr->lifr_name);
17707c478bd9Sstevel@tonic-gate 		return (NULL);
17717c478bd9Sstevel@tonic-gate 	}
17727c478bd9Sstevel@tonic-gate 	if (getzonenamebyid(lifr->lifr_zoneid, answer, sizeof (answer)) < 0)
17737c478bd9Sstevel@tonic-gate 		(void) snprintf(answer, sizeof (answer), "%d",
17747c478bd9Sstevel@tonic-gate 		    lifr->lifr_zoneid);
17757c478bd9Sstevel@tonic-gate 
17767c478bd9Sstevel@tonic-gate 	if (strlen(answer) > 0)
17777c478bd9Sstevel@tonic-gate 		return (answer);
17787c478bd9Sstevel@tonic-gate 	return (NULL);
17797c478bd9Sstevel@tonic-gate }
17807c478bd9Sstevel@tonic-gate 
17817c478bd9Sstevel@tonic-gate typedef struct mcast_rtmsg_s {
17827c478bd9Sstevel@tonic-gate 	struct rt_msghdr	m_rtm;
17837c478bd9Sstevel@tonic-gate 	union {
17847c478bd9Sstevel@tonic-gate 		struct {
17857c478bd9Sstevel@tonic-gate 			struct sockaddr_in	m_dst;
17867c478bd9Sstevel@tonic-gate 			struct sockaddr_in	m_gw;
17877c478bd9Sstevel@tonic-gate 			struct sockaddr_in	m_netmask;
17887c478bd9Sstevel@tonic-gate 		} m_v4;
17897c478bd9Sstevel@tonic-gate 		struct {
17907c478bd9Sstevel@tonic-gate 			struct sockaddr_in6	m_dst;
17917c478bd9Sstevel@tonic-gate 			struct sockaddr_in6	m_gw;
17927c478bd9Sstevel@tonic-gate 			struct sockaddr_in6	m_netmask;
17937c478bd9Sstevel@tonic-gate 		} m_v6;
17947c478bd9Sstevel@tonic-gate 	} m_u;
17957c478bd9Sstevel@tonic-gate } mcast_rtmsg_t;
17967c478bd9Sstevel@tonic-gate #define	m_dst4		m_u.m_v4.m_dst
17977c478bd9Sstevel@tonic-gate #define	m_dst6		m_u.m_v6.m_dst
17987c478bd9Sstevel@tonic-gate #define	m_gw4		m_u.m_v4.m_gw
17997c478bd9Sstevel@tonic-gate #define	m_gw6		m_u.m_v6.m_gw
18007c478bd9Sstevel@tonic-gate #define	m_netmask4	m_u.m_v4.m_netmask
18017c478bd9Sstevel@tonic-gate #define	m_netmask6	m_u.m_v6.m_netmask
18027c478bd9Sstevel@tonic-gate 
18037c478bd9Sstevel@tonic-gate /*
18047c478bd9Sstevel@tonic-gate  * Configures a single interface: a new virtual interface is added, based on
18057c478bd9Sstevel@tonic-gate  * the physical interface nwiftabptr->zone_nwif_physical, with the address
18067c478bd9Sstevel@tonic-gate  * specified in nwiftabptr->zone_nwif_address, for zone zone_id.  Note that
18077c478bd9Sstevel@tonic-gate  * the "address" can be an IPv6 address (with a /prefixlength required), an
18087c478bd9Sstevel@tonic-gate  * IPv4 address (with a /prefixlength optional), or a name; for the latter,
18097c478bd9Sstevel@tonic-gate  * an IPv4 name-to-address resolution will be attempted.
18107c478bd9Sstevel@tonic-gate  *
18117c478bd9Sstevel@tonic-gate  * A default interface route for multicast is created on the first IPv4 and
18127c478bd9Sstevel@tonic-gate  * IPv6 interfaces (that have the IFF_MULTICAST flag set), respectively.
18137c478bd9Sstevel@tonic-gate  * This should really be done in the init scripts if we ever allow zones to
18147c478bd9Sstevel@tonic-gate  * modify the routing tables.
18157c478bd9Sstevel@tonic-gate  *
18167c478bd9Sstevel@tonic-gate  * If anything goes wrong, we log an detailed error message, attempt to tear
18177c478bd9Sstevel@tonic-gate  * down whatever we set up and return an error.
18187c478bd9Sstevel@tonic-gate  */
18197c478bd9Sstevel@tonic-gate static int
18207c478bd9Sstevel@tonic-gate configure_one_interface(zlog_t *zlogp, zoneid_t zone_id,
18217c478bd9Sstevel@tonic-gate     struct zone_nwiftab *nwiftabptr, boolean_t *mcast_rt_v4_setp,
18227c478bd9Sstevel@tonic-gate     boolean_t *mcast_rt_v6_setp)
18237c478bd9Sstevel@tonic-gate {
18247c478bd9Sstevel@tonic-gate 	struct lifreq lifr;
18257c478bd9Sstevel@tonic-gate 	struct sockaddr_in netmask4;
18267c478bd9Sstevel@tonic-gate 	struct sockaddr_in6 netmask6;
18277c478bd9Sstevel@tonic-gate 	struct in_addr in4;
18287c478bd9Sstevel@tonic-gate 	struct in6_addr in6;
18297c478bd9Sstevel@tonic-gate 	sa_family_t af;
18307c478bd9Sstevel@tonic-gate 	char *slashp = strchr(nwiftabptr->zone_nwif_address, '/');
18317c478bd9Sstevel@tonic-gate 	mcast_rtmsg_t mcast_rtmsg;
18327c478bd9Sstevel@tonic-gate 	int s;
18337c478bd9Sstevel@tonic-gate 	int rs;
18347c478bd9Sstevel@tonic-gate 	int rlen;
18357c478bd9Sstevel@tonic-gate 	boolean_t got_netmask = B_FALSE;
18367c478bd9Sstevel@tonic-gate 	char addrstr4[INET_ADDRSTRLEN];
18377c478bd9Sstevel@tonic-gate 	int res;
18387c478bd9Sstevel@tonic-gate 
18397c478bd9Sstevel@tonic-gate 	res = zonecfg_valid_net_address(nwiftabptr->zone_nwif_address, &lifr);
18407c478bd9Sstevel@tonic-gate 	if (res != Z_OK) {
18417c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s: %s", zonecfg_strerror(res),
18427c478bd9Sstevel@tonic-gate 		    nwiftabptr->zone_nwif_address);
18437c478bd9Sstevel@tonic-gate 		return (-1);
18447c478bd9Sstevel@tonic-gate 	}
18457c478bd9Sstevel@tonic-gate 	af = lifr.lifr_addr.ss_family;
18467c478bd9Sstevel@tonic-gate 	if (af == AF_INET)
18477c478bd9Sstevel@tonic-gate 		in4 = ((struct sockaddr_in *)(&lifr.lifr_addr))->sin_addr;
18487c478bd9Sstevel@tonic-gate 	else
18497c478bd9Sstevel@tonic-gate 		in6 = ((struct sockaddr_in6 *)(&lifr.lifr_addr))->sin6_addr;
18507c478bd9Sstevel@tonic-gate 
18517c478bd9Sstevel@tonic-gate 	if ((s = socket(af, SOCK_DGRAM, 0)) < 0) {
18527c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "could not get socket");
18537c478bd9Sstevel@tonic-gate 		return (-1);
18547c478bd9Sstevel@tonic-gate 	}
18557c478bd9Sstevel@tonic-gate 
18567c478bd9Sstevel@tonic-gate 	(void) strlcpy(lifr.lifr_name, nwiftabptr->zone_nwif_physical,
18577c478bd9Sstevel@tonic-gate 	    sizeof (lifr.lifr_name));
18587c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) {
185922321485Svp157776 		/*
186022321485Svp157776 		 * Here, we know that the interface can't be brought up.
186122321485Svp157776 		 * A similar warning message was already printed out to
186222321485Svp157776 		 * the console by zoneadm(1M) so instead we log the
186322321485Svp157776 		 * message to syslog and continue.
186422321485Svp157776 		 */
186522321485Svp157776 		zerror(&logsys, B_TRUE, "WARNING: skipping interface "
186622321485Svp157776 		    "'%s' which may not be present/plumbed in the "
186722321485Svp157776 		    "global zone.", lifr.lifr_name);
18687c478bd9Sstevel@tonic-gate 		(void) close(s);
186922321485Svp157776 		return (Z_OK);
18707c478bd9Sstevel@tonic-gate 	}
18717c478bd9Sstevel@tonic-gate 
18727c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) {
18737c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE,
18747c478bd9Sstevel@tonic-gate 		    "%s: could not set IP address to %s",
18757c478bd9Sstevel@tonic-gate 		    lifr.lifr_name, nwiftabptr->zone_nwif_address);
18767c478bd9Sstevel@tonic-gate 		goto bad;
18777c478bd9Sstevel@tonic-gate 	}
18787c478bd9Sstevel@tonic-gate 
18797c478bd9Sstevel@tonic-gate 	/* Preserve literal IPv4 address for later potential printing. */
18807c478bd9Sstevel@tonic-gate 	if (af == AF_INET)
18817c478bd9Sstevel@tonic-gate 		(void) inet_ntop(AF_INET, &in4, addrstr4, INET_ADDRSTRLEN);
18827c478bd9Sstevel@tonic-gate 
18837c478bd9Sstevel@tonic-gate 	lifr.lifr_zoneid = zone_id;
18847c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCSLIFZONE, (caddr_t)&lifr) < 0) {
18857c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s: could not place interface into zone",
18867c478bd9Sstevel@tonic-gate 		    lifr.lifr_name);
18877c478bd9Sstevel@tonic-gate 		goto bad;
18887c478bd9Sstevel@tonic-gate 	}
18897c478bd9Sstevel@tonic-gate 
18907c478bd9Sstevel@tonic-gate 	if (strcmp(nwiftabptr->zone_nwif_physical, "lo0") == 0) {
18917c478bd9Sstevel@tonic-gate 		got_netmask = B_TRUE;	/* default setting will be correct */
18927c478bd9Sstevel@tonic-gate 	} else {
18937c478bd9Sstevel@tonic-gate 		if (af == AF_INET) {
18947c478bd9Sstevel@tonic-gate 			/*
18957c478bd9Sstevel@tonic-gate 			 * The IPv4 netmask can be determined either
18967c478bd9Sstevel@tonic-gate 			 * directly if a prefix length was supplied with
18977c478bd9Sstevel@tonic-gate 			 * the address or via the netmasks database.  Not
18987c478bd9Sstevel@tonic-gate 			 * being able to determine it is a common failure,
18997c478bd9Sstevel@tonic-gate 			 * but it often is not fatal to operation of the
19007c478bd9Sstevel@tonic-gate 			 * interface.  In that case, a warning will be
19017c478bd9Sstevel@tonic-gate 			 * printed after the rest of the interface's
19027c478bd9Sstevel@tonic-gate 			 * parameters have been configured.
19037c478bd9Sstevel@tonic-gate 			 */
19047c478bd9Sstevel@tonic-gate 			(void) memset(&netmask4, 0, sizeof (netmask4));
19057c478bd9Sstevel@tonic-gate 			if (slashp != NULL) {
19067c478bd9Sstevel@tonic-gate 				if (addr2netmask(slashp + 1, V4_ADDR_LEN,
19077c478bd9Sstevel@tonic-gate 				    (uchar_t *)&netmask4.sin_addr) != 0) {
19087c478bd9Sstevel@tonic-gate 					*slashp = '/';
19097c478bd9Sstevel@tonic-gate 					zerror(zlogp, B_FALSE,
19107c478bd9Sstevel@tonic-gate 					    "%s: invalid prefix length in %s",
19117c478bd9Sstevel@tonic-gate 					    lifr.lifr_name,
19127c478bd9Sstevel@tonic-gate 					    nwiftabptr->zone_nwif_address);
19137c478bd9Sstevel@tonic-gate 					goto bad;
19147c478bd9Sstevel@tonic-gate 				}
19157c478bd9Sstevel@tonic-gate 				got_netmask = B_TRUE;
19167c478bd9Sstevel@tonic-gate 			} else if (getnetmaskbyaddr(in4,
19177c478bd9Sstevel@tonic-gate 			    &netmask4.sin_addr) == 0) {
19187c478bd9Sstevel@tonic-gate 				got_netmask = B_TRUE;
19197c478bd9Sstevel@tonic-gate 			}
19207c478bd9Sstevel@tonic-gate 			if (got_netmask) {
19217c478bd9Sstevel@tonic-gate 				netmask4.sin_family = af;
19227c478bd9Sstevel@tonic-gate 				(void) memcpy(&lifr.lifr_addr, &netmask4,
19237c478bd9Sstevel@tonic-gate 				    sizeof (netmask4));
19247c478bd9Sstevel@tonic-gate 			}
19257c478bd9Sstevel@tonic-gate 		} else {
19267c478bd9Sstevel@tonic-gate 			(void) memset(&netmask6, 0, sizeof (netmask6));
19277c478bd9Sstevel@tonic-gate 			if (addr2netmask(slashp + 1, V6_ADDR_LEN,
19287c478bd9Sstevel@tonic-gate 			    (uchar_t *)&netmask6.sin6_addr) != 0) {
19297c478bd9Sstevel@tonic-gate 				*slashp = '/';
19307c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
19317c478bd9Sstevel@tonic-gate 				    "%s: invalid prefix length in %s",
19327c478bd9Sstevel@tonic-gate 				    lifr.lifr_name,
19337c478bd9Sstevel@tonic-gate 				    nwiftabptr->zone_nwif_address);
19347c478bd9Sstevel@tonic-gate 				goto bad;
19357c478bd9Sstevel@tonic-gate 			}
19367c478bd9Sstevel@tonic-gate 			got_netmask = B_TRUE;
19377c478bd9Sstevel@tonic-gate 			netmask6.sin6_family = af;
19387c478bd9Sstevel@tonic-gate 			(void) memcpy(&lifr.lifr_addr, &netmask6,
19397c478bd9Sstevel@tonic-gate 			    sizeof (netmask6));
19407c478bd9Sstevel@tonic-gate 		}
19417c478bd9Sstevel@tonic-gate 		if (got_netmask &&
19427c478bd9Sstevel@tonic-gate 		    ioctl(s, SIOCSLIFNETMASK, (caddr_t)&lifr) < 0) {
19437c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not set netmask",
19447c478bd9Sstevel@tonic-gate 			    lifr.lifr_name);
19457c478bd9Sstevel@tonic-gate 			goto bad;
19467c478bd9Sstevel@tonic-gate 		}
19477c478bd9Sstevel@tonic-gate 
19487c478bd9Sstevel@tonic-gate 		/*
19497c478bd9Sstevel@tonic-gate 		 * This doesn't set the broadcast address at all. Rather, it
19507c478bd9Sstevel@tonic-gate 		 * gets, then sets the interface's address, relying on the fact
19517c478bd9Sstevel@tonic-gate 		 * that resetting the address will reset the broadcast address.
19527c478bd9Sstevel@tonic-gate 		 */
19537c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) {
19547c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not get address",
19557c478bd9Sstevel@tonic-gate 			    lifr.lifr_name);
19567c478bd9Sstevel@tonic-gate 			goto bad;
19577c478bd9Sstevel@tonic-gate 		}
19587c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCSLIFADDR, (caddr_t)&lifr) < 0) {
19597c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
19607c478bd9Sstevel@tonic-gate 			    "%s: could not reset broadcast address",
19617c478bd9Sstevel@tonic-gate 			    lifr.lifr_name);
19627c478bd9Sstevel@tonic-gate 			goto bad;
19637c478bd9Sstevel@tonic-gate 		}
19647c478bd9Sstevel@tonic-gate 	}
19657c478bd9Sstevel@tonic-gate 
19667c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
19677c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s: could not get flags",
19687c478bd9Sstevel@tonic-gate 		    lifr.lifr_name);
19697c478bd9Sstevel@tonic-gate 		goto bad;
19707c478bd9Sstevel@tonic-gate 	}
19717c478bd9Sstevel@tonic-gate 	lifr.lifr_flags |= IFF_UP;
19727c478bd9Sstevel@tonic-gate 	if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
19737c478bd9Sstevel@tonic-gate 		int save_errno = errno;
19747c478bd9Sstevel@tonic-gate 		char *zone_using;
19757c478bd9Sstevel@tonic-gate 
19767c478bd9Sstevel@tonic-gate 		/*
19777c478bd9Sstevel@tonic-gate 		 * If we failed with something other than EADDRNOTAVAIL,
19787c478bd9Sstevel@tonic-gate 		 * then skip to the end.  Otherwise, look up our address,
19797c478bd9Sstevel@tonic-gate 		 * then call a function to determine which zone is already
19807c478bd9Sstevel@tonic-gate 		 * using that address.
19817c478bd9Sstevel@tonic-gate 		 */
19827c478bd9Sstevel@tonic-gate 		if (errno != EADDRNOTAVAIL) {
19837c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
19847c478bd9Sstevel@tonic-gate 			    "%s: could not bring interface up", lifr.lifr_name);
19857c478bd9Sstevel@tonic-gate 			goto bad;
19867c478bd9Sstevel@tonic-gate 		}
19877c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFADDR, (caddr_t)&lifr) < 0) {
19887c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not get address",
19897c478bd9Sstevel@tonic-gate 			    lifr.lifr_name);
19907c478bd9Sstevel@tonic-gate 			goto bad;
19917c478bd9Sstevel@tonic-gate 		}
19927c478bd9Sstevel@tonic-gate 		zone_using = who_is_using(zlogp, &lifr);
19937c478bd9Sstevel@tonic-gate 		errno = save_errno;
19947c478bd9Sstevel@tonic-gate 		if (zone_using == NULL)
19957c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE,
19967c478bd9Sstevel@tonic-gate 			    "%s: could not bring interface up", lifr.lifr_name);
19977c478bd9Sstevel@tonic-gate 		else
19987c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not bring interface "
19997c478bd9Sstevel@tonic-gate 			    "up: address in use by zone '%s'", lifr.lifr_name,
20007c478bd9Sstevel@tonic-gate 			    zone_using);
20017c478bd9Sstevel@tonic-gate 		goto bad;
20027c478bd9Sstevel@tonic-gate 	}
20037c478bd9Sstevel@tonic-gate 	if ((lifr.lifr_flags & IFF_MULTICAST) && ((af == AF_INET &&
20047c478bd9Sstevel@tonic-gate 	    mcast_rt_v4_setp != NULL && *mcast_rt_v4_setp == B_FALSE) ||
20057c478bd9Sstevel@tonic-gate 	    (af == AF_INET6 &&
20067c478bd9Sstevel@tonic-gate 	    mcast_rt_v6_setp != NULL && *mcast_rt_v6_setp == B_FALSE))) {
20077c478bd9Sstevel@tonic-gate 		rs = socket(PF_ROUTE, SOCK_RAW, 0);
20087c478bd9Sstevel@tonic-gate 		if (rs < 0) {
20097c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s: could not create "
20107c478bd9Sstevel@tonic-gate 			    "routing socket", lifr.lifr_name);
20117c478bd9Sstevel@tonic-gate 			goto bad;
20127c478bd9Sstevel@tonic-gate 		}
20137c478bd9Sstevel@tonic-gate 		(void) shutdown(rs, 0);
20147c478bd9Sstevel@tonic-gate 		(void) memset((void *)&mcast_rtmsg, 0, sizeof (mcast_rtmsg_t));
20157c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_msglen =  sizeof (struct rt_msghdr) +
20167c478bd9Sstevel@tonic-gate 		    3 * (af == AF_INET ? sizeof (struct sockaddr_in) :
20177c478bd9Sstevel@tonic-gate 		    sizeof (struct sockaddr_in6));
20187c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_version = RTM_VERSION;
20197c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_type = RTM_ADD;
20207c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_flags = RTF_UP;
20217c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_addrs =
20227c478bd9Sstevel@tonic-gate 		    RTA_DST | RTA_GATEWAY | RTA_NETMASK;
20237c478bd9Sstevel@tonic-gate 		mcast_rtmsg.m_rtm.rtm_seq = ++rts_seqno;
20247c478bd9Sstevel@tonic-gate 		if (af == AF_INET) {
20257c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_dst4.sin_family = AF_INET;
20267c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_dst4.sin_addr.s_addr =
20277c478bd9Sstevel@tonic-gate 			    htonl(INADDR_UNSPEC_GROUP);
20287c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_gw4.sin_family = AF_INET;
20297c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_gw4.sin_addr = in4;
20307c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_netmask4.sin_family = AF_INET;
20317c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_netmask4.sin_addr.s_addr =
20327c478bd9Sstevel@tonic-gate 			    htonl(IN_CLASSD_NET);
20337c478bd9Sstevel@tonic-gate 		} else {
20347c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_dst6.sin6_family = AF_INET6;
20357c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_dst6.sin6_addr.s6_addr[0] = 0xffU;
20367c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_gw6.sin6_family = AF_INET6;
20377c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_gw6.sin6_addr = in6;
20387c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_netmask6.sin6_family = AF_INET6;
20397c478bd9Sstevel@tonic-gate 			mcast_rtmsg.m_netmask6.sin6_addr.s6_addr[0] = 0xffU;
20407c478bd9Sstevel@tonic-gate 		}
20417c478bd9Sstevel@tonic-gate 		rlen = write(rs, (char *)&mcast_rtmsg,
20427c478bd9Sstevel@tonic-gate 		    mcast_rtmsg.m_rtm.rtm_msglen);
204322321485Svp157776 		/*
204422321485Svp157776 		 * The write to the multicast socket will fail if the
204522321485Svp157776 		 * interface belongs to a failed IPMP group. This is a
204622321485Svp157776 		 * non-fatal error and the zone will continue booting.
204722321485Svp157776 		 * While the zone is running, if any interface in the
204822321485Svp157776 		 * failed IPMP group recovers, the zone will fallback to
204922321485Svp157776 		 * using that interface.
205022321485Svp157776 		 */
20517c478bd9Sstevel@tonic-gate 		if (rlen < mcast_rtmsg.m_rtm.rtm_msglen) {
20527c478bd9Sstevel@tonic-gate 			if (rlen < 0) {
205322321485Svp157776 				zerror(zlogp, B_TRUE, "WARNING: interface "
205422321485Svp157776 				    "'%s' not available as default for "
205522321485Svp157776 				    "multicast.", lifr.lifr_name);
20567c478bd9Sstevel@tonic-gate 			} else {
205722321485Svp157776 				zerror(zlogp, B_FALSE, "WARNING: interface "
205822321485Svp157776 				    "'%s' not available as default for "
205922321485Svp157776 				    "multicast; routing socket returned "
206022321485Svp157776 				    "unexpected %d bytes.",
206122321485Svp157776 				    lifr.lifr_name, rlen);
20627c478bd9Sstevel@tonic-gate 			}
206322321485Svp157776 		} else {
206422321485Svp157776 
20657c478bd9Sstevel@tonic-gate 			if (af == AF_INET) {
20667c478bd9Sstevel@tonic-gate 				*mcast_rt_v4_setp = B_TRUE;
20677c478bd9Sstevel@tonic-gate 			} else {
20687c478bd9Sstevel@tonic-gate 				*mcast_rt_v6_setp = B_TRUE;
20697c478bd9Sstevel@tonic-gate 			}
207022321485Svp157776 		}
20717c478bd9Sstevel@tonic-gate 		(void) close(rs);
20727c478bd9Sstevel@tonic-gate 	}
20737c478bd9Sstevel@tonic-gate 
20747c478bd9Sstevel@tonic-gate 	if (!got_netmask) {
20757c478bd9Sstevel@tonic-gate 		/*
20767c478bd9Sstevel@tonic-gate 		 * A common, but often non-fatal problem, is that the system
20777c478bd9Sstevel@tonic-gate 		 * cannot find the netmask for an interface address. This is
20787c478bd9Sstevel@tonic-gate 		 * often caused by it being only in /etc/inet/netmasks, but
20797c478bd9Sstevel@tonic-gate 		 * /etc/nsswitch.conf says to use NIS or NIS+ and it's not
20807c478bd9Sstevel@tonic-gate 		 * in that. This doesn't show up at boot because the netmask
20817c478bd9Sstevel@tonic-gate 		 * is obtained from /etc/inet/netmasks when no network
20827c478bd9Sstevel@tonic-gate 		 * interfaces are up, but isn't consulted when NIS/NIS+ is
20837c478bd9Sstevel@tonic-gate 		 * available. We warn the user here that something like this
20847c478bd9Sstevel@tonic-gate 		 * has happened and we're just running with a default and
20857c478bd9Sstevel@tonic-gate 		 * possible incorrect netmask.
20867c478bd9Sstevel@tonic-gate 		 */
20877c478bd9Sstevel@tonic-gate 		char buffer[INET6_ADDRSTRLEN];
20887c478bd9Sstevel@tonic-gate 		void  *addr;
20897c478bd9Sstevel@tonic-gate 
20907c478bd9Sstevel@tonic-gate 		if (af == AF_INET)
20917c478bd9Sstevel@tonic-gate 			addr = &((struct sockaddr_in *)
20927c478bd9Sstevel@tonic-gate 			    (&lifr.lifr_addr))->sin_addr;
20937c478bd9Sstevel@tonic-gate 		else
20947c478bd9Sstevel@tonic-gate 			addr = &((struct sockaddr_in6 *)
20957c478bd9Sstevel@tonic-gate 			    (&lifr.lifr_addr))->sin6_addr;
20967c478bd9Sstevel@tonic-gate 
20977c478bd9Sstevel@tonic-gate 		/* Find out what netmask interface is going to be using */
20987c478bd9Sstevel@tonic-gate 		if (ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifr) < 0 ||
20997c478bd9Sstevel@tonic-gate 		    inet_ntop(af, addr, buffer, sizeof (buffer)) == NULL)
21007c478bd9Sstevel@tonic-gate 			goto bad;
21017c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
21027c478bd9Sstevel@tonic-gate 		    "WARNING: %s: no matching subnet found in netmasks(4) for "
21037c478bd9Sstevel@tonic-gate 		    "%s; using default of %s.",
21047c478bd9Sstevel@tonic-gate 		    lifr.lifr_name, addrstr4, buffer);
21057c478bd9Sstevel@tonic-gate 	}
21067c478bd9Sstevel@tonic-gate 
21077c478bd9Sstevel@tonic-gate 	(void) close(s);
21087c478bd9Sstevel@tonic-gate 	return (Z_OK);
21097c478bd9Sstevel@tonic-gate bad:
21107c478bd9Sstevel@tonic-gate 	(void) ioctl(s, SIOCLIFREMOVEIF, (caddr_t)&lifr);
21117c478bd9Sstevel@tonic-gate 	(void) close(s);
21127c478bd9Sstevel@tonic-gate 	return (-1);
21137c478bd9Sstevel@tonic-gate }
21147c478bd9Sstevel@tonic-gate 
21157c478bd9Sstevel@tonic-gate /*
21167c478bd9Sstevel@tonic-gate  * Sets up network interfaces based on information from the zone configuration.
21177c478bd9Sstevel@tonic-gate  * An IPv4 loopback interface is set up "for free", modeling the global system.
21187c478bd9Sstevel@tonic-gate  * If any of the configuration interfaces were IPv6, then an IPv6 loopback
21197c478bd9Sstevel@tonic-gate  * address is set up as well.
21207c478bd9Sstevel@tonic-gate  *
21217c478bd9Sstevel@tonic-gate  * If anything goes wrong, we log a general error message, attempt to tear down
21227c478bd9Sstevel@tonic-gate  * whatever we set up, and return an error.
21237c478bd9Sstevel@tonic-gate  */
21247c478bd9Sstevel@tonic-gate static int
21257c478bd9Sstevel@tonic-gate configure_network_interfaces(zlog_t *zlogp)
21267c478bd9Sstevel@tonic-gate {
21277c478bd9Sstevel@tonic-gate 	zone_dochandle_t handle;
21287c478bd9Sstevel@tonic-gate 	struct zone_nwiftab nwiftab, loopback_iftab;
21297c478bd9Sstevel@tonic-gate 	boolean_t saw_v6 = B_FALSE;
21307c478bd9Sstevel@tonic-gate 	boolean_t mcast_rt_v4_set = B_FALSE;
21317c478bd9Sstevel@tonic-gate 	boolean_t mcast_rt_v6_set = B_FALSE;
21327c478bd9Sstevel@tonic-gate 	zoneid_t zoneid;
21337c478bd9Sstevel@tonic-gate 
21347c478bd9Sstevel@tonic-gate 	if ((zoneid = getzoneidbyname(zone_name)) == ZONE_ID_UNDEFINED) {
21357c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to get zoneid");
21367c478bd9Sstevel@tonic-gate 		return (-1);
21377c478bd9Sstevel@tonic-gate 	}
21387c478bd9Sstevel@tonic-gate 
21397c478bd9Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
21407c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
21417c478bd9Sstevel@tonic-gate 		return (-1);
21427c478bd9Sstevel@tonic-gate 	}
21437c478bd9Sstevel@tonic-gate 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
21447c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
21457c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
21467c478bd9Sstevel@tonic-gate 		return (-1);
21477c478bd9Sstevel@tonic-gate 	}
21487c478bd9Sstevel@tonic-gate 	if (zonecfg_setnwifent(handle) == Z_OK) {
21497c478bd9Sstevel@tonic-gate 		for (;;) {
21507c478bd9Sstevel@tonic-gate 			struct in6_addr in6;
21517c478bd9Sstevel@tonic-gate 
21527c478bd9Sstevel@tonic-gate 			if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
21537c478bd9Sstevel@tonic-gate 				break;
21547c478bd9Sstevel@tonic-gate 			if (configure_one_interface(zlogp, zoneid,
21557c478bd9Sstevel@tonic-gate 			    &nwiftab, &mcast_rt_v4_set, &mcast_rt_v6_set) !=
21567c478bd9Sstevel@tonic-gate 			    Z_OK) {
21577c478bd9Sstevel@tonic-gate 				(void) zonecfg_endnwifent(handle);
21587c478bd9Sstevel@tonic-gate 				zonecfg_fini_handle(handle);
21597c478bd9Sstevel@tonic-gate 				return (-1);
21607c478bd9Sstevel@tonic-gate 			}
21617c478bd9Sstevel@tonic-gate 			if (inet_pton(AF_INET6, nwiftab.zone_nwif_address,
21627c478bd9Sstevel@tonic-gate 			    &in6) == 1)
21637c478bd9Sstevel@tonic-gate 				saw_v6 = B_TRUE;
21647c478bd9Sstevel@tonic-gate 		}
21657c478bd9Sstevel@tonic-gate 		(void) zonecfg_endnwifent(handle);
21667c478bd9Sstevel@tonic-gate 	}
21677c478bd9Sstevel@tonic-gate 	zonecfg_fini_handle(handle);
21687c478bd9Sstevel@tonic-gate 	(void) strlcpy(loopback_iftab.zone_nwif_physical, "lo0",
21697c478bd9Sstevel@tonic-gate 	    sizeof (loopback_iftab.zone_nwif_physical));
21707c478bd9Sstevel@tonic-gate 	(void) strlcpy(loopback_iftab.zone_nwif_address, "127.0.0.1",
21717c478bd9Sstevel@tonic-gate 	    sizeof (loopback_iftab.zone_nwif_address));
21727c478bd9Sstevel@tonic-gate 	if (configure_one_interface(zlogp, zoneid, &loopback_iftab, NULL, NULL)
21737c478bd9Sstevel@tonic-gate 	    != Z_OK) {
21747c478bd9Sstevel@tonic-gate 		return (-1);
21757c478bd9Sstevel@tonic-gate 	}
21767c478bd9Sstevel@tonic-gate 	if (saw_v6) {
21777c478bd9Sstevel@tonic-gate 		(void) strlcpy(loopback_iftab.zone_nwif_address, "::1/128",
21787c478bd9Sstevel@tonic-gate 		    sizeof (loopback_iftab.zone_nwif_address));
21797c478bd9Sstevel@tonic-gate 		if (configure_one_interface(zlogp, zoneid,
21807c478bd9Sstevel@tonic-gate 		    &loopback_iftab, NULL, NULL) != Z_OK) {
21817c478bd9Sstevel@tonic-gate 			return (-1);
21827c478bd9Sstevel@tonic-gate 		}
21837c478bd9Sstevel@tonic-gate 	}
21847c478bd9Sstevel@tonic-gate 	return (0);
21857c478bd9Sstevel@tonic-gate }
21867c478bd9Sstevel@tonic-gate 
21877c478bd9Sstevel@tonic-gate static int
21887c478bd9Sstevel@tonic-gate tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid,
21897c478bd9Sstevel@tonic-gate     const struct sockaddr_storage *local, const struct sockaddr_storage *remote)
21907c478bd9Sstevel@tonic-gate {
21917c478bd9Sstevel@tonic-gate 	int fd;
21927c478bd9Sstevel@tonic-gate 	struct strioctl ioc;
21937c478bd9Sstevel@tonic-gate 	tcp_ioc_abort_conn_t conn;
21947c478bd9Sstevel@tonic-gate 	int error;
21957c478bd9Sstevel@tonic-gate 
21967c478bd9Sstevel@tonic-gate 	conn.ac_local = *local;
21977c478bd9Sstevel@tonic-gate 	conn.ac_remote = *remote;
21987c478bd9Sstevel@tonic-gate 	conn.ac_start = TCPS_SYN_SENT;
21997c478bd9Sstevel@tonic-gate 	conn.ac_end = TCPS_TIME_WAIT;
22007c478bd9Sstevel@tonic-gate 	conn.ac_zoneid = zoneid;
22017c478bd9Sstevel@tonic-gate 
22027c478bd9Sstevel@tonic-gate 	ioc.ic_cmd = TCP_IOC_ABORT_CONN;
22037c478bd9Sstevel@tonic-gate 	ioc.ic_timout = -1; /* infinite timeout */
22047c478bd9Sstevel@tonic-gate 	ioc.ic_len = sizeof (conn);
22057c478bd9Sstevel@tonic-gate 	ioc.ic_dp = (char *)&conn;
22067c478bd9Sstevel@tonic-gate 
22077c478bd9Sstevel@tonic-gate 	if ((fd = open("/dev/tcp", O_RDONLY)) < 0) {
22087c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to open %s", "/dev/tcp");
22097c478bd9Sstevel@tonic-gate 		return (-1);
22107c478bd9Sstevel@tonic-gate 	}
22117c478bd9Sstevel@tonic-gate 
22127c478bd9Sstevel@tonic-gate 	error = ioctl(fd, I_STR, &ioc);
22137c478bd9Sstevel@tonic-gate 	(void) close(fd);
22147c478bd9Sstevel@tonic-gate 	if (error == 0 || errno == ENOENT)	/* ENOENT is not an error */
22157c478bd9Sstevel@tonic-gate 		return (0);
22167c478bd9Sstevel@tonic-gate 	return (-1);
22177c478bd9Sstevel@tonic-gate }
22187c478bd9Sstevel@tonic-gate 
22197c478bd9Sstevel@tonic-gate static int
22207c478bd9Sstevel@tonic-gate tcp_abort_connections(zlog_t *zlogp, zoneid_t zoneid)
22217c478bd9Sstevel@tonic-gate {
22227c478bd9Sstevel@tonic-gate 	struct sockaddr_storage l, r;
22237c478bd9Sstevel@tonic-gate 	struct sockaddr_in *local, *remote;
22247c478bd9Sstevel@tonic-gate 	struct sockaddr_in6 *local6, *remote6;
22257c478bd9Sstevel@tonic-gate 	int error;
22267c478bd9Sstevel@tonic-gate 
22277c478bd9Sstevel@tonic-gate 	/*
22287c478bd9Sstevel@tonic-gate 	 * Abort IPv4 connections.
22297c478bd9Sstevel@tonic-gate 	 */
22307c478bd9Sstevel@tonic-gate 	bzero(&l, sizeof (*local));
22317c478bd9Sstevel@tonic-gate 	local = (struct sockaddr_in *)&l;
22327c478bd9Sstevel@tonic-gate 	local->sin_family = AF_INET;
22337c478bd9Sstevel@tonic-gate 	local->sin_addr.s_addr = INADDR_ANY;
22347c478bd9Sstevel@tonic-gate 	local->sin_port = 0;
22357c478bd9Sstevel@tonic-gate 
22367c478bd9Sstevel@tonic-gate 	bzero(&r, sizeof (*remote));
22377c478bd9Sstevel@tonic-gate 	remote = (struct sockaddr_in *)&r;
22387c478bd9Sstevel@tonic-gate 	remote->sin_family = AF_INET;
22397c478bd9Sstevel@tonic-gate 	remote->sin_addr.s_addr = INADDR_ANY;
22407c478bd9Sstevel@tonic-gate 	remote->sin_port = 0;
22417c478bd9Sstevel@tonic-gate 
22427c478bd9Sstevel@tonic-gate 	if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
22437c478bd9Sstevel@tonic-gate 		return (error);
22447c478bd9Sstevel@tonic-gate 
22457c478bd9Sstevel@tonic-gate 	/*
22467c478bd9Sstevel@tonic-gate 	 * Abort IPv6 connections.
22477c478bd9Sstevel@tonic-gate 	 */
22487c478bd9Sstevel@tonic-gate 	bzero(&l, sizeof (*local6));
22497c478bd9Sstevel@tonic-gate 	local6 = (struct sockaddr_in6 *)&l;
22507c478bd9Sstevel@tonic-gate 	local6->sin6_family = AF_INET6;
22517c478bd9Sstevel@tonic-gate 	local6->sin6_port = 0;
22527c478bd9Sstevel@tonic-gate 	local6->sin6_addr = in6addr_any;
22537c478bd9Sstevel@tonic-gate 
22547c478bd9Sstevel@tonic-gate 	bzero(&r, sizeof (*remote6));
22557c478bd9Sstevel@tonic-gate 	remote6 = (struct sockaddr_in6 *)&r;
22567c478bd9Sstevel@tonic-gate 	remote6->sin6_family = AF_INET6;
22577c478bd9Sstevel@tonic-gate 	remote6->sin6_port = 0;
22587c478bd9Sstevel@tonic-gate 	remote6->sin6_addr = in6addr_any;
22597c478bd9Sstevel@tonic-gate 
22607c478bd9Sstevel@tonic-gate 	if ((error = tcp_abort_conn(zlogp, zoneid, &l, &r)) != 0)
22617c478bd9Sstevel@tonic-gate 		return (error);
22627c478bd9Sstevel@tonic-gate 	return (0);
22637c478bd9Sstevel@tonic-gate }
22647c478bd9Sstevel@tonic-gate 
22657c478bd9Sstevel@tonic-gate static int
2266ffbafc53Scomay get_privset(zlog_t *zlogp, priv_set_t *privs, boolean_t mount_cmd)
2267ffbafc53Scomay {
2268ffbafc53Scomay 	int error = -1;
2269ffbafc53Scomay 	zone_dochandle_t handle;
2270ffbafc53Scomay 	char *privname = NULL;
2271ffbafc53Scomay 
2272ffbafc53Scomay 	if (mount_cmd) {
2273ffbafc53Scomay 		if (zonecfg_default_privset(privs) == Z_OK)
2274ffbafc53Scomay 			return (0);
2275ffbafc53Scomay 		zerror(zlogp, B_FALSE,
2276ffbafc53Scomay 		    "failed to determine the zone's default privilege set");
2277ffbafc53Scomay 		return (-1);
2278ffbafc53Scomay 	}
2279ffbafc53Scomay 
2280ffbafc53Scomay 	if ((handle = zonecfg_init_handle()) == NULL) {
2281ffbafc53Scomay 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2282ffbafc53Scomay 		return (-1);
2283ffbafc53Scomay 	}
2284ffbafc53Scomay 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2285ffbafc53Scomay 		zerror(zlogp, B_FALSE, "invalid configuration");
2286ffbafc53Scomay 		zonecfg_fini_handle(handle);
2287ffbafc53Scomay 		return (-1);
2288ffbafc53Scomay 	}
2289ffbafc53Scomay 
2290ffbafc53Scomay 	switch (zonecfg_get_privset(handle, privs, &privname)) {
2291ffbafc53Scomay 	case Z_OK:
2292ffbafc53Scomay 		error = 0;
2293ffbafc53Scomay 		break;
2294ffbafc53Scomay 	case Z_PRIV_PROHIBITED:
2295ffbafc53Scomay 		zerror(zlogp, B_FALSE, "privilege \"%s\" is not permitted "
2296ffbafc53Scomay 		    "within the zone's privilege set", privname);
2297ffbafc53Scomay 		break;
2298ffbafc53Scomay 	case Z_PRIV_REQUIRED:
2299ffbafc53Scomay 		zerror(zlogp, B_FALSE, "required privilege \"%s\" is missing "
2300ffbafc53Scomay 		    "from the zone's privilege set", privname);
2301ffbafc53Scomay 		break;
2302ffbafc53Scomay 	case Z_PRIV_UNKNOWN:
2303ffbafc53Scomay 		zerror(zlogp, B_FALSE, "unknown privilege \"%s\" specified "
2304ffbafc53Scomay 		    "in the zone's privilege set", privname);
2305ffbafc53Scomay 		break;
2306ffbafc53Scomay 	default:
2307ffbafc53Scomay 		zerror(zlogp, B_FALSE, "failed to determine the zone's "
2308ffbafc53Scomay 		    "privilege set");
2309ffbafc53Scomay 		break;
2310ffbafc53Scomay 	}
2311ffbafc53Scomay 
2312ffbafc53Scomay 	free(privname);
2313ffbafc53Scomay 	zonecfg_fini_handle(handle);
2314ffbafc53Scomay 	return (error);
2315ffbafc53Scomay }
2316ffbafc53Scomay 
2317ffbafc53Scomay static int
23187c478bd9Sstevel@tonic-gate get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
23197c478bd9Sstevel@tonic-gate {
23207c478bd9Sstevel@tonic-gate 	nvlist_t *nvl = NULL;
23217c478bd9Sstevel@tonic-gate 	char *nvl_packed = NULL;
23227c478bd9Sstevel@tonic-gate 	size_t nvl_size = 0;
23237c478bd9Sstevel@tonic-gate 	nvlist_t **nvlv = NULL;
23247c478bd9Sstevel@tonic-gate 	int rctlcount = 0;
23257c478bd9Sstevel@tonic-gate 	int error = -1;
23267c478bd9Sstevel@tonic-gate 	zone_dochandle_t handle;
23277c478bd9Sstevel@tonic-gate 	struct zone_rctltab rctltab;
23287c478bd9Sstevel@tonic-gate 	rctlblk_t *rctlblk = NULL;
23297c478bd9Sstevel@tonic-gate 
23307c478bd9Sstevel@tonic-gate 	*bufp = NULL;
23317c478bd9Sstevel@tonic-gate 	*bufsizep = 0;
23327c478bd9Sstevel@tonic-gate 
23337c478bd9Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
23347c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
23357c478bd9Sstevel@tonic-gate 		return (-1);
23367c478bd9Sstevel@tonic-gate 	}
23377c478bd9Sstevel@tonic-gate 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
23387c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
23397c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
23407c478bd9Sstevel@tonic-gate 		return (-1);
23417c478bd9Sstevel@tonic-gate 	}
23427c478bd9Sstevel@tonic-gate 
23437c478bd9Sstevel@tonic-gate 	rctltab.zone_rctl_valptr = NULL;
23447c478bd9Sstevel@tonic-gate 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
23457c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
23467c478bd9Sstevel@tonic-gate 		goto out;
23477c478bd9Sstevel@tonic-gate 	}
23487c478bd9Sstevel@tonic-gate 
23497c478bd9Sstevel@tonic-gate 	if (zonecfg_setrctlent(handle) != Z_OK) {
23507c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
23517c478bd9Sstevel@tonic-gate 		goto out;
23527c478bd9Sstevel@tonic-gate 	}
23537c478bd9Sstevel@tonic-gate 
23547c478bd9Sstevel@tonic-gate 	if ((rctlblk = malloc(rctlblk_size())) == NULL) {
23557c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "memory allocation failed");
23567c478bd9Sstevel@tonic-gate 		goto out;
23577c478bd9Sstevel@tonic-gate 	}
23587c478bd9Sstevel@tonic-gate 	while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
23597c478bd9Sstevel@tonic-gate 		struct zone_rctlvaltab *rctlval;
23607c478bd9Sstevel@tonic-gate 		uint_t i, count;
23617c478bd9Sstevel@tonic-gate 		const char *name = rctltab.zone_rctl_name;
23627c478bd9Sstevel@tonic-gate 
23637c478bd9Sstevel@tonic-gate 		/* zoneadm should have already warned about unknown rctls. */
23647c478bd9Sstevel@tonic-gate 		if (!zonecfg_is_rctl(name)) {
23657c478bd9Sstevel@tonic-gate 			zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
23667c478bd9Sstevel@tonic-gate 			rctltab.zone_rctl_valptr = NULL;
23677c478bd9Sstevel@tonic-gate 			continue;
23687c478bd9Sstevel@tonic-gate 		}
23697c478bd9Sstevel@tonic-gate 		count = 0;
23707c478bd9Sstevel@tonic-gate 		for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
23717c478bd9Sstevel@tonic-gate 		    rctlval = rctlval->zone_rctlval_next) {
23727c478bd9Sstevel@tonic-gate 			count++;
23737c478bd9Sstevel@tonic-gate 		}
23747c478bd9Sstevel@tonic-gate 		if (count == 0) {	/* ignore */
23757c478bd9Sstevel@tonic-gate 			continue;	/* Nothing to free */
23767c478bd9Sstevel@tonic-gate 		}
23777c478bd9Sstevel@tonic-gate 		if ((nvlv = malloc(sizeof (*nvlv) * count)) == NULL)
23787c478bd9Sstevel@tonic-gate 			goto out;
23797c478bd9Sstevel@tonic-gate 		i = 0;
23807c478bd9Sstevel@tonic-gate 		for (rctlval = rctltab.zone_rctl_valptr; rctlval != NULL;
23817c478bd9Sstevel@tonic-gate 		    rctlval = rctlval->zone_rctlval_next, i++) {
23827c478bd9Sstevel@tonic-gate 			if (nvlist_alloc(&nvlv[i], NV_UNIQUE_NAME, 0) != 0) {
23837c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_TRUE, "%s failed",
23847c478bd9Sstevel@tonic-gate 				    "nvlist_alloc");
23857c478bd9Sstevel@tonic-gate 				goto out;
23867c478bd9Sstevel@tonic-gate 			}
23877c478bd9Sstevel@tonic-gate 			if (zonecfg_construct_rctlblk(rctlval, rctlblk)
23887c478bd9Sstevel@tonic-gate 			    != Z_OK) {
23897c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "invalid rctl value: "
23907c478bd9Sstevel@tonic-gate 				    "(priv=%s,limit=%s,action=%s)",
23917c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_priv,
23927c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_limit,
23937c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_action);
23947c478bd9Sstevel@tonic-gate 				goto out;
23957c478bd9Sstevel@tonic-gate 			}
23967c478bd9Sstevel@tonic-gate 			if (!zonecfg_valid_rctl(name, rctlblk)) {
23977c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
23987c478bd9Sstevel@tonic-gate 				    "(priv=%s,limit=%s,action=%s) is not a "
23997c478bd9Sstevel@tonic-gate 				    "valid value for rctl '%s'",
24007c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_priv,
24017c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_limit,
24027c478bd9Sstevel@tonic-gate 				    rctlval->zone_rctlval_action,
24037c478bd9Sstevel@tonic-gate 				    name);
24047c478bd9Sstevel@tonic-gate 				goto out;
24057c478bd9Sstevel@tonic-gate 			}
24067c478bd9Sstevel@tonic-gate 			if (nvlist_add_uint64(nvlv[i], "privilege",
24077c478bd9Sstevel@tonic-gate 			    rctlblk_get_privilege(rctlblk)) != 0) {
24087c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "%s failed",
24097c478bd9Sstevel@tonic-gate 				    "nvlist_add_uint64");
24107c478bd9Sstevel@tonic-gate 				goto out;
24117c478bd9Sstevel@tonic-gate 			}
24127c478bd9Sstevel@tonic-gate 			if (nvlist_add_uint64(nvlv[i], "limit",
24137c478bd9Sstevel@tonic-gate 			    rctlblk_get_value(rctlblk)) != 0) {
24147c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "%s failed",
24157c478bd9Sstevel@tonic-gate 				    "nvlist_add_uint64");
24167c478bd9Sstevel@tonic-gate 				goto out;
24177c478bd9Sstevel@tonic-gate 			}
24187c478bd9Sstevel@tonic-gate 			if (nvlist_add_uint64(nvlv[i], "action",
24197c478bd9Sstevel@tonic-gate 			    (uint_t)rctlblk_get_local_action(rctlblk, NULL))
24207c478bd9Sstevel@tonic-gate 			    != 0) {
24217c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE, "%s failed",
24227c478bd9Sstevel@tonic-gate 				    "nvlist_add_uint64");
24237c478bd9Sstevel@tonic-gate 				goto out;
24247c478bd9Sstevel@tonic-gate 			}
24257c478bd9Sstevel@tonic-gate 		}
24267c478bd9Sstevel@tonic-gate 		zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
24277c478bd9Sstevel@tonic-gate 		rctltab.zone_rctl_valptr = NULL;
24287c478bd9Sstevel@tonic-gate 		if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
24297c478bd9Sstevel@tonic-gate 		    != 0) {
24307c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s failed",
24317c478bd9Sstevel@tonic-gate 			    "nvlist_add_nvlist_array");
24327c478bd9Sstevel@tonic-gate 			goto out;
24337c478bd9Sstevel@tonic-gate 		}
24347c478bd9Sstevel@tonic-gate 		for (i = 0; i < count; i++)
24357c478bd9Sstevel@tonic-gate 			nvlist_free(nvlv[i]);
24367c478bd9Sstevel@tonic-gate 		free(nvlv);
24377c478bd9Sstevel@tonic-gate 		nvlv = NULL;
24387c478bd9Sstevel@tonic-gate 		rctlcount++;
24397c478bd9Sstevel@tonic-gate 	}
24407c478bd9Sstevel@tonic-gate 	(void) zonecfg_endrctlent(handle);
24417c478bd9Sstevel@tonic-gate 
24427c478bd9Sstevel@tonic-gate 	if (rctlcount == 0) {
24437c478bd9Sstevel@tonic-gate 		error = 0;
24447c478bd9Sstevel@tonic-gate 		goto out;
24457c478bd9Sstevel@tonic-gate 	}
24467c478bd9Sstevel@tonic-gate 	if (nvlist_pack(nvl, &nvl_packed, &nvl_size, NV_ENCODE_NATIVE, 0)
24477c478bd9Sstevel@tonic-gate 	    != 0) {
24487c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "nvlist_pack");
24497c478bd9Sstevel@tonic-gate 		goto out;
24507c478bd9Sstevel@tonic-gate 	}
24517c478bd9Sstevel@tonic-gate 
24527c478bd9Sstevel@tonic-gate 	error = 0;
24537c478bd9Sstevel@tonic-gate 	*bufp = nvl_packed;
24547c478bd9Sstevel@tonic-gate 	*bufsizep = nvl_size;
24557c478bd9Sstevel@tonic-gate 
24567c478bd9Sstevel@tonic-gate out:
24577c478bd9Sstevel@tonic-gate 	free(rctlblk);
24587c478bd9Sstevel@tonic-gate 	zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
24597c478bd9Sstevel@tonic-gate 	if (error && nvl_packed != NULL)
24607c478bd9Sstevel@tonic-gate 		free(nvl_packed);
24617c478bd9Sstevel@tonic-gate 	if (nvl != NULL)
24627c478bd9Sstevel@tonic-gate 		nvlist_free(nvl);
24637c478bd9Sstevel@tonic-gate 	if (nvlv != NULL)
24647c478bd9Sstevel@tonic-gate 		free(nvlv);
24657c478bd9Sstevel@tonic-gate 	if (handle != NULL)
24667c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
24677c478bd9Sstevel@tonic-gate 	return (error);
24687c478bd9Sstevel@tonic-gate }
24697c478bd9Sstevel@tonic-gate 
24707c478bd9Sstevel@tonic-gate static int
24717c478bd9Sstevel@tonic-gate get_zone_pool(zlog_t *zlogp, char *poolbuf, size_t bufsz)
24727c478bd9Sstevel@tonic-gate {
24737c478bd9Sstevel@tonic-gate 	zone_dochandle_t handle;
24747c478bd9Sstevel@tonic-gate 	int error;
24757c478bd9Sstevel@tonic-gate 
24767c478bd9Sstevel@tonic-gate 	if ((handle = zonecfg_init_handle()) == NULL) {
24777c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2478ffbafc53Scomay 		return (Z_NOMEM);
24797c478bd9Sstevel@tonic-gate 	}
2480ffbafc53Scomay 	error = zonecfg_get_snapshot_handle(zone_name, handle);
2481ffbafc53Scomay 	if (error != Z_OK) {
24827c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "invalid configuration");
24837c478bd9Sstevel@tonic-gate 		zonecfg_fini_handle(handle);
2484ffbafc53Scomay 		return (error);
24857c478bd9Sstevel@tonic-gate 	}
24867c478bd9Sstevel@tonic-gate 	error = zonecfg_get_pool(handle, poolbuf, bufsz);
24877c478bd9Sstevel@tonic-gate 	zonecfg_fini_handle(handle);
24887c478bd9Sstevel@tonic-gate 	return (error);
24897c478bd9Sstevel@tonic-gate }
24907c478bd9Sstevel@tonic-gate 
24917c478bd9Sstevel@tonic-gate static int
2492fa9e4066Sahrens get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
2493fa9e4066Sahrens {
2494fa9e4066Sahrens 	zone_dochandle_t handle;
2495fa9e4066Sahrens 	struct zone_dstab dstab;
2496fa9e4066Sahrens 	size_t total, offset, len;
2497fa9e4066Sahrens 	int error = -1;
2498fa9e4066Sahrens 	char *str;
2499fa9e4066Sahrens 
2500fa9e4066Sahrens 	*bufp = NULL;
2501fa9e4066Sahrens 	*bufsizep = 0;
2502fa9e4066Sahrens 
2503fa9e4066Sahrens 	if ((handle = zonecfg_init_handle()) == NULL) {
2504fa9e4066Sahrens 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2505fa9e4066Sahrens 		return (-1);
2506fa9e4066Sahrens 	}
2507fa9e4066Sahrens 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2508fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "invalid configuration");
2509fa9e4066Sahrens 		zonecfg_fini_handle(handle);
2510fa9e4066Sahrens 		return (-1);
2511fa9e4066Sahrens 	}
2512fa9e4066Sahrens 
2513fa9e4066Sahrens 	if (zonecfg_setdsent(handle) != Z_OK) {
2514fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
2515fa9e4066Sahrens 		goto out;
2516fa9e4066Sahrens 	}
2517fa9e4066Sahrens 
2518fa9e4066Sahrens 	total = 0;
2519fa9e4066Sahrens 	while (zonecfg_getdsent(handle, &dstab) == Z_OK)
2520fa9e4066Sahrens 		total += strlen(dstab.zone_dataset_name) + 1;
2521fa9e4066Sahrens 	(void) zonecfg_enddsent(handle);
2522fa9e4066Sahrens 
2523fa9e4066Sahrens 	if (total == 0) {
2524fa9e4066Sahrens 		error = 0;
2525fa9e4066Sahrens 		goto out;
2526fa9e4066Sahrens 	}
2527fa9e4066Sahrens 
2528fa9e4066Sahrens 	if ((str = malloc(total)) == NULL) {
2529fa9e4066Sahrens 		zerror(zlogp, B_TRUE, "memory allocation failed");
2530fa9e4066Sahrens 		goto out;
2531fa9e4066Sahrens 	}
2532fa9e4066Sahrens 
2533fa9e4066Sahrens 	if (zonecfg_setdsent(handle) != Z_OK) {
2534fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
2535fa9e4066Sahrens 		goto out;
2536fa9e4066Sahrens 	}
2537fa9e4066Sahrens 	offset = 0;
2538fa9e4066Sahrens 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
2539fa9e4066Sahrens 		len = strlen(dstab.zone_dataset_name);
2540fa9e4066Sahrens 		(void) strlcpy(str + offset, dstab.zone_dataset_name,
2541fa9e4066Sahrens 		    sizeof (dstab.zone_dataset_name) - offset);
2542fa9e4066Sahrens 		offset += len;
2543fa9e4066Sahrens 		if (offset != total - 1)
2544fa9e4066Sahrens 			str[offset++] = ',';
2545fa9e4066Sahrens 	}
2546fa9e4066Sahrens 	(void) zonecfg_enddsent(handle);
2547fa9e4066Sahrens 
2548fa9e4066Sahrens 	error = 0;
2549fa9e4066Sahrens 	*bufp = str;
2550fa9e4066Sahrens 	*bufsizep = total;
2551fa9e4066Sahrens 
2552fa9e4066Sahrens out:
2553fa9e4066Sahrens 	if (error != 0 && str != NULL)
2554fa9e4066Sahrens 		free(str);
2555fa9e4066Sahrens 	if (handle != NULL)
2556fa9e4066Sahrens 		zonecfg_fini_handle(handle);
2557fa9e4066Sahrens 
2558fa9e4066Sahrens 	return (error);
2559fa9e4066Sahrens }
2560fa9e4066Sahrens 
2561fa9e4066Sahrens static int
2562fa9e4066Sahrens validate_datasets(zlog_t *zlogp)
2563fa9e4066Sahrens {
2564fa9e4066Sahrens 	zone_dochandle_t handle;
2565fa9e4066Sahrens 	struct zone_dstab dstab;
2566fa9e4066Sahrens 	zfs_handle_t *zhp;
256799653d4eSeschrock 	libzfs_handle_t *hdl;
2568fa9e4066Sahrens 
2569fa9e4066Sahrens 	if ((handle = zonecfg_init_handle()) == NULL) {
2570fa9e4066Sahrens 		zerror(zlogp, B_TRUE, "getting zone configuration handle");
2571fa9e4066Sahrens 		return (-1);
2572fa9e4066Sahrens 	}
2573fa9e4066Sahrens 	if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
2574fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "invalid configuration");
2575fa9e4066Sahrens 		zonecfg_fini_handle(handle);
2576fa9e4066Sahrens 		return (-1);
2577fa9e4066Sahrens 	}
2578fa9e4066Sahrens 
2579fa9e4066Sahrens 	if (zonecfg_setdsent(handle) != Z_OK) {
2580fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "invalid configuration");
2581fa9e4066Sahrens 		zonecfg_fini_handle(handle);
2582fa9e4066Sahrens 		return (-1);
2583fa9e4066Sahrens 	}
2584fa9e4066Sahrens 
258599653d4eSeschrock 	if ((hdl = libzfs_init()) == NULL) {
258699653d4eSeschrock 		zerror(zlogp, B_FALSE, "opening ZFS library");
258799653d4eSeschrock 		zonecfg_fini_handle(handle);
258899653d4eSeschrock 		return (-1);
258999653d4eSeschrock 	}
2590fa9e4066Sahrens 
2591fa9e4066Sahrens 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
2592fa9e4066Sahrens 
259399653d4eSeschrock 		if ((zhp = zfs_open(hdl, dstab.zone_dataset_name,
2594fa9e4066Sahrens 		    ZFS_TYPE_FILESYSTEM)) == NULL) {
2595fa9e4066Sahrens 			zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'",
2596fa9e4066Sahrens 			    dstab.zone_dataset_name);
2597fa9e4066Sahrens 			zonecfg_fini_handle(handle);
259899653d4eSeschrock 			libzfs_fini(hdl);
2599fa9e4066Sahrens 			return (-1);
2600fa9e4066Sahrens 		}
2601fa9e4066Sahrens 
2602fa9e4066Sahrens 		/*
2603fa9e4066Sahrens 		 * Automatically set the 'zoned' property.  We check the value
2604fa9e4066Sahrens 		 * first because we'll get EPERM if it is already set.
2605fa9e4066Sahrens 		 */
2606fa9e4066Sahrens 		if (!zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
2607fa9e4066Sahrens 		    zfs_prop_set(zhp, ZFS_PROP_ZONED, "on") != 0) {
2608fa9e4066Sahrens 			zerror(zlogp, B_FALSE, "cannot set 'zoned' "
2609fa9e4066Sahrens 			    "property for ZFS dataset '%s'\n",
2610fa9e4066Sahrens 			    dstab.zone_dataset_name);
2611fa9e4066Sahrens 			zonecfg_fini_handle(handle);
2612fa9e4066Sahrens 			zfs_close(zhp);
261399653d4eSeschrock 			libzfs_fini(hdl);
2614fa9e4066Sahrens 			return (-1);
2615fa9e4066Sahrens 		}
2616fa9e4066Sahrens 
2617fa9e4066Sahrens 		zfs_close(zhp);
2618fa9e4066Sahrens 	}
2619fa9e4066Sahrens 	(void) zonecfg_enddsent(handle);
2620fa9e4066Sahrens 
2621fa9e4066Sahrens 	zonecfg_fini_handle(handle);
262299653d4eSeschrock 	libzfs_fini(hdl);
2623fa9e4066Sahrens 
2624fa9e4066Sahrens 	return (0);
2625fa9e4066Sahrens }
2626fa9e4066Sahrens 
2627fa9e4066Sahrens static int
26287c478bd9Sstevel@tonic-gate bind_to_pool(zlog_t *zlogp, zoneid_t zoneid)
26297c478bd9Sstevel@tonic-gate {
26307c478bd9Sstevel@tonic-gate 	pool_conf_t *poolconf;
26317c478bd9Sstevel@tonic-gate 	pool_t *pool;
26327c478bd9Sstevel@tonic-gate 	char poolname[MAXPATHLEN];
26337c478bd9Sstevel@tonic-gate 	int status;
26347c478bd9Sstevel@tonic-gate 	int error;
26357c478bd9Sstevel@tonic-gate 
26367c478bd9Sstevel@tonic-gate 	/*
26377c478bd9Sstevel@tonic-gate 	 * Find the pool mentioned in the zone configuration, and bind to it.
26387c478bd9Sstevel@tonic-gate 	 */
26397c478bd9Sstevel@tonic-gate 	error = get_zone_pool(zlogp, poolname, sizeof (poolname));
26407c478bd9Sstevel@tonic-gate 	if (error == Z_NO_ENTRY || (error == Z_OK && strlen(poolname) == 0)) {
26417c478bd9Sstevel@tonic-gate 		/*
26427c478bd9Sstevel@tonic-gate 		 * The property is not set on the zone, so the pool
26437c478bd9Sstevel@tonic-gate 		 * should be bound to the default pool.  But that's
26447c478bd9Sstevel@tonic-gate 		 * already done by the kernel, so we can just return.
26457c478bd9Sstevel@tonic-gate 		 */
26467c478bd9Sstevel@tonic-gate 		return (0);
26477c478bd9Sstevel@tonic-gate 	}
26487c478bd9Sstevel@tonic-gate 	if (error != Z_OK) {
26497c478bd9Sstevel@tonic-gate 		/*
26507c478bd9Sstevel@tonic-gate 		 * Not an error, even though it shouldn't be happening.
26517c478bd9Sstevel@tonic-gate 		 */
26527c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
26537c478bd9Sstevel@tonic-gate 		    "WARNING: unable to retrieve default pool.");
26547c478bd9Sstevel@tonic-gate 		return (0);
26557c478bd9Sstevel@tonic-gate 	}
26567c478bd9Sstevel@tonic-gate 	/*
26577c478bd9Sstevel@tonic-gate 	 * Don't do anything if pools aren't enabled.
26587c478bd9Sstevel@tonic-gate 	 */
26597c478bd9Sstevel@tonic-gate 	if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED) {
26607c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: pools facility not active; "
26617c478bd9Sstevel@tonic-gate 		    "zone will not be bound to pool '%s'.", poolname);
26627c478bd9Sstevel@tonic-gate 		return (0);
26637c478bd9Sstevel@tonic-gate 	}
26647c478bd9Sstevel@tonic-gate 	/*
26657c478bd9Sstevel@tonic-gate 	 * Try to provide a sane error message if the requested pool doesn't
26667c478bd9Sstevel@tonic-gate 	 * exist.
26677c478bd9Sstevel@tonic-gate 	 */
26687c478bd9Sstevel@tonic-gate 	if ((poolconf = pool_conf_alloc()) == NULL) {
26697c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "pool_conf_alloc");
26707c478bd9Sstevel@tonic-gate 		return (-1);
26717c478bd9Sstevel@tonic-gate 	}
26727c478bd9Sstevel@tonic-gate 	if (pool_conf_open(poolconf, pool_dynamic_location(), PO_RDONLY) !=
26737c478bd9Sstevel@tonic-gate 	    PO_SUCCESS) {
26747c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "%s failed", "pool_conf_open");
26757c478bd9Sstevel@tonic-gate 		pool_conf_free(poolconf);
26767c478bd9Sstevel@tonic-gate 		return (-1);
26777c478bd9Sstevel@tonic-gate 	}
26787c478bd9Sstevel@tonic-gate 	pool = pool_get_pool(poolconf, poolname);
26797c478bd9Sstevel@tonic-gate 	(void) pool_conf_close(poolconf);
26807c478bd9Sstevel@tonic-gate 	pool_conf_free(poolconf);
26817c478bd9Sstevel@tonic-gate 	if (pool == NULL) {
26827c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: pool '%s' not found; "
26837c478bd9Sstevel@tonic-gate 		    "using default pool.", poolname);
26847c478bd9Sstevel@tonic-gate 		return (0);
26857c478bd9Sstevel@tonic-gate 	}
26867c478bd9Sstevel@tonic-gate 	/*
26877c478bd9Sstevel@tonic-gate 	 * Bind the zone to the pool.
26887c478bd9Sstevel@tonic-gate 	 */
26897c478bd9Sstevel@tonic-gate 	if (pool_set_binding(poolname, P_ZONEID, zoneid) != PO_SUCCESS) {
26907c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: unable to bind to pool '%s'; "
26917c478bd9Sstevel@tonic-gate 		    "using default pool.", poolname);
26927c478bd9Sstevel@tonic-gate 	}
26937c478bd9Sstevel@tonic-gate 	return (0);
26947c478bd9Sstevel@tonic-gate }
26957c478bd9Sstevel@tonic-gate 
269645916cd2Sjpk /*
269745916cd2Sjpk  * Mount lower level home directories into/from current zone
269845916cd2Sjpk  * Share exported directories specified in dfstab for zone
269945916cd2Sjpk  */
270045916cd2Sjpk static int
270145916cd2Sjpk tsol_mounts(zlog_t *zlogp, char *zone_name, char *rootpath)
270245916cd2Sjpk {
270345916cd2Sjpk 	zoneid_t *zids = NULL;
270445916cd2Sjpk 	priv_set_t *zid_privs;
270545916cd2Sjpk 	const priv_impl_info_t *ip = NULL;
270645916cd2Sjpk 	uint_t nzents_saved;
270745916cd2Sjpk 	uint_t nzents;
270845916cd2Sjpk 	int i;
270945916cd2Sjpk 	char readonly[] = "ro";
271045916cd2Sjpk 	struct zone_fstab lower_fstab;
271145916cd2Sjpk 	char *argv[4];
271245916cd2Sjpk 
271345916cd2Sjpk 	if (!is_system_labeled())
271445916cd2Sjpk 		return (0);
271545916cd2Sjpk 
271645916cd2Sjpk 	if (zid_label == NULL) {
271745916cd2Sjpk 		zid_label = m_label_alloc(MAC_LABEL);
271845916cd2Sjpk 		if (zid_label == NULL)
271945916cd2Sjpk 			return (-1);
272045916cd2Sjpk 	}
272145916cd2Sjpk 
272245916cd2Sjpk 	/* Make sure our zone has an /export/home dir */
272345916cd2Sjpk 	(void) make_one_dir(zlogp, rootpath, "/export/home",
272445916cd2Sjpk 	    DEFAULT_DIR_MODE);
272545916cd2Sjpk 
272645916cd2Sjpk 	lower_fstab.zone_fs_raw[0] = '\0';
272745916cd2Sjpk 	(void) strlcpy(lower_fstab.zone_fs_type, MNTTYPE_LOFS,
272845916cd2Sjpk 	    sizeof (lower_fstab.zone_fs_type));
272945916cd2Sjpk 	lower_fstab.zone_fs_options = NULL;
273045916cd2Sjpk 	(void) zonecfg_add_fs_option(&lower_fstab, readonly);
273145916cd2Sjpk 
273245916cd2Sjpk 	/*
273345916cd2Sjpk 	 * Get the list of zones from the kernel
273445916cd2Sjpk 	 */
273545916cd2Sjpk 	if (zone_list(NULL, &nzents) != 0) {
273645916cd2Sjpk 		zerror(zlogp, B_TRUE, "unable to list zones");
273745916cd2Sjpk 		zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
273845916cd2Sjpk 		return (-1);
273945916cd2Sjpk 	}
274045916cd2Sjpk again:
274145916cd2Sjpk 	if (nzents == 0) {
274245916cd2Sjpk 		zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
274345916cd2Sjpk 		return (-1);
274445916cd2Sjpk 	}
274545916cd2Sjpk 
274645916cd2Sjpk 	zids = malloc(nzents * sizeof (zoneid_t));
274745916cd2Sjpk 	if (zids == NULL) {
27483f2f09c1Sdp 		zerror(zlogp, B_TRUE, "memory allocation failed");
274945916cd2Sjpk 		return (-1);
275045916cd2Sjpk 	}
275145916cd2Sjpk 	nzents_saved = nzents;
275245916cd2Sjpk 
275345916cd2Sjpk 	if (zone_list(zids, &nzents) != 0) {
275445916cd2Sjpk 		zerror(zlogp, B_TRUE, "unable to list zones");
275545916cd2Sjpk 		zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
275645916cd2Sjpk 		free(zids);
275745916cd2Sjpk 		return (-1);
275845916cd2Sjpk 	}
275945916cd2Sjpk 	if (nzents != nzents_saved) {
276045916cd2Sjpk 		/* list changed, try again */
276145916cd2Sjpk 		free(zids);
276245916cd2Sjpk 		goto again;
276345916cd2Sjpk 	}
276445916cd2Sjpk 
276545916cd2Sjpk 	ip = getprivimplinfo();
276645916cd2Sjpk 	if ((zid_privs = priv_allocset()) == NULL) {
276745916cd2Sjpk 		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
276845916cd2Sjpk 		zonecfg_free_fs_option_list(
276945916cd2Sjpk 		    lower_fstab.zone_fs_options);
277045916cd2Sjpk 		free(zids);
277145916cd2Sjpk 		return (-1);
277245916cd2Sjpk 	}
277345916cd2Sjpk 
277445916cd2Sjpk 	for (i = 0; i < nzents; i++) {
277545916cd2Sjpk 		char zid_name[ZONENAME_MAX];
277645916cd2Sjpk 		zone_state_t zid_state;
277745916cd2Sjpk 		char zid_rpath[MAXPATHLEN];
277845916cd2Sjpk 		struct stat stat_buf;
277945916cd2Sjpk 
278045916cd2Sjpk 		if (zids[i] == GLOBAL_ZONEID)
278145916cd2Sjpk 			continue;
278245916cd2Sjpk 
278345916cd2Sjpk 		if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1)
278445916cd2Sjpk 			continue;
278545916cd2Sjpk 
278645916cd2Sjpk 		/*
278745916cd2Sjpk 		 * Do special setup for the zone we are booting
278845916cd2Sjpk 		 */
278945916cd2Sjpk 		if (strcmp(zid_name, zone_name) == 0) {
279045916cd2Sjpk 			struct zone_fstab autofs_fstab;
279145916cd2Sjpk 			char map_path[MAXPATHLEN];
279245916cd2Sjpk 			int fd;
279345916cd2Sjpk 
279445916cd2Sjpk 			/*
279545916cd2Sjpk 			 * Create auto_home_<zone> map for this zone
279645916cd2Sjpk 			 * in the global zone. The local zone entry
279745916cd2Sjpk 			 * will be created by automount when the zone
279845916cd2Sjpk 			 * is booted.
279945916cd2Sjpk 			 */
280045916cd2Sjpk 
280145916cd2Sjpk 			(void) snprintf(autofs_fstab.zone_fs_special,
280245916cd2Sjpk 			    MAXPATHLEN, "auto_home_%s", zid_name);
280345916cd2Sjpk 
280445916cd2Sjpk 			(void) snprintf(autofs_fstab.zone_fs_dir, MAXPATHLEN,
280545916cd2Sjpk 			    "/zone/%s/home", zid_name);
280645916cd2Sjpk 
280745916cd2Sjpk 			(void) snprintf(map_path, sizeof (map_path),
280845916cd2Sjpk 			    "/etc/%s", autofs_fstab.zone_fs_special);
280945916cd2Sjpk 			/*
281045916cd2Sjpk 			 * If the map file doesn't exist create a template
281145916cd2Sjpk 			 */
281245916cd2Sjpk 			if ((fd = open(map_path, O_RDWR | O_CREAT | O_EXCL,
281345916cd2Sjpk 			    S_IRUSR | S_IWUSR | S_IRGRP| S_IROTH)) != -1) {
281445916cd2Sjpk 				int len;
281545916cd2Sjpk 				char map_rec[MAXPATHLEN];
281645916cd2Sjpk 
281745916cd2Sjpk 				len = snprintf(map_rec, sizeof (map_rec),
281845916cd2Sjpk 				    "+%s\n*\t-fstype=lofs\t:%s/export/home/&\n",
281945916cd2Sjpk 				    autofs_fstab.zone_fs_special, rootpath);
282045916cd2Sjpk 				(void) write(fd, map_rec, len);
282145916cd2Sjpk 				(void) close(fd);
282245916cd2Sjpk 			}
282345916cd2Sjpk 
282445916cd2Sjpk 			/*
282545916cd2Sjpk 			 * Mount auto_home_<zone> in the global zone if absent.
282645916cd2Sjpk 			 * If it's already of type autofs, then
282745916cd2Sjpk 			 * don't mount it again.
282845916cd2Sjpk 			 */
282945916cd2Sjpk 			if ((stat(autofs_fstab.zone_fs_dir, &stat_buf) == -1) ||
283045916cd2Sjpk 			    strcmp(stat_buf.st_fstype, MNTTYPE_AUTOFS) != 0) {
283145916cd2Sjpk 				char optstr[] = "indirect,ignore,nobrowse";
283245916cd2Sjpk 
283345916cd2Sjpk 				(void) make_one_dir(zlogp, "",
283445916cd2Sjpk 				    autofs_fstab.zone_fs_dir, DEFAULT_DIR_MODE);
283545916cd2Sjpk 
283645916cd2Sjpk 				/*
283745916cd2Sjpk 				 * Mount will fail if automounter has already
283845916cd2Sjpk 				 * processed the auto_home_<zonename> map
283945916cd2Sjpk 				 */
284045916cd2Sjpk 				(void) domount(zlogp, MNTTYPE_AUTOFS, optstr,
284145916cd2Sjpk 				    autofs_fstab.zone_fs_special,
284245916cd2Sjpk 				    autofs_fstab.zone_fs_dir);
284345916cd2Sjpk 			}
284445916cd2Sjpk 			continue;
284545916cd2Sjpk 		}
284645916cd2Sjpk 
284745916cd2Sjpk 
284845916cd2Sjpk 		if (zone_get_state(zid_name, &zid_state) != Z_OK ||
284948451833Scarlsonj 		    (zid_state != ZONE_STATE_READY &&
285048451833Scarlsonj 		    zid_state != ZONE_STATE_RUNNING))
285145916cd2Sjpk 			/* Skip over zones without mounted filesystems */
285245916cd2Sjpk 			continue;
285345916cd2Sjpk 
285445916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label,
285545916cd2Sjpk 		    sizeof (m_label_t)) < 0)
285645916cd2Sjpk 			/* Skip over zones with unspecified label */
285745916cd2Sjpk 			continue;
285845916cd2Sjpk 
285945916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath,
286045916cd2Sjpk 		    sizeof (zid_rpath)) == -1)
286145916cd2Sjpk 			/* Skip over zones with bad path */
286245916cd2Sjpk 			continue;
286345916cd2Sjpk 
286445916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_PRIVSET, zid_privs,
286545916cd2Sjpk 		    sizeof (priv_chunk_t) * ip->priv_setsize) == -1)
286645916cd2Sjpk 			/* Skip over zones with bad privs */
286745916cd2Sjpk 			continue;
286845916cd2Sjpk 
286945916cd2Sjpk 		/*
287045916cd2Sjpk 		 * Reading down is valid according to our label model
287145916cd2Sjpk 		 * but some customers want to disable it because it
287245916cd2Sjpk 		 * allows execute down and other possible attacks.
287345916cd2Sjpk 		 * Therefore, we restrict this feature to zones that
287445916cd2Sjpk 		 * have the NET_MAC_AWARE privilege which is required
287545916cd2Sjpk 		 * for NFS read-down semantics.
287645916cd2Sjpk 		 */
287745916cd2Sjpk 		if ((bldominates(zlabel, zid_label)) &&
287845916cd2Sjpk 		    (priv_ismember(zprivs, PRIV_NET_MAC_AWARE))) {
287945916cd2Sjpk 			/*
288045916cd2Sjpk 			 * Our zone dominates this one.
288145916cd2Sjpk 			 * Create a lofs mount from lower zone's /export/home
288245916cd2Sjpk 			 */
288345916cd2Sjpk 			(void) snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN,
288445916cd2Sjpk 			    "%s/zone/%s/export/home", rootpath, zid_name);
288545916cd2Sjpk 
288645916cd2Sjpk 			/*
288745916cd2Sjpk 			 * If the target is already an LOFS mount
288845916cd2Sjpk 			 * then don't do it again.
288945916cd2Sjpk 			 */
289045916cd2Sjpk 			if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) ||
289145916cd2Sjpk 			    strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) {
289245916cd2Sjpk 
289345916cd2Sjpk 				if (snprintf(lower_fstab.zone_fs_special,
289445916cd2Sjpk 				    MAXPATHLEN, "%s/export",
289545916cd2Sjpk 				    zid_rpath) > MAXPATHLEN)
289645916cd2Sjpk 					continue;
289745916cd2Sjpk 
289845916cd2Sjpk 				/*
289945916cd2Sjpk 				 * Make sure the lower-level home exists
290045916cd2Sjpk 				 */
290145916cd2Sjpk 				if (make_one_dir(zlogp,
290245916cd2Sjpk 				    lower_fstab.zone_fs_special,
290345916cd2Sjpk 				    "/home", DEFAULT_DIR_MODE) != 0)
290445916cd2Sjpk 					continue;
290545916cd2Sjpk 
290645916cd2Sjpk 				(void) strlcat(lower_fstab.zone_fs_special,
290745916cd2Sjpk 				    "/home", MAXPATHLEN);
290845916cd2Sjpk 
290945916cd2Sjpk 				/*
291045916cd2Sjpk 				 * Mount can fail because the lower-level
291145916cd2Sjpk 				 * zone may have already done a mount up.
291245916cd2Sjpk 				 */
291345916cd2Sjpk 				(void) mount_one(zlogp, &lower_fstab, "");
291445916cd2Sjpk 			}
291545916cd2Sjpk 		} else if ((bldominates(zid_label, zlabel)) &&
291645916cd2Sjpk 		    (priv_ismember(zid_privs, PRIV_NET_MAC_AWARE))) {
291745916cd2Sjpk 			/*
291845916cd2Sjpk 			 * This zone dominates our zone.
291945916cd2Sjpk 			 * Create a lofs mount from our zone's /export/home
292045916cd2Sjpk 			 */
292145916cd2Sjpk 			if (snprintf(lower_fstab.zone_fs_dir, MAXPATHLEN,
292245916cd2Sjpk 			    "%s/zone/%s/export/home", zid_rpath,
292345916cd2Sjpk 			    zone_name) > MAXPATHLEN)
292445916cd2Sjpk 				continue;
292545916cd2Sjpk 
292645916cd2Sjpk 			/*
292745916cd2Sjpk 			 * If the target is already an LOFS mount
292845916cd2Sjpk 			 * then don't do it again.
292945916cd2Sjpk 			 */
293045916cd2Sjpk 			if ((stat(lower_fstab.zone_fs_dir, &stat_buf) == -1) ||
293145916cd2Sjpk 			    strcmp(stat_buf.st_fstype, MNTTYPE_LOFS) != 0) {
293245916cd2Sjpk 
293345916cd2Sjpk 				(void) snprintf(lower_fstab.zone_fs_special,
293445916cd2Sjpk 				    MAXPATHLEN, "%s/export/home", rootpath);
293545916cd2Sjpk 
293645916cd2Sjpk 				/*
293745916cd2Sjpk 				 * Mount can fail because the higher-level
293845916cd2Sjpk 				 * zone may have already done a mount down.
293945916cd2Sjpk 				 */
294045916cd2Sjpk 				(void) mount_one(zlogp, &lower_fstab, "");
294145916cd2Sjpk 			}
294245916cd2Sjpk 		}
294345916cd2Sjpk 	}
294445916cd2Sjpk 	zonecfg_free_fs_option_list(lower_fstab.zone_fs_options);
294545916cd2Sjpk 	priv_freeset(zid_privs);
294645916cd2Sjpk 	free(zids);
294745916cd2Sjpk 
294845916cd2Sjpk 	/*
294945916cd2Sjpk 	 * Now share any exported directories from this zone.
295045916cd2Sjpk 	 * Each zone can have its own dfstab.
295145916cd2Sjpk 	 */
295245916cd2Sjpk 
295345916cd2Sjpk 	argv[0] = "zoneshare";
295445916cd2Sjpk 	argv[1] = "-z";
295545916cd2Sjpk 	argv[2] = zone_name;
295645916cd2Sjpk 	argv[3] = NULL;
295745916cd2Sjpk 
295845916cd2Sjpk 	(void) forkexec(zlogp, "/usr/lib/zones/zoneshare", argv);
295945916cd2Sjpk 	/* Don't check for errors since they don't affect the zone */
296045916cd2Sjpk 
296145916cd2Sjpk 	return (0);
296245916cd2Sjpk }
296345916cd2Sjpk 
296445916cd2Sjpk /*
296545916cd2Sjpk  * Unmount lofs mounts from higher level zones
296645916cd2Sjpk  * Unshare nfs exported directories
296745916cd2Sjpk  */
296845916cd2Sjpk static void
296945916cd2Sjpk tsol_unmounts(zlog_t *zlogp, char *zone_name)
297045916cd2Sjpk {
297145916cd2Sjpk 	zoneid_t *zids = NULL;
297245916cd2Sjpk 	uint_t nzents_saved;
297345916cd2Sjpk 	uint_t nzents;
297445916cd2Sjpk 	int i;
297545916cd2Sjpk 	char *argv[4];
297645916cd2Sjpk 	char path[MAXPATHLEN];
297745916cd2Sjpk 
297845916cd2Sjpk 	if (!is_system_labeled())
297945916cd2Sjpk 		return;
298045916cd2Sjpk 
298145916cd2Sjpk 	/*
298245916cd2Sjpk 	 * Get the list of zones from the kernel
298345916cd2Sjpk 	 */
298445916cd2Sjpk 	if (zone_list(NULL, &nzents) != 0) {
298545916cd2Sjpk 		return;
298645916cd2Sjpk 	}
298745916cd2Sjpk 
298845916cd2Sjpk 	if (zid_label == NULL) {
298945916cd2Sjpk 		zid_label = m_label_alloc(MAC_LABEL);
299045916cd2Sjpk 		if (zid_label == NULL)
299145916cd2Sjpk 			return;
299245916cd2Sjpk 	}
299345916cd2Sjpk 
299445916cd2Sjpk again:
299545916cd2Sjpk 	if (nzents == 0)
299645916cd2Sjpk 		return;
299745916cd2Sjpk 
299845916cd2Sjpk 	zids = malloc(nzents * sizeof (zoneid_t));
299945916cd2Sjpk 	if (zids == NULL) {
30003f2f09c1Sdp 		zerror(zlogp, B_TRUE, "memory allocation failed");
300145916cd2Sjpk 		return;
300245916cd2Sjpk 	}
300345916cd2Sjpk 	nzents_saved = nzents;
300445916cd2Sjpk 
300545916cd2Sjpk 	if (zone_list(zids, &nzents) != 0) {
300645916cd2Sjpk 		free(zids);
300745916cd2Sjpk 		return;
300845916cd2Sjpk 	}
300945916cd2Sjpk 	if (nzents != nzents_saved) {
301045916cd2Sjpk 		/* list changed, try again */
301145916cd2Sjpk 		free(zids);
301245916cd2Sjpk 		goto again;
301345916cd2Sjpk 	}
301445916cd2Sjpk 
301545916cd2Sjpk 	for (i = 0; i < nzents; i++) {
301645916cd2Sjpk 		char zid_name[ZONENAME_MAX];
301745916cd2Sjpk 		zone_state_t zid_state;
301845916cd2Sjpk 		char zid_rpath[MAXPATHLEN];
301945916cd2Sjpk 
302045916cd2Sjpk 		if (zids[i] == GLOBAL_ZONEID)
302145916cd2Sjpk 			continue;
302245916cd2Sjpk 
302345916cd2Sjpk 		if (getzonenamebyid(zids[i], zid_name, ZONENAME_MAX) == -1)
302445916cd2Sjpk 			continue;
302545916cd2Sjpk 
302645916cd2Sjpk 		/*
302745916cd2Sjpk 		 * Skip the zone we are halting
302845916cd2Sjpk 		 */
302945916cd2Sjpk 		if (strcmp(zid_name, zone_name) == 0)
303045916cd2Sjpk 			continue;
303145916cd2Sjpk 
303245916cd2Sjpk 		if ((zone_getattr(zids[i], ZONE_ATTR_STATUS, &zid_state,
303345916cd2Sjpk 		    sizeof (zid_state)) < 0) ||
303445916cd2Sjpk 		    (zid_state < ZONE_IS_READY))
303545916cd2Sjpk 			/* Skip over zones without mounted filesystems */
303645916cd2Sjpk 			continue;
303745916cd2Sjpk 
303845916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_SLBL, zid_label,
303945916cd2Sjpk 		    sizeof (m_label_t)) < 0)
304045916cd2Sjpk 			/* Skip over zones with unspecified label */
304145916cd2Sjpk 			continue;
304245916cd2Sjpk 
304345916cd2Sjpk 		if (zone_getattr(zids[i], ZONE_ATTR_ROOT, zid_rpath,
304445916cd2Sjpk 		    sizeof (zid_rpath)) == -1)
304545916cd2Sjpk 			/* Skip over zones with bad path */
304645916cd2Sjpk 			continue;
304745916cd2Sjpk 
304845916cd2Sjpk 		if (zlabel != NULL && bldominates(zid_label, zlabel)) {
304945916cd2Sjpk 			/*
305045916cd2Sjpk 			 * This zone dominates our zone.
305145916cd2Sjpk 			 * Unmount the lofs mount of our zone's /export/home
305245916cd2Sjpk 			 */
305345916cd2Sjpk 
305445916cd2Sjpk 			if (snprintf(path, MAXPATHLEN,
305545916cd2Sjpk 			    "%s/zone/%s/export/home", zid_rpath,
305645916cd2Sjpk 			    zone_name) > MAXPATHLEN)
305745916cd2Sjpk 				continue;
305845916cd2Sjpk 
305945916cd2Sjpk 			/* Skip over mount failures */
306045916cd2Sjpk 			(void) umount(path);
306145916cd2Sjpk 		}
306245916cd2Sjpk 	}
306345916cd2Sjpk 	free(zids);
306445916cd2Sjpk 
306545916cd2Sjpk 	/*
306645916cd2Sjpk 	 * Unmount global zone autofs trigger for this zone
306745916cd2Sjpk 	 */
306845916cd2Sjpk 	(void) snprintf(path, MAXPATHLEN, "/zone/%s/home", zone_name);
306945916cd2Sjpk 	/* Skip over mount failures */
307045916cd2Sjpk 	(void) umount(path);
307145916cd2Sjpk 
307245916cd2Sjpk 	/*
307345916cd2Sjpk 	 * Next unshare any exported directories from this zone.
307445916cd2Sjpk 	 */
307545916cd2Sjpk 
307645916cd2Sjpk 	argv[0] = "zoneunshare";
307745916cd2Sjpk 	argv[1] = "-z";
307845916cd2Sjpk 	argv[2] = zone_name;
307945916cd2Sjpk 	argv[3] = NULL;
308045916cd2Sjpk 
308145916cd2Sjpk 	(void) forkexec(zlogp, "/usr/lib/zones/zoneunshare", argv);
308245916cd2Sjpk 	/* Don't check for errors since they don't affect the zone */
308345916cd2Sjpk 
308445916cd2Sjpk 	/*
308545916cd2Sjpk 	 * Finally, deallocate any devices in the zone.
308645916cd2Sjpk 	 */
308745916cd2Sjpk 
308845916cd2Sjpk 	argv[0] = "deallocate";
308945916cd2Sjpk 	argv[1] = "-Isz";
309045916cd2Sjpk 	argv[2] = zone_name;
309145916cd2Sjpk 	argv[3] = NULL;
309245916cd2Sjpk 
309345916cd2Sjpk 	(void) forkexec(zlogp, "/usr/sbin/deallocate", argv);
309445916cd2Sjpk 	/* Don't check for errors since they don't affect the zone */
309545916cd2Sjpk }
309645916cd2Sjpk 
309745916cd2Sjpk /*
309845916cd2Sjpk  * Fetch the Trusted Extensions label and multi-level ports (MLPs) for
309945916cd2Sjpk  * this zone.
310045916cd2Sjpk  */
310145916cd2Sjpk static tsol_zcent_t *
310245916cd2Sjpk get_zone_label(zlog_t *zlogp, priv_set_t *privs)
310345916cd2Sjpk {
310445916cd2Sjpk 	FILE *fp;
310545916cd2Sjpk 	tsol_zcent_t *zcent = NULL;
310645916cd2Sjpk 	char line[MAXTNZLEN];
310745916cd2Sjpk 
310845916cd2Sjpk 	if ((fp = fopen(TNZONECFG_PATH, "r")) == NULL) {
310945916cd2Sjpk 		zerror(zlogp, B_TRUE, "%s", TNZONECFG_PATH);
311045916cd2Sjpk 		return (NULL);
311145916cd2Sjpk 	}
311245916cd2Sjpk 
311345916cd2Sjpk 	while (fgets(line, sizeof (line), fp) != NULL) {
311445916cd2Sjpk 		/*
311545916cd2Sjpk 		 * Check for malformed database
311645916cd2Sjpk 		 */
311745916cd2Sjpk 		if (strlen(line) == MAXTNZLEN - 1)
311845916cd2Sjpk 			break;
311945916cd2Sjpk 		if ((zcent = tsol_sgetzcent(line, NULL, NULL)) == NULL)
312045916cd2Sjpk 			continue;
312145916cd2Sjpk 		if (strcmp(zcent->zc_name, zone_name) == 0)
312245916cd2Sjpk 			break;
312345916cd2Sjpk 		tsol_freezcent(zcent);
312445916cd2Sjpk 		zcent = NULL;
312545916cd2Sjpk 	}
312645916cd2Sjpk 	(void) fclose(fp);
312745916cd2Sjpk 
312845916cd2Sjpk 	if (zcent == NULL) {
312945916cd2Sjpk 		zerror(zlogp, B_FALSE, "zone requires a label assignment. "
313045916cd2Sjpk 		    "See tnzonecfg(4)");
313145916cd2Sjpk 	} else {
313245916cd2Sjpk 		if (zlabel == NULL)
313345916cd2Sjpk 			zlabel = m_label_alloc(MAC_LABEL);
313445916cd2Sjpk 		/*
313545916cd2Sjpk 		 * Save this zone's privileges for later read-down processing
313645916cd2Sjpk 		 */
313745916cd2Sjpk 		if ((zprivs = priv_allocset()) == NULL) {
313845916cd2Sjpk 			zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
313945916cd2Sjpk 			return (NULL);
314045916cd2Sjpk 		} else {
314145916cd2Sjpk 			priv_copyset(privs, zprivs);
314245916cd2Sjpk 		}
314345916cd2Sjpk 	}
314445916cd2Sjpk 	return (zcent);
314545916cd2Sjpk }
314645916cd2Sjpk 
314745916cd2Sjpk /*
314845916cd2Sjpk  * Add the Trusted Extensions multi-level ports for this zone.
314945916cd2Sjpk  */
315045916cd2Sjpk static void
315145916cd2Sjpk set_mlps(zlog_t *zlogp, zoneid_t zoneid, tsol_zcent_t *zcent)
315245916cd2Sjpk {
315345916cd2Sjpk 	tsol_mlp_t *mlp;
315445916cd2Sjpk 	tsol_mlpent_t tsme;
315545916cd2Sjpk 
315645916cd2Sjpk 	if (!is_system_labeled())
315745916cd2Sjpk 		return;
315845916cd2Sjpk 
315945916cd2Sjpk 	tsme.tsme_zoneid = zoneid;
316045916cd2Sjpk 	tsme.tsme_flags = 0;
316145916cd2Sjpk 	for (mlp = zcent->zc_private_mlp; !TSOL_MLP_END(mlp); mlp++) {
316245916cd2Sjpk 		tsme.tsme_mlp = *mlp;
316345916cd2Sjpk 		if (tnmlp(TNDB_LOAD, &tsme) != 0) {
316445916cd2Sjpk 			zerror(zlogp, B_TRUE, "cannot set zone-specific MLP "
316545916cd2Sjpk 			    "on %d-%d/%d", mlp->mlp_port,
316645916cd2Sjpk 			    mlp->mlp_port_upper, mlp->mlp_ipp);
316745916cd2Sjpk 		}
316845916cd2Sjpk 	}
316945916cd2Sjpk 
317045916cd2Sjpk 	tsme.tsme_flags = TSOL_MEF_SHARED;
317145916cd2Sjpk 	for (mlp = zcent->zc_shared_mlp; !TSOL_MLP_END(mlp); mlp++) {
317245916cd2Sjpk 		tsme.tsme_mlp = *mlp;
317345916cd2Sjpk 		if (tnmlp(TNDB_LOAD, &tsme) != 0) {
317445916cd2Sjpk 			zerror(zlogp, B_TRUE, "cannot set shared MLP "
317545916cd2Sjpk 			    "on %d-%d/%d", mlp->mlp_port,
317645916cd2Sjpk 			    mlp->mlp_port_upper, mlp->mlp_ipp);
317745916cd2Sjpk 		}
317845916cd2Sjpk 	}
317945916cd2Sjpk }
318045916cd2Sjpk 
318145916cd2Sjpk static void
318245916cd2Sjpk remove_mlps(zlog_t *zlogp, zoneid_t zoneid)
318345916cd2Sjpk {
318445916cd2Sjpk 	tsol_mlpent_t tsme;
318545916cd2Sjpk 
318645916cd2Sjpk 	if (!is_system_labeled())
318745916cd2Sjpk 		return;
318845916cd2Sjpk 
318945916cd2Sjpk 	(void) memset(&tsme, 0, sizeof (tsme));
319045916cd2Sjpk 	tsme.tsme_zoneid = zoneid;
319145916cd2Sjpk 	if (tnmlp(TNDB_FLUSH, &tsme) != 0)
319245916cd2Sjpk 		zerror(zlogp, B_TRUE, "cannot flush MLPs");
319345916cd2Sjpk }
319445916cd2Sjpk 
31957c478bd9Sstevel@tonic-gate int
31967c478bd9Sstevel@tonic-gate prtmount(const char *fs, void *x) {
31977c478bd9Sstevel@tonic-gate 	zerror((zlog_t *)x, B_FALSE, "  %s", fs);
31987c478bd9Sstevel@tonic-gate 	return (0);
31997c478bd9Sstevel@tonic-gate }
32007c478bd9Sstevel@tonic-gate 
3201108322fbScarlsonj /*
3202108322fbScarlsonj  * Look for zones running on the main system that are using this root (or any
3203108322fbScarlsonj  * subdirectory of it).  Return B_TRUE and print an error if a conflicting zone
3204108322fbScarlsonj  * is found or if we can't tell.
3205108322fbScarlsonj  */
3206108322fbScarlsonj static boolean_t
3207108322fbScarlsonj duplicate_zone_root(zlog_t *zlogp, const char *rootpath)
32087c478bd9Sstevel@tonic-gate {
3209108322fbScarlsonj 	zoneid_t *zids = NULL;
3210108322fbScarlsonj 	uint_t nzids = 0;
3211108322fbScarlsonj 	boolean_t retv;
3212108322fbScarlsonj 	int rlen, zlen;
3213108322fbScarlsonj 	char zroot[MAXPATHLEN];
3214108322fbScarlsonj 	char zonename[ZONENAME_MAX];
3215108322fbScarlsonj 
3216108322fbScarlsonj 	for (;;) {
3217108322fbScarlsonj 		nzids += 10;
3218108322fbScarlsonj 		zids = malloc(nzids * sizeof (*zids));
3219108322fbScarlsonj 		if (zids == NULL) {
32203f2f09c1Sdp 			zerror(zlogp, B_TRUE, "memory allocation failed");
3221108322fbScarlsonj 			return (B_TRUE);
3222108322fbScarlsonj 		}
3223108322fbScarlsonj 		if (zone_list(zids, &nzids) == 0)
3224108322fbScarlsonj 			break;
3225108322fbScarlsonj 		free(zids);
3226108322fbScarlsonj 	}
3227108322fbScarlsonj 	retv = B_FALSE;
3228108322fbScarlsonj 	rlen = strlen(rootpath);
3229108322fbScarlsonj 	while (nzids > 0) {
3230108322fbScarlsonj 		/*
3231108322fbScarlsonj 		 * Ignore errors; they just mean that the zone has disappeared
3232108322fbScarlsonj 		 * while we were busy.
3233108322fbScarlsonj 		 */
3234108322fbScarlsonj 		if (zone_getattr(zids[--nzids], ZONE_ATTR_ROOT, zroot,
3235108322fbScarlsonj 		    sizeof (zroot)) == -1)
3236108322fbScarlsonj 			continue;
3237108322fbScarlsonj 		zlen = strlen(zroot);
3238108322fbScarlsonj 		if (zlen > rlen)
3239108322fbScarlsonj 			zlen = rlen;
3240108322fbScarlsonj 		if (strncmp(rootpath, zroot, zlen) == 0 &&
3241108322fbScarlsonj 		    (zroot[zlen] == '\0' || zroot[zlen] == '/') &&
3242108322fbScarlsonj 		    (rootpath[zlen] == '\0' || rootpath[zlen] == '/')) {
3243108322fbScarlsonj 			if (getzonenamebyid(zids[nzids], zonename,
3244108322fbScarlsonj 			    sizeof (zonename)) == -1)
3245108322fbScarlsonj 				(void) snprintf(zonename, sizeof (zonename),
3246108322fbScarlsonj 				    "id %d", (int)zids[nzids]);
3247108322fbScarlsonj 			zerror(zlogp, B_FALSE,
3248108322fbScarlsonj 			    "zone root %s already in use by zone %s",
3249108322fbScarlsonj 			    rootpath, zonename);
3250108322fbScarlsonj 			retv = B_TRUE;
3251108322fbScarlsonj 			break;
3252108322fbScarlsonj 		}
3253108322fbScarlsonj 	}
3254108322fbScarlsonj 	free(zids);
3255108322fbScarlsonj 	return (retv);
3256108322fbScarlsonj }
3257108322fbScarlsonj 
3258108322fbScarlsonj /*
3259108322fbScarlsonj  * Search for loopback mounts that use this same source node (same device and
3260108322fbScarlsonj  * inode).  Return B_TRUE if there is one or if we can't tell.
3261108322fbScarlsonj  */
3262108322fbScarlsonj static boolean_t
3263108322fbScarlsonj duplicate_reachable_path(zlog_t *zlogp, const char *rootpath)
3264108322fbScarlsonj {
3265108322fbScarlsonj 	struct stat64 rst, zst;
3266108322fbScarlsonj 	struct mnttab *mnp;
3267108322fbScarlsonj 
3268108322fbScarlsonj 	if (stat64(rootpath, &rst) == -1) {
3269108322fbScarlsonj 		zerror(zlogp, B_TRUE, "can't stat %s", rootpath);
3270108322fbScarlsonj 		return (B_TRUE);
3271108322fbScarlsonj 	}
3272108322fbScarlsonj 	if (resolve_lofs_mnts == NULL && lofs_read_mnttab(zlogp) == -1)
3273108322fbScarlsonj 		return (B_TRUE);
3274108322fbScarlsonj 	for (mnp = resolve_lofs_mnts; mnp < resolve_lofs_mnt_max; mnp++) {
3275108322fbScarlsonj 		if (mnp->mnt_fstype == NULL ||
3276108322fbScarlsonj 		    strcmp(MNTTYPE_LOFS, mnp->mnt_fstype) != 0)
3277108322fbScarlsonj 			continue;
3278108322fbScarlsonj 		/* We're looking at a loopback mount.  Stat it. */
3279108322fbScarlsonj 		if (mnp->mnt_special != NULL &&
3280108322fbScarlsonj 		    stat64(mnp->mnt_special, &zst) != -1 &&
3281108322fbScarlsonj 		    rst.st_dev == zst.st_dev && rst.st_ino == zst.st_ino) {
3282108322fbScarlsonj 			zerror(zlogp, B_FALSE,
3283108322fbScarlsonj 			    "zone root %s is reachable through %s",
3284108322fbScarlsonj 			    rootpath, mnp->mnt_mountp);
3285108322fbScarlsonj 			return (B_TRUE);
3286108322fbScarlsonj 		}
3287108322fbScarlsonj 	}
3288108322fbScarlsonj 	return (B_FALSE);
3289108322fbScarlsonj }
3290108322fbScarlsonj 
3291108322fbScarlsonj zoneid_t
3292108322fbScarlsonj vplat_create(zlog_t *zlogp, boolean_t mount_cmd)
3293108322fbScarlsonj {
3294108322fbScarlsonj 	zoneid_t rval = -1;
32957c478bd9Sstevel@tonic-gate 	priv_set_t *privs;
32967c478bd9Sstevel@tonic-gate 	char rootpath[MAXPATHLEN];
32977c478bd9Sstevel@tonic-gate 	char *rctlbuf = NULL;
3298108322fbScarlsonj 	size_t rctlbufsz = 0;
3299fa9e4066Sahrens 	char *zfsbuf = NULL;
3300fa9e4066Sahrens 	size_t zfsbufsz = 0;
3301108322fbScarlsonj 	zoneid_t zoneid = -1;
33027c478bd9Sstevel@tonic-gate 	int xerr;
3303108322fbScarlsonj 	char *kzone;
3304108322fbScarlsonj 	FILE *fp = NULL;
330545916cd2Sjpk 	tsol_zcent_t *zcent = NULL;
330645916cd2Sjpk 	int match = 0;
330745916cd2Sjpk 	int doi = 0;
33087c478bd9Sstevel@tonic-gate 
33097c478bd9Sstevel@tonic-gate 	if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
33107c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to determine zone root");
33117c478bd9Sstevel@tonic-gate 		return (-1);
33127c478bd9Sstevel@tonic-gate 	}
3313108322fbScarlsonj 	if (zonecfg_in_alt_root())
3314108322fbScarlsonj 		resolve_lofs(zlogp, rootpath, sizeof (rootpath));
33157c478bd9Sstevel@tonic-gate 
33167c478bd9Sstevel@tonic-gate 	if ((privs = priv_allocset()) == NULL) {
33177c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
33187c478bd9Sstevel@tonic-gate 		return (-1);
33197c478bd9Sstevel@tonic-gate 	}
33207c478bd9Sstevel@tonic-gate 	priv_emptyset(privs);
3321ffbafc53Scomay 	if (get_privset(zlogp, privs, mount_cmd) != 0)
33227c478bd9Sstevel@tonic-gate 		goto error;
3323ffbafc53Scomay 
3324108322fbScarlsonj 	if (!mount_cmd && get_rctls(zlogp, &rctlbuf, &rctlbufsz) != 0) {
33257c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "Unable to get list of rctls");
33267c478bd9Sstevel@tonic-gate 		goto error;
33277c478bd9Sstevel@tonic-gate 	}
3328ffbafc53Scomay 
3329fa9e4066Sahrens 	if (get_datasets(zlogp, &zfsbuf, &zfsbufsz) != 0) {
3330fa9e4066Sahrens 		zerror(zlogp, B_FALSE, "Unable to get list of ZFS datasets");
3331fa9e4066Sahrens 		goto error;
3332fa9e4066Sahrens 	}
33337c478bd9Sstevel@tonic-gate 
333448451833Scarlsonj 	if (!mount_cmd && is_system_labeled()) {
333545916cd2Sjpk 		zcent = get_zone_label(zlogp, privs);
333648451833Scarlsonj 		if (zcent != NULL) {
333745916cd2Sjpk 			match = zcent->zc_match;
333845916cd2Sjpk 			doi = zcent->zc_doi;
333945916cd2Sjpk 			*zlabel = zcent->zc_label;
334045916cd2Sjpk 		} else {
334145916cd2Sjpk 			goto error;
334245916cd2Sjpk 		}
334345916cd2Sjpk 	}
334445916cd2Sjpk 
3345108322fbScarlsonj 	kzone = zone_name;
3346108322fbScarlsonj 
3347108322fbScarlsonj 	/*
3348108322fbScarlsonj 	 * We must do this scan twice.  First, we look for zones running on the
3349108322fbScarlsonj 	 * main system that are using this root (or any subdirectory of it).
3350108322fbScarlsonj 	 * Next, we reduce to the shortest path and search for loopback mounts
3351108322fbScarlsonj 	 * that use this same source node (same device and inode).
3352108322fbScarlsonj 	 */
3353108322fbScarlsonj 	if (duplicate_zone_root(zlogp, rootpath))
3354108322fbScarlsonj 		goto error;
3355108322fbScarlsonj 	if (duplicate_reachable_path(zlogp, rootpath))
3356108322fbScarlsonj 		goto error;
3357108322fbScarlsonj 
3358108322fbScarlsonj 	if (mount_cmd) {
3359108322fbScarlsonj 		root_to_lu(zlogp, rootpath, sizeof (rootpath), B_TRUE);
3360108322fbScarlsonj 
3361108322fbScarlsonj 		/*
3362108322fbScarlsonj 		 * Forge up a special root for this zone.  When a zone is
3363108322fbScarlsonj 		 * mounted, we can't let the zone have its own root because the
3364108322fbScarlsonj 		 * tools that will be used in this "scratch zone" need access
3365108322fbScarlsonj 		 * to both the zone's resources and the running machine's
3366108322fbScarlsonj 		 * executables.
3367108322fbScarlsonj 		 *
3368108322fbScarlsonj 		 * Note that the mkdir here also catches read-only filesystems.
3369108322fbScarlsonj 		 */
3370108322fbScarlsonj 		if (mkdir(rootpath, 0755) != 0 && errno != EEXIST) {
3371108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot create %s", rootpath);
3372108322fbScarlsonj 			goto error;
3373108322fbScarlsonj 		}
3374108322fbScarlsonj 		if (domount(zlogp, "tmpfs", "", "swap", rootpath) != 0)
3375108322fbScarlsonj 			goto error;
3376108322fbScarlsonj 	}
3377108322fbScarlsonj 
3378108322fbScarlsonj 	if (zonecfg_in_alt_root()) {
3379108322fbScarlsonj 		/*
3380108322fbScarlsonj 		 * If we are mounting up a zone in an alternate root partition,
3381108322fbScarlsonj 		 * then we have some additional work to do before starting the
3382108322fbScarlsonj 		 * zone.  First, resolve the root path down so that we're not
3383108322fbScarlsonj 		 * fooled by duplicates.  Then forge up an internal name for
3384108322fbScarlsonj 		 * the zone.
3385108322fbScarlsonj 		 */
3386108322fbScarlsonj 		if ((fp = zonecfg_open_scratch("", B_TRUE)) == NULL) {
3387108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot open mapfile");
3388108322fbScarlsonj 			goto error;
3389108322fbScarlsonj 		}
3390108322fbScarlsonj 		if (zonecfg_lock_scratch(fp) != 0) {
3391108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot lock mapfile");
3392108322fbScarlsonj 			goto error;
3393108322fbScarlsonj 		}
3394108322fbScarlsonj 		if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
3395108322fbScarlsonj 		    NULL, 0) == 0) {
3396108322fbScarlsonj 			zerror(zlogp, B_FALSE, "scratch zone already running");
3397108322fbScarlsonj 			goto error;
3398108322fbScarlsonj 		}
3399108322fbScarlsonj 		/* This is the preferred name */
3400108322fbScarlsonj 		(void) snprintf(kernzone, sizeof (kernzone), "SUNWlu-%s",
3401108322fbScarlsonj 		    zone_name);
3402108322fbScarlsonj 		srandom(getpid());
3403108322fbScarlsonj 		while (zonecfg_reverse_scratch(fp, kernzone, NULL, 0, NULL,
3404108322fbScarlsonj 		    0) == 0) {
3405108322fbScarlsonj 			/* This is just an arbitrary name; note "." usage */
3406108322fbScarlsonj 			(void) snprintf(kernzone, sizeof (kernzone),
3407108322fbScarlsonj 			    "SUNWlu.%08lX%08lX", random(), random());
3408108322fbScarlsonj 		}
3409108322fbScarlsonj 		kzone = kernzone;
3410108322fbScarlsonj 	}
3411108322fbScarlsonj 
34127c478bd9Sstevel@tonic-gate 	xerr = 0;
3413108322fbScarlsonj 	if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf,
341445916cd2Sjpk 	    rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel)) == -1) {
34157c478bd9Sstevel@tonic-gate 		if (xerr == ZE_AREMOUNTS) {
34167c478bd9Sstevel@tonic-gate 			if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) {
34177c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
34187c478bd9Sstevel@tonic-gate 				    "An unknown file-system is mounted on "
34197c478bd9Sstevel@tonic-gate 				    "a subdirectory of %s", rootpath);
34207c478bd9Sstevel@tonic-gate 			} else {
34217c478bd9Sstevel@tonic-gate 
34227c478bd9Sstevel@tonic-gate 				zerror(zlogp, B_FALSE,
34237c478bd9Sstevel@tonic-gate 				    "These file-systems are mounted on "
34247c478bd9Sstevel@tonic-gate 				    "subdirectories of %s:", rootpath);
34257c478bd9Sstevel@tonic-gate 				(void) zonecfg_find_mounts(rootpath,
34267c478bd9Sstevel@tonic-gate 				    prtmount, zlogp);
34277c478bd9Sstevel@tonic-gate 			}
34287c478bd9Sstevel@tonic-gate 		} else if (xerr == ZE_CHROOTED) {
34297c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_FALSE, "%s: "
34307c478bd9Sstevel@tonic-gate 			    "cannot create a zone from a chrooted "
34317c478bd9Sstevel@tonic-gate 			    "environment", "zone_create");
34327c478bd9Sstevel@tonic-gate 		} else {
34337c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "%s failed", "zone_create");
34347c478bd9Sstevel@tonic-gate 		}
34357c478bd9Sstevel@tonic-gate 		goto error;
34367c478bd9Sstevel@tonic-gate 	}
3437108322fbScarlsonj 
3438108322fbScarlsonj 	if (zonecfg_in_alt_root() &&
3439108322fbScarlsonj 	    zonecfg_add_scratch(fp, zone_name, kernzone,
3440108322fbScarlsonj 	    zonecfg_get_root()) == -1) {
3441108322fbScarlsonj 		zerror(zlogp, B_TRUE, "cannot add mapfile entry");
3442108322fbScarlsonj 		goto error;
3443108322fbScarlsonj 	}
3444108322fbScarlsonj 
34457c478bd9Sstevel@tonic-gate 	/*
3446108322fbScarlsonj 	 * The following is a warning, not an error, and is not performed when
3447108322fbScarlsonj 	 * merely mounting a zone for administrative use.
34487c478bd9Sstevel@tonic-gate 	 */
3449108322fbScarlsonj 	if (!mount_cmd && bind_to_pool(zlogp, zoneid) != 0)
34507c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE, "WARNING: unable to bind zone to "
34517c478bd9Sstevel@tonic-gate 		    "requested pool; using default pool.");
345248451833Scarlsonj 	if (!mount_cmd)
345345916cd2Sjpk 		set_mlps(zlogp, zoneid, zcent);
3454108322fbScarlsonj 	rval = zoneid;
3455108322fbScarlsonj 	zoneid = -1;
3456108322fbScarlsonj 
34577c478bd9Sstevel@tonic-gate error:
3458108322fbScarlsonj 	if (zoneid != -1)
3459108322fbScarlsonj 		(void) zone_destroy(zoneid);
34607c478bd9Sstevel@tonic-gate 	if (rctlbuf != NULL)
34617c478bd9Sstevel@tonic-gate 		free(rctlbuf);
34627c478bd9Sstevel@tonic-gate 	priv_freeset(privs);
3463108322fbScarlsonj 	if (fp != NULL)
3464108322fbScarlsonj 		zonecfg_close_scratch(fp);
3465108322fbScarlsonj 	lofs_discard_mnttab();
346645916cd2Sjpk 	if (zcent != NULL)
346745916cd2Sjpk 		tsol_freezcent(zcent);
34687c478bd9Sstevel@tonic-gate 	return (rval);
34697c478bd9Sstevel@tonic-gate }
34707c478bd9Sstevel@tonic-gate 
3471555afedfScarlsonj /*
3472555afedfScarlsonj  * Enter the zone and write a /etc/zones/index file there.  This allows
3473555afedfScarlsonj  * libzonecfg (and thus zoneadm) to report the UUID and potentially other zone
3474555afedfScarlsonj  * details from inside the zone.
3475555afedfScarlsonj  */
3476555afedfScarlsonj static void
3477555afedfScarlsonj write_index_file(zoneid_t zoneid)
3478555afedfScarlsonj {
3479555afedfScarlsonj 	FILE *zef;
3480555afedfScarlsonj 	FILE *zet;
3481555afedfScarlsonj 	struct zoneent *zep;
3482555afedfScarlsonj 	pid_t child;
3483555afedfScarlsonj 	int tmpl_fd;
3484555afedfScarlsonj 	ctid_t ct;
3485555afedfScarlsonj 	int fd;
3486555afedfScarlsonj 	char uuidstr[UUID_PRINTABLE_STRING_LENGTH];
3487555afedfScarlsonj 
3488555afedfScarlsonj 	/* Locate the zone entry in the global zone's index file */
3489555afedfScarlsonj 	if ((zef = setzoneent()) == NULL)
3490555afedfScarlsonj 		return;
3491555afedfScarlsonj 	while ((zep = getzoneent_private(zef)) != NULL) {
3492555afedfScarlsonj 		if (strcmp(zep->zone_name, zone_name) == 0)
3493555afedfScarlsonj 			break;
3494555afedfScarlsonj 		free(zep);
3495555afedfScarlsonj 	}
3496555afedfScarlsonj 	endzoneent(zef);
3497555afedfScarlsonj 	if (zep == NULL)
3498555afedfScarlsonj 		return;
3499555afedfScarlsonj 
3500555afedfScarlsonj 	if ((tmpl_fd = init_template()) == -1) {
3501555afedfScarlsonj 		free(zep);
3502555afedfScarlsonj 		return;
3503555afedfScarlsonj 	}
3504555afedfScarlsonj 
3505555afedfScarlsonj 	if ((child = fork()) == -1) {
3506555afedfScarlsonj 		(void) ct_tmpl_clear(tmpl_fd);
3507555afedfScarlsonj 		(void) close(tmpl_fd);
3508555afedfScarlsonj 		free(zep);
3509555afedfScarlsonj 		return;
3510555afedfScarlsonj 	}
3511555afedfScarlsonj 
3512555afedfScarlsonj 	/* parent waits for child to finish */
3513555afedfScarlsonj 	if (child != 0) {
3514555afedfScarlsonj 		free(zep);
3515555afedfScarlsonj 		if (contract_latest(&ct) == -1)
3516555afedfScarlsonj 			ct = -1;
3517555afedfScarlsonj 		(void) ct_tmpl_clear(tmpl_fd);
3518555afedfScarlsonj 		(void) close(tmpl_fd);
3519555afedfScarlsonj 		(void) waitpid(child, NULL, 0);
3520555afedfScarlsonj 		(void) contract_abandon_id(ct);
3521555afedfScarlsonj 		return;
3522555afedfScarlsonj 	}
3523555afedfScarlsonj 
3524555afedfScarlsonj 	/* child enters zone and sets up index file */
3525555afedfScarlsonj 	(void) ct_tmpl_clear(tmpl_fd);
3526555afedfScarlsonj 	if (zone_enter(zoneid) != -1) {
3527555afedfScarlsonj 		(void) mkdir(ZONE_CONFIG_ROOT, ZONE_CONFIG_MODE);
3528555afedfScarlsonj 		(void) chown(ZONE_CONFIG_ROOT, ZONE_CONFIG_UID,
3529555afedfScarlsonj 		    ZONE_CONFIG_GID);
3530555afedfScarlsonj 		fd = open(ZONE_INDEX_FILE, O_WRONLY|O_CREAT|O_TRUNC,
3531555afedfScarlsonj 		    ZONE_INDEX_MODE);
3532555afedfScarlsonj 		if (fd != -1 && (zet = fdopen(fd, "w")) != NULL) {
3533555afedfScarlsonj 			(void) fchown(fd, ZONE_INDEX_UID, ZONE_INDEX_GID);
3534555afedfScarlsonj 			if (uuid_is_null(zep->zone_uuid))
3535555afedfScarlsonj 				uuidstr[0] = '\0';
3536555afedfScarlsonj 			else
3537555afedfScarlsonj 				uuid_unparse(zep->zone_uuid, uuidstr);
3538555afedfScarlsonj 			(void) fprintf(zet, "%s:%s:/:%s\n", zep->zone_name,
3539555afedfScarlsonj 			    zone_state_str(zep->zone_state),
3540555afedfScarlsonj 			    uuidstr);
3541555afedfScarlsonj 			(void) fclose(zet);
3542555afedfScarlsonj 		}
3543555afedfScarlsonj 	}
3544555afedfScarlsonj 	_exit(0);
3545555afedfScarlsonj }
3546555afedfScarlsonj 
35477c478bd9Sstevel@tonic-gate int
3548555afedfScarlsonj vplat_bringup(zlog_t *zlogp, boolean_t mount_cmd, zoneid_t zoneid)
35497c478bd9Sstevel@tonic-gate {
35505749802bSdp 
3551fa9e4066Sahrens 	if (!mount_cmd && validate_datasets(zlogp) != 0) {
3552fa9e4066Sahrens 		lofs_discard_mnttab();
3553fa9e4066Sahrens 		return (-1);
3554fa9e4066Sahrens 	}
3555fa9e4066Sahrens 
3556facf4a8dSllai1 	if (mount_filesystems(zlogp, mount_cmd) != 0) {
3557108322fbScarlsonj 		lofs_discard_mnttab();
35587c478bd9Sstevel@tonic-gate 		return (-1);
3559108322fbScarlsonj 	}
3560facf4a8dSllai1 
3561facf4a8dSllai1 	/* mount /dev for zone (both normal and scratch zone) */
3562facf4a8dSllai1 	if (vplat_mount_dev(zlogp) != 0) {
3563facf4a8dSllai1 		lofs_discard_mnttab();
3564facf4a8dSllai1 		return (-1);
3565facf4a8dSllai1 	}
3566facf4a8dSllai1 
3567facf4a8dSllai1 	if (!mount_cmd && configure_network_interfaces(zlogp) != 0) {
3568108322fbScarlsonj 		lofs_discard_mnttab();
35697c478bd9Sstevel@tonic-gate 		return (-1);
3570108322fbScarlsonj 	}
3571555afedfScarlsonj 
3572555afedfScarlsonj 	write_index_file(zoneid);
3573555afedfScarlsonj 
3574108322fbScarlsonj 	lofs_discard_mnttab();
35757c478bd9Sstevel@tonic-gate 	return (0);
35767c478bd9Sstevel@tonic-gate }
35777c478bd9Sstevel@tonic-gate 
3578108322fbScarlsonj static int
3579108322fbScarlsonj lu_root_teardown(zlog_t *zlogp)
35807c478bd9Sstevel@tonic-gate {
3581108322fbScarlsonj 	char zroot[MAXPATHLEN];
3582108322fbScarlsonj 
3583108322fbScarlsonj 	if (zone_get_rootpath(zone_name, zroot, sizeof (zroot)) != Z_OK) {
3584108322fbScarlsonj 		zerror(zlogp, B_FALSE, "unable to determine zone root");
3585108322fbScarlsonj 		return (-1);
3586108322fbScarlsonj 	}
3587108322fbScarlsonj 	root_to_lu(zlogp, zroot, sizeof (zroot), B_FALSE);
3588108322fbScarlsonj 
3589108322fbScarlsonj 	/*
3590108322fbScarlsonj 	 * At this point, the processes are gone, the filesystems (save the
3591108322fbScarlsonj 	 * root) are unmounted, and the zone is on death row.  But there may
3592108322fbScarlsonj 	 * still be creds floating about in the system that reference the
3593108322fbScarlsonj 	 * zone_t, and which pin down zone_rootvp causing this call to fail
3594108322fbScarlsonj 	 * with EBUSY.  Thus, we try for a little while before just giving up.
3595108322fbScarlsonj 	 * (How I wish this were not true, and umount2 just did the right
3596108322fbScarlsonj 	 * thing, or tmpfs supported MS_FORCE This is a gross hack.)
3597108322fbScarlsonj 	 */
3598108322fbScarlsonj 	if (umount2(zroot, MS_FORCE) != 0) {
3599108322fbScarlsonj 		if (errno == ENOTSUP && umount2(zroot, 0) == 0)
3600108322fbScarlsonj 			goto unmounted;
3601108322fbScarlsonj 		if (errno == EBUSY) {
3602108322fbScarlsonj 			int tries = 10;
3603108322fbScarlsonj 
3604108322fbScarlsonj 			while (--tries >= 0) {
3605108322fbScarlsonj 				(void) sleep(1);
3606108322fbScarlsonj 				if (umount2(zroot, 0) == 0)
3607108322fbScarlsonj 					goto unmounted;
3608108322fbScarlsonj 				if (errno != EBUSY)
3609108322fbScarlsonj 					break;
3610108322fbScarlsonj 			}
3611108322fbScarlsonj 		}
3612108322fbScarlsonj 		zerror(zlogp, B_TRUE, "unable to unmount '%s'", zroot);
3613108322fbScarlsonj 		return (-1);
3614108322fbScarlsonj 	}
3615108322fbScarlsonj unmounted:
3616108322fbScarlsonj 
3617108322fbScarlsonj 	/*
3618108322fbScarlsonj 	 * Only zones in an alternate root environment have scratch zone
3619108322fbScarlsonj 	 * entries.
3620108322fbScarlsonj 	 */
3621108322fbScarlsonj 	if (zonecfg_in_alt_root()) {
3622108322fbScarlsonj 		FILE *fp;
3623108322fbScarlsonj 		int retv;
3624108322fbScarlsonj 
3625108322fbScarlsonj 		if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
3626108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot open mapfile");
3627108322fbScarlsonj 			return (-1);
3628108322fbScarlsonj 		}
3629108322fbScarlsonj 		retv = -1;
3630108322fbScarlsonj 		if (zonecfg_lock_scratch(fp) != 0)
3631108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot lock mapfile");
3632108322fbScarlsonj 		else if (zonecfg_delete_scratch(fp, kernzone) != 0)
3633108322fbScarlsonj 			zerror(zlogp, B_TRUE, "cannot delete map entry");
3634108322fbScarlsonj 		else
3635108322fbScarlsonj 			retv = 0;
3636108322fbScarlsonj 		zonecfg_close_scratch(fp);
3637108322fbScarlsonj 		return (retv);
3638108322fbScarlsonj 	} else {
3639108322fbScarlsonj 		return (0);
3640108322fbScarlsonj 	}
3641108322fbScarlsonj }
3642108322fbScarlsonj 
3643108322fbScarlsonj int
3644108322fbScarlsonj vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd)
3645108322fbScarlsonj {
3646108322fbScarlsonj 	char *kzone;
36477c478bd9Sstevel@tonic-gate 	zoneid_t zoneid;
36487c478bd9Sstevel@tonic-gate 
3649108322fbScarlsonj 	kzone = zone_name;
3650108322fbScarlsonj 	if (zonecfg_in_alt_root()) {
3651108322fbScarlsonj 		FILE *fp;
3652108322fbScarlsonj 
3653108322fbScarlsonj 		if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
3654108322fbScarlsonj 			zerror(zlogp, B_TRUE, "unable to open map file");
3655108322fbScarlsonj 			goto error;
3656108322fbScarlsonj 		}
3657108322fbScarlsonj 		if (zonecfg_find_scratch(fp, zone_name, zonecfg_get_root(),
3658108322fbScarlsonj 		    kernzone, sizeof (kernzone)) != 0) {
3659108322fbScarlsonj 			zerror(zlogp, B_FALSE, "unable to find scratch zone");
3660108322fbScarlsonj 			zonecfg_close_scratch(fp);
3661108322fbScarlsonj 			goto error;
3662108322fbScarlsonj 		}
3663108322fbScarlsonj 		zonecfg_close_scratch(fp);
3664108322fbScarlsonj 		kzone = kernzone;
3665108322fbScarlsonj 	}
3666108322fbScarlsonj 
3667108322fbScarlsonj 	if ((zoneid = getzoneidbyname(kzone)) == ZONE_ID_UNDEFINED) {
36687c478bd9Sstevel@tonic-gate 		if (!bringup_failure_recovery)
36697c478bd9Sstevel@tonic-gate 			zerror(zlogp, B_TRUE, "unable to get zoneid");
3670108322fbScarlsonj 		if (unmount_cmd)
3671108322fbScarlsonj 			(void) lu_root_teardown(zlogp);
36727c478bd9Sstevel@tonic-gate 		goto error;
36737c478bd9Sstevel@tonic-gate 	}
36747c478bd9Sstevel@tonic-gate 
36757c478bd9Sstevel@tonic-gate 	if (zone_shutdown(zoneid) != 0) {
36767c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to shutdown zone");
36777c478bd9Sstevel@tonic-gate 		goto error;
36787c478bd9Sstevel@tonic-gate 	}
36797c478bd9Sstevel@tonic-gate 
3680108322fbScarlsonj 	if (!unmount_cmd &&
3681108322fbScarlsonj 	    unconfigure_network_interfaces(zlogp, zoneid) != 0) {
36827c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
36837c478bd9Sstevel@tonic-gate 		    "unable to unconfigure network interfaces in zone");
36847c478bd9Sstevel@tonic-gate 		goto error;
36857c478bd9Sstevel@tonic-gate 	}
36867c478bd9Sstevel@tonic-gate 
3687108322fbScarlsonj 	if (!unmount_cmd && tcp_abort_connections(zlogp, zoneid) != 0) {
36887c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to abort TCP connections");
36897c478bd9Sstevel@tonic-gate 		goto error;
36907c478bd9Sstevel@tonic-gate 	}
36917c478bd9Sstevel@tonic-gate 
3692facf4a8dSllai1 	/* destroy zconsole before umount /dev */
3693facf4a8dSllai1 	if (!unmount_cmd)
3694facf4a8dSllai1 		destroy_console_slave();
3695facf4a8dSllai1 
3696108322fbScarlsonj 	if (unmount_filesystems(zlogp, zoneid, unmount_cmd) != 0) {
36977c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_FALSE,
36987c478bd9Sstevel@tonic-gate 		    "unable to unmount file systems in zone");
36997c478bd9Sstevel@tonic-gate 		goto error;
37007c478bd9Sstevel@tonic-gate 	}
37017c478bd9Sstevel@tonic-gate 
370245916cd2Sjpk 	remove_mlps(zlogp, zoneid);
370345916cd2Sjpk 
37047c478bd9Sstevel@tonic-gate 	if (zone_destroy(zoneid) != 0) {
37057c478bd9Sstevel@tonic-gate 		zerror(zlogp, B_TRUE, "unable to destroy zone");
37067c478bd9Sstevel@tonic-gate 		goto error;
37077c478bd9Sstevel@tonic-gate 	}
3708108322fbScarlsonj 
3709108322fbScarlsonj 	/*
3710108322fbScarlsonj 	 * Special teardown for alternate boot environments: remove the tmpfs
3711108322fbScarlsonj 	 * root for the zone and then remove it from the map file.
3712108322fbScarlsonj 	 */
3713108322fbScarlsonj 	if (unmount_cmd && lu_root_teardown(zlogp) != 0)
3714108322fbScarlsonj 		goto error;
3715108322fbScarlsonj 
3716108322fbScarlsonj 	lofs_discard_mnttab();
37177c478bd9Sstevel@tonic-gate 	return (0);
37187c478bd9Sstevel@tonic-gate 
37197c478bd9Sstevel@tonic-gate error:
3720108322fbScarlsonj 	lofs_discard_mnttab();
37217c478bd9Sstevel@tonic-gate 	return (-1);
37227c478bd9Sstevel@tonic-gate }
3723facf4a8dSllai1 
3724facf4a8dSllai1 /*
3725facf4a8dSllai1  * Apply the standard lists of devices/symlinks/mappings and the user-specified
3726facf4a8dSllai1  * list of devices (via zonecfg) to the /dev filesystem.  The filesystem will
3727facf4a8dSllai1  * use these as a profile/filter to determine what exists in /dev.
3728facf4a8dSllai1  */
3729facf4a8dSllai1 static int
3730facf4a8dSllai1 vplat_mount_dev(zlog_t *zlogp)
3731facf4a8dSllai1 {
3732facf4a8dSllai1 	char			zonedevpath[MAXPATHLEN];
3733facf4a8dSllai1 	zone_dochandle_t	handle = NULL;
3734facf4a8dSllai1 	struct zone_devtab	ztab;
3735facf4a8dSllai1 	zone_fsopt_t		opt_attr;
3736facf4a8dSllai1 	di_prof_t		prof = NULL;
3737facf4a8dSllai1 	int			i, err, len;
3738facf4a8dSllai1 	int			retval = -1;
3739facf4a8dSllai1 
3740facf4a8dSllai1 	struct zone_fstab devtab = {
3741facf4a8dSllai1 		"/dev",
3742facf4a8dSllai1 		"/dev",
3743facf4a8dSllai1 		MNTTYPE_DEV,
3744facf4a8dSllai1 		NULL,
3745facf4a8dSllai1 		""
3746facf4a8dSllai1 	};
3747facf4a8dSllai1 
3748facf4a8dSllai1 	if (err = zone_get_devroot(zone_name, zonedevpath,
3749facf4a8dSllai1 	    sizeof (zonedevpath))) {
3750facf4a8dSllai1 		zerror(zlogp, B_FALSE, "can't get zone dev: %s",
3751facf4a8dSllai1 		    zonecfg_strerror(err));
3752facf4a8dSllai1 		return (-1);
3753facf4a8dSllai1 	}
3754facf4a8dSllai1 
3755facf4a8dSllai1 	/*
3756facf4a8dSllai1 	 * The old /dev was a lofs mount from <zonepath>/dev, with
3757facf4a8dSllai1 	 * dev fs, that becomes a mount on <zonepath>/root/dev.
3758facf4a8dSllai1 	 * However, we need to preserve device permission bits during
3759facf4a8dSllai1 	 * upgrade.  What we should do is migrate the attribute directory
3760facf4a8dSllai1 	 * on upgrade, but for now, preserve it at <zonepath>/dev.
3761facf4a8dSllai1 	 */
3762facf4a8dSllai1 	(void) strcpy(opt_attr.zone_fsopt_opt, "attrdir=");
3763facf4a8dSllai1 	len = strlen(opt_attr.zone_fsopt_opt);
3764facf4a8dSllai1 	if (err = zone_get_zonepath(zone_name,
3765facf4a8dSllai1 	    opt_attr.zone_fsopt_opt + len, MAX_MNTOPT_STR - len)) {
3766facf4a8dSllai1 		zerror(zlogp, B_FALSE, "can't get zone path: %s",
3767facf4a8dSllai1 		    zonecfg_strerror(err));
3768facf4a8dSllai1 		return (-1);
3769facf4a8dSllai1 	}
3770facf4a8dSllai1 
3771facf4a8dSllai1 	if (make_one_dir(zlogp, opt_attr.zone_fsopt_opt + len, "/dev",
3772facf4a8dSllai1 	    DEFAULT_DIR_MODE) != 0)
3773facf4a8dSllai1 		return (-1);
3774facf4a8dSllai1 
3775facf4a8dSllai1 	(void) strlcat(opt_attr.zone_fsopt_opt, "/dev", MAX_MNTOPT_STR);
3776facf4a8dSllai1 	devtab.zone_fs_options = &opt_attr;
3777facf4a8dSllai1 	opt_attr.zone_fsopt_next = NULL;
3778facf4a8dSllai1 
3779facf4a8dSllai1 	/* mount /dev inside the zone */
3780facf4a8dSllai1 	i = strlen(zonedevpath);
3781facf4a8dSllai1 	if (mount_one(zlogp, &devtab, zonedevpath))
3782facf4a8dSllai1 		return (-1);
3783facf4a8dSllai1 
3784facf4a8dSllai1 	(void) strlcat(zonedevpath, "/dev", sizeof (zonedevpath));
3785facf4a8dSllai1 	if (di_prof_init(zonedevpath, &prof)) {
3786facf4a8dSllai1 		zerror(zlogp, B_TRUE, "failed to initialize profile");
3787facf4a8dSllai1 		goto cleanup;
3788facf4a8dSllai1 	}
3789facf4a8dSllai1 
3790facf4a8dSllai1 	/* Add the standard devices and directories */
3791facf4a8dSllai1 	for (i = 0; standard_devs[i] != NULL; ++i) {
3792facf4a8dSllai1 		if (di_prof_add_dev(prof, standard_devs[i])) {
3793facf4a8dSllai1 			zerror(zlogp, B_TRUE, "failed to add "
3794facf4a8dSllai1 			    "standard device");
3795facf4a8dSllai1 			goto cleanup;
3796facf4a8dSllai1 		}
3797facf4a8dSllai1 	}
3798facf4a8dSllai1 
3799facf4a8dSllai1 	/* Add the standard symlinks */
3800facf4a8dSllai1 	for (i = 0; standard_devlinks[i].source != NULL; ++i) {
3801facf4a8dSllai1 		if (di_prof_add_symlink(prof,
3802facf4a8dSllai1 		    standard_devlinks[i].source,
3803facf4a8dSllai1 		    standard_devlinks[i].target)) {
3804facf4a8dSllai1 			zerror(zlogp, B_TRUE, "failed to add "
3805facf4a8dSllai1 			    "standard symlink");
3806facf4a8dSllai1 			goto cleanup;
3807facf4a8dSllai1 		}
3808facf4a8dSllai1 	}
3809facf4a8dSllai1 
3810facf4a8dSllai1 	/* Add user-specified devices and directories */
3811facf4a8dSllai1 	if ((handle = zonecfg_init_handle()) == NULL) {
3812facf4a8dSllai1 		zerror(zlogp, B_FALSE, "can't initialize zone handle");
3813facf4a8dSllai1 		goto cleanup;
3814facf4a8dSllai1 	}
3815facf4a8dSllai1 	if (err = zonecfg_get_handle(zone_name, handle)) {
3816facf4a8dSllai1 		zerror(zlogp, B_FALSE, "can't get handle for zone "
3817facf4a8dSllai1 		    "%s: %s", zone_name, zonecfg_strerror(err));
3818facf4a8dSllai1 		goto cleanup;
3819facf4a8dSllai1 	}
3820facf4a8dSllai1 	if (err = zonecfg_setdevent(handle)) {
3821facf4a8dSllai1 		zerror(zlogp, B_FALSE, "%s: %s", zone_name,
3822facf4a8dSllai1 		    zonecfg_strerror(err));
3823facf4a8dSllai1 		goto cleanup;
3824facf4a8dSllai1 	}
3825facf4a8dSllai1 	while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
3826facf4a8dSllai1 		if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
3827facf4a8dSllai1 			zerror(zlogp, B_TRUE, "failed to add "
3828facf4a8dSllai1 			    "user-specified device");
3829facf4a8dSllai1 			goto cleanup;
3830facf4a8dSllai1 		}
3831facf4a8dSllai1 	}
3832facf4a8dSllai1 	(void) zonecfg_enddevent(handle);
3833facf4a8dSllai1 
3834facf4a8dSllai1 	/* Send profile to kernel */
3835facf4a8dSllai1 	if (di_prof_commit(prof)) {
3836facf4a8dSllai1 		zerror(zlogp, B_TRUE, "failed to commit profile");
3837facf4a8dSllai1 		goto cleanup;
3838facf4a8dSllai1 	}
3839facf4a8dSllai1 
3840facf4a8dSllai1 	retval = 0;
3841facf4a8dSllai1 
3842facf4a8dSllai1 cleanup:
3843facf4a8dSllai1 	if (handle)
3844facf4a8dSllai1 		zonecfg_fini_handle(handle);
3845facf4a8dSllai1 	if (prof)
3846facf4a8dSllai1 		di_prof_fini(prof);
3847facf4a8dSllai1 	return (retval);
3848facf4a8dSllai1 }
3849